PyTorch 速查表
本文档提供 PyTorch 常用 API 的快速参考,涵盖从基础操作到高级功能的常用代码片段。
安装与配置
安装命令
# CPU 版本
pip install torch torchvision torchaudio
# GPU 版本(CUDA 12.4)
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124
# GPU 版本(CUDA 12.1)
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
# 使用 conda(注意:PyTorch 2.6+ 不再发布 conda 包)
conda install pytorch torchvision torchaudio pytorch-cuda=12.1 -c pytorch -c nvidia
验证安装
import torch
print(f"PyTorch 版本: {torch.__version__}")
print(f"CUDA 可用: {torch.cuda.is_available()}")
print(f"CUDA 版本: {torch.version.cuda}")
print(f"GPU 数量: {torch.cuda.device_count()}")
print(f"当前 GPU: {torch.cuda.current_device()}")
print(f"GPU 名称: {torch.cuda.get_device_name(0)}")
张量创建
import torch
import numpy as np
# 从数据创建
torch.tensor([1, 2, 3])
torch.tensor([[1, 2], [3, 4]], dtype=torch.float32)
torch.as_tensor([1, 2, 3]) # 尽可能共享内存(如输入为 NumPy 数组时)
torch.from_numpy(np.array([1, 2, 3])) # 从 NumPy(共享内存)
# 工厂函数
torch.zeros(2, 3) # 全零
torch.ones(2, 3) # 全一
torch.empty(2, 3) # 未初始化
torch.full((2, 3), 7) # 填充值
torch.eye(3) # 单位矩阵
# 随机张量
torch.rand(2, 3) # 均匀分布 [0, 1)
torch.randn(2, 3) # 标准正态分布
torch.randint(0, 10, (2, 3)) # 随机整数 [low, high)
torch.randperm(10) # 随机排列
# 序列
torch.arange(0, 10, 2) # [0, 2, 4, 6, 8]
torch.linspace(0, 1, 5) # 等间隔
torch.logspace(0, 2, 5) # 对数间隔
# 基于现有张量
torch.zeros_like(x)
torch.ones_like(x)
torch.empty_like(x)
torch.rand_like(x, dtype=torch.float)
torch.full_like(x, fill_value=7)
张量属性
x.shape # 形状(torch.Size)
x.size() # 形状
x.dim() # 维度数
x.ndim # 维度数
x.numel() # 元素总数
x.dtype # 数据类型
x.device # 设备
x.is_leaf # 是否叶子节点
x.requires_grad # 是否需要梯度
x.grad # 梯度值
x.data # 数据(已不推荐,建议用 x.detach())
数据类型
# 类型转换
x.float() # torch.float32
x.double() # torch.float64
x.half() # torch.float16
x.bfloat16() # torch.bfloat16
x.int() # torch.int32
x.long() # torch.int64
x.short() # torch.int16
x.char() # torch.int8
x.byte() # torch.uint8
x.bool() # torch.bool
# 使用 to 方法
x.to(torch.float32)
x.to(dtype=torch.float64)
x.to(dtype=torch.half)
# 类型检查
x.is_floating_point()
x.is_complex()
索引与切片
# 基本索引
x[0, 1] # 单个元素
x[0] # 第一行
x[:, 0] # 第一列
x[-1] # 最后一行
x[-1, -1] # 最后一个元素
# 切片
x[0:2] # 前两行
x[:, 1:3] # 第2-3列
x[::2, ::2] # 步长为2
x[1:3, 2:4] # 子区域
# 布尔索引
x[x > 0] # 大于0的元素
x[torch.rand(3) > 0.5]
torch.masked_select(x, mask)
# 高级索引
x[[0, 2], [1, 2]] # (0,1) 和 (2,2)
torch.index_select(x, dim=0, index=torch.tensor([0, 2]))
torch.gather(x, dim=1, index=indices)
# 修改
x[0, 0] = 1
x[:, 0] = torch.tensor([1, 2, 3])
x[x < 0] = 0
x.clamp_(min=0, max=1) # 原地裁剪
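其中 torch.gather 的取值规则不太直观,下面是一个最小示例(x、idx 均为此处自行构造的演示数据):
x = torch.arange(12).reshape(3, 4)
idx = torch.tensor([[0, 3], [2, 1], [1, 0]])
torch.gather(x, dim=1, index=idx) # out[i][j] = x[i][idx[i][j]]
# tensor([[0, 3],
#         [6, 5],
#         [9, 8]])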
形状操作
# 改变形状
x.reshape(2, 3) # 尽量返回视图,必要时复制
x.view(2, 3) # 需要连续内存
x.view(-1, 2) # -1 自动推断
x.flatten() # 展平
x.flatten(1) # 从第1维展平
x.ravel() # 展平(连续视图)
# 维度操作
x.unsqueeze(0) # 在位置0增加维度
x.squeeze() # 移除所有大小为1的维度
x.squeeze(0) # 移除指定维度
x.expand(3, 4) # 扩展(广播)
x.expand_as(other) # 扩展为other形状
x.repeat(2, 3) # 复制
# 维度重排
x.transpose(0, 1) # 交换两个维度
x.t() # 转置(仅2D)
x.permute(2, 0, 1) # 重新排列所有维度
x.movedim(0, 2) # 移动维度
# 内存连续性
x.is_contiguous() # 检查是否连续
x.contiguous() # 返回连续副本
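连续性的影响可以用一个小示例说明(x 为演示用的 2×3 张量):transpose 之后内存不连续,view 会报错,需要先 contiguous(),或改用 reshape:
x = torch.randn(2, 3)
y = x.transpose(0, 1)      # 非连续视图
y.is_contiguous()          # False
# y.view(6)                # 抛出 RuntimeError
y.contiguous().view(6)     # 可行
y.reshape(6)               # reshape 会在必要时自动复制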
拼接与分割
# 拼接(不增加维度)
torch.cat([x, y], dim=0) # 沿维度拼接
torch.cat([x, y], dim=1)
# 堆叠(增加新维度)
torch.stack([x, y], dim=0) # 新增维度0
# 分割
torch.split(x, 3, dim=0) # 每份3个
torch.split(x, [2, 3], dim=0) # 分成2和3
torch.chunk(x, 2, dim=0) # 均匀分成2份
# 分离
torch.unbind(x, dim=0) # 沿维度拆成元组
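cat 与 stack 的区别用形状看最直观(以下 x、y 均为演示用的 2×3 张量):
x = torch.zeros(2, 3)
y = torch.ones(2, 3)
torch.cat([x, y], dim=0).shape    # torch.Size([4, 3])
torch.cat([x, y], dim=1).shape    # torch.Size([2, 6])
torch.stack([x, y], dim=0).shape  # torch.Size([2, 2, 3]),新增一个维度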
数学运算
逐元素运算
# 基本运算
x + y, x - y, x * y, x / y
x ** 2, x.pow(2)
torch.sqrt(x), x.sqrt()
torch.exp(x), x.exp()
torch.log(x), x.log()
torch.log10(x), torch.log2(x)
# 符号与绝对值
torch.abs(x), x.abs()
torch.neg(x), -x
torch.sign(x), x.sign()
# 三角函数
torch.sin(x), torch.cos(x), torch.tan(x)
torch.sinh(x), torch.cosh(x), torch.tanh(x)
torch.atan(x), torch.atan2(y, x)
# 取整
torch.round(x)
torch.floor(x), torch.ceil(x)
torch.trunc(x), torch.frac(x)
# 裁剪
torch.clamp(x, min=0, max=1)
x.clamp(min=0)
torch.clip(x, min=0, max=1)
聚合运算
# 基本聚合
torch.sum(x), x.sum()
torch.mean(x), x.mean()
torch.max(x), x.max()
torch.min(x), x.min()
torch.std(x), x.std()
torch.var(x), x.var()
torch.prod(x), x.prod()
torch.norm(x), x.norm()
# 按维度聚合
x.sum(dim=0)
x.mean(dim=1, keepdim=True) # 保持维度
x.max(dim=1) # 返回 (values, indices)
x.min(dim=1)
x.argmax(dim=1) # 最大值索引
x.argmin(dim=1) # 最小值索引
# 其他聚合
torch.median(x)
torch.mode(x)
torch.unique(x) # 唯一值
torch.sort(x) # 返回 (sorted, indices)
torch.topk(x, k, dim=1) # 前k个
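max/topk 按维度调用时返回 (values, indices) 二元组,小示例(x 为演示数据):
x = torch.tensor([[1., 5., 3., 2.],
                  [4., 0., 6., 1.]])
values, indices = x.max(dim=1)          # values: [5., 6.], indices: [1, 2]
topv, topi = torch.topk(x, k=2, dim=1)  # 每行最大的两个值及其索引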
矩阵运算
# 矩阵乘法
torch.matmul(x, y) # 通用矩阵乘法
x @ y # 矩阵乘法(推荐)
x.mm(y) # 2D矩阵乘法
x.bmm(y) # 批量矩阵乘法 (3D)
x.matmul(y)
# 向量运算
torch.dot(a, b) # 向量点积
torch.cross(a, b) # 向量叉积
# 矩阵属性
torch.linalg.inv(x) # 逆矩阵
torch.linalg.det(x) # 行列式
torch.linalg.svd(x) # 奇异值分解
torch.linalg.eig(x) # 特征值分解(torch.eig 已在新版本中移除)
torch.linalg.matrix_rank(x) # 矩阵秩
# 矩阵变换
x.t() # 转置
x.T # 转置
x.H # 共轭转置
torch.trace(x) # 迹
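matmul 对不同维度输入的行为(1D 视为向量、2D 视为矩阵、3D 及以上按批量并广播)可以用形状快速确认,以下仅作示意:
a = torch.randn(3)
b = torch.randn(3)
torch.matmul(a, b).shape   # torch.Size([]),即点积得到标量
A = torch.randn(2, 3)
B = torch.randn(3, 4)
(A @ B).shape              # torch.Size([2, 4])
X = torch.randn(10, 2, 3)
Y = torch.randn(10, 3, 4)
torch.matmul(X, Y).shape   # torch.Size([10, 2, 4]),批量矩阵乘法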
比较运算
# 逐元素比较
x > y, x < y, x >= y, x <= y
x == y, x != y
torch.eq(x, y), torch.ne(x, y)
torch.gt(x, y), torch.lt(x, y)
torch.ge(x, y), torch.le(x, y)
# 布尔运算
torch.logical_and(x, y)
torch.logical_or(x, y)
torch.logical_not(x)
torch.logical_xor(x, y)
# 条件选择
torch.where(condition, x, y)
torch.masked_select(x, mask)
x.masked_fill(mask, value)
# 比较聚合
torch.equal(x, y) # 完全相等
torch.allclose(x, y, rtol=1e-5, atol=1e-8) # 近似相等
torch.isfinite(x), torch.isinf(x), torch.isnan(x)
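torch.where 按条件逐元素选择,小示例(x 为演示数据):
x = torch.tensor([-1., 2., -3.])
torch.where(x > 0, x, torch.zeros_like(x))  # tensor([0., 2., 0.])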
自动求导
# 创建需要梯度的张量
x = torch.tensor([1.0], requires_grad=True)
x = torch.randn(3, requires_grad=True)
# 运算
y = x ** 2
z = y.sum()
# 反向传播
z.backward()
x.grad # 梯度
# 禁用梯度
with torch.no_grad():
    y = x * 2
# 分离张量
z = y.detach() # 新张量,不需要梯度
# 清零梯度
x.grad.zero_()
optimizer.zero_grad() # 训练时
# 梯度控制
x.requires_grad_(False) # 原地修改
x.requires_grad_(True) # 启用梯度
# 梯度裁剪
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
torch.nn.utils.clip_grad_value_(model.parameters(), clip_value=0.5)
# 高阶导数
x = torch.tensor([2.0], requires_grad=True)
y = x ** 3
grad1 = torch.autograd.grad(y, x, create_graph=True)[0] # 一阶导
grad2 = torch.autograd.grad(grad1, x)[0] # 二阶导
神经网络模块
import torch.nn as nn
import torch.nn.functional as F
# 常用层
nn.Linear(in_features, out_features)
nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0)
nn.ConvTranspose2d(in_channels, out_channels, kernel_size)
nn.MaxPool2d(kernel_size, stride=None, padding=0)
nn.AvgPool2d(kernel_size)
nn.AdaptiveAvgPool2d(output_size)
nn.BatchNorm1d(num_features)
nn.BatchNorm2d(num_features)
nn.LayerNorm(normalized_shape)
nn.Dropout(p=0.5)
nn.LSTM(input_size, hidden_size, num_layers)
nn.GRU(input_size, hidden_size, num_layers)
nn.Embedding(num_embeddings, embedding_dim)
nn.Transformer(d_model=512, nhead=8)
nn.MultiheadAttention(embed_dim, num_heads)
# 激活函数
nn.ReLU()
nn.ReLU6()
nn.LeakyReLU(negative_slope=0.01)
nn.PReLU()
nn.ELU()
nn.SELU()
nn.GELU()
nn.Sigmoid()
nn.Tanh()
nn.Softmax(dim=1)
nn.LogSoftmax(dim=1)
nn.Softplus()
nn.Softshrink(lambd=0.5)
# 损失函数
nn.MSELoss()
nn.L1Loss()
nn.SmoothL1Loss()
nn.CrossEntropyLoss()
nn.BCELoss()
nn.BCEWithLogitsLoss()
nn.NLLLoss()
nn.KLDivLoss()
nn.CosineEmbeddingLoss()
nn.TripletMarginLoss()
nn.CTCLoss()
定义模型
class MyModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.bn1 = nn.BatchNorm1d(hidden_size)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = self.fc1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        return x
# 使用 Sequential
model = nn.Sequential(
nn.Linear(784, 256),
nn.ReLU(),
nn.Dropout(0.2),
nn.Linear(256, 10)
)
# 带命名
from collections import OrderedDict
model = nn.Sequential(OrderedDict([
('fc1', nn.Linear(784, 256)),
('relu', nn.ReLU()),
('fc2', nn.Linear(256, 10))
]))
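使用示例(输入/输出尺寸仅为演示假设):
model = MyModel(input_size=784, hidden_size=256, output_size=10)
x = torch.randn(32, 784)   # batch_size=32
logits = model(x)          # 形状 [32, 10]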
模型操作
# 参数统计
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
# 访问参数
for name, param in model.named_parameters():
    print(name, param.shape)
# 访问子模块
model.fc1
model.children() # 直接子模块
model.modules() # 所有模块
# 模型设备
model.to('cuda')
model.to('cpu')
model.cuda()
model.cpu()
# 模型状态
model.train() # 训练模式
model.eval() # 评估模式
# 参数初始化
def init_weights(m):
    if isinstance(m, nn.Linear):
        nn.init.xavier_uniform_(m.weight)
        if m.bias is not None:
            nn.init.zeros_(m.bias)

model.apply(init_weights)
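微调时常见的冻结参数写法(示意,假设沿用上文带 fc2 分类头的 MyModel):
for name, param in model.named_parameters():
    if not name.startswith('fc2'):
        param.requires_grad = False   # 冻结除分类头外的参数
# 只把可训练参数交给优化器
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-3)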
优化器
import torch.optim as optim
# 常用优化器
optim.SGD(model.parameters(), lr=0.01)
optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)
optim.Adam(model.parameters(), lr=0.001)
optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999), weight_decay=1e-4)
optim.AdamW(model.parameters(), lr=0.001, weight_decay=0.01)
optim.RMSprop(model.parameters(), lr=0.01)
optim.Adagrad(model.parameters(), lr=0.01)
# 训练步骤
optimizer.zero_grad()
loss.backward()
optimizer.step()
# 学习率
for param_group in optimizer.param_groups:
    print(param_group['lr'])
# 手动调整学习率
for param_group in optimizer.param_groups:
    param_group['lr'] = 0.0001
# 参数组
optimizer = optim.Adam([
{'params': model.base.parameters(), 'lr': 1e-4},
{'params': model.classifier.parameters(), 'lr': 1e-3}
])
学习率调度器
from torch.optim import lr_scheduler
# 步骤衰减
scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
# 多步衰减
scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[30, 60], gamma=0.1)
# 指数衰减
scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=0.95)
# 余弦退火
scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=100, eta_min=1e-6)
# 余弦退火重启
scheduler = lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2)
# 线性热身
scheduler = lr_scheduler.LinearLR(optimizer, start_factor=0.1, total_iters=10)
# 链式调度器
scheduler = lr_scheduler.SequentialLR(optimizer, [
lr_scheduler.LinearLR(optimizer, start_factor=0.1, total_iters=10),
lr_scheduler.CosineAnnealingLR(optimizer, T_max=90)
], milestones=[10])
# OneCycleLR
scheduler = lr_scheduler.OneCycleLR(
optimizer, max_lr=0.1, epochs=10, steps_per_epoch=len(train_loader)
)
# 使用方式
for epoch in range(epochs):
    train(...)
    validate(...)
    scheduler.step() # 每个 epoch 后调用
# 或者每个 batch 后调用(如 OneCycleLR)
for batch in train_loader:
    train_step(...)
    scheduler.step()
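可以用 get_last_lr() 观察学习率的变化,以 StepLR 为例的示意写法:
optimizer = optim.SGD(model.parameters(), lr=0.1)
scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
for epoch in range(30):
    # ... 训练 ...
    scheduler.step()
    print(epoch, scheduler.get_last_lr())  # 第 10、20 个 epoch 之后学习率各乘以 0.1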
数据加载
from torch.utils.data import Dataset, DataLoader
# 自定义数据集
class MyDataset(Dataset):
    def __init__(self, data, labels, transform=None):
        self.data = data
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        x = self.data[idx]
        y = self.labels[idx]
        if self.transform:
            x = self.transform(x)
        return x, y
# DataLoader
loader = DataLoader(
dataset,
batch_size=32,
shuffle=True,
num_workers=4,
pin_memory=True,
drop_last=False,
collate_fn=None
)
# 迭代
for batch_x, batch_y in loader:
    pass
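一个端到端的小示例(用随机张量演示上面的 MyDataset 与 DataLoader):
data = torch.randn(100, 20)
labels = torch.randint(0, 2, (100,))
dataset = MyDataset(data, labels)
loader = DataLoader(dataset, batch_size=16, shuffle=True)
for batch_x, batch_y in loader:
    print(batch_x.shape, batch_y.shape)  # torch.Size([16, 20]) torch.Size([16])
    break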
数据变换
from torchvision import transforms
# 常用变换
transforms.ToTensor() # PIL/numpy -> tensor
transforms.ToPILImage() # tensor -> PIL
transforms.Resize((224, 224)) # 调整大小
transforms.CenterCrop(224) # 中心裁剪
transforms.RandomCrop(224) # 随机裁剪
transforms.RandomResizedCrop(224) # 随机裁剪+缩放
transforms.RandomHorizontalFlip(p=0.5) # 随机水平翻转
transforms.RandomVerticalFlip(p=0.5) # 随机垂直翻转
transforms.RandomRotation(degrees=15) # 随机旋转
transforms.ColorJitter(brightness=0.2, contrast=0.2) # 颜色抖动
transforms.Grayscale(num_output_channels=3) # 灰度化
transforms.RandomGrayscale(p=0.1) # 随机灰度化
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
transforms.RandomErasing(p=0.5) # 随机擦除
# 组合变换
transform = transforms.Compose([
transforms.Resize(256),
transforms.CenterCrop(224),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
# AutoAugment(自动增强)
transforms.AutoAugment(transforms.AutoAugmentPolicy.IMAGENET)
transforms.RandAugment(num_ops=2, magnitude=9)
transforms.TrivialAugmentWide()
设备管理
# 检查设备
torch.cuda.is_available()
torch.cuda.device_count()
torch.cuda.current_device()
torch.cuda.get_device_name(0)
torch.cuda.memory_allocated()
torch.cuda.memory_reserved()
torch.cuda.max_memory_allocated()
# Apple Silicon GPU
torch.backends.mps.is_available()
torch.backends.mps.is_built()
# 设备指定
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device = torch.device('cuda:0')
device = torch.device('mps') # Apple Silicon
# 移动张量
x = x.to(device)
x = x.cuda()
x = x.cpu()
x = x.to('cuda:0')
# 移动模型
model = model.to(device)
# 清理显存
torch.cuda.empty_cache()
torch.cuda.synchronize()
模型保存与加载
# 保存/加载 state_dict(推荐)
torch.save(model.state_dict(), 'model.pth')
model.load_state_dict(torch.load('model.pth'))
# 保存/加载整个模型
torch.save(model, 'model.pth')
model = torch.load('model.pth', weights_only=False) # 新版本中 weights_only 默认开启,加载整个模型需显式关闭
# 保存/加载检查点
checkpoint = {
'epoch': epoch,
'model_state_dict': model.state_dict(),
'optimizer_state_dict': optimizer.state_dict(),
'scheduler_state_dict': scheduler.state_dict(),
'loss': loss,
'best_acc': best_acc
}
torch.save(checkpoint, 'checkpoint.pth')
checkpoint = torch.load('checkpoint.pth')
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
# 跨设备加载
model.load_state_dict(torch.load('model.pth', map_location='cpu'))
model.load_state_dict(torch.load('model.pth', map_location='cuda:0'))
# 加载部分参数
pretrained_dict = torch.load('pretrained.pth')
model_dict = model.state_dict()
pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
model_dict.update(pretrained_dict)
model.load_state_dict(model_dict)
PyTorch 2.x 新特性
torch.compile
# 编译模型(自动优化)
model = torch.compile(model)
# 指定后端
model = torch.compile(model, backend='inductor')
# 编译模式
model = torch.compile(model, mode='default') # 默认
model = torch.compile(model, mode='reduce-overhead') # 减少开销
model = torch.compile(model, mode='max-autotune') # 最大优化
# 部分编译
class MyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv = torch.compile(nn.Conv2d(3, 64, 3))
        self.fc = nn.Linear(64, 10)

    def forward(self, x):
        return self.fc(self.conv(x).mean(dim=[2, 3]))
# 禁用编译(调试,torch.compiler.disable 作为装饰器使用)
@torch.compiler.disable
def run_eager(x):
    return model(x)
output = run_eager(input)
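torch.compile 也可以直接作用于普通函数,最小示例如下(函数名为演示自拟):
@torch.compile
def pointwise_ops(x):
    return torch.sin(x) + torch.cos(x) ** 2

y = pointwise_ops(torch.randn(1000))  # 首次调用触发编译,之后复用已编译结果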
混合精度训练
from torch.amp import autocast, GradScaler  # torch.cuda.amp 写法在新版本中已弃用
scaler = GradScaler('cuda')
for data, target in dataloader:
    data, target = data.cuda(), target.cuda()
    optimizer.zero_grad()
    with autocast('cuda'):
        output = model(data)
        loss = criterion(output, target)
    scaler.scale(loss).backward()
    scaler.step(optimizer)
    scaler.update()
# PyTorch 2.0+ 更简洁的方式
with torch.autocast(device_type='cuda', dtype=torch.float16):
    output = model(input)
    loss = criterion(output, target)
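在支持 bfloat16 的硬件上也常用 bfloat16 autocast,此时通常不需要 GradScaler(示意写法,沿用上面的变量名):
with torch.autocast(device_type='cuda', dtype=torch.bfloat16):
    output = model(input)
    loss = criterion(output, target)
loss.backward()    # bfloat16 动态范围大,一般无需梯度缩放
optimizer.step()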
梯度检查点
from torch.utils.checkpoint import checkpoint
class MyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = nn.ModuleList([nn.Linear(100, 100) for _ in range(10)])

    def forward(self, x):
        for layer in self.layers:
            # 使用检查点节省显存
            x = checkpoint(layer, x, use_reentrant=False)
        return x
训练模板
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
# 配置
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
epochs = 100
lr = 0.001
batch_size = 32
# 模型、损失、优化器
model = MyModel().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=lr)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)
# 训练循环
best_acc = 0.0
for epoch in range(epochs):
    # 训练
    model.train()
    train_loss = 0.0
    train_correct = 0
    train_total = 0
    for batch_x, batch_y in train_loader:
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)
        optimizer.zero_grad()
        outputs = model(batch_x)
        loss = criterion(outputs, batch_y)
        loss.backward()
        # 梯度裁剪
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        train_loss += loss.item()
        _, predicted = outputs.max(1)
        train_total += batch_y.size(0)
        train_correct += predicted.eq(batch_y).sum().item()
    # 验证
    model.eval()
    val_correct = 0
    val_total = 0
    with torch.no_grad():
        for batch_x, batch_y in val_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            outputs = model(batch_x)
            _, predicted = outputs.max(1)
            val_total += batch_y.size(0)
            val_correct += predicted.eq(batch_y).sum().item()
    train_acc = 100. * train_correct / train_total
    val_acc = 100. * val_correct / val_total
    scheduler.step()
    # 保存最佳模型
    if val_acc > best_acc:
        best_acc = val_acc
        torch.save(model.state_dict(), 'best_model.pth')
    print(f'Epoch {epoch+1}: Train Acc={train_acc:.2f}%, Val Acc={val_acc:.2f}%')
常用技巧
可复现性
# 设置随机种子
import random
import numpy as np
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)
torch.cuda.manual_seed_all(42)
# 确定性模式
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
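DataLoader 的多进程 worker 也需要单独固定种子,一种常见写法(供参考,dataset 为任意已有数据集):
def seed_worker(worker_id):
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)

g = torch.Generator()
g.manual_seed(42)
loader = DataLoader(dataset, batch_size=32, shuffle=True,
                    num_workers=4, worker_init_fn=seed_worker, generator=g)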
梯度累积
accumulation_steps = 4
optimizer.zero_grad()
for i, (batch_x, batch_y) in enumerate(train_loader):
    outputs = model(batch_x)
    loss = criterion(outputs, batch_y) / accumulation_steps
    loss.backward()
    if (i + 1) % accumulation_steps == 0:
        optimizer.step()
        optimizer.zero_grad()
多 GPU 数据并行
# 单机多 GPU
model = nn.DataParallel(model)
# 指定 GPU
model = nn.DataParallel(model, device_ids=[0, 1, 2, 3])
# DistributedDataParallel(推荐)
from torch.nn.parallel import DistributedDataParallel as DDP
model = DDP(model, device_ids=[local_rank])
自定义层
class CustomLayer(nn.Module):
    def __init__(self, in_features, out_features):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(out_features, in_features))
        self.bias = nn.Parameter(torch.zeros(out_features))

    def forward(self, x):
        return x @ self.weight.t() + self.bias
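使用方式与内置层相同(尺寸仅为演示):
layer = CustomLayer(in_features=8, out_features=4)
out = layer(torch.randn(2, 8))                      # 形状 [2, 4]
print(sum(p.numel() for p in layer.parameters()))   # 8*4 + 4 = 36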
钩子函数
# 前向钩子
def forward_hook(module, input, output):
    print(f'Output shape: {output.shape}')
layer.register_forward_hook(forward_hook)
# 反向钩子
def backward_hook(module, grad_input, grad_output):
    print(f'Gradient shape: {grad_output[0].shape}')
layer.register_full_backward_hook(backward_hook)  # register_backward_hook 已弃用
# 移除钩子
handle = layer.register_forward_hook(forward_hook)
handle.remove()
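钩子常用来抓取中间层激活,示意写法(假设沿用上文以 784 维输入构建、含 fc1 子模块的 MyModel):
activations = {}

def save_activation(name):
    def hook(module, input, output):
        activations[name] = output.detach()
    return hook

handle = model.fc1.register_forward_hook(save_activation('fc1'))
_ = model(torch.randn(2, 784))   # 前向一次后,activations['fc1'] 即为该层输出
handle.remove()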
TensorBoard 集成
from torch.utils.tensorboard import SummaryWriter
writer = SummaryWriter('runs/experiment_1')
# 记录损失
writer.add_scalar('Loss/train', train_loss, epoch)
writer.add_scalar('Loss/val', val_loss, epoch)
# 记录学习率
writer.add_scalar('LR', optimizer.param_groups[0]['lr'], epoch)
# 记录模型图
writer.add_graph(model, input_to_model=torch.randn(1, 3, 224, 224))
# 记录图像
writer.add_image('input', img_tensor, epoch)
# 记录直方图
writer.add_histogram('layer1_weights', model.layer1.weight, epoch)
writer.close()