PyTorch 速查表
本文档提供 PyTorch 常用 API 的快速参考。
张量创建
import torch
# 从数据创建
torch.tensor([1, 2, 3])
torch.tensor([[1, 2], [3, 4]], dtype=torch.float32)
# 工厂函数
torch.zeros(2, 3) # 全零
torch.ones(2, 3) # 全一
torch.empty(2, 3) # 未初始化
torch.full((2, 3), 7) # 填充值
torch.eye(3) # 单位矩阵
# 随机张量
torch.rand(2, 3) # 均匀分布 [0, 1)
torch.randn(2, 3) # 标准正态分布
torch.randint(0, 10, (2, 3)) # 随机整数
# 序列
torch.arange(0, 10, 2) # [0, 2, 4, 6, 8]
torch.linspace(0, 1, 5) # 等间隔
torch.logspace(0, 2, 5) # 对数间隔
# 基于现有张量
torch.zeros_like(x)
torch.ones_like(x)
torch.rand_like(x, dtype=torch.float)
# NumPy 互转
torch.from_numpy(np_array)
tensor.numpy()
张量属性
x.shape # 形状
x.size() # 形状
x.dim() # 维度数
x.numel() # 元素总数
x.dtype # 数据类型
x.device # 设备
x.is_leaf # 是否叶子节点
x.requires_grad # 是否需要梯度
数据类型
# 类型转换
x.float() # torch.float32
x.double() # torch.float64
x.half() # torch.float16
x.int() # torch.int32
x.long() # torch.int64
x.short() # torch.int16
x.char() # torch.int8
x.byte() # torch.uint8
x.bool() # torch.bool
x.to(torch.float32)
x.to(dtype=torch.float64)
索引与切片
x[0, 1] # 单个元素
x[0] # 第一行
x[:, 0] # 第一列
x[0:2, 1:3] # 切片
x[::2, ::2] # 步长
# 布尔索引
x[x > 0]
x[torch.rand(3) > 0.5]
# 高级索引
x[[0, 2], [1, 2]] # (0,1) 和 (2,2)
torch.index_select(x, dim=0, index=torch.tensor([0, 2]))
torch.masked_select(x, mask)
# 修改
x[0, 0] = 1
x[:, 0] = torch.tensor([1, 2, 3])
x[x < 0] = 0
形状操作
x.reshape(2, 3) # 改变形状
x.view(2, 3) # 改变形状(需连续)
x.flatten() # 展平
x.flatten(1) # 从第1维展平
x.unsqueeze(0) # 增加维度
x.squeeze() # 移除大小为1的维度
x.squeeze(0) # 移除指定维度
x.transpose(0, 1) # 交换两个维度
x.permute(2, 0, 1) # 重新排列维度
x.t() # 转置(仅2D)
x.contiguous() # 使内存连续
拼接与分割
torch.cat([x, y], dim=0) # 拼接
torch.stack([x, y], dim=0) # 堆叠(新增维度)
torch.split(x, 3, dim=0) # 分割
torch.chunk(x, 2, dim=0) # 均匀分割
数学运算
# 逐元素运算
x + y, x - y, x * y, x / y
x ** 2, torch.sqrt(x)
torch.exp(x), torch.log(x)
torch.abs(x), torch.neg(x)
torch.sin(x), torch.cos(x)
# 聚合运算
torch.sum(x), x.sum()
torch.mean(x), x.mean()
torch.max(x), x.max()
torch.min(x), x.min()
torch.std(x), torch.var(x)
torch.prod(x)
torch.norm(x)
# 按维度聚合
x.sum(dim=0)
x.mean(dim=1, keepdim=True)
x.max(dim=1) # 返回 (values, indices)
# 矩阵运算
torch.matmul(x, y) # 矩阵乘法
x @ y # 矩阵乘法
x.mm(y) # 2D矩阵乘法
x.bmm(y) # 批量矩阵乘法
torch.inverse(x) # 逆矩阵
torch.det(x) # 行列式
自动求导
# Create a tensor that tracks gradients.
x = torch.tensor([1.0], requires_grad=True)
# Forward computation.
y = x ** 2
# Backpropagate (populates x.grad).
y.backward()
x.grad  # gradient dy/dx, here 2*x
# Disable gradient tracking for the enclosed ops.
with torch.no_grad():
    y = x * 2
# Detach a tensor from the autograd graph.
z = y.detach()
# Zero the accumulated gradient in place.
x.grad.zero_()
# Gradient clipping (assumes a `model` is defined elsewhere).
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
神经网络模块
import torch.nn as nn
import torch.nn.functional as F
# 常用层
nn.Linear(in_features, out_features)
nn.Conv2d(in_channels, out_channels, kernel_size)
nn.MaxPool2d(kernel_size)
nn.BatchNorm2d(num_features)
nn.Dropout(p=0.5)
nn.LSTM(input_size, hidden_size)
nn.Embedding(num_embeddings, embedding_dim)
# 激活函数
nn.ReLU()
nn.Sigmoid()
nn.Tanh()
nn.Softmax(dim=1)
nn.LeakyReLU(negative_slope=0.01)
# 损失函数
nn.MSELoss()
nn.CrossEntropyLoss()
nn.BCELoss()
nn.NLLLoss()
# 定义模型
class MyModel(nn.Module):
    """Minimal two-layer MLP: 10 -> 20 -> 1 with a ReLU in between."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(10, 20)
        self.fc2 = nn.Linear(20, 1)

    def forward(self, x):
        # x: (batch, 10) -> (batch, 1); last dim must match fc1's in_features.
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x
优化器
import torch.optim as optim
# 常用优化器
optim.SGD(model.parameters(), lr=0.01)
optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
optim.Adam(model.parameters(), lr=0.001)
optim.AdamW(model.parameters(), lr=0.001)
optim.RMSprop(model.parameters(), lr=0.01)
# 训练循环
optimizer.zero_grad()
loss.backward()
optimizer.step()
# 学习率调度
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
scheduler.step()
数据加载
from torch.utils.data import Dataset, DataLoader
# 自定义数据集
class MyDataset(Dataset):
    """Wrap paired data/label sequences for use with a DataLoader."""

    def __init__(self, data, labels):
        # data and labels are parallel sequences of equal length.
        self.data = data
        self.labels = labels

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        # Return one (sample, label) pair.
        return self.data[idx], self.labels[idx]
# Build a DataLoader that batches and shuffles the dataset.
dataloader = DataLoader(
    dataset,            # any torch.utils.data.Dataset
    batch_size=32,      # samples per batch
    shuffle=True,       # reshuffle at every epoch
    num_workers=4,      # worker subprocesses for loading
    drop_last=False,    # keep the final partial batch
)
# Iterate over mini-batches.
for batch_data, batch_labels in dataloader:
    pass
设备管理
# 检查 GPU
torch.cuda.is_available()
torch.cuda.device_count()
# 设备指定
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
x = x.to(device)
model = model.to(device)
# 张量移动
x.cuda()
x.cpu()
x.to('cuda:0')
x.to('cpu')
# 设备信息
torch.cuda.current_device()
torch.cuda.get_device_name(0)
torch.cuda.memory_allocated()
模型保存与加载
# 保存模型
torch.save(model.state_dict(), 'model.pth')
torch.save({
'model_state_dict': model.state_dict(),
'optimizer_state_dict': optimizer.state_dict(),
'epoch': epoch,
}, 'checkpoint.pth')
# 加载模型
model.load_state_dict(torch.load('model.pth'))
checkpoint = torch.load('checkpoint.pth')
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
# 保存整个模型
torch.save(model, 'model_full.pth')
model = torch.load('model_full.pth')
训练模板
# Standard training/evaluation loop template.
# Assumes MyModel, num_epochs, train_loader and val_loader are defined.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = MyModel().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(num_epochs):
    # --- training phase ---
    model.train()
    for batch_data, batch_labels in train_loader:
        batch_data = batch_data.to(device)
        batch_labels = batch_labels.to(device)
        optimizer.zero_grad()                  # clear stale gradients
        outputs = model(batch_data)
        loss = criterion(outputs, batch_labels)
        loss.backward()                        # backpropagate
        optimizer.step()                       # update parameters

    # --- evaluation phase ---
    model.eval()
    with torch.no_grad():
        total_correct = 0
        total_samples = 0
        for batch_data, batch_labels in val_loader:
            batch_data = batch_data.to(device)
            batch_labels = batch_labels.to(device)
            outputs = model(batch_data)
            _, predicted = torch.max(outputs, 1)
            total_correct += (predicted == batch_labels).sum().item()
            total_samples += batch_labels.size(0)
        accuracy = total_correct / total_samples
        print(f'Epoch {epoch}, Accuracy: {accuracy:.4f}')
常用技巧
# 设置随机种子
torch.manual_seed(42)
torch.cuda.manual_seed_all(42)
# 梯度检查
torch.autograd.set_detect_anomaly(True)
# 模型参数统计
sum(p.numel() for p in model.parameters())
sum(p.numel() for p in model.parameters() if p.requires_grad)
# 模型结构打印
print(model)
# 梯度检查点
from torch.utils.checkpoint import checkpoint
# Mixed-precision training with automatic loss scaling.
# NOTE(review): torch.cuda.amp is the legacy namespace; newer releases
# expose the same API as torch.amp (autocast('cuda')) — confirm torch version.
from torch.cuda.amp import autocast, GradScaler

scaler = GradScaler()
with autocast():                      # run forward pass in mixed precision
    outputs = model(inputs)
    loss = criterion(outputs, labels)
scaler.scale(loss).backward()         # scale loss to avoid fp16 underflow
scaler.step(optimizer)                # unscales grads, then optimizer.step()
scaler.update()                       # adjust the scale factor for next step