🎓 Transfer Learning
Transfer learning is one of the most practical techniques in deep learning: take a model pretrained on a large dataset, then fine-tune it on your own, much smaller dataset.
🤔 Why Transfer Learning?
| Training from Scratch | Transfer Learning |
|---|---|
| Needs large amounts of data | Works with little data |
| Long training time | Trains quickly |
| Needs substantial GPU resources | An ordinary GPU suffices |
| Results hard to guarantee | Reliable results |
💡 Analogy
It is like learning to drive cars from different brands:
- Training from scratch: you cannot drive at all, so learning takes a long time
- Transfer learning: you already know how to drive and only need to adapt to the new car's quirks
📦 Pretrained Models in PyTorch
Loading a pretrained model
import torchvision.models as models

# Image classification models
resnet50 = models.resnet50(weights='IMAGENET1K_V1')  # recommended modern API
vgg16 = models.vgg16(weights='IMAGENET1K_V1')
efficientnet = models.efficientnet_b0(weights='IMAGENET1K_V1')

# Legacy API (still works, but deprecated in recent torchvision releases)
# resnet50 = models.resnet50(pretrained=True)

print(resnet50)
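A quick sanity check after loading is to run a forward pass in eval mode. A minimal sketch; the random tensor below merely stands in for a preprocessed 224×224 RGB image, so the predicted class is meaningless:

import torch

resnet50.eval()  # inference mode: disables dropout, uses BatchNorm running stats
dummy = torch.randn(1, 3, 224, 224)  # stand-in for a normalized input image
with torch.no_grad():
    logits = resnet50(dummy)
print(logits.shape)          # torch.Size([1, 1000]) - ImageNet has 1000 classes
print(logits.argmax(dim=1))  # index of the highest-scoring class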
Common pretrained models
| Model | Parameters | Top-1 Accuracy | Speed |
|---|---|---|---|
| ResNet-18 | 11M | 69.8% | Fast |
| ResNet-50 | 25M | 76.1% | Medium |
| VGG-16 | 138M | 71.6% | Slow |
| EfficientNet-B0 | 5M | 77.1% | Fast |
| EfficientNet-B7 | 66M | 84.3% | Slow |
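The parameter counts above are easy to verify yourself (the table rounds them). A small sketch; weights=None builds the architecture only, so nothing is downloaded:

import torchvision.models as models

for name, ctor in [('resnet18', models.resnet18),
                   ('resnet50', models.resnet50),
                   ('vgg16', models.vgg16),
                   ('efficientnet_b0', models.efficientnet_b0)]:
    m = ctor(weights=None)  # architecture only, no pretrained weights
    n_params = sum(p.numel() for p in m.parameters())
    print(f"{name}: {n_params / 1e6:.1f}M parameters")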
🔧 Transfer Learning Strategies
Strategy 1: Feature extraction (freeze all layers)
Train only the final classification layer:
import torch
import torch.nn as nn
import torchvision.models as models

# Load the pretrained model
model = models.resnet50(weights='IMAGENET1K_V1')

# Freeze all parameters
for param in model.parameters():
    param.requires_grad = False

# Replace the final classification layer
# (a freshly created layer has requires_grad=True by default)
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, 10)  # 10 classes

# Only the parameters of model.fc will be trained
print(f"Trainable parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad)}")
Strategy 2: Fine-tuning (unfreeze some layers)
Unfreeze the last few layers and train them together with the new head:
import torch
import torch.nn as nn
import torchvision.models as models

model = models.resnet50(weights='IMAGENET1K_V1')

# Freeze all layers first
for param in model.parameters():
    param.requires_grad = False

# Replace the classification head
model.fc = nn.Linear(model.fc.in_features, 10)

# Unfreeze the last block (layer4)
for param in model.layer4.parameters():
    param.requires_grad = True

# Check the trainable-parameter count
trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
total = sum(p.numel() for p in model.parameters())
print(f"Trainable parameters: {trainable:,} / {total:,} ({100*trainable/total:.1f}%)")
Strategy 3: Fine-tune everything
Unfreeze all layers and train with small learning rates:
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models

model = models.resnet50(weights='IMAGENET1K_V1')
model.fc = nn.Linear(model.fc.in_features, 10)

# Different learning rates for different layers
# (the stem's BatchNorm, model.bn1, must be listed too,
#  otherwise its parameters would never be updated)
optimizer = optim.Adam([
    {'params': model.conv1.parameters(), 'lr': 1e-5},
    {'params': model.bn1.parameters(), 'lr': 1e-5},
    {'params': model.layer1.parameters(), 'lr': 1e-5},
    {'params': model.layer2.parameters(), 'lr': 1e-5},
    {'params': model.layer3.parameters(), 'lr': 1e-4},
    {'params': model.layer4.parameters(), 'lr': 1e-4},
    {'params': model.fc.parameters(), 'lr': 1e-3},  # larger learning rate for the new layer
])
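Hand-built parameter-group lists like this make it easy to silently leave a submodule out (which is why model.bn1 appears above). A quick coverage check, as a small sketch:

# Every parameter in the model should appear in some optimizer group
covered = sum(p.numel() for group in optimizer.param_groups for p in group['params'])
total = sum(p.numel() for p in model.parameters())
print(f"Covered by optimizer: {covered:,} / {total:,}")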
🎯 Complete Transfer Learning Example
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models

# Device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Preprocessing (using ImageNet normalization statistics)
train_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
val_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Load the data (CIFAR-10 as an example)
train_dataset = datasets.CIFAR10('./data', train=True, download=True, transform=train_transform)
val_dataset = datasets.CIFAR10('./data', train=False, transform=val_transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=4)

# Load the pretrained model
model = models.resnet18(weights='IMAGENET1K_V1')

# Freeze everything except layer4 and the classifier head
for name, param in model.named_parameters():
    if 'layer4' not in name and 'fc' not in name:
        param.requires_grad = False

# Replace the classification head
model.fc = nn.Sequential(
    nn.Dropout(0.5),
    nn.Linear(model.fc.in_features, 10)
)
model = model.to(device)

# Loss function and optimizer (only trainable parameters are passed in)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.001)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
# Training
def train_epoch(model, loader, criterion, optimizer):
    model.train()
    total_loss, correct, total = 0, 0, 0
    for data, labels in loader:
        data, labels = data.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(data)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        _, predicted = outputs.max(1)
        correct += predicted.eq(labels).sum().item()
        total += labels.size(0)
    return total_loss / len(loader), 100. * correct / total

def validate(model, loader, criterion):
    model.eval()
    total_loss, correct, total = 0, 0, 0
    with torch.no_grad():
        for data, labels in loader:
            data, labels = data.to(device), labels.to(device)
            outputs = model(data)
            loss = criterion(outputs, labels)
            total_loss += loss.item()
            _, predicted = outputs.max(1)
            correct += predicted.eq(labels).sum().item()
            total += labels.size(0)
    return total_loss / len(loader), 100. * correct / total

# Training loop
best_acc = 0
for epoch in range(20):
    train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer)
    val_loss, val_acc = validate(model, val_loader, criterion)
    scheduler.step()
    print(f"Epoch {epoch+1}: "
          f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}% | "
          f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%")
    if val_acc > best_acc:
        best_acc = val_acc
        torch.save(model.state_dict(), 'best_model.pth')

print(f"Best validation accuracy: {best_acc:.2f}%")
🖼️ Feature Extractor
Using a pretrained model as a fixed feature extractor:
import torch
import torch.nn as nn
import torchvision.models as models

class FeatureExtractor(nn.Module):
    def __init__(self):
        super().__init__()
        # Load a pretrained ResNet
        resnet = models.resnet50(weights='IMAGENET1K_V1')
        # Drop the final classification layer
        self.features = nn.Sequential(*list(resnet.children())[:-1])
        # Freeze
        for param in self.features.parameters():
            param.requires_grad = False

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)  # flatten
        return x

# Usage
extractor = FeatureExtractor()
extractor.eval()

# Extract features
images = torch.randn(10, 3, 224, 224)
with torch.no_grad():
    features = extractor(images)
print(f"Feature shape: {features.shape}")  # [10, 2048]

# These features can then be used to train a simple classifier
classifier = nn.Sequential(
    nn.Linear(2048, 256),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(256, 10)
)
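A common workflow is to precompute the features once and then train only this small classifier on top of them, which is fast even on a CPU. A minimal sketch; the random labels are stand-ins for real labels from your dataset:

import torch.optim as optim

labels = torch.randint(0, 10, (10,))  # stand-in labels for the 10 images above
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(classifier.parameters(), lr=1e-3)

for step in range(5):  # a few optimization steps on the precomputed features
    optimizer.zero_grad()
    loss = criterion(classifier(features), labels)
    loss.backward()
    optimizer.step()
print(f"final loss: {loss.item():.4f}")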
📝 Transfer Learning Best Practices
1. Keep preprocessing consistent
# You must use the same normalization statistics as the pretraining run
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],  # ImageNet mean
    std=[0.229, 0.224, 0.225]    # ImageNet std
)
2. Learning rate settings
# Small learning rate for pretrained layers, larger one for new layers
optimizer = optim.SGD([
    {'params': model.features.parameters(), 'lr': 1e-4},
    {'params': model.classifier.parameters(), 'lr': 1e-2}
], momentum=0.9)
3. Train in stages
# Stage 1: train only the new layers
for param in model.features.parameters():
    param.requires_grad = False
# ... train for a few epochs ...

# Stage 2: unfreeze everything and fine-tune with a small learning rate
for param in model.parameters():
    param.requires_grad = True
# ... continue training (but see the note below about the optimizer) ...
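One detail the staged sketch glosses over: if the stage-1 optimizer was built from only the trainable parameters, flipping requires_grad back to True does not add the newly unfrozen parameters to it. The simplest fix is to rebuild the optimizer for stage 2 (a sketch; the 1e-5 learning rate is an assumed placeholder):

# Rebuild the optimizer so it sees all parameters, with a small learning rate
optimizer = optim.Adam(model.parameters(), lr=1e-5)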
4. Choose a strategy based on your data
| Data Size | Similarity to Pretraining Data | Recommended Strategy |
|---|---|---|
| Small | High | Train only the classifier head |
| Small | Low | Train only the classifier head (results may be limited) |
| Large | High | Fine-tune all layers |
| Large | Low | Fine-tune all layers, or train from scratch |
🏋️ Exercise
# Exercise: cat-vs-dog classification with a pretrained VGG16
# Requirements:
# 1. Load the pretrained VGG16 model
# 2. Freeze the feature-extraction part
# 3. Replace the classifier (2 output classes)
# 4. Configure the optimizer to update only the classifier parameters

# Your code:
Click to reveal the answer
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models

# 1. Load VGG16
model = models.vgg16(weights='IMAGENET1K_V1')

# 2. Freeze the feature-extraction part
for param in model.features.parameters():
    param.requires_grad = False

# 3. Replace the classifier (25088 = 512 * 7 * 7 flattened features)
model.classifier = nn.Sequential(
    nn.Linear(25088, 4096),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(4096, 512),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(512, 2)  # 2 classes
)

# 4. Optimize only the classifier
optimizer = optim.Adam(model.classifier.parameters(), lr=0.001)

# Print the number of trainable parameters
trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Trainable parameters: {trainable:,}")
Next Steps
Finally, let's learn how to create custom layers!