PyTorch学习指南
首页
基础篇
进阶篇
高级篇
实战项目
🚀 编程指南
首页
基础篇
进阶篇
高级篇
实战项目
🚀 编程指南
  • 💡 实战项目

    • 💡 实战项目
    • ✍️ MNIST手写数字识别
    • 🖼️ 图像分类器
    • 💬 情感分析

✍️ MNIST手写数字识别

这是深度学习的"Hello World"项目!我们将构建一个能识别手写数字的模型。

🎯 项目目标

  • 识别0-9的手写数字
  • 准确率达到98%以上
  • 掌握完整的深度学习训练流程

📊 数据集介绍

MNIST数据集包含:

  • 60,000张训练图片
  • 10,000张测试图片
  • 图片大小:28×28像素
  • 灰度图(单通道)
样本示例:
┌────────────────────────┐
│    ▓▓▓▓▓▓              │
│      ▓▓▓▓              │
│        ▓▓              │
│        ▓▓              │
│        ▓▓              │  → 标签: 1
│        ▓▓              │
│        ▓▓              │
│        ▓▓              │
│      ▓▓▓▓▓▓            │
└────────────────────────┘

📝 完整代码

方案一:全连接网络(入门)

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import matplotlib.pyplot as plt

# ==================== Configuration ====================
BATCH_SIZE = 64
EPOCHS = 10
LEARNING_RATE = 0.001
# Prefer the GPU when one is available; every tensor/module is moved here.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print(f"使用设备: {DEVICE}")

# ==================== Data loading ====================
# Preprocessing applied to every image.
transform = transforms.Compose([
    transforms.ToTensor(),  # to tensor; pixel values rescaled from [0,255] to [0,1]
    transforms.Normalize((0.1307,), (0.3081,))  # MNIST global mean and std
])

# Download (first run only) and load the datasets.
train_dataset = datasets.MNIST(
    root='./data', 
    train=True, 
    download=True, 
    transform=transform
)
test_dataset = datasets.MNIST(
    root='./data', 
    train=False, 
    transform=transform
)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

print(f"训练集: {len(train_dataset)} 样本")
print(f"测试集: {len(test_dataset)} 样本")

# ==================== 可视化样本 ====================
def show_samples(dataset, num=10):
    """Display the first `num` samples of a dataset in a single row.

    Args:
        dataset: indexable dataset yielding (image_tensor, label) pairs.
        num: number of samples to show (default 10).

    Saves the figure to 'mnist_samples.png' and shows it on screen.
    """
    # squeeze=False guarantees a 2-D axes array even when num == 1;
    # without it plt.subplots returns a bare Axes object for a single
    # column and axes[i] would raise a TypeError.
    fig, axes = plt.subplots(1, num, figsize=(12, 2), squeeze=False)
    for i in range(num):
        image, label = dataset[i]
        axes[0][i].imshow(image.squeeze(), cmap='gray')
        axes[0][i].set_title(f'{label}')
        axes[0][i].axis('off')
    plt.tight_layout()
    plt.savefig('mnist_samples.png')
    plt.show()

# show_samples(train_dataset)

# ==================== 定义模型 ====================
class MLP(nn.Module):
    """Simple multilayer perceptron for 28x28 grayscale digit images.

    Architecture: 784 -> 512 -> 256 -> 10 with ReLU activations and
    dropout (p=0.2) after each hidden layer. The last layer emits raw
    logits, intended to be paired with nn.CrossEntropyLoss.
    """

    def __init__(self):
        super().__init__()
        # Flatten (N, 1, 28, 28) images to (N, 784) feature vectors.
        self.flatten = nn.Flatten()
        hidden = [
            nn.Linear(28 * 28, 512),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(256, 10),
        ]
        self.layers = nn.Sequential(*hidden)

    def forward(self, x):
        """Return class logits of shape (N, 10) for a batch of images."""
        return self.layers(self.flatten(x))

model = MLP().to(DEVICE)
print(model)

# Total parameter count, as a quick model-size sanity check.
total_params = sum(p.numel() for p in model.parameters())
print(f"总参数量: {total_params:,}")

# ==================== Training setup ====================
criterion = nn.CrossEntropyLoss()  # expects raw logits + integer class labels
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# ==================== 训练函数 ====================
def train_epoch(model, loader, criterion, optimizer, device):
    """Run one optimization pass over `loader`.

    Args:
        model: network to train (switched to train mode here).
        loader: DataLoader yielding (inputs, labels) batches.
        criterion: loss taking (logits, labels).
        optimizer: optimizer stepping `model`'s parameters.
        device: device the batches are moved to.

    Returns:
        Tuple of (mean batch loss, accuracy in percent) over the epoch.
    """
    model.train()
    running_loss = 0.0
    hits = 0
    seen = 0

    for inputs, labels in loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Standard step: clear grads, forward, backward, update.
        optimizer.zero_grad()
        logits = model(inputs)
        batch_loss = criterion(logits, labels)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()
        hits += (logits.argmax(dim=1) == labels).sum().item()
        seen += labels.size(0)

    return running_loss / len(loader), 100. * hits / seen

# ==================== 测试函数 ====================
def test(model, loader, criterion, device):
    """Evaluate `model` on `loader` without updating any weights.

    Args:
        model: network to evaluate (switched to eval mode here).
        loader: DataLoader yielding (inputs, labels) batches.
        criterion: loss taking (logits, labels).
        device: device the batches are moved to.

    Returns:
        Tuple of (mean batch loss, accuracy in percent).
    """
    model.eval()
    batch_losses = []
    hits = 0
    seen = 0

    # No gradients needed for pure evaluation.
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            logits = model(inputs)
            batch_losses.append(criterion(logits, labels).item())
            hits += (logits.argmax(dim=1) == labels).sum().item()
            seen += labels.size(0)

    return sum(batch_losses) / len(loader), 100. * hits / seen

# ==================== Training loop ====================
# Per-epoch metrics, collected for later plotting with plot_history().
history = {'train_loss': [], 'train_acc': [], 'test_loss': [], 'test_acc': []}

for epoch in range(EPOCHS):
    train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, DEVICE)
    test_loss, test_acc = test(model, test_loader, criterion, DEVICE)

    history['train_loss'].append(train_loss)
    history['train_acc'].append(train_acc)
    history['test_loss'].append(test_loss)
    history['test_acc'].append(test_acc)

    print(f"Epoch {epoch+1}/{EPOCHS}")
    print(f"  Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
    print(f"  Test  Loss: {test_loss:.4f}, Test  Acc: {test_acc:.2f}%")

print(f"\n最终测试准确率: {history['test_acc'][-1]:.2f}%")

# ==================== Save the model ====================
# Save only the weights (state_dict), not the whole module object.
torch.save(model.state_dict(), 'mnist_mlp.pth')
print("模型已保存到 mnist_mlp.pth")

# ==================== 可视化训练过程 ====================
def plot_history(history):
    """Plot train/test loss and accuracy curves and save them to PNG.

    Args:
        history: dict with lists under keys 'train_loss', 'test_loss',
            'train_acc', 'test_acc' (one value per epoch).

    Saves the figure to 'training_history.png' and shows it on screen.
    """
    fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 4))

    # Left panel: loss curves.
    loss_ax.plot(history['train_loss'], label='Train')
    loss_ax.plot(history['test_loss'], label='Test')
    loss_ax.set_xlabel('Epoch')
    loss_ax.set_ylabel('Loss')
    loss_ax.set_title('Loss Curve')
    loss_ax.legend()

    # Right panel: accuracy curves.
    acc_ax.plot(history['train_acc'], label='Train')
    acc_ax.plot(history['test_acc'], label='Test')
    acc_ax.set_xlabel('Epoch')
    acc_ax.set_ylabel('Accuracy (%)')
    acc_ax.set_title('Accuracy Curve')
    acc_ax.legend()

    plt.tight_layout()
    plt.savefig('training_history.png')
    plt.show()

# plot_history(history)

方案二:CNN(更高准确率)

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# ==================== Configuration ====================
BATCH_SIZE = 128
EPOCHS = 15
LEARNING_RATE = 0.001
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# ==================== Data loading ====================
# Training-time pipeline includes augmentation; small random rotations
# make the model robust to slanted handwriting.
transform_train = transforms.Compose([
    transforms.RandomRotation(10),  # data augmentation: random rotation
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Test-time pipeline: deterministic preprocessing only, no augmentation.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

train_dataset = datasets.MNIST('./data', train=True, download=True, transform=transform_train)
test_dataset = datasets.MNIST('./data', train=False, transform=transform_test)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

# ==================== CNN模型 ====================
class CNN(nn.Module):
    """Two-stage convolutional network for MNIST digit classification.

    Each stage is (Conv-BN-ReLU) x2 followed by 2x2 max pooling and
    dropout; the classifier head flattens the 64x7x7 feature map and
    emits 10 raw logits (pair with nn.CrossEntropyLoss).
    """

    def __init__(self):
        super().__init__()
        # Stage 1 maps 1->32 channels at 28x28 then pools to 14x14;
        # stage 2 maps 32->64 channels then pools to 7x7.
        conv_stack = []
        for in_ch, out_ch in ((1, 32), (32, 64)):
            conv_stack += [
                nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.Conv2d(out_ch, out_ch, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(2),
                nn.Dropout(0.25),
            ]
        self.conv_layers = nn.Sequential(*conv_stack)

        # Classifier head: 64*7*7 features -> 512 -> 10 logits.
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 7 * 7, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, 10),
        )

    def forward(self, x):
        """Return logits of shape (N, 10) for input of shape (N, 1, 28, 28)."""
        return self.fc_layers(self.conv_layers(x))

model = CNN().to(DEVICE)
print(model)

# ==================== Training ====================
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
# Halve the learning rate every 5 epochs.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

best_acc = 0  # best test accuracy (percent) seen so far

for epoch in range(EPOCHS):
    # --- Train one epoch ---
    model.train()
    train_loss = 0
    train_correct = 0
    train_total = 0

    for data, target in train_loader:
        data, target = data.to(DEVICE), target.to(DEVICE)

        # Standard step: clear grads, forward, backward, update.
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        pred = output.argmax(dim=1)
        train_correct += pred.eq(target).sum().item()
        train_total += target.size(0)

    # --- Evaluate on the test set ---
    model.eval()
    test_correct = 0
    test_total = 0

    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(DEVICE), target.to(DEVICE)
            output = model(data)
            pred = output.argmax(dim=1)
            test_correct += pred.eq(target).sum().item()
            test_total += target.size(0)

    train_acc = 100. * train_correct / train_total
    test_acc = 100. * test_correct / test_total

    print(f"Epoch {epoch+1}/{EPOCHS}: "
          f"Train Acc: {train_acc:.2f}%, Test Acc: {test_acc:.2f}%")

    # Checkpoint only when test accuracy improves.
    if test_acc > best_acc:
        best_acc = test_acc
        torch.save(model.state_dict(), 'mnist_cnn_best.pth')

    scheduler.step()

print(f"\n最佳测试准确率: {best_acc:.2f}%")

🔍 预测示例

import torch
from PIL import Image
import torchvision.transforms as transforms

# Load the trained weights. map_location='cpu' lets a checkpoint that was
# saved on a CUDA machine (the training script runs on DEVICE) load on a
# CPU-only machine; without it torch.load raises when CUDA is absent.
# NOTE(review): on torch >= 1.13, also consider weights_only=True so the
# checkpoint cannot unpickle arbitrary objects.
model = CNN()
model.load_state_dict(torch.load('mnist_cnn_best.pth', map_location='cpu'))
model.eval()  # inference mode: disable dropout, use BatchNorm running stats

# 预测函数
def predict(image_tensor):
    with torch.no_grad():
        output = model(image_tensor.unsqueeze(0))
        prob = torch.softmax(output, dim=1)
        pred = output.argmax(dim=1).item()
        confidence = prob[0][pred].item()
    return pred, confidence

# 从测试集预测
# Rebuild the test set with the same normalization used during training.
test_dataset = datasets.MNIST('./data', train=False, transform=transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
]))

# Predict the first 10 test samples and show prediction vs. ground truth.
for i in range(10):
    image, true_label = test_dataset[i]
    pred, conf = predict(image)
    print(f"样本{i}: 预测={pred}, 真实={true_label}, 置信度={conf:.2%}")

📈 优化建议

1. 提高准确率

  • 增加数据增强(旋转、缩放、平移)
  • 使用更深的网络
  • 使用学习率调度
  • 增加训练轮次

2. 加快训练

  • 使用GPU
  • 增大batch_size
  • 使用混合精度训练

3. 减少过拟合

  • 增加Dropout
  • 使用早停
  • 使用正则化(权重衰减)

❓ 常见问题

Q: 为什么我的准确率只有90%多?

A: 检查以下几点:

  1. 是否使用了数据标准化
  2. 学习率是否合适
  3. 是否训练了足够多的轮次

Q: 如何识别自己手写的数字?

A:

  1. 拍照或截图
  2. 转为灰度图并调整为28×28
  3. 标准化后输入模型

下一个项目

准备好了吗?让我们挑战更复杂的图像分类器!

上次更新: 2025/11/25 18:38
Prev
💡 实战项目
Next
🖼️ 图像分类器