🧠 Building Neural Networks

PyTorch uses nn.Module to build neural networks. This section shows how to build your own network from scratch.

📝 nn.Module Basics

Every neural network inherits from nn.Module:

import torch
import torch.nn as nn

class SimpleNet(nn.Module):
    def __init__(self):
        super().__init__()  # calling the parent constructor is required
        # define the layers here
        self.fc1 = nn.Linear(10, 20)  # fully connected: 10 inputs → 20 outputs
        self.fc2 = nn.Linear(20, 1)   # fully connected: 20 inputs → 1 output
    
    def forward(self, x):
        # define the forward-pass logic
        x = self.fc1(x)
        x = torch.relu(x)  # activation function
        x = self.fc2(x)
        return x

# create a model instance
model = SimpleNet()
print(model)

Output:

SimpleNet(
  (fc1): Linear(in_features=10, out_features=20, bias=True)
  (fc2): Linear(in_features=20, out_features=1, bias=True)
)

Using the Model

# create input data (batch_size=5, features=10)
x = torch.randn(5, 10)

# forward pass (the forward method is called automatically)
output = model(x)
print(output.shape)  # torch.Size([5, 1])

🧱 Common Layers

Linear Layer (Fully Connected)

import torch
import torch.nn as nn

# Linear(in_features, out_features)
fc = nn.Linear(100, 50)

x = torch.randn(32, 100)  # 32 samples, 100 features each
y = fc(x)
print(y.shape)  # torch.Size([32, 50])
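
Under the hood, nn.Linear stores a weight matrix of shape (out_features, in_features) and a bias vector of shape (out_features,), and computes y = x @ W.T + b. A quick check, reusing fc and x from above:

print(fc.weight.shape)  # torch.Size([50, 100])
print(fc.bias.shape)    # torch.Size([50])

# the layer computes y = x @ W.T + b
manual = x @ fc.weight.T + fc.bias
print(torch.allclose(fc(x), manual))  # True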

Activation Functions

Activation functions introduce non-linearity, allowing the network to learn complex patterns:

import torch
import torch.nn as nn
import torch.nn.functional as F

x = torch.tensor([-2.0, -1.0, 0.0, 1.0, 2.0])

# ReLU: max(0, x)
print(torch.relu(x))         # tensor([0., 0., 0., 1., 2.])
print(F.relu(x))             # functional style
print(nn.ReLU()(x))          # module style

# Sigmoid: 1/(1+e^(-x)), output in (0, 1)
print(torch.sigmoid(x))      # tensor([0.1192, 0.2689, 0.5000, 0.7311, 0.8808])

# Tanh: output in (-1, 1)
print(torch.tanh(x))         # tensor([-0.9640, -0.7616, 0.0000, 0.7616, 0.9640])

# Softmax: turns scores into a probability distribution (sums to 1)
logits = torch.tensor([1.0, 2.0, 3.0])
print(F.softmax(logits, dim=0))  # tensor([0.0900, 0.2447, 0.6652])

💡 How do I choose an activation function?

  • Hidden layers: ReLU is the default choice, simple and efficient
  • Binary classification output: Sigmoid (outputs a probability)
  • Multi-class output: Softmax (outputs a probability distribution)
  • Regression output: no activation (output raw values); see the sketch below
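
The sketch below shows each choice as a small output head; the 8-dimensional features and layer sizes are made up purely for illustration:

import torch
import torch.nn as nn

x = torch.randn(4, 8)  # a dummy batch of hidden features

# binary classification head: one output squashed to a probability
binary_head = nn.Sequential(nn.Linear(8, 1), nn.Sigmoid())
print(binary_head(x).squeeze())   # 4 values in (0, 1)

# multi-class head: 3 scores turned into a probability distribution
multi_head = nn.Sequential(nn.Linear(8, 3), nn.Softmax(dim=1))
print(multi_head(x).sum(dim=1))   # each row sums to 1

# regression head: no activation, outputs any real value
reg_head = nn.Linear(8, 1)
print(reg_head(x).squeeze())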

Dropout Layer (Preventing Overfitting)

import torch
import torch.nn as nn

# Dropout randomly zeroes a fraction of the elements
dropout = nn.Dropout(p=0.5)  # each element is zeroed with probability 0.5

x = torch.randn(5, 10)
print(x)

# Dropout is active in training mode
dropout.train()
y = dropout(x)
print(y)  # some values are 0

# Dropout is disabled in evaluation mode
dropout.eval()
y = dropout(x)
print(y)  # all values pass through unchanged
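
Note that PyTorch uses "inverted dropout": surviving values are scaled by 1/(1-p) during training so the expected activation matches eval mode. A quick check, reusing dropout and x from above:

# surviving elements equal x / (1 - p)
dropout.train()
y = dropout(x)
mask = y != 0
print(torch.allclose(y[mask], x[mask] / (1 - 0.5)))  # True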

Batch Normalization (BatchNorm)

import torch
import torch.nn as nn

# normalize each feature across the batch
bn = nn.BatchNorm1d(100)  # 100 features

x = torch.randn(32, 100)
y = bn(x)
print(y.mean(dim=0).mean())  # close to 0
print(y.std(dim=0).mean())   # close to 1
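
Like Dropout, BatchNorm behaves differently in training and evaluation mode: in train mode it normalizes with the current batch's statistics and updates running estimates; in eval mode it normalizes with those stored estimates. A quick look, reusing bn from above:

# running estimates, updated after each training-mode forward pass
print(bn.running_mean[:3])
print(bn.running_var[:3])

# eval mode uses the stored running statistics instead of batch statistics
bn.eval()
y_eval = bn(torch.randn(32, 100))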

🔧 Ways to Build a Network

Option 1: Subclass nn.Module (most flexible)

import torch
import torch.nn as nn

class MLP(nn.Module):
    """多层感知机"""
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.bn1 = nn.BatchNorm1d(hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.bn2 = nn.BatchNorm1d(hidden_size)
        self.fc3 = nn.Linear(hidden_size, output_size)
        self.dropout = nn.Dropout(0.5)
    
    def forward(self, x):
        x = self.fc1(x)
        x = self.bn1(x)
        x = torch.relu(x)
        x = self.dropout(x)
        
        x = self.fc2(x)
        x = self.bn2(x)
        x = torch.relu(x)
        x = self.dropout(x)
        
        x = self.fc3(x)
        return x

model = MLP(input_size=784, hidden_size=256, output_size=10)
print(model)
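
A quick forward pass to sanity-check the shapes (note that BatchNorm1d requires a batch of more than one sample in training mode):

x = torch.randn(32, 784)
y = model(x)
print(y.shape)  # torch.Size([32, 10])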

Option 2: nn.Sequential (for simple cases)

import torch
import torch.nn as nn

# stack layers in order
model = nn.Sequential(
    nn.Linear(784, 256),
    nn.BatchNorm1d(256),
    nn.ReLU(),
    nn.Dropout(0.5),
    
    nn.Linear(256, 256),
    nn.BatchNorm1d(256),
    nn.ReLU(),
    nn.Dropout(0.5),
    
    nn.Linear(256, 10)
)

print(model)

# usage
x = torch.randn(32, 784)
y = model(x)
print(y.shape)  # torch.Size([32, 10])
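
Layers inside an nn.Sequential can also be accessed by index or slice, which is handy for inspection:

print(model[0])               # Linear(in_features=784, out_features=256, bias=True)
print(model[0].weight.shape)  # torch.Size([256, 784])
print(model[:4])              # the first block, returned as a new nn.Sequential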

Option 3: nn.ModuleList (dynamic networks)

import torch
import torch.nn as nn

class DynamicMLP(nn.Module):
    def __init__(self, layer_sizes):
        super().__init__()
        self.layers = nn.ModuleList()
        
        for i in range(len(layer_sizes) - 1):
            self.layers.append(nn.Linear(layer_sizes[i], layer_sizes[i+1]))
    
    def forward(self, x):
        for layer in self.layers[:-1]:
            x = torch.relu(layer(x))
        x = self.layers[-1](x)  # no activation on the final layer
        return x

# build a network with any number of layers
model = DynamicMLP([784, 512, 256, 128, 10])
print(model)
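
Why nn.ModuleList rather than a plain Python list? ModuleList registers the layers as submodules, so their parameters show up in model.parameters() and get trained; in a plain list they would be invisible to the optimizer. A quick comparison (BrokenMLP is a deliberately wrong sketch):

import torch.nn as nn

class BrokenMLP(nn.Module):
    def __init__(self):
        super().__init__()
        # plain list: these layers are NOT registered as submodules
        self.layers = [nn.Linear(10, 10), nn.Linear(10, 1)]

print(len(list(DynamicMLP([784, 512, 10]).parameters())))  # 4 (weight + bias per layer)
print(len(list(BrokenMLP().parameters())))                 # 0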

📊 Inspecting the Model

Viewing Parameters

model = MLP(784, 256, 10)

# list all parameters
for name, param in model.named_parameters():
    print(f"{name}: {param.shape}")

# total parameter count
total_params = sum(p.numel() for p in model.parameters())
print(f"Total parameters: {total_params:,}")

# trainable parameter count
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Trainable parameters: {trainable_params:,}")

Using torchsummary (Optional)

# install first: pip install torchsummary
from torchsummary import summary

model = MLP(784, 256, 10)
summary(model, input_size=(784,), device="cpu")  # torchsummary defaults to "cuda"

🎯 Hands-on: A Handwritten Digit Classifier

import torch
import torch.nn as nn

class DigitClassifier(nn.Module):
    """
    手写数字分类网络
    输入: 28x28的灰度图片 (展平为784维向量)
    输出: 10个类别的概率
    """
    def __init__(self):
        super().__init__()
        
        # feature extraction layers
        self.features = nn.Sequential(
            nn.Linear(784, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(0.3),
            
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.3),
        )
        
        # classifier
        self.classifier = nn.Sequential(
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 10)
        )
    
    def forward(self, x):
        # flatten the images
        x = x.view(x.size(0), -1)  # (batch, 1, 28, 28) → (batch, 784)
        
        # extract features
        x = self.features(x)
        
        # classify
        x = self.classifier(x)
        
        return x  # returns logits, no softmax needed

# create the model
model = DigitClassifier()

# test it
dummy_input = torch.randn(32, 1, 28, 28)  # 32 images of size 28x28
output = model(dummy_input)
print(f"Output shape: {output.shape}")  # torch.Size([32, 10])

# predicted classes
predictions = output.argmax(dim=1)
print(f"Predicted classes: {predictions}")

📝 Model Design Tips

Depth

  • Start simple and add layers gradually
  • Deeper networks are not always better

Width (neurons per layer)

  • Typically the first layer is widest, narrowing layer by layer
  • e.g. 784 → 512 → 256 → 128 → 10

Regularization

  • Use Dropout to prevent overfitting (typically 0.3-0.5)
  • Use BatchNorm to speed up and stabilize training

Activation Functions

  • Use ReLU for hidden layers (simple and efficient)
  • Choose the output activation based on the task

🏋️ Exercises

import torch
import torch.nn as nn

# Exercise 1: build a neural network for binary classification
# Input: 20 features
# Architecture: 20 → 64 → 32 → 1
# Requirements: ReLU activations, Sigmoid on the final output
# Your code:


# Exercise 2: build the same network with nn.Sequential
# Your code:


# Exercise 3: count the parameters of the model from Exercise 1
# Your code:

Answers
import torch
import torch.nn as nn

# Exercise 1
class BinaryClassifier(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(20, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)
    
    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.sigmoid(self.fc3(x))
        return x

model1 = BinaryClassifier()

# Exercise 2
model2 = nn.Sequential(
    nn.Linear(20, 64),
    nn.ReLU(),
    nn.Linear(64, 32),
    nn.ReLU(),
    nn.Linear(32, 1),
    nn.Sigmoid()
)

# Exercise 3
total_params = sum(p.numel() for p in model1.parameters())
print(f"Total parameters: {total_params}")  # 20*64+64 + 64*32+32 + 32*1+1 = 3457

Next Steps

Now that you can build networks, let's learn how to train a model!
