🧠 Building Neural Networks
PyTorch builds neural networks with nn.Module. This section shows you how to assemble your own network from scratch.
📝 nn.Module Basics
Every neural network subclasses nn.Module:
import torch
import torch.nn as nn

class SimpleNet(nn.Module):
    def __init__(self):
        super().__init__()  # always call the parent constructor first
        # Define the layers here
        self.fc1 = nn.Linear(10, 20)  # fully connected layer: 10 inputs → 20 outputs
        self.fc2 = nn.Linear(20, 1)   # fully connected layer: 20 inputs → 1 output

    def forward(self, x):
        # Define the forward pass
        x = self.fc1(x)
        x = torch.relu(x)  # activation function
        x = self.fc2(x)
        return x

# Create a model instance
model = SimpleNet()
print(model)
Output:
SimpleNet(
  (fc1): Linear(in_features=10, out_features=20, bias=True)
  (fc2): Linear(in_features=20, out_features=1, bias=True)
)
Using the model
# Create input data (batch_size=5, features=10)
x = torch.randn(5, 10)

# Forward pass (this calls the forward method automatically)
output = model(x)
print(output.shape)  # torch.Size([5, 1])
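For inference you normally switch the model to eval mode and disable gradient tracking with torch.no_grad(); a minimal sketch, continuing from the snippet above:

# Inference sketch: eval mode + no_grad avoids building the autograd graph
model.eval()
with torch.no_grad():
    preds = model(torch.randn(5, 10))
print(preds.shape)  # torch.Size([5, 1])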
🧱 Common Layers
Linear (fully connected) layers
import torch
import torch.nn as nn

# Linear(number of input features, number of output features)
fc = nn.Linear(100, 50)

x = torch.randn(32, 100)  # 32 samples, 100 features each
y = fc(x)
print(y.shape)  # torch.Size([32, 50])
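Internally, nn.Linear holds a weight matrix of shape (out_features, in_features) and a bias vector of shape (out_features,), which you can inspect on the layer created above:

print(fc.weight.shape)  # torch.Size([50, 100])
print(fc.bias.shape)    # torch.Size([50])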
Activation functions
Activation functions introduce non-linearity so the network can learn complex patterns:
import torch
import torch.nn as nn
import torch.nn.functional as F

x = torch.tensor([-2.0, -1.0, 0.0, 1.0, 2.0])

# ReLU: max(0, x)
print(torch.relu(x))  # tensor([0., 0., 0., 1., 2.])
print(F.relu(x))      # functional style
print(nn.ReLU()(x))   # module style

# Sigmoid: 1/(1+e^(-x)), output in (0, 1)
print(torch.sigmoid(x))  # tensor([0.1192, 0.2689, 0.5000, 0.7311, 0.8808])

# Tanh: output in (-1, 1)
print(torch.tanh(x))  # tensor([-0.9640, -0.7616, 0.0000, 0.7616, 0.9640])

# Softmax: turns scores into a probability distribution (sums to 1)
logits = torch.tensor([1.0, 2.0, 3.0])
print(F.softmax(logits, dim=0))  # tensor([0.0900, 0.2447, 0.6652])
💡 How do you choose an activation function?
- Hidden layers: ReLU is the default choice, simple and efficient
- Binary classification output: Sigmoid (outputs a probability)
- Multi-class output: Softmax (outputs a probability distribution)
- Regression output: no activation (output the raw values); see the sketch below
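A minimal sketch of those output-layer choices (the layer sizes here are made up for illustration):

import torch
import torch.nn as nn
import torch.nn.functional as F

x = torch.randn(4, 8)  # a made-up batch: 4 samples, 8 features

binary_head = nn.Linear(8, 1)
p = torch.sigmoid(binary_head(x))        # binary classification: probability in (0, 1)

multi_head = nn.Linear(8, 3)
probs = F.softmax(multi_head(x), dim=1)  # multi-class: each row sums to 1

reg_head = nn.Linear(8, 1)
y_hat = reg_head(x)                      # regression: raw, unbounded output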
Dropout (to prevent overfitting)
import torch
import torch.nn as nn

# Dropout randomly zeroes out part of the activations
dropout = nn.Dropout(p=0.5)  # each element is zeroed with probability 0.5

x = torch.randn(5, 10)
print(x)

# Dropout is active in training mode
dropout.train()
y = dropout(x)
print(y)  # some values are zeroed

# Dropout is disabled in eval mode
dropout.eval()
y = dropout(x)
print(y)  # all values pass through unchanged
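PyTorch uses "inverted dropout": in training mode the surviving values are scaled by 1/(1-p) so the expected activation stays the same, and eval mode is an exact pass-through. A quick check, continuing from the snippet above:

torch.manual_seed(0)  # make the random mask reproducible
ones = torch.ones(1, 8)

dropout.train()
print(dropout(ones))  # surviving entries are 2.0 (= 1 / (1 - 0.5)), the rest are 0

dropout.eval()
print(dropout(ones))  # all ones: dropout does nothing in eval mode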
Batch normalization (BatchNorm)
import torch
import torch.nn as nn

# Normalizes each feature across the batch
bn = nn.BatchNorm1d(100)  # 100 features

x = torch.randn(32, 100)
y = bn(x)
print(y.mean(dim=0).mean())  # close to 0
print(y.std(dim=0).mean())   # close to 1
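BatchNorm behaves differently in training and eval mode: during training it normalizes with the statistics of the current batch (so it needs more than one sample per batch) and updates running estimates; in eval mode it uses those stored running statistics. A short sketch, continuing from above:

bn.eval()
with torch.no_grad():
    y_single = bn(torch.randn(1, 100))  # fine in eval mode, even with a single sample
print(y_single.shape)         # torch.Size([1, 100])
print(bn.running_mean.shape)  # torch.Size([100]), the stored running statistics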
🔧 Ways to Build a Network
Option 1: subclass nn.Module (most flexible)
import torch
import torch.nn as nn

class MLP(nn.Module):
    """Multi-layer perceptron"""
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.bn1 = nn.BatchNorm1d(hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.bn2 = nn.BatchNorm1d(hidden_size)
        self.fc3 = nn.Linear(hidden_size, output_size)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.fc1(x)
        x = self.bn1(x)
        x = torch.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = self.bn2(x)
        x = torch.relu(x)
        x = self.dropout(x)
        x = self.fc3(x)
        return x

model = MLP(input_size=784, hidden_size=256, output_size=10)
print(model)
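A quick forward-pass sanity check with a made-up batch:

x = torch.randn(32, 784)  # 32 flattened 28x28 images
print(model(x).shape)     # torch.Size([32, 10])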
Option 2: nn.Sequential (for simple cases)
import torch
import torch.nn as nn

# Stack layers in order
model = nn.Sequential(
    nn.Linear(784, 256),
    nn.BatchNorm1d(256),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(256, 256),
    nn.BatchNorm1d(256),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(256, 10)
)
print(model)

# Usage
x = torch.randn(32, 784)
y = model(x)
print(y.shape)  # torch.Size([32, 10])
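An nn.Sequential container also supports indexing and len(), which is handy for inspecting or reusing individual layers:

print(len(model))             # 9 sub-modules
print(model[0])               # Linear(in_features=784, out_features=256, bias=True)
print(model[0].weight.shape)  # torch.Size([256, 784])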
Option 3: nn.ModuleList (dynamic architectures)
import torch
import torch.nn as nn

class DynamicMLP(nn.Module):
    def __init__(self, layer_sizes):
        super().__init__()
        self.layers = nn.ModuleList()
        for i in range(len(layer_sizes) - 1):
            self.layers.append(nn.Linear(layer_sizes[i], layer_sizes[i+1]))

    def forward(self, x):
        for layer in self.layers[:-1]:
            x = torch.relu(layer(x))
        x = self.layers[-1](x)  # no activation on the final layer
        return x

# Build a network with any number of layers
model = DynamicMLP([784, 512, 256, 128, 10])
print(model)
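Because the layers live in an nn.ModuleList (rather than a plain Python list), they are registered as sub-modules and their parameters show up in model.parameters(); a quick usage check:

x = torch.randn(16, 784)  # made-up batch of 16 samples
print(model(x).shape)     # torch.Size([16, 10])
print(sum(p.numel() for p in model.parameters()))  # counts the parameters of every layer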
📊 Inspecting a Model
Listing parameters
model = MLP(784, 256, 10)

# List all parameters
for name, param in model.named_parameters():
    print(f"{name}: {param.shape}")

# Total number of parameters
total_params = sum(p.numel() for p in model.parameters())
print(f"Total parameters: {total_params:,}")

# Number of trainable parameters
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Trainable parameters: {trainable_params:,}")
Using torchsummary (optional)
pip install torchsummary
from torchsummary import summary
model = MLP(784, 256, 10)
summary(model, input_size=(784,))
🎯 Hands-On: Building a Handwritten-Digit Classifier
import torch
import torch.nn as nn

class DigitClassifier(nn.Module):
    """
    Handwritten-digit classifier.
    Input:  28x28 grayscale images (flattened to 784-dimensional vectors)
    Output: scores (logits) for 10 classes
    """
    def __init__(self):
        super().__init__()
        # Feature extractor
        self.features = nn.Sequential(
            nn.Linear(784, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.3),
        )
        # Classifier head
        self.classifier = nn.Sequential(
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 10)
        )

    def forward(self, x):
        # Flatten the images
        x = x.view(x.size(0), -1)  # (batch, 1, 28, 28) → (batch, 784)
        # Extract features
        x = self.features(x)
        # Classify
        x = self.classifier(x)
        return x  # returns logits; no softmax here

# Create the model
model = DigitClassifier()

# Test it
dummy_input = torch.randn(32, 1, 28, 28)  # 32 images of size 28x28
output = model(dummy_input)
print(f"Output shape: {output.shape}")  # torch.Size([32, 10])

# Predicted classes
predictions = output.argmax(dim=1)
print(f"Predicted classes: {predictions}")
📝 Model Design Tips
Depth
- Start simple and add layers gradually
- Deeper networks are not automatically better
Width (neurons per layer)
- The first hidden layer is usually the widest, with later layers getting narrower
- e.g. 784 → 512 → 256 → 128 → 10
Regularization
- Use Dropout to prevent overfitting (typically p between 0.3 and 0.5)
- Use BatchNorm to speed up and stabilize training
Activation functions
- Use ReLU in hidden layers (simple and efficient)
- Choose the output activation based on the task
🏋️ Exercises
import torch
import torch.nn as nn

# Exercise 1: build a neural network for binary classification
# Input: 20 features
# Architecture: 20 → 64 → 32 → 1
# Requirements: ReLU activations, Sigmoid on the final output
# Your code:

# Exercise 2: build the same network with nn.Sequential
# Your code:

# Exercise 3: count the parameters of the model from Exercise 1
# Your code:
Click to see the answers
import torch
import torch.nn as nn

# Exercise 1
class BinaryClassifier(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(20, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.sigmoid(self.fc3(x))
        return x

model1 = BinaryClassifier()

# Exercise 2
model2 = nn.Sequential(
    nn.Linear(20, 64),
    nn.ReLU(),
    nn.Linear(64, 32),
    nn.ReLU(),
    nn.Linear(32, 1),
    nn.Sigmoid()
)

# Exercise 3
total_params = sum(p.numel() for p in model1.parameters())
print(f"Total parameters: {total_params}")  # 20*64+64 + 64*32+32 + 32*1+1 = 3457
Next Steps
Now that you can build a network, the next step is learning how to train it!