📝 RNN: Recurrent Neural Networks
An RNN (Recurrent Neural Network) is the classic tool for sequence data, well suited to text, speech, time series, and similar tasks.
🤔 Why Do We Need RNNs?
An ordinary feed-forward network cannot handle variable-length sequences or temporal dependencies:

Feed-forward network:
input → [fixed length] → output
Each input is processed independently; inputs are unrelated to each other.

RNN:
input 1 → input 2 → input 3 → ... → output
   ↓         ↓          ↓
A hidden state is passed along, remembering what came before.
💡 An Everyday Example
Reading the sentence "I like eating apples":
- Feed-forward network: looks at all the words at once and has no notion of their order
- RNN: reads the words one at a time and remembers what it has already read
📝 How an RNN Works
Basic formula
h_t = tanh(W_hh * h_{t-1} + W_xh * x_t + b)
h_t: hidden state at the current time step
h_{t-1}: hidden state at the previous time step
x_t: input at the current time step
W_hh, W_xh: weight matrices (b is a bias term)
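The formula can be checked directly against nn.RNNCell, which implements exactly one such step. The snippet below is a minimal sketch (the names cell, x_t and h_prev are illustrative, not from the text; note that PyTorch splits the bias b into bias_ih and bias_hh):
import torch
import torch.nn as nn

# One RNN step computed by hand, matching h_t = tanh(W_xh * x_t + W_hh * h_{t-1} + b)
cell = nn.RNNCell(input_size=10, hidden_size=20)  # tanh nonlinearity by default
x_t = torch.randn(3, 10)     # a batch of 3 inputs at time step t
h_prev = torch.zeros(3, 20)  # previous hidden state h_{t-1}

h_manual = torch.tanh(x_t @ cell.weight_ih.T + cell.bias_ih
                      + h_prev @ cell.weight_hh.T + cell.bias_hh)
h_t = cell(x_t, h_prev)               # the same step via nn.RNNCell
print(torch.allclose(h_manual, h_t))  # expected: True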
Diagram
        x_1         x_2         x_3
         ↓           ↓           ↓
h_0 ──→ RNN ──h_1──→ RNN ──h_2──→ RNN ──h_3──→ output
The same cell (shared weights) is applied at every step; the hidden state is passed from one time step to the next.
🔧 RNNs in PyTorch
Basic RNN
import torch
import torch.nn as nn

# Create an RNN layer
rnn = nn.RNN(
    input_size=10,    # input feature dimension
    hidden_size=20,   # hidden state dimension
    num_layers=2,     # number of stacked RNN layers
    batch_first=True  # input shape is (batch, seq, feature)
)

# Input: (batch, seq_len, input_size)
x = torch.randn(3, 5, 10)  # 3 samples, sequence length 5, feature dimension 10

# Initial hidden state: (num_layers, batch, hidden_size)
h0 = torch.zeros(2, 3, 20)

# Forward pass
output, hn = rnn(x, h0)
print(f"Output shape: {output.shape}")    # [3, 5, 20], the output at every time step
print(f"Final hidden state: {hn.shape}")  # [2, 3, 20], each layer's hidden state at the last time step
LSTM (Long Short-Term Memory)
An LSTM mitigates the vanishing-gradient problem of the plain RNN and can remember much longer sequences:
import torch
import torch.nn as nn

# Create an LSTM layer
lstm = nn.LSTM(
    input_size=10,
    hidden_size=20,
    num_layers=2,
    batch_first=True,
    dropout=0.5  # dropout between stacked layers
)

x = torch.randn(3, 5, 10)

# An LSTM carries two states: h (hidden state) and c (cell state)
h0 = torch.zeros(2, 3, 20)
c0 = torch.zeros(2, 3, 20)

output, (hn, cn) = lstm(x, (h0, c0))
print(f"Output shape: {output.shape}")  # [3, 5, 20]
print(f"Hidden state: {hn.shape}")      # [2, 3, 20]
print(f"Cell state: {cn.shape}")        # [2, 3, 20]
GRU (Gated Recurrent Unit)
A GRU is a simplified variant of the LSTM with fewer parameters and comparable performance:
import torch
import torch.nn as nn

gru = nn.GRU(
    input_size=10,
    hidden_size=20,
    num_layers=2,
    batch_first=True
)

x = torch.randn(3, 5, 10)
h0 = torch.zeros(2, 3, 20)
output, hn = gru(x, h0)
print(f"Output shape: {output.shape}")  # [3, 5, 20]
Which one to choose
| Model | Pros | Cons | Typical use |
|---|---|---|---|
| RNN | Simple | Vanishing gradients | Short sequences |
| LSTM | Long memory | More parameters, slower | Long sequences |
| GRU | Good trade-off | - | General purpose |
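The parameter-count difference in the table is easy to see directly. The sketch below (sizes are arbitrary) counts the weights of a single layer of each type; an LSTM uses roughly 4x and a GRU roughly 3x the parameters of a plain RNN with the same sizes:
import torch.nn as nn

def count_params(module):
    return sum(p.numel() for p in module.parameters())

rnn = nn.RNN(input_size=10, hidden_size=20, batch_first=True)
lstm = nn.LSTM(input_size=10, hidden_size=20, batch_first=True)
gru = nn.GRU(input_size=10, hidden_size=20, batch_first=True)
print(count_params(rnn), count_params(lstm), count_params(gru))  # 640 2560 1920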
🏗️ Building RNN Models
Text classification model
import torch
import torch.nn as nn

class TextClassifier(nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_dim, num_classes):
        super().__init__()
        # Word embedding layer
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        # LSTM layer
        self.lstm = nn.LSTM(embed_dim, hidden_dim, batch_first=True,
                            num_layers=2, dropout=0.5, bidirectional=True)
        # Classifier
        self.fc = nn.Linear(hidden_dim * 2, num_classes)  # *2 because the LSTM is bidirectional
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        # x: (batch, seq_len), a sequence of word indices
        embedded = self.embedding(x)  # (batch, seq_len, embed_dim)
        # LSTM
        output, (hn, cn) = self.lstm(embedded)
        # Take the top layer's final hidden states (forward and backward directions concatenated)
        # hn: (num_layers * 2, batch, hidden_dim)
        last_hidden = torch.cat([hn[-2], hn[-1]], dim=1)  # (batch, hidden_dim * 2)
        # Classify
        out = self.dropout(last_hidden)
        out = self.fc(out)
        return out

# Usage
model = TextClassifier(vocab_size=10000, embed_dim=128, hidden_dim=256, num_classes=2)
x = torch.randint(0, 10000, (32, 100))  # 32 samples, 100 tokens each
output = model(x)
print(f"Output shape: {output.shape}")  # [32, 2]
Sequence-to-sequence model
import torch
import torch.nn as nn

class Seq2Seq(nn.Module):
    """A minimal sequence-to-sequence model"""
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # Encoder
        self.encoder = nn.LSTM(input_size, hidden_size, batch_first=True)
        # Decoder
        self.decoder = nn.LSTM(output_size, hidden_size, batch_first=True)
        # Output layer
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, src, tgt):
        # Encode the source sequence and keep only its final states
        _, (hn, cn) = self.encoder(src)
        # Decode the target sequence, initialized with the encoder's states
        decoder_output, _ = self.decoder(tgt, (hn, cn))
        # Project to the output dimension
        output = self.fc(decoder_output)
        return output
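A quick usage sketch with made-up dimensions (the sizes below are illustrative, not from the original): the decoder consumes the ground-truth target sequence as input (teacher forcing) and produces one output vector per target step.
seq2seq = Seq2Seq(input_size=8, hidden_size=32, output_size=6)
src = torch.randn(4, 10, 8)  # 4 source sequences of length 10
tgt = torch.randn(4, 7, 6)   # 4 target sequences of length 7
out = seq2seq(src, tgt)
print(out.shape)             # torch.Size([4, 7, 6]), one prediction per target step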
📊 Handling Variable-Length Sequences
Real texts have different lengths, so use pack_padded_sequence:
import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

class VariableLengthLSTM(nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_dim, num_classes):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
        self.lstm = nn.LSTM(embed_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, num_classes)

    def forward(self, x, lengths):
        # x: (batch, max_seq_len), padded sequences
        # lengths: the true length of each sequence
        embedded = self.embedding(x)
        # Pack the variable-length sequences
        packed = pack_padded_sequence(embedded, lengths.cpu(),
                                      batch_first=True, enforce_sorted=False)
        # Run the LSTM on the packed batch
        packed_output, (hn, cn) = self.lstm(packed)
        # Unpack (only needed if you want the per-step outputs)
        # output, _ = pad_packed_sequence(packed_output, batch_first=True)
        # Classify from the final hidden state
        out = self.fc(hn[-1])
        return out

# Usage
model = VariableLengthLSTM(10000, 128, 256, 2)
x = torch.randint(0, 10000, (4, 50))      # 4 samples, max length 50
lengths = torch.tensor([50, 35, 42, 28])  # true lengths
output = model(x, lengths)
print(f"Output: {output.shape}")  # [4, 2]
🎯 Hands-On: Sentiment Analysis
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# A toy sentiment-analysis dataset (use real data in practice)
class SentimentDataset(Dataset):
    def __init__(self, texts, labels, vocab, max_len=100):
        self.texts = texts
        self.labels = labels
        self.vocab = vocab
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        # Naive whitespace tokenization (use a proper tokenizer in practice)
        tokens = self.texts[idx].lower().split()
        # Convert tokens to indices
        indices = [self.vocab.get(token, 1) for token in tokens]  # 1 = UNK
        # Truncate or pad
        if len(indices) > self.max_len:
            indices = indices[:self.max_len]
        else:
            indices = indices + [0] * (self.max_len - len(indices))  # 0 = PAD
        return torch.tensor(indices), torch.tensor(self.labels[idx])

# Model
class SentimentLSTM(nn.Module):
    def __init__(self, vocab_size, embed_dim=128, hidden_dim=256):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
        self.lstm = nn.LSTM(embed_dim, hidden_dim, batch_first=True,
                            num_layers=2, dropout=0.5, bidirectional=True)
        self.fc = nn.Sequential(
            nn.Linear(hidden_dim * 2, 64),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(64, 1)
        )

    def forward(self, x):
        embedded = self.embedding(x)
        _, (hn, _) = self.lstm(embedded)
        hidden = torch.cat([hn[-2], hn[-1]], dim=1)
        out = self.fc(hidden)
        return out.squeeze(1)  # squeeze(1), not squeeze(): keeps the batch dim even when batch_size == 1

# Training function
def train_sentiment():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Fake data (use real data in practice)
    texts = ["i love this movie", "terrible film", "great acting", "boring story"] * 100
    labels = [1, 0, 1, 0] * 100

    # Build the vocabulary
    vocab = {"<PAD>": 0, "<UNK>": 1}
    for text in texts:
        for word in text.lower().split():
            if word not in vocab:
                vocab[word] = len(vocab)

    # Dataset and loader
    dataset = SentimentDataset(texts, labels, vocab)
    loader = DataLoader(dataset, batch_size=32, shuffle=True)

    # Model, loss, optimizer
    model = SentimentLSTM(len(vocab)).to(device)
    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Training loop
    for epoch in range(10):
        model.train()
        total_loss = 0
        for text, label in loader:
            text, label = text.to(device), label.float().to(device)
            optimizer.zero_grad()
            output = model(text)
            loss = criterion(output, label)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch+1}, Loss: {total_loss/len(loader):.4f}")

# train_sentiment()
⚠️ Practical Notes on RNNs
1. Gradient clipping
# Prevent exploding gradients
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
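The call goes between the backward pass and the optimizer step; a sketch of a single training step (reusing model, loss, and optimizer from the training loop above):
optimizer.zero_grad()
loss.backward()
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # clip after backward()
optimizer.step()                                                  # ...and before step()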
2. Initializing the hidden state
# Re-initialize the hidden state for every batch
# (num_layers, hidden_size and device are assumed to be defined elsewhere)
def init_hidden(batch_size):
    return (torch.zeros(num_layers, batch_size, hidden_size).to(device),
            torch.zeros(num_layers, batch_size, hidden_size).to(device))
3. Using pretrained word vectors
# Load pretrained word vectors such as GloVe
embedding = nn.Embedding(vocab_size, 300)
embedding.weight.data.copy_(pretrained_weights)
embedding.weight.requires_grad = False  # optional: freeze the embeddings
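The same thing can be done in one call with nn.Embedding.from_pretrained (here pretrained_weights is assumed to be an already-loaded float tensor of shape (vocab_size, 300)):
embedding = nn.Embedding.from_pretrained(pretrained_weights, freeze=True)  # freeze=True keeps the vectors fixed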
🏋️ Exercise
# Exercise: implement a character-level language model
# Given the preceding characters, predict the next character
# Hints:
# 1. The input is a sequence of character indices
# 2. The output is, at each position, a prediction of the next character
# 3. Use an LSTM or a GRU
# Your code:
Next Steps
Now that you have covered CNNs and RNNs, let's move on to transfer learning!