📝 RNN: Recurrent Neural Networks
An RNN (Recurrent Neural Network) is the classic tool for sequence data, well suited to text, speech, time series, and similar tasks.
🤔 Why Do We Need RNNs?
An ordinary feed-forward network cannot handle variable-length sequences or temporal dependencies:

Feed-forward network:
input → [fixed length] → output
Each input is processed independently; inputs are unrelated to each other.

RNN:
input 1 → input 2 → input 3 → ... → output
   ↓         ↓          ↓
A hidden state is passed along, remembering what came before.
💡 An Everyday Example
Reading the sentence "I like eating apples":
- Feed-forward network: looks at all the words at once and has no notion of their order
- RNN: reads the words one at a time and remembers what it has already read
📝 How an RNN Works
Basic formula
h_t = tanh(W_hh * h_{t-1} + W_xh * x_t + b)
h_t: hidden state at the current time step
h_{t-1}: hidden state at the previous time step
x_t: input at the current time step
W_hh, W_xh: weight matrices (b is a bias term)
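The formula can be checked directly against nn.RNNCell, which implements exactly one such step. The snippet below is a minimal sketch (the names cell, x_t and h_prev are illustrative, not from the text; note that PyTorch splits the bias b into bias_ih and bias_hh):
import torch
import torch.nn as nn

# One RNN step computed by hand, matching h_t = tanh(W_xh * x_t + W_hh * h_{t-1} + b)
cell = nn.RNNCell(input_size=10, hidden_size=20)  # tanh nonlinearity by default
x_t = torch.randn(3, 10)     # a batch of 3 inputs at time step t
h_prev = torch.zeros(3, 20)  # previous hidden state h_{t-1}

h_manual = torch.tanh(x_t @ cell.weight_ih.T + cell.bias_ih
                      + h_prev @ cell.weight_hh.T + cell.bias_hh)
h_t = cell(x_t, h_prev)               # the same step via nn.RNNCell
print(torch.allclose(h_manual, h_t))  # expected: True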
Diagram
        x_1         x_2         x_3
         ↓           ↓           ↓
h_0 ──→ RNN ──h_1──→ RNN ──h_2──→ RNN ──h_3──→ output
The same cell (shared weights) is applied at every step; the hidden state is passed from one time step to the next.
🔧 RNNs in PyTorch
Basic RNN
import torch
import torch.nn as nn

# Create an RNN layer
rnn = nn.RNN(
    input_size=10,    # input feature dimension
    hidden_size=20,   # hidden state dimension
    num_layers=2,     # number of stacked RNN layers
    batch_first=True  # input shape is (batch, seq, feature)
)

# Input: (batch, seq_len, input_size)
x = torch.randn(3, 5, 10)  # 3 samples, sequence length 5, feature dimension 10

# Initial hidden state: (num_layers, batch, hidden_size)
h0 = torch.zeros(2, 3, 20)

# Forward pass
output, hn = rnn(x, h0)
print(f"Output shape: {output.shape}")    # [3, 5, 20], the output at every time step
print(f"Final hidden state: {hn.shape}")  # [2, 3, 20], each layer's hidden state at the last time step
LSTM (Long Short-Term Memory)
An LSTM mitigates the vanishing-gradient problem of the plain RNN and can remember much longer sequences:
import torch
import torch.nn as nn

# Create an LSTM layer
lstm = nn.LSTM(
    input_size=10,
    hidden_size=20,
    num_layers=2,
    batch_first=True,
    dropout=0.5  # dropout between stacked layers
)

x = torch.randn(3, 5, 10)

# An LSTM carries two states: h (hidden state) and c (cell state)
h0 = torch.zeros(2, 3, 20)
c0 = torch.zeros(2, 3, 20)

output, (hn, cn) = lstm(x, (h0, c0))
print(f"Output shape: {output.shape}")  # [3, 5, 20]
print(f"Hidden state: {hn.shape}")      # [2, 3, 20]
print(f"Cell state: {cn.shape}")        # [2, 3, 20]
GRU (Gated Recurrent Unit)
A GRU is a simplified variant of the LSTM with fewer parameters and comparable performance:
import torch
import torch.nn as nn

gru = nn.GRU(
    input_size=10,
    hidden_size=20,
    num_layers=2,
    batch_first=True
)

x = torch.randn(3, 5, 10)
h0 = torch.zeros(2, 3, 20)
output, hn = gru(x, h0)
print(f"Output shape: {output.shape}")  # [3, 5, 20]
Which one to choose
| Model | Pros | Cons | Typical use |
|---|---|---|---|
| RNN | Simple | Vanishing gradients | Short sequences |
| LSTM | Long memory | More parameters, slower | Long sequences |
| GRU | Good trade-off | - | General purpose |
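The parameter-count difference in the table is easy to see directly. The sketch below (sizes are arbitrary) counts the weights of a single layer of each type; an LSTM uses roughly 4x and a GRU roughly 3x the parameters of a plain RNN with the same sizes:
import torch.nn as nn

def count_params(module):
    return sum(p.numel() for p in module.parameters())

rnn = nn.RNN(input_size=10, hidden_size=20, batch_first=True)
lstm = nn.LSTM(input_size=10, hidden_size=20, batch_first=True)
gru = nn.GRU(input_size=10, hidden_size=20, batch_first=True)
print(count_params(rnn), count_params(lstm), count_params(gru))  # 640 2560 1920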
🏗️ Building RNN Models
Text classification model
import torch
import torch.nn as nn

class TextClassifier(nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_dim, num_classes):
        super().__init__()
        # Word embedding layer
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        # LSTM layer
        self.lstm = nn.LSTM(embed_dim, hidden_dim, batch_first=True,
                            num_layers=2, dropout=0.5, bidirectional=True)
        # Classifier
        self.fc = nn.Linear(hidden_dim * 2, num_classes)  # *2 because the LSTM is bidirectional
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        # x: (batch, seq_len), a sequence of word indices
        embedded = self.embedding(x)  # (batch, seq_len, embed_dim)
        # LSTM
        output, (hn, cn) = self.lstm(embedded)
        # Take the top layer's final hidden states (forward and backward directions concatenated)
        # hn: (num_layers * 2, batch, hidden_dim)
        last_hidden = torch.cat([hn[-2], hn[-1]], dim=1)  # (batch, hidden_dim * 2)
        # Classify
        out = self.dropout(last_hidden)
        out = self.fc(out)
        return out

# Usage
model = TextClassifier(vocab_size=10000, embed_dim=128, hidden_dim=256, num_classes=2)
x = torch.randint(0, 10000, (32, 100))  # 32 samples, 100 tokens each
output = model(x)
print(f"Output shape: {output.shape}")  # [32, 2]
Sequence-to-sequence model
import torch
import torch.nn as nn

class Seq2Seq(nn.Module):
    """A minimal sequence-to-sequence model"""
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # Encoder
        self.encoder = nn.LSTM(input_size, hidden_size, batch_first=True)
        # Decoder
        self.decoder = nn.LSTM(output_size, hidden_size, batch_first=True)
        # Output layer
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, src, tgt):
        # Encode the source sequence and keep only its final states
        _, (hn, cn) = self.encoder(src)
        # Decode the target sequence, initialized with the encoder's states
        decoder_output, _ = self.decoder(tgt, (hn, cn))
        # Project to the output dimension
        output = self.fc(decoder_output)
        return output
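A quick usage sketch with made-up dimensions (the sizes below are illustrative, not from the original): the decoder consumes the ground-truth target sequence as input (teacher forcing) and produces one output vector per target step.
seq2seq = Seq2Seq(input_size=8, hidden_size=32, output_size=6)
src = torch.randn(4, 10, 8)  # 4 source sequences of length 10
tgt = torch.randn(4, 7, 6)   # 4 target sequences of length 7
out = seq2seq(src, tgt)
print(out.shape)             # torch.Size([4, 7, 6]), one prediction per target step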
📊 Handling Variable-Length Sequences
Real texts have different lengths, so use pack_padded_sequence:
import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

class VariableLengthLSTM(nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_dim, num_classes):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
        self.lstm = nn.LSTM(embed_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, num_classes)

    def forward(self, x, lengths):
        # x: (batch, max_seq_len), padded sequences
        # lengths: the true length of each sequence
        embedded = self.embedding(x)
        # Pack the variable-length sequences
        packed = pack_padded_sequence(embedded, lengths.cpu(),
                                      batch_first=True, enforce_sorted=False)
        # Run the LSTM on the packed batch
        packed_output, (hn, cn) = self.lstm(packed)
        # Unpack (only needed if you want the per-step outputs)
        # output, _ = pad_packed_sequence(packed_output, batch_first=True)
        # Classify from the final hidden state
        out = self.fc(hn[-1])
        return out

# Usage
model = VariableLengthLSTM(10000, 128, 256, 2)
x = torch.randint(0, 10000, (4, 50))      # 4 samples, max length 50
lengths = torch.tensor([50, 35, 42, 28])  # true lengths
output = model(x, lengths)
print(f"Output: {output.shape}")  # [4, 2]
🎯 Hands-On: Sentiment Analysis
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# A toy sentiment-analysis dataset (use real data in practice)
class SentimentDataset(Dataset):
    def __init__(self, texts, labels, vocab, max_len=100):
        self.texts = texts
        self.labels = labels
        self.vocab = vocab
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        # Naive whitespace tokenization (use a proper tokenizer in practice)
        tokens = self.texts[idx].lower().split()
        # Convert tokens to indices
        indices = [self.vocab.get(token, 1) for token in tokens]  # 1 = UNK
        # Truncate or pad
        if len(indices) > self.max_len:
            indices = indices[:self.max_len]
        else:
            indices = indices + [0] * (self.max_len - len(indices))  # 0 = PAD
        return torch.tensor(indices), torch.tensor(self.labels[idx])

# Model
class SentimentLSTM(nn.Module):
    def __init__(self, vocab_size, embed_dim=128, hidden_dim=256):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=0)
        self.lstm = nn.LSTM(embed_dim, hidden_dim, batch_first=True,
                            num_layers=2, dropout=0.5, bidirectional=True)
        self.fc = nn.Sequential(
            nn.Linear(hidden_dim * 2, 64),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(64, 1)
        )

    def forward(self, x):
        embedded = self.embedding(x)
        _, (hn, _) = self.lstm(embedded)
        hidden = torch.cat([hn[-2], hn[-1]], dim=1)
        out = self.fc(hidden)
        return out.squeeze(1)  # squeeze(1), not squeeze(): keeps the batch dim even when batch_size == 1

# Training function
def train_sentiment():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Fake data (use real data in practice)
    texts = ["i love this movie", "terrible film", "great acting", "boring story"] * 100
    labels = [1, 0, 1, 0] * 100

    # Build the vocabulary
    vocab = {"<PAD>": 0, "<UNK>": 1}
    for text in texts:
        for word in text.lower().split():
            if word not in vocab:
                vocab[word] = len(vocab)

    # Dataset and loader
    dataset = SentimentDataset(texts, labels, vocab)
    loader = DataLoader(dataset, batch_size=32, shuffle=True)

    # Model, loss, optimizer
    model = SentimentLSTM(len(vocab)).to(device)
    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Training loop
    for epoch in range(10):
        model.train()
        total_loss = 0
        for text, label in loader:
            text, label = text.to(device), label.float().to(device)
            optimizer.zero_grad()
            output = model(text)
            loss = criterion(output, label)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch+1}, Loss: {total_loss/len(loader):.4f}")

# train_sentiment()
⚠️ Practical Notes on RNNs
1. Gradient clipping
# Prevent exploding gradients
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
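The call goes between the backward pass and the optimizer step; a sketch of a single training step (reusing model, loss, and optimizer from the training loop above):
optimizer.zero_grad()
loss.backward()
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # clip after backward()
optimizer.step()                                                  # ...and before step()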
2. Initializing the hidden state
# Re-initialize the hidden state for every batch
# (num_layers, hidden_size and device are assumed to be defined elsewhere)
def init_hidden(batch_size):
    return (torch.zeros(num_layers, batch_size, hidden_size).to(device),
            torch.zeros(num_layers, batch_size, hidden_size).to(device))
3. Using pretrained word vectors
# Load pretrained word vectors such as GloVe
embedding = nn.Embedding(vocab_size, 300)
embedding.weight.data.copy_(pretrained_weights)
embedding.weight.requires_grad = False  # optional: freeze the embeddings
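The same thing can be done in one call with nn.Embedding.from_pretrained (here pretrained_weights is assumed to be an already-loaded float tensor of shape (vocab_size, 300)):
embedding = nn.Embedding.from_pretrained(pretrained_weights, freeze=True)  # freeze=True keeps the vectors fixed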
🏋️ Exercise
# Exercise: implement a character-level language model
# Given the preceding characters, predict the next character
# Hints:
# 1. The input is a sequence of character indices
# 2. The output is, at each position, a prediction of the next character
# 3. Use an LSTM or a GRU
# Your code:
Next Steps
Now that you have covered CNNs and RNNs, let's move on to transfer learning!