🔢 张量基础

张量（Tensor） 是PyTorch中最核心的数据结构，你可以把它理解为多维数组。

🤔 什么是张量？

用一张图来理解不同维度的张量：

标量(0维): 5                        → 一个数字
向量(1维): [1, 2, 3]                → 一行数字
矩阵(2维): [[1, 2], [3, 4]]         → 一张表格
3维张量:   [[[1,2],[3,4]],          → 多张表格（如彩色图片）
            [[5,6],[7,8]]]

💡 生活中的例子

标量：你的年龄（25）
向量：一周的气温 [20, 22, 19, 23, 25, 24, 21]
矩阵：一张灰度图片（每个像素一个值）
3维张量：一张彩色图片（RGB三个通道）
4维张量：一批彩色图片

📝 创建张量

方式1：从Python列表创建

import torch

# 从列表创建
x = torch.tensor([1, 2, 3, 4])
print(x)  # tensor([1, 2, 3, 4])

# 创建2维张量（矩阵）
matrix = torch.tensor([[1, 2, 3],
                       [4, 5, 6]])
print(matrix)
# tensor([[1, 2, 3],
#         [4, 5, 6]])

# 指定数据类型
float_tensor = torch.tensor([1, 2, 3], dtype=torch.float32)
print(float_tensor)  # tensor([1., 2., 3.])

方式2：使用内置函数创建

import torch

# 全0张量
zeros = torch.zeros(3, 4)  # 3行4列
print(zeros)
# tensor([[0., 0., 0., 0.],
#         [0., 0., 0., 0.],
#         [0., 0., 0., 0.]])

# 全1张量
ones = torch.ones(2, 3)
print(ones)
# tensor([[1., 1., 1.],
#         [1., 1., 1.]])

# 随机张量（0到1之间的均匀分布）
rand = torch.rand(2, 3)
print(rand)
# tensor([[0.1234, 0.5678, 0.9012],
#         [0.3456, 0.7890, 0.2345]])

# 标准正态分布随机张量
randn = torch.randn(2, 3)
print(randn)

# 等差数列
arange = torch.arange(0, 10, 2)  # 从0开始、步长2，不包含终点10
print(arange)  # tensor([0, 2, 4, 6, 8])

# 线性空间
linspace = torch.linspace(0, 1, 5)  # 在0到1之间（含两端）均匀取5个点
print(linspace)  # tensor([0.0000, 0.2500, 0.5000, 0.7500, 1.0000])

# 单位矩阵
eye = torch.eye(3)
print(eye)
# tensor([[1., 0., 0.],
#         [0., 1., 0.],
#         [0., 0., 1.]])

方式3：从NumPy创建

import torch
import numpy as np

# NumPy数组转张量
np_array = np.array([1, 2, 3])
tensor_from_np = torch.from_numpy(np_array)
print(tensor_from_np)  # tensor([1, 2, 3])

# 张量转NumPy数组
back_to_np = tensor_from_np.numpy()
print(back_to_np)  # [1 2 3]

⚠️ 注意

torch.from_numpy() 创建的张量与原NumPy数组共享内存，修改一个会影响另一个！反过来，CPU张量调用 .numpy() 得到的数组同样与该张量共享内存。

📐 张量的属性

import torch

x = torch.rand(3, 4, 5)  # 创建一个3x4x5的随机张量

# 形状
print(f"形状: {x.shape}")       # torch.Size([3, 4, 5])
print(f"形状: {x.size()}")      # torch.Size([3, 4, 5])  # 另一种写法

# 维度数量
print(f"维度: {x.dim()}")       # 3
print(f"维度: {x.ndim}")        # 3  # 另一种写法

# 元素总数
print(f"元素数: {x.numel()}")   # 60 (3*4*5)

# 数据类型
print(f"类型: {x.dtype}")       # torch.float32

# 设备（CPU还是GPU）
print(f"设备: {x.device}")      # cpu

➕ 张量运算

算术运算

import torch

a = torch.tensor([1, 2, 3], dtype=torch.float32)
b = torch.tensor([4, 5, 6], dtype=torch.float32)

# 加法
print(a + b)            # tensor([5., 7., 9.])
print(torch.add(a, b))  # 等价写法

# 减法
print(a - b)            # tensor([-3., -3., -3.])

# 乘法（逐元素）
print(a * b)            # tensor([4., 10., 18.])

# 除法
print(a / b)            # tensor([0.2500, 0.4000, 0.5000])

# 幂运算
print(a ** 2)           # tensor([1., 4., 9.])

# 开方
print(torch.sqrt(a))    # tensor([1.0000, 1.4142, 1.7321])

矩阵运算

import torch

A = torch.tensor([[1, 2],
                  [3, 4]], dtype=torch.float32)
B = torch.tensor([[5, 6],
                  [7, 8]], dtype=torch.float32)

# 矩阵乘法（三种等价写法）
print(A @ B)
print(torch.mm(A, B))
print(torch.matmul(A, B))
# tensor([[19., 22.],
#         [43., 50.]])

# 转置
print(A.T)
print(A.transpose(0, 1))  # 等价写法
# tensor([[1., 3.],
#         [2., 4.]])

聚合运算

import torch

x = torch.tensor([[1, 2, 3],
                  [4, 5, 6]], dtype=torch.float32)

# 求和
print(x.sum())           # tensor(21.)  所有元素求和
print(x.sum(dim=0))      # tensor([5., 7., 9.])  按列求和
print(x.sum(dim=1))      # tensor([6., 15.])  按行求和

# 平均值
print(x.mean())          # tensor(3.5000)

# 最大值
print(x.max())           # tensor(6.)
print(x.argmax())        # tensor(5)  最大值在展平后的张量中的索引

# 最小值
print(x.min())           # tensor(1.)

🔧 形状变换

reshape / view

import torch

x = torch.arange(12)
print(x)  # tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])

# 变形为3x4矩阵
y = x.reshape(3, 4)
print(y)
# tensor([[ 0,  1,  2,  3],
#         [ 4,  5,  6,  7],
#         [ 8,  9, 10, 11]])

# 使用-1自动计算维度
z = x.reshape(3, -1)  # -1表示自动计算：12/3=4
print(z.shape)  # torch.Size([3, 4])

# view的效果相同（但要求内存连续）
w = x.view(2, 6)
print(w)

squeeze / unsqueeze

import torch

# squeeze: 去除大小为1的维度
x = torch.zeros(1, 3, 1, 4)
print(x.shape)           # torch.Size([1, 3, 1, 4])
print(x.squeeze().shape) # torch.Size([3, 4])

# unsqueeze: 增加大小为1的维度
y = torch.zeros(3, 4)
print(y.shape)              # torch.Size([3, 4])
print(y.unsqueeze(0).shape) # torch.Size([1, 3, 4])
print(y.unsqueeze(1).shape) # torch.Size([3, 1, 4])

拼接张量

import torch

a = torch.tensor([[1, 2], [3, 4]])
b = torch.tensor([[5, 6], [7, 8]])

# cat: 沿现有维度拼接
print(torch.cat([a, b], dim=0))  # 沿行拼接
# tensor([[1, 2],
#         [3, 4],
#         [5, 6],
#         [7, 8]])

print(torch.cat([a, b], dim=1))  # 沿列拼接
# tensor([[1, 2, 5, 6],
#         [3, 4, 7, 8]])

# stack: 沿新维度堆叠
print(torch.stack([a, b], dim=0).shape)  # torch.Size([2, 2, 2])

🎯 索引和切片

import torch

x = torch.tensor([[1, 2, 3],
                  [4, 5, 6],
                  [7, 8, 9]])

# 基本索引
print(x[0])        # tensor([1, 2, 3])  第一行
print(x[0, 1])     # tensor(2)  第一行第二列
print(x[0][1])     # tensor(2)  等价写法

# 切片
print(x[:, 0])     # tensor([1, 4, 7])  所有行的第一列
print(x[1:, :2])   # 第1行到最后，前2列
# tensor([[4, 5],
#         [7, 8]])

# 布尔索引
mask = x > 5
print(mask)
# tensor([[False, False, False],
#         [False, False,  True],
#         [ True,  True,  True]])
print(x[mask])     # tensor([6, 7, 8, 9])

# 花式索引
indices = torch.tensor([0, 2])
print(x[indices])  # 取第0行和第2行
# tensor([[1, 2, 3],
#         [7, 8, 9]])

📋 常用数据类型

类型	说明	创建方式
`torch.float32`	32位浮点（默认）	`torch.tensor([1.0])`
`torch.float64`	64位浮点	`torch.tensor([1.0], dtype=torch.float64)`
`torch.int32`	32位整数	`torch.tensor([1], dtype=torch.int32)`
`torch.int64`	64位整数（默认整数）	`torch.tensor([1])`
`torch.bool`	布尔类型	`torch.tensor([True, False])`

# 类型转换
x = torch.tensor([1, 2, 3])
print(x.dtype)        # torch.int64

x_float = x.float()   # 转为float32
x_float = x.to(torch.float32)  # 等价写法
print(x_float.dtype)  # torch.float32

📡 广播机制（Broadcasting）

广播是PyTorch处理不同形状张量运算的强大机制：

import torch

# 标量与张量运算
a = torch.tensor([1, 2, 3])
b = 2
print(a + b)  # tensor([3, 4, 5])  # 2被广播到[2, 2, 2]

# 不同形状张量运算
x = torch.ones(3, 4)      # 形状: (3, 4)
y = torch.ones(4)          # 形状: (4,)
print((x + y).shape)       # torch.Size([3, 4])  # y被广播到(3, 4)

# 更复杂的例子
a = torch.ones(5, 3, 4, 1)  # 形状: (5, 3, 4, 1)
b = torch.ones(   3, 1, 2)  # 形状:    (3, 1, 2)
print((a + b).shape)        # torch.Size([5, 3, 4, 2])

广播规则

规则：从最后一个维度开始对齐，逐维比较
1. 如果维度相等 → OK
2. 如果其中一个是1 → 扩展到另一个的大小
3. 如果一个张量维度不足 → 在前面补1
4. 如果维度不相等且都不是1 → 无法广播，抛出 RuntimeError

示例：
   A: (5, 3, 4, 1)
   B:    (3, 1, 2)
   ↓ B补齐为 (1, 3, 1, 2)
   结果: (5, 3, 4, 2)

⚠️ 广播可能导致意外

# Looks like an element-wise add, but the two shapes broadcast against
# each other instead.
a = torch.randn(3, 1)
b = torch.randn(1, 3)
c = a + b  # shape (3, 3), not (3,)!

# Avoid surprises: make the broadcast explicit.
# a.expand_as(b) would raise a RuntimeError here, because a's dim 0 has
# size 3 and cannot be expanded to b's size 1. Broadcast both operands to
# their common shape (3, 3) instead.
a_expanded, b_expanded = torch.broadcast_tensors(a, b)

🖥️ 设备操作（CPU与GPU）

检查设备可用性

import torch

# 检查CUDA（NVIDIA GPU）是否可用
print(f"CUDA可用: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU数量: {torch.cuda.device_count()}")
    print(f"GPU名称: {torch.cuda.get_device_name(0)}")
    print(f"当前GPU: {torch.cuda.current_device()}")

# 检查MPS（Apple Silicon GPU）是否可用
print(f"MPS可用: {torch.backends.mps.is_available()}")

在设备间移动张量

import torch

# Create a tensor on the CPU
x_cpu = torch.randn(3, 3)
print(f"设备: {x_cpu.device}")  # cpu

# x_gpu only exists when CUDA is available, so the round trip back to the
# CPU must live inside the same guard; otherwise this snippet raises a
# NameError on CPU-only machines.
if torch.cuda.is_available():
    # Move to the GPU
    x_gpu = x_cpu.to('cuda')
    # or, equivalently
    x_gpu = x_cpu.cuda()
    print(f"设备: {x_gpu.device}")  # cuda:0

    # Move back to the CPU
    x_back = x_gpu.to('cpu')
    # or, equivalently
    x_back = x_gpu.cpu()

# Device-agnostic code (recommended)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
x = torch.randn(3, 3).to(device)

直接在GPU上创建张量

import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Option 1: pass the target device at creation time
x = torch.randn(3, 3, device=device)

# Option 2: call a factory function with an explicit CUDA device.
# The legacy typed constructor torch.cuda.FloatTensor(3, 3) also yields an
# uninitialized float32 CUDA tensor, but it is discouraged in favour of
# factory functions that take dtype/device arguments.
if torch.cuda.is_available():
    x = torch.empty(3, 3, dtype=torch.float32, device='cuda')

多GPU操作

import torch

if torch.cuda.device_count() > 1:
    # 在指定GPU上创建张量
    x = torch.randn(3, 3, device='cuda:0')
    y = torch.randn(3, 3, device='cuda:1')
    
    # 跨GPU运算需要先移到同一设备
    y = y.to('cuda:0')
    z = x + y

💡 最佳实践

# 在代码开头定义设备，后续统一使用
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# 所有张量和模型都用.to(device)
model = MyModel().to(device)
data = data.to(device)

💾 内存管理

共享内存与拷贝

import torch
import numpy as np

# 情况1：view 共享内存（reshape 在内存连续时同样返回视图，否则会返回拷贝）
x = torch.arange(6)
y = x.view(2, 3)
y[0, 0] = 999
print(x)  # tensor([999,   1,   2,   3,   4,   5])  # x也变了！

# 情况2：from_numpy 共享内存
np_arr = np.array([1, 2, 3])
tensor = torch.from_numpy(np_arr)
tensor[0] = 999
print(np_arr)  # [999   2   3]  # numpy数组也变了！

# 情况3：clone 创建独立拷贝
x = torch.tensor([1, 2, 3])
y = x.clone()  # 完全独立的拷贝
y[0] = 999
print(x)  # tensor([1, 2, 3])  # x不变

# 情况4：contiguous 确保内存连续
x = torch.randn(3, 4)
y = x.T  # 转置后内存不连续
print(y.is_contiguous())  # False
z = y.contiguous()  # 创建连续的拷贝
print(z.is_contiguous())  # True

内存优化技巧

import torch

# 1. 原地操作（节省内存；注意：对需要梯度的张量做原地修改可能导致自动求导报错）
x = torch.randn(1000, 1000)
x.add_(1)  # 原地加法，下划线表示原地操作
x.mul_(2)  # 原地乘法
x.zero_()  # 原地清零

# 2. 释放GPU内存
if torch.cuda.is_available():
    x = torch.randn(1000, 1000, device='cuda')
    del x  # 删除引用
    torch.cuda.empty_cache()  # 释放缓存

# 3. 查看内存使用
if torch.cuda.is_available():
    print(f"已分配: {torch.cuda.memory_allocated() / 1024**2:.2f} MB")
    print(f"缓存: {torch.cuda.memory_reserved() / 1024**2:.2f} MB")

📌 pin_memory与non_blocking

高效的CPU到GPU数据传输：

import torch
from torch.utils.data import DataLoader

# pin_memory：将数据固定在内存中，加速传输到GPU
train_loader = DataLoader(
    dataset,
    batch_size=32,
    pin_memory=True  # 启用固定内存
)

# non_blocking：异步传输，不阻塞CPU
device = torch.device('cuda')

for data, labels in train_loader:
    # 异步传输数据到GPU
    data = data.to(device, non_blocking=True)
    labels = labels.to(device, non_blocking=True)
    
    # CPU可以继续做其他事情...
    
    # 使用数据时会自动同步
    output = model(data)

💡 什么时候用？

pin_memory: 当使用GPU训练且数据较多时启用
non_blocking: 配合pin_memory使用，实现CPU/GPU并行

🔢 张量的高级索引

高级索引技巧

import torch

x = torch.arange(12).reshape(3, 4)
print(x)
# tensor([[ 0,  1,  2,  3],
#         [ 4,  5,  6,  7],
#         [ 8,  9, 10, 11]])

# gather: 按索引收集元素
indices = torch.tensor([[0, 2], [1, 3], [0, 2]])
result = torch.gather(x, dim=1, index=indices)
print(result)
# tensor([[ 0,  2],
#         [ 5,  7],
#         [ 8, 10]])

# scatter: 按索引分散元素
src = torch.ones(3, 2)
indices = torch.tensor([[0, 2], [1, 3], [0, 2]])
result = torch.zeros(3, 4).scatter(dim=1, index=indices, src=src)
print(result)
# tensor([[1., 0., 1., 0.],
#         [0., 1., 0., 1.],
#         [1., 0., 1., 0.]])

# where: 条件选择
a = torch.tensor([1, 2, 3])
b = torch.tensor([4, 5, 6])
condition = torch.tensor([True, False, True])
result = torch.where(condition, a, b)
print(result)  # tensor([1, 5, 3])

masked操作

import torch

x = torch.randn(3, 3)
mask = x > 0

# 获取满足条件的元素
positive = x[mask]  # 或 x.masked_select(mask)
print(f"正数个数: {len(positive)}")

# 条件赋值
x_clipped = x.masked_fill(x < 0, 0)  # 负数变0
print(x_clipped)

🎲 随机数与可复现性

import torch
import random
import numpy as np

# 设置随机种子，确保可复现
def set_seed(seed=42):
    """Seed Python, NumPy and PyTorch RNGs and pin the cuDNN flags.

    Args:
        seed: Value passed to ``random.seed``, ``np.random.seed``,
            ``torch.manual_seed`` and ``torch.cuda.manual_seed_all``.
            Defaults to 42.

    Returns:
        None.
    """
    # The seeding calls are independent of one another, so apply the same
    # seed to each generator in turn.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Set deterministic=True and turn off benchmark mode on the cuDNN backend.
    cudnn = torch.backends.cudnn
    cudnn.benchmark = False
    cudnn.deterministic = True

set_seed(42)

# 现在随机结果可复现
x1 = torch.randn(3)
set_seed(42)
x2 = torch.randn(3)
print(torch.equal(x1, x2))  # True

# 常用随机张量创建
torch.rand(3, 3)      # 均匀分布 [0, 1)
torch.randn(3, 3)     # 标准正态分布
torch.randint(0, 10, (3, 3))  # 整数随机
torch.randperm(10)    # 0-9的随机排列

🏋️ 练习

试着完成以下练习：

import torch

# 练习1：创建一个5x5的随机矩阵，然后找出所有大于0.5的元素
# 你的代码：


# 练习2：创建两个3x3的矩阵，计算它们的矩阵乘法
# 你的代码：


# 练习3：创建一个1到100的向量，reshape成10x10的矩阵，然后计算每行的和
# 你的代码：


# 练习4：验证广播机制 - 创建形状为(3,1)和(1,4)的张量相加，预测结果形状
# 你的代码：


# 练习5：创建一个GPU张量（如果可用），进行运算后移回CPU
# 你的代码：

点击查看答案

# 练习1
x = torch.rand(5, 5)
print(x[x > 0.5])

# 练习2
a = torch.rand(3, 3)
b = torch.rand(3, 3)
print(a @ b)

# 练习3
x = torch.arange(1, 101).reshape(10, 10)
print(x.sum(dim=1))

# 练习4
a = torch.ones(3, 1)
b = torch.ones(1, 4)
c = a + b
print(f"结果形状: {c.shape}")  # torch.Size([3, 4])

# 练习5
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
x = torch.randn(3, 3, device=device)
y = x @ x.T  # 在GPU上运算
y_cpu = y.cpu()  # 移回CPU
print(f"计算设备: {device}, 结果设备: {y_cpu.device}")

下一步

掌握了张量的基本操作，接下来让我们学习PyTorch最强大的功能：自动求导！