当前位置：首页 > news >正文

YOLO入门教程（番外）：卷积神经网络—卷积神经网络（LeNet）

news 2025/10/5 11:38:15

LeNet：卷积神经网络的开创者与时尚识别实战

从ATM支票识别到Fashion-MNIST分类：探索LeNet的过去与现在

在深度学习的发展历程中，有一个网络架构因其开创性贡献而被永远铭记——这就是LeNet。由Yann LeCun在1989年提出的LeNet，不仅是卷积神经网络的先驱，更是将深度学习理论成功应用于实际问题的第一个典范。

LeNet：跨越时代的架构设计

历史背景与意义

在上世纪90年代，LeNet的出现代表了神经网络研究的重要突破。当时，LeNet取得了与支持向量机（当时监督学习的主流方法）相媲美的成绩。最令人惊叹的是，LeNet被成功应用于自动取款机（ATM）中，用于识别支票上的手写数字。直到今天，一些ATM机仍在运行基于LeCun和同事Leon Bottou在上世纪90年代编写的代码！

网络架构解析

LeNet（特指LeNet-5）的整体结构可以分为两个主要部分：

卷积编码器：由两个卷积层组成
全连接密集块：由三个全连接层组成

这种设计理念至今仍在影响现代卷积神经网络的设计。

LeNet的现代实现

使用PyTorch实现LeNet

import torch
import torch.nn as nn


class LeNet(nn.Module):
    """LeNet-5 style CNN with sigmoid activations and average pooling.

    The first linear layer expects ``16 * 5 * 5`` features, which is what the
    convolutional encoder produces for a 28x28 input image.

    Args:
        num_classes: Number of output logits. Defaults to 10.
        in_channels: Number of channels of the input image. Defaults to 1.
    """

    def __init__(self, num_classes=10, in_channels=1):
        super().__init__()
        # Convolutional encoder.
        self.conv_encoder = nn.Sequential(
            # First conv stage: in_channels -> 6 channels. padding=2 with a
            # 5x5 kernel keeps the spatial size unchanged.
            nn.Conv2d(in_channels, 6, kernel_size=5, padding=2),
            nn.Sigmoid(),
            nn.AvgPool2d(kernel_size=2, stride=2),
            # Second conv stage: 6 -> 16 channels, no padding.
            nn.Conv2d(6, 16, kernel_size=5),
            nn.Sigmoid(),
            nn.AvgPool2d(kernel_size=2, stride=2),
        )
        # Fully connected classifier.
        self.classifier = nn.Sequential(
            nn.Linear(16 * 5 * 5, 120),
            nn.Sigmoid(),
            nn.Linear(120, 84),
            nn.Sigmoid(),
            nn.Linear(84, num_classes),
        )

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        x = self.conv_encoder(x)
        # Flatten everything except the batch dimension. Kept in forward()
        # (rather than as an nn.Flatten layer) so the classifier's layer
        # indices, and therefore its state_dict keys, stay the same.
        x = x.flatten(start_dim=1)
        return self.classifier(x)

网络各层维度变化

让我们通过一个28×28的输入图像，观察LeNet各层的维度变化：

输入: (1, 1, 28, 28)       # [批次, 通道, 高度, 宽度]
第一卷积层: (1, 6, 28, 28)  # 使用padding=2保持尺寸
第一池化层: (1, 6, 14, 14)  # 下采样2倍
第二卷积层: (1, 16, 10, 10) # 无padding，尺寸减小
第二池化层: (1, 16, 5, 5)   # 下采样2倍
展平: (1, 400)             # 16×5×5=400
全连接层1: (1, 120)
全连接层2: (1, 84)
输出层: (1, 10)            # 对应10个分类

LeNet在Fashion-MNIST上的实战

数据准备与加载

from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Preprocessing pipeline: convert each image to a tensor, then normalize it
# with mean 0.5 and std 0.5.
_preprocessing_steps = [
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
]
transform = transforms.Compose(_preprocessing_steps)

# Fashion-MNIST train / test splits, downloaded into ./data when missing.
# Both splits share every argument except the `train` flag.
_dataset_kwargs = dict(root='./data', download=True, transform=transform)
train_dataset = datasets.FashionMNIST(train=True, **_dataset_kwargs)
test_dataset = datasets.FashionMNIST(train=False, **_dataset_kwargs)

# Data loaders: only the training split is shuffled.
batch_size = 256
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

模型训练与评估

import torch.optim as optim
from tqdm import tqdm


def train_lenet(model, train_loader, test_loader, num_epochs=10, lr=0.01, device='cpu', history=None):
    """Train ``model`` with SGD and cross-entropy loss.

    After every epoch the model is evaluated on ``test_loader`` and a summary
    line with the mean training loss and the test accuracy is printed.

    Args:
        model: The network to train.
        train_loader: Iterable yielding ``(inputs, labels)`` training batches.
        test_loader: Iterable yielding ``(inputs, labels)`` batches used for
            the per-epoch evaluation.
        num_epochs: Number of passes over ``train_loader``.
        lr: Learning rate passed to ``optim.SGD``.
        device: Device the model and every batch are moved to.
        history: Optional dict. When given, the mean training loss and the
            test accuracy of each epoch are appended to ``history['loss']``
            and ``history['accuracy']`` (the lists are created if missing),
            e.g. to feed a plotting helper afterwards.

    Returns:
        The trained model, as returned by ``model.to(device)``.
    """
    model = model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=lr)

    for epoch in range(num_epochs):
        # evaluate_accuracy() switches to eval mode, so training mode has to
        # be re-enabled at the start of every epoch.
        model.train()
        running_loss = 0.0
        # Batches are counted while iterating instead of calling
        # len(train_loader): that also works for loaders without a length
        # and avoids dividing by zero when the loader is empty.
        num_batches = 0

        progress_bar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{num_epochs}')
        for inputs, labels in progress_bar:
            inputs, labels = inputs.to(device), labels.to(device)

            # Forward pass.
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Backward pass and parameter update.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            batch_loss = loss.item()
            running_loss += batch_loss
            num_batches += 1
            progress_bar.set_postfix({'Loss': f'{batch_loss:.4f}'})

        mean_loss = running_loss / num_batches if num_batches else 0.0

        # Evaluate on the test set at the end of every epoch.
        test_acc = evaluate_accuracy(model, test_loader, device)
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {mean_loss:.4f}, Test Acc: {test_acc:.4f}')

        if history is not None:
            history.setdefault('loss', []).append(mean_loss)
            history.setdefault('accuracy', []).append(test_acc)

    return model


def evaluate_accuracy(model, data_loader, device='cpu'):
    """Return the fraction of samples in ``data_loader`` that ``model`` classifies correctly.

    The model is put into eval mode and is left in eval mode afterwards.
    It is not moved to ``device``; only the batches are.

    Args:
        model: The network to evaluate.
        data_loader: Iterable yielding ``(inputs, labels)`` batches.
        device: Device every batch is moved to before the forward pass.

    Returns:
        Accuracy as a float in ``[0, 1]``; ``0.0`` when ``data_loader``
        yields no samples.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in data_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            # The predicted class is the index of the largest logit.
            predicted = model(inputs).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    # Guard against an empty loader instead of raising ZeroDivisionError.
    return correct / total if total else 0.0

开始训练

# Pick the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'使用设备: {device}')

# Build a fresh LeNet instance.
lenet_model = LeNet()

# Train it for 10 epochs on the Fashion-MNIST loaders.
trained_model = train_lenet(
    model=lenet_model,
    train_loader=train_loader,
    test_loader=test_loader,
    num_epochs=10,
    lr=0.01,
    device=device,
)

LeNet的现代改进

虽然原始LeNet使用了sigmoid激活函数和平均池化，但现代实现中我们通常会进行以下改进：

1. 使用ReLU激活函数和最大池化

class ModernLeNet(nn.Module):
    """LeNet variant using ReLU activations and max pooling.

    The first linear layer expects ``16 * 5 * 5`` features, which is what the
    feature extractor produces for a 28x28 input image.

    Args:
        num_classes: Number of output logits. Defaults to 10.
        in_channels: Number of channels of the input image. Defaults to 1.
    """

    def __init__(self, num_classes=10, in_channels=1):
        super().__init__()
        self.features = nn.Sequential(
            # padding=2 with a 5x5 kernel keeps the spatial size unchanged.
            nn.Conv2d(in_channels, 6, 5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
            nn.Conv2d(6, 16, 5),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
        )
        self.classifier = nn.Sequential(
            nn.Linear(16 * 5 * 5, 120),
            nn.ReLU(inplace=True),
            nn.Linear(120, 84),
            nn.ReLU(inplace=True),
            nn.Linear(84, num_classes),
        )

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        x = self.features(x)
        # Flatten everything except the batch dimension; done here rather
        # than with an nn.Flatten layer so state_dict keys are unchanged.
        x = x.flatten(start_dim=1)
        return self.classifier(x)

2. 添加Batch Normalization

class BNLeNet(nn.Module):
    """LeNet variant with batch normalization, ReLU activations and max pooling.

    A batch-norm layer follows every convolution and every hidden linear
    layer. The first linear layer expects ``16 * 5 * 5`` features, which is
    what the feature extractor produces for a 28x28 input image.

    Args:
        num_classes: Number of output logits. Defaults to 10.
        in_channels: Number of channels of the input image. Defaults to 1.
    """

    def __init__(self, num_classes=10, in_channels=1):
        super().__init__()
        self.features = nn.Sequential(
            # padding=2 with a 5x5 kernel keeps the spatial size unchanged.
            nn.Conv2d(in_channels, 6, 5, padding=2),
            nn.BatchNorm2d(6),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
            nn.Conv2d(6, 16, 5),
            nn.BatchNorm2d(16),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
        )
        self.classifier = nn.Sequential(
            nn.Linear(16 * 5 * 5, 120),
            nn.BatchNorm1d(120),
            nn.ReLU(inplace=True),
            nn.Linear(120, 84),
            nn.BatchNorm1d(84),
            nn.ReLU(inplace=True),
            nn.Linear(84, num_classes),
        )

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        x = self.features(x)
        # Flatten everything except the batch dimension; done here rather
        # than with an nn.Flatten layer so state_dict keys are unchanged.
        x = x.flatten(start_dim=1)
        return self.classifier(x)

可视化与结果分析

训练过程可视化

import matplotlib.pyplot as plt


def plot_training_history(losses, accuracies):
    """Show the training loss and the test accuracy as two side-by-side line plots.

    Args:
        losses: Sequence of loss values, drawn in the left panel.
        accuracies: Sequence of accuracy values, drawn in the right panel.
    """
    _, (loss_axis, accuracy_axis) = plt.subplots(1, 2, figsize=(12, 4))

    # (axis, series, title, y-axis label) for each panel; both panels share
    # the same x-axis label.
    panels = (
        (loss_axis, losses, 'Training Loss', 'Loss'),
        (accuracy_axis, accuracies, 'Test Accuracy', 'Accuracy'),
    )
    for axis, series, title, y_label in panels:
        axis.plot(series)
        axis.set_title(title)
        axis.set_xlabel('Epoch')
        axis.set_ylabel(y_label)

    plt.tight_layout()
    plt.show()

特征可视化

def visualize_features(model, test_loader, device):
    """Plot feature maps produced by the model's first convolutional layer.

    One batch is taken from ``test_loader`` and pushed through the model's
    feature extractor up to and including its first ``nn.Conv2d`` layer. The
    first three output channels of the first two samples are shown in a
    2x3 grid; cells with no matching sample or channel are left blank.

    Args:
        model: Network exposing its convolutional layers as an iterable
            ``features`` attribute or, failing that, ``conv_encoder``.
        test_loader: Iterable yielding ``(images, labels)`` batches.
        device: Device the image batch is moved to; the model is not moved.

    Raises:
        AttributeError: If the model has neither ``features`` nor
            ``conv_encoder``.
        IndexError: If the feature extractor contains no ``nn.Conv2d`` layer.
    """
    # Accept both attribute names so the helper works for every model
    # variant, not only the ones that call their extractor ``features``.
    extractor = getattr(model, 'features', None)
    if extractor is None:
        extractor = model.conv_encoder

    model.eval()
    first_conv_output = None
    with torch.no_grad():
        # Take a single batch of data.
        images, _ = next(iter(test_loader))
        x = images.to(device)

        # Only the first convolution's output is drawn, so stop there
        # instead of running the remaining layers.
        for layer in extractor:
            x = layer(x)
            if isinstance(layer, nn.Conv2d):
                first_conv_output = x.cpu()
                break

    if first_conv_output is None:
        raise IndexError('feature extractor contains no nn.Conv2d layer')

    # Clamp to what the batch actually provides so that a small batch or a
    # narrow conv layer does not raise while indexing.
    num_samples = min(2, first_conv_output.size(0))
    num_maps = min(3, first_conv_output.size(1))

    _, axes = plt.subplots(2, 3, figsize=(15, 10))
    for i in range(2):  # first two samples
        for j in range(3):  # first three feature maps
            axis = axes[i, j]
            axis.axis('off')
            if i < num_samples and j < num_maps:
                axis.imshow(first_conv_output[i, j].numpy(), cmap='viridis')
                axis.set_title(f'Sample {i+1}, Feature {j+1}')
    plt.tight_layout()
    plt.show()