DAY 52 Neural Network Hyperparameter Tuning Guide
Knowledge review:
- Random seeds
- Initialization of internal parameters (weights)
- The neural-network tuning guide
- How hyperparameters are classified (see the sketch below)
- The order in which to tune them
- Tuning tips for each group of parameters
Homework: for the simple CNN from Day 41, see whether the tuning guide can help push its accuracy further.
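Before the full script, here is a minimal sketch of the last three knowledge points: one common way to group the tuning knobs and the order in which they are usually adjusted. The three-way split, the ordering hints, and the names are assumptions chosen to match the script below, not a fixed rule.

# A minimal sketch of one common way to classify the tuning knobs
# (assumption: this grouping and ordering are a convention, not a rule;
# the values mirror the script that follows).
optimization_params = {      # usually tuned first, learning rate above all
    "learning_rate": 0.01,
    "batch_size": 128,
    "epochs": 15,
}
regularization_params = {    # tuned next, to control overfitting
    "dropout": 0.5,
    "weight_decay": 1e-5,
}
structural_params = {        # define the model itself; usually tuned last
    "conv_channels": (32, 64),
    "fc_hidden": 128,
}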
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import numpy as np
import matplotlib.pyplot as plt
import random
from tqdm import tqdm

# Set random seeds so results are reproducible
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

# Define the CNN model
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        # First conv layer: 1 input channel, 32 output channels, 3x3 kernel
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(2)
        # Second conv layer: 32 input channels, 64 output channels, 3x3 kernel
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(2)
        # Fully connected layers
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        # Conv block 1
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu1(x)
        x = self.pool1(x)
        # Conv block 2
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu2(x)
        x = self.pool2(x)
        # Flatten
        x = x.view(-1, 64 * 7 * 7)
        # Fully connected layers
        x = self.fc1(x)
        x = self.relu1(x)  # ReLU is stateless, so reusing relu1 here is harmless
        x = self.dropout(x)
        x = self.fc2(x)
        return x

# Weight initialization
def init_weights(m):
    if isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        if m.bias is not None:
            nn.init.constant_(m.bias, 0)
    elif isinstance(m, nn.BatchNorm2d):
        nn.init.constant_(m.weight, 1)
        nn.init.constant_(m.bias, 0)
    elif isinstance(m, nn.Linear):
        nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        nn.init.constant_(m.bias, 0)

# Training function
def train(model, train_loader, criterion, optimizer, device):
    model.train()
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(tqdm(train_loader)):
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()
    acc = 100. * correct / total
    return train_loss / len(train_loader), acc

# Test function
def test(model, test_loader, criterion, device):
    model.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(tqdm(test_loader)):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
    acc = 100. * correct / total
    return test_loss / len(test_loader), acc

# Main function
def main():
    # Set the random seed
    set_seed(42)

    # Select the device
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(f"Using device: {device}")

    # Data preprocessing
    transform_train = transforms.Compose([
        transforms.RandomRotation(10),
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])

    # Load the datasets
    train_dataset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform_train)
    test_dataset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform_test)

    # Create the data loaders
    batch_size = 128  # tuning knob 1
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=100, shuffle=False, num_workers=2)

    # Initialize the model
    model = SimpleCNN().to(device)
    model.apply(init_weights)

    # Loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    learning_rate = 0.01  # tuning knob 2
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)  # tuning knob 3
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=3, factor=0.5)

    # Train the model
    epochs = 15  # tuning knob 4
    train_losses = []
    train_accs = []
    test_losses = []
    test_accs = []
    best_acc = 0
    for epoch in range(epochs):
        print(f"\nEpoch: {epoch+1}/{epochs}")
        train_loss, train_acc = train(model, train_loader, criterion, optimizer, device)
        test_loss, test_acc = test(model, test_loader, criterion, device)
        train_losses.append(train_loss)
        train_accs.append(train_acc)
        test_losses.append(test_loss)
        test_accs.append(test_acc)
        print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}%")
        print(f"Test Loss: {test_loss:.4f} | Test Acc: {test_acc:.2f}%")
        # Adjust the learning rate
        scheduler.step(test_loss)
        # Save the best model
        if test_acc > best_acc:
            best_acc = test_acc
            torch.save(model.state_dict(), 'best_model.pth')
            print(f"Model saved with accuracy: {best_acc:.2f}%")

    # Load the best model and evaluate it
    model.load_state_dict(torch.load('best_model.pth'))
    _, final_acc = test(model, test_loader, criterion, device)
    print(f"\nFinal Test Accuracy: {final_acc:.2f}%")

    # Plot the training curves
    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.plot(train_losses, label='Train Loss')
    plt.plot(test_losses, label='Test Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.title('Loss Curves')
    plt.subplot(1, 2, 2)
    plt.plot(train_accs, label='Train Accuracy')
    plt.plot(test_accs, label='Test Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy (%)')
    plt.legend()
    plt.title('Accuracy Curves')
    plt.tight_layout()
    plt.savefig('training_curves.png')
    plt.show()

if __name__ == '__main__':
    main()
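For the homework, a sensible first experiment is a coarse sweep over the learning rate, since it is usually the single most influential knob. Below is a minimal sketch that reuses set_seed, SimpleCNN, init_weights, train, and test from the script above; the candidate grid and the one-epoch budget are assumptions to keep the sketch short, and a real sweep would train each candidate for several epochs.

# A minimal sketch of a coarse learning-rate sweep for the homework
# (assumption: the candidate grid is illustrative, and each candidate
# would normally be trained for more than one epoch).
def lr_sweep(train_loader, test_loader, device, candidates=(0.1, 0.01, 0.001)):
    results = {}
    criterion = nn.CrossEntropyLoss()
    for lr in candidates:
        set_seed(42)                      # same seed so runs are comparable
        model = SimpleCNN().to(device)
        model.apply(init_weights)
        optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-5)
        train(model, train_loader, criterion, optimizer, device)  # one epoch
        _, acc = test(model, test_loader, criterion, device)
        results[lr] = acc
        print(f"lr={lr}: test acc {acc:.2f}%")
    return results

Once the best learning rate is fixed, the same loop can be repeated for batch size, weight decay, and dropout, following the tuning order sketched after the knowledge review.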