
DAY 52 Neural Network Hyperparameter Tuning Guide

Knowledge review:

  1. Random seeds
  2. Initialization of internal parameters (weights)
  3. Neural network tuning guide
    1. Classification of parameters (see the snippet after this list)
    2. Order of tuning
    3. Tips for tuning each group of parameters
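To make "classification of parameters" concrete: learnable (internal) parameters, such as conv and linear weights and biases, live inside the model and are updated by backpropagation, while hyperparameters (learning rate, batch size, weight decay, number of epochs) are set by hand, and they are what this guide tunes. A minimal sketch using a toy nn.Linear layer that is not part of the assignment code; the values shown are for illustration only:

import torch.nn as nn
import torch.optim as optim

# Learnable (internal) parameters: stored in the model, updated by the optimizer.
layer = nn.Linear(4, 2)
for name, p in layer.named_parameters():
    print(name, tuple(p.shape), p.requires_grad)  # weight (2, 4), bias (2,)

# Hyperparameters: chosen by hand; backpropagation never touches them.
learning_rate = 1e-3  # assumed value, for illustration only
weight_decay = 1e-5
optimizer = optim.Adam(layer.parameters(), lr=learning_rate, weight_decay=weight_decay)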

Assignment: For the simple CNN from Day 41, see whether the tuning guide can help push its accuracy further.
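The full training script below ties these points together on MNIST: a fixed random seed for reproducibility, Kaiming (He) initialization for the conv and linear layers, and four marked tuning knobs (batch size, learning rate, weight decay, and number of epochs).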

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import numpy as np
import matplotlib.pyplot as plt
import random
from tqdm import tqdm

# Set a random seed so results are reproducible
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True  # deterministic cuDNN kernels (may be slower)
        torch.backends.cudnn.benchmark = False

# Define the CNN model
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        # First conv layer: 1 input channel, 32 output channels, 3x3 kernel
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(2)
        # Second conv layer: 32 input channels, 64 output channels, 3x3 kernel
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(2)
        # Fully connected layers
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        # Conv block 1
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu1(x)
        x = self.pool1(x)
        # Conv block 2
        x = self.conv2(x)
        x = self.bn2(x)
        x = self.relu2(x)
        x = self.pool2(x)
        # Flatten: two 2x2 poolings take 28x28 down to 7x7
        x = x.view(-1, 64 * 7 * 7)
        # Fully connected layers
        x = self.fc1(x)
        x = self.relu1(x)  # ReLU holds no state, so reusing relu1 here is safe
        x = self.dropout(x)
        x = self.fc2(x)
        return x

# Weight initialization
def init_weights(m):
    if isinstance(m, nn.Conv2d):
        nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        if m.bias is not None:
            nn.init.constant_(m.bias, 0)
    elif isinstance(m, nn.BatchNorm2d):
        nn.init.constant_(m.weight, 1)
        nn.init.constant_(m.bias, 0)
    elif isinstance(m, nn.Linear):
        nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        nn.init.constant_(m.bias, 0)

# Training function
def train(model, train_loader, criterion, optimizer, device):
    model.train()
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(tqdm(train_loader)):
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()
    acc = 100. * correct / total
    return train_loss / len(train_loader), acc

# Test function
def test(model, test_loader, criterion, device):
    model.eval()
    test_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(tqdm(test_loader)):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
    acc = 100. * correct / total
    return test_loss / len(test_loader), acc

# Main function
def main():
    # Set the random seed
    set_seed(42)
    # Select the device
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(f"Using device: {device}")
    # Data preprocessing (light rotation augmentation for training only)
    transform_train = transforms.Compose([
        transforms.RandomRotation(10),
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])
    # Load the datasets
    train_dataset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform_train)
    test_dataset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform_test)
    # Create the data loaders
    batch_size = 128  # tuning knob 1
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=100, shuffle=False, num_workers=2)
    # Initialize the model
    model = SimpleCNN().to(device)
    model.apply(init_weights)
    # Define the loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    learning_rate = 0.01  # tuning knob 2
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)  # tuning knob 3
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=3, factor=0.5)
    # Train the model
    epochs = 15  # tuning knob 4
    train_losses = []
    train_accs = []
    test_losses = []
    test_accs = []
    best_acc = 0
    for epoch in range(epochs):
        print(f"\nEpoch: {epoch+1}/{epochs}")
        train_loss, train_acc = train(model, train_loader, criterion, optimizer, device)
        test_loss, test_acc = test(model, test_loader, criterion, device)
        train_losses.append(train_loss)
        train_accs.append(train_acc)
        test_losses.append(test_loss)
        test_accs.append(test_acc)
        print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}%")
        print(f"Test Loss: {test_loss:.4f} | Test Acc: {test_acc:.2f}%")
        # Adjust the learning rate when the test loss plateaus
        scheduler.step(test_loss)
        # Save the best model
        if test_acc > best_acc:
            best_acc = test_acc
            torch.save(model.state_dict(), 'best_model.pth')
            print(f"Model saved with accuracy: {best_acc:.2f}%")
    # Load the best model and evaluate it one last time
    model.load_state_dict(torch.load('best_model.pth'))
    _, final_acc = test(model, test_loader, criterion, device)
    print(f"\nFinal Test Accuracy: {final_acc:.2f}%")
    # Plot the training curves
    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.plot(train_losses, label='Train Loss')
    plt.plot(test_losses, label='Test Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.title('Loss Curves')
    plt.subplot(1, 2, 2)
    plt.plot(train_accs, label='Train Accuracy')
    plt.plot(test_accs, label='Test Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy (%)')
    plt.legend()
    plt.title('Accuracy Curves')
    plt.tight_layout()
    plt.savefig('training_curves.png')
    plt.show()

if __name__ == '__main__':
    main()
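As a starting point for the assignment, one common way to apply the tuning order above is a coarse sweep over the two most sensitive knobs, learning rate and batch size, before touching the architecture. The sketch below reuses set_seed, init_weights, SimpleCNN, train, and test from the script; the names run_experiment and coarse_sweep, the grid values, and the 3-epoch budget are my assumptions for a quick first pass, not part of the original code.

# --- Hypothetical tuning sketch (not part of the original script) ---
def run_experiment(lr, batch_size, epochs=3):
    set_seed(42)  # identical seed per run, so only the knobs differ
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    # Plain preprocessing without augmentation, to keep runs fast and comparable
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])
    train_ds = torchvision.datasets.MNIST(root='./data', train=True,
                                          download=True, transform=transform)
    test_ds = torchvision.datasets.MNIST(root='./data', train=False,
                                         download=True, transform=transform)
    train_loader = torch.utils.data.DataLoader(
        train_ds, batch_size=batch_size, shuffle=True, num_workers=2)
    test_loader = torch.utils.data.DataLoader(
        test_ds, batch_size=100, shuffle=False, num_workers=2)
    model = SimpleCNN().to(device)
    model.apply(init_weights)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-5)
    for _ in range(epochs):
        train(model, train_loader, criterion, optimizer, device)
    _, acc = test(model, test_loader, criterion, device)
    return acc

def coarse_sweep():
    # Sweep one knob at a time: learning rate is usually the most sensitive.
    for lr in (1e-2, 1e-3, 1e-4):
        for bs in (64, 128, 256):
            acc = run_experiment(lr, bs)
            print(f"lr={lr}, batch_size={bs} -> test acc {acc:.2f}%")

Call coarse_sweep() in place of main() while tuning; once the best (lr, batch_size) pair is found, plug it back into main() for the full 15-epoch run with augmentation and the scheduler.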
