
6.4 Check-in

DAY 44 Pretrained Models

Knowledge review:

  1. The concept of pretraining
  2. Common classification pretrained models
  3. A brief history of image pretraining models
  4. Pretraining/fine-tuning strategies (see the sketch after this list)
  5. Pretraining in practice: resnet18
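Of these, item 4 (pretraining strategies) is easiest to grasp in code. The two most common strategies are feature extraction (freeze the pretrained backbone and train only a new head) and full fine-tuning (update all weights with a small learning rate). A minimal sketch, assuming torchvision's resnet18 as the backbone:

import torch.nn as nn
import torchvision

model = torchvision.models.resnet18(pretrained=True)

# Strategy A: feature extraction -- freeze the pretrained backbone.
for param in model.parameters():
    param.requires_grad = False

# Replace the head; the new layer's parameters are trainable by default.
model.fc = nn.Linear(model.fc.in_features, 10)

# Strategy B: full fine-tuning -- leave requires_grad=True everywhere and use a
# small learning rate, which is what the script below does (LEARNING_RATE = 1e-4).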

Homework:

  1. Compare the other pretrained models listed below on CIFAR-10 and observe the differences; try to pick models different from what others chose.
  2. Try Ctrl+clicking into the resnet source code to see what the "residual" actually is (a simplified sketch follows below).
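For homework item 2: what you will find inside torchvision's resnet source is, in simplified form, something like the block below. This is a sketch of the BasicBlock idea, not the exact torchvision code (the real block also handles stride and a downsample shortcut). The "residual" is simply the input added back onto the output of the convolutions:

import torch.nn as nn

class BasicBlockSketch(nn.Module):
    def __init__(self, channels):
        super().__init__()
        self.conv1 = nn.Conv2d(channels, channels, kernel_size=3, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(channels, channels, kernel_size=3, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(channels)

    def forward(self, x):
        identity = x  # the shortcut branch: keep the input unchanged
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += identity  # residual connection: add the input back
        return self.relu(out)

The full comparison script for homework item 1: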
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import time
from tqdm import tqdm  # progress bar for training loops

# --- 1. Hyperparameter configuration ---
NUM_CLASSES = 10     # CIFAR-10 has 10 classes
BATCH_SIZE = 64
LEARNING_RATE = 1e-4 # fine-tuning usually works best with a small learning rate
NUM_EPOCHS = 10      # number of training epochs; adjust as needed
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# The pretrained models you want to compare. Pick at least 2-3, and try to
# choose models different from what others picked. Common options: resnet18,
# vgg16, googlenet, densenet121, mobilenet_v2, efficientnet_b0, vit_b_16.
MODEL_NAMES = [
    "resnet18",
    "vgg16",
    "densenet121",
    "mobilenet_v2",
    # "efficientnet_b0",  # needs the timm library or a newer torchvision
    # "vit_b_16",         # needs the timm library or a newer torchvision
]

# --- 2. Data loading and preprocessing ---
# ImageNet-pretrained models expect 224x224 inputs with ImageNet normalization.
transform = transforms.Compose([
    transforms.Resize(224),  # upscale CIFAR-10's 32x32 images to 224x224
    transforms.ToTensor(),
    # ImageNet channel means and standard deviations
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

# Note: with num_workers > 0 on Windows, run this script under
# `if __name__ == "__main__":` to avoid multiprocessing errors.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

print(f"Loaded CIFAR-10: {len(train_dataset)} training images, {len(test_dataset)} testing images.")
print(f"Using device: {DEVICE}")

# --- 3. Model loading and modification ---
def load_and_modify_model(model_name, num_classes):
    print(f"\nLoading {model_name}...")
    # Load the model dynamically from torchvision.models.
    if hasattr(torchvision.models, model_name):
        model = getattr(torchvision.models, model_name)(pretrained=True)
    else:
        # Newer architectures (EfficientNet, ViT, ConvNeXt, ...) may live in a
        # newer torchvision release or in timm. To use them, install timm and:
        #   import timm
        #   model = timm.create_model(model_name, pretrained=True)
        print(f"Model {model_name} not found in torchvision.models. Skipping.")
        return None

    # Replace the final layer so the model outputs num_classes logits.
    # Different architectures name their classification head differently.
    if hasattr(model, 'fc'):  # ResNet, GoogLeNet, Inception
        num_ftrs = model.fc.in_features
        model.fc = nn.Linear(num_ftrs, num_classes)
    elif hasattr(model, 'classifier') and isinstance(model.classifier, nn.Linear):
        # DenseNet: the head is a single Linear layer named `classifier`.
        num_ftrs = model.classifier.in_features
        model.classifier = nn.Linear(num_ftrs, num_classes)
    elif hasattr(model, 'classifier') and isinstance(model.classifier, nn.Sequential):
        # VGG, MobileNetV2/V3, EfficientNet: `classifier` is a Sequential
        # whose last layer is the Linear head.
        num_ftrs = model.classifier[-1].in_features
        model.classifier[-1] = nn.Linear(num_ftrs, num_classes)
    else:
        # Generic fallback: replace the last nn.Linear found anywhere in the model.
        print(f"Warning: could not automatically find the classification head of {model_name}; trying a generic approach.")
        last_linear_name, last_linear_module = None, None
        for name, module in model.named_modules():
            if isinstance(module, nn.Linear):
                last_linear_name, last_linear_module = name, module
        if last_linear_name is None:
            print(f"Error: no identifiable classification head found for {model_name}.")
            return None
        # Walk to the parent module so nested layers are replaced correctly
        # (a plain setattr on `model` would miss nested names like 'heads.head').
        parent = model
        *path, leaf = last_linear_name.split('.')
        for part in path:
            parent = getattr(parent, part)
        setattr(parent, leaf, nn.Linear(last_linear_module.in_features, num_classes))
        print(f"Automatically replaced layer '{last_linear_name}'.")

    model = model.to(DEVICE)
    print(f"{model_name} loaded and modified.")
    return model
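A quick, optional sanity check (an illustrative addition, not part of the original script): push a dummy batch through one modified model to confirm the head was actually replaced.

# Optional sanity check (illustrative addition): the modified model should map
# a (batch, 3, 224, 224) input to (batch, NUM_CLASSES) logits.
sanity_model = load_and_modify_model("resnet18", NUM_CLASSES)
if sanity_model is not None:
    dummy = torch.randn(2, 3, 224, 224).to(DEVICE)
    print(sanity_model(dummy).shape)  # expected: torch.Size([2, 10])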
# --- 4. Training function ---
def train_model(model, train_loader, criterion, optimizer, num_epochs, device):
    model.train()
    print("Starting training...")
    for epoch in range(num_epochs):
        running_loss = 0.0
        correct_predictions = 0
        total_samples = 0
        # Wrap the DataLoader in tqdm to show a progress bar.
        for inputs, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            total_samples += labels.size(0)
            correct_predictions += (predicted == labels).sum().item()
        epoch_loss = running_loss / total_samples
        epoch_accuracy = correct_predictions / total_samples
        print(f"Epoch {epoch+1} finished. Loss: {epoch_loss:.4f}, Train Accuracy: {epoch_accuracy:.4f}")
    print("Training complete.")
# --- 5. Evaluation function ---
def evaluate_model(model, test_loader, device):
    model.eval()  # evaluation mode: disables dropout, uses running batch-norm stats
    correct = 0
    total = 0
    with torch.no_grad():  # no gradients needed during evaluation
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    return accuracy
# --- 6. Main program: train and compare the models ---
results = []
for model_name in MODEL_NAMES:
    start_time = time.time()
    model = load_and_modify_model(model_name, NUM_CLASSES)
    if model is None:
        continue  # skip models that could not be loaded or modified

    criterion = nn.CrossEntropyLoss()
    # Re-create the optimizer for each model so it tracks the current model's parameters.
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

    train_model(model, train_loader, criterion, optimizer, NUM_EPOCHS, DEVICE)
    accuracy = evaluate_model(model, test_loader, DEVICE)

    end_time = time.time()
    time_taken = end_time - start_time

    print(f"\n--- {model_name} Results ---")
    print(f"Test Accuracy: {accuracy:.2f}%")
    print(f"Total Time: {time_taken:.2f} seconds")

    results.append({
        "model_name": model_name,
        "accuracy": accuracy,
        "time_taken": time_taken,
        # optional: number of trainable parameters
        "parameters": sum(p.numel() for p in model.parameters() if p.requires_grad),
    })

# --- 7. Print the final comparison results ---
print("\n" + "="*50)
print("             Model Comparison Results             ")
print("="*50)
print(f"{'Model':<20} | {'Accuracy (%)':<15} | {'Time (s)':<12} | {'Trainable Params (M)':<20}")
print("-" * 75)
for res in sorted(results, key=lambda x: x['accuracy'], reverse=True):
    print(f"{res['model_name']:<20} | {res['accuracy']:<15.2f} | {res['time_taken']:<12.2f} | {res['parameters'] / 1e6:<20.2f}")
print("=" * 75)

# 8. Observations and summary (fill in from your own runs)
print("\n--- 观察差异与总结 ---")
print("1. 准确率差异:")
print("   - 表现最好的模型是:[填写模型名称],准确率 [数值]%。")
print("   - 表现较差的模型是:[填写模型名称],准确率 [数值]%。")
print("   - (分析原因:例如,某些模型可能对小数据集更鲁棒,或者其架构更适合ImageNet这种大规模预训练,但微调到小数据集上不一定能发挥最佳效果。或者模型复杂度/参数量是否与数据集规模匹配?)")
print("\n2. 训练时间差异:")
print("   - 训练时间最短的模型是:[填写模型名称],耗时 [数值] 秒。")
print("   - 训练时间最长的模型是:[填写模型名称],耗时 [数值] 秒。")
print("   - (分析原因:通常模型参数量和计算量越大,训练时间越长。像MobileNet系列因其轻量级设计,通常训练速度快。)")
print("\n3. 模型大小/参数量差异 (可选):")
print("   - 参数量最大的模型是:[填写模型名称],约 [数值] M。")
print("   - 参数量最小的模型是:[填写模型名称],约 [数值] M。")
print("   - (分析原因:参数量大的模型理论上表达能力更强,但在小数据集上可能更容易过拟合。轻量级网络旨在用更少的参数达到可接受的性能。)")
print("\n4. 总体结论:")
print("   - (结合准确率、时间、参数量等因素,对不同模型在CIFAR-10上的表现进行综合评价。例如:对于CIFAR-10这类图像较小的分类任务,中等大小的预训练模型通常能取得不错的平衡。更大型的模型可能需要更长的训练时间或更大的数据集才能充分发挥优势。)")
