6.4 Check-in
DAY 44 Pretrained Models
Knowledge review:
- The concept of pretraining
- Common pretrained classification models
- A brief history of pretrained image models
- Pretraining and fine-tuning strategies (see the sketch after this list)
- Pretraining in code: resnet18
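As a reminder of the two most common fine-tuning strategies, here is a minimal sketch, assuming a torchvision ResNet whose classification head is named fc (the class count of 10 matches CIFAR-10 below):

import torch.nn as nn
import torchvision

model = torchvision.models.resnet18(pretrained=True)

# Strategy 1: feature extraction ("linear probing") -- freeze the pretrained
# backbone and train only the newly attached classification head.
for param in model.parameters():
    param.requires_grad = False
model.fc = nn.Linear(model.fc.in_features, 10)  # new layers default to requires_grad=True

# Strategy 2: full fine-tuning -- replace the head but keep every parameter
# trainable, typically with a small learning rate (this is what the script below does).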
Homework:
- Compare other pretrained models on CIFAR-10 using the script below and observe the differences; try to pick models different from those chosen by others.
- Ctrl+click into the ResNet source in your IDE to see what a "residual" actually is (a simplified sketch follows).
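For the second exercise: in an IDE such as PyCharm or VS Code, Ctrl+click (Cmd+click on macOS) on resnet18 jumps into torchvision's source. The core of what you will find there is the residual connection; below is a simplified sketch of torchvision's BasicBlock, stripped of the stride and downsample details:

import torch.nn as nn

class BasicBlock(nn.Module):
    """Simplified residual block: output = ReLU(F(x) + x)."""
    def __init__(self, channels):
        super().__init__()
        self.conv1 = nn.Conv2d(channels, channels, kernel_size=3, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(channels, channels, kernel_size=3, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(channels)

    def forward(self, x):
        identity = x                      # the shortcut: keep the input around
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out = out + identity              # the residual connection: add the input back
        return self.relu(out)

The full comparison script for the first exercise follows.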
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import time
from tqdm import tqdm  # progress bar for training loops

# --- 1. Hyperparameter configuration ---
NUM_CLASSES = 10     # CIFAR-10 has 10 classes
BATCH_SIZE = 64
LEARNING_RATE = 1e-4 # fine-tuning usually works best with a small learning rate
NUM_EPOCHS = 10      # number of training epochs; adjust as needed
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# List of pretrained models to compare.
# Pick at least 2-3, and try to choose models different from other people's picks.
# Common options: resnet18, vgg16, googlenet, densenet121, mobilenet_v2, efficientnet_b0, vit_b_16
MODEL_NAMES = [
    "resnet18",
    "vgg16",
    "densenet121",
    "mobilenet_v2",
    # "efficientnet_b0",  # needs the timm library or a newer torchvision
    # "vit_b_16",         # needs the timm library or a newer torchvision
]
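If you want to try the commented-out entries, one route is the third-party timm library (assumed installed via pip install timm); timm.create_model can load pretrained weights and swap in a correctly sized classification head in a single call via num_classes:

import timm  # third-party model zoo: pip install timm

# pretrained=True downloads ImageNet weights; num_classes replaces the classification head.
model = timm.create_model("efficientnet_b0", pretrained=True, num_classes=NUM_CLASSES)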
# --- 2. Data loading and preprocessing ---
# ImageNet-pretrained models usually expect 224x224 inputs and ImageNet normalization.
transform = transforms.Compose([
    transforms.Resize(224),  # upscale CIFAR-10's 32x32 images to 224x224
    transforms.ToTensor(),
    # ImageNet channel means and standard deviations
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

print(f"Loaded CIFAR-10: {len(train_dataset)} training images, {len(test_dataset)} testing images.")
print(f"Using device: {DEVICE}")
# --- 3. Model loading and modification ---
def load_and_modify_model(model_name, num_classes):
    print(f"\nLoading {model_name}...")

    # Load the model dynamically by name.
    if hasattr(torchvision.models, model_name):
        # torchvision >= 0.13 prefers the weights=... argument; see the note below.
        model = getattr(torchvision.models, model_name)(pretrained=True)
    else:
        # Newer models (EfficientNet, ViT, ConvNeXt, ...) may need a newer
        # torchvision or the timm library; see the timm sketch above.
        print(f"Model {model_name} not found in torchvision.models. Skipping.")
        return None

    # Replace the final layer to match the new number of classes.
    # The classification head goes by different names in different models,
    # so each case is handled separately.
    if hasattr(model, 'fc'):  # ResNet, GoogLeNet, Inception
        num_ftrs = model.fc.in_features
        model.fc = nn.Linear(num_ftrs, num_classes)
    elif hasattr(model, 'classifier'):  # DenseNet, VGG, MobileNet, EfficientNet
        if isinstance(model.classifier, nn.Linear):  # DenseNet
            num_ftrs = model.classifier.in_features
            model.classifier = nn.Linear(num_ftrs, num_classes)
        elif isinstance(model.classifier, nn.Sequential):  # VGG, MobileNetV2/V3, EfficientNet
            # Here the classifier is a Sequential whose last layer is the Linear head.
            num_ftrs = model.classifier[-1].in_features
            model.classifier[-1] = nn.Linear(num_ftrs, num_classes)
    else:
        print(f"Warning: could not automatically find the classification head of {model_name}; "
              f"falling back to a generic search.")
        # Generic fallback: find the last nn.Linear in the model and replace it
        # on its parent module (handles nested names such as 'heads.head' in ViT).
        last_linear_name = None
        last_linear_module = None
        for name, module in model.named_modules():
            if isinstance(module, nn.Linear):
                last_linear_name, last_linear_module = name, module
        if last_linear_name is None:
            print(f"Error: no identifiable classification head found for {model_name}.")
            return None
        num_ftrs = last_linear_module.in_features
        parent = model
        *parent_names, attr_name = last_linear_name.split('.')
        for part in parent_names:
            parent = getattr(parent, part)
        setattr(parent, attr_name, nn.Linear(num_ftrs, num_classes))
        print(f"Automatically modified layer '{last_linear_name}'.")

    model = model.to(DEVICE)
    print(f"{model_name} loaded and modified.")
    return model
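A note on pretrained=True: torchvision 0.13+ deprecates it in favor of an explicit weights argument. On a recent torchvision the equivalent load looks like this (a sketch; the enum name follows torchvision's per-model naming pattern):

from torchvision.models import resnet18, ResNet18_Weights

model = resnet18(weights=ResNet18_Weights.DEFAULT)  # pass weights=None for random initialization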
# --- 4. Training function ---
def train_model(model, train_loader, criterion, optimizer, num_epochs, device):
    model.train()
    print("Starting training...")
    for epoch in range(num_epochs):
        running_loss = 0.0
        correct_predictions = 0
        total_samples = 0
        # Wrap the DataLoader with tqdm to show a progress bar.
        for inputs, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            total_samples += labels.size(0)
            correct_predictions += (predicted == labels).sum().item()
        epoch_loss = running_loss / total_samples
        epoch_accuracy = correct_predictions / total_samples
        print(f"Epoch {epoch+1} finished. Loss: {epoch_loss:.4f}, Train Accuracy: {epoch_accuracy:.4f}")
    print("Training complete.")
# --- 5. Evaluation function ---
def evaluate_model(model, test_loader, device):
    model.eval()  # switch to evaluation mode
    correct = 0
    total = 0
    with torch.no_grad():  # no gradients needed during evaluation
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    return accuracy
# --- 6. Main program: loop over and compare the models ---
results = []

for model_name in MODEL_NAMES:
    start_time = time.time()

    model = load_and_modify_model(model_name, NUM_CLASSES)
    if model is None:
        continue  # skip models that could not be loaded or modified

    criterion = nn.CrossEntropyLoss()
    # Re-create the optimizer for each model so it optimizes the current model's parameters.
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

    train_model(model, train_loader, criterion, optimizer, NUM_EPOCHS, DEVICE)
    accuracy = evaluate_model(model, test_loader, DEVICE)

    end_time = time.time()
    time_taken = end_time - start_time

    print(f"\n--- {model_name} Results ---")
    print(f"Test Accuracy: {accuracy:.2f}%")
    print(f"Total Time: {time_taken:.2f} seconds")

    # Record the results.
    results.append({
        "model_name": model_name,
        "accuracy": accuracy,
        "time_taken": time_taken,
        # Optional: number of trainable parameters.
        "parameters": sum(p.numel() for p in model.parameters() if p.requires_grad),
    })
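A common refinement of the single-learning-rate optimizer above is to give the pretrained backbone a smaller learning rate than the freshly initialized head. A minimal sketch, assuming a ResNet-style model whose head is named fc (the layer name and the 10x ratio are illustrative, not part of the script above):

# Hypothetical parameter groups: slow backbone, faster new head.
head_params = list(model.fc.parameters())
head_ids = {id(p) for p in head_params}
backbone_params = [p for p in model.parameters() if id(p) not in head_ids]
optimizer = optim.Adam([
    {"params": backbone_params, "lr": LEARNING_RATE / 10},
    {"params": head_params, "lr": LEARNING_RATE},
])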
print("\n" + "="*50)
print(" Model Comparison Results ")
print("="*50)
print(f"{'Model':<20} | {'Accuracy (%)':<15} | {'Time (s)':<12} | {'Trainable Params (M)':<20}")
print("-" * 75)
for res in sorted(results, key=lambda x: x['accuracy'], reverse=True):
    print(f"{res['model_name']:<20} | {res['accuracy']:<15.2f} | {res['time_taken']:<12.2f} | {res['parameters'] / 1e6:<20.2f}")
print("=" * 75)

# --- 8. Observe the differences and summarize (fill in from your own runs) ---
print("\n--- 观察差异与总结 ---")
print("1. 准确率差异:")
print(" - 表现最好的模型是:[填写模型名称],准确率 [数值]%。")
print(" - 表现较差的模型是:[填写模型名称],准确率 [数值]%。")
print(" - (分析原因:例如,某些模型可能对小数据集更鲁棒,或者其架构更适合ImageNet这种大规模预训练,但微调到小数据集上不一定能发挥最佳效果。或者模型复杂度/参数量是否与数据集规模匹配?)")
print("\n2. 训练时间差异:")
print(" - 训练时间最短的模型是:[填写模型名称],耗时 [数值] 秒。")
print(" - 训练时间最长的模型是:[填写模型名称],耗时 [数值] 秒。")
print(" - (分析原因:通常模型参数量和计算量越大,训练时间越长。像MobileNet系列因其轻量级设计,通常训练速度快。)")
print("\n3. 模型大小/参数量差异 (可选):")
print(" - 参数量最大的模型是:[填写模型名称],约 [数值] M。")
print(" - 参数量最小的模型是:[填写模型名称],约 [数值] M。")
print(" - (分析原因:参数量大的模型理论上表达能力更强,但在小数据集上可能更容易过拟合。轻量级网络旨在用更少的参数达到可接受的性能。)")
print("\n4. 总体结论:")
print(" - (结合准确率、时间、参数量等因素,对不同模型在CIFAR-10上的表现进行综合评价。例如:对于CIFAR-10这类图像较小的分类任务,中等大小的预训练模型通常能取得不错的平衡。更大型的模型可能需要更长的训练时间或更大的数据集才能充分发挥优势。)")