
A Basic Framework for Deep Learning Models

Introduction:

This post distills a basic framework to help users quickly create new models. It comes in both a PaddlePaddle version and a PyTorch version; the two differ, but for a beginner they are simply two different yet very similar syntaxes. Both projects are hosted on the Paddle AI Studio platform.
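To show how close the two syntaxes are, here is a minimal sketch (written for this post, not taken from either project, and assuming both libraries are installed) of the same small classifier head in PaddlePaddle and in PyTorch; apart from the package names and nn.Layer versus nn.Module, the two definitions are line for line the same.

import paddle
import torch

# PaddlePaddle version of a tiny two-layer head
class HeadPaddle(paddle.nn.Layer):
    def __init__(self, num_classes=5):
        super().__init__()
        self.fc1 = paddle.nn.Linear(512, 128)
        self.fc2 = paddle.nn.Linear(128, num_classes)

    def forward(self, x):
        x = paddle.nn.functional.relu(self.fc1(x))
        return self.fc2(x)

# PyTorch version: same structure, only the base class and namespace change
class HeadTorch(torch.nn.Module):
    def __init__(self, num_classes=5):
        super().__init__()
        self.fc1 = torch.nn.Linear(512, 128)
        self.fc2 = torch.nn.Linear(128, num_classes)

    def forward(self, x):
        x = torch.nn.functional.relu(self.fc1(x))
        return self.fc2(x)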

Model framework: PaddlePaddle

This section uses a PaddlePaddle image classification project as the example; the project link is:
Flower classification: https://aistudio.baidu.com/projectdetail/9165955?sUid=15411139&shared=1&ts=1747486226503

1. Importing libraries and configuring parameters

import os
import zipfile
import random
import json
import paddle
import sys
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from paddle.io import Dataset

train_parameters = {
    "input_size": [3, 224, 224],                                 # shape of the input images
    "class_dim": -1,                                             # number of classes
    "src_path": "/home/aistudio/data/data6504/flower7595.zip",   # path of the raw dataset
    "target_path": "/home/aistudio/data/",                       # extraction target path
    "train_list_path": "/home/aistudio/data/train.txt",          # path of train.txt
    "eval_list_path": "/home/aistudio/data/eval.txt",            # path of eval.txt
    "readme_path": "/home/aistudio/data/readme.json",            # path of readme.json
    "label_dict": {},                                            # label dictionary
    "num_epochs": 20,                                            # number of training epochs
    "train_batch_size": 8,                                       # batch size during training
    "skip_steps": 10,
    "save_steps": 300,
    "learning_strategy": {                                       # optimizer-related settings
        "lr": 0.0001                                             # learning rate
    },
    "checkpoints": "/home/aistudio/work/checkpoints"             # directory for saved checkpoints
}

2. Data preparation

Preprocessing: unzip the dataset, etc.

Generate the data lists of (path, label) pairs: get_data_list produces eval.txt and train.txt (a small sketch of the line format follows after this list).

Build the data reader Reader:

        __init__ reads the txt file into lists of paths and labels

        __getitem__ indexes into those lists and loads the image from its path only when the sample is requested
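As a quick reference for the list format, here is a tiny sketch. The file names below are made up, but every line written by get_data_list has exactly this shape, an image path and an integer class label separated by a tab, which is also how Reader.__init__ parses it back.

# Hypothetical lines from /home/aistudio/data/train.txt (paths are illustrative only):
#   /home/aistudio/data/flowers/rose/0001.jpg<TAB>0
#   /home/aistudio/data/flowers/tulip/0342.jpg<TAB>3

line = "/home/aistudio/data/flowers/rose/0001.jpg\t0\n"
img_path, label = line.strip().split('\t')  # same parsing as in Reader.__init__
print(img_path, int(label))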

# # 1. Data preparation
# (1) Unzip the raw dataset
#
# (2) Split it into training and validation sets by a fixed ratio
#
# (3) Shuffle and generate the data lists
#
# (4) Define the data reader

# In[35]:

def unzip_data(src_path, target_path):
    # Unzip the raw dataset: extract the zip archive at src_path into target_path.
    # Skip extraction if the flowers folder is already there.
    if not os.path.isdir(target_path + "flowers"):
        z = zipfile.ZipFile(src_path, "r")
        z.extractall(path=target_path)
        z.close()

# In[36]:

# Build two txt files, one line per sample in the form "path<TAB>label"
# Generate the data lists
def get_data_list(target_path, train_list_path, eval_list_path):
    # Information about every class
    class_detail = []
    # Folder names of all classes
    data_list_path = target_path + "flowers/"
    class_dirs = os.listdir(data_list_path)
    # Total number of images
    all_class_images = 0
    # Current class label
    class_label = 0
    # Number of classes
    class_dim = 0
    # Lines to be written into eval.txt and train.txt
    trainer_list = []
    eval_list = []
    # Walk through every class folder
    for class_dir in class_dirs:
        if class_dir != ".DS_Store":
            class_dim += 1
            # Information about this class
            class_detail_list = {}
            eval_sum = 0
            trainer_sum = 0
            # Number of images in this class
            class_sum = 0
            # Path of this class
            path = data_list_path + class_dir
            # All images of this class
            img_paths = os.listdir(path)
            for img_path in img_paths:  # iterate over every image in the folder
                if img_path.split(".")[-1] == "jpg":
                    name_path = path + '/' + img_path  # full path of the image
                    if class_sum % 8 == 0:  # every 8th image goes to the validation set
                        eval_sum += 1  # number of validation samples
                        eval_list.append(name_path + "\t%d" % class_label + "\n")
                    else:
                        trainer_sum += 1
                        trainer_list.append(name_path + "\t%d" % class_label + "\n")
                    class_sum += 1  # images in this class
                    all_class_images += 1  # images over all classes
                else:
                    continue
            # class_detail entry for the readme json file
            class_detail_list['class_name'] = class_dir  # class name
            class_detail_list['class_label'] = class_label  # class label
            class_detail_list['class_eval_images'] = eval_sum  # validation images of this class
            class_detail_list['class_trainer_images'] = trainer_sum  # training images of this class
            class_detail.append(class_detail_list)
            # Fill the label dictionary
            train_parameters['label_dict'][str(class_label)] = class_dir
            class_label += 1
    # Record the number of classes
    train_parameters['class_dim'] = class_dim
    # Shuffle and write out the lists
    random.shuffle(eval_list)
    with open(eval_list_path, 'a') as f:
        for eval_image in eval_list:
            f.write(eval_image)
    random.shuffle(trainer_list)
    with open(train_list_path, 'a') as f2:
        for train_image in trainer_list:
            f2.write(train_image)
    # Information for the readme json file
    readjson = {}
    readjson['all_class_name'] = data_list_path  # parent directory of the data
    readjson['all_class_images'] = all_class_images
    readjson['class_detail'] = class_detail
    jsons = json.dumps(readjson, sort_keys=True, indent=4, separators=(',', ': '))
    with open(train_parameters['readme_path'], 'w') as f:
        f.write(jsons)
    print('Data lists generated!')

# In[37]:

# Parameter initialization
src_path = train_parameters['src_path']
target_path = train_parameters['target_path']
train_list_path = train_parameters['train_list_path']
eval_list_path = train_parameters['eval_list_path']

# Unzip the raw dataset into the target path
unzip_data(src_path, target_path)

# Split into training and validation sets, shuffle, and generate the data lists.
# Clear train.txt and eval.txt before regenerating them.
with open(train_list_path, 'w') as f:
    f.seek(0)
    f.truncate()
with open(eval_list_path, 'w') as f:
    f.seek(0)
    f.truncate()

# Generate the data lists
get_data_list(target_path, train_list_path, eval_list_path)

# In[38]:

class Reader(Dataset):
    def __init__(self, data_path, mode='train'):
        '''
        Data reader
        :param data_path: directory that holds train.txt / eval.txt
        :param mode: train or eval
        '''
        super().__init__()
        self.data_path = data_path
        self.img_paths = []
        self.labels = []
        if mode == 'train':
            with open(os.path.join(self.data_path, "train.txt"), "r", encoding="utf-8") as f:
                self.info = f.readlines()
            for img_info in self.info:
                img_path, label = img_info.strip().split('\t')
                self.img_paths.append(img_path)
                self.labels.append(int(label))
        else:
            with open(os.path.join(self.data_path, "eval.txt"), "r", encoding="utf-8") as f:
                self.info = f.readlines()
            for img_info in self.info:
                img_path, label = img_info.strip().split('\t')
                self.img_paths.append(img_path)
                self.labels.append(int(label))

    def __getitem__(self, index):
        # Fetch one sample.  :param index: sample index
        # Open the image file and read its label
        img_path = self.img_paths[index]
        img = Image.open(img_path)
        if img.mode != 'RGB':
            img = img.convert('RGB')
        img = img.resize((224, 224), Image.BILINEAR)
        img = np.array(img).astype('float32')
        img = img.transpose((2, 0, 1)) / 255
        label = self.labels[index]
        label = np.array([label], dtype="int64")
        return img, label

    def print_sample(self, index: int = 0):
        print("File name:", self.img_paths[index], "\tLabel:", self.labels[index])

    def __len__(self):
        return len(self.img_paths)

# In[39]:

# Training data loader
train_dataset = Reader('/home/aistudio/data', mode='train')
train_loader = paddle.io.DataLoader(train_dataset, batch_size=16, shuffle=True)
# Evaluation data loader
eval_dataset = Reader('/home/aistudio/data', mode='eval')
eval_loader = paddle.io.DataLoader(eval_dataset, batch_size=8, shuffle=False)

# In[40]:

train_dataset.print_sample(200)
print(train_dataset.__len__())
eval_dataset.print_sample(0)
print(eval_dataset.__len__())
print(eval_dataset.__getitem__(10)[0].shape)
print(eval_dataset.__getitem__(10)[1].shape)

3. Model construction

Build a model suited to the dataset type and format (as a beginner, I like to have AI generate the model and then just analyze it).

class ConvPool(paddle.nn.Layer):
    # Convolution + pooling block
    def __init__(self, num_channels, num_filters, filter_size, pool_size, pool_stride,
                 groups, conv_stride=1, conv_padding=1):
        super(ConvPool, self).__init__()
        for i in range(groups):
            self.add_sublayer(  # add a sub-layer instance
                'bb_%d' % i,
                paddle.nn.Conv2D(
                    in_channels=num_channels,   # input channels
                    out_channels=num_filters,   # number of filters
                    kernel_size=filter_size,    # kernel size
                    stride=conv_stride,         # stride
                    padding=conv_padding        # padding
                ))
            self.add_sublayer('relu%d' % i, paddle.nn.ReLU())
            num_channels = num_filters
        self.add_sublayer(
            'Maxpool',
            paddle.nn.MaxPool2D(
                kernel_size=pool_size,   # pooling kernel size
                stride=pool_stride       # pooling stride
            ))

    def forward(self, inputs):
        x = inputs
        for prefix, sub_layer in self.named_children():
            # print(prefix, sub_layer)
            x = sub_layer(x)
        return x

# In[42]:

class VGGNet(paddle.nn.Layer):
    # VGG backbone built from ConvPool blocks
    def __init__(self, num_classes=1000):
        super(VGGNet, self).__init__()
        # Block 1: 64 filters, 2 conv layers
        self.convpool01 = ConvPool(
            num_channels=3,   # input channels (RGB image)
            num_filters=64,
            filter_size=3,
            pool_size=2,
            pool_stride=2,
            groups=2          # two consecutive conv layers
        )
        # Block 2: 128 filters, 2 conv layers
        self.convpool02 = ConvPool(num_channels=64, num_filters=128, filter_size=3,
                                   pool_size=2, pool_stride=2, groups=2)
        # Block 3: 256 filters, 3 conv layers
        self.convpool03 = ConvPool(num_channels=128, num_filters=256, filter_size=3,
                                   pool_size=2, pool_stride=2, groups=3)
        # Block 4: 512 filters, 3 conv layers
        self.convpool04 = ConvPool(num_channels=256, num_filters=512, filter_size=3,
                                   pool_size=2, pool_stride=2, groups=3)
        # Block 5: 512 filters, 3 conv layers
        self.convpool05 = ConvPool(num_channels=512, num_filters=512, filter_size=3,
                                   pool_size=2, pool_stride=2, groups=3)
        # Fully connected layers
        self.fc1 = paddle.nn.Linear(512 * 7 * 7, 4096)  # computed from the input size
        self.fc2 = paddle.nn.Linear(4096, 4096)
        self.fc3 = paddle.nn.Linear(4096, num_classes)

    def forward(self, x, label=None):
        """Forward pass"""
        # Convolutional part
        out = self.convpool01(x)
        out = self.convpool02(out)
        out = self.convpool03(out)
        out = self.convpool04(out)
        out = self.convpool05(out)
        # Flatten
        out = paddle.flatten(out, start_axis=1, stop_axis=-1)
        # Fully connected part
        out = paddle.nn.functional.relu(self.fc1(out))
        out = paddle.nn.functional.relu(self.fc2(out))
        out = self.fc3(out)
        # Compute accuracy if a label is provided
        if label is not None:
            acc = paddle.metric.accuracy(input=out, label=label)
            return out, acc
        else:
            return out
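Before starting training, it is worth sanity-checking the network with a dummy forward pass. A minimal sketch, assuming Paddle 2.x; num_classes=5 is only an illustrative value here, while the project itself reads the real number of classes from train_parameters['class_dim'].

# Sanity check for the VGGNet defined above (assumes Paddle 2.x; num_classes=5 is illustrative)
check_model = VGGNet(num_classes=5)
dummy = paddle.randn([1, 3, 224, 224], dtype='float32')  # one fake image matching input_size
logits = check_model(dummy)  # no label passed, so only the logits are returned
print(logits.shape)          # expected: [1, 5]

# Optional layer-by-layer summary of output shapes and parameter counts
paddle.summary(check_model, input_size=(1, 3, 224, 224))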

4. Model training and evaluation

# In[43]:

def draw_process(title, color, iters, data, label):
    plt.title(title, fontsize=24)
    plt.xlabel("iter", fontsize=20)
    plt.ylabel(label, fontsize=20)
    plt.plot(iters, data, color=color, label=label)
    plt.legend()
    plt.grid()
    plt.show()

# In[44]:

print(train_parameters['class_dim'])
print(train_parameters['label_dict'])

# In[45]:

from paddle.optimizer.lr import CosineAnnealingDecay

model = VGGNet()
model.train()
cross_entropy = paddle.nn.CrossEntropyLoss()
optimizer = paddle.optimizer.Adam(learning_rate=train_parameters['learning_strategy']['lr'],
                                  parameters=model.parameters())

steps = 0
Iters, total_loss, total_acc = [], [], []

for epo in range(train_parameters['num_epochs']):
    for _, data in enumerate(train_loader()):
        steps += 1
        x_data = data[0]
        y_data = data[1]
        predicts, acc = model(x_data, y_data)
        loss = cross_entropy(predicts, y_data)
        loss.backward()
        optimizer.step()
        optimizer.clear_grad()
        if steps % train_parameters["skip_steps"] == 0:
            Iters.append(steps)
            total_loss.append(loss.numpy()[0])
            total_acc.append(acc.numpy()[0])
            # Print intermediate progress
            print('epo: {}, step: {}, loss is: {}, acc is: {}'.format(epo, steps, loss.numpy(), acc.numpy()))
        # Save model parameters
        if steps % train_parameters["save_steps"] == 0:
            save_path = train_parameters["checkpoints"] + "/" + "save_dir_" + str(steps) + '.pdparams'
            print('save model to: ' + save_path)
            paddle.save(model.state_dict(), save_path)
paddle.save(model.state_dict(), train_parameters["checkpoints"] + "/" + "save_dir_final.pdparams")
draw_process("trainning loss", "red", Iters, total_loss, "trainning loss")
draw_process("trainning acc", "green", Iters, total_acc, "trainning acc")# # 四、模型评估# In[46]:'''
模型评估
'''
from paddle.optimizer.lr import CosineAnnealingDecaymodel__state_dict = paddle.load('work/checkpoints/save_dir_final.pdparams')
model_eval = VGGNet()
model_eval.set_state_dict(model__state_dict)
model_eval.eval()
accs = []
for _, data in enumerate(eval_loader()):
    x_data = data[0]
    y_data = data[1]
    predicts = model_eval(x_data)
    acc = paddle.metric.accuracy(predicts, y_data)
    accs.append(acc.numpy()[0])
print('Accuracy on the validation set:', np.mean(accs))

# # 5. Model prediction

# In[47]:

def load_image(img_path):
    '''
    Preprocess an image for prediction
    '''
    img = Image.open(img_path)
    if img.mode != 'RGB':
        img = img.convert('RGB')
    img = img.resize((224, 224), Image.BILINEAR)
    img = np.array(img).astype('float32')
    img = img.transpose((2, 0, 1)) / 255  # HWC to CHW, then normalize
    return img

infer_dst_path = 'data/flowers/rose/'
label_dic = train_parameters['label_dict']

# In[48]:

model__state_dict = paddle.load('work/checkpoints/save_dir_final.pdparams')
model_predict = VGGNet()
model_predict.set_state_dict(model__state_dict)
model_predict.eval()
infer_imgs_path = os.listdir(infer_dst_path)
# print(infer_imgs_path)
for infer_img_path in infer_imgs_path[:10]:
    infer_img = load_image(infer_dst_path + infer_img_path)
    infer_img = infer_img[np.newaxis, :, :, :]  # reshape to (1, 3, 224, 224)
    infer_img = paddle.to_tensor(infer_img)
    result = model_predict(infer_img)
    lab = np.argmax(result.numpy())
    print("rose sample: {}, predicted as: {}".format(infer_img_path, label_dic[str(lab)]))
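If a confidence score is wanted next to the predicted class, the raw logits in result can be passed through softmax. A small sketch under the same assumptions as the loop above (result and label_dic as defined there):

import paddle.nn.functional as F

probs = F.softmax(result, axis=-1)                    # turn logits into class probabilities
lab = int(paddle.argmax(probs, axis=-1).numpy()[0])   # predicted class index
conf = float(paddle.max(probs).numpy())               # probability of that class
print("predicted: {}, confidence: {:.2%}".format(label_dic[str(lab)], conf))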

Model framework: PyTorch

This section uses a PyTorch speech emotion recognition (classification) project as the example; the project link is:


Speech emotion recognition: https://aistudio.baidu.com/projectdetail/9166158?sUid=15411139&shared=1&ts=1747488195503

This PyTorch project only serves as a framework example; swap in a different model and it can be used directly.
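Since the notebook itself lives behind the AI Studio link, here is a hedged, minimal sketch of the same four-step skeleton (data, model, training, evaluation) in PyTorch. The dataset contents, feature size and class count are placeholders for illustration, not the values used in the speech emotion project.

import torch
from torch.utils.data import Dataset, DataLoader

# 1. Data: a list-style dataset, analogous to the Paddle Reader above
class EmotionDataset(Dataset):
    def __init__(self, features, labels):
        self.features = features          # e.g. precomputed audio features (placeholder)
        self.labels = labels

    def __getitem__(self, index):
        x = torch.as_tensor(self.features[index], dtype=torch.float32)
        y = torch.as_tensor(self.labels[index], dtype=torch.long)
        return x, y

    def __len__(self):
        return len(self.labels)

# 2. Model: swap in any architecture suited to the data
class SimpleClassifier(torch.nn.Module):
    def __init__(self, in_dim=128, num_classes=6):   # placeholder sizes
        super().__init__()
        self.net = torch.nn.Sequential(
            torch.nn.Linear(in_dim, 256), torch.nn.ReLU(),
            torch.nn.Linear(256, num_classes),
        )

    def forward(self, x):
        return self.net(x)

# 3/4. Training and evaluation loop, mirroring the Paddle version above
def train(model, train_loader, eval_loader, epochs=20, lr=1e-4):
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    for epoch in range(epochs):
        model.train()
        for x, y in train_loader:
            optimizer.zero_grad()
            loss = criterion(model(x), y)
            loss.backward()
            optimizer.step()
        model.eval()
        correct, total = 0, 0
        with torch.no_grad():
            for x, y in eval_loader:
                pred = model(x).argmax(dim=1)
                correct += (pred == y).sum().item()
                total += y.numel()
        print(f"epoch {epoch}: eval acc = {correct / max(total, 1):.4f}")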
