当前位置：首页 > news >正文

RTDETRv2 pytorch 官方版自己数据集训练遇到的问题解决

news 来源：原创 2025/6/16 5:51:52

记录用 RT-DETRv2 官方 PyTorch 版训练自己的数据集时遇到的问题及解决办法。

pip install torch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2  --index-url https://download.pytorch.org/whl/cu117

Please make sure torchvision version >= 0.15.2

发现自己实际装的是 torchvision==0.15.2+cu117，版本号带有 "+cu117" 后缀，与代码里判断的 '0.15.2' 不完全相等，所以触发了上面的报错。

在 _misc.py 中把版本判断改为实际安装的版本号：

if importlib.metadata.version('torchvision') == '0.15.2+cu117':

Assertion `index >= -sizes[i] && index < sizes[i] && "index out of bounds"

报错很多行。

原因是 COCO 标注里的类别序号不对：用自己的数据集训练时，类别的 id（category_id）需要从 0 开始。

coco像这种格式。“info”字段也要有，不然也会报错。

{"info": {"description": "COCO Dataset"},"licenses": [{"name": ""}],"images": [{"id": 1,"file_name": "00002.png","height": 1080,"width": 1920},{"id": 2,"file_name": "00009.png","height": 1080,"width": 1920}],"annotations": [{"id": 1,"image_id": 1,"category_id": 0,"segmentation": [[642.6923076923077,234.23076923076925,1377.3076923076924,234.23076923076925,1377.3076923076924,782.3076923076923,642.6923076923077,782.3076923076923]],"area": 402625.7396449703,"bbox": [642.6923076923077,234.23076923076925,734.6153846153846,548.076923076923],"iscrowd": 0},{"id": 2,"image_id": 2,"category_id": 1,"segmentation": [[490.76923076923083,222.6923076923077,1252.3076923076924,222.6923076923077,1252.3076923076924,784.2307692307692,490.76923076923083,784.2307692307692]],"area": 427633.1360946745,"bbox": [490.76923076923083,222.6923076923077,761.5384615384615,561.5384615384614],"iscrowd": 0}],"categories": [{"id": 0,"name": "ng","supercategory": ""},{"id": 1,"name": "ok","supercategory": ""}]
}

完整转换脚本，从xml转为coco。

import os
import json
import xml.etree.ElementTree as ET
from collections import defaultdict
from tqdm import tqdm
import argparse
import shutil
import cv2
import numpy as np


def parse_args(argv=None):
    """Parse the command-line options of the VOC-to-COCO converter.

    Args:
        argv: Optional list of argument strings. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``.

    Returns:
        argparse.Namespace with ``xml_dir``, ``img_dir``, ``output_json``,
        ``copy_images``, ``output_img_dir`` and ``classes``.
    """
    parser = argparse.ArgumentParser(
        description='Convert Pascal VOC XML annotations to COCO format')
    parser.add_argument('--xml_dir', type=str, required=True,
                        help='Directory containing XML annotation files')
    parser.add_argument('--img_dir', type=str, required=True,
                        help='Directory containing corresponding images')
    parser.add_argument('--output_json', type=str, required=True,
                        help='Output COCO format JSON file path')
    parser.add_argument('--copy_images', action='store_true',
                        help='Copy images to a new directory structure')
    parser.add_argument('--output_img_dir', type=str, default='coco_dataset',
                        help='Output directory for images if copying is enabled')
    parser.add_argument('--classes', type=str, nargs='*', default=None,
                        help='Class names in category-id order (first name gets id 0); '
                             'pass the same list for train and val so the ids match')
    return parser.parse_args(argv)


def get_image_size(image_path):
    """Return ``(width, height)`` of an image, or ``(0, 0)`` if it cannot be read."""
    try:
        # Decode from an in-memory buffer instead of cv2.imread(): imread can
        # fail on non-ASCII (e.g. Chinese) paths on Windows.
        buffer = np.fromfile(image_path, dtype=np.uint8)
        img = cv2.imdecode(buffer, cv2.IMREAD_COLOR)
        if img is None:
            raise IOError(f"无法读取图像: {image_path}")
        return img.shape[1], img.shape[0]  # width, height
    except Exception as e:
        # Best-effort helper: report the problem and let the caller skip the image.
        print(f"错误: {e}")
        return 0, 0


def read_image_size(root, img_path):
    """Return ``(width, height)`` for one annotation file.

    The ``<size>`` element of the XML is used when it holds positive values;
    when it is missing, malformed or zero the image itself is read instead.
    ``(0, 0)`` is returned when neither source yields a size.
    """
    width = height = 0
    size = root.find('size')
    if size is not None:
        try:
            width = int(float(size.find('width').text))
            height = int(float(size.find('height').text))
        except (AttributeError, TypeError, ValueError):
            width = height = 0
    if width <= 0 or height <= 0:
        width, height = get_image_size(img_path)
    return width, height


def parse_voc_objects(root):
    """Extract ``(class_name, bbox)`` pairs from every ``<object>`` element.

    ``bbox`` is ``[x, y, width, height]`` in COCO order, or ``None`` when the
    object has no ``<bndbox>`` or the box has non-positive width or height.
    """
    objects = []
    for obj in root.findall('object'):
        name = obj.find('name').text
        bndbox = obj.find('bndbox')
        if bndbox is None:
            objects.append((name, None))
            continue
        xmin = float(bndbox.find('xmin').text)
        ymin = float(bndbox.find('ymin').text)
        xmax = float(bndbox.find('xmax').text)
        ymax = float(bndbox.find('ymax').text)
        box_w = xmax - xmin
        box_h = ymax - ymin
        if box_w <= 0 or box_h <= 0:
            objects.append((name, None))
        else:
            objects.append((name, [xmin, ymin, box_w, box_h]))
    return objects


def convert_xml_to_coco(xml_dir, img_dir, output_json, copy_images=False,
                        output_img_dir=None, class_names=None):
    """Convert a directory of Pascal VOC XML files into one COCO JSON file.

    Category ids start at 0. They are assigned first to ``class_names`` in the
    given order, then to any other class found in the data in alphabetical
    order, so the mapping does not depend on directory listing order.

    Args:
        xml_dir: Directory containing the ``.xml`` annotation files.
        img_dir: Directory containing the images named by ``<filename>``.
        output_json: Path of the COCO JSON file to write; its directory is
            created when missing.
        copy_images: When true (and ``output_img_dir`` is set) every used
            image is copied to ``output_img_dir``.
        output_img_dir: Destination directory for copied images.
        class_names: Optional sequence of class names fixing the category ids.

    Returns:
        The COCO dictionary that was written to ``output_json``. A
        ``category_mapping.txt`` file is written next to it as well.
    """
    coco_data = {
        "info": {
            "description": "COCO Dataset converted from Pascal VOC XML",
            "version": "1.0",
            "year": 2023,
            "contributor": "XML to COCO Converter",
            "date_created": "2023-01-01"
        },
        "licenses": [{
            "url": "https://creativecommons.org/licenses/by/4.0/",
            "id": 1,
            "name": "CC BY 4.0"
        }],
        "images": [],
        "annotations": [],
        "categories": []
    }

    # Pre-seed the caller-supplied classes so their ids are fixed up front.
    category_dict = {}
    for name in class_names or []:
        if name not in category_dict:
            category_dict[name] = len(category_dict)

    image_dict = {}
    seen_names = set()
    pending = []  # (image_id, class_name, bbox); ids are resolved after the scan
    skipped_boxes = 0

    # Sorted so image ids are reproducible across runs and platforms.
    xml_files = sorted(f for f in os.listdir(xml_dir) if f.endswith('.xml'))

    if copy_images and output_img_dir:
        os.makedirs(output_img_dir, exist_ok=True)

    print(f"找到 {len(xml_files)} 个XML文件，开始转换...")

    for xml_file in tqdm(xml_files):
        xml_path = os.path.join(xml_dir, xml_file)
        try:
            root = ET.parse(xml_path).getroot()

            filename = root.find('filename').text
            img_path = os.path.join(img_dir, filename)
            if not os.path.exists(img_path):
                print(f"警告: 图像文件不存在 - {img_path}")
                continue

            width, height = read_image_size(root, img_path)
            if width == 0 or height == 0:
                print(f"警告: 无法获取图像尺寸 - {img_path}")
                continue

            # Parse all objects before touching shared state, so a malformed
            # file is skipped as a whole instead of leaving partial entries.
            objects = parse_voc_objects(root)

            if copy_images and output_img_dir:
                shutil.copy2(img_path, os.path.join(output_img_dir, filename))

            if filename not in image_dict:
                image_id = len(image_dict) + 1
                coco_data["images"].append({
                    "id": image_id,
                    "file_name": filename,
                    "width": width,
                    "height": height,
                    "license": 1,
                    "date_captured": "2023-01-01"
                })
                image_dict[filename] = image_id
            image_id = image_dict[filename]

            for name, bbox in objects:
                seen_names.add(name)
                if bbox is None:
                    skipped_boxes += 1
                    continue
                pending.append((image_id, name, bbox))
        except Exception as e:
            # Per-file boundary: report and continue with the remaining files.
            print(f"处理文件 {xml_file} 时出错: {str(e)}")

    if skipped_boxes:
        print(f"警告: 跳过 {skipped_boxes} 个缺失或无效的边界框")

    # Classes not listed by the caller get ids in alphabetical order.
    for name in sorted((n for n in seen_names if n not in category_dict), key=str):
        category_dict[name] = len(category_dict)

    coco_data["categories"] = [
        {"id": cid, "name": name, "supercategory": "object"}
        for name, cid in category_dict.items()
    ]
    for ann_id, (image_id, name, bbox) in enumerate(pending, start=1):
        coco_data["annotations"].append({
            "id": ann_id,
            "image_id": image_id,
            "category_id": category_dict[name],
            "bbox": bbox,
            "area": bbox[2] * bbox[3],
            "segmentation": [],
            "iscrowd": 0
        })

    # Create e.g. "annotations/" on demand instead of failing on open().
    output_dir = os.path.dirname(output_json)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # ensure_ascii stays at its default so the JSON is plain ASCII and loads
    # under any reader encoding.
    with open(output_json, 'w', encoding='utf-8') as f:
        json.dump(coco_data, f, indent=2)

    print(f"转换完成!")
    print(f"共处理 {len(coco_data['images'])} 张图像")
    print(f"共处理 {len(coco_data['annotations'])} 个标注")
    print(f"共发现 {len(coco_data['categories'])} 个类别")
    print(f"结果已保存到: {output_json}")

    # Human-readable "name: id" table written next to the JSON file.
    category_map_path = os.path.join(output_dir, 'category_mapping.txt')
    with open(category_map_path, 'w', encoding='utf-8') as f:
        f.writelines(f"{name}: {cid}\n" for name, cid in category_dict.items())
    print(f"类别映射已保存到: {category_map_path}")

    return coco_data


if __name__ == "__main__":
    args = parse_args()
    # Run the conversion with the parsed options.
    coco_data = convert_xml_to_coco(
        args.xml_dir,
        args.img_dir,
        args.output_json,
        args.copy_images,
        args.output_img_dir,
        args.classes,
    )

调用：生成coco的json

python xml_to_coco.py --xml_dir train2017 --img_dir train2017 --output_json annotations/instances_train2017.json

python xml_to_coco.py --xml_dir val2017 --img_dir val2017 --output_json annotations/instances_val2017.json

数据集结构图：

然后训练：

python tools/train.py  --config=configs/rtdetrv2/rtdetrv2_r18vd_120e_coco.yml   --use-amp --seed=0

转换onnx

python tools/export_onnx.py -c=configs/rtdetrv2/rtdetrv2_r18vd_120e_coco.yml -r last.pth --check

转换trt，python 版本

python tools/export_trt.py -i model.onnx

或者装了tensorrt 的用直接命令行。

tensorrt 版本要大于8.5.2，不然有的算子不支持，会报错。

trtexec --onnx=model.onnx --saveEngine=model.trt

如果直接用 pip 装不上 TensorRT 的 Python 包，可以用 TensorRT 安装包里 python 文件夹中自带的 whl 文件安装：

pip install tensorrt-8.6.0-cp39-none-win_amd64.whl

生成的权重还是挺大的，个人感觉没有yolo好用。

SQL Server 2025 预览版发布：AI深度集成、开发者体验飞跃与混合云新篇章

Composer 的 PHP 依赖库提交教程

Linux安装LLaMA Factory

Python数字信号处理——利用块间系数相关性的DCT域鲁棒盲图像水印（PyQT5界面）

linux云计算学习第八周，第九周

sqli-labs靶场54-65关（次数限制，数据更新）

Python爬虫实战：研究Mr. Queue相关技术

【图像处理入门】7. 特征描述子：从LBP到HOG的特征提取之道

智能土木通 - 土木工程专业知识问答系统02-RAG检索模块搭建

幂级数（0，R）； R ；（R，+oo）

图数据库neo4j部分用法浅讲

计算机网络学习笔记：TCP三报文握手、四报文挥手

lua版的Frpc

网页后端开发（基础2--maven单元测试）

卷积神经网络中的通道注意力机制

Vue 3.5.13 中 `defineModel` 的局限性及解决方案

SpringAI+DeepSeek大模型应用开发——6基于MongDB持久化对话

使用 MCP 驱动的分布式智能扩展 Space-O-RAN

sql解析，日期 trunc sysdate

Leetcode 刷题记录 12 —— 二叉树第三弹

做建材营销型网站/小红书seo是什么

长期供应网站设计制作/磁力猫最好磁力搜索引擎

微信网站如何做/口碑好网络营销电话

优品惠网站建设需求/2020最近的新闻大事10条

html5+css3网站模板/seo课程培训要多少钱

重庆做企业年报在哪个网站做/2023今日新闻头条

相关文章：