当前位置：首页 > news >正文

目标检测如何将同时有方形框和旋转框的json/xml标注转为txt格式

news 2025/9/8 18:33:38

将X-AnyLabeling标注保存的json转为yolo obb 5参数表示的txt文件

X-AnyLabeling标注后会生成其自定义的json文件。图片上同时有旋转框和方形框，而导出时只能选择导出水平标签或旋转标签其中一种，因此我需要把全部标注统一转为旋转框（yolo obb 5参数）标签格式。
在这里插入图片描述

json文件

{"version": "3.2.1","flags": {},"shapes": [{"label": "cow","score": null,"points": [[547.560975609756,374.1463414634146],[685.3270740146849,332.2175289053928],[779.0,640.0],[641.2339015950713,681.9288125580218]],"group_id": null,"description": "","difficult": false,"shape_type": "rotation","flags": {},"attributes": {},"kie_linking": [],"direction": 5.987744470035866},{"label": "cow","score": null,"points": [[1008.0,484.0],[1179.0,417.0],[1311.0,753.0],[1140.0,820.0]],"group_id": null,"description": "","difficult": false,"shape_type": "rotation","flags": {},"attributes": {},"kie_linking": [],"direction": 5.909756668300514},{"label": "cow","score": null,"points": [[1359.0,423.0],[1494.0,442.0],[1446.0,779.0],[1311.0,760.0]],"group_id": null,"description": "","difficult": false,"shape_type": "rotation","flags": {},"attributes": {},"kie_linking": [],"direction": 0.1398223687841933},{"label": "cow","score": null,"points": [[1492.0,450.0],[1628.0,468.0],[1590.0,755.0],[1454.0,738.0]],"group_id": null,"description": "","difficult": false,"shape_type": "rotation","flags": {},"attributes": {},"kie_linking": [],"direction": 0.13158814145792064},{"label": "cow","score": null,"points": [[1610.0,468.0],[1750.0,453.0],[1779.0,735.0],[1639.0,749.0]],"group_id": null,"description": "","difficult": false,"shape_type": "rotation","flags": {},"attributes": {},"kie_linking": [],"direction": 6.17644963453045},{"label": "head","score": null,"points": [[641.4634146341463,465.60975609756093],[653.6585365853658,465.60975609756093],[653.6585365853658,477.80487804878044],[641.4634146341463,477.80487804878044]],"group_id": null,"description": "","difficult": false,"shape_type": "rectangle","flags": {},"attributes": {},"kie_linking": []},{"label": "head","score": null,"points": [[1169.5121951219512,641.2195121951219],[1184.1463414634145,641.2195121951219],[1184.1463414634145,647.3170731707316],[1169.5121951219512,647.3170731707316]],"group_id": null,"description": "","difficult": 
false,"shape_type": "rectangle","flags": {},"attributes": {},"kie_linking": []},{"label": "head","score": null,"points": [[1392.6829268292681,625.3658536585365],[1398.780487804878,625.3658536585365],[1398.780487804878,638.780487804878],[1392.6829268292681,638.780487804878]],"group_id": null,"description": "","difficult": false,"shape_type": "rectangle","flags": {},"attributes": {},"kie_linking": []},{"label": "head","score": null,"points": [[1529.2682926829268,633.9024390243902],[1535.3658536585365,633.9024390243902],[1535.3658536585365,643.6585365853658],[1529.2682926829268,643.6585365853658]],"group_id": null,"description": "","difficult": false,"shape_type": "rectangle","flags": {},"attributes": {},"kie_linking": []},{"label": "head","score": null,"points": [[1699.9999999999998,643.6585365853658],[1709.7560975609754,643.6585365853658],[1709.7560975609754,647.3170731707316],[1699.9999999999998,647.3170731707316]],"group_id": null,"description": "","difficult": false,"shape_type": "rectangle","flags": {},"attributes": {},"kie_linking": []}],"imagePath": "..\\0000001.png","imageData": null,"imageHeight": 1080,"imageWidth": 1920
}

转换脚本json_convert_txt

import os
import json
import math
import cv2
import numpy as np


def get_label_map(label_map_path):
    """Load the class list and build a class-name -> class-id dictionary.

    The file holds one class name per line; the id of a class is the
    zero-based index of its line.

    Args:
        label_map_path: Path to the class list text file.

    Returns:
        dict mapping each non-empty class name to its line index.
    """
    label_map = {}
    # utf-8-sig drops a leading BOM if present, so the first class name
    # still matches the labels stored in the annotation files.
    with open(label_map_path, 'r', encoding='utf-8-sig') as f:
        for i, line in enumerate(f):
            name = line.strip()
            if name:  # blank lines keep their index but define no class
                label_map[name] = i
    return label_map


def rectangle_corners(points):
    """Return the corner list to feed to the rotated-rectangle fit.

    A two-point list is treated as the opposite corners of an axis-aligned
    rectangle and expanded to four corners (top-left, top-right,
    bottom-right, bottom-left). Any other point list is returned unchanged.

    Args:
        points: Sequence of [x, y] pairs.

    Returns:
        Sequence of [x, y] pairs.
    """
    if len(points) == 2:
        (x1, y1), (x2, y2) = points
        left, right = min(x1, x2), max(x1, x2)
        top, bottom = min(y1, y2), max(y1, y2)
        return [[left, top], [right, top], [right, bottom], [left, bottom]]
    return points


def angle_deg_to_obb_rad(angle_deg):
    """Convert a rectangle angle in degrees to the stored angle in radians.

    The stored angle is the input shifted by +90 degrees and wrapped into
    [0, pi). A rectangle is unchanged by a 180 degree turn, so the wrap
    does not alter the geometry; it only makes the value unique.

    Args:
        angle_deg: Angle in degrees (any real value).

    Returns:
        Angle in radians, 0 <= angle < pi.
    """
    shifted = (angle_deg + 90.0) % 180.0
    # Float rounding can yield exactly 180.0 for tiny negative inputs.
    if shifted >= 180.0:
        shifted = 0.0
    return shifted * math.pi / 180.0


def convert_point_to_yolo(points, img_w, img_h):
    """Convert a shape's corner points to the 5-parameter OBB format.

    Args:
        points: Corner points as [x, y] pairs in pixels (four corners, or
            two opposite corners of an axis-aligned rectangle).
        img_w: Image width in pixels.
        img_h: Image height in pixels.

    Returns:
        [x_center, y_center, width, height, angle] where the first four
        values are divided by the image width/height, width is the longer
        side, and angle is in radians within [0, pi).
    """
    corners = rectangle_corners(points)
    # cv2.minAreaRect needs a float32 array shaped (N, 1, 2).
    points_np = np.array(corners, dtype=np.float32).reshape(-1, 1, 2)

    # Minimum-area enclosing rotated rectangle.
    (center_x, center_y), (w, h), angle = cv2.minAreaRect(points_np)

    # Make width the longer side; swapping sides turns the box by 90 degrees.
    if w < h:
        w, h = h, w
        angle += 90

    # The angle range returned by minAreaRect differs between OpenCV
    # versions, so wrap explicitly instead of relying on it.
    angle_rad = angle_deg_to_obb_rad(angle)

    return [
        center_x / img_w,
        center_y / img_h,
        w / img_w,
        h / img_h,
        angle_rad,
    ]


def process_json_to_yolo_txt(json_dir, output_dir, label_map):
    """Convert every JSON annotation file in a folder to a YOLO OBB TXT file.

    For each ``*.json`` file in ``json_dir`` a ``.txt`` file with the same
    base name is written to ``output_dir``, one line per shape:
    ``class_id x_center y_center width height angle``.

    Shapes whose label is not in ``label_map`` or that have fewer than two
    points are skipped with a warning.

    Args:
        json_dir: Folder containing the JSON annotation files.
        output_dir: Folder for the TXT files (created if missing).
        label_map: dict mapping class name to class id.
    """
    os.makedirs(output_dir, exist_ok=True)

    json_files = [name for name in os.listdir(json_dir) if name.endswith('.json')]
    for json_file in json_files:
        json_path = os.path.join(json_dir, json_file)
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        img_w = data['imageWidth']
        img_h = data['imageHeight']

        lines = []
        for shape in data['shapes']:
            label = shape['label']
            points = shape['points']

            if label not in label_map:
                print(f"警告: 类别 '{label}' 未在 label_map.txt 中找到，跳过此标注。")
                continue
            if len(points) < 2:
                # A single point cannot describe a box.
                print(f"警告: 标注 '{label}' 的点数不足，跳过此标注。")
                continue

            class_id = label_map[label]
            yolo_params = convert_point_to_yolo(points, img_w, img_h)
            lines.append(f"{class_id} {' '.join(map(str, yolo_params))}\n")

        # Output file shares the JSON file's base name.
        txt_filename = os.path.splitext(json_file)[0] + '.txt'
        txt_path = os.path.join(output_dir, txt_filename)
        with open(txt_path, 'w', encoding='utf-8') as out:
            out.writelines(lines)


if __name__ == "__main__":
    # Requires the opencv-python package:
    #   pip install opencv-python
    json_folder = r'E:\deeplearning\torch\cow_data\json'  # folder with the JSON files
    output_folder = r'E:\deeplearning\torch\cow_data\labels'  # folder for the YOLO TXT files
    label_map_file = r'E:\deeplearning\torch\cow_data\classes.txt'  # class list file

    # Load the class mapping.
    label_map = get_label_map(label_map_file)
    print("加载的类别映射:", label_map)

    # Run the conversion.
    process_json_to_yolo_txt(json_folder, output_folder, label_map)
    print(f"所有JSON文件已转换完成，保存在 '{output_folder}' 文件夹中。")

输出yolo obb 5参数表示

<class_id> <x_center> <y_center> <width> <height> <angle>

1 0.3359375 0.4287037037037037 0.003125 0.003703703703703704 0.0
1 0.609375 0.5796296296296296 0.004166666666666667 0.005555555555555556 0.0
1 0.7307291666666667 0.5574074074074075 0.0020833333333333333 0.003703703703703704 0.0
1 0.7958333333333333 0.5944444444444444 0.004166666666666667 0.003703703703703704 0.0
1 0.88671875 0.5888888888888889 0.0015625 0.003703703703703704 0.0
0 0.34510838541666666 0.4643143518518518 0.07709296874999999 0.2721395370370371 2.863569
0 0.6000128124999999 0.5552342592592593 0.0955703125 0.29858675925925926 2.743585
0 0.7328688020833333 0.5300550925925926 0.06547239583333334 0.25863305555555555 0.288572
0 0.8018531249999999 0.5706671296296296 0.07168947916666667 0.3166819444444445 0.336216
0 0.8861483333333333 0.5624073148148148 0.070671875 0.26672990740740743 0.045002

VOC的xml文件转yolo obb的5参数表示

这是用rolabelimg工具标注后保存的xml文件，我们需要yolo obb的5参数txt文件

<annotation verified="no"><folder>camera</folder><filename>0000001</filename><path>E:\deeplearning\torch\cow_data\0000001.png</path><source><database>Unknown</database></source><size><width>1920</width><height>1080</height><depth>3</depth></size><segmented>0</segmented><object><type>bndbox</type><name>head</name><pose>Unspecified</pose><truncated>0</truncated><difficult>0</difficult><bndbox><xmin>642</xmin><ymin>461</ymin><xmax>648</xmax><ymax>465</ymax></bndbox></object><object><type>bndbox</type><name>head</name><pose>Unspecified</pose><truncated>0</truncated><difficult>0</difficult><bndbox><xmin>1166</xmin><ymin>623</ymin><xmax>1174</xmax><ymax>629</ymax></bndbox></object><object><type>bndbox</type><name>head</name><pose>Unspecified</pose><truncated>0</truncated><difficult>0</difficult><bndbox><xmin>1401</xmin><ymin>600</ymin><xmax>1405</xmax><ymax>604</ymax></bndbox></object><object><type>bndbox</type><name>head</name><pose>Unspecified</pose><truncated>0</truncated><difficult>0</difficult><bndbox><xmin>1524</xmin><ymin>640</ymin><xmax>1532</xmax><ymax>644</ymax></bndbox></object><object><type>bndbox</type><name>head</name><pose>Unspecified</pose><truncated>0</truncated><difficult>0</difficult><bndbox><xmin>1701</xmin><ymin>634</ymin><xmax>1704</xmax><ymax>638</ymax></bndbox></object><object><type>robndbox</type><name>cow</name><pose>Unspecified</pose><truncated>0</truncated><difficult>0</difficult><robndbox><cx>662.6081</cx><cy>501.4595</cy><w>148.0185</w><h>293.9107</h><angle>2.863569</angle></robndbox></object><object><type>robndbox</type><name>cow</name><pose>Unspecified</pose><truncated>0</truncated><difficult>0</difficult><robndbox><cx>1152.0246</cx><cy>599.653</cy><w>183.495</w><h>322.4737</h><angle>2.743585</angle></robndbox></object><object><type>robndbox</type><name>cow</name><pose>Unspecified</pose><truncated>0</truncated><difficult>0</difficult><robndbox><cx>1407.1081</cx><cy>572.4595</cy><w>125.707</w><h>279.3237</h><angle>0.288572</angle></robndbox>
</object><object><type>robndbox</type><name>cow</name><pose>Unspecified</pose><truncated>0</truncated><difficult>0</difficult><robndbox><cx>1539.558</cx><cy>616.3205</cy><w>137.6438</w><h>342.0165</h><angle>0.336216</angle></robndbox></object><object><type>robndbox</type><name>cow</name><pose>Unspecified</pose><truncated>0</truncated><difficult>0</difficult><robndbox><cx>1701.4048</cx><cy>607.3999</cy><w>135.69</w><h>288.0683</h><angle>0.045002</angle></robndbox></object>
</annotation>

转换脚本xml_convert_txt

import os
import xml.etree.ElementTree as ET
import math
# --- Path configuration ---
XML_FOLDER = r'E:\deeplearning\torch\cow_data\labels'  # Folder containing the input XML files
OUTPUT_FOLDER = r'E:\deeplearning\torch\cow_data\labels'  # Folder the YOLO TXT files are written to
CLASSES_FILE = r'E:\deeplearning\torch\cow_data\classes.txt'  # Class-name file, one name per line
# --- Function definitions ---
def get_label_map(file_path):
    """Load the class list and build a class-name -> class-id dictionary.

    The file holds one class name per line; the id of a class is the
    zero-based index of its line.

    Args:
        file_path: Path to the class list text file.

    Returns:
        dict mapping each non-empty class name to its line index.
    """
    label_map = {}
    # utf-8-sig drops a leading BOM if present, so the first class name
    # still matches the <name> values in the XML files.
    with open(file_path, 'r', encoding='utf-8-sig') as f:
        for i, line in enumerate(f):
            name = line.strip()
            if name:  # blank lines keep their index but define no class
                label_map[name] = i
    return label_map


def convert_to_yolo_obb(bbox, img_w, img_h, is_rotated=False):
    """Convert one bounding box to the 5-parameter OBB format.

    Args:
        bbox (dict): Box values as strings or numbers. Rotated boxes use
            the keys 'cx', 'cy', 'w', 'h', 'angle'; axis-aligned boxes use
            'xmin', 'ymin', 'xmax', 'ymax'.
        img_w (int): Image width in pixels.
        img_h (int): Image height in pixels.
        is_rotated (bool): True for a rotated box, False for an
            axis-aligned one.

    Returns:
        list: [x_center, y_center, width, height, angle]. The first four
        values are divided by the image width/height. The angle is the
        XML 'angle' value unchanged for rotated boxes and 0.0 otherwise.

    NOTE(review): the angle is passed through unchanged here, whereas the
    JSON converter elsewhere in this file adds 90 degrees and the checker
    script subtracts 90 degrees before drawing - confirm which convention
    the consumer of these files expects.
    """
    if is_rotated:
        # Rotated box (<robndbox>): centre/size/angle are given directly.
        cx = float(bbox['cx'])
        cy = float(bbox['cy'])
        w = float(bbox['w'])
        h = float(bbox['h'])
        yolo_angle = float(bbox['angle'])
    else:
        # Axis-aligned box (<bndbox>): derive centre and size from corners.
        xmin = float(bbox['xmin'])
        ymin = float(bbox['ymin'])
        xmax = float(bbox['xmax'])
        ymax = float(bbox['ymax'])
        cx = (xmin + xmax) / 2
        cy = (ymin + ymax) / 2
        w = xmax - xmin
        h = ymax - ymin
        yolo_angle = 0.0

    return [cx / img_w, cy / img_h, w / img_w, h / img_h, yolo_angle]


def process_xml_to_yolo_obb_txt(xml_dir, output_dir, label_map):
    """Convert every XML annotation file in a folder to a YOLO OBB TXT file.

    For each ``*.xml`` file in ``xml_dir`` a ``.txt`` file with the same
    base name is written to ``output_dir``, one line per object:
    ``class_id x_center y_center width height angle``.

    Files that cannot be parsed or lack a usable image size are skipped
    with a message. Objects whose class is not in ``label_map`` or that
    have no box element are skipped with a warning.

    Args:
        xml_dir: Folder containing the XML annotation files.
        output_dir: Folder for the TXT files (created if missing).
        label_map: dict mapping class name to class id.
    """
    os.makedirs(output_dir, exist_ok=True)

    rotated_keys = ('cx', 'cy', 'w', 'h', 'angle')
    plain_keys = ('xmin', 'ymin', 'xmax', 'ymax')

    xml_files = [name for name in os.listdir(xml_dir) if name.endswith('.xml')]
    for xml_file in xml_files:
        xml_path = os.path.join(xml_dir, xml_file)
        try:
            root = ET.parse(xml_path).getroot()
        except ET.ParseError:
            print(f"跳过格式不正确的XML文件: {xml_file}")
            continue

        # Image size is required for normalisation.
        try:
            img_w = int(root.findtext('size/width'))
            img_h = int(root.findtext('size/height'))
        except (TypeError, ValueError):
            img_w = img_h = 0
        if img_w <= 0 or img_h <= 0:
            print(f"跳过缺少有效图像尺寸的XML文件: {xml_file}")
            continue

        txt_filename = os.path.splitext(xml_file)[0] + '.txt'
        txt_path = os.path.join(output_dir, txt_filename)

        with open(txt_path, 'w', encoding='utf-8') as f:
            for obj in root.findall('object'):
                obj_name = obj.findtext('name')

                if obj_name not in label_map:
                    print(f"警告: 类别 '{obj_name}' 未在 {CLASSES_FILE} 中找到，跳过此标注。")
                    continue
                class_id = label_map[obj_name]

                # Use <type> when the file has it; otherwise decide by
                # which box element is present, so XML files without a
                # <type> element are still handled.
                bbox_type = obj.findtext('type')
                if bbox_type is None:
                    is_rotated = obj.find('robndbox') is not None
                else:
                    is_rotated = (bbox_type == 'robndbox')

                box = obj.find('robndbox' if is_rotated else 'bndbox')
                if box is None:
                    print(f"警告: 标注 '{obj_name}' 缺少边界框，跳过此标注。")
                    continue

                keys = rotated_keys if is_rotated else plain_keys
                bbox_data = {key: box.find(key).text for key in keys}

                yolo_params = convert_to_yolo_obb(bbox_data, img_w, img_h, is_rotated)
                f.write(f"{class_id} {' '.join(map(str, yolo_params))}\n")

    print(f"转换完成。文件已保存到 '{output_dir}' 文件夹。")


# --- Script entry point ---
if __name__ == "__main__":
    # Check the inputs in order and report only the first one missing.
    problem = None
    if not os.path.exists(XML_FOLDER):
        problem = f"错误: 输入文件夹未找到: {XML_FOLDER}"
    elif not os.path.exists(CLASSES_FILE):
        problem = f"错误: 类别文件未找到: {CLASSES_FILE}"

    if problem is not None:
        print(problem)
    else:
        # Both inputs exist: load the class mapping, then convert.
        label_map = get_label_map(CLASSES_FILE)
        print("已加载类别映射:", label_map)
        process_xml_to_yolo_obb_txt(XML_FOLDER, OUTPUT_FOLDER, label_map)

输出yolo obb 5参数表示

<class_id> <x_center> <y_center> <width> <height> <angle>

（注：下面的示例数据每行有8个坐标值，实际为四角点格式 <class_id> <x1> <y1> <x2> <y2> <x3> <y3> <x4> <y4>，与上面的5参数格式不一致。）

0 0.28518800813008127 0.3464317976513098 0.3569411843826484 0.3076088230605489 0.40572916666666664 0.5925925925925926 0.33397599041409964 0.6314155671833536
0 0.525 0.44814814814814813 0.6140625 0.3861111111111111 0.6828125 0.6972222222222222 0.59375 0.7592592592592593
0 0.7078125 0.39166666666666666 0.778125 0.40925925925925927 0.753125 0.7212962962962963 0.6828125 0.7037037037037037
0 0.7770833333333333 0.4166666666666667 0.8479166666666667 0.43333333333333335 0.828125 0.6990740740740741 0.7572916666666667 0.6833333333333333
0 0.8385416666666666 0.43333333333333335 0.9114583333333334 0.41944444444444445 0.9265625 0.6805555555555556 0.8536458333333333 0.6935185185185185
1 0.3340955284552845 0.43112014453477865 0.3404471544715447 0.43112014453477865 0.3404471544715447 0.44241192411924113 0.3340955284552845 0.44241192411924113
1 0.6091209349593496 0.5937217705510388 0.6167428861788617 0.5937217705510388 0.6167428861788617 0.59936766034327 0.6091209349593496 0.59936766034327
1 0.7253556910569104 0.5790424570912375 0.7285315040650406 0.5790424570912375 0.7285315040650406 0.5914634146341463 0.7253556910569104 0.5914634146341463
1 0.7964939024390244 0.5869467028003613 0.7996697154471544 0.5869467028003613 0.7996697154471544 0.5959801264679313 0.7964939024390244 0.5959801264679313
1 0.8854166666666665 0.5959801264679313 0.8904979674796747 0.5959801264679313 0.8904979674796747 0.59936766034327 0.8854166666666665 0.59936766034327

验证工具check_yolo

导入图片和标注的yolo obb 5参数 txt文件，验证标注结果是否正确

import os
import cv2
import numpy as np
import math
# --- Path configuration ---
IMAGE_DIR = r'E:\deeplearning\torch\cow_data'  # Folder containing the images
LABEL_DIR = r'E:\deeplearning\torch\cow_data\labels'  # Folder containing the YOLO TXT files
LABEL_MAP_FILE = r'E:\deeplearning\torch\cow_data\classes.txt'  # Class mapping file
OUTPUT_DIR = r'E:\deeplearning\torch\cow_data\labels'  # Folder for the visualisation images


def get_label_map(label_map_path):
    """Load the class list and build a class-id -> class-name dictionary.

    The file holds one class name per line; the id of a class is the
    zero-based index of its line.

    Args:
        label_map_path: Path to the class list text file.

    Returns:
        dict mapping line index to the non-empty class name on that line.
    """
    label_map = {}
    # utf-8-sig drops a leading BOM if present so it does not end up in
    # the first class name.
    with open(label_map_path, 'r', encoding='utf-8-sig') as f:
        for i, line in enumerate(f):
            name = line.strip()
            if name:  # blank lines keep their index but define no class
                label_map[i] = name
    return label_map


def parse_label_line(line):
    """Parse one line of a 5-parameter OBB label file.

    Args:
        line: Text of the form
            ``class_id x_center y_center width height angle``.

    Returns:
        ``(class_id, [x_center, y_center, width, height, angle])`` or
        ``None`` when the line is blank, does not have exactly six fields,
        or contains a non-numeric field.
    """
    parts = line.split()
    if len(parts) != 6:
        return None
    try:
        return int(parts[0]), [float(p) for p in parts[1:]]
    except ValueError:
        return None


def draw_rotated_bbox(img, bbox, class_name, color):
    """Draw one rotated bounding box and its class name on an image.

    Args:
        img: Image array; modified in place.
        bbox: [x_center, y_center, width, height, angle] with the first
            four values normalised by image width/height and the angle in
            radians.
        class_name: Text drawn next to the box.
        color: Colour used for the outline and the text.
    """
    img_h, img_w = img.shape[:2]

    # Normalised values -> pixels.
    x_c = int(bbox[0] * img_w)
    y_c = int(bbox[1] * img_h)
    w = int(bbox[2] * img_w)
    h = int(bbox[3] * img_h)
    angle_rad = bbox[4]

    # Radians -> degrees, then remove the 90 degree offset that the label
    # files carry relative to the angle passed to OpenCV.
    angle_deg = angle_rad * 180 / math.pi - 90

    # Corner points of the rotated rectangle.
    rect = ((x_c, y_c), (w, h), angle_deg)
    box = np.intp(cv2.boxPoints(rect))

    cv2.drawContours(img, [box], 0, color, 2)

    # Place the class name above the box, or below its top edge when
    # there is no room above.
    label_size, _ = cv2.getTextSize(class_name, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
    text_x = int(np.min(box[:, 0]))
    text_y = int(np.min(box[:, 1])) - 5
    if text_y < 0:
        text_y = int(np.min(box[:, 1])) + label_size[1] + 5

    cv2.putText(img, class_name, (text_x, text_y),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1, cv2.LINE_AA)


def main():
    """Draw every label file's boxes on its image and save the result.

    For each ``.txt`` file in ``LABEL_DIR`` the ``.png`` image with the
    same base name is read from ``IMAGE_DIR``, all boxes are drawn on it,
    and the image is written to ``OUTPUT_DIR``. Missing or unreadable
    images and malformed label lines are skipped with a warning.
    """
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    label_map = get_label_map(LABEL_MAP_FILE)
    print("加载的类别映射:", label_map)

    # One random colour per class; the fixed seed keeps colours stable
    # between runs.
    np.random.seed(0)
    colors = {class_id: [int(c) for c in np.random.randint(0, 256, 3)]
              for class_id in label_map}

    label_files = [name for name in os.listdir(LABEL_DIR) if name.endswith('.txt')]
    for label_file in label_files:
        # Matching image shares the label file's base name.
        img_name = os.path.splitext(label_file)[0] + '.png'
        img_path = os.path.join(IMAGE_DIR, img_name)

        if not os.path.exists(img_path):
            print(f"警告: 未找到图片文件 {img_path}，跳过。")
            continue

        img = cv2.imread(img_path)
        if img is None:
            print(f"警告: 无法读取图片 {img_path}，跳过。")
            continue

        label_path = os.path.join(LABEL_DIR, label_file)
        with open(label_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()

        for line in lines:
            if not line.strip():
                continue  # blank lines carry no annotation
            parsed = parse_label_line(line)
            if parsed is None:
                # Not a 5-parameter line (e.g. corner-point format);
                # drawing it would produce a wrong box.
                print(f"警告: {label_file} 中的标注行格式不正确，跳过: {line.strip()}")
                continue
            class_id, bbox = parsed

            class_name = label_map.get(class_id, 'unknown')
            color = colors.get(class_id, [255, 255, 255])

            draw_rotated_bbox(img, bbox, class_name, color)

        output_path = os.path.join(OUTPUT_DIR, img_name)
        cv2.imwrite(output_path, img)
        print(f"已处理并保存: {output_path}")


if __name__ == '__main__':
    main()