当前位置：首页 > news >正文

YOLO脚本合集

news 2025/9/23 14:25:51

1. 批量移除xml标注中的某一个类别标签

# Batch-remove every <object> of the listed classes from VOC-style XML
# annotation files. The files are modified in place.
#
# NOTE: xml.etree.cElementTree was removed in Python 3.9. ElementTree picks up
# the C accelerator automatically, so it is a drop-in replacement.
import xml.etree.ElementTree as ET
import os

# Folder that holds the XML annotation files.
xml_path = r'./Annotations'
# Keep only *.xml entries so stray files or sub-folders are never handed to the
# XML parser.
xml_files = [f for f in os.listdir(xml_path) if f.lower().endswith('.xml')]
# Names of the classes whose annotations must be deleted.
CLASSES = ["person"]

for axml in xml_files:
    path_xml = os.path.join(xml_path, axml)
    tree = ET.parse(path_xml)
    root = tree.getroot()
    # findall() returns a list, so removing children inside the loop is safe.
    for child in root.findall('object'):
        name_node = child.find('name')
        # An <object> without a <name> tag is kept instead of crashing the run.
        if name_node is not None and name_node.text in CLASSES:
            root.remove(child)
    tree.write(path_xml)



# import xml.etree.cElementTree as ET
# import os
# import shutil
# import logging

# # 配置日志
# logging.basicConfig(filename='xml_process.log', level=logging.INFO,
#                     format='%(asctime)s - %(levelname)s - %(message)s')

# def process_xml(xml_path, classes_to_remove):
#     backup_path = os.path.join(xml_path, '_backup')
#     os.makedirs(backup_path, exist_ok=True)
    
#     for filename in os.listdir(xml_path):
#         if not filename.endswith('.xml'):
#             continue
            
#         src_path = os.path.join(xml_path, filename)
#         dest_path = os.path.join(backup_path, filename)
        
#         try:
#             # 备份原始文件
#             shutil.copy2(src_path, dest_path)
#             logging.info(f"已备份文件: {filename}")
            
#             # 解析XML
#             tree = ET.parse(src_path, parser=ET.XMLParser(encoding='utf-8'))
#             root = tree.getroot()
            
#             # 查找并删除目标节点
#             removed_count = 0
#             for obj in root.findall('.//{*}object'):  # 处理命名空间
#                 name_elem = obj.find('.//{*}name')
#                 if name_elem is not None and name_elem.text.lower() in [c.lower() for c in classes_to_remove]:
#                     root.remove(obj)
#                     removed_count += 1
            
#             # 保存修改
#             tree.write(src_path, encoding='utf-8', xml_declaration=True)
#             logging.info(f"已处理文件: {filename}, 删除 {removed_count} 个标注")
            
#         except ET.ParseError as e:
#             logging.error(f"XML解析错误 {filename}: {str(e)}")
#         except Exception as e:
#             logging.error(f"处理文件 {filename} 时发生错误: {str(e)}")

# if __name__ == "__main__":
#     xml_folder = r'./Annotations'
#     target_classes = ["person"]
#     process_xml(xml_folder, target_classes)
#     print("批量处理完成，请检查日志文件 xml_process.log")

2. 修改标签的label名字

"""
使用python xml解析树解析xml文件，批量修改xml文件里object节点下name节点的text
"""


import glob
import xml.etree.ElementTree as ET
def rename_label(xml_dir, old_name, new_name):
    """Rename a class label in every VOC-style XML file of a folder.

    Each ``*.xml`` file directly inside ``xml_dir`` is parsed; for every
    ``<object>`` whose ``<name>`` text equals ``old_name`` the text is replaced
    by ``new_name``. Files that contain at least one match are overwritten in
    place; files without a match are left untouched.

    Args:
        xml_dir: Folder containing the XML annotation files.
        old_name: Label text to look for.
        new_name: Label text to write instead.

    Returns:
        The total number of ``<name>`` nodes that were changed.
    """
    replaced = 0
    for xml_file in glob.glob(xml_dir + '/*.xml'):
        tree = ET.parse(xml_file)
        changed = False
        for per_obj in tree.getroot().findall('object'):
            # Look the tag up by name: <name> is not guaranteed to be the
            # first child of <object>.
            name_node = per_obj.find('name')
            if name_node is not None and name_node.text == old_name:
                name_node.text = new_name
                changed = True
                replaced += 1
        if changed:
            # Overwrites the original file.
            tree.write(xml_file)
    return replaced


path = r'./Annotations'    # folder with the XML annotation files
# Replace the wrong label with the desired one and count the replacements.
i = rename_label(path, 'dangerous-behavior', 'climbing')
print('共完成了{}处替换'.format(i))

3. 划分YOLOv11数据集

# 该代码用于划分yolov11数据集
import os
import shutil
import random
 
# random.seed(0)  #随机种子，可自选开启
def _cut_index(rate, total):
    """Return ``int(rate * total)`` while absorbing floating-point noise."""
    # (0.7 + 0.2) * 10 evaluates to 8.999999999999998; rounding first keeps it
    # from being truncated to 8 instead of 9.
    return int(round(rate * total, 6))


def _copy_split(samples, file_path, label_path, new_file_path, split_name):
    """Copy image/label pairs into ``<new_file_path>/<split_name>/{images,labels}``."""
    if not samples:
        # Nothing to copy: do not create empty folders.
        return
    new_img_dir = os.path.join(new_file_path, split_name, 'images')
    new_label_dir = os.path.join(new_file_path, split_name, 'labels')
    os.makedirs(new_img_dir, exist_ok=True)
    os.makedirs(new_label_dir, exist_ok=True)
    for _, img_file, label_file in samples:
        shutil.copy(os.path.join(file_path, img_file), os.path.join(new_img_dir, img_file))
        shutil.copy(os.path.join(label_path, label_file), os.path.join(new_label_dir, label_file))


def split_data(file_path, label_path, new_file_path, train_rate, val_rate, test_rate):
    """Randomly split an image/label dataset into train, val and test folders.

    Images and labels are paired by file name without extension. Entries that
    have no partner are printed and skipped. The pairs are shuffled with the
    ``random`` module and copied (the sources are not moved) to
    ``<new_file_path>/{train,val,test}/{images,labels}``.

    Args:
        file_path: Folder containing the image files.
        label_path: Folder containing the label files.
        new_file_path: Root folder of the split dataset.
        train_rate: Fraction of the pairs that goes to the training set.
        val_rate: Fraction of the pairs that goes to the validation set.
        test_rate: Expected fraction for the test set. The test set always
            receives every pair left after train and val, so this value is
            only used to warn when the three rates do not add up to 1.
    """
    images_no_ext = {os.path.splitext(image)[0]: image for image in os.listdir(file_path)}
    labels_no_ext = {os.path.splitext(label)[0]: label for label in os.listdir(label_path)}

    # os.listdir() order is arbitrary; sorting makes a seeded shuffle
    # reproducible across runs and machines.
    matched_data = [
        (stem, images_no_ext[stem], labels_no_ext[stem])
        for stem in sorted(images_no_ext)
        if stem in labels_no_ext
    ]

    unmatched_images = [stem for stem in images_no_ext if stem not in labels_no_ext]
    unmatched_labels = [stem for stem in labels_no_ext if stem not in images_no_ext]
    if unmatched_images:
        print("未匹配的图片文件:")
        for stem in unmatched_images:
            print(images_no_ext[stem])
    if unmatched_labels:
        print("未匹配的标签文件:")
        for stem in unmatched_labels:
            print(labels_no_ext[stem])

    if abs(train_rate + val_rate + test_rate - 1) > 1e-6:
        # The remainder rule below decides the test size, so make the mismatch
        # visible instead of silently ignoring test_rate.
        print("警告: train_rate + val_rate + test_rate 不等于 1，测试集将使用剩余的全部数据")

    random.shuffle(matched_data)
    total = len(matched_data)
    train_end = _cut_index(train_rate, total)
    val_end = _cut_index(train_rate + val_rate, total)

    splits = (
        ('train', matched_data[:train_end]),
        ('val', matched_data[train_end:val_end]),
        ('test', matched_data[val_end:]),
    )
    for split_name, samples in splits:
        _copy_split(samples, file_path, label_path, new_file_path, split_name)
    print("数据集已划分完成")
 
if __name__ == '__main__':
    # Script entry point: split the dataset 80% / 10% / 10% into
    # train / val / test.
    split_data(
        r"./data/JPEGImages",  # image folder
        r'./data/labels',  # label folder
        r"./VOCdevkit",  # destination of the split dataset
        train_rate=0.8,
        val_rate=0.1,
        test_rate=0.1,
    )

本文代码均来自 CSDN 博主，此处仅为方便自己使用而做的汇总，参考：

笑脸惹桃花-CSDN博客

voc xml标签批量修改，更改、删除指定类别-CSDN博客

python批量修改labelme标注的json文件中的标签名_labelme批量修改标签-CSDN博客

查看全文

http://www.dtcms.com/a/114526.html