Labelme格式转yolo格式
将Labelme标注数据转为yolo格式的数据,自动获取类别,生成yolo数据yaml文件,代码如下:
import os
import shutil
import numpy as np
import json
from glob import glob
import cv2
def convert(size, box):
    """Convert a pixel-space box to a normalised YOLO ``(x, y, w, h)`` tuple.

    Args:
        size: ``(width, height)`` of the image in pixels.
        box: ``(xmin, xmax, ymin, ymax)`` in pixels. Note the order: both x
            bounds come first, then both y bounds.

    Returns:
        ``(x_center, y_center, width, height)``, each divided by the matching
        image dimension.

    Raises:
        ZeroDivisionError: If either image dimension is zero.
    """
    img_w, img_h = size
    xmin, xmax, ymin, ymax = box
    # Labelme stores 0-based pixel coordinates, so the centre is the plain
    # midpoint. The "- 1" used by converters written for 1-based Pascal VOC
    # annotations would shift every box and can push the centre below zero.
    x_center = (xmin + xmax) / 2.0
    y_center = (ymin + ymax) / 2.0
    return (
        x_center / img_w,
        y_center / img_h,
        (xmax - xmin) / img_w,
        (ymax - ymin) / img_h,
    )


def _normalise_label(raw):
    """Return *raw* stripped and lower-cased, or ``""`` if it is not a string."""
    if not isinstance(raw, str):
        return ""
    return raw.strip().lower()


def _image_path_for(json_path, annotation):
    """Return ``(image_path, extension)`` for the image next to *json_path*.

    The extension (with its leading dot, possibly empty) is taken from the
    annotation's ``imagePath`` entry; the image itself is looked up beside
    the JSON file under the JSON file's own stem.
    """
    extension = os.path.splitext(annotation['imagePath'])[1]
    # splitext only touches the final suffix, unlike str.replace('.json', ...)
    # which rewrites every occurrence and misses differently-cased suffixes.
    return os.path.splitext(json_path)[0] + extension, extension


def _shape_to_yolo_line(shape, class_ids, width, height):
    """Return the YOLO label line for one Labelme shape, or ``None`` to skip."""
    points = np.array(shape["points"], dtype=float)
    if len(points) < 2:
        return None
    # Clip to the image so the normalised values stay inside [0, 1]; points
    # dragged outside the canvas would otherwise yield out-of-range labels.
    xmin, xmax = np.clip([points[:, 0].min(), points[:, 0].max()], 0, width)
    ymin, ymax = np.clip([points[:, 1].min(), points[:, 1].max()], 0, height)
    if xmax <= xmin or ymax <= ymin:
        return None
    class_id = class_ids.get(_normalise_label(shape.get("label")))
    if class_id is None:
        # Unknown class: should not happen when classes were auto-detected.
        return None
    bbox = convert(
        (width, height),
        (float(xmin), float(xmax), float(ymin), float(ymax)),
    )
    return f"{class_id} {' '.join(map(str, bbox))}\n"


def extract_all_classes(json_files):
    """Collect the unique shape labels used across Labelme JSON files.

    Labels are stripped and lower-cased; empty or non-string labels are
    ignored. Files that cannot be read or parsed are reported and skipped.

    Args:
        json_files: Iterable of paths to Labelme JSON files.

    Returns:
        Sorted list of label names, so class ids are stable between runs.
    """
    labels = set()
    for json_path in json_files:
        try:
            with open(json_path, "r", encoding="utf-8") as f:
                data = json.load(f)
            for shape in data.get("shapes", []):
                label = _normalise_label(shape.get("label", ""))
                if label:
                    labels.add(label)
        except (OSError, ValueError, AttributeError, TypeError) as e:
            print(f"Warning: Failed to parse {json_path}: {e}")
    return sorted(labels)


def change_2_yolo5(files, file_full_path_dic, classes, txt_Name):
    """Write a YOLO ``.txt`` label file next to each Labelme JSON file.

    Each shape becomes one line ``<class_id> <x> <y> <w> <h>`` built from the
    bounding box of its points. A label file is created for every entry of
    *files*; it stays empty when the image is missing or unreadable, or when
    no shape yields a valid box.

    Args:
        files: File stems (keys of *file_full_path_dic*) to convert.
        file_full_path_dic: Mapping from file stem to JSON path.
        classes: Ordered class names; the position is the class id.
        txt_Name: Unused; kept for backward compatibility with callers.

    Returns:
        The stems whose JSON file was parsed successfully.
    """
    class_ids = {}
    for index, name in enumerate(classes):
        # setdefault keeps the first index, matching list.index() semantics.
        class_ids.setdefault(name, index)

    converted = []
    for stem in files:
        json_path = file_full_path_dic[stem]
        txt_path = os.path.splitext(json_path)[0] + '.txt'
        with open(txt_path, 'w', encoding='utf-8') as out_file:
            try:
                with open(json_path, "r", encoding="utf-8") as f:
                    annotation = json.load(f)
                converted.append(stem)
                image_path, _ = _image_path_for(json_path, annotation)
                if not os.path.exists(image_path):
                    print(f"Image not found: {image_path}")
                    continue
                # np.fromfile + imdecode copes with non-ASCII paths, which
                # cv2.imread does not handle on every platform.
                image = cv2.imdecode(
                    np.fromfile(image_path, dtype=np.uint8), -1
                )
                if image is None:
                    print(f"Failed to load image: {image_path}")
                    continue
                height, width = image.shape[:2]
                for shape in annotation.get("shapes", []):
                    line = _shape_to_yolo_line(
                        shape, class_ids, width, height
                    )
                    if line is not None:
                        out_file.write(line)
            except Exception as ex:
                # Best effort: one broken sample must not abort the batch.
                print(f"Error processing {json_path}: {ex}")
    return converted


def image_txt_copy(files, dic_filepath, dst_img_path, dst_txt_path):
    """Copy each image and its non-empty label file into the dataset folders.

    Samples whose label file is missing or empty are skipped, so images
    without any usable box are left out of the dataset.

    Args:
        files: File stems (keys of *dic_filepath*) to copy.
        dic_filepath: Mapping from file stem to JSON path.
        dst_img_path: Destination directory for images.
        dst_txt_path: Destination directory for label files.
    """
    for stem in files:
        json_path = dic_filepath[stem]
        try:
            with open(json_path, "r", encoding="utf-8") as f:
                annotation = json.load(f)
            image_src, image_ext = _image_path_for(json_path, annotation)
            txt_src = os.path.splitext(json_path)[0] + '.txt'
            has_labels = (
                os.path.exists(txt_src) and os.path.getsize(txt_src) > 0
            )
            if not has_labels:
                continue
            shutil.copy(image_src, os.path.join(dst_img_path, stem + image_ext))
            shutil.copy(txt_src, os.path.join(dst_txt_path, f"{stem}.txt"))
        except (OSError, ValueError, KeyError, TypeError, AttributeError) as ex:
            print(f"Copy error for {stem}: {ex}")


def generate_yaml(yaml_path, classes, is_use_test=False,
                  dataset_root="../VOC_aa"):
    """Write the YOLOv5 ``dataset.yaml`` describing the converted dataset.

    Args:
        yaml_path: Where to write the YAML file.
        classes: Ordered class names; the position is the class id.
        is_use_test: Also emit the ``test`` image directory when true.
        dataset_root: Value of the ``path`` entry in the YAML file.
    """
    lines = [
        f"path: {dataset_root}",
        "train: ./images/train",
        "val: ./images/val",
    ]
    if is_use_test:
        lines.append("test: ./images/test")
    lines.append(f"nc: {len(classes)}")
    # A JSON array is a valid YAML flow sequence with well-defined escaping.
    # repr() emits Python escapes, which YAML single-quoted scalars do not
    # interpret, so labels containing a backslash would be read back wrong.
    names = json.dumps(list(classes), ensure_ascii=False)
    lines.append(f"names: {names}")
    with open(yaml_path, 'w', encoding='utf-8') as f:
        f.write("\n".join(lines) + "\n")
    print(f"YOLOv5 YAML config saved to: {yaml_path}")


def main():
    """Convert ``./labelme_voc/`` into a YOLO dataset under ``VOC_aa``.

    Raises:
        ValueError: If the input directory contains no JSON files.
    """
    # Imported here because only the split needs scikit-learn; the conversion
    # helpers above stay importable without it.
    from sklearn.model_selection import train_test_split

    base_dir = 'VOC_aa'
    labelme_path = "./labelme_voc/"
    isUseTest = True

    # Create the output directory tree.
    for split in ("train", "val", "test"):
        os.makedirs(os.path.join(base_dir, 'images', split), exist_ok=True)
        os.makedirs(os.path.join(base_dir, 'labels', split), exist_ok=True)

    # Sorted so the seeded split below is reproducible: glob order depends on
    # the operating system and file system.
    json_files = sorted(glob(os.path.join(labelme_path, "*.json")))
    if not json_files:
        raise ValueError(f"No JSON files found in {labelme_path}")

    # Detect the classes automatically.
    classes = extract_all_classes(json_files)
    print(f"Detected classes: {classes}")

    # Map each file stem to its JSON path.
    dic_filepath = {
        os.path.splitext(os.path.basename(path))[0]: path
        for path in json_files
    }
    file_list = list(dic_filepath)

    # Split the dataset.
    # NOTE(review): the test share is carved out even when isUseTest is
    # false, in which case those files are simply left unused - confirm
    # this is intended.
    trainval_files, test_files = train_test_split(
        file_list, test_size=0.2, random_state=55
    )
    train_files, val_files = train_test_split(
        trainval_files, test_size=0.1, random_state=55
    )

    splits = [("val", val_files), ("train", train_files)]
    if isUseTest:
        splits.insert(0, ("test", test_files))

    # Convert to YOLO format and copy into the dataset tree.
    for split, stems in splits:
        converted = change_2_yolo5(stems, dic_filepath, classes, split)
        image_txt_copy(
            converted,
            dic_filepath,
            os.path.join(base_dir, 'images', split),
            os.path.join(base_dir, 'labels', split),
        )

    # Remove only the temporary label files this run generated next to the
    # JSON files; unrelated .txt files in the input folder are left alone.
    for _, stems in splits:
        for stem in stems:
            txt_path = os.path.splitext(dic_filepath[stem])[0] + '.txt'
            if os.path.exists(txt_path):
                os.remove(txt_path)

    # Write the YAML config.
    generate_yaml(
        os.path.join(base_dir, 'dataset.yaml'),
        classes,
        is_use_test=isUseTest,
        dataset_root=f"../{base_dir}",
    )


if __name__ == '__main__':
    main()