
yolov12 export to ONNX

Contents

yolov12 export to ONNX

This one can be converted to TensorRT

Converting to TensorRT

trt_infer, single class

trt_infer with pycuda


yolov12 export to ONNX

This version tested OK, but the resulting ONNX could not be converted to TensorRT (the dynamic height/width axes are the likely culprit; the static-shape export further below did convert).

import torch

# Load the model (checkpoint layout assumed to be similar to YOLOv5/YOLOv8)
ckpt = torch.load(r"E:\data\models\person_det\best_0814.pt", map_location="cpu")  # swap in your own load function if the checkpoint is custom
model = ckpt.get('ema') or ckpt['model']   # prefer the EMA weights when present
model.float().eval()

# Input tensor (batch, channels, height, width)
dummy_input = torch.randn(1, 3, 640, 640)

# Export to ONNX
torch.onnx.export(
    model,                      # model
    dummy_input,                # example input
    "yolov12.onnx",             # output file name
    input_names=["images"],     # input node name
    output_names=["output"],    # output node name
    opset_version=12,           # 11 or 12 are the usual choices
    dynamic_axes={              # dynamic batch / height / width
        "images": {0: "batch", 2: "height", 3: "width"},
        "output": {0: "batch"},
    },
)
print("Export finished: yolov12.onnx")
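
Before attempting any TensorRT conversion, it can help to sanity-check the exported graph with onnxruntime. This is a minimal sketch, assuming onnxruntime is installed; the file name and input name ("yolov12.onnx", "images") are taken from the export above, so adjust them if yours differ.

import numpy as np
import onnxruntime as ort

# Load the exported graph on CPU and run one dummy inference
sess = ort.InferenceSession("yolov12.onnx", providers=["CPUExecutionProvider"])
dummy = np.random.rand(1, 3, 640, 640).astype(np.float32)
outputs = sess.run(None, {"images": dummy})
for i, out in enumerate(outputs):
    print(f"output[{i}] shape: {out.shape}")

If this runs and the output shapes look sensible, the export itself is fine and any remaining problems are on the TensorRT side.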

This one can be converted to TensorRT

import torch

# Load the model (checkpoint layout assumed to be similar to YOLOv5/YOLOv8)
ckpt = torch.load(r"E:\data\models\person_det\best_0814.pt", map_location="cpu")  # swap in your own load function if the checkpoint is custom
model = ckpt.get('ema') or ckpt['model']
model.float().eval()

# Input tensor (batch, channels, height, width)
inputs = torch.randn(1, 3, 640, 640)

torch.onnx.export(
    model,
    inputs,
    "model.onnx",
    opset_version=12,
    input_names=["input"],
    output_names=["output"],
    # dynamic_axes deliberately removed: fully static shapes
)
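
If the checkpoint was trained with the ultralytics package, its built-in exporter is a possible alternative to the manual torch.onnx.export call. A sketch under that assumption (it requires a recent ultralytics release with YOLOv12 support; the path and image size below mirror the example above and should be adjusted to your setup):

from ultralytics import YOLO

# Assumes the checkpoint is an ultralytics-trained model
model = YOLO(r"E:\data\models\person_det\best_0814.pt")
model.export(format="onnx", opset=12, imgsz=640, dynamic=False)  # static shapes for TensorRT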

Converting to TensorRT:

import tensorrt as trt

onnx_file = "yolov12.onnx"           # point this at the static-shape export (e.g. "model.onnx") if the dynamic one fails to parse
engine_file = "yolov12_fp16.engine"

logger = trt.Logger(trt.Logger.WARNING)
builder = trt.Builder(logger)
network_flags = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
network = builder.create_network(network_flags)
parser = trt.OnnxParser(network, logger)

with open(onnx_file, "rb") as f:
    if not parser.parse(f.read()):
        for error in range(parser.num_errors):
            print(parser.get_error(error))
        raise RuntimeError("ONNX parsing failed")

config = builder.create_builder_config()
config.max_workspace_size = 4 << 30  # 4 GB (older TensorRT API; see the note after this block)
if builder.platform_has_fast_fp16:
    config.set_flag(trt.BuilderFlag.FP16)

engine = builder.build_engine(network, config)
with open(engine_file, "wb") as f:
    f.write(engine.serialize())
print("✅ TensorRT FP16 engine written:", engine_file)
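
The script above targets the older TensorRT Python API. On TensorRT 8.4 and later (and 10.x, where max_workspace_size and build_engine are removed), the config/build/serialize part is done roughly as follows; this is a sketch, not verified against every TensorRT version:

# TensorRT >= 8.4 style build; builder, network and engine_file come from the script above
config = builder.create_builder_config()
config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 4 << 30)  # 4 GB workspace
if builder.platform_has_fast_fp16:
    config.set_flag(trt.BuilderFlag.FP16)

serialized_engine = builder.build_serialized_network(network, config)
with open(engine_file, "wb") as f:
    f.write(serialized_engine)

Alternatively, the trtexec tool that ships with TensorRT can do the same conversion from the command line, e.g. trtexec --onnx=model.onnx --saveEngine=yolov12_fp16.engine --fp16.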

trt_infer, single class

# coding=utf-8
import sys
import os
current_dir = os.path.dirname(os.path.abspath(__file__))
os.chdir(current_dir)
print('current_dir', current_dir)
paths = [current_dir, current_dir+'/../']
paths.append(os.path.join(current_dir, 'src'))
for path in paths:
    sys.path.insert(0, path)
    os.environ['PYTHONPATH'] = (os.environ.get('PYTHONPATH', '') + ':' + path).strip(':')

import tensorrt as trt
import numpy as np

# Compatibility shim for older TensorRT releases that still reference np.bool
if not hasattr(np, 'bool'):
    np.bool = np.bool_
import cv2
import time

class YOLOv12TRTNoPycuda:
    def __init__(self, onnx_path, engine_path=None, fp16_mode=True):
        """YOLOv12 TensorRT inferencer that does not depend on pycuda.
        Args:
            onnx_path: path to the ONNX model
            engine_path: engine file path; if None, the engine is built from the ONNX file
            fp16_mode: whether to build with FP16 precision
        """
        self.logger = trt.Logger(trt.Logger.WARNING)
        self.engine = None
        self.context = None
        self.input_shape = None
        self.output_shapes = []
        # Build or load the engine
        if engine_path is None or not os.path.exists(engine_path):
            self.engine = self.build_engine(onnx_path, fp16_mode)
            if engine_path is not None:
                self.save_engine(engine_path)
        else:
            self.engine = self.load_engine(engine_path)
        self.context = self.engine.create_execution_context()
        self.setup_io_buffers()

    def build_engine(self, onnx_path, fp16_mode):
        """Build a TensorRT engine from an ONNX file."""
        builder = trt.Builder(self.logger)
        network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
        parser = trt.OnnxParser(network, self.logger)
        # Parse the ONNX model
        with open(onnx_path, 'rb') as model:
            if not parser.parse(model.read()):
                print('ERROR: Failed to parse the ONNX file.')
                for error in range(parser.num_errors):
                    print(parser.get_error(error))
                return None
        # Configure the builder
        config = builder.create_builder_config()
        config.max_workspace_size = 1 << 30  # 1 GB
        if fp16_mode and builder.platform_has_fast_fp16:
            config.set_flag(trt.BuilderFlag.FP16)
        print("Building TensorRT engine. This may take a few minutes...")
        engine = builder.build_engine(network, config)
        return engine

    def load_engine(self, engine_path):
        """Load a serialized TensorRT engine from disk."""
        with open(engine_path, 'rb') as f:
            runtime = trt.Runtime(self.logger)
            return runtime.deserialize_cuda_engine(f.read())

    def save_engine(self, engine_path):
        """Save the engine to disk."""
        if self.engine is not None:
            with open(engine_path, 'wb') as f:
                f.write(self.engine.serialize())
            print(f"Engine saved to {engine_path}")

    def setup_io_buffers(self):
        """Record input/output binding shapes and dtypes."""
        for i in range(self.engine.num_bindings):
            name = self.engine.get_binding_name(i)
            shape = self.engine.get_binding_shape(i)
            dtype = self.engine.get_binding_dtype(i)
            if self.engine.binding_is_input(i):
                self.input_shape = shape
                print(f"Input: {name}, Shape: {shape}, Type: {dtype}")
            else:
                self.output_shapes.append((name, shape, dtype))
                print(f"Output: {name}, Shape: {shape}, Type: {dtype}")

    def allocate_buffers(self):
        """Allocate host-side input/output buffers (plain numpy arrays)."""
        # Input buffer
        input_host = np.empty(self.input_shape, dtype=np.float32)
        # Output buffers
        output_hosts = []
        for name, shape, dtype in self.output_shapes:
            nptype = trt.nptype(dtype)
            output_host = np.empty(shape, dtype=nptype)
            output_hosts.append(output_host)
        return input_host, output_hosts

    def preprocess(self, image, input_shape=(640, 640)):
        """Image preprocessing."""
        # Resize while keeping the aspect ratio
        h, w = image.shape[:2]
        scale = min(input_shape[0] / h, input_shape[1] / w)
        new_h, new_w = int(h * scale), int(w * scale)
        resized = cv2.resize(image, (new_w, new_h))
        # Pad to the network input size
        padded = np.full((input_shape[0], input_shape[1], 3), 114, dtype=np.uint8)
        padded[:new_h, :new_w] = resized
        # Convert range and layout
        padded = padded.astype(np.float32) / 255.0  # normalize
        padded = padded.transpose(2, 0, 1)          # HWC to CHW
        padded = np.ascontiguousarray(padded)
        return padded, scale, (h, w)

    def inference(self, image):
        """Run inference on a single image."""
        # Preprocess
        input_img, scale, original_shape = self.preprocess(image)
        input_data = np.expand_dims(input_img, axis=0)  # add batch dimension
        # Allocate buffers
        input_host, output_hosts = self.allocate_buffers()
        # Copy the input data
        np.copyto(input_host, input_data)
        # Set up bindings (input + outputs)
        bindings = [None] * (1 + len(self.output_shapes))
        bindings[0] = input_host.ctypes.data
        for i, output_host in enumerate(output_hosts):
            bindings[1 + i] = output_host.ctypes.data
        # Execute
        self.context.execute_v2(bindings=bindings)
        return output_hosts, scale, original_shape

    def postprocess(self, outputs, scale, original_shape, conf_threshold=0.25, iou_threshold=0.45):
        """Postprocess: parse detection results."""
        # Adjust to your model's output format.
        # Assumes the output is [batch, num_detections, 6] with (x1, y1, x2, y2, conf, class_id).
        detections = []
        h, w = original_shape
        for output in outputs:
            # Adjust to the actual output shape
            if output.ndim == 3:  # [batch, num_det, 6]
                batch_detections = output[0]  # take the first batch
                for det in batch_detections:
                    if len(det) < 6:
                        continue
                    if det[4] < conf_threshold:  # confidence threshold
                        continue
                    # Map back to the original image size
                    x1, y1, x2, y2 = det[:4]
                    x1 = int(x1 / scale)
                    y1 = int(y1 / scale)
                    x2 = int(x2 / scale)
                    y2 = int(y2 / scale)
                    # Clip to the image bounds
                    x1 = max(0, min(x1, w))
                    y1 = max(0, min(y1, h))
                    x2 = max(0, min(x2, w))
                    y2 = max(0, min(y2, h))
                    conf = float(det[4])
                    class_id = int(det[5])
                    detections.append([x1, y1, x2, y2, conf, class_id])
        # Non-maximum suppression
        if detections:
            detections = self.non_max_suppression(np.array(detections), iou_threshold)
        return detections

    def non_max_suppression(self, boxes, iou_threshold):
        """Non-maximum suppression."""
        if len(boxes) == 0:
            return []
        x1 = boxes[:, 0]
        y1 = boxes[:, 1]
        x2 = boxes[:, 2]
        y2 = boxes[:, 3]
        scores = boxes[:, 4]
        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
        order = scores.argsort()[::-1]
        keep = []
        while order.size > 0:
            i = order[0]
            keep.append(i)
            xx1 = np.maximum(x1[i], x1[order[1:]])
            yy1 = np.maximum(y1[i], y1[order[1:]])
            xx2 = np.minimum(x2[i], x2[order[1:]])
            yy2 = np.minimum(y2[i], y2[order[1:]])
            w = np.maximum(0.0, xx2 - xx1 + 1)
            h = np.maximum(0.0, yy2 - yy1 + 1)
            inter = w * h
            ovr = inter / (areas[i] + areas[order[1:]] - inter)
            inds = np.where(ovr <= iou_threshold)[0]
            order = order[inds + 1]
        return boxes[keep].tolist()

    def draw_detections(self, image, detections, class_names=None):
        """Draw detection results on the image."""
        h, w = image.shape[:2]
        for det in detections:
            x1, y1, x2, y2, conf, class_id = det
            x1 = max(0, min(int(x1), w - 1))
            y1 = max(0, min(int(y1), h - 1))
            x2 = max(0, min(int(x2), w - 1))
            y2 = max(0, min(int(y2), h - 1))
            cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
        return image


def main():
    # Configuration
    onnx_path = "model.onnx"              # path to your ONNX model
    engine_path = "yolov12_fp16.engine"   # where to save the engine file
    image_path = "111.jpg"                # test image path
    # Class names (adjust to your model)
    class_names = ["person"]              # single person class
    # Initialize the TensorRT inferencer
    print("Initializing TensorRT engine...")
    detector = YOLOv12TRTNoPycuda(onnx_path, engine_path)
    # Load the test image
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Cannot load image {image_path}")
        return
    # Run inference
    print("Running inference...")
    start_time = time.time()
    outputs, scale, original_shape = detector.inference(image)
    inference_time = time.time() - start_time
    # Postprocess
    detections = detector.postprocess(outputs, scale, original_shape)
    # Draw results
    result_image = detector.draw_detections(image.copy(), detections, class_names)
    # Report results
    print(f"Inference time: {inference_time * 1000:.2f} ms")
    print(f"Detections: {len(detections)}")
    for i, det in enumerate(detections):
        print(f"Detection {i}: {det}")
    if 0:
        cv2.imshow("Detection Results", result_image)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
    # Save the result image
    cv2.imwrite("result.jpg", result_image)
    print("Result saved to result.jpg")


# Real-time webcam inference version
def camera_inference():
    """Real-time inference from a webcam."""
    detector = YOLOv12TRTNoPycuda("model.onnx", "model.engine")
    cap = cv2.VideoCapture(0)
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        start_time = time.time()
        outputs, scale, original_shape = detector.inference(frame)
        detections = detector.postprocess(outputs, scale, original_shape)
        inference_time = time.time() - start_time
        # Draw results and FPS
        result_frame = detector.draw_detections(frame, detections)
        fps = 1.0 / inference_time
        cv2.putText(result_frame, f"FPS: {fps:.1f}", (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
        cv2.imshow("YOLOv12 TensorRT", result_frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()


if __name__ == "__main__":
    main()
    # Uncomment the line below to run webcam inference instead
    # camera_inference()
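
The postprocess above assumes the engine already emits (x1, y1, x2, y2, conf, class_id) rows. If the export instead produces the raw Ultralytics-style head output, typically shaped (1, 4 + num_classes, 8400) with (cx, cy, w, h) boxes and per-class scores, a decoding step along these lines would be needed first. This is a sketch under that assumption; verify the shapes and layout against your own model:

import numpy as np

def decode_raw_output(raw, conf_threshold=0.25):
    """Decode a raw (1, 4 + nc, N) YOLO head output into [x1, y1, x2, y2, conf, class_id] rows.

    Assumes Ultralytics-style layout: rows 0-3 are (cx, cy, w, h) in letterboxed input pixels,
    the remaining rows are per-class scores.
    """
    preds = np.squeeze(raw, axis=0).T          # (N, 4 + nc)
    boxes_cxcywh = preds[:, :4]
    class_scores = preds[:, 4:]
    class_ids = class_scores.argmax(axis=1)
    confidences = class_scores.max(axis=1)

    keep = confidences >= conf_threshold
    boxes_cxcywh, confidences, class_ids = boxes_cxcywh[keep], confidences[keep], class_ids[keep]

    # (cx, cy, w, h) -> (x1, y1, x2, y2), still in letterboxed input coordinates
    xyxy = np.empty_like(boxes_cxcywh)
    xyxy[:, 0] = boxes_cxcywh[:, 0] - boxes_cxcywh[:, 2] / 2
    xyxy[:, 1] = boxes_cxcywh[:, 1] - boxes_cxcywh[:, 3] / 2
    xyxy[:, 2] = boxes_cxcywh[:, 0] + boxes_cxcywh[:, 2] / 2
    xyxy[:, 3] = boxes_cxcywh[:, 1] + boxes_cxcywh[:, 3] / 2

    return np.concatenate([xyxy, confidences[:, None], class_ids[:, None]], axis=1)

The returned rows can then go through the same scale/clip/NMS logic as in postprocess above.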

trt_infer with pycuda

pip install tensorrt pycuda opencv-python numpy
import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit
import numpy as np
import cv2
import time
from pathlib import Path


class YOLOv12TRT:
    def __init__(self, onnx_path, engine_path=None, max_batch_size=1, fp16_mode=True):
        """YOLOv12 TensorRT inferencer (pycuda-based).
        Args:
            onnx_path: path to the ONNX model
            engine_path: engine file path; if None, the engine is built from the ONNX file
            max_batch_size: maximum batch size
            fp16_mode: whether to build with FP16 precision
        """
        self.logger = trt.Logger(trt.Logger.WARNING)
        self.engine = None
        self.context = None
        self.inputs = []
        self.outputs = []
        self.bindings = []
        self.stream = cuda.Stream()
        # Build or load the engine
        if engine_path is None or not Path(engine_path).exists():
            self.engine = self.build_engine(onnx_path, max_batch_size, fp16_mode)
            if engine_path is not None:
                self.save_engine(engine_path)
        else:
            self.engine = self.load_engine(engine_path)
        self.context = self.engine.create_execution_context()
        self.setup_bindings()

    def build_engine(self, onnx_path, max_batch_size, fp16_mode):
        """Build a TensorRT engine from an ONNX file."""
        builder = trt.Builder(self.logger)
        network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
        parser = trt.OnnxParser(network, self.logger)
        # Parse the ONNX model
        with open(onnx_path, 'rb') as model:
            if not parser.parse(model.read()):
                print('ERROR: Failed to parse the ONNX file.')
                for error in range(parser.num_errors):
                    print(parser.get_error(error))
                return None
        # Configure the builder
        config = builder.create_builder_config()
        config.max_workspace_size = 1 << 30  # 1 GB
        config.set_flag(trt.BuilderFlag.GPU_FALLBACK)
        if fp16_mode and builder.platform_has_fast_fp16:
            config.set_flag(trt.BuilderFlag.FP16)
        profile = builder.create_optimization_profile()
        input_tensor = network.get_input(0)
        input_shape = input_tensor.shape
        # Set a dynamic-shape profile if needed
        if -1 in input_shape:
            profile.set_shape(input_tensor.name,
                              (1, 3, 640, 640),
                              (max_batch_size, 3, 640, 640),
                              (max_batch_size, 3, 640, 640))
            config.add_optimization_profile(profile)
        print("Building TensorRT engine. This may take a few minutes...")
        engine = builder.build_engine(network, config)
        return engine

    def load_engine(self, engine_path):
        """Load a serialized TensorRT engine from disk."""
        with open(engine_path, 'rb') as f:
            runtime = trt.Runtime(self.logger)
            return runtime.deserialize_cuda_engine(f.read())

    def save_engine(self, engine_path):
        """Save the engine to disk."""
        if self.engine is not None:
            with open(engine_path, 'wb') as f:
                f.write(self.engine.serialize())
            print(f"Engine saved to {engine_path}")

    def setup_bindings(self):
        """Allocate device memory for every input/output binding."""
        for binding in self.engine:
            size = trt.volume(self.engine.get_binding_shape(binding)) * self.engine.max_batch_size
            dtype = trt.nptype(self.engine.get_binding_dtype(binding))
            # Allocate GPU memory
            device_mem = cuda.mem_alloc(size * np.dtype(dtype).itemsize)
            self.bindings.append(int(device_mem))
            if self.engine.binding_is_input(binding):
                self.inputs.append({'name': binding, 'memory': device_mem,
                                    'shape': self.engine.get_binding_shape(binding), 'dtype': dtype})
            else:
                self.outputs.append({'name': binding, 'memory': device_mem,
                                     'shape': self.engine.get_binding_shape(binding), 'dtype': dtype})

    def preprocess(self, image, input_shape=(640, 640)):
        """Image preprocessing."""
        # Resize while keeping the aspect ratio
        h, w = image.shape[:2]
        scale = min(input_shape[0] / h, input_shape[1] / w)
        new_h, new_w = int(h * scale), int(w * scale)
        resized = cv2.resize(image, (new_w, new_h))
        # Pad to the network input size
        padded = np.full((input_shape[0], input_shape[1], 3), 114, dtype=np.uint8)
        padded[:new_h, :new_w] = resized
        # Convert range and layout
        padded = padded.astype(np.float32) / 255.0  # normalize
        padded = padded.transpose(2, 0, 1)          # HWC to CHW
        padded = np.ascontiguousarray(padded)
        return padded, scale, (h, w)

    def inference(self, image):
        """Run inference on a single image."""
        # Preprocess
        input_img, scale, original_shape = self.preprocess(image)
        input_data = np.expand_dims(input_img, axis=0)  # add batch dimension
        # Copy the input to the GPU
        cuda.memcpy_htod_async(self.inputs[0]['memory'], input_data, self.stream)
        # Execute
        self.context.execute_async_v2(bindings=self.bindings, stream_handle=self.stream.handle)
        # Fetch the outputs
        output_data = []
        for output in self.outputs:
            host_mem = np.empty(output['shape'], dtype=output['dtype'])
            cuda.memcpy_dtoh_async(host_mem, output['memory'], self.stream)
            output_data.append(host_mem)
        self.stream.synchronize()
        return output_data, scale, original_shape

    def postprocess(self, outputs, scale, original_shape, conf_threshold=0.25, iou_threshold=0.45):
        """Postprocess: parse detection results."""
        # Assumes the output is [batch, num_detections, 6] with (x1, y1, x2, y2, conf, class_id);
        # the exact format may need to be adjusted for your model.
        detections = []
        h, w = original_shape
        for output in outputs:
            if output.ndim == 3:  # [batch, num_det, 6]
                batch_detections = output[0]  # take the first batch
                for det in batch_detections:
                    if det[4] < conf_threshold:  # confidence threshold
                        continue
                    # Map back to the original image size
                    x1, y1, x2, y2 = det[:4]
                    x1 = int(x1 / scale)
                    y1 = int(y1 / scale)
                    x2 = int(x2 / scale)
                    y2 = int(y2 / scale)
                    # Clip to the image bounds
                    x1 = max(0, min(x1, w))
                    y1 = max(0, min(y1, h))
                    x2 = max(0, min(x2, w))
                    y2 = max(0, min(y2, h))
                    conf = float(det[4])
                    class_id = int(det[5])
                    detections.append([x1, y1, x2, y2, conf, class_id])
        # Non-maximum suppression
        if detections:
            detections = self.non_max_suppression(np.array(detections), iou_threshold)
        return detections

    def non_max_suppression(self, boxes, iou_threshold):
        """Non-maximum suppression."""
        if len(boxes) == 0:
            return []
        x1 = boxes[:, 0]
        y1 = boxes[:, 1]
        x2 = boxes[:, 2]
        y2 = boxes[:, 3]
        scores = boxes[:, 4]
        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
        order = scores.argsort()[::-1]
        keep = []
        while order.size > 0:
            i = order[0]
            keep.append(i)
            xx1 = np.maximum(x1[i], x1[order[1:]])
            yy1 = np.maximum(y1[i], y1[order[1:]])
            xx2 = np.minimum(x2[i], x2[order[1:]])
            yy2 = np.minimum(y2[i], y2[order[1:]])
            w = np.maximum(0.0, xx2 - xx1 + 1)
            h = np.maximum(0.0, yy2 - yy1 + 1)
            inter = w * h
            ovr = inter / (areas[i] + areas[order[1:]] - inter)
            inds = np.where(ovr <= iou_threshold)[0]
            order = order[inds + 1]
        return boxes[keep].tolist()

    def draw_detections(self, image, detections, class_names=None):
        """Draw detection results on the image."""
        for det in detections:
            x1, y1, x2, y2, conf, class_id = det
            # Bounding box
            cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
            # Label text
            label = f"Class {class_id}: {conf:.2f}"
            if class_names and class_id < len(class_names):
                label = f"{class_names[class_id]}: {conf:.2f}"
            # Label background
            label_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)[0]
            cv2.rectangle(image, (x1, y1 - label_size[1] - 10), (x1 + label_size[0], y1), (0, 255, 0), -1)
            # Label text on top of the background
            cv2.putText(image, label, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2)
        return image


def main():
    # Configuration
    onnx_path = "model.onnx"       # path to your ONNX model
    engine_path = "model.engine"   # where to save the engine file
    image_path = "test.jpg"        # test image path
    # Class names (adjust to your model)
    class_names = ["person"]       # single person class
    # Initialize the TensorRT inferencer
    print("Initializing TensorRT engine...")
    detector = YOLOv12TRT(onnx_path, engine_path)
    # Load the test image
    image = cv2.imread(image_path)
    if image is None:
        print(f"Error: Cannot load image {image_path}")
        return
    # Run inference
    print("Running inference...")
    start_time = time.time()
    outputs, scale, original_shape = detector.inference(image)
    inference_time = time.time() - start_time
    # Postprocess
    detections = detector.postprocess(outputs, scale, original_shape)
    # Draw results
    result_image = detector.draw_detections(image.copy(), detections, class_names)
    # Show results
    print(f"Inference time: {inference_time * 1000:.2f} ms")
    print(f"Detections: {len(detections)}")
    cv2.imshow("Detection Results", result_image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    # Save the result image
    cv2.imwrite("result.jpg", result_image)
    print("Result saved to result.jpg")


if __name__ == "__main__":
    main()
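
One caveat on the timing in main(): the very first call also pays for lazy CUDA and TensorRT initialization, so a single measurement can be misleading. A short warm-up plus an averaged loop gives a steadier number; this sketch reuses the detector and image variables from main() and assumes it is inserted after the image is loaded:

# Warm-up runs (not timed), then average the latency over repeated runs
N_WARMUP, N_RUNS = 5, 50
for _ in range(N_WARMUP):
    detector.inference(image)

start = time.time()
for _ in range(N_RUNS):
    detector.inference(image)
print(f"Mean inference latency: {(time.time() - start) / N_RUNS * 1000:.2f} ms")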
