
YOLO Series: Loading and Running the Ultralytics YOLOv11 Model with the C++ OpenCV DNN Module (with Source Code)


  • YOLO model export
  • OpenCV DNN
  • Results

YOLO model export

test_export.py

from ultralytics import YOLO

# Load a model
model = YOLO("yolo11n.pt")  # load an official model

# Export the model
model.export(format="onnx")

OpenCV DNN

inference.h

#ifndef INFERENCE_H
#define INFERENCE_H

#include <fstream>
#include <vector>
#include <string>
#include <random>
#include <opencv2/opencv.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/dnn.hpp>

// Detection result structure
struct Detection
{
    int class_id{0};           // class ID
    std::string className{};   // class name
    float confidence{0.0};     // confidence score
    cv::Scalar color{};        // display color
    cv::Rect box{};            // bounding box
};

class Inference
{
public:
    // Constructor: model path, input size, whether to run on CUDA, class-name file
    Inference(const std::string &onnxModelPath, const cv::Size &modelInputShape = {640, 640}, const bool &runWithCuda = true, const std::string &classesTxtFile = "");

    // Core inference function: takes an image, returns a vector of detections
    std::vector<Detection> runInference(const cv::Mat &input);

private:
    // Load class names from a text file
    void loadClassesFromFile();

    // Load the ONNX model and configure the compute backend (CUDA/CPU)
    void loadOnnxNetwork();

    // Preprocessing: aspect-ratio-preserving square padding (letterbox)
    cv::Mat formatToSquare(const cv::Mat &source, int *pad_x, int *pad_y, float *scale);

    std::string modelPath{};        // path to the ONNX model file
    std::string classesPath{};      // path to the class-name file
    bool cudaEnabled{};             // whether CUDA acceleration is enabled

    // Default names of the 80 COCO classes
    std::vector<std::string> classes{
        "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
        "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
        "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
        "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
        "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
        "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
        "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard",
        "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase",
        "scissors", "teddy bear", "hair drier", "toothbrush"};

    cv::Size2f modelShape{};                // model input size
    float modelConfidenceThreshold {0.25};  // objectness confidence threshold
    float modelScoreThreshold      {0.45};  // class score threshold
    float modelNMSThreshold        {0.50};  // non-maximum suppression threshold
    bool letterBoxForSquare = true;         // whether to use letterbox preprocessing
    cv::dnn::Net net;                       // OpenCV DNN network object
};

#endif // INFERENCE_H
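Before working through the parsing logic in inference.cpp, it can help to confirm the layout of the raw output tensor that the exported model produces. Below is a minimal, self-contained sketch (not one of the project files; the model path is an assumption and should point at wherever the ONNX export was saved) that loads the model in OpenCV DNN, runs a dummy forward pass, and prints the output shape.

// Standalone sanity check: load the exported ONNX and print the output shape.
// A YOLOv8/YOLOv11 export is expected to report 1 x 84 x 8400,
// while a YOLOv5 export reports 1 x 25200 x 85.
#include <iostream>
#include <vector>
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>

int main()
{
    // Assumed path; adjust to your own export location
    cv::dnn::Net net = cv::dnn::readNetFromONNX("../data/yolo11n.onnx");

    // A dummy 640x640 frame is enough to trigger a forward pass
    cv::Mat dummy = cv::Mat::zeros(640, 640, CV_8UC3);
    cv::Mat blob;
    cv::dnn::blobFromImage(dummy, blob, 1.0 / 255.0, cv::Size(640, 640), cv::Scalar(), true, false);
    net.setInput(blob);

    std::vector<cv::Mat> outputs;
    net.forward(outputs, net.getUnconnectedOutLayersNames());

    std::cout << outputs[0].size[0] << " x "
              << outputs[0].size[1] << " x "
              << outputs[0].size[2] << std::endl;
    return 0;
}

This shape is exactly what the `dimensions > rows` check in runInference below relies on to tell the YOLOv8/v11 layout apart from the YOLOv5 one.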

inference.cpp

#include "inference.h"

// Constructor
Inference::Inference(const std::string &onnxModelPath, const cv::Size &modelInputShape, const bool &runWithCuda, const std::string &classesTxtFile)
{
    modelPath = onnxModelPath;
    modelShape = modelInputShape;
    cudaEnabled = runWithCuda;
    classesPath = classesTxtFile;

    loadOnnxNetwork();
    // loadClassesFromFile();
}

// Inference
std::vector<Detection> Inference::runInference(const cv::Mat &input)
{
    cv::Mat modelInput = input;
    int pad_x = 0, pad_y = 0;  // padding offsets (defaults for the no-letterbox path)
    float scale = 1.0f;        // resize factor

    // If letterbox is enabled and the model input is square, apply letterbox preprocessing
    if (letterBoxForSquare && modelShape.width == modelShape.height)
        modelInput = formatToSquare(modelInput, &pad_x, &pad_y, &scale);

    // Convert the image into the blob format expected by the network
    cv::Mat blob;
    cv::dnn::blobFromImage(modelInput, blob, 1.0/255.0, modelShape, cv::Scalar(), true, false);
    net.setInput(blob);

    // Forward pass to obtain the model output
    std::vector<cv::Mat> outputs;
    net.forward(outputs, net.getUnconnectedOutLayersNames());

    // Parse the output dimensions
    int rows = outputs[0].size[1];       // number of candidate boxes
    int dimensions = outputs[0].size[2]; // values per candidate box

    bool yolov8 = false;
    // YOLOv5: (batchSize, 25200, 85)   85 = 4 box coords + 1 objectness + 80 class scores
    // YOLOv8/v11: (batchSize, 84, 8400) 84 = 4 box coords + 80 class scores
    if (dimensions > rows) // check whether the output is in the YOLOv8/v11 layout
    {
        // Swap dimensions and transpose to (rows, dimensions)
        yolov8 = true;
        rows = outputs[0].size[2];
        dimensions = outputs[0].size[1];

        outputs[0] = outputs[0].reshape(1, dimensions);
        cv::transpose(outputs[0], outputs[0]);
    }
    float *data = (float *)outputs[0].data;

    // Containers for the raw detections
    std::vector<int> class_ids;
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;

    // Iterate over all candidate boxes
    for (int i = 0; i < rows; ++i)
    {
        if (yolov8)
        {
            float *classes_scores = data + 4;

            cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores);
            cv::Point class_id;
            double maxClassScore;

            minMaxLoc(scores, 0, &maxClassScore, 0, &class_id);

            if (maxClassScore > modelScoreThreshold)
            {
                confidences.push_back(maxClassScore);
                class_ids.push_back(class_id.x);

                float x = data[0];
                float y = data[1];
                float w = data[2];
                float h = data[3];

                // Map the box from letterboxed model coordinates back to the original image
                int left = int((x - 0.5 * w - pad_x) / scale);
                int top = int((y - 0.5 * h - pad_y) / scale);
                int width = int(w / scale);
                int height = int(h / scale);

                boxes.push_back(cv::Rect(left, top, width, height));
            }
        }
        else // yolov5
        {
            float confidence = data[4];

            if (confidence >= modelConfidenceThreshold)
            {
                float *classes_scores = data + 5;

                cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores);
                cv::Point class_id;
                double max_class_score;

                minMaxLoc(scores, 0, &max_class_score, 0, &class_id);

                if (max_class_score > modelScoreThreshold)
                {
                    confidences.push_back(confidence);
                    class_ids.push_back(class_id.x);

                    float x = data[0];
                    float y = data[1];
                    float w = data[2];
                    float h = data[3];

                    int left = int((x - 0.5 * w - pad_x) / scale);
                    int top = int((y - 0.5 * h - pad_y) / scale);
                    int width = int(w / scale);
                    int height = int(h / scale);

                    boxes.push_back(cv::Rect(left, top, width, height));
                }
            }
        }

        data += dimensions;
    }

    // Apply non-maximum suppression (NMS) to remove overlapping boxes
    std::vector<int> nms_result;
    cv::dnn::NMSBoxes(boxes, confidences, modelScoreThreshold, modelNMSThreshold, nms_result);

    // Build the final detection results
    std::vector<Detection> detections{};
    for (unsigned long i = 0; i < nms_result.size(); ++i)
    {
        int idx = nms_result[i]; // index kept after NMS

        Detection result;
        result.class_id = class_ids[idx];
        result.confidence = confidences[idx];

        std::random_device rd;
        std::mt19937 gen(rd());
        std::uniform_int_distribution<int> dis(100, 255);
        result.color = cv::Scalar(dis(gen), dis(gen), dis(gen));

        result.className = classes[result.class_id];
        result.box = boxes[idx];

        detections.push_back(result);
    }

    return detections;
}

void Inference::loadClassesFromFile()
{
    std::ifstream inputFile(classesPath);
    if (inputFile.is_open())
    {
        std::string classLine;
        while (std::getline(inputFile, classLine))
            classes.push_back(classLine);
        inputFile.close();
    }
}

void Inference::loadOnnxNetwork()
{
    net = cv::dnn::readNetFromONNX(modelPath);
    if (cudaEnabled)
    {
        std::cout << "\nRunning on CUDA" << std::endl;
        net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
        net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
    }
    else
    {
        std::cout << "\nRunning on CPU" << std::endl;
        net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
        net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
    }
}

// Preprocessing: aspect-ratio-preserving square padding (letterbox)
cv::Mat Inference::formatToSquare(const cv::Mat &source, int *pad_x, int *pad_y, float *scale)
{
    int col = source.cols;
    int row = source.rows;
    int m_inputWidth = modelShape.width;
    int m_inputHeight = modelShape.height;

    // Compute the resize factor that preserves the aspect ratio
    *scale = std::min(m_inputWidth / (float)col, m_inputHeight / (float)row);
    int resized_w = col * *scale;
    int resized_h = row * *scale;

    // Compute the padding needed to center the image
    *pad_x = (m_inputWidth - resized_w) / 2;
    *pad_y = (m_inputHeight - resized_h) / 2;

    // Resize the image
    cv::Mat resized;
    cv::resize(source, resized, cv::Size(resized_w, resized_h));

    // Create a black canvas of the target size
    cv::Mat result = cv::Mat::zeros(m_inputHeight, m_inputWidth, source.type());

    // Copy the resized image into the center of the canvas
    resized.copyTo(result(cv::Rect(*pad_x, *pad_y, resized_w, resized_h)));
    resized.release();

    return result;
}
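The box decoding in runInference is just the inverse of formatToSquare: subtract the padding, then divide by the resize factor. A small worked sketch with made-up numbers (a hypothetical 1280x720 frame and one hypothetical detection in model coordinates) illustrates the arithmetic:

// Worked example of the letterbox inverse mapping used in runInference.
// The frame size and the box values below are hypothetical, for illustration only.
#include <algorithm>
#include <iostream>

int main()
{
    int srcW = 1280, srcH = 720;  // hypothetical source frame
    int dstW = 640,  dstH = 640;  // model input size

    float scale = std::min(dstW / (float)srcW, dstH / (float)srcH);  // 0.5
    int pad_x = (dstW - int(srcW * scale)) / 2;                      // 0
    int pad_y = (dstH - int(srcH * scale)) / 2;                      // 140

    // A hypothetical detection in model coordinates: center x/y, width, height
    float x = 320, y = 320, w = 100, h = 80;

    // Same formulas as in runInference: undo the padding, then undo the scaling
    int left   = int((x - 0.5 * w - pad_x) / scale);  // (320 - 50 - 0)   / 0.5 = 540
    int top    = int((y - 0.5 * h - pad_y) / scale);  // (320 - 40 - 140) / 0.5 = 280
    int width  = int(w / scale);                      // 200
    int height = int(h / scale);                      // 160

    std::cout << left << ", " << top << ", " << width << ", " << height << std::endl;
    return 0;
}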

main.cpp

#include <iostream>
#include <vector>
#include <chrono>
#include <getopt.h>
#include <opencv2/opencv.hpp>
#include "inference.h"

int main(int argc, char **argv)
{
    std::string onnxModelPath = "../data/yolo11n.onnx";
    bool runOnGPU = true;
    cv::Size sizeTmp(640, 640);
    Inference inf(onnxModelPath, sizeTmp, runOnGPU);

    std::vector<std::string> imageNames;
    imageNames.push_back("../data/bus.jpg");
    imageNames.push_back("../data/zidane.jpg");
    imageNames.push_back("../data/traffic.jpg");

    for (int i = 0; i < imageNames.size(); ++i) // iterate over each image
    {
        cv::Mat frame = cv::imread(imageNames[i]);

        auto start = std::chrono::system_clock::now();
        std::vector<Detection> output = inf.runInference(frame);
        auto end = std::chrono::system_clock::now();
        std::chrono::duration<double> elapsed = end - start;

        std::cout << "------Images[" << i << "]------" << std::endl;
        std::cout << "\tInference time: " << elapsed.count() * 1000 << " ms" << std::endl;

        int detections = output.size();
        std::cout << "\tdetect " << detections << " of detections" << std::endl;

        for (int j = 0; j < detections; ++j)
        {
            Detection detection = output[j];
            cv::Rect box = detection.box;
            cv::Scalar color = detection.color;

            // Print the result
            std::cout << "\t\tdetections[" << j << "]" << std::endl;
            std::cout << "\t\t\tclass_id:" << detection.class_id << std::endl;
            std::cout << "\t\t\tclassName:" << detection.className << std::endl;
            std::cout << "\t\t\tconfidence:" << detection.confidence << std::endl;
            std::cout << "\t\t\tbox:" << detection.box << std::endl;

            // Draw the detection
            cv::rectangle(frame, box, color, 2);
            std::string classString = detection.className + ' ' + std::to_string(detection.confidence).substr(0, 4);
            cv::Size textSize = cv::getTextSize(classString, cv::FONT_HERSHEY_DUPLEX, 1, 2, 0);
            cv::Rect textBox(box.x, box.y - 40, textSize.width + 10, textSize.height + 20);
            cv::rectangle(frame, textBox, color, cv::FILLED);
            cv::putText(frame, classString, cv::Point(box.x + 5, box.y - 10), cv::FONT_HERSHEY_DUPLEX, 1, cv::Scalar(0, 0, 0), 2, 0);
        }

        // Save the annotated image
        cv::imwrite(std::to_string(i) + ".jpg", frame);

        // Display the annotated image
        float scale = 1.0;
        cv::resize(frame, frame, cv::Size(frame.cols * scale, frame.rows * scale));
        cv::imshow("Inference", frame);
        cv::waitKey(-1);
    }
}
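main.cpp runs on still images, but the same Inference class can drive a live stream. Below is a minimal sketch (assuming a default webcam at index 0 and the same relative model path as above; CPU backend is chosen here to keep the assumption conservative) that reuses runInference once per frame:

// Minimal webcam variant of main.cpp (a sketch, not part of the original project).
#include <opencv2/opencv.hpp>
#include "inference.h"

int main()
{
    Inference inf("../data/yolo11n.onnx", cv::Size(640, 640), /*runWithCuda=*/false);

    cv::VideoCapture cap(0);  // assumed camera index
    if (!cap.isOpened())
        return -1;

    cv::Mat frame;
    while (cap.read(frame))
    {
        std::vector<Detection> output = inf.runInference(frame);

        // Draw each detection in the same style as main.cpp
        for (const Detection &det : output)
        {
            cv::rectangle(frame, det.box, det.color, 2);
            cv::putText(frame, det.className, cv::Point(det.box.x + 5, det.box.y - 10),
                        cv::FONT_HERSHEY_DUPLEX, 1, cv::Scalar(0, 0, 0), 2);
        }

        cv::imshow("Inference", frame);
        if (cv::waitKey(1) == 27)  // press Esc to quit
            break;
    }
    return 0;
}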

Results

------Images[0]------
    Inference time: 154.151 ms
    detect 4 of detections
        detections[0]
            class_id:5
            className:bus
            confidence:0.940209
            box:[785 x 506 from (12, 228)]
        detections[1]
            class_id:0
            className:person
            confidence:0.901381
            box:[194 x 506 from (48, 398)]
        detections[2]
            class_id:0
            className:person
            confidence:0.844137
            box:[139 x 492 from (670, 387)]
        detections[3]
            class_id:0
            className:person
            confidence:0.831744
            box:[122 x 454 from (223, 405)]
------Images[1]------
    Inference time: 125.669 ms
    detect 3 of detections
        detections[0]
            class_id:0
            className:person
            confidence:0.852391
            box:[399 x 669 from (749, 41)]
        detections[1]
            class_id:0
            className:person
            confidence:0.793089
            box:[993 x 513 from (143, 200)]
        detections[2]
            class_id:27
            className:tie
            confidence:0.481717
            box:[165 x 281 from (359, 437)]
------Images[2]------
    Inference time: 131.012 ms
    detect 11 of detections
        detections[0]
            class_id:2
            className:car
            confidence:0.771447
            box:[48 x 48 from (454, 433)]
        detections[1]
            class_id:2
            className:car
            confidence:0.749929
            box:[37 x 34 from (224, 398)]
        detections[2]
            class_id:2
            className:car
            confidence:0.745018
            box:[82 x 72 from (514, 523)]
        detections[3]
            class_id:2
            className:car
            confidence:0.674062
            box:[35 x 32 from (281, 391)]
        detections[4]
            class_id:2
            className:car
            confidence:0.640642
            box:[28 x 25 from (248, 372)]
        detections[5]
            class_id:7
            className:truck
            confidence:0.547953
            box:[136 x 128 from (448, 670)]
        detections[6]
            class_id:7
            className:truck
            confidence:0.53021
            box:[154 x 161 from (43, 752)]
        detections[7]
            class_id:2
            className:car
            confidence:0.524246
            box:[93 x 110 from (53, 554)]
        detections[8]
            class_id:2
            className:car
            confidence:0.522188
            box:[36 x 32 from (370, 400)]
        detections[9]
            class_id:2
            className:car
            confidence:0.503465
            box:[28 x 19 from (411, 368)]
        detections[10]
            class_id:2
            className:car
            confidence:0.465773
            box:[38 x 36 from (430, 399)]

[Annotated detection results for bus.jpg, zidane.jpg, and traffic.jpg]

