当前位置：首页 > news > 正文

YOLO系列——基于Ultralytics YOLOv11模型在C++ OpenCV DNN模块进行模型加载与推理（附源码）

news 2025/10/14 9:07:11

基于Ultralytics YOLOv11模型在C++ OpenCV DNN模块进行模型加载与推理（附源码）

yolo导出模型
opencv dnn
结果

yolo导出模型

test_export.py

from ultralytics import YOLO

# Load an official model from its checkpoint file.
weights = "yolo11n.pt"
model = YOLO(weights)

# Export the model in ONNX format.
# NOTE(review): the C++ demo reads "../data/yolo11n.onnx" - presumably the
# exported file has to be copied there; confirm the export output location.
model.export(format="onnx")

opencv dnn

inference.h

#ifndef INFERENCE_H
#define INFERENCE_H

#include <fstream>
#include <vector>
#include <string>
#include <random>
#include <opencv2/opencv.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/dnn.hpp>

/// One detection result.
struct Detection
{
    int class_id{0};           // Class index
    std::string className{};   // Class name
    float confidence{0.0};     // Confidence
    cv::Scalar color{};        // Colour used when drawing the detection
    cv::Rect box{};            // Bounding-box coordinates
};

/// Object detector built on an ONNX model and the OpenCV DNN module.
class Inference
{
public:
    /// @param onnxModelPath    Path of the ONNX model file.
    /// @param modelInputShape  Input size of the model (defaults to 640x640).
    /// @param runWithCuda      Whether to run on CUDA (defaults to true).
    /// @param classesTxtFile   Path of a class-names file (defaults to "").
    Inference(const std::string &onnxModelPath, const cv::Size &modelInputShape = {640, 640}, const bool &runWithCuda = true, const std::string &classesTxtFile = "");

    /// Core inference function: takes an image and returns the detections.
    std::vector<Detection> runInference(const cv::Mat &input);

private:
    /// Loads the class names from the class-names file.
    void loadClassesFromFile();

    /// Loads the ONNX model and configures the compute backend (CUDA/CPU).
    void loadOnnxNetwork();

    /// Image preprocessing: aspect-ratio-preserving square padding.
    /// The applied padding and scale are written through the pointers.
    cv::Mat formatToSquare(const cv::Mat &source, int *pad_x, int *pad_y, float *scale);

    std::string modelPath{};        // Path of the ONNX model file
    std::string classesPath{};      // Path of the class-names file
    bool cudaEnabled{};             // Whether CUDA acceleration is enabled

    // Default names of the 80 classes of the COCO dataset.
    std::vector<std::string> classes{"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"};

    cv::Size2f modelShape{};                // Model input size
    float modelConfidenceThreshold {0.25};  // Objectness confidence threshold
    float modelScoreThreshold      {0.45};  // Class score threshold
    float modelNMSThreshold        {0.50};  // Non-maximum-suppression threshold
    bool letterBoxForSquare = true;         // Whether to use letterbox preprocessing
    cv::dnn::Net net;                       // OpenCV DNN network object
};

#endif // INFERENCE_H

inference.cpp

#include "inference.h"

/// Constructor: records the configuration and loads the ONNX network.
///
/// The class-names path is stored in classesPath, but the file is not read
/// here: the loadClassesFromFile() call below is intentionally left disabled,
/// so the class names declared in the header stay in effect.
Inference::Inference(const std::string &onnxModelPath, const cv::Size &modelInputShape, const bool &runWithCuda, const std::string &classesTxtFile)
    : modelPath(onnxModelPath),
      classesPath(classesTxtFile),
      cudaEnabled(runWithCuda),
      modelShape(modelInputShape)
{
    loadOnnxNetwork();
    // loadClassesFromFile();
}
// Inference
std::vector<Detection> Inference::runInference(const cv::Mat &input)
{cv::Mat modelInput = input;int pad_x, pad_y;  // 填充尺寸float scale;       // 缩放比例// 如果启用letter box且输入为正方形，进行预处理if (letterBoxForSquare && modelShape.width == modelShape.height)modelInput = formatToSquare(modelInput, &pad_x, &pad_y, &scale);// 将图像转换为模型输入blob格式cv::Mat blob;cv::dnn::blobFromImage(modelInput, blob, 1.0/255.0, modelShape, cv::Scalar(), true, false);net.setInput(blob);// 前向传播，获取模型输出std::vector<cv::Mat> outputs;net.forward(outputs, net.getUnconnectedOutLayersNames());// 解析输出维度int rows = outputs[0].size[1];      // 检测框数量int dimensions = outputs[0].size[2]; // 每个检测框的维度数bool yolov8 = false;// YOLOv5: (batchSize, 25200, 85)  85 = 4坐标 + 1置信度 + 80类别// YOLOv8/v11: (batchSize, 84, 8400)  84 = 4坐标 + 80类别if (dimensions > rows) // 判断是否为YOLOv8格式{// 交换维度yolov8 = true;rows = outputs[0].size[2];dimensions = outputs[0].size[1];outputs[0] = outputs[0].reshape(1, dimensions);cv::transpose(outputs[0], outputs[0]);}float *data = (float *)outputs[0].data;// 存储原始检测结果的容器std::vector<int> class_ids;std::vector<float> confidences;std::vector<cv::Rect> boxes;// 遍历所有检测框for (int i = 0; i < rows; ++i){if (yolov8){float *classes_scores = data+4;cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores);cv::Point class_id;double maxClassScore;minMaxLoc(scores, 0, &maxClassScore, 0, &class_id);if (maxClassScore > modelScoreThreshold){confidences.push_back(maxClassScore);class_ids.push_back(class_id.x);float x = data[0];float y = data[1];float w = data[2];float h = data[3];int left = int((x - 0.5 * w - pad_x) / scale);int top = int((y - 0.5 * h - pad_y) / scale);int width = int(w / scale);int height = int(h / scale);boxes.push_back(cv::Rect(left, top, width, height));}}else // yolov5{float confidence = data[4];if (confidence >= modelConfidenceThreshold){float *classes_scores = data+5;cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores);cv::Point class_id;double max_class_score;minMaxLoc(scores, 0, &max_class_score, 0, &class_id);if (max_class_score > 
modelScoreThreshold){confidences.push_back(confidence);class_ids.push_back(class_id.x);float x = data[0];float y = data[1];float w = data[2];float h = data[3];int left = int((x - 0.5 * w - pad_x) / scale);int top = int((y - 0.5 * h - pad_y) / scale);int width = int(w / scale);int height = int(h / scale);boxes.push_back(cv::Rect(left, top, width, height));}}}data += dimensions;}// 应用非极大值抑制(NMS)去除重叠框std::vector<int> nms_result;cv::dnn::NMSBoxes(boxes, confidences, modelScoreThreshold, modelNMSThreshold, nms_result);// 构建最终检测结果std::vector<Detection> detections{};for (unsigned long i = 0; i < nms_result.size(); ++i){int idx = nms_result[i];// NMS筛选后的索引Detection result;result.class_id = class_ids[idx];result.confidence = confidences[idx];std::random_device rd;std::mt19937 gen(rd());std::uniform_int_distribution<int> dis(100, 255);result.color = cv::Scalar(dis(gen), dis(gen), dis(gen));result.className = classes[result.class_id];result.box = boxes[idx];detections.push_back(result);}return detections;
}void Inference::loadClassesFromFile()
{std::ifstream inputFile(classesPath);if (inputFile.is_open()){std::string classLine;while (std::getline(inputFile, classLine))classes.push_back(classLine);inputFile.close();}
}void Inference::loadOnnxNetwork()
{net = cv::dnn::readNetFromONNX(modelPath);if (cudaEnabled){std::cout << "\nRunning on CUDA" << std::endl;net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);}else{std::cout << "\nRunning on CPU" << std::endl;net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);}
}// 图像预处理：保持宽高比的方形填充（letter box）
/// Scales `source` to fit inside the model input size while keeping its
/// aspect ratio, and centres it on a black canvas of exactly that size.
///
/// @param source  Image to convert; must not be empty.
/// @param pad_x   Receives the left padding, in pixels of the result.
/// @param pad_y   Receives the top padding, in pixels of the result.
/// @param scale   Receives the factor applied to `source`.
/// @return The padded image, of the same type as `source`.
/// @throws cv::Exception (through CV_Assert) on a null output pointer, an
///         empty source or a non-positive model input size.
cv::Mat Inference::formatToSquare(const cv::Mat &source, int *pad_x, int *pad_y, float *scale)
{
    CV_Assert(pad_x != nullptr && pad_y != nullptr && scale != nullptr);
    // An empty image would otherwise lead to a division by zero below.
    CV_Assert(!source.empty());

    const int col = source.cols;
    const int row = source.rows;
    const int m_inputWidth = static_cast<int>(modelShape.width);
    const int m_inputHeight = static_cast<int>(modelShape.height);
    CV_Assert(m_inputWidth > 0 && m_inputHeight > 0);

    // Use the smaller of the two ratios so the whole image fits.
    *scale = std::min(m_inputWidth / static_cast<float>(col), m_inputHeight / static_cast<float>(row));

    // Keep the resized extent inside [1, target]: truncation can give 0 for
    // extreme aspect ratios, which cv::resize rejects.
    const int resized_w = std::min(m_inputWidth, std::max(1, static_cast<int>(col * *scale)));
    const int resized_h = std::min(m_inputHeight, std::max(1, static_cast<int>(row * *scale)));

    // Padding that centres the resized image on the canvas.
    *pad_x = (m_inputWidth - resized_w) / 2;
    *pad_y = (m_inputHeight - resized_h) / 2;

    cv::Mat resized;
    cv::resize(source, resized, cv::Size(resized_w, resized_h));

    // Black canvas of the target size with the resized image copied into it.
    cv::Mat result = cv::Mat::zeros(m_inputHeight, m_inputWidth, source.type());
    resized.copyTo(result(cv::Rect(*pad_x, *pad_y, resized_w, resized_h)));
    return result;
}

main.cpp

#include <chrono>
#include <iostream>
#include <string>
#include <vector>
#include <getopt.h>
#include <opencv2/opencv.hpp>
#include "inference.h"

/// Demo: runs the detector on a fixed list of images, prints every detection,
/// saves the annotated image as "<index>.jpg" and shows it until a key is
/// pressed. The command-line arguments are not used.
int main(int argc, char **argv)
{
    (void)argc;
    (void)argv;

    const std::string onnxModelPath = "../data/yolo11n.onnx";
    const bool runOnGPU = true;
    const cv::Size sizeTmp(640, 640);
    Inference inf(onnxModelPath, sizeTmp, runOnGPU);

    std::vector<std::string> imageNames;
    imageNames.push_back("../data/bus.jpg");
    imageNames.push_back("../data/zidane.jpg");
    imageNames.push_back("../data/traffic.jpg");

    // Process every image in turn.
    for (std::size_t i = 0; i < imageNames.size(); ++i)
    {
        cv::Mat frame = cv::imread(imageNames[i]);
        if (frame.empty())
        {
            // cv::imread returns an empty Mat when the file cannot be read.
            std::cerr << "Could not read image: " << imageNames[i] << std::endl;
            continue;
        }

        // steady_clock is monotonic, so the measured interval cannot be
        // distorted by adjustments of the system time.
        const auto start = std::chrono::steady_clock::now();
        const std::vector<Detection> output = inf.runInference(frame);
        const auto end = std::chrono::steady_clock::now();
        const std::chrono::duration<double> elapsed = end - start;

        std::cout << "------Images[" << i << "]------" << std::endl;
        std::cout << "\tInference time: " << elapsed.count() * 1000 << " ms" << std::endl;

        const std::size_t detections = output.size();
        std::cout << "\tdetect " << detections << " of detections" << std::endl;

        for (std::size_t j = 0; j < detections; ++j)
        {
            const Detection &detection = output[j];
            const cv::Rect box = detection.box;
            const cv::Scalar color = detection.color;

            // Print the result.
            std::cout << "\t\tdetections[" << j << "]" << std::endl;
            std::cout << "\t\t\tclass_id:" << detection.class_id << std::endl;
            std::cout << "\t\t\tclassName:" << detection.className << std::endl;
            std::cout << "\t\t\tconfidence:" << detection.confidence << std::endl;
            std::cout << "\t\t\tbox:" << detection.box << std::endl;

            // Draw the box and a filled label above it.
            cv::rectangle(frame, box, color, 2);
            const std::string classString = detection.className + ' ' + std::to_string(detection.confidence).substr(0, 4);
            const cv::Size textSize = cv::getTextSize(classString, cv::FONT_HERSHEY_DUPLEX, 1, 2, 0);
            const cv::Rect textBox(box.x, box.y - 40, textSize.width + 10, textSize.height + 20);
            cv::rectangle(frame, textBox, color, cv::FILLED);
            cv::putText(frame, classString, cv::Point(box.x + 5, box.y - 10), cv::FONT_HERSHEY_DUPLEX, 1, cv::Scalar(0, 0, 0), 2, 0);
        }

        // Save the annotated image.
        cv::imwrite(std::to_string(i) + ".jpg", frame);

        // Show it; `scale` resizes the image for display only.
        const float scale = 1.0f;
        cv::resize(frame, frame, cv::Size(static_cast<int>(frame.cols * scale), static_cast<int>(frame.rows * scale)));
        cv::imshow("Inference", frame);
        cv::waitKey(-1);
    }

    return 0;
}

结果

------Images[0]------
    Inference time: 154.151 ms
    detect 4 of detections
        detections[0]
            class_id:5
            className:bus
            confidence:0.940209
            box:[785 x 506 from (12, 228)]
        detections[1]
            class_id:0
            className:person
            confidence:0.901381
            box:[194 x 506 from (48, 398)]
        detections[2]
            class_id:0
            className:person
            confidence:0.844137
            box:[139 x 492 from (670, 387)]
        detections[3]
            class_id:0
            className:person
            confidence:0.831744
            box:[122 x 454 from (223, 405)]
------Images[1]------
    Inference time: 125.669 ms
    detect 3 of detections
        detections[0]
            class_id:0
            className:person
            confidence:0.852391
            box:[399 x 669 from (749, 41)]
        detections[1]
            class_id:0
            className:person
            confidence:0.793089
            box:[993 x 513 from (143, 200)]
        detections[2]
            class_id:27
            className:tie
            confidence:0.481717
            box:[165 x 281 from (359, 437)]
------Images[2]------
    Inference time: 131.012 ms
    detect 11 of detections
        detections[0]
            class_id:2
            className:car
            confidence:0.771447
            box:[48 x 48 from (454, 433)]
        detections[1]
            class_id:2
            className:car
            confidence:0.749929
            box:[37 x 34 from (224, 398)]
        detections[2]
            class_id:2
            className:car
            confidence:0.745018
            box:[82 x 72 from (514, 523)]
        detections[3]
            class_id:2
            className:car
            confidence:0.674062
            box:[35 x 32 from (281, 391)]
        detections[4]
            class_id:2
            className:car
            confidence:0.640642
            box:[28 x 25 from (248, 372)]
        detections[5]
            class_id:7
            className:truck
            confidence:0.547953
            box:[136 x 128 from (448, 670)]
        detections[6]
            class_id:7
            className:truck
            confidence:0.53021
            box:[154 x 161 from (43, 752)]
        detections[7]
            class_id:2
            className:car
            confidence:0.524246
            box:[93 x 110 from (53, 554)]
        detections[8]
            class_id:2
            className:car
            confidence:0.522188
            box:[36 x 32 from (370, 400)]
        detections[9]
            class_id:2
            className:car
            confidence:0.503465
            box:[28 x 19 from (411, 368)]
        detections[10]
            class_id:2
            className:car
            confidence:0.465773
            box:[38 x 36 from (430, 399)]