当前位置: 首页 > news >正文

opencv+ONNX模型的推理

如前文我们已经编译出带dnn模块的opencv,如果使用简易版的opencv虽然也可以加载onxx模型但是无法利用GPU进行并行计算,导致推理速度比较慢。所以既然都有这个RTX4060Ti的环境了,为什么不使用并行计算呢。

opencv加载的模型通常是ONXX模型,使用yolov8训练自己的数据集得到的模型为.pt,所以通常需要将.pt转为ONXX模型。

(这里为什么使用yolov8),应为我发现使用yolov5训练的模型opencv无法加载,或者报加载错误问题。而使用yolov8训练完成之后,直接导出ONXX模型可以直接使用,原因暂时没找到可能为版本问题。(待更新)

当然也可以使用ONXX的API加载ONXX模型,ONXX的api个人感觉比较通用,但是这里我没使用

这里我们的版本
PyTorch版本: 2.6.0+cu126
torchvision版本: 0.21.0+cu126
Python版本: 3.10.0 | packaged by conda-forge | (default, Nov 20 2021, 02:18:13) [MSC v.1916 64 bit (AMD64)]

pytorch去官网下载
https://pytorch.org/
我们的环境是cuda12.6
选择对应的版本
在这里插入图片描述
Torchvision 是 PyTorch 深度学习框架的一个重要组成部分,专门用于处理计算机视觉任务。它提供了一系列工具和预训练模型,以帮助开发者在图像处理和视觉识别领域中更有效地工作。
https://mirror.nju.edu.cn/pytorch/whl/cu126/torchvision/
在这里插入图片描述
torchvision-0.21.0+cu126-cp310-cp310-win_amd64.whl这个版本

导出模型python代码
下载yolov8代码,安装所需环境

from ultralytics import YOLO
import ultralytics
print(ultralytics.__version__)
# 加载训练完成的基于YOLOv8s预训练的自己的模型
model = YOLO('best.pt')

# 导出为 ONNX 格式,指定 opset_version 和简化选项
model.export(format='onnx', dynamic=False, opset=12)  # 尝试使用特定的opset版本

C++推理
inference.h

#ifndef INFERENCE_H
#define INFERENCE_H

// Cpp native
#include <fstream>
#include <vector>
#include <string>
#include <random>

// OpenCV / DNN / Inference
#include <opencv2/imgproc.hpp>
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
struct Detection
{
	int class_id{ 0 };
	std::string className{};
	float confidence{ 0.0 };
	cv::Scalar color{};
	cv::Rect box{};
};

class Inference
{
public:
	Inference(const std::string& onnxModelPath, const cv::Size& modelInputShape = { 640, 640 }, const std::string& classesTxtFile = "", const bool& runWithCuda = true);
	std::vector<Detection> runInference(const cv::Mat& input);

private:
	void loadClassesFromFile();
	void loadOnnxNetwork();
	cv::Mat formatToSquare(const cv::Mat& source);

	std::string modelPath{};
	std::string classesPath{};
	bool cudaEnabled{};

	//std::vector<std::string> classes{ "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush" };

	std::vector<std::string> classes{ "fire" };
	cv::Size2f modelShape{};

	float modelConfidenceThreshold{ 0.25 };
	float modelScoreThreshold{ 0.45 };
	float modelNMSThreshold{ 0.50 };

	bool letterBoxForSquare = true;

	cv::dnn::Net net;
};

#endif // INFERENCE_H


inference.cpp

#include "inference.h"

Inference::Inference(const std::string& onnxModelPath, const cv::Size& modelInputShape, const std::string& classesTxtFile, const bool& runWithCuda)
{
	modelPath = onnxModelPath;
	modelShape = modelInputShape;
	classesPath = classesTxtFile;
	cudaEnabled = runWithCuda;

	loadOnnxNetwork();
	// loadClassesFromFile(); The classes are hard-coded for this example
}

std::vector<Detection> Inference::runInference(const cv::Mat& input)
{
	cv::Mat modelInput = input;
	if (letterBoxForSquare && modelShape.width == modelShape.height)
		modelInput = formatToSquare(modelInput);

	cv::Mat blob;
	cv::dnn::blobFromImage(modelInput, blob, 1.0 / 255.0, modelShape, cv::Scalar(), true, false);
	net.setInput(blob);

	std::vector<cv::Mat> outputs;
	net.forward(outputs, net.getUnconnectedOutLayersNames());

	int rows = outputs[0].size[1];
	int dimensions = outputs[0].size[2];

	bool yolov8 = false;
	// yolov5 has an output of shape (batchSize, 25200, 85) (Num classes + box[x,y,w,h] + confidence[c])
	// yolov8 has an output of shape (batchSize, 84,  8400) (Num classes + box[x,y,w,h])
	if (dimensions > rows) // Check if the shape[2] is more than shape[1] (yolov8)
	{
		yolov8 = true;
		rows = outputs[0].size[2];
		dimensions = outputs[0].size[1];

		outputs[0] = outputs[0].reshape(1, dimensions);
		cv::transpose(outputs[0], outputs[0]);
	}
	float* data = (float*)outputs[0].data;

	float x_factor = modelInput.cols / modelShape.width;
	float y_factor = modelInput.rows / modelShape.height;

	std::vector<int> class_ids;
	std::vector<float> confidences;
	std::vector<cv::Rect> boxes;

	for (int i = 0; i < rows; ++i)
	{
		if (yolov8)
		{
			float* classes_scores = data + 4;

			cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores);
			cv::Point class_id;
			double maxClassScore;

			minMaxLoc(scores, 0, &maxClassScore, 0, &class_id);

			if (maxClassScore > modelScoreThreshold)
			{
				confidences.push_back(maxClassScore);
				class_ids.push_back(class_id.x);

				float x = data[0];
				float y = data[1];
				float w = data[2];
				float h = data[3];

				int left = int((x - 0.5 * w) * x_factor);
				int top = int((y - 0.5 * h) * y_factor);

				int width = int(w * x_factor);
				int height = int(h * y_factor);

				boxes.push_back(cv::Rect(left, top, width, height));
			}
		}
		else // yolov5
		{
			float confidence = data[4];

			if (confidence >= modelConfidenceThreshold)
			{
				float* classes_scores = data + 5;

				cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores);
				cv::Point class_id;
				double max_class_score;

				minMaxLoc(scores, 0, &max_class_score, 0, &class_id);

				if (max_class_score > modelScoreThreshold)
				{
					confidences.push_back(confidence);
					class_ids.push_back(class_id.x);

					float x = data[0];
					float y = data[1];
					float w = data[2];
					float h = data[3];

					int left = int((x - 0.5 * w) * x_factor);
					int top = int((y - 0.5 * h) * y_factor);

					int width = int(w * x_factor);
					int height = int(h * y_factor);

					boxes.push_back(cv::Rect(left, top, width, height));
				}
			}
		}

		data += dimensions;
	}

	std::vector<int> nms_result;
	cv::dnn::NMSBoxes(boxes, confidences, modelScoreThreshold, modelNMSThreshold, nms_result);

	std::vector<Detection> detections{};
	for (unsigned long i = 0; i < nms_result.size(); ++i)
	{
		int idx = nms_result[i];

		Detection result;
		result.class_id = class_ids[idx];
		result.confidence = confidences[idx];

		std::random_device rd;
		std::mt19937 gen(rd());
		std::uniform_int_distribution<int> dis(100, 255);
		result.color = cv::Scalar(dis(gen),
			dis(gen),
			dis(gen));

		result.className = classes[result.class_id];
		result.box = boxes[idx];

		detections.push_back(result);
	}

	return detections;
}

void Inference::loadClassesFromFile()
{
	std::ifstream inputFile(classesPath);
	if (inputFile.is_open())
	{
		std::string classLine;
		while (std::getline(inputFile, classLine))
			classes.push_back(classLine);
		inputFile.close();
	}
}

void Inference::loadOnnxNetwork()
{
	//net = cv::dnn::readNetFromONNX(modelPath);
	//if (cudaEnabled)
	//{
	//	std::cout << "\nRunning on CUDA" << std::endl;
	//	net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
	//	net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
	//}
	//else
	//{
	//	std::cout << "\nRunning on CPU" << std::endl;
	//	net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
	//	net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
	//}

	net = cv::dnn::readNetFromONNX(modelPath);
	if (cudaEnabled)
	{
		std::cout << "\nRunning on CUDA" << std::endl;
		net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
		net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
	}
	else
	{
		std::cout << "\nRunning on CPU" << std::endl;
		// 使用默认的CPU后端,避免指定为DNN_BACKEND_OPENCV可能导致的TBB依赖
		net.setPreferableBackend(cv::dnn::DNN_BACKEND_DEFAULT);
		net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
	}


}

cv::Mat Inference::formatToSquare(const cv::Mat& source)
{
	int col = source.cols;
	int row = source.rows;
	int _max = MAX(col, row);
	cv::Mat result = cv::Mat::zeros(_max, _max, CV_8UC3);
	source.copyTo(result(cv::Rect(0, 0, col, row)));
	return result;
}

main.cpp


#include <iostream>
#include <vector>
#include <opencv2/opencv.hpp>
#include "inference.h"
//#include <onnxruntime_cxx_api.h>
#include <iostream>
#include <windows.h>
using namespace std;
using namespace cv;

int main(int argc, char** argv)
{

	// 动态加载 cuBLAS DLL
	HMODULE hCuBLAS = LoadLibrary(TEXT("C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v12.6\\bin\\cublasLt64_12.dll"));
	if (hCuBLAS == NULL) {
		cout << "无法加载 cuBLAS DLL" << endl;
		return -1;
	}


	bool runOnGPU = true;

	// 1. 设置你的onnx模型
	// Note that in this example the classes are hard-coded and 'classes.txt' is a place holder.
	Inference  inf("D:/vsworksapce/yolo/x64/Debug/models/best.onnx", cv::Size(640, 640), "classes.txt", runOnGPU); // classes.txt 可以缺失

	// 2. 设置你的输入图片
	std::vector<std::string> imageNames;
	imageNames.push_back("D:/vsworksapce/yolo/x64/Debug/images/fire520.jpg");
	//imageNames.push_back("zidane.jpg");

	for (int i = 0; i < imageNames.size(); ++i)
	{
		cv::Mat frame = cv::imread(imageNames[i]);

		// Inference starts here...
		std::vector<Detection> output = inf.runInference(frame);

		int detections = output.size();
		std::cout << "Number of detections:" << detections << std::endl;

		// feiyull
		// 这里需要resize下,否则结果不对
		//cv::resize(frame, frame, cv::Size(640, 640));

		for (int i = 0; i < detections; ++i)
		{
			Detection detection = output[i];

			cv::Rect box = detection.box;
			cv::Scalar color = detection.color;

			// Detection box
			cv::rectangle(frame, box, color, 2);

			// Detection box text
			std::string classString = detection.className + ' ' + std::to_string(detection.confidence).substr(0, 4);
			cv::Size textSize = cv::getTextSize(classString, cv::FONT_HERSHEY_DUPLEX, 1, 2, 0);
			cv::Rect textBox(box.x, box.y - 40, textSize.width + 10, textSize.height + 20);

			cv::rectangle(frame, textBox, color, cv::FILLED);
			cv::putText(frame, classString, cv::Point(box.x + 5, box.y - 10), cv::FONT_HERSHEY_DUPLEX, 1, cv::Scalar(0, 0, 0), 2, 0);
		}
		cv::imshow("Inference", frame);
		cv::waitKey(0);
		cv::destroyAllWindows();
	}
}

//std::vector<float> preprocessImage(const cv::Mat& img, const std::vector<int64_t>& inputDims) {
//	int width = inputDims[3];
//	int height = inputDims[2];
//
//	// 调整大小
//	cv::Mat resizedImg;
//	cv::resize(img, resizedImg, cv::Size(width, height));
//
//	// 归一化到 [0, 1]
//	resizedImg.convertTo(resizedImg, CV_32F, 1.0 / 255.0);
//
//	// HWC -> CHW 格式转换
//	std::vector<cv::Mat> channels(3);
//	cv::split(resizedImg, channels);
//
//	std::vector<float> inputData(inputDims[1] * inputDims[2] * inputDims[3]);
//	auto dataPtr = inputData.data();
//
//	for (int i = 0; i < 3; ++i) {
//		std::memcpy(dataPtr, channels[i].data, sizeof(float) * channels[i].total());
//		dataPtr += channels[i].total();
//	}
//
//	return inputData;
//}
 解析YOLOv8输出并绘制边界框
//void drawPredictions(cv::Mat& image, const std::vector<float>& output, float conf_threshold, float nms_threshold, int num_classes = 80) {
//	// 假设每个预测包含 [x_center, y_center, width, height, confidence, class_scores...]
//	int num_predictions = output.size() / (5 + num_classes);
//	std::vector<cv::Rect> boxes;
//	std::vector<float> confidences;
//	std::vector<int> classIds;
//
//	for (int i = 0; i < num_predictions; ++i) {
//		float confidence = output[i * (5 + num_classes) + 4];
//		if (confidence < conf_threshold)
//			continue;
//
//		// 获取最大类别分数及其索引
//		int classId = 0;
//		float max_class_score = 0.0f;
//		for (int j = 0; j < num_classes; ++j) {
//			float score = output[i * (5 + num_classes) + 5 + j];
//			if (score > max_class_score) {
//				max_class_score = score;
//				classId = j;
//			}
//		}
//
//		// 确保最终得分(置信度*类别分数)超过阈值
//		float final_confidence = confidence * max_class_score;
//		if (final_confidence < conf_threshold)
//			continue;
//
//		confidences.push_back(final_confidence);
//		classIds.push_back(classId);
//
//		float x_center = output[i * (5 + num_classes)] * image.cols;
//		float y_center = output[i * (5 + num_classes) + 1] * image.rows;
//		float width = output[i * (5 + num_classes) + 2] * image.cols;
//		float height = output[i * (5 + num_classes) + 3] * image.rows;
//
//		int left = static_cast<int>(x_center - width / 2);
//		int top = static_cast<int>(y_center - height / 2);
//
//		boxes.emplace_back(left, top, static_cast<int>(width), static_cast<int>(height));
//	}
//
//	std::vector<int> indices;
//	cv::dnn::NMSBoxes(boxes, confidences, conf_threshold, nms_threshold, indices);
//
//	for (int idx : indices) {
//		cv::Rect box = boxes[idx];
//		cv::rectangle(image, box.tl(), box.br(), cv::Scalar(0, 255, 0), 2);
//		std::string label = cv::format("Class %d: %.2f", classIds[idx], confidences[idx]);
//		int baseLine = 0;
//		cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
//		cv::rectangle(image, cv::Point(box.x, box.y - labelSize.height),
//			cv::Point(box.x + labelSize.width, box.y + baseLine),
//			cv::Scalar(255, 255, 255), cv::FILLED);
//		cv::putText(image, label, cv::Point(box.x, box.y), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar());
//	}
//}
//int main() {
//	// 创建环境
//	Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "test");
//
//	// 创建会话选项
//	Ort::SessionOptions session_options;
//
//	// ONNX 模型路径
//	const wchar_t* model_path = L"D:/vsworksapce/yolo/x64/Debug/models/yolov8sbak.onnx";
//
//	// 创建会话
//	Ort::Session session(env, model_path, session_options);
//
//	// 打印模型输入输出信息
//	size_t num_inputs = session.GetInputCount();
//	size_t num_outputs = session.GetOutputCount();
//
//	// 使用 GetInputNameAllocated 和 GetOutputNameAllocated 获取输入输出名字
//	for (size_t i = 0; i < num_inputs; i++) {
//		Ort::AllocatedStringPtr input_name = session.GetInputNameAllocated(i, Ort::AllocatorWithDefaultOptions());
//		std::cout << "Input Name: " << input_name.get() << std::endl;
//	}
//
//	for (size_t i = 0; i < num_outputs; i++) {
//		Ort::AllocatedStringPtr output_name = session.GetOutputNameAllocated(i, Ort::AllocatorWithDefaultOptions());
//		std::cout << "Output Name: " << output_name.get() << std::endl;
//	}
//
//	// 读取输入图像
//	cv::Mat img = cv::imread("D:/vsworksapce/yolo/x64/Debug/images/bus.jpg");
//	if (img.empty()) {
//		std::cerr << "Could not open or find the image!" << std::endl;
//		return -1;
//	}
//
//	// 获取输入维度
//	auto input_node_dims = session.GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape();
//	auto inputData = preprocessImage(img, input_node_dims);
//
//	// 创建输入张量
//	Ort::MemoryInfo memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
//	Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info,
//		inputData.data(),
//		inputData.size(),
//		input_node_dims.data(),
//		input_node_dims.size());
//
//	// 推理
//	std::vector<const char*> input_names(num_inputs);
//	std::vector<const char*> output_names(num_outputs);
//
//	for (size_t i = 0; i < num_inputs; i++) {
//		input_names[i] = session.GetInputNameAllocated(i, Ort::AllocatorWithDefaultOptions()).get();
//	}
//	for (size_t i = 0; i < num_outputs; i++) {
//		output_names[i] = session.GetOutputNameAllocated(i, Ort::AllocatorWithDefaultOptions()).get();
//	}
//
//	auto output_tensors = session.Run(Ort::RunOptions{ nullptr },
//		input_names.data(),
//		&input_tensor,
//		num_inputs,
//		output_names.data(),
//		num_outputs);
//
//	// 处理输出...
 处理输出...
//	for (size_t i = 0; i < num_outputs; i++) {
//		auto& output_tensor = output_tensors[i]; // 使用引用而不是复制
//		float* floatValues = output_tensor.GetTensorMutableData<float>();
//		size_t length = output_tensor.GetTensorTypeAndShapeInfo().GetElementCount();
//
//		std::cout << "Output tensor " << i << ": " << length << " elements" << std::endl;
//		// 这里可以添加你的后处理逻辑
//	}
//
//	// 处理解析输出...
//	for (size_t i = 0; i < output_tensors.size(); ++i) {
//		float* floatValues = output_tensors[i].GetTensorMutableData<float>();
//		std::vector<float> output_vector(floatValues, floatValues + output_tensors[i].GetTensorTypeAndShapeInfo().GetElementCount());
//
//		// 绘制预测结果
//		drawPredictions(img, output_vector, 0.5, 0.4); // 设置置信度阈值和NMS阈值
//	}
//
//
//	// 显示带有预测结果的图像
//	cv::imshow("Detection Results", img);
//	cv::waitKey(0);
//	return 0;
//}

环境C++17

相关文章:

  • 不定方程求解(信息学奥赛一本通-1101)
  • EB-Cable许可管理中的数据安全与隐私保护
  • 江科大51单片机笔记【16】AD/DA(上)
  • Linux 跨进程同步方案
  • MySQL与Canal、RabbitMQ集成指南
  • 分布式存储学习——HBase表结构设计
  • 修改trae全局默认的JDK版本
  • Windows软件插件-音视频文件读取器
  • python数据分析--pandas读取数据--按行和列提取数据
  • 50个经典的python库
  • Python函数的递归调用
  • Flutter_学习记录_video_player、chewie 播放视频
  • Github 2025-03-12 C开源项目日报Top5
  • [洛谷]P1123 取数游戏
  • 文献分享: 对ColBERT段落多向量的剪枝——基于学习的方法
  • 设计模式Python版 模板方法模式(上)
  • Linux:基本指令与内涵理解
  • 初阶数据结构--复杂度
  • 前端发布缓存导致白屏解决方案
  • 解决webdriver和Chrome不匹配的办法
  • 世界黄金协会:一季度全球黄金投资需求同比增170%
  • 中央网信办部署开展“清朗·整治AI技术滥用”专项行动
  • 屠呦呦当选美国科学院外籍院士
  • 融创服务全面退出彰泰服务集团:约8.26亿元出售广西彰泰融创智慧80%股权
  • 荆州市委书记汪元程:全市各级干部要做到慎微、慎初、慎独、慎友
  • 住房和城乡建设部办公厅主任李晓龙已任部总工程师