tensorrt安装 2025
目录
tensorrt windows 安装:
pip 安装
添加到环境变量:
yolov12推理代码:
tensorrt安装 2025
tensorrt windows 安装:
从官网下载与本机 CUDA 版本对应的 TensorRT 压缩包并解压,解压后的 python 目录下有 whl 安装文件,用 pip 进行安装。
pip 安装
pip install "D:\xxx\TensorRT-10.12.0.36\python\tensorrt-10.12.0.36-cp310-none-win_amd64.whl"
以下为可选项(可以不安装):
pip install pycuda
添加到环境变量:
D:\xxx\TensorRT-10.12.0.36.Windows.win10.cuda-12.9\TensorRT-10.12.0.36\lib
@echo off
rem Install the TensorRT Python wheels from an unpacked TensorRT directory and
rem append that directory's lib folder to PATH.
set "TENSORRT_PATH=C:\TensorRT-8.6.1.6"

echo 正在安装TensorRT...
cd /d "%TENSORRT_PATH%\python"
rem cmd.exe does not expand wildcards for pip, so FOR resolves the file names.
rem NOTE(review): confirm this pattern matches the wheel names in your package.
for %%f in (tensorrt-*-none-any.whl) do pip install "%%f"

echo 正在安装GraphSurgeon...
cd /d "%TENSORRT_PATH%\graphsurgeon"
for %%f in (graphsurgeon-*-py2.py3-none-any.whl) do pip install "%%f"

echo 正在安装ONNX GraphSurgeon...
cd /d "%TENSORRT_PATH%\onnx_graphsurgeon"
for %%f in (onnx_graphsurgeon-*-py2.py3-none-any.whl) do pip install "%%f"

echo 设置环境变量...
rem NOTE: setx stores at most 1024 characters and writes the fully expanded
rem PATH of this session into the user-level PATH; check the result afterwards.
setx PATH "%PATH%;%TENSORRT_PATH%\lib"

echo TensorRT安装完成!
pause
yolov12推理代码:
"""YOLOv12 TensorRT demo: load an engine, time it, and run it on one image."""

import argparse
import time

import cv2
import matplotlib.pyplot as plt
import numpy as np
import tensorrt as trt
from cuda import cudart

from utils import common
# NOTE: preproc and vis are defined again further down in this script, so the
# local definitions shadow the names imported here.
from utils.utils import preproc, vis


class BaseEngine(object):
    """Wrap a serialized TensorRT engine with device buffers for its I/O tensors."""

    def __init__(self, engine_path):
        """Deserialize the engine at *engine_path* and allocate one device buffer per I/O tensor.

        Raises:
            RuntimeError: if TensorRT cannot deserialize the engine file.
        """
        self.mean = None
        self.std = None
        self.n_classes = 1
        self.class_names = ['person']

        logger = trt.Logger(trt.Logger.WARNING)
        logger.min_severity = trt.Logger.Severity.ERROR
        runtime = trt.Runtime(logger)
        trt.init_libnvinfer_plugins(logger, '')  # initialize TensorRT plugins
        with open(engine_path, "rb") as f:
            serialized_engine = f.read()
        self.engine = runtime.deserialize_cuda_engine(serialized_engine)
        if self.engine is None:
            raise RuntimeError(f"failed to deserialize TensorRT engine: {engine_path}")

        # Read the input size from the engine itself instead of trusting the
        # caller: everything after the first two dims of tensor 0.
        self.imgsz = self.engine.get_tensor_shape(self.engine.get_tensor_name(0))[2:]
        self.context = self.engine.create_execution_context()

        # Set up I/O bindings.
        self.inputs = []
        self.outputs = []
        self.allocations = []
        for i in range(self.engine.num_io_tensors):
            name = self.engine.get_tensor_name(i)
            dtype = self.engine.get_tensor_dtype(name)
            shape = self.engine.get_tensor_shape(name)
            is_input = self.engine.get_tensor_mode(name) == trt.TensorIOMode.INPUT
            if is_input:
                self.batch_size = shape[0]

            # Buffer size in bytes: element size times every dimension.
            size = np.dtype(trt.nptype(dtype)).itemsize
            for s in shape:
                size *= s
            allocation = common.cuda_call(cudart.cudaMalloc(size))

            binding = {
                'index': i,
                'name': name,
                'dtype': np.dtype(trt.nptype(dtype)),
                'shape': list(shape),
                'allocation': allocation,
                'size': size,
            }
            self.allocations.append(allocation)
            if is_input:
                self.inputs.append(binding)
            else:
                self.outputs.append(binding)

    def output_spec(self):
        """Return a list of ``(shape, numpy dtype)`` pairs, one per output tensor."""
        return [(o['shape'], o['dtype']) for o in self.outputs]

    def infer(self, img):
        """Run the engine on one preprocessed input and return the raw outputs.

        Args:
            img: numpy array holding the input data; it is made contiguous and
                copied to the first input binding as-is.

        Returns:
            A list of numpy arrays, one per output tensor, in binding order.
        """
        # Prepare host buffers for the output data.
        outputs = [np.zeros(shape, dtype) for shape, dtype in self.output_spec()]

        # Copy the input to the device, execute, and copy every output back.
        common.memcpy_host_to_device(self.inputs[0]['allocation'], np.ascontiguousarray(img))
        self.context.execute_v2(self.allocations)
        for host_buf, binding in zip(outputs, self.outputs):
            common.memcpy_device_to_host(host_buf, binding['allocation'])
        return outputs

    def inference(self, img_path, conf=0.5, end2end=False):
        """Detect objects in the image at *img_path* and draw them on it.

        Args:
            img_path: path of the image to read with OpenCV.
            conf: minimum score a detection needs to be drawn.
            end2end: True when the engine outputs
                ``(num, boxes, scores, class ids)`` directly; otherwise the
                first output is decoded and filtered with NMS here.

        Returns:
            The original image with the detections drawn on it.

        Raises:
            FileNotFoundError: if the image cannot be read.
        """
        origin_img = cv2.imread(img_path)
        if origin_img is None:
            raise FileNotFoundError(f"cannot read image: {img_path}")

        # letterbox() reads new_shape as (width, height), while the image fed
        # to the engine is (3, rows, cols), so the two trailing dims are swapped.
        input_h, input_w = self.imgsz[0], self.imgsz[1]
        _, img, ratio, (dw, dh) = letterbox(origin_img, (input_w, input_h))
        data = self.infer(img)

        if end2end:
            num, final_boxes, final_scores, final_cls_inds = data
            # Remove the letterbox padding from (x1, y1, x2, y2), then undo the scale.
            dwdh = np.asarray((dw, dh) * 2, dtype=np.float32)
            final_boxes -= dwdh
            final_boxes = np.reshape(final_boxes / ratio, (-1, 4))
            final_scores = np.reshape(final_scores, (-1, 1))
            final_cls_inds = np.reshape(final_cls_inds, (-1, 1))
            count = int(np.ravel(num)[0])
            dets = np.concatenate(
                [final_boxes[:count], final_scores[:count], final_cls_inds[:count]],
                axis=-1,
            )
        else:
            predictions = data[0].transpose(0, 2, 1)[0]
            dets = self.postprocess(predictions, ratio, dw, dh)

        if dets is not None:
            final_boxes, final_scores, final_cls_inds = dets[:, :4], dets[:, 4], dets[:, 5]
            origin_img = vis(origin_img, final_boxes, final_scores, final_cls_inds,
                             conf=conf, class_names=self.class_names)
        return origin_img

    def detect_video(self, video_path, conf=0.5, end2end=False):
        """Placeholder for video inference, which this script does not implement.

        Raises:
            NotImplementedError: always; the command line offers ``--video``
                but no video pipeline exists in this file.
        """
        raise NotImplementedError(
            "video inference is not implemented in this script; use --image instead"
        )

    @staticmethod
    def postprocess(predictions, ratio, dw, dh):
        """Convert raw rows to original-image boxes and apply NMS.

        Args:
            predictions: 2-D array whose columns 0-3 are read as
                ``(cx, cy, w, h)`` and column 4 as the score; only that one
                score column is used.
            ratio: scale factor applied by the letterbox resize.
            dw: horizontal letterbox padding per side.
            dh: vertical letterbox padding per side.

        Returns:
            Rows of ``(x1, y1, x2, y2, score, class)``, or None if nothing
            passes the score threshold.
        """
        boxes = predictions[:, :4]
        scores = predictions[:, 4:5]

        boxes_xyxy = np.ones_like(boxes)
        boxes_xyxy[:, 0] = boxes[:, 0] - boxes[:, 2] / 2.
        boxes_xyxy[:, 1] = boxes[:, 1] - boxes[:, 3] / 2.
        boxes_xyxy[:, 2] = boxes[:, 0] + boxes[:, 2] / 2.
        boxes_xyxy[:, 3] = boxes[:, 1] + boxes[:, 3] / 2.
        boxes_xyxy[:, [0, 2]] = (boxes_xyxy[:, [0, 2]] - dw) / ratio  # undo x padding, then scale
        boxes_xyxy[:, [1, 3]] = (boxes_xyxy[:, [1, 3]] - dh) / ratio  # undo y padding, then scale
        return multiclass_nms(boxes_xyxy, scores, nms_thr=0.45, score_thr=0.1)

    def get_fps(self):
        """Print the mean seconds per ``infer`` call over 100 runs, after 5 warm-up runs."""
        img = np.ones((1, 3, self.imgsz[0], self.imgsz[1]))
        img = np.ascontiguousarray(img, dtype=np.float32)
        for _ in range(5):  # warmup
            self.infer(img)

        t0 = time.perf_counter()
        for _ in range(100):  # calculate average time
            self.infer(img)
        print((time.perf_counter() - t0) / 100, 's')


def nms(boxes, scores, nms_thr):
    """Single-class NMS in NumPy; return the indices of the boxes that are kept.

    Boxes are ``(x1, y1, x2, y2)`` rows. A box is dropped when its IoU with an
    already kept, higher-scoring box exceeds *nms_thr*.
    """
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]

    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # highest score first

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        rest = order[1:]
        xx1 = np.maximum(x1[i], x1[rest])
        yy1 = np.maximum(y1[i], y1[rest])
        xx2 = np.minimum(x2[i], x2[rest])
        yy2 = np.minimum(y2[i], y2[rest])

        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[rest] - inter)

        # inds indexes into order[1:], hence the +1 to index order itself.
        inds = np.where(ovr <= nms_thr)[0]
        order = order[inds + 1]
    return keep


def multiclass_nms(boxes, scores, nms_thr, score_thr):
    """Run ``nms`` per score column and stack the survivors.

    Args:
        boxes: ``(n, 4)`` array of ``(x1, y1, x2, y2)`` rows.
        scores: ``(n, num_classes)`` array of per-class scores.
        nms_thr: IoU threshold passed to ``nms``.
        score_thr: scores at or below this value are discarded before NMS.

    Returns:
        Rows of ``(x1, y1, x2, y2, score, class index)``, or None if no score
        exceeds *score_thr*.
    """
    final_dets = []
    num_classes = scores.shape[1]
    for cls_ind in range(num_classes):
        cls_scores = scores[:, cls_ind]
        valid_score_mask = cls_scores > score_thr
        if valid_score_mask.sum() == 0:
            continue
        valid_scores = cls_scores[valid_score_mask]
        valid_boxes = boxes[valid_score_mask]
        keep = nms(valid_boxes, valid_scores, nms_thr)
        if len(keep) > 0:
            cls_inds = np.ones((len(keep), 1)) * cls_ind
            dets = np.concatenate(
                [valid_boxes[keep], valid_scores[keep, None], cls_inds], 1
            )
            final_dets.append(dets)
    if len(final_dets) == 0:
        return None
    return np.concatenate(final_dets, 0)


def preproc(image, input_size, mean, std, swap=(2, 0, 1)):
    """Resize *image* into the top-left of a 114-filled canvas and normalize it.

    Args:
        image: input image array.
        input_size: ``(rows, cols)`` of the canvas.
        mean: value subtracted after scaling to [0, 1], or None to skip.
        std: value divided by after the mean step, or None to skip.
        swap: axis order applied with ``transpose`` at the end.

    Returns:
        ``(padded_img, r)``: the contiguous float32 image and the resize ratio.
    """
    if len(image.shape) == 3:
        padded_img = np.ones((input_size[0], input_size[1], 3)) * 114.0
    else:
        padded_img = np.ones(input_size) * 114.0
    img = np.array(image)
    r = min(input_size[0] / img.shape[0], input_size[1] / img.shape[1])
    resized_img = cv2.resize(
        img,
        (int(img.shape[1] * r), int(img.shape[0] * r)),
        interpolation=cv2.INTER_LINEAR,
    ).astype(np.float32)
    padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img

    padded_img = padded_img[:, :, ::-1]  # reverse the channel order
    padded_img /= 255.0
    if mean is not None:
        padded_img -= mean
    if std is not None:
        padded_img /= std
    padded_img = padded_img.transpose(swap)
    padded_img = np.ascontiguousarray(padded_img, dtype=np.float32)
    return padded_img, r


def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), swap=(2, 0, 1)):
    """Resize *im* keeping its aspect ratio and pad it evenly to *new_shape*.

    Args:
        im: BGR image as read by OpenCV, shape ``(height, width, 3)``.
        new_shape: target ``(width, height)``, or a single int for a square.
        color: border colour used for the padding.
        swap: axis order applied with ``transpose`` to the returned tensor.

    Returns:
        ``(imshow, im, r, (dw, dh))``: the padded BGR image, the RGB float32
        tensor scaled to [0, 1], the resize ratio, and the padding per side.
    """
    shape = im.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)
    # new_shape: [width, height]

    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[1], new_shape[1] / shape[0])

    # Compute padding [width, height]
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[0] - new_unpad[0], new_shape[1] - new_unpad[1]  # wh padding
    dw /= 2  # divide padding into 2 sides
    dh /= 2

    if shape[::-1] != new_unpad:  # resize
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    # The -0.1 / +0.1 offsets send an odd padding pixel to the bottom/right side.
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right,
                            cv2.BORDER_CONSTANT, value=color)  # add border
    imshow = im.copy()
    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    im = im.transpose(swap)
    im = np.ascontiguousarray(im, dtype=np.float32) / 255.
    return imshow, im, r, (dw, dh)


def rainbow_fill(size=50):
    """Return *size* RGB colours sampled evenly from matplotlib's 'jet' colormap."""
    cmap = plt.get_cmap('jet')
    # might need rounding? (round(x, 3) for x in color)[:3]
    return np.array([cmap(n / size)[:3] for n in range(size)])


_COLORS = rainbow_fill(80).astype(np.float32).reshape(-1, 3)


def vis(img, boxes, scores, cls_ids, conf=0.5, class_names=None):
    """Draw every box scoring at least *conf* on *img*, show it, and return it.

    The image is modified in place. The window blocks until a key is pressed.
    *class_names* is accepted for interface compatibility but not used.
    """
    for box, score, cls_id in zip(boxes, scores, cls_ids):
        if score < conf:
            continue
        x0, y0, x1, y1 = (int(v) for v in box[:4])
        # Wrap the class id so ids beyond the palette size still get a colour.
        color = (_COLORS[int(cls_id) % len(_COLORS)] * 255).astype(np.uint8).tolist()
        cv2.rectangle(img, (x0, y0), (x1, y1), color, 2)
    cv2.imshow('vis.jpg', img)
    cv2.waitKey(0)
    return img


class Predictor(BaseEngine):
    """BaseEngine configured for this model's class count."""

    def __init__(self, engine_path):
        super(Predictor, self).__init__(engine_path)
        self.n_classes = 1  # your model classes


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("-e", "--engine",
                        default=r"D:\project\trt\TensorRT-For-YOLO-Series-cuda-python\yolov12s.engine",
                        help="TRT engine Path")
    parser.add_argument("-i", "--image",
                        default=r"C:\Users\ChanJing-01\Pictures\111.jpg",
                        help="image path")
    parser.add_argument("-o", "--output", default='res.jpg', help="image output path")
    parser.add_argument("-v", "--video", help="video path or camera index ")
    parser.add_argument("--end2end", default=False, action="store_true",
                        help="use end2end engine")
    args = parser.parse_args()
    print(args)

    pred = Predictor(engine_path=args.engine)
    pred.get_fps()
    img_path = args.image
    video = args.video
    if img_path:
        origin_img = pred.inference(img_path, conf=0.1, end2end=args.end2end)
        cv2.imwrite(args.output, origin_img)
    if video:
        pred.detect_video(video, conf=0.1, end2end=args.end2end)  # set 0 use a webcam