Python OpenCV 4.10 库详解
本文档系统梳理 Python OpenCV 4.10 的核心模块与常用 API,并给出可运行的示例代码。
核心模块覆盖:
Core模块:基本数据结构、矩阵操作、数学运算
ImgProc模块:图像处理的核心功能,包括颜色转换、几何变换、滤波、边缘检测
VideoIO模块:视频和摄像头操作
HighGUI模块:用户界面功能,窗口管理、事件处理
Features2D模块:特征检测和匹配(SIFT、ORB等)
ObjDetect模块:目标检测算法
DNN模块:深度学习模型集成
Video模块:视频分析(光流、背景减除)
Calib3D模块:相机标定和3D重建
Photo模块:计算摄影学(HDR、去噪、修复)
ML模块:机器学习算法
简介
OpenCV (Open Source Computer Vision Library) 是一个开源的计算机视觉和机器学习库。OpenCV 4.10 提供了丰富的图像处理、计算机视觉、机器学习和深度学习功能。本文档将详细介绍 OpenCV 4.10 的主要子模块和 API。
安装
pip install opencv-python==4.10.*
pip install opencv-contrib-python==4.10.* # 包含额外模块
核心架构概述
OpenCV 4.10 采用模块化设计,主要模块包括:
- core: 核心功能,数据结构和基本操作
- imgproc: 图像处理
- imgcodecs: 图像编解码
- videoio: 视频I/O
- highgui: GUI功能
- video: 视频分析
- calib3d: 相机标定和3D重建
- features2d: 2D特征检测
- objdetect: 目标检测
- dnn: 深度神经网络
- ml: 机器学习
- photo: 计算摄影学
Core 模块 - 核心功能
基本数据结构
# Core module: basic data structures.
# In Python, OpenCV images are plain NumPy arrays.
import cv2
import numpy as np

# Create matrices of different kinds.
mat_zeros = np.zeros((480, 640, 3), dtype=np.uint8)   # 3-channel image
mat_ones = np.ones((100, 100), dtype=np.float32)      # single-channel float matrix
mat_random = np.random.randint(0, 255, (200, 200, 3), dtype=np.uint8)

# Matrix attributes.
print(f"形状: {mat_zeros.shape}")
print(f"数据类型: {mat_zeros.dtype}")
print(f"通道数: {mat_zeros.shape[2] if len(mat_zeros.shape) == 3 else 1}")
print(f"元素总数: {mat_zeros.size}")
基本矩阵操作
# Core module: basic matrix operations.
# cv2.Mat is a thin NumPy-array wrapper available in opencv-python >= 4.5.4.
mat = cv2.Mat(np.zeros((100, 100, 3), dtype=np.uint8))

# Clone vs. reference.
mat_clone = mat.copy()   # deep copy
mat_ref = mat            # reference, not a copy

# Type conversion.
float_mat = mat.astype(np.float32) / 255.0
uint8_mat = (float_mat * 255).astype(np.uint8)

# ROI (Region of Interest) operations.
roi = mat[50:150, 100:200]        # extract a region of interest
mat[0:100, 0:100] = [255, 0, 0]   # paint a region (BGR order)

# Channel split and merge.
b, g, r = cv2.split(mat)          # split BGR channels
merged = cv2.merge([b, g, r])     # merge channels back
数学运算
# Core module: saturated arithmetic, bitwise, and comparison operations.
img1 = np.ones((300, 300, 3), dtype=np.uint8) * 100
img2 = np.ones((300, 300, 3), dtype=np.uint8) * 50

# Addition (cv2.add saturates at 255 instead of wrapping like NumPy '+').
add_result = cv2.add(img1, img2)
add_weighted = cv2.addWeighted(img1, 0.7, img2, 0.3, 0)

# Subtraction.
sub_result = cv2.subtract(img1, img2)
absdiff_result = cv2.absdiff(img1, img2)

# Bitwise operations.
bitwise_and = cv2.bitwise_and(img1, img2)
bitwise_or = cv2.bitwise_or(img1, img2)
bitwise_xor = cv2.bitwise_xor(img1, img2)
bitwise_not = cv2.bitwise_not(img1)

# Comparison and elementwise min/max.
comparison = cv2.compare(img1, img2, cv2.CMP_GT)
min_result = cv2.min(img1, img2)
max_result = cv2.max(img1, img2)
ImgProc 模块 - 图像处理
颜色空间转换
# ImgProc: color-space conversions.
img = cv2.imread('example.jpg')  # loaded as BGR

# Common conversions.
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# YUV and CIE XYZ color spaces.
yuv = cv2.cvtColor(img, cv2.COLOR_BGR2YUV)
xyz = cv2.cvtColor(img, cv2.COLOR_BGR2XYZ)

# Single channel back to 3 channels (replicates the gray plane).
gray_bgr = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)
几何变换
# ImgProc: geometric transformations.
height, width = img.shape[:2]

# Scaling: INTER_CUBIC is preferred for upscaling, INTER_AREA for downscaling.
scaled_up = cv2.resize(img, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
scaled_down = cv2.resize(img, (width//2, height//2), interpolation=cv2.INTER_AREA)

# Rotation around the image center by 45 degrees (scale 1.0).
center = (width//2, height//2)
rotation_matrix = cv2.getRotationMatrix2D(center, 45, 1.0)
rotated = cv2.warpAffine(img, rotation_matrix, (width, height))

# Translation by (100, 50) pixels via a 2x3 affine matrix.
translation_matrix = np.float32([[1, 0, 100], [0, 1, 50]])
translated = cv2.warpAffine(img, translation_matrix, (width, height))

# Affine transform from 3 point correspondences.
pts1 = np.float32([[50, 50], [200, 50], [50, 200]])
pts2 = np.float32([[10, 100], [200, 50], [100, 250]])
affine_matrix = cv2.getAffineTransform(pts1, pts2)
affine_result = cv2.warpAffine(img, affine_matrix, (width, height))

# Perspective transform from 4 point correspondences.
pts1 = np.float32([[56, 65], [368, 52], [28, 387], [389, 390]])
pts2 = np.float32([[0, 0], [300, 0], [0, 300], [300, 300]])
perspective_matrix = cv2.getPerspectiveTransform(pts1, pts2)
perspective_result = cv2.warpPerspective(img, perspective_matrix, (300, 300))
滤波操作
# ImgProc: smoothing, morphology, and custom convolution.
blur = cv2.blur(img, (15, 15))                    # box (mean) filter
gaussian = cv2.GaussianBlur(img, (15, 15), 0)     # Gaussian filter
median = cv2.medianBlur(img, 15)                  # median filter
bilateral = cv2.bilateralFilter(img, 9, 75, 75)   # edge-preserving bilateral

# Morphological operations with a 5x5 rectangular kernel.
kernel = np.ones((5, 5), np.uint8)
erosion = cv2.erode(img, kernel, iterations=1)
dilation = cv2.dilate(img, kernel, iterations=1)
opening = cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel)       # erode then dilate
closing = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel)      # dilate then erode
gradient = cv2.morphologyEx(img, cv2.MORPH_GRADIENT, kernel)  # dilation - erosion
tophat = cv2.morphologyEx(img, cv2.MORPH_TOPHAT, kernel)      # src - opening
blackhat = cv2.morphologyEx(img, cv2.MORPH_BLACKHAT, kernel)  # closing - src

# Custom sharpening kernel applied with 2-D convolution.
kernel_sharpen = np.array([[-1, -1, -1], [-1, 9, -1], [-1, -1, -1]])
sharpened = cv2.filter2D(img, -1, kernel_sharpen)
边缘检测
# ImgProc: edge detection operators.
edges = cv2.Canny(gray, 100, 200)  # hysteresis thresholds 100 / 200

# Sobel derivatives (CV_64F preserves negative gradient values).
sobelx = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
sobely = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
sobel_combined = cv2.magnitude(sobelx, sobely)

# Laplacian (second derivative).
laplacian = cv2.Laplacian(gray, cv2.CV_64F)

# Scharr: more accurate 3x3 derivative kernel than Sobel.
scharrx = cv2.Scharr(gray, cv2.CV_64F, 1, 0)
scharry = cv2.Scharr(gray, cv2.CV_64F, 0, 1)
轮廓检测和分析
# ImgProc: contour detection and per-contour shape analysis.
contours, hierarchy = cv2.findContours(edges, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

# Draw all contours (index -1) in green.
contour_img = img.copy()
cv2.drawContours(contour_img, contours, -1, (0, 255, 0), 2)

for contour in contours:
    # Area and perimeter (closed curve).
    area = cv2.contourArea(contour)
    perimeter = cv2.arcLength(contour, True)
    # Axis-aligned bounding box.
    x, y, w, h = cv2.boundingRect(contour)
    cv2.rectangle(img, (x, y), (x+w, y+h), (255, 0, 0), 2)
    # Minimum enclosing circle.
    (x, y), radius = cv2.minEnclosingCircle(contour)
    cv2.circle(img, (int(x), int(y)), int(radius), (0, 255, 0), 2)
    # Ellipse fitting requires at least 5 points.
    if len(contour) >= 5:
        ellipse = cv2.fitEllipse(contour)
        cv2.ellipse(img, ellipse, (0, 0, 255), 2)
    # Polygonal approximation at 2% of the perimeter.
    epsilon = 0.02 * cv2.arcLength(contour, True)
    approx = cv2.approxPolyDP(contour, epsilon, True)
    # Convex hull.
    hull = cv2.convexHull(contour)
ImgCodecs 模块 - 图像编解码
图像读取和保存
# ImgCodecs: reading, writing, and in-memory encoding/decoding.
img_color = cv2.imread('image.jpg', cv2.IMREAD_COLOR)          # BGR color
img_gray = cv2.imread('image.jpg', cv2.IMREAD_GRAYSCALE)       # grayscale
img_unchanged = cv2.imread('image.png', cv2.IMREAD_UNCHANGED)  # keeps alpha channel

# Saving with format-specific parameters.
cv2.imwrite('output.jpg', img, [cv2.IMWRITE_JPEG_QUALITY, 90])
cv2.imwrite('output.png', img, [cv2.IMWRITE_PNG_COMPRESSION, 9])

# Encode to an in-memory byte buffer, then decode it back.
ret, buffer = cv2.imencode('.jpg', img, [cv2.IMWRITE_JPEG_QUALITY, 80])
if ret:
    img_decoded = cv2.imdecode(buffer, cv2.IMREAD_COLOR)

# Commonly supported file formats.
formats = ['.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.webp', '.pbm', '.pgm', '.ppm']
VideoIO 模块 - 视频输入输出
视频读取和写入
# VideoIO: read a video file, process each frame, and write the result.
cap = cv2.VideoCapture('video.mp4')

# Video properties.
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
print(f"FPS: {fps}, 尺寸: {width}x{height}, 总帧数: {total_frames}")

# Writer: XVID codec in an AVI container, same fps/size as the source.
fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter('output.avi', fourcc, fps, (width, height))

# Process the video frame by frame.
while True:
    ret, frame = cap.read()
    if not ret:
        break
    processed_frame = cv2.GaussianBlur(frame, (15, 15), 0)
    out.write(processed_frame)
    cv2.imshow('Frame', processed_frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release resources.
cap.release()
out.release()
cv2.destroyAllWindows()
摄像头操作
# VideoIO: live camera capture and real-time processing.
cap = cv2.VideoCapture(0)  # default camera

# Request capture properties (the driver may ignore unsupported values).
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
cap.set(cv2.CAP_PROP_FPS, 30)

while True:
    ret, frame = cap.read()
    if not ret:
        break
    # Real-time processing: grayscale + Canny edges.
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150)
    cv2.imshow('Original', frame)
    cv2.imshow('Edges', edges)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
HighGUI 模块 - 图形用户界面
窗口操作
# HighGUI: window management.
cv2.namedWindow('Image', cv2.WINDOW_AUTOSIZE)    # fixed-size window
cv2.namedWindow('Resizable', cv2.WINDOW_NORMAL)  # user-resizable window

cv2.imshow('Image', img)

# Move and resize windows.
cv2.moveWindow('Image', 100, 100)
cv2.resizeWindow('Resizable', 800, 600)

# Block until a key is pressed.
key = cv2.waitKey(0)
if key == 27:  # ESC key
    cv2.destroyAllWindows()

# Save the displayed image as a screenshot.
cv2.imwrite('screenshot.png', img)
鼠标事件处理
# HighGUI: mouse event handling with a callback.
def mouse_callback(event, x, y, flags, param):
    """Draw colored dots on the shared image at click/drag positions."""
    if event == cv2.EVENT_LBUTTONDOWN:
        print(f"左键点击: ({x}, {y})")
        cv2.circle(img, (x, y), 5, (255, 0, 0), -1)
    elif event == cv2.EVENT_RBUTTONDOWN:
        print(f"右键点击: ({x}, {y})")
        cv2.circle(img, (x, y), 5, (0, 255, 0), -1)
    elif event == cv2.EVENT_MOUSEMOVE:
        # Draw while dragging with the left button held down.
        if flags == cv2.EVENT_FLAG_LBUTTON:
            cv2.circle(img, (x, y), 3, (0, 0, 255), -1)

# Register the callback on a named window.
img = np.zeros((512, 512, 3), np.uint8)
cv2.namedWindow('Mouse Events')
cv2.setMouseCallback('Mouse Events', mouse_callback)

while True:
    cv2.imshow('Mouse Events', img)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cv2.destroyAllWindows()
滑动条控件
# HighGUI: trackbar (slider) control for interactive image blending.
def trackbar_callback(val):
    """Blend img1 and img2 by the slider value (0-100 maps to alpha 0.0-1.0)."""
    global img_result
    alpha = val / 100.0
    img_result = cv2.addWeighted(img1, alpha, img2, 1-alpha, 0)
    cv2.imshow('Blended', img_result)

img1 = cv2.imread('image1.jpg')
img2 = cv2.imread('image2.jpg')
img_result = img1.copy()

cv2.namedWindow('Blended')
cv2.createTrackbar('Alpha', 'Blended', 50, 100, trackbar_callback)

# Show the initial 50/50 blend.
trackbar_callback(50)
cv2.waitKey(0)
cv2.destroyAllWindows()
Features2D 模块 - 特征检测
角点检测
# Features2D: corner detection.
# Harris corner response map; mark strong corners in red.
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
harris_corners = cv2.cornerHarris(gray, 2, 3, 0.04)
img[harris_corners > 0.01 * harris_corners.max()] = [0, 0, 255]

# Shi-Tomasi ("good features to track") corner detection.
corners = cv2.goodFeaturesToTrack(gray, maxCorners=100, qualityLevel=0.01,
                                  minDistance=10, blockSize=3)
if corners is not None:
    # np.intp, not np.int0: the np.int0 alias was removed in NumPy 2.0.
    corners = np.intp(corners)
    for corner in corners:
        x, y = corner.ravel()
        cv2.circle(img, (x, y), 3, (0, 255, 0), -1)
SIFT特征检测
# Features2D: SIFT keypoint detection and description.
sift = cv2.SIFT_create()
keypoints, descriptors = sift.detectAndCompute(gray, None)

# Rich keypoints visualize scale and orientation.
img_keypoints = cv2.drawKeypoints(
    img, keypoints, None, flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
print(f"检测到 {len(keypoints)} 个关键点")
ORB特征检测
# Features2D: ORB keypoint detection (fast, binary descriptors).
orb = cv2.ORB_create(nfeatures=500)
keypoints, descriptors = orb.detectAndCompute(gray, None)

# Draw keypoints in green.
img_orb = cv2.drawKeypoints(img, keypoints, None, color=(0, 255, 0))
特征匹配
# Features2D: feature matching between two images.
img1 = cv2.imread('image1.jpg', 0)   # grayscale
img2 = cv2.imread('image2.jpg', 0)

# ORB keypoints and binary descriptors.
orb = cv2.ORB_create()
kp1, des1 = orb.detectAndCompute(img1, None)
kp2, des2 = orb.detectAndCompute(img2, None)

# Brute-force matcher; Hamming norm suits ORB's binary descriptors.
bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
matches = bf.match(des1, des2)
matches = sorted(matches, key=lambda x: x.distance)

# Draw the 20 best matches.
img_matches = cv2.drawMatches(img1, kp1, img2, kp2, matches[:20], None,
                              flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS)

# FLANN matcher (for float descriptors such as SIFT/SURF).
FLANN_INDEX_KDTREE = 1
index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
search_params = dict(checks=50)
flann = cv2.FlannBasedMatcher(index_params, search_params)

sift = cv2.SIFT_create()
kp1, des1 = sift.detectAndCompute(img1, None)
kp2, des2 = sift.detectAndCompute(img2, None)
matches = flann.knnMatch(des1, des2, k=2)

# Lowe's ratio test keeps only distinctive matches.
good_matches = []
for m, n in matches:
    if m.distance < 0.7 * n.distance:
        good_matches.append(m)
ObjDetect 模块 - 目标检测
Haar级联分类器
# ObjDetect: Haar cascade face and eye detection.
face_cascade = cv2.CascadeClassifier(
    cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
eye_cascade = cv2.CascadeClassifier(
    cv2.data.haarcascades + 'haarcascade_eye.xml')

gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5,
                                      minSize=(30, 30))

for (x, y, w, h) in faces:
    cv2.rectangle(img, (x, y), (x+w, y+h), (255, 0, 0), 2)
    # Search for eyes only inside the detected face region.
    roi_gray = gray[y:y+h, x:x+w]
    roi_color = img[y:y+h, x:x+w]
    eyes = eye_cascade.detectMultiScale(roi_gray)
    for (ex, ey, ew, eh) in eyes:
        cv2.rectangle(roi_color, (ex, ey), (ex+ew, ey+eh), (0, 255, 0), 2)
HOG行人检测
# ObjDetect: HOG + linear-SVM pedestrian detection.
hog = cv2.HOGDescriptor()
hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())

# Multi-scale detection over the whole image.
pedestrians, weights = hog.detectMultiScale(img, winStride=(8, 8),
                                            padding=(32, 32), scale=1.05)

for (x, y, w, h) in pedestrians:
    cv2.rectangle(img, (x, y), (x+w, y+h), (0, 255, 0), 2)
DNN 模块 - 深度神经网络
加载和使用预训练模型
# DNN: object detection with a pretrained YOLOv3 model.
net = cv2.dnn.readNet('yolov3.weights', 'yolov3.cfg')

# Names of the unconnected output layers.
# OpenCV >= 4.x returns a flat array of 1-based indices, so index with
# (i - 1); flatten() also tolerates the older (N, 1)-shaped return.
layer_names = net.getLayerNames()
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers().flatten()]

# COCO class names, one per line.
with open('coco.names', 'r') as f:
    classes = [line.strip() for line in f.readlines()]

# Preprocess into a 4-D blob: scale 1/255, 416x416, swap R and B channels.
height, width, channels = img.shape
blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)

net.setInput(blob)
outs = net.forward(output_layers)

# Parse detections above the confidence threshold.
class_ids = []
confidences = []
boxes = []
for out in outs:
    for detection in out:
        scores = detection[5:]
        class_id = np.argmax(scores)
        confidence = scores[class_id]
        if confidence > 0.5:
            # Box center/size are normalized; scale to image pixels.
            center_x = int(detection[0] * width)
            center_y = int(detection[1] * height)
            w = int(detection[2] * width)
            h = int(detection[3] * height)
            x = int(center_x - w / 2)
            y = int(center_y - h / 2)
            boxes.append([x, y, w, h])
            confidences.append(float(confidence))
            class_ids.append(class_id)

# Non-maximum suppression drops overlapping boxes.
indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
if len(indexes) > 0:
    for i in indexes.flatten():
        x, y, w, h = boxes[i]
        label = str(classes[class_ids[i]])
        confidence = confidences[i]
        cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
        cv2.putText(img, f'{label}: {confidence:.2f}', (x, y - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
OpenCV DNN支持的框架
# DNN: loading models exported from different frameworks.
net_tf = cv2.dnn.readNetFromTensorflow('model.pb', 'config.pbtxt')           # TensorFlow
net_caffe = cv2.dnn.readNetFromCaffe('deploy.prototxt', 'model.caffemodel')  # Caffe
net_onnx = cv2.dnn.readNetFromONNX('model.onnx')                             # ONNX
net_darknet = cv2.dnn.readNetFromDarknet('config.cfg', 'weights.weights')    # Darknet

# Select compute backend and target (CPU by default).
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

# GPU acceleration (requires a CUDA-enabled OpenCV build):
# net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
# net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
Video 模块 - 视频分析
光流估计
# Video module: sparse optical flow (Lucas-Kanade pyramid tracker).
cap = cv2.VideoCapture('video.mp4')

# First frame and initial corner features to track.
ret, old_frame = cap.read()
old_gray = cv2.cvtColor(old_frame, cv2.COLOR_BGR2GRAY)
p0 = cv2.goodFeaturesToTrack(old_gray, mask=None, maxCorners=100,
                             qualityLevel=0.3, minDistance=7, blockSize=7)

# Random colors for drawing the trajectories.
colors = np.random.randint(0, 255, (100, 3))
mask = np.zeros_like(old_frame)

while True:
    ret, frame = cap.read()
    if not ret:
        break
    frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    # Track the points from the previous frame into the current one.
    p1, st, err = cv2.calcOpticalFlowPyrLK(old_gray, frame_gray, p0, None,
                                           winSize=(15, 15), maxLevel=2)
    # Keep only successfully tracked points (status == 1).
    good_new = p1[st == 1]
    good_old = p0[st == 1]
    # Draw the trajectories.
    for i, (tr, to) in enumerate(zip(good_new, good_old)):
        a, b = tr.ravel()
        c, d = to.ravel()
        mask = cv2.line(mask, (int(a), int(b)), (int(c), int(d)),
                        colors[i].tolist(), 2)
        frame = cv2.circle(frame, (int(a), int(b)), 5, colors[i].tolist(), -1)
    img = cv2.add(frame, mask)
    cv2.imshow('Frame', img)
    if cv2.waitKey(30) & 0xFF == ord('q'):
        break
    # The current frame becomes the reference for the next iteration.
    old_gray = frame_gray.copy()
    p0 = good_new.reshape(-1, 1, 2)

cap.release()
cv2.destroyAllWindows()
背景减除
# Video module: background subtraction with MOG2.
backSub = cv2.createBackgroundSubtractorMOG2(detectShadows=True)

cap = cv2.VideoCapture('video.mp4')
while True:
    ret, frame = cap.read()
    if not ret:
        break
    # Foreground mask for the current frame.
    fgMask = backSub.apply(frame)
    # Morphological opening removes small noise blobs.
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    fgMask = cv2.morphologyEx(fgMask, cv2.MORPH_OPEN, kernel)
    # Moving objects appear as contours in the foreground mask.
    contours, _ = cv2.findContours(fgMask, cv2.RETR_EXTERNAL,
                                   cv2.CHAIN_APPROX_SIMPLE)
    for contour in contours:
        if cv2.contourArea(contour) > 500:  # ignore tiny regions
            x, y, w, h = cv2.boundingRect(contour)
            cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)
    cv2.imshow('Frame', frame)
    cv2.imshow('FG Mask', fgMask)
    if cv2.waitKey(30) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
Calib3D 模块 - 相机标定和3D重建
相机标定
# Calib3D: single-camera calibration from chessboard images.
import glob

# Termination criteria for sub-pixel corner refinement.
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)
pattern_size = (9, 6)  # inner-corner count of the chessboard

# Object points: (0,0,0), (1,0,0), ... laid out on the z=0 plane.
objp = np.zeros((pattern_size[0] * pattern_size[1], 3), np.float32)
objp[:, :2] = np.mgrid[0:pattern_size[0], 0:pattern_size[1]].T.reshape(-1, 2)

objpoints = []  # 3-D points in world coordinates
imgpoints = []  # 2-D points in image coordinates

images = glob.glob('calibration/*.jpg')
for fname in images:
    img = cv2.imread(fname)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Locate the chessboard corners.
    ret, corners = cv2.findChessboardCorners(gray, pattern_size, None)
    if ret:
        objpoints.append(objp)
        # Refine corner positions to sub-pixel accuracy.
        corners2 = cv2.cornerSubPix(gray, corners, (11, 11), (-1, -1), criteria)
        imgpoints.append(corners2)
        # Visualize the detected corners briefly.
        cv2.drawChessboardCorners(img, pattern_size, corners2, ret)
        cv2.imshow('img', img)
        cv2.waitKey(500)

cv2.destroyAllWindows()

# Calibrate: intrinsic matrix, distortion coefficients, per-view poses.
ret, mtx, dist, rvecs, tvecs = cv2.calibrateCamera(
    objpoints, imgpoints, gray.shape[::-1], None, None)
print("相机矩阵:")
print(mtx)
print("\n畸变系数:")
print(dist)
立体视觉
# Calib3D: stereo calibration, rectification, and disparity computation.
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 30, 0.001)

# Joint calibration of both cameras; R/T give the right camera's pose
# relative to the left, E/F are the essential/fundamental matrices.
retval, cameraMatrix1, distCoeffs1, cameraMatrix2, distCoeffs2, R, T, E, F = \
    cv2.stereoCalibrate(objpoints, imgpoints_left, imgpoints_right,
                        mtx1, dist1, mtx2, dist2, gray.shape[::-1], criteria)

# Rectification transforms so that epipolar lines become horizontal.
R1, R2, P1, P2, Q, validPixROI1, validPixROI2 = \
    cv2.stereoRectify(cameraMatrix1, distCoeffs1, cameraMatrix2, distCoeffs2,
                      gray.shape[::-1], R, T)

# Per-camera undistort/rectify remap tables.
map1x, map1y = cv2.initUndistortRectifyMap(cameraMatrix1, distCoeffs1, R1, P1,
                                           gray.shape[::-1], cv2.CV_32FC1)
map2x, map2y = cv2.initUndistortRectifyMap(cameraMatrix2, distCoeffs2, R2, P2,
                                           gray.shape[::-1], cv2.CV_32FC1)

# Block-matching stereo correspondence.
stereo = cv2.StereoBM_create(numDisparities=16*5, blockSize=21)

left_img = cv2.imread('left.jpg', 0)
right_img = cv2.imread('right.jpg', 0)

# Rectify the pair, then compute and normalize the disparity map for display.
left_rectified = cv2.remap(left_img, map1x, map1y, cv2.INTER_LINEAR)
right_rectified = cv2.remap(right_img, map2x, map2y, cv2.INTER_LINEAR)
disparity = stereo.compute(left_rectified, right_rectified)
disparity_normalized = cv2.normalize(disparity, None, 0, 255,
                                     cv2.NORM_MINMAX, cv2.CV_8U)
Photo 模块 - 计算摄影学
图像修复
# Photo module: image inpainting (restore damaged regions given a mask).
damaged_img = cv2.imread('damaged.jpg')
mask = cv2.imread('mask.jpg', 0)  # non-zero pixels mark the damaged area

# Navier-Stokes based method.
restored_ns = cv2.inpaint(damaged_img, mask, 3, cv2.INPAINT_NS)
# Fast-marching (Telea) method.
restored_telea = cv2.inpaint(damaged_img, mask, 3, cv2.INPAINT_TELEA)
图像去噪
# Photo module: non-local means denoising.
denoised = cv2.fastNlMeansDenoising(gray, None, 10, 7, 21)                   # grayscale
denoised_color = cv2.fastNlMeansDenoisingColored(img, None, 10, 10, 7, 21)   # color

# Multi-frame (video) denoising: denoise the middle frame using a temporal
# window of neighboring frames. The window must be odd and fit within the
# sequence around the target index — with 5 frames and index 2, the largest
# valid window is 5 (a window of 7 would raise an error).
imgToDenoiseIndex = 2
temporalWindowSize = 5
imgs = [cv2.imread(f'frame_{i}.jpg') for i in range(5)]
denoised_video = cv2.fastNlMeansDenoisingColoredMulti(
    imgs, imgToDenoiseIndex, temporalWindowSize, None, 4, 4, 7, 21)
HDR成像
# Photo module: HDR merging and tone mapping (Debevec pipeline).
img_list = []
exposure_times = np.array([1/30.0, 0.25, 2.5, 15.0], dtype=np.float32)
for i in range(4):
    img = cv2.imread(f'exposure_{i}.jpg')
    img_list.append(img)

# Recover the camera response function from the exposure stack.
calibrateDebevec = cv2.createCalibrateDebevec()
responseDebevec = calibrateDebevec.process(img_list, exposure_times)

# Merge the exposures into a single float32 HDR radiance map.
mergeDebevec = cv2.createMergeDebevec()
hdrDebevec = mergeDebevec.process(img_list, exposure_times, responseDebevec)

# Tone-map the radiance map to a displayable LDR image in [0, 1].
tonemapDrago = cv2.createTonemapDrago(1.0, 0.7)
ldrDrago = tonemapDrago.process(hdrDebevec)

# .hdr stores float radiance directly; JPEG needs 8-bit data, so clip and
# convert (writing a raw float array scaled by 255 to JPEG is undefined).
cv2.imwrite('hdr_image.hdr', hdrDebevec)
cv2.imwrite('ldr_drago.jpg', np.clip(ldrDrago * 255, 0, 255).astype(np.uint8))
ML 模块 - 机器学习
K-Means聚类
# ML: color quantization via K-Means clustering.
data = img.reshape((-1, 3))   # one row per pixel (B, G, R)
data = np.float32(data)       # cv2.kmeans requires float32 samples

# Termination criteria and cluster count.
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 20, 1.0)
k = 8  # number of color clusters

_, labels, centers = cv2.kmeans(data, k, None, criteria, 10,
                                cv2.KMEANS_RANDOM_CENTERS)

# Rebuild the image by replacing each pixel with its cluster center.
centers = np.uint8(centers)
segmented_data = centers[labels.flatten()]
segmented_image = segmented_data.reshape(img.shape)
SVM分类器
# ML: a minimal SVM classification example on random data.
svm = cv2.ml.SVM_create()
svm.setType(cv2.ml.SVM_C_SVC)
svm.setKernel(cv2.ml.SVM_LINEAR)

# Training data: 100 samples with 2 features each, binary integer labels.
train_data = np.random.randint(0, 100, (100, 2)).astype(np.float32)
train_labels = np.random.randint(0, 2, (100, 1)).astype(np.int32)

svm.train(train_data, cv2.ml.ROW_SAMPLE, train_labels)

# Predict labels for new samples.
test_data = np.random.randint(0, 100, (10, 2)).astype(np.float32)
result = svm.predict(test_data)
实际应用示例
实时人脸识别系统
import cv2
import numpy as np


class FaceRecognitionSystem:
    """Real-time face sample collection, training, and recognition.

    NOTE: ``cv2.face.LBPHFaceRecognizer_create`` requires the
    opencv-contrib-python package (the ``face`` module is not in the
    base opencv-python build).
    """

    def __init__(self):
        # Haar cascade for detection, LBPH for recognition.
        self.face_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
        self.recognizer = cv2.face.LBPHFaceRecognizer_create()
        self.cap = cv2.VideoCapture(0)

    def collect_faces(self, person_id, num_samples=100):
        """Collect face samples from the camera for one person.

        Returns (faces, labels): resized 200x200 grayscale crops and the
        matching person_id label for each crop.
        """
        faces = []
        labels = []
        count = 0
        while count < num_samples:
            ret, frame = self.cap.read()
            if not ret:
                continue
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            detected_faces = self.face_cascade.detectMultiScale(gray, 1.3, 5)
            for (x, y, w, h) in detected_faces:
                face_roi = gray[y:y+h, x:x+w]
                face_roi = cv2.resize(face_roi, (200, 200))
                faces.append(face_roi)
                labels.append(person_id)
                count += 1
                cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 0, 0), 2)
                cv2.putText(frame, f'Collecting: {count}/{num_samples}',
                            (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                            (255, 0, 0), 2)
            cv2.imshow('Collecting Faces', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        return faces, labels

    def train_model(self, faces, labels):
        """Train the LBPH recognizer on the collected samples."""
        self.recognizer.train(faces, np.array(labels))

    def recognize_faces(self):
        """Run live recognition until 'q' is pressed."""
        while True:
            ret, frame = self.cap.read()
            if not ret:
                break
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            faces = self.face_cascade.detectMultiScale(gray, 1.3, 5)
            for (x, y, w, h) in faces:
                face_roi = gray[y:y+h, x:x+w]
                face_roi = cv2.resize(face_roi, (200, 200))
                # LBPH: lower confidence value means a better match.
                person_id, confidence = self.recognizer.predict(face_roi)
                if confidence < 100:  # confidence threshold
                    name = f'Person {person_id}'
                    color = (0, 255, 0)
                else:
                    name = 'Unknown'
                    color = (0, 0, 255)
                cv2.rectangle(frame, (x, y), (x+w, y+h), color, 2)
                cv2.putText(frame, f'{name} ({confidence:.1f})', (x, y-10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
            cv2.imshow('Face Recognition', frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    def release(self):
        """Release the camera and close all windows."""
        self.cap.release()
        cv2.destroyAllWindows()
图像拼接全景图
class PanoramaStitcher:
    """Stitch overlapping images left-to-right into a panorama using SIFT."""

    def __init__(self):
        self.detector = cv2.SIFT_create()
        self.matcher = cv2.BFMatcher()

    def stitch_images(self, images):
        """Stitch a list of images into one panorama (left to right).

        Returns the stitched image, the single input for a 1-image list,
        or None for an empty list.
        """
        if len(images) < 2:
            return images[0] if images else None
        result = images[0]
        for i in range(1, len(images)):
            result = self.stitch_pair(result, images[i])
            if result is None:
                print(f"无法拼接第{i+1}张图像")
                break
        return result

    def stitch_pair(self, img1, img2):
        """Stitch two images; returns None when matching fails."""
        # Detect SIFT features in both images.
        kp1, des1 = self.detector.detectAndCompute(img1, None)
        kp2, des2 = self.detector.detectAndCompute(img2, None)
        if des1 is None or des2 is None:
            return None
        # kNN matching + Lowe's ratio test.
        matches = self.matcher.knnMatch(des1, des2, k=2)
        good_matches = []
        for m, n in matches:
            if m.distance < 0.7 * n.distance:
                good_matches.append(m)
        if len(good_matches) < 10:
            return None
        # Homography mapping img2 onto img1's plane (RANSAC).
        src_pts = np.float32(
            [kp1[m.queryIdx].pt for m in good_matches]).reshape(-1, 1, 2)
        dst_pts = np.float32(
            [kp2[m.trainIdx].pt for m in good_matches]).reshape(-1, 1, 2)
        H, mask = cv2.findHomography(dst_pts, src_pts, cv2.RANSAC, 5.0)
        if H is None:
            return None
        h1, w1 = img1.shape[:2]
        h2, w2 = img2.shape[:2]
        # Canvas bounds: img1's corners plus img2's corners after warping.
        # Both corner sets must be shaped (N, 1, 2) so they concatenate
        # cleanly (mixing 2-D and 3-D arrays would raise a ValueError).
        corners = np.float32([[0, 0], [w2, 0], [w2, h2], [0, h2]]).reshape(-1, 1, 2)
        transformed_corners = cv2.perspectiveTransform(corners, H)
        img1_corners = np.float32([[0, 0], [w1, 0], [w1, h1], [0, h1]]).reshape(-1, 1, 2)
        all_corners = np.concatenate([img1_corners, transformed_corners], axis=0)
        [x_min, y_min] = np.int32(all_corners.min(axis=0).ravel())
        [x_max, y_max] = np.int32(all_corners.max(axis=0).ravel())
        # Shift everything so the canvas origin sits at (x_min, y_min).
        translation = np.array([[1, 0, -x_min], [0, 1, -y_min], [0, 0, 1]])
        canvas_width = x_max - x_min
        canvas_height = y_max - y_min
        H_translated = translation.dot(H)
        warped_img2 = cv2.warpPerspective(img2, H_translated,
                                          (canvas_width, canvas_height))
        # Place img1 on the canvas.
        # NOTE(review): the slice assumes x_min, y_min <= 0 (img2 extends
        # left/up of img1) — confirm for general image layouts.
        result = np.zeros((canvas_height, canvas_width, 3), dtype=np.uint8)
        result[-y_min:-y_min+h1, -x_min:-x_min+w1] = img1
        # Overlay img2: zero out img1 pixels wherever img2 has content.
        mask = (warped_img2 > 0).astype(np.uint8)
        result = cv2.bitwise_and(result, 255 - mask * 255) + warped_img2
        return result


# Usage example.
stitcher = PanoramaStitcher()
images = [cv2.imread(f'panorama_{i}.jpg') for i in range(3)]
panorama = stitcher.stitch_images(images)
if panorama is not None:
    cv2.imwrite('panorama_result.jpg', panorama)
性能优化和最佳实践
内存管理
OpenCV 4.10 中的图像数据通常以 NumPy 数组的形式存在,需要注意内存使用。对于大图像处理,建议及时释放不需要的变量,使用适当的数据类型(如 uint8 而不是 float64),以及在可能的情况下进行就地操作来减少内存拷贝。
多线程处理
OpenCV 内部许多函数已经进行了多线程优化。可以通过 cv2.setNumThreads()
设置线程数,或者在应用层面使用 Python 的 concurrent.futures
模块进行并行处理。
GPU加速
对于支持CUDA的系统,可以使用OpenCV的GPU模块来加速计算密集型操作。许多函数都有对应的GPU版本,通常以 gpu
或 cuda
前缀命名。
错误处理
在实际应用中,应该对OpenCV操作进行适当的错误处理,检查返回值和异常情况。特别是在处理用户输入的图像文件时,需要验证文件格式和内容的有效性。
OpenCV 4.10 提供了强大而全面的计算机视觉功能,通过合理使用这些API,可以构建出高效的图像处理和计算机视觉应用程序。