Multi-View 3D Reconstruction System
Below I implement a complete Python-based multi-view 3D reconstruction system, covering feature extraction and matching, camera pose estimation, 3D reconstruction, optimization, and visualization.
1. Environment Setup and Data Loading
First, install the required libraries:
pip install opencv-python opencv-contrib-python numpy matplotlib plotly scipy tqdm
import cv2
import numpy as np
import matplotlib.pyplot as plt
from scipy.sparse import lil_matrix
from scipy.optimize import least_squares
import os
from mpl_toolkits.mplot3d import Axes3D
import plotly.graph_objects as go
import time
from tqdm import tqdm


class MultiViewReconstruction:
    def __init__(self, image_dir, focal_length=2000, principal_point=None):
        """Initialize the multi-view 3D reconstruction system.

        Args:
            image_dir: path to the image directory
            focal_length: camera focal length in pixels
            principal_point: principal point (cx, cy); defaults to the image center
        """
        self.image_dir = image_dir
        self.images = []
        self.image_names = []
        self.focal_length = focal_length
        self.principal_point = principal_point
        self.keypoints = []
        self.descriptors = []
        self.matches = {}
        self.camera_poses = {}      # camera poses {image_id: (R, t)}
        self.point_cloud = []       # 3D point cloud [(x, y, z, r, g, b), ...]
        self.point_visibility = {}  # point visibility {point_id: [image_id1, image_id2, ...]}

        # Load the images
        self._load_images()

        # If no principal point was given, use the image center
        if self.principal_point is None:
            h, w = self.images[0].shape[:2]
            self.principal_point = (w / 2, h / 2)

        # Camera intrinsic matrix
        self.K = np.array([
            [self.focal_length, 0, self.principal_point[0]],
            [0, self.focal_length, self.principal_point[1]],
            [0, 0, 1]
        ])

    def _load_images(self):
        """Load all images from the directory."""
        print("Loading images...")
        for img_name in sorted(os.listdir(self.image_dir)):
            if img_name.lower().endswith(('.png', '.jpg', '.jpeg')):
                img_path = os.path.join(self.image_dir, img_name)
                img = cv2.imread(img_path)
                if img is not None:
                    self.images.append(img)
                    self.image_names.append(img_name)
        print(f"Loaded {len(self.images)} images.")
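A note on focal_length: if the focal length in pixels is unknown for your camera, a common approximation derives it from the horizontal field of view. A minimal sketch; focal_from_fov and the 60-degree FOV are illustrative assumptions, not part of the class above:

import math

def focal_from_fov(image_width_px, horizontal_fov_deg):
    """Approximate the focal length in pixels from the horizontal field of view."""
    # f = (w / 2) / tan(FOV_h / 2)
    return (image_width_px / 2) / math.tan(math.radians(horizontal_fov_deg) / 2)

# e.g. a 1920-pixel-wide image from a roughly 60-degree FOV lens:
# focal_from_fov(1920, 60) is about 1663 pixels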
2. Feature Extraction and Matching
    # The following methods continue the MultiViewReconstruction class.
    def extract_features(self, method='SIFT', n_features=5000):
        """Extract feature points from all images.

        Args:
            method: feature extraction method ('SIFT', 'SURF', 'ORB')
            n_features: maximum number of features to extract
        """
        print(f"Extracting features using {method}...")

        # Select the feature detector
        if method == 'SIFT':
            detector = cv2.SIFT_create(nfeatures=n_features)
        elif method == 'SURF':
            # SURF requires opencv-contrib-python built with the nonfree modules
            detector = cv2.xfeatures2d.SURF_create(hessianThreshold=400)
        elif method == 'ORB':
            detector = cv2.ORB_create(nfeatures=n_features)
        else:
            raise ValueError(f"Unsupported feature method: {method}")

        self.keypoints = []
        self.descriptors = []

        for img in tqdm(self.images):
            # Convert to grayscale
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            # Detect keypoints and compute descriptors
            kp, des = detector.detectAndCompute(gray, None)
            self.keypoints.append(kp)
            self.descriptors.append(des)

    def match_features(self, method='FLANN', ratio_test=0.7):
        """Match feature points between images.

        Args:
            method: matching method ('FLANN' or 'BF')
            ratio_test: threshold for Lowe's ratio test
        """
        print("Matching features between images...")

        # Initialize the matcher
        if method == 'FLANN':
            if self.descriptors[0].dtype == np.uint8:  # ORB (binary) descriptors
                flann_params = dict(
                    algorithm=6,  # FLANN_INDEX_LSH
                    table_number=6,
                    key_size=12,
                    multi_probe_level=1
                )
                matcher = cv2.FlannBasedMatcher(flann_params, {})
            else:  # SIFT/SURF descriptors
                flann_params = dict(algorithm=1, trees=5)  # FLANN_INDEX_KDTREE
                matcher = cv2.FlannBasedMatcher(flann_params, {})
        elif method == 'BF':
            if self.descriptors[0].dtype == np.uint8:  # ORB descriptors
                matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=False)
            else:  # SIFT/SURF descriptors
                matcher = cv2.BFMatcher(cv2.NORM_L2, crossCheck=False)
        else:
            raise ValueError(f"Unsupported matching method: {method}")

        self.matches = {}
        n_images = len(self.images)

        # Match every image pair
        for i in tqdm(range(n_images)):
            for j in range(i + 1, n_images):
                if self.descriptors[i] is None or self.descriptors[j] is None:
                    continue

                # Match descriptors
                matches = matcher.knnMatch(self.descriptors[i], self.descriptors[j], k=2)

                # Apply Lowe's ratio test to keep only good matches.
                # knnMatch may return fewer than 2 neighbors for some queries.
                good_matches = []
                for pair in matches:
                    if len(pair) < 2:
                        continue
                    m, n = pair
                    if m.distance < ratio_test * n.distance:
                        good_matches.append(m)

                if len(good_matches) > 20:  # keep only pairs with enough matches
                    self.matches[(i, j)] = good_matches

    def visualize_matches(self, img_idx1, img_idx2):
        """Visualize the matches between two images."""
        if (img_idx1, img_idx2) not in self.matches:
            print(f"No matches found between image {img_idx1} and {img_idx2}")
            return

        img1 = self.images[img_idx1]
        img2 = self.images[img_idx2]
        matches = self.matches[(img_idx1, img_idx2)]

        # Draw the matches
        matched_img = cv2.drawMatches(
            img1, self.keypoints[img_idx1],
            img2, self.keypoints[img_idx2],
            matches, None,
            flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS
        )

        plt.figure(figsize=(20, 10))
        plt.imshow(cv2.cvtColor(matched_img, cv2.COLOR_BGR2RGB))
        plt.title(f"Feature matches between image {img_idx1} and {img_idx2}")
        plt.axis('off')
        plt.show()
3. Camera Pose Estimation and 3D Reconstruction
    # The following methods continue the MultiViewReconstruction class.
    def estimate_initial_pose(self):
        """Estimate the initial camera poses."""
        print("Estimating initial camera poses...")

        if not self.matches:
            raise ValueError("No feature matches found. Run match_features() first.")

        # Pick the image pair with the most matches as the initial pair
        best_pair = max(self.matches.keys(), key=lambda x: len(self.matches[x]))
        img_idx1, img_idx2 = best_pair
        matches = self.matches[best_pair]

        # Pixel coordinates of the matched points
        kp1 = self.keypoints[img_idx1]
        kp2 = self.keypoints[img_idx2]
        pts1 = np.float32([kp1[m.queryIdx].pt for m in matches])
        pts2 = np.float32([kp2[m.trainIdx].pt for m in matches])

        # Estimate the essential matrix with RANSAC
        E, mask = cv2.findEssentialMat(pts1, pts2, self.K, method=cv2.RANSAC,
                                       prob=0.999, threshold=1.0)

        # Recover the relative pose from the essential matrix
        _, R, t, mask = cv2.recoverPose(E, pts1, pts2, self.K, mask=mask)

        # The first camera defines the world coordinate frame
        self.camera_poses[img_idx1] = (np.eye(3), np.zeros((3, 1)))  # R, t
        self.camera_poses[img_idx2] = (R, t)

        # Triangulate the initial point cloud
        self._triangulate_initial_points(img_idx1, img_idx2)

        # Incrementally add the remaining views
        self._incremental_sfm()

    def _triangulate_initial_points(self, img_idx1, img_idx2):
        """Triangulate the initial point cloud."""
        matches = self.matches[(img_idx1, img_idx2)]
        kp1 = self.keypoints[img_idx1]
        kp2 = self.keypoints[img_idx2]

        # Pixel coordinates of the matched points
        pts1 = np.float32([kp1[m.queryIdx].pt for m in matches])
        pts2 = np.float32([kp2[m.trainIdx].pt for m in matches])

        # Camera projection matrices
        R1, t1 = self.camera_poses[img_idx1]
        R2, t2 = self.camera_poses[img_idx2]
        P1 = np.dot(self.K, np.hstack((R1, t1)))
        P2 = np.dot(self.K, np.hstack((R2, t2)))

        # Triangulation
        points_4d = cv2.triangulatePoints(P1, P2, pts1.T, pts2.T)
        points_3d = points_4d[:3] / points_4d[3]  # homogeneous -> 3D coordinates

        # Sample the point colors
        colors = []
        img1 = self.images[img_idx1]
        for pt in pts1:
            x, y = int(round(pt[0])), int(round(pt[1]))
            colors.append(img1[y, x][::-1])  # BGR -> RGB

        # Store the point cloud
        for i in range(points_3d.shape[1]):
            self.point_cloud.append((*points_3d[:, i], *colors[i]))
            point_id = len(self.point_cloud) - 1
            self.point_visibility[point_id] = [img_idx1, img_idx2]

    def _incremental_sfm(self):
        """Incremental structure from motion."""
        print("Running incremental structure from motion...")

        n_images = len(self.images)
        registered_images = set(self.camera_poses.keys())

        # Keep adding views until all views are registered
        while len(registered_images) < n_images:
            # Find the unregistered view with the most matches to a registered view
            best_img = None
            best_matches = 0
            best_registered_img = None
            best_matches_list = []

            for unreg_img in set(range(n_images)) - registered_images:
                for reg_img in registered_images:
                    pair = (min(unreg_img, reg_img), max(unreg_img, reg_img))
                    if pair in self.matches and len(self.matches[pair]) > best_matches:
                        best_matches = len(self.matches[pair])
                        best_img = unreg_img
                        best_registered_img = reg_img
                        best_matches_list = self.matches[pair]

            if best_img is None:
                print("No more images can be registered")
                break

            print(f"Registering image {best_img} using matches with image {best_registered_img}")

            # Build 2D-3D correspondences
            kp_unreg = self.keypoints[best_img]

            pts3d = []
            pts2d = []

            for match in best_matches_list:
                # In self.matches the query image is always the one with the
                # smaller index, so orient the match indices accordingly
                if best_registered_img < best_img:
                    unreg_kp_idx = match.trainIdx
                else:
                    unreg_kp_idx = match.queryIdx

                # Look up a reconstructed 3D point seen by the registered image.
                # Simplified: a full pipeline should maintain an explicit
                # mapping from (image, keypoint) to 3D point; here we assume
                # the match is correct.
                for point_id, img_list in self.point_visibility.items():
                    if best_registered_img in img_list:
                        pts3d.append(self.point_cloud[point_id][:3])
                        pts2d.append(kp_unreg[unreg_kp_idx].pt)
                        break

            if len(pts3d) < 6:
                print(f"Not enough 2D-3D correspondences for image {best_img}")
                registered_images.add(best_img)  # mark as registered but not reconstructed
                continue

            pts3d = np.array(pts3d, dtype=np.float32)
            pts2d = np.array(pts2d, dtype=np.float32)

            # Estimate the new view's camera pose with PnP + RANSAC
            _, rvec, tvec, inliers = cv2.solvePnPRansac(
                pts3d, pts2d, self.K, None,
                iterationsCount=1000,
                reprojectionError=8.0,
                confidence=0.99
            )

            # Convert the rotation vector to a rotation matrix
            R, _ = cv2.Rodrigues(rvec)
            t = tvec

            # Store the new view's camera pose
            self.camera_poses[best_img] = (R, t)
            registered_images.add(best_img)

            # Triangulate new points
            self._triangulate_new_points(best_img, registered_images)

    def _triangulate_new_points(self, new_img_idx, registered_images):
        """Triangulate new 3D points."""
        # Walk over registered views that share matches with the new view
        for reg_img in registered_images:
            if reg_img == new_img_idx:
                continue

            pair = (min(new_img_idx, reg_img), max(new_img_idx, reg_img))
            if pair not in self.matches:
                continue

            matches = self.matches[pair]
            kp_new = self.keypoints[new_img_idx]
            kp_reg = self.keypoints[reg_img]

            # Collect matched points that are not yet in the point cloud
            pts_new = []
            pts_reg = []
            match_kp_indices = []  # (reg_kp_idx, new_kp_idx) per candidate point

            for match in matches:
                # Orient query/train indices: the query image is the one with
                # the smaller index in the pair
                if reg_img < new_img_idx:
                    reg_kp_idx, new_kp_idx = match.queryIdx, match.trainIdx
                else:
                    reg_kp_idx, new_kp_idx = match.trainIdx, match.queryIdx

                # Check whether this point already exists in the cloud.
                # Simplified: as above, a proper implementation tracks the
                # (image, keypoint) -> 3D point correspondence explicitly.
                exists = False
                for point_id, img_list in self.point_visibility.items():
                    if reg_img in img_list:
                        exists = True
                        break

                if not exists:
                    # New point: queue it for triangulation
                    pts_reg.append(kp_reg[reg_kp_idx].pt)
                    pts_new.append(kp_new[new_kp_idx].pt)
                    match_kp_indices.append((reg_kp_idx, new_kp_idx))

            if len(pts_new) < 8:
                continue

            pts_new = np.float32(pts_new).T
            pts_reg = np.float32(pts_reg).T

            # Camera projection matrices
            R_reg, t_reg = self.camera_poses[reg_img]
            R_new, t_new = self.camera_poses[new_img_idx]
            P_reg = np.dot(self.K, np.hstack((R_reg, t_reg)))
            P_new = np.dot(self.K, np.hstack((R_new, t_new)))

            # Triangulate the new points
            points_4d = cv2.triangulatePoints(P_reg, P_new, pts_reg, pts_new)
            points_3d = points_4d[:3] / points_4d[3]  # homogeneous -> 3D coordinates

            # Keep only points with positive depth (in front of both cameras)
            valid_points = []
            for i in range(points_3d.shape[1]):
                point = points_3d[:, i]
                # Depth in the registered camera's frame
                depth_reg = np.dot(R_reg, point) + t_reg.flatten()
                # Depth in the new camera's frame
                depth_new = np.dot(R_new, point) + t_new.flatten()
                if depth_reg[2] > 0 and depth_new[2] > 0:
                    valid_points.append(i)

            if not valid_points:
                continue

            # Color each point with the average of the two views
            img_new = self.images[new_img_idx]
            img_reg = self.images[reg_img]

            for i in valid_points:
                reg_kp_idx, new_kp_idx = match_kp_indices[i]

                x_new, y_new = map(int, map(round, kp_new[new_kp_idx].pt))
                x_reg, y_reg = map(int, map(round, kp_reg[reg_kp_idx].pt))

                color_new = img_new[y_new, x_new][::-1]  # BGR -> RGB
                color_reg = img_reg[y_reg, x_reg][::-1]
                avg_color = ((np.array(color_new) + np.array(color_reg)) / 2).astype(int)

                # Add to the point cloud
                self.point_cloud.append((*points_3d[:, i], *avg_color))
                point_id = len(self.point_cloud) - 1
                self.point_visibility[point_id] = [reg_img, new_img_idx]
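Several comments above note that the feature-to-3D-point correspondence is simplified. A minimal sketch of the bookkeeping a fuller pipeline would maintain; the names point_map, register_observation, and lookup_point are illustrative and not part of the class above:

# Map each observed keypoint to the 3D point it produced.
point_map = {}  # {(image_idx, keypoint_idx): point_id}

def register_observation(point_map, image_idx, keypoint_idx, point_id):
    """Record that keypoint `keypoint_idx` in image `image_idx` observes `point_id`."""
    point_map[(image_idx, keypoint_idx)] = point_id

def lookup_point(point_map, image_idx, keypoint_idx):
    """Return the 3D point id observed by this keypoint, or None if it is new."""
    return point_map.get((image_idx, keypoint_idx))

# During triangulation, register both views' keypoints:
#   register_observation(point_map, img_idx1, m.queryIdx, point_id)
#   register_observation(point_map, img_idx2, m.trainIdx, point_id)
# During PnP registration, build exact 2D-3D pairs:
#   pid = lookup_point(point_map, reg_img, reg_kp_idx)
#   if pid is not None: append point_cloud[pid][:3] and the 2D observation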
4. Bundle Adjustment Optimization
    # The following method continues the MultiViewReconstruction class.
    def bundle_adjustment(self, n_iterations=10):
        """Run bundle adjustment to refine the camera poses and 3D points."""
        print("Running bundle adjustment...")

        if not self.camera_poses or not self.point_cloud:
            raise ValueError("No camera poses or point cloud available")

        # Problem sizes
        n_cameras = len(self.camera_poses)
        n_points = len(self.point_cloud)

        # Camera parameters: 3 for the rotation vector + 3 for the translation
        camera_params = []
        camera_indices = {img_idx: i for i, img_idx in enumerate(self.camera_poses.keys())}

        for img_idx in self.camera_poses:
            R, t = self.camera_poses[img_idx]
            # Convert the rotation matrix to a rotation vector
            rvec, _ = cv2.Rodrigues(R)
            camera_params.extend([*rvec.flatten(), *t.flatten()])

        camera_params = np.array(camera_params, dtype=np.float64)
        points_3d = np.array([p[:3] for p in self.point_cloud], dtype=np.float64)

        # Gather the observations (2D points).
        # Simplified: without explicit (image, keypoint) -> 3D point tracks
        # (see the sketch after section 3), we take, per image, the first
        # keypoint that participates in any match with another registered
        # view. A real pipeline would use exact feature tracks here.
        first_matched_kp = {}
        for img_idx in self.camera_poses:
            matched = set()
            for other in self.camera_poses:
                if other == img_idx:
                    continue
                pair = (min(img_idx, other), max(img_idx, other))
                for m in self.matches.get(pair, []):
                    matched.add(m.queryIdx if img_idx == pair[0] else m.trainIdx)
            first_matched_kp[img_idx] = min(matched) if matched else None

        observations = []
        camera_idx_for_obs = []
        point_idx_for_obs = []

        for point_id, img_list in self.point_visibility.items():
            for img_idx in img_list:
                kp_idx = first_matched_kp.get(img_idx)
                if kp_idx is None:
                    continue
                observations.append(self.keypoints[img_idx][kp_idx].pt)
                camera_idx_for_obs.append(camera_indices[img_idx])
                point_idx_for_obs.append(point_id)

        if not observations:
            print("No observations available for bundle adjustment")
            return

        observations = np.array(observations, dtype=np.float64)

        def fun(params, n_cameras, n_points, cam_idx_per_obs, pt_idx_per_obs, observations):
            """Compute the reprojection residuals."""
            camera_params = params[:n_cameras * 6].reshape((n_cameras, 6))
            points_3d = params[n_cameras * 6:].reshape((n_points, 3))

            residuals = []
            for i in range(len(observations)):
                camera_idx = cam_idx_per_obs[i]
                point_idx = pt_idx_per_obs[i]

                rvec = camera_params[camera_idx, :3]
                tvec = camera_params[camera_idx, 3:6]
                point = points_3d[point_idx]

                # Project the 3D point into the image
                projected, _ = cv2.projectPoints(
                    point.reshape(1, 1, 3), rvec, tvec, self.K, None)
                projected = projected.reshape(-1)

                # Residual = observed - projected
                residuals.extend(observations[i] - projected)

            return np.array(residuals)

        # Stack all parameters into one vector
        x0 = np.hstack((camera_params.ravel(), points_3d.ravel()))

        # Run the least-squares optimization
        res = least_squares(
            fun, x0,
            jac='3-point',
            method='trf',
            max_nfev=n_iterations,
            verbose=2,
            args=(n_cameras, n_points, camera_idx_for_obs, point_idx_for_obs, observations)
        )

        # Unpack the optimized parameters
        optimized_params = res.x
        camera_params_opt = optimized_params[:n_cameras * 6].reshape((n_cameras, 6))
        points_3d_opt = optimized_params[n_cameras * 6:].reshape((n_points, 3))

        # Update the camera poses
        for i, img_idx in enumerate(self.camera_poses.keys()):
            rvec = camera_params_opt[i, :3]
            tvec = camera_params_opt[i, 3:6]
            R, _ = cv2.Rodrigues(rvec)
            t = tvec.reshape(3, 1)
            self.camera_poses[img_idx] = (R, t)

        # Update the point cloud (colors are preserved)
        for i in range(n_points):
            x, y, z = points_3d_opt[i]
            r, g, b = self.point_cloud[i][3:6]
            self.point_cloud[i] = (x, y, z, r, g, b)
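For reference, the quantity this routine minimizes is the total squared reprojection error over all observations, jointly over the camera poses (R_i, t_i) and the 3D points X_j:

min over {R_i, t_i, X_j} of Σ_(i,j) ‖ x_ij − π( K (R_i X_j + t_i) ) ‖²,  where π([u, v, w]ᵀ) = (u/w, v/w)

Each residual pair computed in fun above is one term of this sum; cv2.projectPoints evaluates π(K(R_i X_j + t_i)), including the Rodrigues conversion of the rotation vector.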
5. Visualization and Results
    # The following method continues the MultiViewReconstruction class.
    def visualize_point_cloud(self, method='matplotlib'):
        """Visualize the reconstructed point cloud."""
        if not self.point_cloud:
            print("No point cloud to visualize")
            return

        print("Visualizing point cloud...")

        # Extract the point coordinates and colors
        points = np.array([p[:3] for p in self.point_cloud])
        colors = np.array([p[3:6] for p in self.point_cloud]) / 255.0

        if method == 'matplotlib':
            fig = plt.figure(figsize=(10, 8))
            ax = fig.add_subplot(111, projection='3d')

            ax.scatter(points[:, 0], points[:, 1], points[:, 2],
                       c=colors, s=1, alpha=0.6, marker='.')

            # Draw the camera positions
            for img_idx, (R, t) in self.camera_poses.items():
                camera_center = (-R.T @ t).flatten()
                ax.scatter(camera_center[0], camera_center[1], camera_center[2],
                           c='red', s=50, marker='^')
                ax.text(camera_center[0], camera_center[1], camera_center[2],
                        f'Cam {img_idx}', color='black')

            ax.set_xlabel('X')
            ax.set_ylabel('Y')
            ax.set_zlabel('Z')
            ax.set_title('Reconstructed 3D Point Cloud')
            plt.show()

        elif method == 'plotly':
            fig = go.Figure()

            # Plotly expects one CSS color string per point
            color_strings = [f'rgb({int(r)},{int(g)},{int(b)})'
                             for r, g, b in (p[3:6] for p in self.point_cloud)]

            # Add the point cloud
            fig.add_trace(go.Scatter3d(
                x=points[:, 0],
                y=points[:, 1],
                z=points[:, 2],
                mode='markers',
                marker=dict(size=2, color=color_strings, opacity=0.8),
                name='Point Cloud'
            ))

            # Add the cameras
            for img_idx, (R, t) in self.camera_poses.items():
                camera_center = (-R.T @ t).flatten()
                fig.add_trace(go.Scatter3d(
                    x=[camera_center[0]],
                    y=[camera_center[1]],
                    z=[camera_center[2]],
                    mode='markers+text',
                    marker=dict(size=5, color='red', symbol='diamond'),
                    text=f'Cam {img_idx}',
                    textposition="top center",
                    name=f'Camera {img_idx}'
                ))

            fig.update_layout(
                title='Reconstructed 3D Point Cloud',
                scene=dict(
                    xaxis_title='X',
                    yaxis_title='Y',
                    zaxis_title='Z',
                    aspectmode='data'
                ),
                margin=dict(l=0, r=0, b=0, t=0)
            )
            fig.show()

        else:
            raise ValueError(f"Unsupported visualization method: {method}")
6. Full Pipeline Example
# Usage example
if __name__ == "__main__":
    # Initialize the reconstruction system
    image_dir = "path/to/your/images"  # replace with your image directory
    reconstructor = MultiViewReconstruction(image_dir, focal_length=2000)

    # 1. Feature extraction
    reconstructor.extract_features(method='SIFT')

    # 2. Feature matching
    reconstructor.match_features(method='FLANN', ratio_test=0.7)

    # Visualize some matches
    reconstructor.visualize_matches(0, 1)

    # 3. Camera pose estimation and 3D reconstruction
    reconstructor.estimate_initial_pose()

    # 4. Bundle adjustment
    reconstructor.bundle_adjustment(n_iterations=20)

    # 5. Visualize the result
    reconstructor.visualize_point_cloud(method='plotly')
System Overview
This multi-view 3D reconstruction system implements the following:
- Data preparation: load the multi-view image dataset and set the camera intrinsics.
- Feature extraction and matching:
  - Supports the SIFT, SURF, and ORB feature extractors
  - Matches features with FLANN or brute force
  - Applies Lowe's ratio test to keep only high-quality matches
- Camera pose estimation (see the geometric note after this list):
  - Computes the essential matrix from the feature matches
  - Removes outliers with RANSAC
  - Recovers the relative camera pose
- 3D reconstruction:
  - Generates the initial point cloud by triangulation
  - Adds further views via incremental structure from motion
  - Handles the registration of new views and triangulation of new points
- Model optimization:
  - Bundle adjustment of camera poses and 3D points
  - Uses SciPy's least-squares optimizer
- Visualization:
  - Supports both Matplotlib and Plotly
  - Displays the point cloud and the camera positions
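A geometric note on the pose-estimation step: for normalized correspondences x̂1, x̂2 (pixel coordinates premultiplied by K⁻¹), the essential matrix satisfies the epipolar constraint x̂2ᵀ E x̂1 = 0, with E = [t]× R. cv2.recoverPose decomposes E into the four candidate (R, t) pairs and keeps the one that places the triangulated points in front of both cameras.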
Technical Highlights
- Feature choice: SIFT features are largely invariant to rotation, scale, and illumination changes.
- Robust estimation: RANSAC handles outliers among the matches.
- Incremental reconstruction: views are added one at a time, which keeps the reconstruction stable.
- Global optimization: bundle adjustment jointly refines all camera parameters and 3D point coordinates, improving accuracy.
- Efficiency: sparse matrices and efficient least-squares solvers keep large optimization problems tractable (see the sketch after this list).
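The lil_matrix import at the top is intended for exactly this: scipy.optimize.least_squares accepts a jac_sparsity pattern so that its finite-difference Jacobian stays sparse. A sketch of the standard bundle-adjustment sparsity pattern, assuming the parameter layout used in bundle_adjustment above (6 values per camera followed by 3 per point); it is not wired into the class:

import numpy as np
from scipy.sparse import lil_matrix

def bundle_adjustment_sparsity(n_cameras, n_points, camera_indices, point_indices):
    """Jacobian sparsity pattern: each observation's 2 residuals depend only on
    one camera's 6 parameters and one point's 3 coordinates."""
    m = camera_indices.size * 2          # 2 residuals per observation
    n = n_cameras * 6 + n_points * 3     # length of the parameter vector
    A = lil_matrix((m, n), dtype=int)

    i = np.arange(camera_indices.size)
    for s in range(6):  # camera parameter block
        A[2 * i, camera_indices * 6 + s] = 1
        A[2 * i + 1, camera_indices * 6 + s] = 1
    for s in range(3):  # point coordinate block
        A[2 * i, n_cameras * 6 + point_indices * 3 + s] = 1
        A[2 * i + 1, n_cameras * 6 + point_indices * 3 + s] = 1
    return A

# Usage inside bundle_adjustment (index lists converted to numpy arrays):
#   A = bundle_adjustment_sparsity(n_cameras, n_points,
#                                  np.array(camera_idx_for_obs),
#                                  np.array(point_idx_for_obs))
#   res = least_squares(fun, x0, jac_sparsity=A, jac='2-point', method='trf', ...)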
Extensions
- Dense reconstruction: densify the sparse point cloud to recover a more complete surface (see the sketch after this list).
- Texture mapping: map the original image textures onto the reconstructed 3D model.
- Parallelization: accelerate feature extraction and matching on the GPU.
- Real-time reconstruction: optimize the algorithms toward real-time 3D reconstruction.
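As a concrete starting point for the dense-reconstruction item above, OpenCV's semi-global block matcher produces a dense disparity map from a rectified stereo pair. A minimal sketch; the file names and tuning values are assumptions:

import cv2
import numpy as np

# Load a rectified stereo pair (assumed file names)
left = cv2.imread('left.png', cv2.IMREAD_GRAYSCALE)
right = cv2.imread('right.png', cv2.IMREAD_GRAYSCALE)

# Semi-global block matching; numDisparities must be divisible by 16
stereo = cv2.StereoSGBM_create(
    minDisparity=0,
    numDisparities=128,
    blockSize=5,
    P1=8 * 3 * 5 ** 2,   # smoothness penalties (common heuristic values)
    P2=32 * 3 * 5 ** 2,
    uniquenessRatio=10,
    speckleWindowSize=100,
    speckleRange=32
)

# compute() returns fixed-point disparities scaled by 16
disparity = stereo.compute(left, right).astype(np.float32) / 16.0

# With a known baseline b and focal length f (pixels): depth = f * b / disparity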
This system provides a basic framework for multi-view 3D reconstruction and can be extended and optimized for specific applications.