飞桨(PaddlePaddle)PaddleOCR:识别图片上文字的角度
1. 识别角度:飞桨无法读取路径中带有中文的图片,所以先将图片转换为 np.array 格式再传进去
def paddle_orientation(image_path):
    """Predict the orientation label of the text on an image or PDF page.

    The input is decoded with Pillow and handed to the model as a NumPy
    array instead of a file path, because the model cannot read files whose
    path contains Chinese characters.

    Args:
        image_path: Either a file path (``str``) or encoded image data
            (``bytes``). A path ending in ``.pdf`` is first rendered to PNG
            bytes with ``pdf_to_image_bytes`` (first page).

    Returns:
        The first predicted label converted to ``int`` (presumably one of
        0 / 90 / 180 / 270 -- confirm against the model documentation), or
        ``None`` when the model yields no result.

    Raises:
        TypeError: If ``image_path`` is neither ``str`` nor ``bytes``.
    """
    import io

    import numpy as np
    from paddlex import create_model
    from PIL import Image

    if isinstance(image_path, str) and image_path.lower().endswith('.pdf'):
        image_path = pdf_to_image_bytes(image_path)

    if isinstance(image_path, str):
        source = image_path
    elif isinstance(image_path, bytes):
        source = io.BytesIO(image_path)
    else:
        raise TypeError(
            f"image_path must be str or bytes, got {type(image_path).__name__}"
        )

    # Drop any alpha channel / palette so the model always gets 3 channels.
    # NOTE(review): the array is in RGB channel order -- confirm which order
    # the model expects for ndarray input.
    with Image.open(source) as pil_image:
        image_array = np.array(pil_image.convert('RGB'))

    # NOTE(review): the model is rebuilt on every call; consider caching it
    # if this function is called in a loop.
    model = create_model(model_name="PP-LCNet_x1_0_doc_ori")
    output = model.predict(image_array, batch_size=1)

    # A single image is submitted, so only the first result is relevant.
    for res in output:
        print(res.json)
        label_names = res.json['res']['label_names']
        return int(label_names[0])
    return None
2. 修正角度:可以接收图片或者 PDF 文件
def pdf_to_image_bytes(pdf_path, page_index=0, dpi=200):
    """Render one page of a PDF and return it as PNG-encoded bytes.

    Args:
        pdf_path: Path of the PDF file, passed to ``fitz.open``.
        page_index: Index of the page to render (default: the first page).
        dpi: Target rendering resolution. PDF user space is 72 units per
            inch, so the page is scaled by ``dpi / 72``.

    Returns:
        bytes: The rendered page encoded as PNG.
    """
    import io

    import fitz  # PyMuPDF
    from PIL import Image

    zoom = dpi / 72
    matrix = fitz.Matrix(zoom, zoom)

    # The context manager closes the document even if rendering fails.
    with fitz.open(pdf_path) as doc:
        page = doc.load_page(page_index)
        pix = page.get_pixmap(matrix=matrix, colorspace="rgb")
        img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)

    img_bytes = io.BytesIO()
    img.save(img_bytes, format="PNG")  # other formats (e.g. JPEG) work too
    return img_bytes.getvalue()


def handle_image_size(image_path, new_width=None, new_height=None):
    """Load an image from several input kinds and optionally resize it.

    Args:
        image_path: One of
            * ``str`` -- path of an image file, or of a PDF (``.pdf``), whose
              first page is rendered via ``pdf_to_image_bytes``;
            * ``bytes`` -- encoded image data;
            * anything else -- converted with ``np.array`` (e.g. an existing
              ndarray).
        new_width: Target width in pixels; ``None`` keeps the current width.
        new_height: Target height in pixels; ``None`` keeps the current
            height.

    Returns:
        numpy.ndarray: The (possibly resized) image as an array.
    """
    import io

    import numpy as np

    if isinstance(image_path, str) and image_path.lower().endswith('.pdf'):
        image_path = pdf_to_image_bytes(image_path)

    if isinstance(image_path, (str, bytes)):
        from PIL import Image

        source = image_path if isinstance(image_path, str) else io.BytesIO(image_path)
        # copy() forces the pixel data to load so the file can be closed.
        with Image.open(source) as opened:
            pil_image = opened.copy()
        width, height = pil_image.size
    else:
        pil_image = np.array(image_path)
        height, width = pil_image.shape[:2]

    print(f'pil_imagesize:{width, height}')

    # The original code resized to undefined names; default to "no resize".
    if new_width is None:
        new_width = width
    if new_height is None:
        new_height = height

    if isinstance(pil_image, np.ndarray):
        if (new_width, new_height) == (width, height):
            # Nothing to resize: skip the array -> PIL -> array round trip.
            return pil_image
        from PIL import Image

        pil_image = Image.fromarray(pil_image)

    resized_image = pil_image.resize((new_width, new_height))
    return np.array(resized_image)


def correct_orientation(image_path, angle):
    """Rotate an image by ``angle`` degrees without cropping its content.

    Args:
        image_path: Anything accepted by ``handle_image_size`` (image or PDF
            path, encoded image bytes, or array-like image data).
        angle: Rotation in degrees; positive values rotate
            counter-clockwise.

    Returns:
        numpy.ndarray: The rotated image on a canvas enlarged to hold the
        whole rotated content.
    """
    import numpy as np

    img = handle_image_size(image_path)
    height, width = img.shape[:2]

    if angle % 90 == 0:
        # Exact quarter turns are pure pixel permutations: doing them with
        # rot90 is lossless (no interpolation blur, no half-pixel shift).
        quarter_turns = int(angle // 90) % 4
        rotated_img = np.ascontiguousarray(np.rot90(img, k=quarter_turns))
        new_height, new_width = rotated_img.shape[:2]
    else:
        import cv2

        center = (width / 2, height / 2)
        rotation_matrix = cv2.getRotationMatrix2D(center, angle, 1.0)

        # Bounding box of the rotated image.
        cos = np.abs(rotation_matrix[0, 0])
        sin = np.abs(rotation_matrix[0, 1])
        new_width = int((height * sin) + (width * cos))
        new_height = int((height * cos) + (width * sin))

        # Shift so the rotated content is centred on the enlarged canvas.
        rotation_matrix[0, 2] += (new_width / 2) - center[0]
        rotation_matrix[1, 2] += (new_height / 2) - center[1]

        rotated_img = cv2.warpAffine(
            img,
            rotation_matrix,
            (new_width, new_height),
            flags=cv2.INTER_CUBIC,  # bicubic interpolation for higher quality
            borderMode=cv2.BORDER_REPLICATE,  # fill corners from edge pixels
        )

    print(f"旋转完成!原始尺寸: {width}x{height} → 新尺寸: {new_width}x{new_height}")
    return rotated_img