【Python】基于Python提取图片验证码
一、主要步骤
1、图片扩大10倍(图片像素比较小的情况)
2、去噪,提取图片黑色相关像素并去除
3、灰度化
4、二值化
5、识别图片验证码
二、主要代码
1、图片扩大10倍(图片像素比较小的情况)
# Read the source height/width and upscale the image 10x in both directions.
src_height, src_width = image.shape[:2]
target_size = (src_width * 10, src_height * 10)  # cv.resize takes (width, height)
enlarged_image = cv.resize(image, target_size, interpolation=cv.INTER_LINEAR)
2、去噪,提取图片黑色相关像素并去除
# Collect the colours that belong to the dark interference lines.
# tolist() turns every channel value into a plain Python int, so the
# +/- tolerance arithmetic below cannot wrap around the way uint8 scalars can
# (e.g. under NumPy 2 promotion rules, uint8(0) - 5 becomes 251).
pixels = enlarged_image.reshape(-1, 3).tolist()
# Count how often each colour occurs and inspect the 20 most frequent ones.
color_counts = Counter(map(tuple, pixels))
# Heuristic kept from the original: a colour is treated as "line colour" when
# its first channel is exactly 0 (the image is handled in OpenCV's BGR order).
remove_colors = [color for color, count in color_counts.most_common(20) if color[0] == 0]

# Erase the line colours.
if remove_colors:
    # Start with an all-zero mask (nothing selected for removal).
    mask = np.zeros(enlarged_image.shape[:2], dtype=np.uint8)
    tolerance = 5
    # OR together one mask per target colour.
    for color in remove_colors:
        lower_bound = np.array([max(0, c - tolerance) for c in color], dtype=np.uint8)
        upper_bound = np.array([min(255, c + tolerance) for c in color], dtype=np.uint8)
        # Match against the BGR image itself: the colours were collected in
        # BGR order, so comparing them with an RGB-converted copy would swap
        # the first and last channels and select the wrong pixels.
        color_mask = cv.inRange(enlarged_image, lower_bound, upper_bound)
        mask = cv.bitwise_or(mask, color_mask)
    # Paint the masked pixels white (or any other colour you prefer).
    result_img = enlarged_image.copy()
    result_img[mask != 0] = [255, 255, 255]
else:
    result_img = enlarged_image
3、灰度化
# Convert the cleaned, enlarged image to grayscale.
gray = cv.cvtColor(result_img, cv.COLOR_BGR2GRAY)
# Show the grayscale image; the window is titled 'gray Image', so display
# `gray` rather than the colour image it was derived from.
cv.imshow('gray Image', gray)
# Run OCR on the grayscale image.
gray_bytes = image_to_bytes(gray)
result = ocr.classification(gray_bytes)
print(f'ddddocr 识别结果(灰度): {result}')
4、二值化
# Optional further processing: Otsu binarisation followed by morphology.
# THRESH_BINARY_INV turns the dark glyphs white on a black background.
_, binary = cv.threshold(gray, 0, 255, cv.THRESH_BINARY_INV | cv.THRESH_OTSU)
erode = cv.erode(binary, None, iterations=2)
dilate = cv.dilate(erode, None, iterations=1)
# Invert in place so the background is white and the glyphs are black again
# before the image is handed to OCR.
cv.bitwise_not(dilate, dilate)
5、识别图片验证码
# Run OCR on the fully processed image and report the outcome.
result = ocr.classification(image_to_bytes(dilate))
print(f'ddddocr 识别结果(处理后): {result}')
三、全部代码
1、直接处理图片
import cv2 as cv
from PIL import Image
import ddddocr
import numpy as np
from collections import Counter# 初始化OCR工具
from io import BytesIO

# Initialise the OCR tool once; every recognition call below reuses it.
ocr = ddddocr.DdddOcr(det=False, ocr=True, show_ad=False)


def image_to_bytes(image):
    """Encode a NumPy image array as PNG and return the encoded bytes.

    Args:
        image: Array accepted by ``PIL.Image.fromarray``.

    Returns:
        bytes: The PNG-encoded image.
    """
    buf = BytesIO()
    Image.fromarray(image).save(buf, format='PNG')
    return buf.getvalue()


def _remove_line_colors(image_bgr, top_n=20, tolerance=5):
    """Return a copy of ``image_bgr`` with the interference-line colours painted white.

    The ``top_n`` most frequent colours are inspected; those whose first
    channel (blue, in BGR order) is exactly 0 are treated as line colours.
    Every pixel within ``tolerance`` of such a colour on all channels is
    replaced by white. When no colour qualifies the input is returned as is.
    """
    # tolist() yields plain Python ints, so the +/- tolerance arithmetic cannot
    # wrap around the way uint8 scalars can (e.g. under NumPy 2 promotion).
    pixels = image_bgr.reshape(-1, 3).tolist()
    color_counts = Counter(map(tuple, pixels))
    remove_colors = [
        color for color, _ in color_counts.most_common(top_n) if color[0] == 0
    ]
    if not remove_colors:
        return image_bgr

    # All-zero mask: nothing is selected for removal yet.
    mask = np.zeros(image_bgr.shape[:2], dtype=np.uint8)
    for color in remove_colors:
        lower_bound = np.array([max(0, c - tolerance) for c in color], dtype=np.uint8)
        upper_bound = np.array([min(255, c + tolerance) for c in color], dtype=np.uint8)
        # Match in BGR, the same channel order the colours were collected in;
        # matching against an RGB copy would swap the first and last channels.
        mask = cv.bitwise_or(mask, cv.inRange(image_bgr, lower_bound, upper_bound))

    cleaned = image_bgr.copy()
    cleaned[mask != 0] = (255, 255, 255)
    return cleaned


def recognize_text(image, *, scale=10, show=True):
    """Pre-process a captcha image and run OCR on it.

    Steps: upscale, erase the dark interference lines, convert to grayscale
    (OCR pass 1), then Otsu-binarise, erode/dilate and invert (OCR pass 2).
    Both OCR results are printed.

    Args:
        image: BGR image array as returned by ``cv.imread``. Single-channel
            and 4-channel arrays are converted to 3-channel BGR first.
        scale: Integer enlargement factor applied to width and height.
        show: When true, display the intermediate images with ``cv.imshow``;
            the caller is responsible for ``cv.waitKey``.

    Returns:
        The value returned by ``ocr.classification`` for the fully processed
        image (presumably the recognised text).
    """
    # The colour statistics below need exactly three channels.
    if image.ndim == 2:
        image = cv.cvtColor(image, cv.COLOR_GRAY2BGR)
    elif image.shape[2] == 4:
        image = cv.cvtColor(image, cv.COLOR_BGRA2BGR)

    # Enlarge the image; small captchas are hard to clean up at native size.
    height, width = image.shape[:2]
    enlarged_image = cv.resize(
        image, (width * scale, height * scale), interpolation=cv.INTER_LINEAR
    )

    # Denoise: paint the dark line colours white.
    result_img = _remove_line_colors(enlarged_image)
    # Optional alternative: edge-preserving filtering, e.g.
    # cv.pyrMeanShiftFiltering(enlarged_image, sp=10, sr=150).

    # Grayscale conversion and first OCR pass.
    gray = cv.cvtColor(result_img, cv.COLOR_BGR2GRAY)
    if show:
        # Show the grayscale image itself, matching the window title.
        cv.imshow('gray Image', gray)
    result = ocr.classification(image_to_bytes(gray))
    print(f'ddddocr 识别结果(灰度): {result}')

    # Otsu binarisation plus morphology to remove remaining specks.
    _, binary = cv.threshold(gray, 0, 255, cv.THRESH_BINARY_INV | cv.THRESH_OTSU)
    eroded = cv.erode(binary, None, iterations=2)
    dilate = cv.dilate(eroded, None, iterations=1)
    # Invert in place: white background, black glyphs, easier to recognise.
    # `dilate` is already single-channel here, so no further conversion is needed.
    cv.bitwise_not(dilate, dilate)
    if show:
        cv.imshow('Processed Image', dilate)

    # Second OCR pass on the fully processed image.
    result = ocr.classification(image_to_bytes(dilate))
    print(f'ddddocr 识别结果(处理后): {result}')
    return result


# Load the image.
# Read the captcha from disk; cv.imread yields None when loading fails.
src = cv.imread(r'C:\Users\Administrator\Desktop\get.png')
if src is None:
    print("无法加载图像,请检查路径是否正确")
else:
    cv.imshow('Input Image', src)
    recognize_text(src)
    # Block until a key is pressed so the windows stay visible.
    cv.waitKey(0)
# Close any windows that were opened.
cv.destroyAllWindows()
2、图片为Base64编码的字符串
import cv2 as cv
from PIL import Image
import ddddocr
import base64
import numpy as np
from collections import Counter# 初始化OCR工具
from io import BytesIO

# Initialise the OCR tool once; every recognition call below reuses it.
ocr = ddddocr.DdddOcr(det=False, ocr=True, show_ad=False)


def image_to_bytes(image):
    """Encode a NumPy image array as PNG and return the encoded bytes.

    Args:
        image: Array accepted by ``PIL.Image.fromarray``.

    Returns:
        bytes: The PNG-encoded image.
    """
    buf = BytesIO()
    Image.fromarray(image).save(buf, format='PNG')
    return buf.getvalue()


def _remove_line_colors(image_bgr, top_n=20, tolerance=5):
    """Return a copy of ``image_bgr`` with the interference-line colours painted white.

    The ``top_n`` most frequent colours are inspected; those whose first
    channel (blue, in BGR order) is exactly 0 are treated as line colours.
    Every pixel within ``tolerance`` of such a colour on all channels is
    replaced by white. When no colour qualifies the input is returned as is.
    """
    # tolist() yields plain Python ints, so the +/- tolerance arithmetic cannot
    # wrap around the way uint8 scalars can (e.g. under NumPy 2 promotion).
    pixels = image_bgr.reshape(-1, 3).tolist()
    color_counts = Counter(map(tuple, pixels))
    remove_colors = [
        color for color, _ in color_counts.most_common(top_n) if color[0] == 0
    ]
    if not remove_colors:
        return image_bgr

    # All-zero mask: nothing is selected for removal yet.
    mask = np.zeros(image_bgr.shape[:2], dtype=np.uint8)
    for color in remove_colors:
        lower_bound = np.array([max(0, c - tolerance) for c in color], dtype=np.uint8)
        upper_bound = np.array([min(255, c + tolerance) for c in color], dtype=np.uint8)
        # Match in BGR, the same channel order the colours were collected in;
        # matching against an RGB copy would swap the first and last channels.
        mask = cv.bitwise_or(mask, cv.inRange(image_bgr, lower_bound, upper_bound))

    cleaned = image_bgr.copy()
    cleaned[mask != 0] = (255, 255, 255)
    return cleaned


def recognize_text(image, *, scale=10, show=True):
    """Pre-process a captcha image and run OCR on it.

    Steps: convert to a BGR array, upscale, erase the dark interference
    lines, convert to grayscale, Otsu-binarise, erode/dilate, invert, then
    run OCR and print the result.

    Args:
        image: A PIL image (any mode; it is normalised to RGB first) or an
            array in RGB channel order.
        scale: Integer enlargement factor applied to width and height.
        show: When true, display the intermediate images with ``cv.imshow``;
            the caller is responsible for ``cv.waitKey``.

    Returns:
        The value returned by ``ocr.classification`` for the fully processed
        image (presumably the recognised text).
    """
    # Normalise palette/grayscale/alpha images so the RGB->BGR conversion
    # below always receives exactly three channels.
    if isinstance(image, Image.Image):
        image = image.convert("RGB")
    # Convert the PIL image to a NumPy array in OpenCV's BGR order.
    image = cv.cvtColor(np.array(image), cv.COLOR_RGB2BGR)

    # Enlarge the image; small captchas are hard to clean up at native size.
    height, width = image.shape[:2]
    enlarged_image = cv.resize(
        image, (width * scale, height * scale), interpolation=cv.INTER_LINEAR
    )

    # Denoise: paint the dark line colours white.
    result_img = _remove_line_colors(enlarged_image)

    # Grayscale conversion. The intermediate grayscale OCR pass is skipped in
    # this variant; only the fully processed image is recognised.
    gray = cv.cvtColor(result_img, cv.COLOR_BGR2GRAY)
    if show:
        # Show the grayscale image itself, matching the window title.
        cv.imshow('gray Image', gray)

    # Otsu binarisation plus morphology to remove remaining specks.
    _, binary = cv.threshold(gray, 0, 255, cv.THRESH_BINARY_INV | cv.THRESH_OTSU)
    eroded = cv.erode(binary, None, iterations=2)
    dilate = cv.dilate(eroded, None, iterations=1)
    # Invert in place: white background, black glyphs, easier to recognise.
    # `dilate` is already single-channel here, so no further conversion is needed.
    cv.bitwise_not(dilate, dilate)
    if show:
        cv.imshow('Processed Image', dilate)

    # OCR on the fully processed image.
    result = ocr.classification(image_to_bytes(dilate))
    print(f'图片验证码识别结果(处理后): {result}')
    return result


# Sample captcha (JPEG) as a Base64 string; the literal is split into two
# adjacent pieces only for layout, the value is a single continuous string.
base64_image = (
    "/9j/4AAQSkZJRgABAgAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCAAtAH0DASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwDt7W1t2tYWaCMkopJKDnism88S+GtO8QQ6JdyQxXsoyA8W1FBBIyxGOcY69a3LP/jzg/65r/KvHvjHpFsnifRb+bekF4PJuHVuQFYcjPAO1v0qIxjyrQiMY8q0PZUtLN1DLBAykZBCAg1ILG0/59Yf+/Yryc/CnxHojGTwx4pkjGciKRmiB+u3IP4iornxZ8TPCFu0utaZb3tomAbhkBAycD5oyMckDkU+WPYfLHsewCxtP+fWD/v2KeLCz/59IP8Av2K8z0v44aNKwi1bTrywmHDFQJFB9+jfpXc6R4y8Oa3tGn6zaSu3SMvsc/8AAWwf0o5Y9g5Y9jXGn2X/AD6Qf9+x/hTxp1l/z52//fpf8KmFSAUcsewcsexANNsf+fK3/wC/S/4U8aZYf8+Vt/36X/CuW8feMo/CemxBEL3dzkRDkAAY3HPryK80svin4kt2+0yRmS3fgsyfIDyCRx7jpjkV62EyLFYqj7anFcvS9lf0IlKnF2Z7uNMsP+fG2/79L/hTxpen/wDPjbf9+V/wrKsvEtsmjR3erTQWkojdpRvyoMbBJMHvhzt9+2aoaj8TfCmmK2/UhM4yAkKlySCR246r+orhhgqtSXLCm2/JXKagtzpxpWnf8+Fr/wB+V/wp40nTv+gfa/8Aflf8K8lvfjtGblYtN0g+UWAMtzJzjP8AdX/GvYrVpntomuEVJioLqvQH0rXF5bXwii68OXm22FHklsRDSNN/6B9p/wB+V/wrlvGlnbWn2H7Pbww7vM3eWgXONvXFduBXH+Pf+Yf/ANtP/Za8zExSpOyM8RFKm9CCz/484P8Armv8q4H402H2nwXFdBfmtbpGJx0VgVP6la7+y/48oP8Armv8qmmt4bqB4LiJJoXGGSRQyt9Qa2j8KNo/CjE0TxJYyeDtL1e7u4445oY1eRjx5mMMPzBH4Vzvj7xPotza6RYrqEMkU2owyXIQ7sQoSzdPcAVz2o2I8Cand6Hd2kl94Z1YPJZqEMjW04HQDrxx+GD2NcBYvbprKyTQg20BkXEqkMxK45B9DXs5dgaOLi4"
    "3fOmtF1XV+Vlf52JnJx9D1fXPGuia6fIt/DI1jLCMSzQDAY9ADjI/MVm2nw00HxFYfbTYXWjKU374Zt8fQk5D5OOOxFY3hK7Z0tLRG2TyXkbxbvukKMH8e1eteJIl0/weY4I2RUQ7V3c5II2n1+8eB6V1ZtgqGFqRw8IWbe923b8vwFTk5K7POLDRfEXhSaUaD42gkjQ7Vt71GEbYJBAB3AdOox9RW/B8TPE+lW5l8QeFDPaqPmvdMk3oo9SMsB+JFZfhbQYfFsVy19JtkYSEOiHAJAAIJP8Ashvfn0rc0DQ4zq5026tJEs4JGkAEg2y5AAVwx5AAyMZrlxGAw9Lmhzvmjvpp8tdRqTfQ8k8TeL28Ua5PeSOyRFsRRSH7qjgA/wCe5rsvBfi3U4CbGeytZbNlDDzbfILYVBg9sj+vrWb8TtHs7HxIY/syo0jb2kOdzj1A4469vYE4zXTeGfhfb6jpI1DQvEOpaRcs3ziNvMjx1CkAjJ9ee44r18XjJwy2kqlOLhJWVrpq33/mZxinN2ZY+Kd2Lvw7a+VD9nRVSYx5yvLNGAB26Z/KuQ8O+CdO1C2a81nxDa6dbKkb7Tgu4YZwMkc/ga6fxxoXjVrBob7TINRt1jRFu9LU7gFctloTzn5j93iuf8N/D6HxRbubTxJZPcxg7rMhhMuM8FWwR+WK1ynMKFPA+ydf2bu+l3bT/g6hUg3K9rnc+GNJ+FYuktraVL66B+WS6Zju6duB19u9ewCvj3UrC58Oa89rI3762kB3L0JHp+NfU/gu+k1Pwfpl3MSZZIQXz1zXHxDgHRVPEKq5xl/M7+enkVSne6tY3wK4/wAe/wDMP/7af+y12Irj/H3/ADDv+2n/ALLXyOJ/hMWJ/hMgsv8Ajyg/65r/ACq0BXPwa55MEcf2fOxQud/XA+lSjxFj/l1/8if/AFqUcTSSSuKOIppLU3gAcZA46V4h8RNBNt4uiljVdl0wB3A4ZySR+GCo/CvUx4kx/wAun/kT/wCtWfqdzp+ry20l7pvmG3fev74jPseORnB/CvUynOqeAr+1vpZp/wBepNStSkrXOK8LeHTZfEYWzQyJHDHnym+YKxADZzztOHII6ZWvX9VsV1HSri1aNJBIuNrHGfoex9D2POD0rETxBDHKZV05BIV2lw/JHpnFTDxXj/ly/wDIv/1qjHZ0sXVjVk7OKS69Oo41qUVa5xfgjUE8M6td2OoqIQznHJG3146Fc5/2kyRyCxrrNd8QWX27Tba12Su0+4hZIkOM8Y3gggn0IPpVLVrqx1dleaxaOQOr+ZHKA2V6EHbwfcfjkcVU0QWmj3IuRbM84ABaOUxhwOhYD7x9SevpWtXNMHWqe3n8VtVrZvuJV4JWTOU+MtvcS+KbRliZl+yBsLzg5Of6V6V8MPOXwfDDcII5I2I8vaFKDA4KjkH68nr3wIL3W7HUXjkutJSR4xhGaUEryCcZX269evrU9n4ktdPj8u00lYo8fdWXA7/7PvRiM8pVsDTwlvg66+YKrSUnK5Z8U+MU8I3UEl7CWsZiF3Aktu5zgdMABfruPpXNeKfEfw21rT1vr9I7i7GDHJbgx3CnI6OuCMcnk44rT1zV7DxDpz2OoaVvifuJsEfQ7a4ZfBHh4XHmNBdlN27yxcADHPGdvuPy96rB4nJ5U19aclJdtmv0CWJjf3X+Z55Z6VrvijVTDpyzX8uDgTvlgM55c+5PWvoLwx420bQrKz0HXLe78P3kSCNV1FNscpHGVlHyMPfIo0fXdM0G1FtpmhR28Y7LNyfqSuT0FW7zxdaajava32hw3Nu/DRTSB1b6gripzLOqOKahSuqcdk7v+vTZBCtTjq3r6HcxSJLGskbq6MMqynII9Qa5Dx//AMw7/tp/7LVaw8YWumWMNlZaKkFtCoSOJJsBQOw+Ws7xB4g/t37P/ovkeTu/5abs5x7D0rxa9enKm0mTWrQlBpM//9k="
)
def strip_data_uri_prefix(data):
    """Return the bare Base64 payload of ``data``.

    Accepts either a raw Base64 string or a data URI such as
    ``data:image/png;base64,<payload>``. Everything up to and including the
    last comma is dropped. Both the ASCII comma and the full-width comma
    (U+FF0C) are recognised; the Base64 alphabet contains neither, so a raw
    payload comes back unchanged apart from surrounding whitespace.

    Args:
        data: Base64 text, optionally carrying a data-URI header.

    Returns:
        str: The part of ``data`` after the last comma, stripped.
    """
    normalized = data.replace("\uff0c", ",")
    # rpartition returns the whole string as the last item when no comma exists.
    return normalized.rpartition(",")[2].strip()


if __name__ == "__main__":
    if base64_image:
        # Decode the Base64 string back into an image.
        payload = strip_data_uri_prefix(base64_image)
        image = Image.open(BytesIO(base64.b64decode(payload)))
        recognize_text(image)
        # Without an event-loop call the cv.imshow windows never get drawn;
        # wait for a key press, then close them.
        cv.waitKey(0)
        cv.destroyAllWindows()