当前位置: 首页 > news >正文

识别干扰验证码——Python || Tesseract-OCR

目录

首先安装Tesseract-OCR

python实现获取

JAVA实现获取

首先安装Tesseract-OCR

可自行搜索Tesseract-OCR安装方法

python实现获取

提高识别成功率:

# Convert to grayscale and keep a copy on disk for inspection.
image = image.convert('L')
image.save('L.png')

# Binarize: gray levels below the threshold map to 0, all others to 1.
threshold = 130
table = [0 if level < threshold else 1 for level in range(256)]
image = image.point(table, '1')

需要优化:

        请求失败时,可以根据返回的结果判断是重新获取验证码,还是返回响应失败的原因。

# Tesseract-OCR 读取干扰验证码
import time
from PIL import Image
import pytesseract
import requests


def searchId(waybillNo):
    """Look up a waybill, solving the site's numeric captcha with Tesseract.

    The captcha endpoint is requested once to obtain a JSESSIONID cookie, then
    again with that cookie to download the captcha image. The image is saved,
    converted to grayscale, binarized and passed to Tesseract restricted to
    digits. The recognized code is finally sent together with the waybill
    number and the same cookie to ``/eos/awb/{waybillNo}/{code}``.

    Args:
        waybillNo: Waybill number to query.

    Returns:
        The decoded JSON response, or ``None`` when any step fails. Failures
        are reported on stdout rather than raised.
    """
    verify_url = 'https://www.xxx.com/api/verifyCode?'
    # requests waits indefinitely unless a timeout is given explicitly.
    timeout = 10

    try:
        # First request only establishes the session and yields its cookie.
        response = requests.get(verify_url + str(int(time.time() * 1000)), timeout=timeout)
        jession_id = response.cookies.get('JSESSIONID')
        print(jession_id)
        if not jession_id:
            # Without a session id the captcha cannot be tied to the lookup.
            print(f"获取失败:{waybillNo} (no JSESSIONID cookie in response)")
            return None

        # Download the captcha image bound to that session.
        CodeUrl = verify_url + str(int(time.time() * 1000))
        print(CodeUrl)
        headers = {
            "Cookie": "_bl_uid=7Omv727F2vUlFwjLtaXkav10ggsa; JSESSIONID=" + jession_id
        }
        response = requests.get(CodeUrl, headers=headers, timeout=timeout)
        if response.status_code != 200:
            # Stop here: continuing would OCR a stale or missing image file.
            print("请求失败,状态码:", response.status_code)
            return None

        with open("verify_code.png", "wb") as f:
            f.write(response.content)
        print("验证码图片已保存为 verify_code.png")

        # Grayscale first; the context manager releases the file handle.
        with Image.open('verify_code.png') as captcha:
            image = captcha.convert('L')
        image.save('L.png')

        # Binarize: gray levels below the threshold map to 0, all others to 1.
        threshold = 130
        table = [0 if level < threshold else 1 for level in range(256)]
        image = image.point(table, '1')

        raw_text = pytesseract.image_to_string(
            image,
            config='--psm 10 --oem 3 -c tessedit_char_whitelist=0123456789',
        )
        # Keep digits only: OCR output may carry newlines, form feeds or
        # spaces that would otherwise end up inside the request URL.
        codeText = ''.join(ch for ch in raw_text if ch in '0123456789')
        if not codeText:
            print(f"获取失败:{waybillNo} (OCR returned no digits)")
            return None

        # Query with waybill number + captcha code + the same session cookie.
        BASE_URL = "https://www.xxx.com"  # replace with the real base URL
        dhurl = f"{BASE_URL}/eos/awb/{waybillNo}/{codeText}"
        print(dhurl)
        response = requests.get(dhurl, headers=headers, timeout=timeout)
        data = response.json()
        print(data)
        return data

    except Exception as e:
        # Boundary handler: report the cause instead of discarding it.
        print(f"获取失败:{waybillNo} ({e})")
        return None

# Script entry point: query a sample waybill number when run directly.
if __name__ == '__main__':
    searchId('93387172455')

JAVA实现获取

    /**
     * Handles POST /getPdInfo: calls {@code getPdDhHtml} for every non-empty entry of the
     * request body and concatenates the results, each followed by an {@code <hr>} separator.
     *
     * @param items entries from the request body; null and empty entries are skipped
     * @return the concatenated text, or an empty string when nothing was processed
     */
    @PostMapping(value = "/getPdInfo", produces = "text/html;charset=UTF-8")
    @ResponseBody
    public String getPdInfo(@RequestBody List<String> items) {
        StringBuilder sb = new StringBuilder();
        if (items == null) {
            return sb.toString();
        }
        for (String item : items) {
            // A JSON array may contain null elements; skip them instead of throwing an NPE.
            if (item == null || item.isEmpty()) {
                continue;
            }
            sb.append(getPdDhHtml(item)).append("<hr>");
        }
        return sb.toString();
    }

    /**
     * Looks up one waybill on the remote site, solving its numeric captcha with Tesseract.
     *
     * <p>Each attempt obtains a fresh JSESSIONID, downloads the captcha with that cookie,
     * binarizes and OCRs it, then requests {@code /eos/awb/{dh}/{code}} with the same cookie.
     * An attempt that fails at any step (missing cookie, non-200 response, unreadable image,
     * no digits recognized, exception, or an unrecognized {@code msg}) is retried, up to
     * {@link #MAX_CAPTCHA_ATTEMPTS} times, instead of recursing without bound.
     *
     * @param dh waybill number to query
     * @return the {@code data} node when {@code msg} reads "null" (success), or the
     *         {@code msg} node when the server answers "运单号不合法!!" or "系统异常"
     * @throws IllegalStateException if no attempt produced one of those answers
     */
    private static JsonNode getDhJson(String dh) {
        for (int attempt = 1; attempt <= MAX_CAPTCHA_ATTEMPTS; attempt++) {
            try {
                String jsessionid = fetchSessionId();
                if (jsessionid == null) {
                    System.out.println("未找到 JSESSIONID");
                    continue;
                }

                Map<String, String> headers = new HashMap<>();
                headers.put("Cookie", "_bl_uid=7Omv727F2vUlFwjLtaXkav10ggsa; JSESSIONID=" + jsessionid);

                BufferedImage captcha = fetchCaptchaImage(headers);
                if (captcha == null) {
                    continue;
                }

                String codeText = recognizeDigits(binarize(captcha, 180));
                if (codeText.isEmpty()) {
                    // Nothing recognized: the lookup URL would end in "/", so retry instead.
                    continue;
                }

                JsonNode rootNode = queryWaybill(dh, codeText, headers);
                if (rootNode == null) {
                    continue;
                }

                // path() yields "" for a missing field; a JSON null yields the text "null".
                String msg = rootNode.path("msg").asText();
                if ("null".equals(msg)) { // msg of null means success
                    return rootNode.get("data");
                }
                if ("运单号不合法!!".equals(msg) || "系统异常".equals(msg)) {
                    return rootNode.get("msg");
                }
                // Any other message falls through to the next attempt.
            } catch (Exception e) {
                System.out.println("异常"+e);
            }
        }
        throw new IllegalStateException(
                "No usable response for waybill " + dh + " after " + MAX_CAPTCHA_ATTEMPTS + " attempts");
    }

    /** Upper bound on captcha attempts per waybill, so a failing site cannot cause endless retries. */
    private static final int MAX_CAPTCHA_ATTEMPTS = 10;

    /** Extracts the JSESSIONID value from a Set-Cookie header, with or without trailing attributes. */
    private static final Pattern JSESSIONID_PATTERN = Pattern.compile("JSESSIONID=([^;]+)");

    /** Requests the captcha endpoint once and returns the JSESSIONID it sets, or null if none is set. */
    private static String fetchSessionId() throws Exception {
        URL url = new URL("https://www.xxx.com/api/verifyCode?" + System.currentTimeMillis());
        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
        try {
            connection.setRequestMethod("GET");
            connection.getResponseCode();
            // Inspect every Set-Cookie header; the status line is stored under a null key.
            for (Map.Entry<String, List<String>> header : connection.getHeaderFields().entrySet()) {
                if (!"Set-Cookie".equalsIgnoreCase(header.getKey())) {
                    continue;
                }
                for (String cookie : header.getValue()) {
                    Matcher matcher = JSESSIONID_PATTERN.matcher(cookie);
                    if (matcher.find()) {
                        return matcher.group(1);
                    }
                }
            }
            return null;
        } finally {
            connection.disconnect();
        }
    }

    /** Downloads the captcha with the given headers; returns null on a non-200 or undecodable response. */
    private static BufferedImage fetchCaptchaImage(Map<String, String> headers) throws Exception {
        URL url = new URL("https://www.xxx.com/api/verifyCode?" + System.currentTimeMillis());
        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
        try {
            connection.setRequestMethod("GET");
            for (Map.Entry<String, String> entry : headers.entrySet()) {
                connection.setRequestProperty(entry.getKey(), entry.getValue());
            }
            int responseCode = connection.getResponseCode();
            if (responseCode != 200) {
                System.out.println("请求失败,状态码: " + responseCode);
                return null;
            }
            // Decode straight from the stream; no temporary file is written or left behind.
            try (InputStream inputStream = connection.getInputStream()) {
                return ImageIO.read(inputStream);
            }
        } finally {
            connection.disconnect();
        }
    }

    /** Returns a grayscale copy of the image with every pixel forced to black or white at the threshold. */
    private static BufferedImage binarize(BufferedImage source, int threshold) {
        BufferedImage grayImage =
                new BufferedImage(source.getWidth(), source.getHeight(), BufferedImage.TYPE_BYTE_GRAY);
        java.awt.Graphics graphics = grayImage.getGraphics();
        try {
            graphics.drawImage(source, 0, 0, null);
        } finally {
            graphics.dispose();
        }
        for (int y = 0; y < grayImage.getHeight(); y++) {
            for (int x = 0; x < grayImage.getWidth(); x++) {
                int gray = (grayImage.getRGB(x, y) >> 16) & 0xFF;
                // Below the threshold becomes black, everything else white.
                grayImage.setRGB(x, y, gray < threshold ? 0xFF000000 : 0xFFFFFFFF);
            }
        }
        return grayImage;
    }

    /** Runs Tesseract with a digits-only whitelist and returns just the digits it recognized. */
    private static String recognizeDigits(BufferedImage image) throws Exception {
        Tesseract tesseract = new Tesseract();
        tesseract.setDatapath("D:\\work\\Tesseract-OCR\\tessdata"); // tessdata location
        tesseract.setLanguage("eng");
        tesseract.setTessVariable("tessedit_char_whitelist", "0123456789");
        return tesseract.doOCR(image).replaceAll("\\D", "");
    }

    /** Calls the lookup endpoint and returns the parsed JSON body, or null on a non-200 response. */
    private static JsonNode queryWaybill(String dh, String codeText, Map<String, String> headers)
            throws Exception {
        String baseUrl = "https://www.xxx.com";
        // dh comes from the request body, so escape it before placing it in the URL path.
        String dhurl = baseUrl + "/eos/awb/"
                + java.net.URLEncoder.encode(String.valueOf(dh), "UTF-8") + "/" + codeText;

        HttpURLConnection dhConnection = (HttpURLConnection) new URL(dhurl).openConnection();
        try {
            dhConnection.setRequestMethod("GET");
            for (Map.Entry<String, String> entry : headers.entrySet()) {
                dhConnection.setRequestProperty(entry.getKey(), entry.getValue());
            }
            int dhResponseCode = dhConnection.getResponseCode();
            if (dhResponseCode != 200) {
                System.out.println("请求失败,状态码: " + dhResponseCode);
                return null;
            }
            // Parse from the byte stream so Jackson detects the JSON encoding itself,
            // rather than decoding with the platform default charset first.
            try (InputStream body = dhConnection.getInputStream()) {
                return new ObjectMapper().readTree(body);
            }
        } finally {
            dhConnection.disconnect();
        }
    }

http://www.dtcms.com/a/111781.html

相关文章:

  • 【深度学习】CNN简述
  • Talend API Tester
  • 请求被中止: 未能创建 SSL/TLS 安全通道。
  • Nature Communications|马兰戈尼效应构建空心金字塔微阵列制备高灵敏度柔性电容式压力传感器(健康监测/柔性压力传感/柔性电子/人机交互)
  • Spring 核心技术解析【纯干货版】- XXII:Spring 扫描效率提升模块 Spring-Context-Indexer 模块精讲
  • Lock接口方法介绍
  • JavaScript 中那些不常见的 for 循环命名与高阶用法
  • ModuleNotFoundError: No module named ‘matplotlib_inline‘
  • C++进阶知识复习 31~38
  • SSL证书自动化管理(ACME协议)工作流程介绍
  • 小动物多导生理记录仪产品需求定义
  • 12.青龙面板自动化我的生活
  • 01背包问题:详细解释为什么重量维度必须从大到小遍历。
  • SignalR给特定User发送消息
  • Spring event 和 MQ的区别及应用场景
  • 《Linux内存管理:实验驱动的深度探索》【附录】【实验环境搭建 4】【Qemu 如何模拟numa架构】
  • Nginx-日志配置
  • 【家政平台开发(14)】家政平台PC前端(Element plus)开发指南:从集成到组件应用
  • 【备考高项】附录:商标法(2013年修正 73条全)
  • QEMU源码全解析 —— 块设备虚拟化(14)
  • 深度学习处理文本(10)
  • 解决GraalVM Native Maven Plugin错误:JAVA_HOME未指向GraalVM Distribution
  • 【家政平台开发(15)】解锁Spring Boot:家政平台后端开发全攻略
  • 深度学习处理文本(12)
  • Linux的 /etc/sysctl.conf 笔记250404
  • AI大模型:(二)1.3 linux本地部署通义万相2.1+deepseek视频生成
  • ARM Cortex-A7 处理器支持的汇编指令集全面总结
  • 【Cursor】打开Vscode设置
  • 【nacos安装指南】
  • 关于termux运行pc交叉编译的aarch64 elf的问题