【java执行python】
java执行python脚本
java代码如下
public String executePythonScript(String scriptPath, String... args) {try {// 构建命令List<String> command = new ArrayList<>();command.add("python");command.add(scriptPath);command.addAll(Arrays.asList(args));ProcessBuilder pb = new ProcessBuilder(command);pb.redirectErrorStream(true); // 合并错误流和输出流// 启动进程long startTime = System.currentTimeMillis();Process process = pb.start();// 读取输出StringBuilder output = new StringBuilder();try (BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream()))) {String line;while ((line = reader.readLine()) != null) {output.append(line).append("\n");}}// 等待进程完成int exitCode = process.waitFor();long endTime = System.currentTimeMillis();System.out.println("Python脚本执行时间: " + (endTime - startTime) + "ms");if (exitCode != 0) {throw new RuntimeException("Python脚本执行失败,退出码: " + exitCode);}return output.toString();} catch (IOException | InterruptedException e) {throw new RuntimeException("执行Python脚本出错", e);}
}
python脚本
import argparse
import os
import sys
import tempfile
from urllib.parse import urlparse
from urllib.request import urlretrieve

from PIL import Image
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager


class ScreenshotTool:
    """Save a screenshot of a web page, or a copy of an image, to one output file.

    A web page URL is rendered with Chrome through Selenium; an image URL is
    downloaded and re-saved with Pillow; anything that is not a URL is treated
    as a local image path. Every ``capture*`` method returns the output path on
    success and ``None`` on failure, after printing the error to stderr.
    """

    # URL path suffixes that mark a direct image link rather than a web page.
    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif', '.bmp')

    def __init__(self, headless=True):
        """Create the tool.

        :param headless: run Chrome without a visible window when True
        """
        self.headless = headless
        # Chrome is started on first access to ``self.driver`` so that
        # image-only jobs never need a browser.
        self._driver = None

    @property
    def driver(self):
        """Return the Selenium WebDriver, creating it on first access."""
        if self._driver is None:
            self._driver = self._init_webdriver()
        return self._driver

    def _init_webdriver(self):
        """Build and return a Chrome WebDriver configured for screenshots."""
        chrome_options = Options()
        if self.headless:
            chrome_options.add_argument("--headless")
        chrome_options.add_argument("--disable-gpu")
        chrome_options.add_argument("--no-sandbox")
        chrome_options.add_argument("--disable-dev-shm-usage")
        chrome_options.add_argument("--window-size=1920,1080")

        # webdriver-manager downloads a ChromeDriver binary and returns its path.
        service = Service(ChromeDriverManager().install())
        return webdriver.Chrome(service=service, options=chrome_options)

    def capture_url(self, url, output_path="screenshot.png"):
        """Open *url* in Chrome and save a full-height screenshot.

        :param url: address of the web page
        :param output_path: file the screenshot is written to
        :return: *output_path* on success, ``None`` on failure
        """
        try:
            driver = self.driver
            driver.get(url)

            # NOTE: implicitly_wait() only sets the element-lookup timeout; it
            # does not pause here. Pages that render late need an explicit wait.
            driver.implicitly_wait(10)

            # Resize the window to the full document height so the screenshot
            # covers the whole page. Some pages report the height on <html>
            # rather than <body>, so take the larger of the two.
            total_height = driver.execute_script(
                "return Math.max(document.body.scrollHeight, "
                "document.documentElement.scrollHeight)"
            )
            driver.set_window_size(1920, total_height)

            # save_screenshot() reports a write failure by returning False.
            if not driver.save_screenshot(output_path):
                raise OSError(f"无法写入文件: {output_path}")
            print(f"截图已保存到: {os.path.abspath(output_path)}")
            return output_path
        except Exception as e:
            print(f"截图失败: {e}", file=sys.stderr)
            return None

    def capture_local_image(self, image_path, output_path="screenshot.png"):
        """Re-save a local image to *output_path* (keeps one interface for all sources).

        :param image_path: path of the local image
        :param output_path: file the image is written to
        :return: *output_path* on success, ``None`` on failure
        """
        try:
            # The context manager closes the source file handle after saving.
            with Image.open(image_path) as img:
                img.save(output_path)
            print(f"图片已保存到: {os.path.abspath(output_path)}")
            return output_path
        except Exception as e:
            print(f"处理本地图片失败: {e}", file=sys.stderr)
            return None

    def download_and_capture(self, url, output_path="screenshot.png"):
        """Download a remote image and re-save it to *output_path*.

        :param url: address of the image
        :param output_path: file the image is written to
        :return: *output_path* on success, ``None`` on failure
        """
        try:
            # A unique temp file avoids collisions between concurrent runs and
            # does not depend on the working directory being writable.
            fd, temp_file = tempfile.mkstemp(prefix="temp_download_image_")
            os.close(fd)
            try:
                urlretrieve(url, temp_file)
                # Opening with Pillow doubles as validation that this is an image.
                with Image.open(temp_file) as img:
                    img.save(output_path)
                print(f"图片已保存到: {os.path.abspath(output_path)}")
                return output_path
            finally:
                # Remove the temp file even when the download or decode failed.
                if os.path.exists(temp_file):
                    os.remove(temp_file)
        except Exception as e:
            print(f"下载和处理图片失败: {e}", file=sys.stderr)
            return None

    def close(self):
        """Quit the WebDriver if one was started."""
        if self._driver is not None:
            self._driver.quit()
            self._driver = None

    def is_url(self, input_str):
        """Return True when *input_str* has both a URL scheme and a network location."""
        try:
            result = urlparse(input_str)
            return all([result.scheme, result.netloc])
        except ValueError:
            return False

    def capture(self, source, output_path="screenshot.png"):
        """Dispatch *source* to the matching capture method.

        :param source: web page URL, image URL, or local image path
        :param output_path: file the result is written to
        :return: *output_path* on success, ``None`` on failure
        """
        if not self.is_url(source):
            # Not a URL: treat it as a local file path.
            return self.capture_local_image(source, output_path)

        # Test the URL *path* only, so that a query string or fragment
        # (".../a.png?size=large") does not hide the image extension.
        if urlparse(source).path.lower().endswith(self.IMAGE_EXTENSIONS):
            return self.download_and_capture(source, output_path)
        return self.capture_url(source, output_path)


def main():
    """Parse the command line, run one capture, and exit with status 1 on failure."""
    parser = argparse.ArgumentParser(description="网页/图片截图工具")
    parser.add_argument("source", help="图片或网页的URL/本地路径")
    parser.add_argument("-o", "--output", help="输出文件路径", default="screenshot.png")
    parser.add_argument("--visible", help="使用可见浏览器模式", action="store_true")
    args = parser.parse_args()

    tool = ScreenshotTool(headless=not args.visible)
    try:
        result = tool.capture(args.source, args.output)
        if not result:
            sys.exit(1)
    finally:
        # Always release the browser, including on the sys.exit() path.
        tool.close()


if __name__ == "__main__":
    main()
使用说明
安装依赖
在运行脚本前,需要安装以下依赖:
bash
pip install selenium pillow webdriver-manager
使用示例
截取网页并保存为 screenshot.png:
bash
python screenshot.py https://www.example.com
截取网页并指定输出文件名:
bash
python screenshot.py https://www.example.com -o example.png
使用可见浏览器模式截图:
bash
python screenshot.py https://www.example.com --visible
处理本地图片文件:
bash
python screenshot.py /path/to/local/image.jpg
处理远程图片文件:
bash
python screenshot.py https://example.com/image.jpg
功能说明
- 支持网页截图和图片处理(本地和远程)
- 自动检测输入是URL还是本地路径
- 自动区分网页和图片URL
- 使用无头浏览器模式(默认)或可见浏览器模式(--visible)
- 自动管理ChromeDriver版本
- 错误处理和清理资源
注意事项
- 确保系统已安装Chrome浏览器
- 首次运行会自动下载合适的ChromeDriver
- 对于复杂的网页,可能需要调整等待时间或添加更智能的等待逻辑