Pyppeteer 使用教程
Pyppeteer 完整使用教程
目录
- 简介
- 安装与配置
- 基础用法
- 页面操作
- 元素交互
- 高级功能
- 最佳实践
- 实战案例
- 故障排除
- 总结
- 参考
简介
什么是 Pyppeteer?
Pyppeteer 是一个 Python 版本的 Puppeteer,是一个用于控制无头 Chrome 或 Chromium 浏览器的库。
它提供高级 API 来控制无头 Chrome/Chromium 浏览器。
它支持网页自动化、截图、PDF 生成、爬虫等功能。
安装与配置
基础安装
pip install pyppeteer
安装系统依赖(Linux)
# Ubuntu/Debian
sudo apt update
sudo apt install -y gconf-service libasound2 libatk1.0-0 libatk-bridge2.0-0 libc6 libcairo2 libcups2 libdbus-1-3 libexpat1 libfontconfig1 libgcc1 libgconf-2-4 libgdk-pixbuf2.0-0 libglib2.0-0 libgtk-3-0 libnspr4 libpango-1.0-0 libpangocairo-1.0-0 libstdc++6 libx11-6 libx11-xcb1 libxcb1 libxcomposite1 libxcursor1 libxdamage1 libxext6 libxfixes3 libxi6 libxrandr2 libxrender1 libxss1 libxtst6 ca-certificates fonts-liberation libappindicator1 libnss3 lsb-release xdg-utils wget
浏览器下载
- 显示浏览器下载相关信息
from pyppeteer import chromium_downloader

# Platform key whose executable path and download URL are looked up below.
platform_key = 'win64'
win64_executable = chromium_downloader.chromiumExecutable.get(platform_key)
win64_url = chromium_downloader.downloadURLs.get(platform_key)

# Print the default Chromium revision, then the win64 executable path and URL.
print(f'默认版本是:{chromium_downloader.REVISION}')
print(f'可执行文件默认路径:{win64_executable}')
print(f'win64平台下载链接为:{win64_url}')
- 下载地址:
  - 可用下载
    - https://registry.npmmirror.com/binary.html?path=chromium-browser-snapshots/Win_x64/
    - https://github.com/ungoogled-software/ungoogled-chromium-windows/releases
  - 官网下载
    - https://commondatastorage.googleapis.com/chromium-browser-snapshots/index.html
    - https://commondatastorage.googleapis.com/chromium-browser-snapshots/index.html?prefix=Mac/330231/
    - https://storage.googleapis.com/chromium-browser-snapshots/index.html?prefix=Win_x64/118
- 从上面的地址下载 Chrome 浏览器并解压
将 chrome 解压后,确保可执行文件位于:C:\Users\%username%\AppData\Local\pyppeteer\pyppeteer\local-chromium\1181205\chrome-win\chrome.exe
├─1181205
│ └─chrome-win
│ ├─IwaKeyDistribution
│ ├─locales
│ ├─userdata
│ │ 141.0.7390.122.manifest
│ │ chrome.dll
│ │ chrome.exe
│ │ chrome_100_percent.pak
│ │ chrome_200_percent.pak
│ │ chrome_elf.dll
│ │ chrome_proxy.exe
验证安装
import asyncio
import pyppeteer


async def check_installation():
    """Print the installed pyppeteer version to confirm the package imports."""
    print(f"Pyppeteer版本: {pyppeteer.__version__}")


asyncio.run(check_installation())
基础用法
1. 启动浏览器
import asyncio
from pyppeteer import launch


async def main():
    """Open a visible browser, load example.com, pause briefly, then close."""
    # headless=False shows the browser window instead of running headless.
    browser = await launch(headless=False)
    try:
        page = await browser.newPage()
        await page.goto('https://www.example.com')
        # Keep the window open for a moment so the result can be seen.
        await asyncio.sleep(5)
    finally:
        # Close the browser even if navigation fails, so no process is leaked.
        await browser.close()


# Run the entry-point coroutine.
asyncio.get_event_loop().run_until_complete(main())
2. 最简单的示例
import asyncio
from pyppeteer import launch


async def basic_example():
    """Load a page headlessly and print its title and URL."""
    browser = await launch(headless=True)
    try:
        page = await browser.newPage()
        await page.goto('https://httpbin.org/html')

        # Read basic page information.
        title = await page.title()
        url = page.url
        print(f"标题: {title}")
        print(f"URL: {url}")
    finally:
        # Close the browser even if a step above raises.
        await browser.close()


asyncio.run(basic_example())
3. 浏览器启动选项
import asyncio
from pyppeteer import launch


async def browser_options():
    """Launch a browser with explicit options, open example.com, then close."""
    browser = await launch(
        headless=False,  # show the browser window
        devtools=True,  # open the developer tools
        args=[
            '--window-size=1200,800',
            '--disable-infobars',
            '--no-sandbox',
            '--disable-setuid-sandbox',
            '--disable-dev-shm-usage',
        ],
        userDataDir='./user_data',  # user data (profile) directory
    )
    try:
        page = await browser.newPage()
        await page.setViewport({'width': 1200, 'height': 800})
        await page.goto('https://example.com')
        await asyncio.sleep(3)
    finally:
        # Close the browser even if a step above raises.
        await browser.close()


asyncio.run(browser_options())
页面操作
导航控制
import asyncio
from pyppeteer import launch


async def navigation_control():
    """Navigate with explicit wait options, reload, and report the page size."""
    browser = await launch(headless=True)
    try:
        page = await browser.newPage()

        # Default navigation timeout in milliseconds.
        page.setDefaultNavigationTimeout(30000)

        response = await page.goto('https://httpbin.org/html', {
            'waitUntil': 'networkidle2',  # wait for the network to go idle
            'timeout': 15000,
        })
        print(f"状态码: {response.status}")

        await page.reload()

        content = await page.content()
        print(f"页面内容长度: {len(content)}")
    finally:
        # Close the browser even if a step above raises.
        await browser.close()


asyncio.run(navigation_control())
多页面管理
import asyncio
from pyppeteer import launch


async def multi_page_management():
    """Open two pages, count the open pages, and bring the JSON page to front."""
    browser = await launch(headless=True)
    try:
        page1 = await browser.newPage()
        page2 = await browser.newPage()

        # Each page navigates independently.
        await page1.goto('https://httpbin.org/html')
        await page2.goto('https://httpbin.org/json')

        pages = await browser.pages()
        print(f"打开的页面数量: {len(pages)}")

        # Activate the first page whose URL contains 'json'.
        for page in pages:
            if 'json' in page.url:
                await page.bringToFront()
                break
    finally:
        # Close the browser even if a step above raises.
        await browser.close()


asyncio.run(multi_page_management())
元素交互
元素选择器
import asyncio
from pyppeteer import launch


async def element_selectors():
    """Select elements by CSS selector and by XPath and print the counts."""
    browser = await launch(headless=True)
    try:
        page = await browser.newPage()
        await page.goto('https://httpbin.org/forms/post')

        # Single element by CSS selector.
        # NOTE(review): assumes the form has an element with id "custname" —
        # confirm against the live page markup.
        element = await page.querySelector('#custname')

        # All elements matching a CSS selector.
        all_inputs = await page.querySelectorAll('input')
        print(f"找到 {len(all_inputs)} 个输入框")

        # Elements matching an XPath expression.
        xpath_elements = await page.xpath('//input[@type="text"]')
        print(f"通过XPath找到 {len(xpath_elements)} 个元素")
    finally:
        # Close the browser even if a step above raises.
        await browser.close()


asyncio.run(element_selectors())
表单操作
import asyncio
from pyppeteer import launch


async def form_operations():
    """Fill in and submit the httpbin order form in a visible browser."""
    browser = await launch(headless=False)
    try:
        page = await browser.newPage()
        await page.goto('https://httpbin.org/forms/post')

        # NOTE(review): the selectors below assume the form fields carry
        # these ids — confirm against the live page markup.

        # Text fields.
        await page.type('#custname', '测试用户')
        await page.type('#custtel', '13800138000')
        await page.type('#email', 'test@example.com')

        # Radio button.
        await page.click('#size2')

        # Checkboxes.
        await page.click('#topping1')
        await page.click('#topping3')

        # Drop-down value.
        await page.select('#delivery', '17:00')

        # Text area.
        await page.type('#comments', '这是测试评论')

        # Start waiting for the navigation together with the click that
        # triggers it; waiting only after the click can miss the navigation.
        await asyncio.gather(
            page.waitForNavigation(),
            page.click('input[type="submit"]'),
        )
        print("表单提交成功!")
        await asyncio.sleep(2)
    finally:
        # Close the browser even if a step above raises.
        await browser.close()


asyncio.run(form_operations())
处理对话框
import asyncio
from pyppeteer import launch


async def main():
    """Accept every dialog the page raises, then trigger a sample alert."""
    browser = await launch(headless=False)
    try:
        page = await browser.newPage()

        # Event callbacks are synchronous, so schedule accept() as a task.
        page.on('dialog', lambda dialog: asyncio.ensure_future(dialog.accept()))

        await page.goto('https://www.example.com')

        # Trigger an alert box as an example.
        await page.evaluate('alert("这是一个测试对话框")')
        await asyncio.sleep(2)
    finally:
        # Close the browser even if a step above raises.
        await browser.close()


asyncio.get_event_loop().run_until_complete(main())
键盘鼠标操作
import asyncio
from pyppeteer import launch


async def input_operations():
    """Drive the keyboard and mouse directly, then scroll the page."""
    browser = await launch(headless=False)
    try:
        page = await browser.newPage()
        await page.goto('https://httpbin.org/forms/post')

        # Keyboard: Shift+Tab, then type some text.
        await page.keyboard.down('Shift')
        await page.keyboard.press('Tab')
        await page.keyboard.up('Shift')
        await page.keyboard.type('Hello World!')

        # Mouse: click at a coordinate, then press, move and release.
        await page.mouse.click(100, 200)
        await page.mouse.down()
        await page.mouse.move(300, 400)
        await page.mouse.up()

        # Scroll the page down by 500 pixels.
        await page.evaluate('window.scrollBy(0, 500)')
        await asyncio.sleep(3)
    finally:
        # Close the browser even if a step above raises.
        await browser.close()


asyncio.run(input_operations())
等待策略
显式等待
import asyncio
from pyppeteer import launch


async def waiting_strategies():
    """Demonstrate selector, fixed-delay, predicate and navigation waits."""
    browser = await launch(headless=True)
    try:
        page = await browser.newPage()

        # Default timeouts in milliseconds.
        # NOTE(review): confirm Page.setDefaultTimeout exists in the
        # installed pyppeteer release.
        page.setDefaultTimeout(10000)
        page.setDefaultNavigationTimeout(30000)

        # This endpoint responds after a 3 second delay.
        await page.goto('https://httpbin.org/delay/3')

        # NOTE(review): the two element waits below assume the response
        # renders <h1> and <p> elements — verify for this URL.

        # Wait for an element to appear.
        await page.waitForSelector('h1')

        # Wait a fixed 2 seconds.
        await page.waitFor(2000)

        # Wait until a JavaScript condition becomes true.
        await page.waitForFunction('document.querySelectorAll("p").length > 0')

        # waitForNavigation needs a navigation to wait for; with none pending
        # it would only time out. Start the wait together with a reload.
        await asyncio.gather(
            page.waitForNavigation({'waitUntil': 'networkidle0'}),
            page.reload(),
        )
        print("所有等待条件满足!")
    finally:
        # Close the browser even if one of the waits times out.
        await browser.close()


asyncio.run(waiting_strategies())
智能等待元素
import asyncio
from pyppeteer import launch


async def smart_waiting():
    """Wait for a visible <h1> with a timeout and print its text."""
    browser = await launch(headless=True)
    try:
        page = await browser.newPage()
        await page.goto('https://httpbin.org/html')
        try:
            # Wait until the element is visible, for at most 5 seconds.
            element = await page.waitForSelector('h1', {
                'timeout': 5000,
                'visible': True,
            })
            if element:
                text = await page.evaluate(
                    '(element) => element.textContent', element)
                print(f"找到元素: {text}")
        except Exception as e:
            print(f"等待元素超时: {e}")
    finally:
        # Close the browser even if a step above raises.
        await browser.close()


asyncio.run(smart_waiting())
截图与PDF
网页截图
import asyncio
from pyppeteer import launch


async def screenshot_examples():
    """Take viewport, full-page, element and clipped-region screenshots."""
    browser = await launch(headless=True)
    try:
        page = await browser.newPage()
        await page.goto('https://httpbin.org/html')

        # Viewport only.
        await page.screenshot({
            'path': 'viewport.png',
            'type': 'png',
            'fullPage': False,
        })

        # Whole page.
        await page.screenshot({'path': 'fullpage.png', 'fullPage': True})

        # A single element, if it exists.
        element = await page.querySelector('h1')
        if element:
            await element.screenshot({'path': 'element.png'})

        # A fixed rectangular region.
        await page.screenshot({
            'path': 'clip.png',
            'clip': {'x': 0, 'y': 0, 'width': 400, 'height': 300},
        })
    finally:
        # Close the browser even if a step above raises.
        await browser.close()
    print("截图完成!")


asyncio.run(screenshot_examples())
PDF生成
import asyncio
from pyppeteer import launch


async def pdf_generation():
    """Render a page to an A4 PDF with margins and a page-number header."""
    browser = await launch(headless=True)
    try:
        page = await browser.newPage()
        await page.goto('https://httpbin.org/html')

        await page.pdf({
            'path': 'document.pdf',
            'format': 'A4',
            'printBackground': True,
            'margin': {
                'top': '1cm',
                'right': '1cm',
                'bottom': '1cm',
                'left': '1cm',
            },
            'displayHeaderFooter': True,
            # One template string, split across lines for readability only.
            'headerTemplate': (
                '<div style="font-size: 10px; margin-left: 20px;">'
                'Page <span class="pageNumber"></span> of '
                '<span class="totalPages"></span></div>'
            ),
        })
    finally:
        # Close the browser even if a step above raises.
        await browser.close()
    print("PDF生成完成!")


asyncio.run(pdf_generation())
网络请求处理
设置用户代理和视口
import asyncio
from pyppeteer import launch


async def main():
    """Set a custom user agent and viewport, then screenshot the echo page."""
    browser = await launch()
    try:
        page = await browser.newPage()

        await page.setUserAgent(
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36')
        await page.setViewport({'width': 1366, 'height': 768})

        await page.goto('https://httpbin.org/user-agent')
        await page.screenshot({'path': 'user_agent.png'})
    finally:
        # Close the browser even if a step above raises.
        await browser.close()


asyncio.get_event_loop().run_until_complete(main())
处理 Cookie
import asyncio
from pyppeteer import launch


async def main():
    """Set a cookie on the current page, reload, and print all cookies."""
    browser = await launch()
    try:
        page = await browser.newPage()
        await page.goto('https://httpbin.org/cookies')

        await page.setCookie({'name': 'test', 'value': '12345'})

        # Reload so the page is requested again with the cookie set.
        await page.reload()

        cookies = await page.cookies()
        print('Cookies:', cookies)
    finally:
        # Close the browser even if a step above raises.
        await browser.close()


asyncio.get_event_loop().run_until_complete(main())
请求拦截
import asyncio
from pyppeteer import launch


async def request_interception():
    """Abort image, stylesheet and font requests, then screenshot the page."""
    browser = await launch(headless=True)
    try:
        page = await browser.newPage()

        await page.setRequestInterception(True)

        async def intercept_handler(request):
            """Abort image/stylesheet/font requests and continue the rest."""
            if request.resourceType in ['image', 'stylesheet', 'font']:
                await request.abort()
            else:
                await request.continue_()

        # Event callbacks are synchronous, so schedule the handler as a task.
        page.on('request',
                lambda req: asyncio.ensure_future(intercept_handler(req)))

        await page.goto('https://example.com')
        await page.screenshot({'path': 'optimized.png'})
    finally:
        # Close the browser even if a step above raises.
        await browser.close()


asyncio.run(request_interception())
响应监控
import asyncio
from pyppeteer import launch


async def response_monitoring():
    """Record every response seen while loading a page and print a summary."""
    browser = await launch(headless=True)
    try:
        page = await browser.newPage()

        responses = []

        async def response_handler(response):
            """Store the URL, status and content type of one response."""
            responses.append({
                'url': response.url,
                'status': response.status,
                'type': response.headers.get('content-type', ''),
            })
            print(f"响应: {response.url} - {response.status}")

        # Event callbacks are synchronous, so schedule the handler as a task.
        page.on('response',
                lambda resp: asyncio.ensure_future(response_handler(resp)))

        await page.goto('https://httpbin.org/html')
        print(f"总共收到 {len(responses)} 个响应")
    finally:
        # Close the browser even if a step above raises.
        await browser.close()


asyncio.run(response_monitoring())
JavaScript执行
页面内执行JS
import asyncio
from pyppeteer import launch


async def javascript_execution():
    """Evaluate JavaScript in the page: read values, pass args, edit the DOM."""
    browser = await launch(headless=True)
    try:
        page = await browser.newPage()
        await page.goto('https://httpbin.org/html')

        # Evaluate an expression and return its value.
        page_title = await page.evaluate('() => document.title')
        print(f"页面标题: {page_title}")

        # Pass a Python argument into the JavaScript function.
        element_count = await page.evaluate(
            '(selector) => document.querySelectorAll(selector).length', 'p')
        print(f"段落数量: {element_count}")

        # Return a structured object built in the page.
        page_info = await page.evaluate('''() => {
            return {
                title: document.title,
                url: window.location.href,
                userAgent: navigator.userAgent,
                viewport: {
                    width: window.innerWidth,
                    height: window.innerHeight
                }
            };
        }''')
        print("页面信息:", page_info)

        # Modify the page content.
        await page.evaluate('''() => {
            document.body.style.backgroundColor = 'lightblue';
            const h1 = document.querySelector('h1');
            if (h1) {
                h1.textContent = '修改后的标题';
                h1.style.color = 'red';
            }
        }''')

        await page.screenshot({'path': 'modified.png'})
    finally:
        # Close the browser even if a step above raises.
        await browser.close()


asyncio.run(javascript_execution())
暴露Python函数
import asyncio
from pyppeteer import launch


async def expose_functions():
    """Expose a Python callback to the page and call it from JavaScript."""
    browser = await launch(headless=True)
    try:
        page = await browser.newPage()

        def python_callback(message):
            """Print the message received from the page and return a reply."""
            print(f"从JavaScript接收: {message}")
            return f"Python回复: {message}"

        # Make the callback available in the page as pyFunc.
        await page.exposeFunction('pyFunc', python_callback)

        await page.goto('https://httpbin.org/html')

        # Call the exposed function from inside the page.
        result = await page.evaluate('''async () => {
            const response = await pyFunc('Hello from JavaScript!');
            console.log(response);
            return response;
        }''')
        print(f"调用结果: {result}")
    finally:
        # Close the browser even if a step above raises.
        await browser.close()


asyncio.run(expose_functions())
高级功能
使用浏览器上下文
import asyncio
from pyppeteer import launch


async def main():
    """Browse a page inside an incognito browser context, then clean up."""
    browser = await launch()
    try:
        # Create an incognito browser context.
        context = await browser.createIncognitoBrowserContext()
        page = await context.newPage()
        await page.goto('https://www.example.com')
        print('在匿名上下文中浏览')
        await asyncio.sleep(2)

        await context.close()
    finally:
        # Close the browser even if a step above raises.
        await browser.close()


asyncio.get_event_loop().run_until_complete(main())
设备模拟
import asyncio
from pyppeteer import launch
# NOTE(review): confirm the installed pyppeteer release ships a
# `pyppeteer.devices` module with these device names.
from pyppeteer.devices import devices


async def device_emulation():
    """Emulate an iPhone X and a landscape iPad and screenshot each."""
    browser = await launch(headless=True)
    try:
        page = await browser.newPage()

        # Emulate iPhone X.
        iphone = devices['iPhone X']
        await page.emulate(iphone)
        await page.goto('https://httpbin.org/html')
        await page.screenshot({'path': 'iphone.png'})

        # Emulate iPad in landscape.
        ipad = devices['iPad landscape']
        await page.emulate(ipad)
        await page.goto('https://httpbin.org/html')
        await page.screenshot({'path': 'ipad.png'})
    finally:
        # Close the browser even if a step above raises.
        await browser.close()


asyncio.run(device_emulation())
Cookie管理
import asyncio
from pyppeteer import launch


async def cookie_management():
    """Set two cookies, print them, delete one, and print what remains."""
    browser = await launch(headless=True)
    try:
        page = await browser.newPage()
        await page.goto('https://httpbin.org/cookies')

        await page.setCookie(
            {'name': 'test_cookie', 'value': 'test_value', 'domain': 'httpbin.org'},
            {'name': 'session_id', 'value': 'abc123', 'domain': 'httpbin.org'},
        )

        cookies = await page.cookies()
        print("当前Cookie:", cookies)

        # Reload so the page is requested again with the cookies set.
        await page.reload()

        # Delete one cookie by name.
        await page.deleteCookie({'name': 'test_cookie'})

        updated_cookies = await page.cookies()
        print("更新后的Cookie:", updated_cookies)
    finally:
        # Close the browser even if a step above raises.
        await browser.close()


asyncio.run(cookie_management())
性能监控
import asyncio
from pyppeteer import launch


async def performance_monitoring():
    """Trace a page load to trace.json and print metrics and timing data."""
    browser = await launch(headless=True)
    try:
        page = await browser.newPage()

        # Start tracing.
        await page.tracing.start({
            'path': 'trace.json',
            'categories': ['devtools.timeline'],
        })

        await page.goto('https://httpbin.org/html')

        await page.tracing.stop()

        metrics = await page.metrics()
        print("性能指标:", metrics)

        # Read the page's window.performance.timing as a JSON string.
        navigation_timing = await page.evaluate('''() => {
            return JSON.stringify(window.performance.timing);
        }''')
        print("导航时间:", navigation_timing)
    finally:
        # Close the browser even if a step above raises.
        await browser.close()


asyncio.run(performance_monitoring())
最佳实践
- 始终使用 async/await:Pyppeteer 是基于 asyncio 的,确保正确使用异步编程模式
- 合理管理浏览器实例:避免创建过多浏览器实例,及时关闭不再使用的浏览器
- 使用合适的等待策略:优先使用 waitForSelector 或 waitForNavigation,而不是固定的 sleep
- 错误处理:使用 try-except 块处理可能的异常
- 资源清理:确保在程序结束时正确关闭浏览器和页面
实战案例
网页爬虫
import asyncio
from pyppeteer import launch
import json
import csv


async def web_scraper():
    """Scrape title, paragraphs and links from a page; save as JSON and CSV."""
    browser = await launch(headless=True)
    try:
        page = await browser.newPage()
        await page.goto('https://httpbin.org/html')

        # Collect the data inside the page and return it as one object.
        scraped_data = await page.evaluate('''() => {
            const data = {};

            // 提取标题
            const titleElement = document.querySelector('h1');
            data.title = titleElement ? titleElement.textContent : '';

            // 提取所有段落
            data.paragraphs = [];
            const paragraphs = document.querySelectorAll('p');
            paragraphs.forEach(p => {
                data.paragraphs.push(p.textContent);
            });

            // 提取链接
            data.links = [];
            const links = document.querySelectorAll('a');
            links.forEach(link => {
                data.links.push({
                    text: link.textContent,
                    href: link.href
                });
            });

            data.timestamp = new Date().toISOString();
            return data;
        }''')

        # Save as JSON.
        with open('scraped_data.json', 'w', encoding='utf-8') as f:
            json.dump(scraped_data, f, ensure_ascii=False, indent=2)

        # Save as CSV: a header row, the title, then one row per paragraph.
        with open('scraped_data.csv', 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerow(['类型', '内容'])
            writer.writerow(['标题', scraped_data['title']])
            for i, paragraph in enumerate(scraped_data['paragraphs']):
                writer.writerow([f'段落{i+1}', paragraph])

        print("数据爬取完成!")
    finally:
        # Close the browser even if scraping or file writing fails.
        await browser.close()


asyncio.run(web_scraper())
自动化测试
import asyncio
from pyppeteer import launch


async def automated_testing():
    """Run three smoke checks against a page and print a pass/fail summary."""
    browser = await launch(headless=True)
    try:
        page = await browser.newPage()
        test_results = {'passed': 0, 'failed': 0, 'tests': []}

        def record(name, passed):
            """Append one check result and bump the matching counter."""
            status = '通过' if passed else '失败'
            test_results['tests'].append({'name': name, 'status': status})
            test_results['passed' if passed else 'failed'] += 1

        try:
            # Check 1: the page loads and its title mentions httpbin.
            await page.goto('https://httpbin.org/html',
                            {'waitUntil': 'networkidle2'})
            title = await page.title()
            record('页面加载', bool(title and 'httpbin' in title.lower()))

            # Check 2: an <h1> element exists.
            h1_element = await page.querySelector('h1')
            record('H1元素存在', bool(h1_element))

            # Check 3: taking a screenshot does not raise.
            await page.screenshot({'path': 'test_screenshot.png'})
            record('截图功能', True)
        except Exception as e:
            # Any unexpected error is reported as one failed entry.
            test_results['tests'].append(
                {'name': '测试执行', 'status': f'异常: {str(e)}'})
            test_results['failed'] += 1

        # Print the summary, then one line per check.
        print(f"测试完成: 通过 {test_results['passed']}, 失败 {test_results['failed']}")
        for test in test_results['tests']:
            print(f" {test['name']}: {test['status']}")
    finally:
        # Close the browser even if a step above raises.
        await browser.close()


asyncio.run(automated_testing())
故障排除
处理 Chromium 下载问题
import asyncio
from pyppeteer import launch


async def main():
    """Launch an explicitly located Chromium binary and load example.com."""
    browser = await launch(
        headless=True,
        executablePath='/path/to/chromium',  # path to the Chromium binary
        # Sandbox-disabling flags, used here for Linux permission problems.
        args=['--no-sandbox', '--disable-setuid-sandbox'],
    )
    try:
        page = await browser.newPage()
        await page.goto('https://www.example.com')
    finally:
        # Close the browser even if navigation fails.
        await browser.close()


asyncio.get_event_loop().run_until_complete(main())
处理超时问题
import asyncio
from pyppeteer import launch


async def main():
    """Raise the default timeouts and report a failed page load."""
    browser = await launch()
    try:
        page = await browser.newPage()

        # Default timeouts in milliseconds.
        # NOTE(review): confirm Page.setDefaultTimeout exists in the
        # installed pyppeteer release.
        page.setDefaultNavigationTimeout(60000)  # 60 seconds
        page.setDefaultTimeout(30000)  # 30 seconds

        try:
            await page.goto('https://www.example.com')
        except Exception as e:
            print(f'页面加载超时: {e}')
    finally:
        # Close the browser even if a step above raises.
        await browser.close()


asyncio.get_event_loop().run_until_complete(main())
常见错误处理
import asyncio
from pyppeteer import launch
# Note: this import shadows the built-in TimeoutError within this module.
from pyppeteer.errors import TimeoutError, PageError


async def error_handling():
    """Show layered handling of navigation, selector and browser errors."""
    browser = None
    try:
        browser = await launch(
            headless=True,
            args=['--no-sandbox', '--disable-setuid-sandbox'],
        )
        page = await browser.newPage()

        # Timeouts in milliseconds.
        # NOTE(review): confirm Page.setDefaultTimeout exists in the
        # installed pyppeteer release.
        page.setDefaultNavigationTimeout(10000)
        page.setDefaultTimeout(5000)

        # Try a URL that probably does not exist.
        try:
            await page.goto(
                'https://invalid-url-that-probably-does-not-exist.com')
        except TimeoutError:
            print("页面加载超时")
        except PageError as e:
            print(f"页面错误: {e}")

        # A missing element is reported and the run continues.
        try:
            await page.waitForSelector(
                '.non-existent-element', {'timeout': 2000})
        except TimeoutError:
            print("元素未找到,继续执行其他操作")
    except Exception as e:
        # Top-level boundary: report anything not handled above.
        print(f"浏览器错误: {e}")
    finally:
        # browser stays None when launch() itself failed.
        if browser is not None:
            await browser.close()


asyncio.run(error_handling())
性能优化配置
import asyncio
from pyppeteer import launch


async def optimized_configuration():
    """Launch with a fixed set of Chromium flags and abort some requests."""
    browser = await launch(
        headless=True,
        args=[
            '--no-sandbox',
            '--disable-setuid-sandbox',
            '--disable-dev-shm-usage',
            '--disable-accelerated-2d-canvas',
            '--no-first-run',
            '--no-zygote',
            '--disable-gpu',
            '--disable-extensions',
            '--disable-background-timer-throttling',
            '--disable-backgrounding-occluded-windows',
            '--disable-renderer-backgrounding',
            '--window-size=1920,1080',
        ],
        ignoreHTTPSErrors=True,
        handleSIGINT=False,
        handleSIGTERM=False,
        handleSIGHUP=False,
    )
    try:
        page = await browser.newPage()

        # Page-level settings.
        await page.setViewport({'width': 1920, 'height': 1080})
        await page.setJavaScriptEnabled(True)
        await page.setRequestInterception(True)

        async def intercept_request(request):
            """Abort image/stylesheet/font requests and continue the rest."""
            if request.resourceType in ['image', 'stylesheet', 'font']:
                await request.abort()
            else:
                await request.continue_()

        # Event callbacks are synchronous, so schedule the handler as a task.
        page.on('request',
                lambda req: asyncio.ensure_future(intercept_request(req)))

        await page.goto('https://httpbin.org/html')
    finally:
        # Close the browser even if a step above raises.
        await browser.close()


asyncio.run(optimized_configuration())
内存泄漏预防
import asyncio
from pyppeteer import launch


async def memory_management():
    """Open five pages in turn while keeping at most two of them open."""
    browser = await launch(headless=True)
    try:
        pages = []
        for i in range(5):
            page = await browser.newPage()
            await page.goto(f'https://httpbin.org/html?page={i}')

            title = await page.title()
            print(f"页面 {i}: {title}")

            pages.append(page)

            # Close the oldest page once more than two are open.
            if len(pages) > 2:
                old_page = pages.pop(0)
                await old_page.close()
                print("已关闭旧页面")

        # Final cleanup of the pages still open.
        for page in pages:
            await page.close()
    except Exception as e:
        print(f"错误: {e}")
    finally:
        await browser.close()
        print("浏览器已关闭")


asyncio.run(memory_management())
这个完整的 Pyppeteer 教程涵盖了从基础安装到高级用法的所有内容,包括实战案例和故障排除。每个示例都可以直接运行或根据需要进行修改。
© 著作权归作者所有
userDataDir参数使用
使用userDataDir时,会报如下错误:
Exception ignored in atexit callback: <function Launcher.launch.<locals>._close_process at 0x0000027AFE8BFBA0>
Traceback (most recent call last):raise RuntimeError('Event loop is closed')
RuntimeError: Event loop is closed
sys:1: RuntimeWarning: coroutine 'Launcher.killChrome' was never awaited
RuntimeWarning: Enable tracemalloc to get the object allocation traceback
解决方法:
from pathlib import Path
from pyppeteer.launcher import Launcher
async def get_page_content(url):
    """Load *url* in a Launcher-managed Chrome and return the page HTML.

    The Launcher is used directly so Chrome can be killed and waited on
    explicitly before returning. This is the workaround for the
    "Event loop is closed" error reported at interpreter exit when
    ``userDataDir`` is set.

    Args:
        url: Address of the page to load.

    Returns:
        The HTML content of the loaded page.
    """
    options = {
        'headless': False,
        'args': [
            '--no-sandbox',
            '--disable-setuid-sandbox',
            '--disable-infobars',
            '--window-size=2560,1080',
        ],
        'ignoreDefaultArgs': ['--enable-automation'],
        # Pass the profile directory as an absolute path.
        'userDataDir': str(Path('./userdata').resolve()),
        'defaultViewport': {'width': 2560, 'height': 1080},
        "dumpio": True,
    }
    # Minimal alternative:
    # options = {'headless': False, 'args': ['--no-sandbox', '--disable-setuid-sandbox']}
    launcher = Launcher(**options)
    browser = await launcher.launch()
    try:
        page = await browser.newPage()
        # Navigate to the caller's URL instead of a hard-coded address.
        await page.goto(url)
        content = await page.content()
        await page.close()
    finally:
        # Shut down in order: browser, then the Chrome process, then wait
        # for the process to exit.
        await browser.close()
        await launcher.killChrome()
        launcher.waitForChromeToClose()
    return content
总结
Pyppeteer 是一个功能强大的浏览器自动化工具,适用于网页测试、爬虫、截图等多种场景。通过本教程,你应该已经掌握了 Pyppeteer 的基本用法和一些高级功能。在实际使用中,请根据具体需求选择合适的 API 和方法。
记住,浏览器自动化可能会对目标网站造成压力,请遵守网站的 robots.txt 和相关法律法规,合理使用自动化工具。
参考
可用下载
https://registry.npmmirror.com/binary.html?path=chromium-browser-snapshots/Win_x64/
https://github.com/ungoogled-software/ungoogled-chromium-windows/releases
官网下载
https://commondatastorage.googleapis.com/chromium-browser-snapshots/index.html
https://commondatastorage.googleapis.com/chromium-browser-snapshots/index.html?prefix=Mac/330231/
https://storage.googleapis.com/chromium-browser-snapshots/index.html?prefix=Win_x64/118
官网文档
https://pyppeteer.github.io/pyppeteer/reference.html#launcher
相关博客
- 爬虫基础之自动化工具 Pyppeteer 的使用
- Pyppeteer 的使用
- 详细介绍:Python pyppeteer库详解:从入门到反爬虫实战
- 网络爬虫之使用pyppeteer替代selenium完美绕过webdriver检测
