【爬虫】通过模拟鼠标点击和键盘操作抓取网页数据
通过模拟鼠标点击,打开网页,然后屏幕截图,全选文字,并将文字保存到文本文件,翻页,逐步循环。
import os.path
import time

import pyautogui
import pyperclip


def select_and_save_to_file(filename="selected_content.txt"):
    """Select all text in the focused window and save it to a text file.

    Sends Ctrl+A and then Ctrl+C to whichever window currently has keyboard
    focus, reads the clipboard, and writes its text to ``filename`` (UTF-8,
    overwriting any existing file). Progress and errors are reported with
    ``print``; exceptions are caught here so that one failed capture does
    not abort the caller.

    Side effect: sets the module-wide ``pyautogui.PAUSE`` to 1 second, which
    stays in effect for later pyautogui calls as well.

    Args:
        filename: Path of the text file to write.

    Returns:
        None. Nothing is written when the clipboard is empty after copying.
    """
    try:
        # Pause after each pyautogui action so the system has time to respond.
        pyautogui.PAUSE = 1

        # Clear the clipboard first: if the copy below fails, paste() would
        # otherwise return text left over from an earlier capture and that
        # stale text would be saved under this filename.
        pyperclip.copy('')

        # Ctrl+A: select everything in the focused window.
        pyautogui.hotkey('ctrl', 'a')
        print("执行了全选操作")

        # Ctrl+C: copy the selection to the clipboard.
        pyautogui.hotkey('ctrl', 'c')
        print("执行了复制操作")

        # Give the clipboard a moment to receive the copied text.
        time.sleep(0.5)

        content = pyperclip.paste()
        if not content:
            print("剪贴板中没有内容")
            return

        with open(filename, 'w', encoding='utf-8') as f:
            f.write(content)
        print(f"成功将内容保存到 {filename}")
    except Exception as e:
        # Best-effort boundary: report the problem and let the caller go on.
        print(f"操作过程中出现错误: {e}")


# Ten screen y-coordinates used as click targets by the capture loop
# (presumably one per row of the on-screen list -- confirm).
list_cord = [461, 499, 537, 577, 618, 656, 693, 734, 773, 811]
# Restrict this run to the first three y-coordinates; this assignment
# replaces any list_cord value assigned earlier in the file.
list_cord = [461, 499, 537]

# Pagination is currently disabled: the outer loop over pages
# (`for page in range(1, 124)`) and the click at (1324, 868) followed by a
# 3 s wait that would advance to the next page are switched off, so only
# the single page number below is processed.
page = 13

# Create the output folder up front so the screenshot and text writes do not
# fail on a missing directory.
os.makedirs('./save_data', exist_ok=True)

for idx, y in enumerate(list_cord):
    image_path = f'./save_data/page{page}_{idx+1}.jpg'
    text_path = f'./save_data/page{page}_{idx+1}.txt'

    # Skip entries whose screenshot and text were both saved already, which
    # makes the script safe to re-run after an interruption.
    if os.path.exists(image_path) and os.path.exists(text_path):
        continue

    # Left-click the entry at (1830, y), then wait 3 s for the page to open.
    pyautogui.moveTo(1830, y)
    pyautogui.click()
    time.sleep(3)

    # Capture the screen, then dump the page text via select-all/copy.
    pyautogui.screenshot(image_path)
    select_and_save_to_file(text_path)
    print(f'page{page}_{idx+1}抓取成功')

    # Left-click at (494, 19) before handling the next entry -- presumably
    # this returns to the list view (e.g. by switching tabs); confirm.
    pyautogui.moveTo(494, 19)
    pyautogui.click()
    time.sleep(1)