# Python 监控网站,有更新则推送到企业微信
import json
import time

import redis
import requests
from lxml import etree
# Shared Redis client (local server, default port, database 0). The
# functions in this file use it to remember which URLs were already seen.
r = redis.Redis(
    host="localhost",
    port=6379,
    db=0,
)
def get_page_content(url, timeout=10):
    """Fetch ``url`` and return the title and link of every anchor on the page.

    Args:
        url: Address of the page to scan.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block the caller forever.

    Returns:
        A list of ``{"url": href, "title": text}`` dicts, one per ``<a>``
        element that has an ``href`` attribute. ``href`` values are returned
        exactly as written in the page (relative links are not resolved).
        The list is empty when the request fails, the status code is not
        200, or the body cannot be parsed into a document.
    """
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
    }
    try:
        # NOTE(review): verify=False disables TLS certificate validation. It
        # is kept so sites with broken certificates can still be monitored;
        # confirm this is intended before using it on untrusted networks.
        response = requests.get(url, headers=headers, verify=False, timeout=timeout)
    except requests.RequestException as exc:
        # Treat a network failure like a non-200 answer: nothing to report.
        print(f"请求 {url} 失败:{exc}")
        return []
    if response.status_code != 200:
        return []

    raw = response.content
    try:
        text = raw.decode()
    except UnicodeDecodeError:
        # Not UTF-8 (e.g. GBK pages): let requests guess the encoding
        # instead of crashing on the strict decode.
        response.encoding = response.apparent_encoding
        text = response.text

    try:
        html = etree.HTML(text)
    except ValueError:
        # lxml refuses str input that carries an XML encoding declaration
        # (seen on XHTML pages); the raw bytes are accepted.
        html = etree.HTML(raw)
    if html is None:
        return []

    url_lists = []
    for anchor in html.xpath("//a[@href]"):
        title = "".join(anchor.xpath(".//text()")).strip()
        url_lists.append({"url": anchor.get("href"), "title": title})
    return url_lists
# Monitoring loop
def monitor_website(url, interval=7200, seen_key="myset"):
    """Poll ``url`` forever and push newly discovered links to the chat robot.

    Each cycle sleeps ``interval`` seconds, fetches the page's links with
    ``get_page_content``, records every link URL in the Redis set
    ``seen_key`` and sends the links that were not in the set before through
    ``bot``. The sleep happens before the fetch, so the first check runs one
    full interval after the call.

    Args:
        url: Page to monitor.
        interval: Seconds to wait between two checks.
        seen_key: Name of the Redis set that stores already-seen link URLs.
            Use a different key per monitored site to keep their histories
            separate.

    This function never returns. A failure in one cycle (network, Redis,
    notification) is printed and the loop carries on with the next cycle.
    """
    while True:
        time.sleep(interval)  # wait for the configured interval
        try:
            changes = []
            for item in get_page_content(url):
                # SADD returns how many members were actually added, so one
                # atomic call both tests membership and records the URL.
                if r.sadd(seen_key, item["url"]):
                    changes.append(item)
                else:
                    print("URL 已经存在,跳过")
            if changes:
                print(f"{url}网站已更新,新增数据为{changes}")
                bot(f"{url}网站已更新,新增数据为{changes}")
        except Exception as exc:
            # Loop boundary: report the error and keep monitoring rather than
            # letting one bad cycle stop the process.
            print(f"监控 {url} 时出错:{exc}")
def bot(text, webhook_url=None):
    """Send ``text`` to a WeCom (企业微信) group robot, mentioning everyone.

    Args:
        text: Message body.
        webhook_url: Full robot webhook URL. When omitted, the placeholder
            URL below is used; replace ``你的key`` with a real robot key.

    Returns:
        ``True`` when the request succeeded and the API answered with
        ``errcode`` 0, ``False`` otherwise. The outcome is also printed.
    """
    robot_webhook = webhook_url or 'https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=你的key'

    # The robot API documents a 2048-byte (UTF-8) limit for text content.
    # Cut on the byte level and drop a possibly split trailing character so
    # an oversized message is shortened instead of rejected.
    max_bytes = 2048
    encoded = text.encode("utf-8")
    if len(encoded) > max_bytes:
        text = encoded[:max_bytes].decode("utf-8", errors="ignore")

    # Build the message payload
    message_data = {
        "msgtype": "text",
        "text": {
            "content": text,
            "mentioned_list": ["@all"]
        }
    }

    # Send the request
    try:
        response = requests.post(
            robot_webhook,
            headers={"Content-Type": "application/json"},
            data=json.dumps(message_data),
            timeout=10,
        )
    except requests.RequestException as exc:
        print(f"消息发送失败:{exc}")
        return False

    # Check the HTTP status code
    if response.status_code != 200:
        print(f"消息发送失败,状态码:{response.status_code}")
        return False

    # The API reports failures (bad key, rate limit, ...) with HTTP 200 and a
    # non-zero "errcode" in the JSON body, so the status code is not enough.
    try:
        result = response.json()
    except ValueError:
        result = {}
    if result.get("errcode") != 0:
        print(f"消息发送失败,错误码:{result.get('errcode')},{result.get('errmsg')}")
        return False

    print("消息发送成功")
    return True
# 本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若转载,请注明出处:http://www.dtcms.com/a/120394.html
# 如若内容造成侵权/违法违规/事实不符,请联系邮箱:809451989@qq.com进行投诉反馈,一经查实,立即删除!