# requests + lxml implementation
import requests
from lxml import html# 发送 GET 请求
# Fetch the blog home page and print the stripped <h4> text found inside each
# child <div> of the navigation list container.
url = 'https://knight.blog.csdn.net'

# Send the GET request. An explicit timeout is passed because requests would
# otherwise wait indefinitely on an unresponsive server.
response = requests.get(url, timeout=10)

# Only parse the body when the request succeeded.
if response.status_code == 200:
    # Parse the raw response bytes into an lxml element tree.
    tree = html.fromstring(response.content)

    # Locate the parent container. The XPath is tied to the page's current
    # markup -- NOTE(review): confirm it still matches the live page.
    parent_element = tree.xpath('//*[@id="navList-box"]/div[2]/div/div')
    if parent_element:
        # Direct child <div> elements of the first matched container.
        child_elements = parent_element[0].xpath('./div')

        # Print the first <h4> text node of each child, skipping children
        # that have none.
        for child in child_elements:
            child_h4 = child.xpath('.//article/a/div/div[1]/div[1]/h4/text()')
            if child_h4:
                text = child_h4[0].strip()  # drop leading/trailing whitespace
                print(text)
else:
    print(f"请求失败,状态码: {response.status_code}")