当前位置：首页 > news >正文

通过BingAPI爬取Bing半个月内壁纸

news 2025/10/21 15:51:46

通过BingAPI爬取Bing半个月内壁纸

一、前言
二、爬虫代码
三、代码说明

一、前言

爬取Bing搜索网站首页壁纸的方式主要有两种，第一种为间接爬取，即并不直接对Bing网站发起请求，而是对那些收集汇总了Bing壁纸的网站发起请求，爬取图片。第二种为直接爬取，即直接对Bing网站发起访问。

本文使用第二种方式，通过Bing官方提供的API下载壁纸。通过这种方式，可以下载从当天往前约半个月内Bing搜索网站首页使用过的壁纸。
在这里插入图片描述

二、爬虫代码

import json
import requests
import os
import re
import time
from colorama import Fore, Back, Style
import colorama

# Initialise colorama for coloured console output. With autoreset=True the
# colour/style is reset after every print, so the messages below do not need
# to append an explicit Style.RESET_ALL.
colorama.init(autoreset = True)

def download_imgWallpaper_of_bing(headers, save_folder_path=None):
    """Download the Bing homepage wallpapers listed by the HPImageArchive API.

    Two archive queries are issued (``idx=0&n=7`` and ``idx=8&n=8``), the
    ``images`` entries of both JSON responses are merged, and every image
    whose file name is not already present in the target folder is saved as
    ``<startdate>_<copyright>.jpg`` (characters that are illegal in Windows
    file names are replaced by ``_``). Progress and errors are printed to
    stdout; request and parsing failures are reported, not raised.

    Args:
        headers: HTTP headers passed to every ``requests.get`` call.
        save_folder_path: Folder to save the images into. Defaults to
            ``'bing壁纸图片'`` relative to the current working directory.
            The folder (including missing parents) is created if absent.

    Returns:
        None.
    """
    print(Fore.GREEN + Style.BRIGHT + '\n' + '-' * 30 + 'Bing必应 壁纸图片' + '-' * 30 + '\n')

    if save_folder_path is None:
        save_folder_path = 'bing壁纸图片'
    # makedirs copes with nested paths and with a folder that already exists,
    # both of which make the exists()/mkdir() pair raise.
    os.makedirs(save_folder_path, exist_ok=True)

    # A set gives O(1) membership tests and lets us record files saved during
    # this run, so a duplicated archive entry is not downloaded twice.
    exist_imgs = set(os.listdir(save_folder_path))
    archive_urls = [
        'http://cn.bing.com/HPImageArchive.aspx?format=js&idx=0&n=7',
        'http://cn.bing.com/HPImageArchive.aspx?format=js&idx=8&n=8',
    ]
    imgs_data = []

    # Step 1: collect the image metadata from both archive queries. Any
    # failure here aborts the whole run.
    try:
        for url in archive_urls:
            print(Fore.BLUE + Style.BRIGHT + '\n正在下载html文件，地址如下：')
            print(url)
            res = requests.get(url, headers=headers, timeout=(5, 5))
            res.raise_for_status()

            # A near-empty body carries no archive data; skip this query.
            if len(res.content) < 10:
                continue

            data = json.loads(res.content)
            imgs_data.extend(data['images'])
    except Exception as e:
        print(Fore.RED + Back.WHITE + "\n下载html文件失败，详情如下：")
        print(e)
        return

    img_url_count = len(imgs_data)
    if img_url_count == 0:
        print(Fore.RED + Back.WHITE + "\n从html文件中提取到的壁纸图片Url数量为零")
        return

    # Step 2: download every image that is not on disk yet. A failure on one
    # image is reported and the loop moves on to the next one.
    img_num = 0  # number of images saved during this run
    for img_data in imgs_data:
        try:
            img_name = img_data['startdate'] + '_' + img_data['copyright'] + '.jpg'
            # Replace characters that are not allowed in Windows file names.
            img_name = re.sub(r'[<>:"/\\|?*]', '_', img_name)
            if img_name in exist_imgs:
                continue

            img_path = os.path.join(save_folder_path, img_name)
            img_url = 'http://cn.bing.com' + img_data['url']
            print(Fore.BLUE + Style.BRIGHT + "\n正在下载第%s/%s张图片，地址如下：" % (img_num + 1, img_url_count))
            print(img_url)
            res = requests.get(img_url, headers=headers, timeout=(5, 10))
            res.raise_for_status()

            with open(img_path, 'wb') as f:
                f.write(res.content)

            exist_imgs.add(img_name)
            print(Fore.BLUE + Style.BRIGHT + "完成下载")
            img_num += 1
            # Pause between downloads to avoid hammering the server.
            time.sleep(1)
        except Exception as e:
            print(Fore.RED + Back.WHITE + "\n下载异常，详情如下：")
            print(e)
            continue

    # Images skipped because they already existed count as "not downloaded"
    # in the summary below.
    if img_num == img_url_count:
        print(Fore.YELLOW + Style.BRIGHT + "\n所有壁纸图片下载完成，保存文件夹为：" + os.path.abspath(save_folder_path))
    else:
        print(Fore.YELLOW + Style.BRIGHT + "\n壁纸图片已下载%s，未下载%s张，保存文件夹为%s" % (img_num, img_url_count - img_num, os.path.abspath(save_folder_path)))

if __name__ == '__main__':
    # Browser-like headers sent with every request.
    # Accept-Encoding is limited to gzip/deflate: requests decodes those out
    # of the box, whereas "br"/"zstd" are only decoded when optional extra
    # packages are installed. Advertising them could make the server return
    # a body that reaches json.loads / the .jpg file still compressed.
    headers = {
        "Accept": "*/*",
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Connection": "keep-alive",
        "Cache-Control": "max-age=0",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
    }

    download_imgWallpaper_of_bing(headers)