当前位置: 首页 > news >正文

百度地图小区边界爬取

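1、准备需要爬取的小区名称,存放在txt文本中(UTF-8编码,每行一个小区名称)。下文代码片段未包含导入语句,运行前需先导入标准库 os、csv,以及第三方库 requests(含 requests.adapters.HTTPAdapter)和 pyproj 的 Transformer。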

# Read residential community names from a text file
def read_residential_names(file_path):
    """
    Read residential community names from a text file, one name per line.

    The file is decoded as UTF-8. A leading byte-order mark (as written by
    some Windows editors) is dropped so it does not end up glued to the
    first name. Surrounding whitespace is stripped from every line and
    lines that are empty after stripping are skipped.

    :param file_path: path of the text file to read
    :return: list of names in file order; an empty list when the file does
             not exist (a message is printed in that case)
    """
    try:
        # "utf-8-sig" also reads plain UTF-8 and strips a BOM when present.
        with open(file_path, "r", encoding="utf-8-sig") as file:
            stripped_lines = (line.strip() for line in file)
            return [name for name in stripped_lines if name]
    except FileNotFoundError:
        print(f"File not found: {file_path}")
        return []

2、需要根据住宅区名称和所在地区获取其UID

def get_residential_uid(residential_name, region, bmap_key):
    """
    Look up the Baidu Maps UID of a residential area by name and region.

    Sends one GET request to the Place API v2 search endpoint with
    ``city_limit=true`` and returns the UID of the first result whose name
    does not contain '-'.

    :param residential_name: name of the residential area to search for
    :param region: region sent to the API as the ``region`` parameter
    :param bmap_key: Baidu Maps API key, sent as ``ak``
    :return: UID of the first accepted result, or None when the API reports
             no results, no result passes the name filter, or any error
             occurs (a message is printed in each of these cases)
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urlencode

    # Percent-encode every parameter so that names containing '&', '#', '+'
    # or spaces cannot corrupt the query string.
    query_string = urlencode({
        "query": residential_name,
        "region": region,
        "output": "json",
        "city_limit": "true",
        "ak": bmap_key,
    })
    bmap_localsearch_url = f"http://api.map.baidu.com/place/v2/search?{query_string}"

    try:
        # The context manager closes the session's pooled connections.
        with requests.Session() as s:
            s.mount('http://', HTTPAdapter(max_retries=3))
            s.mount('https://', HTTPAdapter(max_retries=3))
            response = s.get(bmap_localsearch_url, timeout=5, headers={"Connection": "close"})
            data = response.json()

        results = data.get('results') if data.get('status') == 0 else None
        if not results:
            print(f"No results found for {residential_name} in {region}")
            return None

        for info in results:
            # NOTE(review): presumably skips sub-POIs (e.g. gates or single
            # buildings named "<area>-<part>") — confirm against real data.
            if '-' not in info['name']:
                return info['uid']
        print(f"No valid UID found for {residential_name} in {region}")
        return None
    except Exception as e:
        # Best-effort lookup: report the failure and let the caller move on.
        print(f"Error in get_residential_uid: {e}\nURL: {bmap_localsearch_url}")
        return None

3、根据UID获取住宅区的边界信息

def get_boundary_by_uid(uid, bmap_key):
    """
    Fetch the boundary of a residential area from Baidu Maps by its UID.

    Requests the ``qt=ext`` endpoint and reads ``content.geo`` from the JSON
    reply. The third '|'-separated field of that string is taken, everything
    up to its first '-' is discarded, and the remaining comma-separated
    values are paired up in order.

    :param uid: Baidu Maps UID of the target
    :param bmap_key: Baidu Maps API key, sent as ``ak``
    :return: boundary as ``"x1,y1;x2,y2;..."``, or None when the reply has no
             usable boundary or any error occurs (a message is printed)
    """
    # NOTE(review): the values are returned exactly as the service sends them;
    # presumably projected (Mercator-style) metres rather than degrees — confirm.
    bmap_boundary_url = f"http://map.baidu.com/?reqflag=pcmap&from=webmap&qt=ext&uid={uid}&ext_ver=new&l=18&ak={bmap_key}"

    try:
        # The context manager closes the session's pooled connections.
        with requests.Session() as s:
            s.mount('http://', HTTPAdapter(max_retries=3))
            s.mount('https://', HTTPAdapter(max_retries=3))
            response = s.get(bmap_boundary_url, timeout=5, headers={"Connection": "close"})
            data = response.json()

        if 'content' in data and 'geo' in data['content']:
            geo = data['content']['geo']
            # maxsplit=1: only the first '-' separates the prefix from the
            # values, so minus signs of negative coordinates are preserved.
            raw_values = geo.split('|')[2].split('-', 1)[1].split(',')
            values = [value.strip(';') for value in raw_values]
            # zip() drops an unpaired trailing value instead of raising.
            boundary = ';'.join(f"{x},{y}" for x, y in zip(values[::2], values[1::2]))
            if boundary:
                return boundary

        print(f"No boundary information found for UID: {uid}")
        return None
    except Exception as e:
        # Best-effort lookup: report the failure and let the caller move on.
        print(f"Error in get_boundary_by_uid: {e}\nURL: {bmap_boundary_url}")
        return None

4、解析百度地图返回的geo数据,提取坐标点

def parse_geo_data(geo_data):
    """
    Parse a Baidu Maps ``geo`` string into a list of coordinate pairs.

    The third '|'-separated field is taken, everything up to its first '-'
    is discarded, and the remaining comma-separated values are paired up in
    order. Any ';' at either end of a value is stripped before conversion.
    An unpaired trailing value is ignored.

    :param geo_data: ``geo`` string as returned by Baidu Maps
    :return: list of ``(x, y)`` float tuples; an empty list when the input is
             empty, contains no '|', or cannot be parsed (a message is
             printed in the last case)
    """
    if not geo_data or '|' not in geo_data:
        return []
    try:
        # maxsplit=1: only the first '-' separates the prefix from the
        # values, so minus signs of negative coordinates are preserved.
        values = geo_data.split('|')[2].split('-', 1)[1].split(',')
        return [
            (float(x.strip(';')), float(y.strip(';')))
            for x, y in zip(values[::2], values[1::2])
        ]
    except (IndexError, ValueError) as e:
        # IndexError: fewer fields than expected; ValueError: non-numeric value.
        print(f"Error parsing geo data: {e}")
        return []

5、将Web Mercator坐标转换为WGS-84经纬度坐标

def web_mercator_to_wgs84(x, y):
    """
    Convert Web Mercator (EPSG:3857) coordinates to WGS-84 (EPSG:4326).

    :param x: Web Mercator X coordinate (easting)
    :param y: Web Mercator Y coordinate (northing)
    :return: WGS-84 coordinates as ``(lon, lat)``
    """
    # always_xy=True forces (x, y) / (lon, lat) ordering. Without it pyproj
    # follows the axis order declared by the CRS, which for EPSG:4326 is
    # (lat, lon) and would contradict the documented return value.
    transformer = Transformer.from_crs("EPSG:3857", "EPSG:4326", always_xy=True)
    return transformer.transform(x, y)

6、将数据保存到CSV文件中

def save_to_csv(data, filename="output.csv"):
    """
    Write boundary coordinates to a CSV file, one row per coordinate pair.

    The file starts with the header ``Residential Name, Longitude, Latitude``.
    The parent directory of ``filename`` is created when it does not exist.
    An existing file is overwritten.

    :param data: dict mapping a residential name to a boundary string of the
                 form ``"x1,y1;x2,y2;..."``
    :param filename: path of the CSV file to write
    :raises ValueError: when a non-empty segment does not consist of exactly
                        two comma-separated values
    """
    directory = os.path.dirname(filename)

    # A bare file name has no directory part; nothing to create then.
    if directory:
        os.makedirs(directory, exist_ok=True)

    with open(filename, mode="w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        writer.writerow(["Residential Name", "Longitude", "Latitude"])  # header row
        for name, coords in data.items():
            for coord in coords.split(';'):
                # A trailing ';' or an empty boundary yields an empty segment;
                # skip it instead of failing halfway through the file.
                if not coord.strip():
                    continue
                lon, lat = coord.split(',')
                writer.writerow([name, lon, lat])

    print(f"Data saved to {filename}")

7、主函数:bmap_key 填写百度地图API密钥,region 填写默认查询地区,input_file 填写存放小区名称的文本文件路径,output_file 为输出的CSV文件路径。

if __name__ == "__main__":
    # Script entry point: read the names, resolve each one to a UID, fetch
    # its boundary and write everything that was found to one CSV file.
    bmap_key = "***"  # replace with your Baidu Maps API key
    region = "北京"  # default search region
    input_file = "**.txt"  # text file holding the residential names
    output_file = "transformed_coordinates.csv"  # output CSV path

    residential_names = read_residential_names(input_file)
    if not residential_names:
        print("No residential names found in the input file.")
        # exit() is a helper injected by the site module for interactive use
        # and may be absent (e.g. under `python -S`); raising SystemExit
        # always works and ends the script with the same status 0.
        raise SystemExit

    # Boundary string per residential name; names that fail are left out.
    all_boundaries = {}

    for residential_name in residential_names:
        print(f"Processing: {residential_name}")

        uid = get_residential_uid(residential_name, region, bmap_key)
        if not uid:
            print(f"Failed to get UID for {residential_name}.")
            continue

        boundary = get_boundary_by_uid(uid, bmap_key)
        if not boundary:
            print(f"Failed to get boundary information for {residential_name}.")
            continue

        all_boundaries[residential_name] = boundary

    # NOTE(review): boundaries are saved exactly as get_boundary_by_uid
    # returned them; no coordinate conversion is applied here even though the
    # output file is named "transformed_coordinates.csv" — confirm intent.
    save_to_csv(all_boundaries, filename=output_file)

完整代码下载
https://download.csdn.net/download/cc605523/90592963

http://www.dtcms.com/a/123301.html

相关文章:

  • 从PPT到PNG:Python实现的高效PPT转图工具
  • Edge浏览器IE兼容模式设置
  • JavaScript(JS进阶)
  • 【AI论文】OmniSVG:一种统一的(可扩展)矢量图形生成模型
  • STM32单片机入门学习——第31节: [10-1] I2C通信协议
  • 需求开发与需求管理的全景解析
  • 4.10学习总结
  • MQTT:基于Keil开发工具移植入MQTTClient-C开源库
  • JS—同源策略:2分钟掌握同源策略
  • vue2添加背景水印-手动实现(无组件模式)
  • 4月10(信息差)
  • linux系统下如何提交git和调试
  • +++++背到厌倦。持续更新
  • python基础语法:缩进规则
  • netty中的ChannelPipeline详解
  • 认知风险→风险转移→保障未来
  • AUTOSAR图解=>AUTOSAR_SWS_TimeSyncOverEthernet
  • C++: unordered_map、unordered_set
  • 3DGS之光栅化
  • Python爬虫第10节-lxml解析库用 XPath 解析网页
  • 【Pandas】pandas DataFrame head
  • C#容器源码分析 --- List
  • Web前端之Vue+Element实现表格动态不同列合并多行、localeCompare、forEach、table、push、sort、Map
  • 每日算法-250410
  • 队列缓冲最新请求结合线程池的优化方案
  • STM32Cubemx-H7-14-Bootloader(上)-ST和串口烧录
  • django寻味美食分享与交流网站-计算机毕业设计源码74984
  • 重载和重写的区别
  • 年龄增长,特发性震颤为何愈发严重 ?
  • 详解如何从零用 Python复现类似 GPT-4o 的多模态模型