当前位置：首页 > news >正文

【行政区划获取】

news 2025/7/19 8:01:34

行政区划获取

获取 2023 年的行政区划代码，并以 {编码: "省 市 区"} 的字典格式保存为 JSON 文件，方便后续调用。
注：民政部的页面网址可能会更新，请以最新发布的网址为准。

# 获取并保存行政区划代码
import requests
from lxml import etree
import json

def fetch_html(url, timeout=10):
    """Download a page and return its body decoded as UTF-8.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response text, or None when the request fails (connection
        error, timeout, or non-2xx status). The failure is printed.
    """
    try:
        # requests applies no timeout by default; without one a stalled
        # server would block this call indefinitely.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        # Decode as UTF-8 regardless of what the response headers declare.
        response.encoding = 'utf-8'
        return response.text
    except requests.RequestException as e:
        print(f"请求失败: {e}")
        return None

def parse_html(html):
    """Parse the division-code table into a dict keyed by code.

    Rows are taken from the ``tr`` siblings that follow the row containing
    an element whose text is exactly "行政区划代码". In each row the second
    cell is read as the code and the third as the name.

    Args:
        html: Page source as a string.

    Returns:
        A dict mapping code -> {'province', 'city', 'district'} (missing
        levels are empty strings), or None when ``html`` is empty or
        cannot be parsed into a tree.
    """
    if not html:
        return None

    html_tree = etree.HTML(html)
    if html_tree is None:
        # NOTE(review): the parser may yield no tree for degenerate input;
        # treat that the same as empty input.
        return None
    rows = html_tree.xpath('//*[text()="行政区划代码"]/../following-sibling::tr')

    location_dict = {}
    current_province = ''
    current_city = ''
    current_city_prefix = None  # first four digits of the current city code

    for row in rows:
        code_texts = row.xpath('./td[2]/text()')
        name_texts = row.xpath('./td[3]/text()')
        # Skip spacer/footnote rows that lack either a code or a name.
        if not code_texts or not name_texts:
            continue

        code = code_texts[0].strip()
        name = name_texts[0].replace('*', '').strip()
        if not code:
            continue

        if code.endswith('0000'):  # province level
            current_province = name
            # A new province starts with no city; otherwise the last city
            # of the previous province would leak into its districts.
            current_city = ''
            current_city_prefix = None
            location_dict[code] = {'province': current_province, 'city': '', 'district': ''}
        elif code.endswith('00'):  # city level
            current_city = name
            current_city_prefix = code[:4]
            location_dict[code] = {'province': current_province, 'city': current_city, 'district': ''}
        else:  # district/county level
            # Attach the city only when the code shares its four-digit
            # prefix; entries listed directly under a province (no city
            # row of their own) get an empty city.
            city = current_city if code[:4] == current_city_prefix else ''
            location_dict[code] = {'province': current_province, 'city': city, 'district': name}

    return location_dict

def generate_full_address(location_dict):
    """Collapse each location record into a single address string.

    Args:
        location_dict: Mapping of code -> dict with the keys 'province',
            'city' and 'district' (string values, possibly empty).

    Returns:
        A new dict mapping each code to its non-empty components joined
        by single spaces, in province/city/district order.
    """
    finally_location_dict = {}
    for code, location in location_dict.items():
        parts = (
            location['province'].strip(),
            location['city'].strip(),
            location['district'].strip(),
        )
        # Join only the non-empty levels so a missing city does not leave
        # a double space between province and district.
        finally_location_dict[code] = ' '.join(part for part in parts if part)
    return finally_location_dict

def save_to_json(data, filename):
    """Serialize ``data`` to ``filename`` as indented, non-ASCII-escaped JSON.

    The file is written as UTF-8 with a two-space indent. A success message
    is printed afterwards; an I/O failure is printed instead of raised.
    """
    try:
        with open(filename, 'w+', encoding='utf-8') as out_file:
            json.dump(data, out_file, ensure_ascii=False, indent=2)
        print(f"数据已成功保存到 {filename}")
    except OSError as err:  # IOError is an alias of OSError
        print(f"文件保存失败: {err}")

def main():
    """Fetch the 2023 division-code page, parse it, and save the result.

    Stops silently after any stage that yields nothing; otherwise writes
    the code -> address mapping to ``city_and_code.json``.
    """
    url = 'https://www.mca.gov.cn/mzsj/xzqh/2023/202301xzqh.html'

    page = fetch_html(url)
    if not page:
        return

    regions = parse_html(page)
    if not regions:
        return

    addresses = generate_full_address(regions)
    save_to_json(addresses, 'city_and_code.json')

# Run the scrape only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

# 提取编码对应省市区
# with open(r'city_and_code.json', "r", encoding="utf-8") as f:
#     dict_ = json.load(f)
# print(dict_["320507"])

查看全文

http://www.dtcms.com/a/47408.html