【行政区划获取】
行政区划获取
获取2023年的行政区划代码,并以“编码: 省 市 区”的格式保存为字典(JSON 文件),方便后续调用。
注:网址可能会更新,请以民政部网站公布的最新地址为准。
# 获取并保存行政区划代码
import requests
from lxml import etree
import json
def fetch_html(url, timeout=10):
    """Download a page and return its body decoded as UTF-8.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` may block indefinitely on a stalled
            connection.

    Returns:
        The response text, or ``None`` when the request fails (including
        timeouts and non-2xx statuses); the error is printed, not raised.
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Turn HTTP error statuses into exceptions so they take the same
        # failure path as connection errors.
        response.raise_for_status()
        # Decode as UTF-8 explicitly instead of using the guessed encoding.
        response.encoding = 'utf-8'
        return response.text
    except requests.RequestException as e:
        print(f"请求失败: {e}")
        return None
def parse_html(html):
    """Parse the division table into a ``{code: {...}}`` dictionary.

    Rows are the ``<tr>`` siblings that follow the header row containing
    the text "行政区划代码". In each row the code is read from the second
    cell and the name from the third; rows missing either are skipped.

    Codes are classified by their suffix: ``0000`` is province level,
    ``00`` is city level, anything else is district/county level.

    Args:
        html: Page source as a string.

    Returns:
        A dict mapping each code to
        ``{'province': ..., 'city': ..., 'district': ...}`` (unused levels
        are empty strings), or ``None`` when ``html`` is empty.
    """
    if not html:
        return None
    html_tree = etree.HTML(html)
    rows = html_tree.xpath('//*[text()="行政区划代码"]/../following-sibling::tr')
    location_dict = {}
    current_province = ''
    current_city = ''
    # First four digits of the most recent city-level code; a district only
    # belongs to that city when its own code shares this prefix.
    current_city_prefix = None
    for row in rows:
        code_texts = row.xpath('./td[2]/text()')
        name_texts = row.xpath('./td[3]/text()')
        if not code_texts or not name_texts:
            continue
        code = code_texts[0].strip()
        if not code:
            continue
        name = name_texts[0].replace('*', '').strip()
        if code.endswith('0000'):  # province level
            current_province = name
            # Forget the previous province's last city so that districts
            # listed directly under this province do not inherit it.
            current_city = ''
            current_city_prefix = None
            location_dict[code] = {'province': current_province, 'city': '', 'district': ''}
        elif code.endswith('00'):  # city level
            current_city = name
            current_city_prefix = code[:4]
            location_dict[code] = {'province': current_province, 'city': current_city, 'district': ''}
        else:  # district / county level
            # A district whose prefix differs from the current city's has no
            # city-level parent in the table, so its city is left empty.
            city = current_city if code[:4] == current_city_prefix else ''
            location_dict[code] = {'province': current_province, 'city': city, 'district': name}
    return location_dict
def generate_full_address(location_dict):
    """Collapse each province/city/district record into one address string.

    Args:
        location_dict: Mapping of code to a dict with the keys
            ``'province'``, ``'city'`` and ``'district'``.

    Returns:
        A new dict mapping each code to its non-empty levels joined by a
        single space, e.g. ``"江苏省 苏州市 相城区"``. Empty levels are
        dropped so that a missing city does not leave a double space.
    """
    levels = ('province', 'city', 'district')
    return {
        code: " ".join(
            part
            for part in (location[level].strip() for level in levels)
            if part
        )
        for code, location in location_dict.items()
    }
def save_to_json(data, filename):
    """Write ``data`` to ``filename`` as indented, UTF-8 encoded JSON.

    The data is serialized before the file is opened, so a serialization
    failure cannot truncate or partially overwrite an existing file.

    Args:
        data: JSON-serializable object to store.
        filename: Path of the file to create or overwrite.

    Raises:
        TypeError: If ``data`` is not JSON serializable. File-system errors
            are not raised; they are reported with a printed message.
    """
    # ensure_ascii=False keeps non-ASCII text readable instead of \u escapes.
    payload = json.dumps(data, ensure_ascii=False, indent=2)
    try:
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(payload)
        print(f"数据已成功保存到 {filename}")
    except IOError as e:
        print(f"文件保存失败: {e}")
def main():
    """Fetch the division page, parse it and save the result as JSON.

    Stops silently when the download fails or when parsing yields nothing;
    otherwise writes the code-to-address mapping to ``city_and_code.json``.
    """
    source_url = 'https://www.mca.gov.cn/mzsj/xzqh/2023/202301xzqh.html'

    page = fetch_html(source_url)
    if not page:
        return

    divisions = parse_html(page)
    if not divisions:
        return

    addresses = generate_full_address(divisions)
    save_to_json(addresses, 'city_and_code.json')


# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
# 提取编码对应省市区
# with open(r'city_and_code.json', "r", encoding="utf-8") as f:
# dict_ = json.load(f)
# print(dict_["320507"])