当前位置: 首页 > news >正文

批量采集培训机构数据进行查询

使用 PHP 写了一个批量采集培训机构数据并进行查询的脚本。在 PHP 环境中新建一个 peixun.php 文件。

代码如下:

<?php
// Endpoint that returns the paginated list of training institutions.
$url = 'https://xwpx.eduyun.cn/tolSpInfo/getSpInfoList';

// Page number to request. Missing, non-integer, array-valued or < 1 input
// falls back to page 1 instead of being forwarded verbatim to the upstream API.
$page = filter_var($_GET['page'] ?? 1, FILTER_VALIDATE_INT, [
    'options' => ['min_range' => 1],
]);
if ($page === false) {
    $page = 1;
}

// Province-level area codes. The `area` request parameter is an index
// (0-based) into this list, not an area code itself.
$numbers = [
    110000, 120000, 130000, 140000, 150000,
    210000, 220000, 230000,
    310000, 320000, 330000, 340000, 350000, 360000, 370000,
    410000, 420000, 430000, 440000, 450000, 460000,
    500000, 510000, 520000, 530000, 540000,
    610000, 620000, 630000, 640000, 650000, 660000,
];

// Read and validate the area index. Validating it as an in-range integer
// (rather than using the raw value as an array offset) also rejects
// array-valued input such as `?area[]=1`, which would otherwise raise a
// TypeError on PHP 8.
$area = $_GET['area'] ?? '';
$areaIndex = filter_var($area, FILTER_VALIDATE_INT, [
    'options' => ['min_range' => 0, 'max_range' => count($numbers) - 1],
]);
$areaid = $areaIndex === false ? '' : $numbers[$areaIndex];

if ($areaid === '') {
    echo json_encode(['error' => '无效的地区参数'], JSON_UNESCAPED_UNICODE);
    exit();
}

/**
 * Fetch the list of child-area codes for a parent area.
 *
 * @param string $areaid Parent area code.
 * @return array Child area codes.
 */
function getareaid($areaid): array
{
    // Purpose: POST $areaid as `areaCode` to the getChildArea endpoint and
    // return the non-empty `areaCode` value of every item in the response's
    // `data` list.
    //
    // Returns an empty array when the request fails, the body is not valid
    // JSON, the payload is not the expected object, or `retCode` is not
    // '000000'. Transport and JSON errors are written to the PHP error log.

    $endpoint = 'https://xwpx.eduyun.cn/xspxRegister/getChildArea';

    // Form-encoded request body.
    $postData = http_build_query([
        'PAGE_SERIAL_VERSION_UID' => '',
        'areaCode' => $areaid,
    ]);

    $ch = curl_init();
    curl_setopt_array($ch, [
        CURLOPT_URL => $endpoint,
        CURLOPT_POST => true,
        CURLOPT_POSTFIELDS => $postData,
        CURLOPT_RETURNTRANSFER => true,
        // Bound the request so a stalled upstream cannot hang the script.
        CURLOPT_CONNECTTIMEOUT => 10,
        CURLOPT_TIMEOUT => 30,
        CURLOPT_HTTPHEADER => [
            'Referer: https://xwpx.eduyun.cn/tol/toHomePageParentServices',
            'User-Agent: mozilla/5.0 (macintosh; intel mac os x 10_15_1) applewebkit/537.36 (khtml, like gecko) brave chrome/78.0.3904.70 safari/537.36 Edg/139.0.0.0',
            'Content-Type: application/x-www-form-urlencoded',
        ],
        // NOTE(review): hard-coded session cookie captured from a browser;
        // presumably it expires and must be refreshed by hand.
        CURLOPT_COOKIE => 'HWWAFSESID=d82bf97755bdb320d6; HWWAFSESTIME=1755569214521; SESSION=552af9b3-ac7a-4bba-b35a-27978f142f24; sajssdk_2015_cross_new_user=1; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%22198c01467119ab-0916c16c16c16c-4c657b58-2073600-198c0146712113d%22%2C%22first_id%22%3A%22%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%2C%22%24latest_referrer%22%3A%22%22%7D%2C%22identities%22%3A%22eyIkaWRlbnRpdHlfY29va2llX2lkIjoiMTk4YzAxNDY3MTE5YWItMDkxNmMxNmMxNmMxNmMtNGM2NTdiNTgtMjA3MzYwMC0xOThjMDE0NjcxMjExM2QifQ%3D%3D%22%2C%22history_login_id%22%3A%7B%22name%22%3A%22%22%2C%22value%22%3A%22%22%7D%2C%22%24device_id%22%3A%22198c01467119ab-0916c16c16c16c-4c657b58-2073600-198c0146712113d%22%7D; Hm_lvt_c3f009f814f701e8fad8a17f9682ec79=1755596687; HMACCOUNT=497681945067787A; Hm_lpvt_c3f009f814f701e8fad8a17f9682ec79=1755596697',
        // NOTE(review): TLS certificate verification is disabled, as in the
        // original script; enable it for production use.
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_SSL_VERIFYHOST => false,
    ]);

    $response = curl_exec($ch);

    // curl_exec() returns false on failure; treat that the same as a
    // non-zero cURL error number.
    if ($response === false || curl_errno($ch)) {
        error_log('cURL错误: ' . curl_error($ch));
        curl_close($ch);
        return [];
    }
    curl_close($ch);

    $result = json_decode($response, true);
    if (json_last_error() !== JSON_ERROR_NONE) {
        error_log('JSON解析错误: ' . json_last_error_msg());
        return [];
    }

    // Valid JSON can still be null, a scalar, or an object without the
    // expected keys; guard before dereferencing so such payloads yield []
    // instead of warnings or a TypeError.
    if (
        !is_array($result)
        || ($result['retCode'] ?? null) !== '000000'
        || !is_array($result['data'] ?? null)
    ) {
        return [];
    }

    $areaCodes = [];
    foreach ($result['data'] as $item) {
        if (is_array($item) && !empty($item['areaCode'])) {
            $areaCodes[] = $item['areaCode'];
        }
    }

    return $areaCodes;
}

// Fetch the child-area code list for the selected province.
$childAreaCodes = getareaid($areaid);

// Without any child area there is nothing to query: report and stop.
if (empty($childAreaCodes)) {
    echo json_encode(['error' => '未获取到子地区编码'], JSON_UNESCAPED_UNICODE);
    exit();
}

// `object` filter. Array-valued input is discarded; the value "5" is a
// shorthand that is expanded to the list "0,1,2,3,4".
$object = $_GET['object'] ?? '';
if (!is_string($object)) {
    $object = '';
}
if ($object === '5') {
    $object = '0,1,2,3,4';
}

// `type` maps to the upstream `profitType` field. The original script read
// this parameter but always sent '1'; the supplied value is now honoured and
// '1' is kept as the default when it is absent or empty.
$profitType = $_GET['type'] ?? '';
if (!is_string($profitType) || $profitType === '') {
    $profitType = '1';
}

// Index of the child area to query on this call (defaults to 0 and is reset
// to 0 when out of range).
$currentIndex = isset($_GET['index']) ? intval($_GET['index']) : 0;
$totalCities = count($childAreaCodes);
if ($currentIndex < 0 || $currentIndex >= $totalCities) {
    $currentIndex = 0;
}

// Child-area code used for this request, and the index the caller should
// pass next (wraps around to 0 after the last child area).
$currentCityCode = $childAreaCodes[$currentIndex];
$nextIndex = ($currentIndex + 1) % $totalCities;

// Final request payload; only the current child-area code is sent as `city`.
$postData = [
    'PAGE_SERIAL_VERSION_UID' => '',
    'province' => $areaid,
    'city' => $currentCityCode,
    'area' => '',
    'object' => $object,
    'profitType' => $profitType,
    'businessType' => '1',
    'pageNo' => $page,
    'pageSize' => 10,
];
$postDataString = http_build_query($postData);

$ch = curl_init();
curl_setopt_array($ch, [
    CURLOPT_URL => $url,
    CURLOPT_POST => true,
    CURLOPT_POSTFIELDS => $postDataString,
    CURLOPT_RETURNTRANSFER => true,
    // Bound the request so a stalled upstream cannot hang the script.
    CURLOPT_CONNECTTIMEOUT => 10,
    CURLOPT_TIMEOUT => 30,
    CURLOPT_HTTPHEADER => [
        'Content-Type: application/x-www-form-urlencoded',
        'Referer: https://xwpx.eduyun.cn/tolSpInfo/index',
        'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
    ],
    // NOTE(review): hard-coded session cookie captured from a browser;
    // presumably it expires and must be refreshed by hand.
    CURLOPT_COOKIE => 'HWWAFSESID=d82bf97755bdb320d6; HWWAFSESTIME=1755569214521; SESSION=552af9b3-ac7a-4bba-b35a-27978f142f24; sajssdk_2015_cross_new_user=1; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%22198c01467119ab-0916c16c16c16c-4c657b58-2073600-198c0146712113d%22%2C%22first_id%22%3A%22%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E8%87%AA%E7%84%B6%E6%90%9C%E7%B4%A2%E6%B5%81%E9%87%8F%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC%22%2C%22%24latest_referrer%22%3A%22https%3A%2F%2Fcn.bing.com%2F%22%7D%2C%22identities%22%3A%22eyIkaWRlbnRpdHlfY29va2llX2lkIjoiMTk4YzAxNDY3MTE5YWItMDkxNmMxNmMxNmMxNmMtNGM2NTdiNTgtMjA3MzYwMC0xOThjMDE0NjcxMjExM2QifQ%3D%3D%22%2C%22history_login_id%22%3A%7B%22name%22%3A%22%22%2C%22value%22%3A%22%22%7D%2C%22%24device_id%22%3A%22198c01467119ab-0916c16c16c16c-4c657b58-2073600-198c0146712113d%22%7D',
    // TLS certificate verification is disabled (enabling it is recommended
    // for production).
    CURLOPT_SSL_VERIFYPEER => false,
    CURLOPT_SSL_VERIFYHOST => false,
]);

$response = curl_exec($ch);

if ($response === false || curl_errno($ch)) {
    $result = ['error' => '请求错误: ' . curl_error($ch)];
} else {
    $responseData = json_decode($response, true);

    // Only a non-empty decoded array is a usable response. A scalar JSON
    // value (e.g. `true` or a number) would otherwise break the array
    // writes below.
    $result = (is_array($responseData) && $responseData !== [])
        ? $responseData
        : ['error' => '无法解析响应数据'];

    // Paging hints so the front end can call this script in a loop.
    $result['current_index'] = $currentIndex;
    $result['next_index'] = $nextIndex;
    $result['current_city_code'] = $currentCityCode;
    $result['total_cities'] = $totalCities;
}

curl_close($ch);

// Emit the result as pretty-printed JSON with unescaped Unicode.
echo json_encode($result, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE);
?>

执行的参数为:peixun.php?page=[地址参数]&area=[地址参数1]&object=[地址参数2]&type=[地址参数3]&index=[地址参数4]

参数说明:page 为页码(从 1 开始);area 为省级地区编码数组的下标(0–31);object 传 5 时会展开为 "0,1,2,3,4";index 为子地区下标(从 0 开始,超出范围时重置为 0)。

效果:《培训机构学校有哪些_培训机构大全 - 找校通》。找校通培训机构频道提供全国培训机构的详细信息,包含学校简介、地址等关键信息,支持按地区筛选查询,是学生和家长寻找合适培训机构的便捷平台。http://www.6983.net/company/5/

http://www.dtcms.com/a/358164.html

相关文章:

  • Axios 实例配置指南
  • 基于物联网设计的园林灌溉系统(华为云IOT)_274
  • k8s--efk日志收集
  • PostgreSQL令牌机制解析
  • C++多态介绍
  • sunset: sunrise
  • 安全多方计算(MPC):技术原理、典型应用与 Python 工程实现详解
  • POLAR 社区交流平台 PRD v1.0
  • DDR5 介绍
  • 关于PXIe工控机的网速问题XH-PXIe7313万兆网卡
  • 【LeetCode每日一题】21. 合并两个有序链表 2. 两数相加
  • Linux三剑客grep-sed-awk
  • # `std::basic_istream`总结
  • 从零到一:使用Flask构建“我的笔记”网站
  • Elasticsearch面试精讲 Day 2:索引、文档与映射机制
  • 如何在 Jenkins Docker 容器中切换到 root 用户并解决权限问题
  • WPF和WinFrom区别
  • WPF中的ref和out
  • 基于Ubuntu本地GitLab 搭建 Git 服务器
  • 小迪安全v2023学习笔记(七十四讲)—— 验证机制篇验证码绕过思路SRC挖掘演示
  • web渗透ASP.NET(Webform)反序列化漏洞
  • SpringBoot整合Actuator实现健康检查
  • windows系统中安装zip版本mysql,配置环境
  • Spring Cloud Gateway 网关(五)
  • 电子战:Maritime SIGINT Architecture Technical Standards Handbook
  • 系统分析师考试大纲新旧版本深度分析与备考策略
  • 拼团小程序源码分享拼团余额提现小程序定制教程开发源码二开
  • 深入理解 RabbitMQ:从底层原理到实战落地的全维度指南
  • (纯新手教学)计算机视觉(opencv)实战十——轮廓特征(轮廓面积、 轮廓周长、外接圆与外接矩形)
  • 在Kotlin中安全的管理资源