批量采集培训机构数据进行查询
使用 PHP 写了一个批量采集培训机构数据并进行查询的脚本。在 PHP 环境中新建一个 peixun.php 文件,
代码如下:
<?php
// peixun.php - fetches one page of training-institution listings from
// xwpx.eduyun.cn for one child area of a province and echoes it as JSON.
//
// Query parameters:
//   area   index into the province code list below (required)
//   page   result page number, defaults to 1
//   object value forwarded as "object"; 5 is expanded to "0,1,2,3,4"
//   type   value forwarded as "profitType"; "1" is sent when it is absent
//   index  position of the child area to query; out-of-range values reset to 0
//
// The reply is the upstream JSON with current_index, next_index,
// current_city_code and total_cities added, so a caller can walk through
// every child area by passing next_index back as index.

// Listing endpoint.
$url = 'https://xwpx.eduyun.cn/tolSpInfo/getSpInfoList';

// Province-level area codes; the "area" query parameter is an index into this list.
$numbers = [
    110000, 120000, 130000, 140000, 150000, 210000, 220000, 230000,
    310000, 320000, 330000, 340000, 350000, 360000, 370000, 410000,
    420000, 430000, 440000, 450000, 460000, 500000, 510000, 520000,
    530000, 540000, 610000, 620000, 630000, 640000, 650000, 660000,
];

/**
 * Sends a form-encoded POST request and returns the raw response.
 *
 * @param string $url     Target URL.
 * @param array  $fields  Form fields; encoded with http_build_query().
 * @param array  $headers Raw request header lines.
 * @param string $cookie  Value sent as the Cookie header.
 * @return array ['body' => string, 'error' => string|null]; 'error' is null on success.
 */
function peixunPostForm(string $url, array $fields, array $headers, string $cookie): array
{
    $ch = curl_init();
    curl_setopt_array($ch, [
        CURLOPT_URL            => $url,
        CURLOPT_POST           => true,
        CURLOPT_POSTFIELDS     => http_build_query($fields),
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_HTTPHEADER     => $headers,
        CURLOPT_COOKIE         => $cookie,
        // Bound the request so a stalled upstream cannot hang this script forever.
        CURLOPT_CONNECTTIMEOUT => 10,
        CURLOPT_TIMEOUT        => 30,
        // NOTE(review): certificate verification is switched off, exactly as in the
        // original script. Enable both options wherever a CA bundle is available.
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_SSL_VERIFYHOST => false,
    ]);

    $body = curl_exec($ch);
    $error = null;
    if ($body === false || curl_errno($ch) !== 0) {
        $message = curl_error($ch);
        $error = $message !== '' ? $message : 'curl_exec failed';
        $body = '';
    }
    curl_close($ch);

    return ['body' => (string) $body, 'error' => $error];
}

/**
 * Fetches the child area codes of a parent area.
 *
 * @param string $areaid Parent area code.
 * @return array Child area codes; empty on a transport error, a JSON error,
 *               or a reply whose retCode is not '000000'.
 */
function getareaid($areaid): array
{
    $reply = peixunPostForm(
        'https://xwpx.eduyun.cn/xspxRegister/getChildArea',
        [
            'PAGE_SERIAL_VERSION_UID' => '',
            'areaCode'                => $areaid,
        ],
        [
            'Referer: https://xwpx.eduyun.cn/tol/toHomePageParentServices',
            'User-Agent: mozilla/5.0 (macintosh; intel mac os x 10_15_1) applewebkit/537.36 (khtml, like gecko) brave chrome/78.0.3904.70 safari/537.36 Edg/139.0.0.0',
            'Content-Type: application/x-www-form-urlencoded',
        ],
        'HWWAFSESID=d82bf97755bdb320d6; HWWAFSESTIME=1755569214521; SESSION=552af9b3-ac7a-4bba-b35a-27978f142f24; sajssdk_2015_cross_new_user=1; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%22198c01467119ab-0916c16c16c16c-4c657b58-2073600-198c0146712113d%22%2C%22first_id%22%3A%22%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%2C%22%24latest_referrer%22%3A%22%22%7D%2C%22identities%22%3A%22eyIkaWRlbnRpdHlfY29va2llX2lkIjoiMTk4YzAxNDY3MTE5YWItMDkxNmMxNmMxNmMxNmMtNGM2NTdiNTgtMjA3MzYwMC0xOThjMDE0NjcxMjExM2QifQ%3D%3D%22%2C%22history_login_id%22%3A%7B%22name%22%3A%22%22%2C%22value%22%3A%22%22%7D%2C%22%24device_id%22%3A%22198c01467119ab-0916c16c16c16c-4c657b58-2073600-198c0146712113d%22%7D; Hm_lvt_c3f009f814f701e8fad8a17f9682ec79=1755596687; HMACCOUNT=497681945067787A; Hm_lpvt_c3f009f814f701e8fad8a17f9682ec79=1755596697'
    );

    if ($reply['error'] !== null) {
        error_log('cURL错误: ' . $reply['error']);
        return [];
    }

    $result = json_decode($reply['body'], true);
    if (json_last_error() !== JSON_ERROR_NONE) {
        error_log('JSON解析错误: ' . json_last_error_msg());
        return [];
    }

    // A syntactically valid reply can still be a scalar, null, or lack the expected keys.
    if (!is_array($result)
        || ($result['retCode'] ?? null) !== '000000'
        || empty($result['data'])
        || !is_array($result['data'])
    ) {
        return [];
    }

    $areaCodes = [];
    foreach ($result['data'] as $item) {
        if (is_array($item) && !empty($item['areaCode'])) {
            $areaCodes[] = $item['areaCode'];
        }
    }

    return $areaCodes;
}

// Every response from this script is JSON.
if (!headers_sent()) {
    header('Content-Type: application/json; charset=utf-8');
}

// Page number: anything that is not a positive integer falls back to page 1.
$page = max(1, (int) ($_GET['page'] ?? 1));

// Resolve the province code. filter_var() returns false for missing, non-integer
// and array-valued input, so those never reach the array lookup.
$area = $_GET['area'] ?? '';
$areaIndex = filter_var($area, FILTER_VALIDATE_INT);
$areaid = ($areaIndex !== false && isset($numbers[$areaIndex])) ? $numbers[$areaIndex] : '';
if ($areaid === '') {
    echo json_encode(['error' => '无效的地区参数'], JSON_UNESCAPED_UNICODE);
    exit();
}

// Look up the child areas of the province; nothing can be queried without them.
$childAreaCodes = getareaid($areaid);
if (empty($childAreaCodes)) {
    echo json_encode(['error' => '未获取到子地区编码'], JSON_UNESCAPED_UNICODE);
    exit();
}

// "object" filter: the value 5 is expanded to the list 0,1,2,3,4.
$object = $_GET['object'] ?? '';
$object = is_string($object) ? trim($object) : '';
if ($object === '5') {
    $object = '0,1,2,3,4';
}

// "type" is forwarded as profitType; '1' is sent when it is absent, which is
// the value the original script always sent.
$profitType = $_GET['type'] ?? '';
$profitType = (is_string($profitType) && $profitType !== '') ? $profitType : '1';

// Position of the child area to query; out-of-range values restart at 0.
$totalCities = count($childAreaCodes);
$currentIndex = isset($_GET['index']) ? intval($_GET['index']) : 0;
if ($currentIndex < 0 || $currentIndex >= $totalCities) {
    $currentIndex = 0;
}
$currentCityCode = $childAreaCodes[$currentIndex];

// The index after the last one wraps around to 0.
$nextIndex = ($currentIndex + 1) % $totalCities;

// Request one page of listings for the selected child area only.
$postData = [
    'PAGE_SERIAL_VERSION_UID' => '',
    'province'                => $areaid,
    'city'                    => $currentCityCode,
    'area'                    => '',
    'object'                  => $object,
    'profitType'              => $profitType,
    'businessType'            => '1',
    'pageNo'                  => $page,
    'pageSize'                => 10,
];

$reply = peixunPostForm(
    $url,
    $postData,
    [
        'Content-Type: application/x-www-form-urlencoded',
        'Referer: https://xwpx.eduyun.cn/tolSpInfo/index',
        'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
    ],
    'HWWAFSESID=d82bf97755bdb320d6; HWWAFSESTIME=1755569214521; SESSION=552af9b3-ac7a-4bba-b35a-27978f142f24; sajssdk_2015_cross_new_user=1; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%22198c01467119ab-0916c16c16c16c-4c657b58-2073600-198c0146712113d%22%2C%22first_id%22%3A%22%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E8%87%AA%E7%84%B6%E6%90%9C%E7%B4%A2%E6%B5%81%E9%87%8F%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC%22%2C%22%24latest_referrer%22%3A%22https%3A%2F%2Fcn.bing.com%2F%22%7D%2C%22identities%22%3A%22eyIkaWRlbnRpdHlfY29va2llX2lkIjoiMTk4YzAxNDY3MTE5YWItMDkxNmMxNmMxNmMxNmMtNGM2NTdiNTgtMjA3MzYwMC0xOThjMDE0NjcxMjExM2QifQ%3D%3D%22%2C%22history_login_id%22%3A%7B%22name%22%3A%22%22%2C%22value%22%3A%22%22%7D%2C%22%24device_id%22%3A%22198c01467119ab-0916c16c16c16c-4c657b58-2073600-198c0146712113d%22%7D'
);

if ($reply['error'] !== null) {
    $result = ['error' => '请求错误: ' . $reply['error']];
} else {
    $responseData = json_decode($reply['body'], true);
    // Only a non-empty array is usable; a scalar, null or empty reply counts as unparseable.
    $result = (is_array($responseData) && $responseData !== [])
        ? $responseData
        : ['error' => '无法解析响应数据'];

    // Paging hints so the caller can request the next child area.
    $result['current_index'] = $currentIndex;
    $result['next_index'] = $nextIndex;
    $result['current_city_code'] = $currentCityCode;
    $result['total_cities'] = $totalCities;
}

echo json_encode($result, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE);
?>
执行的参数为:peixun.php?page=[地址参数]&area=[地址参数1]&object=[地址参数2]&type=[地址参数3]&index=[地址参数4]
参数取值一般在 1-40 之间即可;注意 area 是代码中省级地区编码数组的下标,有效范围为 0–31,超出范围会返回"无效的地区参数"。
效果:《培训机构学校有哪些_培训机构大全-找校通》——找校通培训机构频道提供全国培训机构的详细信息,包含学校简介、地址等关键信息,支持按地区筛选查询,是学生和家长寻找合适培训机构的便捷平台。http://www.6983.net/company/5/