爬虫学习-Scrape Center spa6 超简单 JS 逆向
关卡
spa6
电影数据网站,无反爬,数据通过 Ajax 加载,数据接口参数加密且有时间限制,适合动态页面渲染爬取或 JavaScript 逆向分析。
首先抓包发现get请求的参数token有加密。 offset表示翻页,limit表示每一页有多少条数据。
抓到加密 Token 之后,就要找出 Token 值是怎么生成的,
搜索 token,看看相关的 JS 代码在哪里,点进去看看
打断点调试
可以看到 Object(_0x2fa7bd['a']) 是个函数,这里就是加密函数,点进去看看
再打断点调试看看,这里的代码被混淆了,看不懂就先去解混淆,然后把代码扣下来
js
const { createHash } = require('crypto');

/**
 * Builds the `token` query parameter for the movie list API.
 *
 * Token layout: Base64("<digest>,<timestamp>"), where
 *   - <timestamp> is the current Unix time in seconds (rounded), as a decimal string;
 *   - <digest> is the hex SHA-1 of "/api/movie,<timestamp>".
 *
 * Any arguments passed by the caller are ignored: the signed path is fixed to
 * '/api/movie'. The function name is kept as extracted from the site bundle
 * because external callers look it up by this name.
 *
 * @returns {string} Base64-encoded token valid for the current timestamp.
 */
function _0x456254() {
  const apiPath = '/api/movie';
  const timestamp = String(Math.round(Date.now() / 1000));

  const digest = createHash('sha1')
    .update([apiPath, timestamp].join(','), 'utf8')
    .digest('hex');

  // The obfuscated bundle encodes this payload with its own helper
  // (`_0x358b1f.encode`); plain UTF-8 -> Base64 is used here instead.
  const payload = [digest, timestamp].join(',');
  return Buffer.from(payload, 'utf8').toString('base64');
}

console.log(_0x456254());
python
import requests
import execjs
# Browser-like User-Agent sent with every API request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
}

# Read and compile the token script once instead of on every page; the
# `with` block also makes sure the file handle is closed.
with open('5.js', 'r', encoding='utf-8') as js_file:
    token_script = execjs.compile(js_file.read())

# Walk the list API ten records at a time (offsets 0, 10, ..., 90).
for offset in range(0, 100, 10):
    # The token embeds the current timestamp, so request a fresh one per page.
    js_da = token_script.call('_0x456254')
    print(js_da)
    params = {
        'limit': '10',
        'offset': offset,
        'token': js_da,
    }
    # A timeout keeps a stalled connection from hanging the crawl forever.
    response = requests.get(
        'https://spa6.scrape.center/api/movie/',
        params=params,
        headers=headers,
        timeout=10,
    )
    print(response.text)
运行结果
结束