常用hook钩子函数
爬虫Hook技术常用字段和勾子函数
目录
- Hook技术概述
- 网络请求相关Hook
- 浏览器环境Hook
- JavaScript引擎Hook
- 加密算法Hook
- 反爬虫检测Hook
- 实际应用示例
- Hook工具和框架
Hook技术概述
Hook(钩子)技术是一种在程序运行时拦截和修改函数调用的技术。在爬虫中,Hook技术主要用于:
- 绕过反爬虫检测
- 获取加密参数
- 模拟真实浏览器行为
- 动态修改请求参数
网络请求相关Hook
1. XMLHttpRequest Hook
// 拦截XMLHttpRequest的open方法
(function() {const originalOpen = XMLHttpRequest.prototype.open;XMLHttpRequest.prototype.open = function(method, url, async, user, password) {console.log('XHR Request:', {method: method,url: url,async: async,user: user,password: password});// 可以在这里修改请求参数if (url.includes('api.example.com')) {url = url.replace('api.example.com', 'api.hooked.com');}return originalOpen.call(this, method, url, async, user, password);};// 拦截send方法const originalSend = XMLHttpRequest.prototype.send;XMLHttpRequest.prototype.send = function(data) {console.log('XHR Send Data:', data);// 可以在这里修改发送的数据if (data && typeof data === 'string') {data = data.replace('old_value', 'new_value');}return originalSend.call(this, data);};
})();
2. Fetch API Hook
// 拦截fetch方法
(function() {const originalFetch = window.fetch;window.fetch = function(url, options) {console.log('Fetch Request:', {url: url,options: options});// 修改请求头if (options && options.headers) {options.headers['X-Hooked'] = 'true';}return originalFetch.call(this, url, options).then(response => {console.log('Fetch Response:', response);return response;});};
})();
3. Axios Hook
// 拦截axios请求
(function() {if (window.axios) {// 请求拦截器window.axios.interceptors.request.use(function(config) {console.log('Axios Request Config:', config);// 修改请求头config.headers['X-Hooked'] = 'true';// 修改请求数据if (config.data) {config.data.hooked = true;}return config;}, function(error) {return Promise.reject(error);});// 响应拦截器window.axios.interceptors.response.use(function(response) {console.log('Axios Response:', response);return response;}, function(error) {console.log('Axios Error:', error);return Promise.reject(error);});}
})();
浏览器环境Hook
1. Navigator对象Hook
// Hook navigator.userAgent
(function() {const originalUserAgent = Object.getOwnPropertyDescriptor(Navigator.prototype, 'userAgent');Object.defineProperty(Navigator.prototype, 'userAgent', {get: function() {const userAgent = originalUserAgent.get.call(this);console.log('UserAgent accessed:', userAgent);// 返回修改后的userAgentreturn 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36';}});
})();// Hook navigator.platform
(function() {const originalPlatform = Object.getOwnPropertyDescriptor(Navigator.prototype, 'platform');Object.defineProperty(Navigator.prototype, 'platform', {get: function() {const platform = originalPlatform.get.call(this);console.log('Platform accessed:', platform);return 'Win32';}});
})();// Hook navigator.language
(function() {const originalLanguage = Object.getOwnPropertyDescriptor(Navigator.prototype, 'language');Object.defineProperty(Navigator.prototype, 'language', {get: function() {const language = originalLanguage.get.call(this);console.log('Language accessed:', language);return 'zh-CN';}});
})();
2. Screen对象Hook
// Hook screen.width和screen.height
(function() {const originalWidth = Object.getOwnPropertyDescriptor(Screen.prototype, 'width');const originalHeight = Object.getOwnPropertyDescriptor(Screen.prototype, 'height');Object.defineProperty(Screen.prototype, 'width', {get: function() {const width = originalWidth.get.call(this);console.log('Screen width accessed:', width);return 1920;}});Object.defineProperty(Screen.prototype, 'height', {get: function() {const height = originalHeight.get.call(this);console.log('Screen height accessed:', height);return 1080;}});
})();
3. Window对象Hook
// Hook window.innerWidth和window.innerHeight
(function() {const originalInnerWidth = Object.getOwnPropertyDescriptor(Window.prototype, 'innerWidth');const originalInnerHeight = Object.getOwnPropertyDescriptor(Window.prototype, 'innerHeight');Object.defineProperty(Window.prototype, 'innerWidth', {get: function() {const width = originalInnerWidth.get.call(this);console.log('Inner width accessed:', width);return 1366;}});Object.defineProperty(Window.prototype, 'innerHeight', {get: function() {const height = originalInnerHeight.get.call(this);console.log('Inner height accessed:', height);return 768;}});
})();
4. Document对象Hook
// Hook document.cookie
(function() {const originalCookie = Object.getOwnPropertyDescriptor(Document.prototype, 'cookie');Object.defineProperty(Document.prototype, 'cookie', {get: function() {const cookie = originalCookie.get.call(this);console.log('Cookie accessed:', cookie);return cookie;},set: function(value) {console.log('Cookie set:', value);return originalCookie.set.call(this, value);}});
})();// Hook document.referrer
(function() {const originalReferrer = Object.getOwnPropertyDescriptor(Document.prototype, 'referrer');Object.defineProperty(Document.prototype, 'referrer', {get: function() {const referrer = originalReferrer.get.call(this);console.log('Referrer accessed:', referrer);return 'https://www.google.com/';}});
})();
JavaScript引擎Hook
1. Date对象Hook
// Hook Date构造函数
(function() {const originalDate = Date;Date = function(...args) {console.log('Date constructor called with:', args);if (args.length === 0) {// 返回固定时间return new originalDate('2023-01-01T00:00:00.000Z');}return new originalDate(...args);};// 复制静态方法Date.now = originalDate.now;Date.parse = originalDate.parse;Date.UTC = originalDate.UTC;
})();// Hook Date.now()
(function() {const originalNow = Date.now;Date.now = function() {const now = originalNow();console.log('Date.now() called:', now);// 返回固定时间戳return 1672531200000; // 2023-01-01 00:00:00};
})();
2. Math对象Hook
// Hook Math.random()
(function() {const originalRandom = Math.random;Math.random = function() {const random = originalRandom();console.log('Math.random() called:', random);// 返回固定值或修改后的值return 0.5;};
})();// Hook Math.floor()
(function() {const originalFloor = Math.floor;Math.floor = function(x) {const result = originalFloor(x);console.log('Math.floor() called with:', x, 'result:', result);return result;};
})();
3. JSON对象Hook
// Hook JSON.stringify()
(function() {const originalStringify = JSON.stringify;JSON.stringify = function(value, replacer, space) {console.log('JSON.stringify() called with:', value);const result = originalStringify(value, replacer, space);console.log('JSON.stringify() result:', result);return result;};
})();// Hook JSON.parse()
(function() {const originalParse = JSON.parse;JSON.parse = function(text, reviver) {console.log('JSON.parse() called with:', text);const result = originalParse(text, reviver);console.log('JSON.parse() result:', result);return result;};
})();
加密算法Hook
1. Crypto API Hook
// Hook crypto.getRandomValues()
(function() {const originalGetRandomValues = crypto.getRandomValues;crypto.getRandomValues = function(array) {console.log('crypto.getRandomValues() called with:', array);const result = originalGetRandomValues.call(this, array);console.log('crypto.getRandomValues() result:', result);return result;};
})();// Hook crypto.subtle.digest()
(function() {const originalDigest = crypto.subtle.digest;crypto.subtle.digest = function(algorithm, data) {console.log('crypto.subtle.digest() called with:', {algorithm: algorithm,data: data});return originalDigest.call(this, algorithm, data).then(result => {console.log('crypto.subtle.digest() result:', result);return result;});};
})();
2. 常见加密库Hook
// Hook CryptoJS
(function() {if (window.CryptoJS) {// Hook MD5const originalMD5 = CryptoJS.MD5;CryptoJS.MD5 = function(message, options) {console.log('CryptoJS.MD5() called with:', message);const result = originalMD5(message, options);console.log('CryptoJS.MD5() result:', result.toString());return result;};// Hook SHA256const originalSHA256 = CryptoJS.SHA256;CryptoJS.SHA256 = function(message, options) {console.log('CryptoJS.SHA256() called with:', message);const result = originalSHA256(message, options);console.log('CryptoJS.SHA256() result:', result.toString());return result;};// Hook AESconst originalAES = CryptoJS.AES;CryptoJS.AES = {encrypt: function(message, key, options) {console.log('CryptoJS.AES.encrypt() called with:', {message: message,key: key,options: options});const result = originalAES.encrypt(message, key, options);console.log('CryptoJS.AES.encrypt() result:', result.toString());return result;},decrypt: function(ciphertext, key, options) {console.log('CryptoJS.AES.decrypt() called with:', {ciphertext: ciphertext,key: key,options: options});const result = originalAES.decrypt(ciphertext, key, options);console.log('CryptoJS.AES.decrypt() result:', result.toString());return result;}};}
})();
反爬虫检测Hook
1. WebDriver检测Hook
// Hook webdriver属性
(function() {Object.defineProperty(navigator, 'webdriver', {get: function() {console.log('webdriver property accessed');return false; // 返回false表示不是webdriver}});
})();// Hook chrome对象
(function() {if (!window.chrome) {window.chrome = {runtime: {},loadTimes: function() {return {commitLoadTime: 0,connectionInfo: 'h2',finishDocumentLoadTime: 0,finishLoadTime: 0,firstPaintAfterLoadTime: 0,navigationType: 'Other',npnNegotiatedProtocol: 'h2',requestTime: 0,startLoadTime: 0,wasAlternateProtocolAvailable: false,wasFetchedViaSpdy: true,wasNpnNegotiated: true};}};}
})();
2. 指纹检测Hook
// Hook canvas指纹
(function() {const originalToDataURL = HTMLCanvasElement.prototype.toDataURL;HTMLCanvasElement.prototype.toDataURL = function(type, quality) {console.log('Canvas toDataURL() called');const result = originalToDataURL.call(this, type, quality);// 可以在这里修改canvas指纹if (type === 'image/png') {// 返回固定的canvas指纹return '';}return result;};
})();// Hook WebGL指纹
(function() {const originalGetParameter = WebGLRenderingContext.prototype.getParameter;WebGLRenderingContext.prototype.getParameter = function(parameter) {console.log('WebGL getParameter() called with:', parameter);const result = originalGetParameter.call(this, parameter);// 修改某些WebGL参数if (parameter === 37445) { // UNMASKED_VENDOR_WEBGLreturn 'Intel Inc.';}if (parameter === 37446) { // UNMASKED_RENDERER_WEBGLreturn 'Intel Iris OpenGL Engine';}return result;};
})();
3. 行为检测Hook
// Hook鼠标事件
(function() {const originalAddEventListener = EventTarget.prototype.addEventListener;EventTarget.prototype.addEventListener = function(type, listener, options) {console.log('addEventListener() called with:', {type: type,listener: listener,options: options});// 如果是鼠标事件,可以在这里添加随机延迟if (type.startsWith('mouse')) {const originalListener = listener;listener = function(event) {console.log('Mouse event triggered:', type, event);return originalListener.call(this, event);};}return originalAddEventListener.call(this, type, listener, options);};
})();// Hook键盘事件
(function() {const originalKeyEvent = KeyboardEvent.prototype;const originalKeyDown = originalKeyEvent.key;const originalKeyCode = originalKeyEvent.keyCode;Object.defineProperty(originalKeyEvent, 'key', {get: function() {const key = originalKeyDown.get.call(this);console.log('Keyboard key accessed:', key);return key;}});Object.defineProperty(originalKeyEvent, 'keyCode', {get: function() {const keyCode = originalKeyCode.get.call(this);console.log('Keyboard keyCode accessed:', keyCode);return keyCode;}});
})();
实际应用示例
1. 完整的反检测Hook脚本
// 完整的反检测Hook脚本
(function() {'use strict';console.log('Anti-detection hooks loaded');// 1. 隐藏webdriverObject.defineProperty(navigator, 'webdriver', {get: () => false});// 2. 修改userAgentObject.defineProperty(navigator, 'userAgent', {get: () => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'});// 3. 添加chrome对象if (!window.chrome) {window.chrome = {runtime: {},loadTimes: () => ({commitLoadTime: 0,connectionInfo: 'h2',finishDocumentLoadTime: 0,finishLoadTime: 0,firstPaintAfterLoadTime: 0,navigationType: 'Other',npnNegotiatedProtocol: 'h2',requestTime: 0,startLoadTime: 0,wasAlternateProtocolAvailable: false,wasFetchedViaSpdy: true,wasNpnNegotiated: true})};}// 4. Hook canvas指纹const originalToDataURL = HTMLCanvasElement.prototype.toDataURL;HTMLCanvasElement.prototype.toDataURL = function(type, quality) {if (type === 'image/png') {return '';}return originalToDataURL.call(this, type, quality);};// 5. Hook WebGL指纹const originalGetParameter = WebGLRenderingContext.prototype.getParameter;WebGLRenderingContext.prototype.getParameter = function(parameter) {if (parameter === 37445) return 'Intel Inc.';if (parameter === 37446) return 'Intel Iris OpenGL Engine';return originalGetParameter.call(this, parameter);};// 6. Hook Math.randomconst originalRandom = Math.random;Math.random = function() {const random = originalRandom();// 可以在这里添加随机性return random;};// 7. Hook Date.nowconst originalNow = Date.now;Date.now = function() {const now = originalNow();// 可以在这里添加时间偏移return now;};console.log('Anti-detection hooks completed');
})();
2. 加密参数Hook脚本
// 加密参数Hook脚本
(function() {'use strict';console.log('Encryption parameter hooks loaded');// Hook CryptoJSif (window.CryptoJS) {const originalMD5 = CryptoJS.MD5;CryptoJS.MD5 = function(message, options) {console.log('MD5 input:', message);const result = originalMD5(message, options);console.log('MD5 output:', result.toString());return result;};const originalSHA256 = CryptoJS.SHA256;CryptoJS.SHA256 = function(message, options) {console.log('SHA256 input:', message);const result = originalSHA256(message, options);console.log('SHA256 output:', result.toString());return result;};}// Hook crypto APIconst originalGetRandomValues = crypto.getRandomValues;crypto.getRandomValues = function(array) {console.log('getRandomValues input:', array);const result = originalGetRandomValues.call(this, array);console.log('getRandomValues output:', result);return result;};// Hook JSON.stringifyconst originalStringify = JSON.stringify;JSON.stringify = function(value, replacer, space) {console.log('JSON.stringify input:', value);const result = originalStringify(value, replacer, space);console.log('JSON.stringify output:', result);return result;};console.log('Encryption parameter hooks completed');
})();
3. 网络请求Hook脚本
// 网络请求Hook脚本
(function() {'use strict';console.log('Network request hooks loaded');// Hook XMLHttpRequestconst originalOpen = XMLHttpRequest.prototype.open;const originalSend = XMLHttpRequest.prototype.send;XMLHttpRequest.prototype.open = function(method, url, async, user, password) {console.log('XHR Open:', {method, url, async, user, password});return originalOpen.call(this, method, url, async, user, password);};XMLHttpRequest.prototype.send = function(data) {console.log('XHR Send:', data);return originalSend.call(this, data);};// Hook fetchconst originalFetch = window.fetch;window.fetch = function(url, options) {console.log('Fetch Request:', {url, options});return originalFetch.call(this, url, options).then(response => {console.log('Fetch Response:', response);return response;});};// Hook axiosif (window.axios) {window.axios.interceptors.request.use(function(config) {console.log('Axios Request:', config);return config;});window.axios.interceptors.response.use(function(response) {console.log('Axios Response:', response);return response;});}console.log('Network request hooks completed');
})();
Hook工具和框架
1. 浏览器扩展
- Tampermonkey: 用户脚本管理器
- Greasemonkey: Firefox用户脚本管理器
- Violentmonkey: 现代化的用户脚本管理器
2. 代理工具
- Fiddler: 网络调试代理
- Charles: 网络代理工具
- Burp Suite: Web应用安全测试工具
3. 浏览器自动化
- Puppeteer: Node.js浏览器自动化
- Selenium: 浏览器自动化框架
- Playwright: 现代化的浏览器自动化
4. 移动端Hook
- Frida: 动态插桩工具
- Xposed: Android框架Hook
- Substrate: iOS越狱Hook框架
本文档提供了爬虫中常用的Hook技术和勾子函数,建议在实际使用中根据具体需求进行调整和优化。请注意遵守相关法律法规和网站使用条款。