萝岗区营销型网站建设网页模板免费版
抖音视频下载工具
功能介绍
这是一个基于Python开发的抖音视频下载工具,可以方便地下载抖音平台上的视频内容。
主要特点
- 支持无水印视频下载
- 自动提取视频标题作为文件名
- 显示下载进度条
- 支持自动重试机制
- 支持调试模式
使用要求
- Python 3.10+
- Chrome浏览器
- 必要的Python包:
- selenium
- requests
- tqdm
- webdriver_manager(可选)
安装依赖
# 核心依赖
requests>=2.31.0
beautifulsoup4==4.12.2
lxml==4.9.3
urllib3>=2.1.0 # URL处理和安全连接
selenium>=4.18.1
pip install selenium requests tqdm webdriver_manager
使用方法
- 直接运行脚本:
python 抖音视频下载工具.py
- 作为模块导入:
from 抖音视频下载工具 import DouyinDownloader, main
downloader = DouyinDownloader()
url = "你的抖音视频链接"
main(url)
参数说明
- download_dir: 下载目录,默认为"downloads"
- max_retries: 最大重试次数,默认为3
- debug: 是否开启调试模式,默认为False
注意事项
- 确保系统已安装Chrome浏览器
- 需要稳定的网络连接
- 部分视频可能因为权限设置无法下载
- 建议不要频繁下载,以免被限制
常见问题
- 如果出现ChromeDriver相关错误,请确保Chrome浏览器版本与ChromeDriver版本匹配
- 如果下载失败,可以尝试增加重试次数或开启调试模式查看详细错误信息
代码实现
import argparse
import json
import logging
import os
import re
import time
import traceback
from concurrent.futures import ThreadPoolExecutor
from urllib.parse import unquote

import requests
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from tqdm import tqdm
# Matches a flat JSON object that mentions "video". Used to salvage usable
# data from text that is not valid JSON as a whole (e.g. an inline <script>).
_VIDEO_JSON_PATTERN = re.compile(r'({[^{]*?"video"[^}]*?})')

# JavaScript evaluated in the page to locate embedded page state.
_RENDER_DATA_JS = """
var renderData = null;
try {
    // 方法1:直接从SSR_HYDRATED_DATA获取
    if (window.SSR_HYDRATED_DATA) {
        return JSON.stringify(window.SSR_HYDRATED_DATA);
    }
    // 方法2:从__NEXT_DATA__获取
    var nextDataElement = document.getElementById('__NEXT_DATA__');
    if (nextDataElement) {
        return nextDataElement.textContent;
    }
    // 方法3:从script标签中查找
    var scripts = document.getElementsByTagName('script');
    for (var i = 0; i < scripts.length; i++) {
        var content = scripts[i].textContent || '';
        if (content.includes('"video"') && content.includes('"play_addr"')) {
            return content;
        }
    }
} catch (e) {
    console.log('获取数据时出错:', e);
}
return null;
"""

# JavaScript that returns window.__HYDRA_DATA__ as a JSON string, if present.
_HYDRATION_JS = """
if (window.__HYDRA_DATA__) {
    return JSON.stringify(window.__HYDRA_DATA__);
}
return null;
"""

# JavaScript that reads the playing <video>/<source> element directly.
_PLAYER_JS = """
try {
    // 查找视频元素
    var videoElement = document.querySelector('video');
    if (videoElement && videoElement.src) {
        var videoData = {
            video_data: {
                nwm_video_url: videoElement.src
            }
        };
        // 尝试获取视频标题
        var title = document.title || '';
        if (!title) {
            var titleElement = document.querySelector('title, .video-title, .title, .desc, [data-e2e="video-desc"]');
            if (titleElement) {
                title = titleElement.textContent.trim();
            }
        }
        if (title) {
            videoData.desc = title;
        }
        return JSON.stringify(videoData);
    }
    // 如果没有找到视频元素,尝试从source标签获取
    var sourceElement = document.querySelector('source[src*="http"]');
    if (sourceElement && sourceElement.src) {
        var sourceData = {
            video_data: {
                nwm_video_url: sourceElement.src
            },
            desc: document.title || ''
        };
        return JSON.stringify(sourceData);
    }
} catch (e) {
    console.log('获取视频数据时出错:', e);
}
return null;
"""


class DouyinDownloader:
    """Download a Douyin video by driving a headless Chrome session.

    The page is opened with Selenium, several strategies are tried to obtain
    a JSON description of the video, and the video file is then streamed to
    disk with ``requests``.

    Call :meth:`close` when finished to shut the browser down; ``__del__``
    does the same as a last resort.
    """

    # (connect, read) timeouts in seconds so a stalled server cannot hang us.
    REQUEST_TIMEOUT = (10, 30)
    # Bytes requested per streamed chunk while downloading.
    CHUNK_SIZE = 64 * 1024
    # Pause between download attempts, in seconds.
    RETRY_DELAY_SECONDS = 1
    # Upper bound on the UTF-8 size of a cleaned title, leaving room for the
    # ".mp4" extension and a "_N" suffix under common 255-byte name limits.
    MAX_TITLE_BYTES = 200
    # Title used when none can be determined.
    DEFAULT_TITLE = '未命名视频'

    def __init__(self, download_dir="downloads", max_retries=3, debug=False):
        """Configure logging, start Chrome and create the download directory.

        Args:
            download_dir (str): Directory that receives downloaded videos.
            max_retries (int): Number of extra download attempts after the
                first one fails.
            debug (bool): When true, log at DEBUG level (also to
                ``douyin_downloader.log``) and write diagnostic dump files.
        """
        self.download_dir = download_dir
        self.max_retries = max_retries
        self.debug = debug
        self.setup_logging()
        self.setup_chrome()
        # exist_ok avoids the check-then-create race of a separate exists().
        os.makedirs(download_dir, exist_ok=True)

    def setup_logging(self):
        """Configure root logging and create ``self.logger``."""
        log_format = '%(asctime)s - %(levelname)s - %(message)s'
        if self.debug:
            logging.basicConfig(
                level=logging.DEBUG,
                format=log_format,
                handlers=[
                    logging.FileHandler('douyin_downloader.log'),
                    logging.StreamHandler(),
                ],
            )
        else:
            logging.basicConfig(
                level=logging.INFO,
                format=log_format,
                handlers=[logging.StreamHandler()],
            )
        self.logger = logging.getLogger(__name__)

    def setup_chrome(self):
        """Start a headless Chrome driver and store it in ``self.driver``."""
        chrome_options = Options()
        chrome_options.add_argument('--headless')
        chrome_options.add_argument('--disable-gpu')
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument('--disable-dev-shm-usage')
        chrome_options.add_argument('--disable-blink-features=AutomationControlled')
        chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
        chrome_options.add_experimental_option('useAutomationExtension', False)

        self.driver = webdriver.Chrome(options=chrome_options)
        # Injected before every document so navigator.webdriver reads undefined.
        self.driver.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', {
            'source': 'Object.defineProperty(navigator, "webdriver", {get: () => undefined})'
        })

    def close(self):
        """Quit the browser if one is running; safe to call more than once."""
        driver = getattr(self, 'driver', None)
        if driver is None:
            return
        # Clear the reference first so a failing quit() is never retried.
        self.driver = None
        try:
            driver.quit()
        except Exception as e:
            logging.getLogger(__name__).debug("关闭浏览器时出错: %s", e)

    def __del__(self):
        """Best-effort browser shutdown when the object is collected."""
        try:
            self.close()
        except Exception:
            # A finalizer must never raise (it may run during interpreter
            # shutdown, when logging and modules are already torn down).
            pass

    def _save_debug_file(self, content, filename, message=""):
        """Write ``content`` to ``filename`` when debug mode is on.

        Failures are reported on stdout and otherwise ignored, because the
        dump is purely diagnostic.

        Args:
            content: Text to write.
            filename: Destination path.
            message: Optional text printed after a successful write.
        """
        if not self.debug:
            return
        try:
            with open(filename, 'w', encoding='utf-8') as f:
                f.write(content)
            if message:
                print(message)
        except Exception as e:
            print(f"保存调试文件 {filename} 失败: {str(e)}")

    def _dump_page_source(self):
        """Save the current page HTML to ``page_source.html`` in debug mode."""
        if not self.debug:
            return
        try:
            source = self.driver.page_source
        except Exception:
            print("保存页面源代码失败")
            return
        self._save_debug_file(
            source,
            'page_source.html',
            "页面源代码已保存到page_source.html(用于调试)",
        )

    def download_webpage(self, url):
        """Open ``url`` in the browser and return video data as a JSON string.

        The extraction strategies are tried in order and the first truthy
        result wins.

        Args:
            url (str): Douyin video link.

        Returns:
            str | None: JSON text describing the video, or None when the
            video id cannot be read from the final URL, every strategy
            fails, or an error occurs.
        """
        try:
            print("正在打开网页...")
            self.driver.get(url)

            print("等待页面加载...")
            time.sleep(1)

            current_url = self.driver.current_url
            print(f"最终URL: {current_url}")

            if not self._extract_video_id(current_url):
                print("无法提取视频ID")
                return None

            print("等待页面完全加载...")
            time.sleep(1)

            print("尝试提取页面数据...")
            strategies = (
                self._try_get_video_data_from_render_data,
                self._try_get_video_data_from_hydration,
                self._try_get_video_data_from_player,
                self._try_get_video_data_from_element,
            )
            for method in strategies:
                try:
                    data = method()
                except Exception as e:
                    if self.debug:
                        print(f"方法 {method.__name__} 失败: {str(e)}")
                    continue
                if data:
                    return data

            print("所有提取方法都失败了")
            self._dump_page_source()
            return None
        except Exception as e:
            print(f"下载网页时出错: {str(e)}")
            if self.debug:
                # Capture the traceback before any further handled exception.
                details = traceback.format_exc()
                self._dump_page_source()
                print("详细错误信息:")
                print(details)
            return None

    @staticmethod
    def _salvage_video_json(text):
        """Return the first valid JSON fragment in ``text`` mentioning "video".

        Args:
            text (str): Arbitrary text that may embed a JSON object.

        Returns:
            str | None: The matching fragment, or None if no candidate parses.
        """
        for match in _VIDEO_JSON_PATTERN.finditer(text):
            candidate = match.group(1)
            try:
                json.loads(candidate)
            except json.JSONDecodeError:
                continue
            return candidate
        return None

    def _try_get_video_data_from_render_data(self):
        """Read page state from SSR_HYDRATED_DATA, __NEXT_DATA__ or a script.

        Returns:
            str | None: JSON text, or None when nothing usable is found.
        """
        data = self.driver.execute_script(_RENDER_DATA_JS)
        if not data:
            return None

        print("找到页面数据")
        try:
            if self.debug:
                self._save_debug_file(data, 'raw_page_data.txt', "已保存原始数据到raw_page_data.txt")
            try:
                json_data = json.loads(data)
            except json.JSONDecodeError:
                # Raw script text: look for an embedded JSON object instead.
                return self._salvage_video_json(data)
            if self.debug:
                self._save_debug_file(
                    json.dumps(json_data, ensure_ascii=False, indent=2),
                    'parsed_data.json',
                    "已保存解析后的数据到parsed_data.json",
                )
            return json.dumps(json_data)
        except Exception as e:
            if self.debug:
                print(f"处理页面数据时出错: {str(e)}")
                print("详细错误信息:")
                print(traceback.format_exc())
        return None

    def _try_get_video_data_from_hydration(self):
        """Return ``window.__HYDRA_DATA__`` as JSON text, or None if absent."""
        data = self.driver.execute_script(_HYDRATION_JS)
        if data:
            print("从HYDRA_DATA中找到数据")
            return data
        return None

    def _try_get_video_data_from_player(self):
        """Build video data from the page's <video> or <source> element.

        Returns:
            str | None: JSON text with ``video_data.nwm_video_url`` (and
            ``desc`` when a title is available), or None.
        """
        data = self.driver.execute_script(_PLAYER_JS)
        if data:
            print("从播放器中找到数据")
            return data
        return None

    def _try_get_video_data_from_element(self):
        """Wait up to 10 seconds for a <video> element and read its ``src``.

        Returns:
            str | None: JSON text with the URL and title, or None when the
            element does not appear or has no ``src``.
        """
        try:
            video_element = WebDriverWait(self.driver, 10).until(
                EC.presence_of_element_located((By.TAG_NAME, "video"))
            )
            video_url = video_element.get_attribute('src')
        except Exception as e:
            if self.debug:
                print(f"方法 _try_get_video_data_from_element 失败: {str(e)}")
            return None

        if not video_url:
            return None
        print("从视频元素中找到数据")
        return json.dumps({
            'video_data': {'nwm_video_url': video_url},
            'desc': self._get_video_title(),
        })

    def _get_video_title(self):
        """Return the page's video title, or the default title if none found.

        Tries the document title, then a list of CSS selectors, then a
        ``<title>`` tag in the page source.
        """
        try:
            title = self.driver.title
            if title:
                return title

            selectors = (
                'title',
                '.video-title',
                '.desc',
                '[data-e2e="video-desc"]',
                '.title',
            )
            for selector in selectors:
                try:
                    element = self.driver.find_element(By.CSS_SELECTOR, selector)
                    text = element.text.strip() if element else ''
                except Exception:
                    # Selector not present (or element went stale): try next.
                    continue
                if text:
                    return text

            # DOTALL/IGNORECASE so multi-line or upper-case <TITLE> tags match.
            match = re.search(
                r'<title[^>]*>(.*?)</title>',
                self.driver.page_source,
                re.IGNORECASE | re.DOTALL,
            )
            if match and match.group(1).strip():
                return match.group(1).strip()
        except Exception as e:
            if self.debug:
                print(f"获取视频标题时出错: {str(e)}")
        return self.DEFAULT_TITLE

    def extract_video_info(self, json_str):
        """Extract the description and video URL from JSON text.

        Args:
            json_str (str): JSON text as returned by :meth:`download_webpage`.

        Returns:
            dict | None: ``{'desc', 'create_time', 'video_urls'}`` when a
            video URL is found, otherwise None. ``create_time`` is the
            current Unix time as a string.
        """
        try:
            print("开始解析视频信息...")
            if not json_str:
                print("输入的JSON字符串为空")
                return None

            print(f"JSON字符串长度: {len(json_str)}")
            print("JSON字符串前100个字符:", json_str[:100])

            try:
                data = json.loads(json_str)
            except json.JSONDecodeError as e:
                print(f"JSON解析错误: {str(e)}")
                print("尝试修复JSON数据...")
                salvaged = self._salvage_video_json(json_str)
                if salvaged is None:
                    # Nothing parseable: bail out instead of using an
                    # unbound ``data`` below.
                    print("无法从响应中提取视频信息")
                    return None
                data = json.loads(salvaged)

            if self.debug:
                # Best-effort dump; a write failure must not abort extraction.
                self._save_debug_file(
                    json.dumps(data, ensure_ascii=False, indent=2),
                    'debug_response.json',
                    "已保存完整响应到debug_response.json",
                )

            video_data = {
                'desc': self.DEFAULT_TITLE,
                'create_time': str(int(time.time())),
                'video_urls': [],
            }

            # Shape produced by the player/element strategies.
            if isinstance(data, dict):
                direct = data.get('video_data')
                if isinstance(direct, dict) and 'nwm_video_url' in direct:
                    video_data['desc'] = data.get('desc') or self.DEFAULT_TITLE
                    video_data['video_urls'].append(direct['nwm_video_url'])
                    return video_data

            # Generic page state: search the whole structure.
            video_url = self._find_video_url(data)
            if video_url:
                video_data['video_urls'].append(video_url)
                desc = self._find_video_desc(data)
                if desc:
                    video_data['desc'] = desc
                return video_data

            print("无法从响应中提取视频信息")
            print("数据结构:", json.dumps(data, indent=2, ensure_ascii=False)[:500])
            return None
        except Exception as e:
            print(f"解析视频信息时出错: {str(e)}")
            print("详细错误信息:")
            print(traceback.format_exc())
            return None

    def _find_video_url(self, data):
        """Depth-first search of ``data`` for a video URL.

        A known field holding an ``http`` string, or a dict with a non-empty
        ``url_list``, yields the URL.

        Returns:
            str | None: The first URL found, or None.
        """
        if isinstance(data, dict):
            url_fields = ('playApi', 'playAddr', 'downloadAddr', 'video_url', 'nwm_video_url')
            for field in url_fields:
                if field not in data:
                    continue
                url = data[field]
                if isinstance(url, str) and url.startswith('http'):
                    return url
                if isinstance(url, dict) and isinstance(url.get('url_list'), list):
                    # Skip non-string entries instead of returning them.
                    first = next(
                        (u for u in url['url_list'] if isinstance(u, str) and u),
                        None,
                    )
                    if first:
                        return first
            children = data.values()
        elif isinstance(data, list):
            children = data
        else:
            return None

        for child in children:
            result = self._find_video_url(child)
            if result:
                return result
        return None

    def _find_video_desc(self, data):
        """Depth-first search of ``data`` for a string ``desc`` field.

        Returns:
            str | None: The description, or None when none is found.
        """
        if isinstance(data, dict):
            if 'desc' in data and isinstance(data['desc'], str):
                return data['desc']
            children = data.values()
        elif isinstance(data, list):
            children = data
        else:
            return None

        for child in children:
            result = self._find_video_desc(child)
            if result:
                return result
        return None

    def _clean_title(self, title):
        """Turn a video title into a safe file-name stem.

        Strips HTML tags, ``#hashtags``, the " - 抖音" suffix, characters
        that are invalid in file names and all whitespace, then truncates
        the result to ``MAX_TITLE_BYTES`` bytes of UTF-8.

        Args:
            title (str): Raw title; a non-string is treated as empty.

        Returns:
            str: Cleaned title, or the default title if nothing remains.
        """
        if not isinstance(title, str):
            title = ''
        title = re.sub(r'<[^>]+>', '', title)
        title = re.sub(r'#[^ ]+', '', title)
        title = re.sub(r' *- *抖音.*$', '', title)
        title = re.sub(r'[\\/*?:"<>|]', '', title)
        title = re.sub(r'\s+', '', title)

        encoded = title.encode('utf-8')
        if len(encoded) > self.MAX_TITLE_BYTES:
            # errors='ignore' drops a multi-byte character cut in half.
            title = encoded[:self.MAX_TITLE_BYTES].decode('utf-8', errors='ignore')

        return title or self.DEFAULT_TITLE

    def _unique_filepath(self, title):
        """Return a not-yet-existing ``<title>.mp4`` path in the download dir.

        A ``_1``, ``_2``, ... suffix is appended while the name is taken.
        """
        base = os.path.join(self.download_dir, f"{title}.mp4")
        stem, ext = os.path.splitext(base)
        candidate = base
        counter = 1
        while os.path.exists(candidate):
            candidate = f"{stem}_{counter}{ext}"
            counter += 1
        return candidate

    def download_video(self, video_url, title, retry_count=0):
        """Stream the video at ``video_url`` into the download directory.

        Args:
            video_url (str): Direct URL of the video file.
            title (str): Video title; cleaned and used as the file name.
            retry_count (int): Number of retries already used.

        Returns:
            bool: True on success, False once all retries are exhausted.
        """
        title = self._clean_title(title)
        print(f"处理后的文件名: {title}")

        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
            'Referer': 'https://www.douyin.com/',
        }

        attempt = retry_count
        while True:
            filepath = None
            try:
                os.makedirs(self.download_dir, exist_ok=True)
                filepath = self._unique_filepath(title)
                # The context manager releases the connection on every path.
                with requests.get(
                    video_url,
                    headers=headers,
                    stream=True,
                    timeout=self.REQUEST_TIMEOUT,
                ) as response:
                    response.raise_for_status()
                    total_size = int(response.headers.get('content-length', 0))
                    with open(filepath, 'wb') as f, tqdm(
                        desc=os.path.basename(filepath),
                        total=total_size,
                        unit='iB',
                        unit_scale=True,
                        unit_divisor=1024,
                    ) as pbar:
                        for chunk in response.iter_content(chunk_size=self.CHUNK_SIZE):
                            pbar.update(f.write(chunk))
            except Exception as e:
                self.logger.error("下载视频时出错: %s", e)
                # The path did not exist before this attempt, so anything
                # there now is our own partial file; remove it so retries
                # do not pile up "_N" leftovers.
                if filepath and os.path.exists(filepath):
                    try:
                        os.remove(filepath)
                    except OSError as cleanup_error:
                        self.logger.debug("清理未完成的文件失败: %s", cleanup_error)
                if attempt >= self.max_retries:
                    return False
                attempt += 1
                self.logger.info("正在进行第%d次重试...", attempt)
                time.sleep(self.RETRY_DELAY_SECONDS)
            else:
                self.logger.info("视频已保存到: %s", filepath)
                return True

    def _extract_video_id(self, url):
        """Return the numeric video id found in ``url``, or None."""
        patterns = (
            r'/video/(\d+)',
            r'item_ids=(\d+)',
            r'aweme_id=(\d+)',
        )
        for pattern in patterns:
            match = re.search(pattern, url)
            if match:
                video_id = match.group(1)
                print(f"提取到视频ID: {video_id}")
                return video_id
        return None


def main(url, download_dir="downloads", max_retries=3, debug=False):
    """Download the Douyin video behind ``url`` and report progress on stdout.

    Args:
        url (str): Douyin video link.
        download_dir (str): Directory that receives the video.
        max_retries (int): Extra download attempts after a failure.
        debug (bool): Enable debug logging and diagnostic dump files.
    """
    downloader = None
    try:
        downloader = DouyinDownloader(
            download_dir=download_dir,
            max_retries=max_retries,
            debug=debug,
        )

        print(f"正在处理链接: {url}")
        json_str = downloader.download_webpage(url)
        if not json_str:
            print("下载网页失败")
            return

        video_info = downloader.extract_video_info(json_str)
        if not (video_info and video_info['video_urls']):
            print("无法获取视频下载地址")
            return

        print(f"视频标题: {video_info['desc']}")
        video_url = video_info['video_urls'][0]
        if downloader.download_video(video_url, video_info['desc']):
            print("下载成功!")
        else:
            print("下载失败!")
    except Exception as e:
        print(f"程序执行出错: {str(e)}")
    finally:
        # Shut the browser down explicitly rather than relying on __del__.
        if downloader is not None:
            downloader.close()


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="抖音视频下载工具")
    # Without arguments the script still processes the fixed test link.
    parser.add_argument("url", nargs="?", default='https://v.douyin.com/Fw35vv97K4s/',
                        help="抖音视频链接")
    parser.add_argument("--download-dir", default="downloads", help="下载目录")
    parser.add_argument("--max-retries", type=int, default=3, help="最大重试次数")
    parser.add_argument("--debug", action="store_true", help="开启调试模式")
    args = parser.parse_args()
    main(
        args.url,
        download_dir=args.download_dir,
        max_retries=args.max_retries,
        debug=args.debug,
    )