py7zr解压文件时报错CrcError(crc32, f.crc32, f.filename)
报错信息
Traceback (most recent call last):
  File "/home/hp/project/test/file_util.py", line 130, in extract_archive_7z
    archive.extract(targets=[fixed_file], path=output_dir, recursive=True)
  File "/home/hp/miniconda3/envs/celery/lib/python3.10/site-packages/py7zr/py7zr.py", line 1025, in extract
    self._extract(path, targets, return_dict=False, recursive=recursive)
  File "/home/hp/miniconda3/envs/celery/lib/python3.10/site-packages/py7zr/py7zr.py", line 634, in _extract
    self.worker.extract(
  File "/home/hp/miniconda3/envs/celery/lib/python3.10/site-packages/py7zr/py7zr.py", line 1266, in extract
    self.extract_single(
  File "/home/hp/miniconda3/envs/celery/lib/python3.10/site-packages/py7zr/py7zr.py", line 1354, in extract_single
    raise e
  File "/home/hp/miniconda3/envs/celery/lib/python3.10/site-packages/py7zr/py7zr.py", line 1351, in extract_single
    self._extract_single(fp, files, path, src_end, q, skip_notarget)
  File "/home/hp/miniconda3/envs/celery/lib/python3.10/site-packages/py7zr/py7zr.py", line 1388, in _extract_single
    self._check(fp, just_check, src_end)
  File "/home/hp/miniconda3/envs/celery/lib/python3.10/site-packages/py7zr/py7zr.py", line 1445, in _check
    raise CrcError(crc32, f.crc32, f.filename)
py7zr.exceptions.CrcError: (599713723, 4127376704, 'valid/0001_JPEG.rf.9fcffd546df665288bd059e2975eb307.jpg')
代码如下:
def extract_archive_7z(archive_path: str, output_dir: str = ".", password: str = None):
    """Extract a 7z archive member by member with py7zr, showing a progress bar.

    Each member name is passed through ``fix_encoding`` before being handed to
    ``extract()``; the original author's stated intent is to support Chinese
    file names.

    Args:
        archive_path: Path of the ``.7z`` archive to read.
        output_dir: Directory to extract into; created if it does not exist.
        password: Optional archive password, forwarded to ``SevenZipFile``.

    Any exception is logged with its traceback and not re-raised.
    """
    try:
        with py7zr.SevenZipFile(archive_path, mode='r', password=password) as sz:
            # The member list drives the progress bar.
            member_names = sz.getnames()
            # Make sure the destination directory exists.
            os.makedirs(output_dir, exist_ok=True)
            label = f"解压 {os.path.basename(archive_path)}"
            progress = tqdm(member_names, desc=label, unit="file")
            for member in progress:
                target = fix_encoding(member)
                # NOTE(review): extract() is invoked once per member on the
                # same archive object. Per the py7zr documentation the object
                # is exhausted after the first call, which is the likely cause
                # of the CrcError reported for this version of the function.
                sz.extract(targets=[target], path=output_dir)
        logger.info(f"{archive_path} 解压完成")
    except Exception as e:
        logger.error(f"解压失败 {archive_path}: {e}", exc_info=True)
错误原因
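在同一个已打开的 SevenZipFile 对象上,archive.extract() 不能被连续多次调用:第一次调用之后对象内部的读取位置和解压器状态已经耗尽,再次调用就会出现 CRC 校验失败(CrcError)。如果确实需要多次调用,必须在每次调用之间执行 archive.reset(),或者重新打开压缩包;更简单的做法是把所有目标一次性传给 extract(),或直接使用 extractall()。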
以下是修改后的代码:
def extract_archive_7z(archive_path: str, output_dir: str = ".", password: str = None):
    """Extract every member of a 7z archive into ``output_dir`` using py7zr.

    The archive is extracted with a single ``extractall()`` call. py7zr leaves
    a ``SevenZipFile`` object exhausted after one extract call, so extracting
    member by member on the same object fails with ``CrcError`` unless
    ``reset()`` is called in between.

    Args:
        archive_path: Path of the ``.7z`` archive to read.
        output_dir: Directory to extract into; created if it does not exist.
        password: Optional archive password, forwarded to ``SevenZipFile``.

    Any exception is logged with its traceback and not re-raised, so callers
    never see an exception from this function.
    """
    try:
        with py7zr.SevenZipFile(archive_path, mode='r', password=password) as archive:
            # Make sure the destination directory exists.
            os.makedirs(output_dir, exist_ok=True)
            # One call for the whole archive. The previous version built the
            # target list with `targets = targets.append(file)`, which rebinds
            # `targets` to None (list.append returns None) and raised
            # AttributeError on the second member. Its tqdm loop only wrapped
            # list construction, so the bar finished before any extraction
            # started; it is dropped rather than kept as a misleading indicator.
            archive.extractall(path=output_dir)
        logger.info(f"{archive_path} 解压完成")
    except Exception as e:
        logger.error(f"解压失败 {archive_path}: {e}", exc_info=True)
参考:
https://github.com/miurahr/py7zr/issues/313