基于 Python 实现的高效文件压缩工具:Zstandard、LZ4、Brotli 一站式解决方案
文件压缩已成为提升存储效率、加速网络传输的核心技术。本文介绍一款基于 Python 与 Streamlit 库构建的多功能压缩工具,支持 Zstandard、LZ4、Brotli、Gzip 等主流算法,满足不同场景下的性能需求。
多算法支持
Zstandard(Zstd):压缩比与速度的黄金平衡
LZ4:极致的压缩速度(最高可达 500MB/s)
Brotli:Web 压缩标准(适合文本/HTML 压缩)
Gzip:兼容性最强的通用压缩算法
工具可以根据文件后缀(.zst/.lz4/.gz/.br)自动识别压缩格式并进行解压;压缩前会给出压缩耗时的粗略估算,也可以通过调整压缩级别来控制压缩后文件的大小。
大概的使用流程:
步骤1:上传文件
支持任意格式文件(文档/图片/视频等)
实时显示文件大小(精确到 KB 级)
步骤2:算法选择
Zstandard:推荐压缩级别 3-6(平衡性能)
LZ4:压缩级别 1(极速)或 9(高压缩)
Brotli:压缩级别 11(最高质量)
Gzip:压缩级别 6(推荐)
步骤3:执行压缩
实时进度条(显示压缩进度)
压缩比动态显示(百分比计算)
import streamlit as st
import pyzstd
import lz4.frame
import gzip
import brotli
import io
import os
import time
from pathlib import Path
import math
import humanize# 设置页面标题和图标
# Configure the browser tab (title and icon) and use the wide page layout.
st.set_page_config(
    page_title="高级文件压缩工具箱",
    page_icon="📦",
    layout="wide",
)

# Detailed information for each compression algorithm
# Keyed by the algorithm name shown in the UI. Each entry carries:
#   extension          - file suffix (without the dot) of compressed output
#   description        - short user-facing description
#   recommended_levels - user-facing text describing the level range
#   best_for           - user-facing text describing typical use cases
#   default_level      - level used as the default selection
#   min_level/max_level - inclusive bounds for the compression level
COMPRESSION_INFO = {
    "Zstandard": {
        "extension": "zst",
        "description": "Zstandard (Zstd) 是Facebook开发的高压缩比算法,提供极高的压缩和解压速度",
        "recommended_levels": "1-22 (推荐3-5)",
        "best_for": "大型文件,需要快速压缩和解压的场景",
        "default_level": 3,
        "min_level": 1,
        "max_level": 22,
    },
    "LZ4": {
        "extension": "lz4",
        "description": "LZ4 是速度最快的压缩算法,提供极快的压缩和解压速度",
        "recommended_levels": "1-16 (推荐1-4)",
        "best_for": "实时数据流,需要极速压缩的场景",
        "default_level": 1,
        "min_level": 1,
        "max_level": 16,
    },
    "Gzip": {
        "extension": "gz",
        "description": "Gzip 是应用最广泛的压缩算法,兼容性强",
        "recommended_levels": "0-9 (推荐6-9)",
        "best_for": "网络传输,需要高兼容性的场景",
        "default_level": 6,
        "min_level": 0,
        "max_level": 9,
    },
    "Brotli": {
        "extension": "br",
        "description": "Brotli 是Google开发的算法,提供最高的压缩比",
        "recommended_levels": "0-11 (推荐9-11)",
        "best_for": "网页内容,需要最大压缩比的场景",
        "default_level": 9,
        "min_level": 0,
        "max_level": 11,
    },
}

# Sidebar help text
st.sidebar.title("📦 高级文件压缩工具箱")

# Markdown listing the container formats offered by the tool.
_SUPPORTED_FORMATS_MD = """
## 支持的压缩格式:
- Zstandard (.zst)
- LZ4 (.lz4)
- Gzip (.gz)
- Brotli (.br)
"""
st.sidebar.markdown(_SUPPORTED_FORMATS_MD)
st.sidebar.markdown("""
## 使用提示:
1. 压缩大型文件(100MB+)时建议使用Zstd
2. 需要最高压缩比时使用Brotli
3. 需要最快速度时使用LZ4
4. 需要兼容性时使用Gzip
""")

# Prefix this tool writes in front of its compressed payload so the original
# upload name can be restored on decompression. The header line ends at the
# first b"\n".
_FILENAME_HEADER = b"ORIGINAL_FILENAME:"

# Uploads above this size only trigger a "this may take a while" warning.
_LARGE_FILE_BYTES = 200 * 1024 * 1024  # 200MB

# Suffixes stripped when deriving an output name for a file without a header.
_COMPRESSED_SUFFIXES = ('.zst', '.lz4', '.gz', '.br')

# Suffix -> MIME type used for download buttons; anything else falls back to
# application/octet-stream.
_MIME_BY_SUFFIX = {
    '.txt': 'text/plain',
    '.csv': 'text/plain',
    '.log': 'text/plain',
    '.json': 'text/plain',
    '.jpg': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.png': 'image/png',
    '.pdf': 'application/pdf',
    '.zip': 'application/zip',
}


def get_safe_level(algorithm, level):
    """Clamp *level* into the [min_level, max_level] range of *algorithm*.

    Raises:
        KeyError: if *algorithm* is not a key of COMPRESSION_INFO.
    """
    info = COMPRESSION_INFO[algorithm]
    return max(info["min_level"], min(info["max_level"], level))


def compress_zstandard(data, level=3):
    """Compress *data* with Zstandard at the (clamped) *level*."""
    return pyzstd.compress(data, get_safe_level("Zstandard", level))


def decompress_zstandard(data):
    """Decompress Zstandard-compressed *data*."""
    return pyzstd.decompress(data)


def compress_lz4(data, level=1):
    """Compress *data* as an LZ4 frame at the (clamped) *level*."""
    return lz4.frame.compress(data, compression_level=get_safe_level("LZ4", level))


def decompress_lz4(data):
    """Decompress an LZ4 frame."""
    return lz4.frame.decompress(data)


def compress_gzip(data, level=6):
    """Compress *data* with Gzip at the (clamped) *level*."""
    safe_level = get_safe_level("Gzip", level)
    buf = io.BytesIO()
    # The GzipFile must be closed before reading the buffer so that the
    # gzip trailer is flushed into it.
    with gzip.GzipFile(fileobj=buf, mode='wb', compresslevel=safe_level) as f:
        f.write(data)
    return buf.getvalue()


def decompress_gzip(data):
    """Decompress Gzip-compressed *data*."""
    with gzip.GzipFile(fileobj=io.BytesIO(data)) as f:
        return f.read()


def compress_brotli(data, level=9):
    """Compress *data* with Brotli, using the (clamped) *level* as quality."""
    return brotli.compress(data, quality=get_safe_level("Brotli", level))


def decompress_brotli(data):
    """Decompress Brotli-compressed *data*."""
    return brotli.decompress(data)


# Dispatch tables: UI algorithm name -> compressor, file suffix -> decompressor.
_COMPRESSORS = {
    "Zstandard": compress_zstandard,
    "LZ4": compress_lz4,
    "Gzip": compress_gzip,
    "Brotli": compress_brotli,
}
_DECOMPRESSORS = {
    "zst": decompress_zstandard,
    "lz4": decompress_lz4,
    "gz": decompress_gzip,
    "br": decompress_brotli,
}


def get_extension(filename):
    """Return the lower-cased final suffix of *filename* without the dot."""
    return Path(filename).suffix[1:].lower()


def get_mime_type(filename):
    """Return a MIME type chosen from the final suffix of *filename*."""
    return _MIME_BY_SUFFIX.get(Path(filename).suffix.lower(), 'application/octet-stream')


def _split_filename_header(file_data):
    """Split this tool's filename header off *file_data*.

    Returns:
        (original_filename, payload). original_filename is None when there
        is no well-formed header or the stored name is unusable; in the
        "no header" case payload is *file_data* unchanged.
    """
    if not file_data.startswith(_FILENAME_HEADER):
        return None, file_data
    header_end = file_data.find(b"\n")
    if header_end == -1:
        # Marker present but no terminator: not a header this tool wrote.
        return None, file_data
    raw_name = file_data[len(_FILENAME_HEADER):header_end].decode('utf-8', errors='replace')
    # The header comes from an uploaded file and is therefore untrusted:
    # keep only the last path component so it cannot carry directories.
    name = Path(raw_name.replace("\\", "/")).name
    if name in ("", ".", ".."):
        name = None
    return name, file_data[header_end + 1:]


def _compress_upload(name, file_data, algorithm, level):
    """Compress *file_data* and prepend the original-filename header."""
    if algorithm not in _COMPRESSORS:
        raise ValueError(f"不支持的格式: {algorithm}")
    if level is None:
        level = COMPRESSION_INFO[algorithm]["default_level"]

    file_size = len(file_data)
    output_filename = f"{Path(name).stem}.{COMPRESSION_INFO[algorithm]['extension']}"

    # Rough time estimate derived from input size, algorithm and level.
    # NOTE(review): these factors are multiplied by the size in *bytes*,
    # which yields very large estimates for ordinary files -- confirm the
    # intended unit before relying on this number.
    estimation_factor = {
        "Zstandard": 0.0001 * level + 0.00005,
        "LZ4": 0.00003 * level + 0.00001,
        "Gzip": 0.0002 * level + 0.0001,
        "Brotli": 0.0005 * level + 0.0002,
    }
    estimated_time = file_size * estimation_factor[algorithm]
    st.info(f"预计压缩时间: {estimated_time:.2f}秒")

    start_time = time.perf_counter()
    compressed = _COMPRESSORS[algorithm](file_data, level)
    actual_time = time.perf_counter() - start_time

    # The header is newline-terminated, so the stored name must not contain
    # line breaks of its own.
    header_name = name.replace("\r", " ").replace("\n", " ")
    header = f"ORIGINAL_FILENAME:{header_name}\n".encode('utf-8')
    processed_data = header + compressed

    # An empty upload has no meaningful ratio; avoid dividing by zero.
    if file_size:
        compression_ratio = (1 - len(processed_data) / file_size) * 100
    else:
        compression_ratio = 0.0
    return output_filename, processed_data, actual_time, compression_ratio, None


def _decompress_upload(name, file_data, original_filename):
    """Decompress *file_data*, choosing the codec from the suffix of *name*."""
    header_name, payload = _split_filename_header(file_data)
    if header_name:
        original_filename = header_name

    decompressor = _DECOMPRESSORS.get(get_extension(name))
    if decompressor is None:
        st.error("无法检测压缩格式!")
        return None, None, None, None, None

    start_time = time.perf_counter()
    try:
        processed_data = decompressor(payload)
    except Exception as exc:
        # UI boundary: each backend raises its own error type for corrupt or
        # mismatched input, so report the failure instead of a traceback.
        st.error(f"解压失败: {exc}")
        return None, None, None, None, None
    actual_time = time.perf_counter() - start_time

    if original_filename:
        output_filename = original_filename
    else:
        # No stored name: strip every known compression suffix.
        base_name = Path(name).stem
        while base_name.lower().endswith(_COMPRESSED_SUFFIXES):
            base_name = Path(base_name).stem
        output_filename = base_name
    return output_filename, processed_data, actual_time, None, len(processed_data)


def process_file(uploaded_file, action, algorithm=None, level=None, original_filename=None):
    """Compress or decompress an uploaded file.

    Args:
        uploaded_file: object exposing ``getvalue()`` (bytes) and ``name``.
        action: "压缩" to compress, "解压" to decompress.
        algorithm: key of COMPRESSION_INFO; required when compressing.
        level: compression level; defaults to the algorithm's default_level.
        original_filename: fallback output name for decompression, used when
            the data carries no filename header.

    Returns:
        A 5-tuple ``(output_filename, processed_data, elapsed_seconds,
        compression_ratio_percent, decompressed_size)``. The ratio is None
        when decompressing and the size is None when compressing. All five
        are None when decompression could not be performed (unknown suffix
        or a decoding failure, both reported through ``st.error``).

    Raises:
        ValueError: for an unknown *action* or unsupported *algorithm*.
    """
    if action not in ("压缩", "解压"):
        raise ValueError(f"不支持的操作: {action}")

    file_data = uploaded_file.getvalue()
    file_size = len(file_data)

    # Progress widgets
    progress_bar = st.progress(0)
    status_text = st.empty()

    # Large files only get a warning
    if file_size > _LARGE_FILE_BYTES:
        status_text.warning(f"文件较大({humanize.naturalsize(file_size)}),处理可能需要较长时间...")

    if action == "压缩":
        result = _compress_upload(uploaded_file.name, file_data, algorithm, level)
    else:
        result = _decompress_upload(uploaded_file.name, file_data, original_filename)

    # The codecs run in one call, so the bar can only jump to done.
    if result[1] is not None:
        progress_bar.progress(100)
    return result


def _render_info_tab():
    """Render the algorithm comparison tab."""
    st.subheader("压缩算法对比指南")
    cols = st.columns(len(COMPRESSION_INFO))
    for col, (algo, info) in zip(cols, COMPRESSION_INFO.items()):
        with col:
            st.markdown(f"### {algo}")
            st.markdown(f"**文件后缀**: .{info['extension']}")
            st.markdown(f"**压缩级别**: {info['recommended_levels']}")
            st.markdown(f"**最佳适用**: {info['best_for']}")
            st.info(info['description'])

    st.subheader("性能对比")
    st.image("https://raw.githubusercontent.com/facebook/zstd/dev/doc/images/Cspeed2.png",
             caption="Zstandard压缩性能对比", width=600)
    st.image("https://cran.r-project.org/web/packages/brotli/vignettes/compression-comparison.png",
             caption="Brotli压缩比对比", width=600)
    st.markdown("""
**性能总结**:

- ⚡ **最快速度**: LZ4
- 🗜️ **最高压缩比**: Brotli (高质量)
- ⚖️ **最佳平衡**: Zstandard
- 🔌 **最广泛兼容**: Gzip
""")


def _render_compress_tab():
    """Render the compression tab: upload, options, estimate, compress."""
    st.subheader("压缩文件")
    with st.expander("上传说明", expanded=True):
        st.info("1. 上传需要压缩的文件(支持任何格式)\n2. 选择压缩算法和压缩级别\n3. 点击压缩按钮")

    uploaded_file = st.file_uploader("选择要压缩的文件", type=None, key="compress")
    if not uploaded_file:
        return

    file_size = len(uploaded_file.getvalue())
    st.success(f"已上传文件: {uploaded_file.name} ({humanize.naturalsize(file_size)})")

    # Algorithm selection
    algorithm = st.selectbox(
        "选择压缩算法",
        list(COMPRESSION_INFO),
        key="algo_select",
        help="选择合适的压缩算法:LZ4速度最快,Brotli压缩比最高",
    )

    # Algorithm details
    algo_info = COMPRESSION_INFO[algorithm]
    with st.expander(f"{algorithm}算法详情", expanded=True):
        st.markdown(f"**描述**: {algo_info['description']}")
        st.markdown(f"**推荐压缩级别**: {algo_info['recommended_levels']}")
        st.markdown(f"**最佳适用场景**: {algo_info['best_for']}")

    # Compression level slider
    level = st.slider(
        "压缩级别",
        algo_info["min_level"],
        algo_info["max_level"],
        value=algo_info["default_level"],
        key="comp_level",
        help="级别越高压缩比越大,但需要更多时间和资源",
    )

    # Estimate
    if st.button("估算压缩效果", key="estimate"):
        # Simplified heuristic; real ratios depend on the file content.
        estimation_ratios = {
            "Zstandard": max(5, 100 - level * 3),
            "LZ4": max(10, 100 - level * 2),
            "Gzip": max(15, 100 - level * 4),
            "Brotli": max(20, 100 - level * 5),
        }
        estimated_ratio = estimation_ratios[algorithm]
        estimated_size = math.ceil(file_size * (estimated_ratio / 100))
        st.info(f"估算压缩效果:压缩到 {humanize.naturalsize(estimated_size)} (压缩比约{100-estimated_ratio:.1f}%)")

    # Compress
    if st.button("开始压缩", key="compress_btn", type="primary"):
        output_filename, processed_data, comp_time, comp_ratio, _ = process_file(
            uploaded_file, "压缩", algorithm=algorithm, level=level
        )
        if processed_data is not None:
            comp_size = len(processed_data)
            st.success(f"文件压缩成功!耗时: {comp_time:.2f}秒")
            col1, col2 = st.columns(2)
            col1.metric("原始大小", humanize.naturalsize(file_size))
            col2.metric("压缩后大小", humanize.naturalsize(comp_size),
                        f"{file_size - comp_size}减少", delta_color="inverse")
            st.info(f"压缩比: {comp_ratio:.2f}%")
            st.download_button(
                label="下载压缩文件",
                data=processed_data,
                file_name=output_filename,
                mime=get_mime_type(output_filename),
            )


def _render_decompress_tab():
    """Render the decompression tab: upload, decompress, download."""
    st.subheader("解压文件")
    with st.expander("上传说明", expanded=True):
        st.info("1. 上传之前通过本工具压缩的文件(自动恢复原始文件名)\n2. 也支持其他.zst/.lz4/.gz/.br文件")

    uploaded_file = st.file_uploader("选择要解压的文件", type=["zst", "lz4", "gz", "br"], key="decompress")
    if not uploaded_file:
        return

    file_ext = get_extension(uploaded_file.name)
    st.info(f"文件格式: {file_ext.upper()}")

    if st.button("开始解压", key="decompress_btn", type="primary"):
        output_filename, processed_data, decomp_time, _, decomp_size = process_file(uploaded_file, "解压")
        # Compare against None: an empty decompressed file is a valid result.
        if processed_data is not None:
            orig_size = len(uploaded_file.getvalue())
            st.success(f"文件解压成功!耗时: {decomp_time:.2f}秒")
            col1, col2 = st.columns(2)
            col1.metric("压缩大小", humanize.naturalsize(orig_size))
            col2.metric("解压后大小", humanize.naturalsize(decomp_size), f"{decomp_size - orig_size}增加")
            st.download_button(
                label="下载解压文件",
                data=processed_data,
                file_name=output_filename,
                mime=get_mime_type(output_filename),
            )


def main():
    """Build the page: compress, decompress and algorithm-info tabs."""
    st.title("📦 高级文件压缩/解压工具箱")

    # Tabs
    tab_compress, tab_decompress, tab_info = st.tabs(["压缩文件", "解压文件", "压缩算法说明"])

    with tab_info:
        _render_info_tab()
    with tab_compress:
        _render_compress_tab()
    with tab_decompress:
        _render_decompress_tab()


if __name__ == "__main__":
    main()