当前位置: 首页 > news >正文

利用DeepSeek编写一个使用lzav算法的文件压缩工具

LZAV是Aleksey Vaneev开发的一种嵌入式压缩算法,内存中压缩速度和压缩率在LZ4和ZSTD之间,我让DeepSeek编写了一个用于文件压缩的工具,并通过在文件头处保存源文件大小解决了解压缩需要了解源文件大小的问题。
为了提供不同的压缩级别,引入了-l参数,1表示默认,非1表示深度压缩。
代码如下:

#include <errno.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "lzav.h"

#define MAX_FILENAME 256

/* Length of the little-endian header that stores the original (uncompressed) size. */
#define SIZE_HEADER_LEN 4

/* Prints the command-line usage summary to stdout. */
void print_help(void) {
    printf("lzavcli - LZAV Compression Tool (with size header)\n");
    printf("Usage: lzavcli [options] <input_file>\n");
    printf("Options:\n");
    printf("  -l <level>  compress level (default: 1,high: other)\n");
    printf("  -o <file>   Output file (default: <input>.lzav)\n");
    printf("  -d          Decompress mode\n");
    printf("  -h          Show this help\n");
}

/*
 * Returns the total size of stream f in bytes and leaves the stream
 * positioned at its start, or returns -1 if seeking/telling fails.
 */
static long stream_size(FILE* f) {
    if (fseek(f, 0, SEEK_END) != 0) {
        return -1;
    }
    long n = ftell(f);
    if (n < 0 || fseek(f, 0, SEEK_SET) != 0) {
        return -1;
    }
    return n;
}

/*
 * Creates (or truncates) `path` and writes `head` followed by `body`.
 * Either part may have length 0. Prints a message to stderr and returns -1
 * on any failure; returns 0 on success.
 */
static int write_file(const char* path, const void* head, size_t head_len,
                      const void* body, size_t body_len) {
    FILE* fout = fopen(path, "wb");
    if (!fout) {
        fprintf(stderr, "Error: Cannot create output file '%s'\n", path);
        return -1;
    }

    int ok = (head_len == 0 || fwrite(head, 1, head_len, fout) == head_len) &&
             (body_len == 0 || fwrite(body, 1, body_len, fout) == body_len);

    /* fclose flushes buffered data, so its result is part of the write outcome. */
    if (fclose(fout) != 0) {
        ok = 0;
    }
    if (!ok) {
        fprintf(stderr, "Error: Failed to write output file '%s'\n", path);
        return -1;
    }
    return 0;
}

/*
 * Converts a clock() interval to milliseconds. Note that clock() reports
 * processor time used by the program, not wall-clock time, so time spent
 * waiting on disk I/O is not included.
 */
static double elapsed_ms(clock_t start, clock_t end) {
    return (double)(end - start) * 1000 / CLOCKS_PER_SEC;
}

/*
 * Copies `src` into `dst`, which must have room for MAX_FILENAME bytes.
 * Returns 0 on success, or -1 (after printing an error) when the path does
 * not fit; truncating silently could make the tool open or create a
 * different file than the one requested.
 */
static int set_path(char* dst, const char* src) {
    int n = snprintf(dst, MAX_FILENAME, "%s", src);
    if (n < 0 || n >= MAX_FILENAME) {
        fprintf(stderr, "Error: Path is too long (limit is %d characters): '%s'\n",
                MAX_FILENAME - 1, src);
        return -1;
    }
    return 0;
}

/*
 * Compresses the file at input_path into output_path.
 *
 * Output layout: a 4-byte little-endian header holding the original size,
 * followed by the LZAV-compressed data.
 *
 * level == 1 selects lzav_compress_default(); any other value selects
 * lzav_compress_hi(). The matching lzav_compress_bound*() function is used to
 * size the destination buffer.
 *
 * Returns 0 on success and -1 on failure (a message is printed to stderr).
 */
int compress_lzav(const char* input_path, const char* output_path, int level) {
    int rc = -1;
    void* src_buf = NULL;
    void* comp_buf = NULL;
    long file_len;
    uint32_t src_len;
    int bound;
    int max_len;
    int comp_len;
    uint8_t header[SIZE_HEADER_LEN];
    clock_t start = clock();
    clock_t end;

    FILE* fin = fopen(input_path, "rb");
    if (!fin) {
        fprintf(stderr, "Error: Cannot open input file '%s'\n", input_path);
        return -1;
    }

    /* Determine the original file size. */
    file_len = stream_size(fin);
    if (file_len < 0) {
        fprintf(stderr, "Error: Cannot determine size of input file '%s'\n", input_path);
        goto done;
    }
    /* The lengths handed to the LZAV calls below are converted to int, so
     * larger inputs cannot be represented (nor stored in the 32-bit header). */
    if ((unsigned long)file_len > (unsigned long)INT_MAX) {
        fprintf(stderr, "Error: Input file is too large (limit is %d bytes)\n", INT_MAX);
        goto done;
    }
    src_len = (uint32_t)file_len;

    /* Read the original data. Allocate at least one byte: malloc(0) may
     * legitimately return NULL, which must not be reported as out-of-memory. */
    src_buf = malloc(src_len ? src_len : 1);
    if (!src_buf) {
        fprintf(stderr, "Error: Memory allocation failed\n");
        goto done;
    }
    if (fread(src_buf, 1, src_len, fin) != src_len) {
        fprintf(stderr, "Error: Failed to read input file\n");
        goto done;
    }
    fclose(fin);
    fin = NULL;

    /* Compute the worst-case compressed size and allocate the buffer. */
    bound = (level == 1) ? lzav_compress_bound((int)src_len)
                         : lzav_compress_bound_hi((int)src_len);
    if (bound <= 0 || bound > INT_MAX - 32) {
        fprintf(stderr, "Error: Input file is too large to compress\n");
        goto done;
    }
    max_len = bound + 32;
    comp_buf = malloc((size_t)max_len);
    if (!comp_buf) {
        fprintf(stderr, "Error: Memory allocation failed\n");
        goto done;
    }

    /* Run the compressor. A zero result is only acceptable for empty input. */
    comp_len = (level == 1)
                   ? lzav_compress_default(src_buf, comp_buf, (int)src_len, max_len)
                   : lzav_compress_hi(src_buf, comp_buf, (int)src_len, max_len);
    if (comp_len < 0 || (comp_len == 0 && src_len != 0)) {
        fprintf(stderr, "Error: Compression failed\n");
        goto done;
    }

    /* Original size, 4 bytes, little-endian. */
    header[0] = (uint8_t)(src_len & 0xFF);
    header[1] = (uint8_t)((src_len >> 8) & 0xFF);
    header[2] = (uint8_t)((src_len >> 16) & 0xFF);
    header[3] = (uint8_t)((src_len >> 24) & 0xFF);

    if (write_file(output_path, header, sizeof header, comp_buf, (size_t)comp_len) != 0) {
        goto done;
    }

    end = clock();
    /* Avoid dividing by zero when the input file is empty. */
    printf("Compressed %u bytes to %d bytes (%.2f%%)\n",
           (unsigned)src_len, comp_len,
           src_len ? (comp_len * 100.0) / src_len : 0.0);
    printf("Time: %.2f ms\n", elapsed_ms(start, end));
    rc = 0;

done:
    if (fin) {
        fclose(fin);
    }
    free(comp_buf);
    free(src_buf);
    return rc;
}

/*
 * Decompresses a file produced by compress_lzav().
 *
 * Reads the 4-byte little-endian original size from the start of input_path,
 * decompresses the remaining bytes with lzav_decompress(), and writes the
 * result to output_path.
 *
 * Returns 0 on success and -1 on failure (a message is printed to stderr).
 */
int decompress_lzav(const char* input_path, const char* output_path) {
    int rc = -1;
    void* comp_buf = NULL;
    void* decomp_buf = NULL;
    long file_len;
    uint32_t src_len;
    size_t comp_len;
    int result_len;
    uint8_t header[SIZE_HEADER_LEN];
    clock_t start = clock();
    clock_t end;

    FILE* fin = fopen(input_path, "rb");
    if (!fin) {
        fprintf(stderr, "Error: Cannot open input file '%s'\n", input_path);
        return -1;
    }

    file_len = stream_size(fin);
    if (file_len < 0) {
        fprintf(stderr, "Error: Cannot determine size of input file '%s'\n", input_path);
        goto done;
    }

    /* Read the original size from the file header (4 bytes, little-endian). */
    if (file_len < SIZE_HEADER_LEN ||
        fread(header, 1, SIZE_HEADER_LEN, fin) != SIZE_HEADER_LEN) {
        fprintf(stderr, "Error: Failed to read size header\n");
        goto done;
    }
    src_len = (uint32_t)header[0] |
              ((uint32_t)header[1] << 8) |
              ((uint32_t)header[2] << 16) |
              ((uint32_t)header[3] << 24);

    /* Everything after the header is compressed payload. */
    comp_len = (size_t)(file_len - SIZE_HEADER_LEN);

    /* The header is untrusted input and both lengths are converted to int for
     * lzav_decompress(); reject values that would not survive the conversion. */
    if (comp_len > (size_t)INT_MAX || src_len > (uint32_t)INT_MAX) {
        fprintf(stderr, "Error: Sizes above %d bytes are not supported\n", INT_MAX);
        goto done;
    }

    /* Read the compressed data (stream is already positioned past the header).
     * Allocate at least one byte so an empty payload is not mistaken for
     * an allocation failure. */
    comp_buf = malloc(comp_len ? comp_len : 1);
    if (!comp_buf) {
        fprintf(stderr, "Error: Memory allocation failed\n");
        goto done;
    }
    if (fread(comp_buf, 1, comp_len, fin) != comp_len) {
        fprintf(stderr, "Error: Failed to read compressed data\n");
        goto done;
    }
    fclose(fin);
    fin = NULL;

    /* Allocate the decompression buffer. */
    decomp_buf = malloc(src_len ? src_len : 1);
    if (!decomp_buf) {
        fprintf(stderr, "Error: Memory allocation failed\n");
        goto done;
    }

    /* Decompress. A negative result signals failure; a length that differs
     * from the header value is also treated as corruption so that no
     * unwritten buffer bytes are ever emitted. */
    result_len = lzav_decompress(comp_buf, decomp_buf, (int)comp_len, (int)src_len);
    if (result_len < 0 || (uint32_t)result_len != src_len) {
        fprintf(stderr, "Error: Decompression failed\n");
        goto done;
    }

    /* Write the decompressed file. */
    if (write_file(output_path, NULL, 0, decomp_buf, src_len) != 0) {
        goto done;
    }

    end = clock();
    printf("Decompressed %zu bytes to %u bytes\n", comp_len, (unsigned)src_len);
    printf("Time: %.2f ms\n", elapsed_ms(start, end));
    rc = 0;

done:
    if (fin) {
        fclose(fin);
    }
    free(decomp_buf);
    free(comp_buf);
    return rc;
}

/*
 * Entry point: parses the options described by print_help() and runs either
 * compression (default) or decompression (-d).
 *
 * When -o is not given, the output name is the input name with ".lzav"
 * (compression) or ".decomp" (decompression) appended. If several input
 * files are given, the last one is used.
 *
 * Exit status: 0 on success, 1 on usage or processing errors.
 */
int main(int argc, char** argv) {
    char input_path[MAX_FILENAME] = {0};
    char output_path[MAX_FILENAME] = {0};
    int decompress_mode = 0;
    int compress_level = 1;

    for (int i = 1; i < argc; i++) {
        const char* arg = argv[i];

        if (strcmp(arg, "-h") == 0) {
            print_help();
            return 0;
        } else if (strcmp(arg, "-d") == 0) {
            decompress_mode = 1;
        } else if (strcmp(arg, "-l") == 0 || strcmp(arg, "-o") == 0) {
            /* Both options take a value; a missing value is a usage error
             * rather than something to ignore silently. */
            if (i + 1 >= argc) {
                fprintf(stderr, "Error: Option '%s' requires an argument\n", arg);
                print_help();
                return 1;
            }
            const char* value = argv[++i];

            if (arg[1] == 'l') {
                /* strtol (unlike atoi) lets non-numeric input be rejected. */
                char* end = NULL;
                errno = 0;
                long parsed = strtol(value, &end, 10);
                if (end == value || *end != '\0' || errno == ERANGE ||
                    parsed < INT_MIN || parsed > INT_MAX) {
                    fprintf(stderr, "Error: Invalid compression level '%s'\n", value);
                    return 1;
                }
                compress_level = (int)parsed;
            } else if (set_path(output_path, value) != 0) {
                return 1;
            }
        } else if (arg[0] != '-') {
            if (set_path(input_path, arg) != 0) {
                return 1;
            }
        } else {
            /* Unknown options are still ignored, but no longer silently. */
            fprintf(stderr, "Warning: Ignoring unknown option '%s'\n", arg);
        }
    }

    if (!input_path[0]) {
        print_help();
        return 1;
    }

    if (!output_path[0]) {
        const char* ext = decompress_mode ? ".decomp" : ".lzav";
        int n = snprintf(output_path, MAX_FILENAME, "%s%s", input_path, ext);
        if (n < 0 || n >= MAX_FILENAME) {
            fprintf(stderr, "Error: Output path is too long (limit is %d characters)\n",
                    MAX_FILENAME - 1);
            return 1;
        }
    }

    int result;
    if (decompress_mode) {
        result = decompress_lzav(input_path, output_path);
    } else {
        result = compress_lzav(input_path, output_path, compress_level);
    }
    return result != 0 ? 1 : 0;
}

注意计算输出内存大小的函数lzav_compress_bound(src_len)和lzav_compress_bound_hi必须和压缩函数lzav_compress_default及lzav_compress_hi配套使用,深度压缩所需的内存更大,而且都超过原始文件大小,这与最终输出的压缩文件大小不是一个概念。比如595203472对应的lzav_compress_bound是599171513,lzav_compress_bound_hi是623546512。
编译命令行和执行结果如下,附上lz4和zstd的压缩大小和时间。

gcc lzavhd.c -o lzavhd -I . -O3
time ./lzavhd clickhouse
Compressed 595203472 bytes to 188672728 bytes (31.70%)
Time: 1218.29 ms

real    0m3.581s
user    0m0.806s
sys     0m0.419s
time ./lzavhd -l 2 clickhouse
Compressed 595203472 bytes to 165780399 bytes (27.85%)
Time: 4225.82 ms

real    0m6.489s
user    0m3.802s
sys     0m0.434s
time ./lzavhd -d clickhouse.lzav
Decompressed 165780399 bytes to 595203472 bytes
Time: 522.35 ms

real    0m2.597s
user    0m0.114s
sys     0m0.416s

time zstd clickhouse
clickhouse           : 23.59%   (   568 MiB =>    134 MiB, clickhouse.zst)

real    0m3.116s
user    0m1.424s
sys     0m0.241s

time lz4 -3 clickhouse clickhous.lz4
Compressed 595203472 bytes into 186838255 bytes ==> 31.39%

real    0m2.328s
user    0m3.502s
sys     0m0.235s

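内部计时和外部用time命令计时差别挺大,留到以后分析。一个可能的原因是:程序内部使用clock()计时,它返回的是进程占用的CPU时间,数值上接近time输出的user与sys之和(例如第一次压缩:0.806s+0.419s≈1.22s,与内部的1218.29 ms基本一致);而real是包含磁盘I/O等待在内的实际耗时,所以明显更大。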

http://www.dtcms.com/a/293577.html

相关文章:

  • 什么是LLMs.txt?如何在线生成?robots.txt一键转LLMs.txt
  • 第九讲:C++中的list与forward_list
  • OpenCV 零基础到项目实战 | DAY 2:图像预处理全解析
  • 基于STM32驱动ADS1118实现电压采样并输出到串口
  • PetaLinux 使用技巧与缓存配置
  • 从零搭建 OpenCV 项目(新手向)-- 第二天 OpenCV图像预处理(一)
  • 第四章 Freertos物联网实战DHT11温湿度模块
  • 嵌入式学习-(李宏毅)机器学习(1)-day28
  • 本地电脑映射端口到外网访问的开启方法和注意事项,内网服务提供跨网使用简单操作实现
  • 神经网络和机器学习的一些基本概念
  • 某种物联网SIM卡流量查询方法
  • BQ4050RSMR DIOTEC高精度锂电池保护与电量监测芯片 集成保护+计量+通信
  • 2025年Zigbee技术白皮书:全球物联网无线通信的关键创新
  • 【Linux | 网络】应用层(HTTPS)
  • 如何在 Ubuntu 20.04 Linux 上安装 TeamSpeak 客户端
  • SparkSQL 聚合函数 MAX 对 NULL 值的处理
  • AWS Lambda IoT数据处理异常深度分析:从告警到根因的完整排查之路
  • Notepad++工具操作技巧
  • BitDistiller:通过自蒸馏释放 Sub-4-Bit 大语言模型的潜力
  • React Native + Expo 入坑指南:从核心概念到实战演练
  • Android 测试全指南:单元测试与UI测试框架详解
  • 《使用Qt Quick从零构建AI螺丝瑕疵检测系统》——3. QML入门:像搭积木一样构建UI
  • 论文笔记:Tuning Language Models by Proxy
  • 图机器学习(16)——图数据与自然语言处理
  • qiankun 和 Element UI 影响 el-cascader 宽度问题
  • 计算机毕设分享-基于SpringBoot的房屋租赁系统(开题报告+源码+Lun文+开发文档+数据库设计文档)
  • win11安装erlang和rabbitmq
  • 基于 XGBoost 与 SHAP 的医疗自动化办公与可视化系统(上)
  • Kafka——Kafka中的位移提交
  • ITIL 4:云计算与微服务对组织架构的影响