美团龙猫AI修正的二分法提取xlsx的sheet.xml某个范围的数据到csv文件的C程序
这次交互的次数比较多,主要是改用逐个字符解析以应对无换行符的xml文件,同时重写了标签和属性处理。修改后的main函数 - 支持命令行参数。
限制:范围支持单字母的列,即A-Z,xml文件无共享字符串。
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#define MAX_LINE_LENGTH 4096
/* Size in bytes of the per-cell value buffer (including the terminating NUL). */
#define MAX_CELL_CONTENT 1024
/* Maximum number of rows in an Excel sheet. */
#define MAX_SHEET_ROWS 1048576

/**
 * Cell range requested by the user.
 *
 * Columns are single letters; each field previously shared a line with a
 * trailing line comment, which commented out end_col once the line breaks
 * were lost. Every field now sits on its own line.
 */
typedef struct {
    int start_row;
    int end_row;
    char start_col; /* column letter, e.g. 'A' */
    char end_col;   /* column letter, e.g. 'Z' */
} ParseRange;

/* Parse result */
/**
 * One collected cell: its coordinates plus the captured text.
 */
typedef struct {
    int row;                      /* row number of the cell */
    char col;                     /* column letter of the cell */
    char value[MAX_CELL_CONTENT]; /* NUL-terminated cell text */
    int is_empty;                 /* empty-cell marker (non-zero = no value) */
} CellData;

/* Global variables */
/* Growable array of collected cells, the number in use, and the allocated size. */
CellData *results = NULL;
int result_count = 0;
int result_capacity = 0;

/* Range of the parse currently in progress; add_cell_result() filters on it. */
ParseRange current_parse_range = {0};

/* Function declarations */
int binary_search_start(FILE *file, ParseRange range);
int find_row_by_binary_search(FILE *file, int target_row, long *start_pos, long *end_pos);
void parse_row_data(FILE *file, ParseRange range, long start_pos, long end_pos);
void add_cell_result(int row, char col, const char *value, int is_empty);
int is_cell_in_range(int row, char col, ParseRange range);
int compare_row_col(int row1, char col1, int row2, char col2);
void free_results();

/* A preprocessor directive must start its own line; it used to be glued to
 * the prototype above. Needed for clock() and CLOCKS_PER_SEC. */
#include <time.h>

/**
 * Get the time elapsed since program start, in seconds.
 * @return seconds from program start until now (floating point, millisecond precision)
 */
/**
 * Report how much processor time has passed since the first call.
 *
 * The first call records clock() as the reference point and returns 0.0;
 * later calls return the difference to that reference. clock() reports
 * processor time used by the program, so time spent blocked is not counted.
 *
 * @return seconds since the first call, or 0.0 on the first call
 */
double get_runtime_seconds(void) {
    /* A dedicated flag is used instead of testing start_t == 0, because
     * clock() may legitimately return 0 and would never be latched. */
    static int started = 0;
    static clock_t start_t;

    if (!started) {
        start_t = clock();
        started = 1;
        return 0.0;
    }
    return (double)(clock() - start_t) / CLOCKS_PER_SEC;
}

/**
 * Get a formatted timestamp string (for debug output).
 * @param prefix text appended after the timestamp (e.g. "parse finished")
 * @return formatted string, e.g. "[00:01.234] parse finished"
 */
/**
 * Build "[MM:SS.mmm] <prefix>" from the value of get_runtime_seconds().
 *
 * The text lives in a static 64-byte buffer that is overwritten by every
 * call, so the returned pointer is only valid until the next call.
 *
 * @param prefix text placed after the timestamp
 * @return pointer to the static buffer holding the formatted message
 */
const char* get_timestamped_msg(const char *prefix) {
    static char stamp[64];

    const double elapsed = get_runtime_seconds();
    const int whole = (int)elapsed;
    const int millis = (int)((elapsed - whole) * 1000);

    snprintf(stamp, sizeof(stamp), "[%02d:%02d.%03d] %s",
             whole / 60, whole % 60, millis, prefix);
    return stamp;
}

/**
 * Main parsing function - corrected version.
 * @param filename path of the XML file
 * @param range range to parse
 * @return 0 on success, -1 on failure
 */
/* Changed beginning of parse_sheet_xml: */
/* Returns 1 when the bytes at offset pos are "<row" followed by a character
 * that ends a tag name (whitespace, '>' or '/'), otherwise 0. */
static int sheet_row_tag_at(FILE *file, long pos)
{
    static const char marker[] = "<row";

    if (fseek(file, pos, SEEK_SET) != 0) {
        return 0;
    }
    for (size_t i = 0; i < sizeof marker - 1; i++) {
        if (fgetc(file) != marker[i]) {
            return 0;
        }
    }
    int next = fgetc(file);
    return next == '>' || next == '/' || isspace(next);
}

/**
 * Parse the requested range of a worksheet XML file into the global results
 * array.
 *
 * Stores `range` in current_parse_range, (re)allocates `results`, asks
 * binary_search_start() for a starting offset and, when one is found, hands
 * the span from that "<row" tag to the end of the file to parse_row_data().
 *
 * @param filename path of the XML file
 * @param range    range to parse
 * @return 0 on success (including "no start row found"), -1 when the file
 *         cannot be opened, positioned, or the result array cannot be allocated
 */
int parse_sheet_xml(const char *filename, ParseRange range)
{
    if (!filename) {
        return -1;
    }
    current_parse_range = range; /* add_cell_result() filters on this */

    /* Binary mode: the parser addresses the file by byte offset. */
    FILE *file = fopen(filename, "rb");
    if (!file) {
        perror("无法打开文件");
        return -1;
    }

    /* Start from a fresh result array; release one left by an earlier call. */
    free(results);
    result_count = 0;
    result_capacity = 1024;
    results = malloc((size_t)result_capacity * sizeof *results);
    if (!results) {
        result_capacity = 0;
        fclose(file);
        return -1;
    }

    if (binary_search_start(file, range)) {
        long row_start_pos = ftell(file);
        if (row_start_pos < 0) {
            fclose(file);
            return -1;
        }

        /* Make sure we begin exactly on a "<row" tag. The previous check
         * compared against "row"[1..3] ('o', 'w', NUL), never matched, and
         * therefore walked back to offset 0 with one seek per byte. Comparing
         * against the full "<row" marker succeeds on the first probe whenever
         * the stream is already positioned on a tag. */
        for (long pos = row_start_pos; pos >= 0; pos--) {
            if (sheet_row_tag_at(file, pos)) {
                row_start_pos = pos;
                break;
            }
        }

        /* The end of the file is the upper bound of the parse. */
        long file_size = -1;
        if (fseek(file, 0, SEEK_END) == 0) {
            file_size = ftell(file);
        }
        if (file_size < 0 || fseek(file, row_start_pos, SEEK_SET) != 0) {
            fclose(file);
            return -1;
        }

        printf("%s 二分查找\n", get_timestamped_msg(""));
        printf("二分查找到row_start_pos=%ld\n", row_start_pos); /* %ld: the value is a long */

        parse_row_data(file, range, row_start_pos, file_size);
    }

    fclose(file);
    return 0;
}

/**
 * Locate the starting row by binary search.
 * @param file file pointer
 * @param range range to parse
 * @return whether a starting row was found
 */
int binary_search_start(FILE *file, ParseRange range) {long file_size = 0;long low, high, mid;// 获取文件大小fseek(file, 0, SEEK_END);file_size = ftell(file);fseek(file, 0, SEEK_SET);low = 0;high = file_size;int last_found_row = -1;long last_found_pos = -1;while (low <= high) {mid = (low + high) / 2;fseek(file, mid, SEEK_SET);// 向前查找最近的<row标签char buffer[MAX_LINE_LENGTH];long row_start_pos = -1;int row_num = -1;// 从mid位置向前扫描,找到前一个<row标签for (long pos = mid; pos >= low && pos >= 0; pos--) {fseek(file, pos, SEEK_SET);if (fgetc(file) == '<') {if (pos + 4 <= file_size && fgetc(file) == 'r' && fgetc(file) == 'o' && fgetc(file) == 'w') {row_start_pos = pos;break;}}}// 如果向前没找到,从mid向后找if (row_start_pos == -1) {for (long pos = mid; pos <= high && pos < file_size - 4; pos++) {fseek(file, pos, SEEK_SET);if (fgetc(file) == '<') {if (pos + 4 <= file_size && fgetc(file) == 'r' && fgetc(file) == 'o' && fgetc(file) == 'w') {row_start_pos = pos;break;}}}}if (row_start_pos == -1) {// 没有找到<row标签if (mid == low) break;high = mid - 1;continue;}// 解析行号fseek(file, row_start_pos, SEEK_SET);while (fgets(buffer, MAX_LINE_LENGTH, file)) {if (strstr(buffer, "<row")) {char *row_attr = strstr(buffer, "r=\"");if (row_attr) {row_attr += 3; // 跳过r="row_num = atoi(row_attr);break;}}}if (row_num == -1) {// 解析行号失败,调整搜索范围if (row_start_pos < range.start_row) low = mid + 1;else high = mid - 1;continue;}if (row_num == range.start_row) {// 找到精确匹配last_found_row = row_num;last_found_pos = row_start_pos;break;} else if (row_num < range.start_row) {// 当前行小于目标行if (row_num > last_found_row) {last_found_row = row_num;last_found_pos = row_start_pos;}low = mid + 1;} else {// 当前行大于目标行high = mid - 1;}}// 如果找到了合适的起始位置if (last_found_row != -1) {//printf(" last_found_pos=%d\n", last_found_pos);fseek(file, last_found_pos, SEEK_SET);return 1;}return 0;
}/*** 添加单元格结果到结果数组*/
void add_cell_result(int row, char col, const char *value, int is_empty) {// 关键修复:只保存用户指定范围内的单元格if (row < current_parse_range.start_row || row > current_parse_range.end_row ||col < current_parse_range.start_col || col > current_parse_range.end_col) {return; // 直接返回,不保存范围外的数据}// 扩展结果数组if (result_count >= result_capacity) {result_capacity *= 2;results = (CellData *)realloc(results, result_capacity * sizeof(CellData));if (!results) {fprintf(stderr, "内存分配失败\n");return;}}results[result_count].row = row;results[result_count].col = col;strncpy(results[result_count].value, value, MAX_CELL_CONTENT - 1);results[result_count].value[MAX_CELL_CONTENT - 1] = '\0';results[result_count].is_empty = is_empty;result_count++;
//printf("row=%d,col=%c ",row,col);
}/*** 检查单元格是否在用户指定范围内*/
/**
 * Tell whether (row, col) falls inside `range`, bounds included.
 *
 * @param row   row number to test
 * @param col   column letter to test
 * @param range range to test against
 * @return 1 when both the row and the column are inside the range, else 0
 */
int is_cell_in_range(int row, char col, ParseRange range) {
    const int row_inside = (row >= range.start_row) && (row <= range.end_row);
    const int col_inside = (col >= range.start_col) && (col <= range.end_col);

    return row_inside && col_inside;
}

/**
 * Compare two row/column coordinates.
 * @return -1 if (row1, col1) comes before (row2, col2), 0 if they are equal,
 *         1 if (row1, col1) comes after (row2, col2)
 */
/**
 * Order two cell coordinates by row first and by column second.
 *
 * @return -1 when (row1, col1) sorts before (row2, col2), 1 when it sorts
 *         after, 0 when both coordinates are identical
 */
int compare_row_col(int row1, char col1, int row2, char col2) {
    const int by_row = (row1 > row2) - (row1 < row2);

    if (by_row != 0) {
        return by_row;
    }
    return (col1 > col2) - (col1 < col2);
}

/**
 * Release the result memory.
 */
/**
 * Drop the global results array and reset its bookkeeping to "empty".
 */
void free_results() {
    free(results); /* free(NULL) does nothing, so no guard is needed */
    results = NULL;

    result_capacity = 0;
    result_count = 0;
}

/**
 * Print the parse results.
 */
void print_results() {printf("解析结果:\n");for (int i = 0; i < result_count; i++) {if (results[i].is_empty) {printf("单元格 %c%d: (空)\n", results[i].col, results[i].row);} else {printf("单元格 %c%d: %s\n", results[i].col, results[i].row, results[i].value);}}
}/*** 以Excel的A1表示法打印解析范围* 例如:A1:H7* @param range 要打印的解析范围*/
/**
 * Print `range` to stdout as "<start col><start row>:<end col><end row>"
 * behind a fixed label.
 *
 * @param range the range to print
 */
void print_parse_range(ParseRange range) {
    const char first_col = range.start_col;
    const char last_col = range.end_col;
    const int first_row = range.start_row;
    const int last_row = range.end_row;

    printf("解析范围: %c%d:%c%d\n", first_col, first_row, last_col, last_row);
}
// 使用示例// 前面的所有函数保持不变.../*** 从用户输入解析Excel格式范围(如A1:H5)* @param input 用户输入的字符串* @param range 输出解析结果* @return 0成功,-1失败*/
int parse_excel_range(const char *input, ParseRange *range) {if (!input || !range) return -1;char start_col = '\0', end_col = '\0';int start_row = 0, end_row = 0;int parsed = 0;// 跳过空白while (isspace(*input)) input++;// 解析起始列if (isalpha(*input)) {start_col = toupper(*input);input++;// 解析起始行char *end_ptr;start_row = strtol(input, &end_ptr, 10);if (end_ptr > input) {input = end_ptr;// 解析分隔符while (isspace(*input)) input++;if (*input == ':') {input++;while (isspace(*input)) input++;// 解析结束列if (isalpha(*input)) {end_col = toupper(*input);input++;// 解析结束行end_row = strtol(input, &end_ptr, 10);if (end_ptr > input) {parsed = 1;}}}}}if (parsed) {range->start_row = start_row;range->end_row = end_row;range->start_col = start_col;range->end_col = end_col;
printf("start_row=%d,end_row=%d",start_row,end_row);return 0;}return -1;
}/*** 将结果保存为CSV文件 - 修正版本* @param filename 输出CSV文件名* @return 0成功,-1失败*/
int save_results_to_csv(const char *filename) {if (!filename || result_count == 0) return -1;printf("result_count=%d\n",result_count);FILE *csv = fopen(filename, "w");if (!csv) {perror("无法创建CSV文件");return -1;}// 收集所有行号int *rows = (int *)malloc(result_count * sizeof(int));int row_count = 0;for (int i = 0; i < result_count; i++) {int found = 0;for (int j = 0; j < row_count; j++) {if (rows[j] == results[i].row) {found = 1;break;}}if (!found) {rows[row_count++] = results[i].row;}}// 按行号排序for (int i = 0; i < row_count - 1; i++) {for (int j = i + 1; j < row_count; j++) {if (rows[i] > rows[j]) {int temp = rows[i];rows[i] = rows[j];rows[j] = temp;}}}int start_col = results[0].col; // 实际起始列int end_col = results[0].col; // 实际结束列for (int i = 1; i < result_count; i++) {if (results[i].col < start_col) start_col = results[i].col;if (results[i].col > end_col) end_col = results[i].col;}int col_count = end_col - start_col + 1;// 写入标题行fprintf(csv, "Row,");for (int c = 0; c < col_count; c++) {fprintf(csv, "%c", start_col + c);if (c < col_count - 1) fprintf(csv, ",");}fprintf(csv, "\n");// 为每一行生成CSV数据for (int r = 0; r < row_count; r++) {int current_row = rows[r];// 检查该行是否有数据(在用户指定范围内)int has_data = 0;for (int i = 0; i < result_count; i++) {if (results[i].row == current_row && results[i].col >= start_col && results[i].col <= end_col &&!results[i].is_empty) {has_data = 1;break;}}if (!has_data) continue; // 跳过全空行// 生成该行的CSV数据fprintf(csv, "%d,", current_row); // 行号作为第一列for (int c = 0; c < col_count; c++) {char col = start_col + c;char *value = NULL;int is_empty = 1;// 查找该列的数据for (int i = 0; i < result_count; i++) {if (results[i].row == current_row && results[i].col == col) {value = results[i].value;is_empty = results[i].is_empty;break;}}if (!is_empty && value && strlen(value) > 0) {// 转义CSV特殊字符if (strchr(value, ',') || strchr(value, '"') || strchr(value, '\n')) {fprintf(csv, "\"%s\"", value);} else {fprintf(csv, "%s", value);}} else {// 空单元格fprintf(csv, "");}if (c < col_count - 1) 
{fprintf(csv, ",");}}fprintf(csv, "\n");}free(rows);fclose(csv);printf("结果已保存到: %s\n", filename);return 0;
}
/**
 * Process a NUL-terminated chunk of XML text held in memory.
 *
 * Recognises "<row", "</row>", "<c " and "</c>" and reports each cell found
 * inside a row through add_cell_result(). The attribute and value lookups
 * for a cell are confined to that cell's own element; the buffer is not
 * modified. Scanning stops at the "</row>" of a row whose number has reached
 * range.end_row.
 *
 * NOTE(review): no caller of this function is visible in this file.
 *
 * @param buffer           XML content to process
 * @param range            range to parse
 * @param in_row           in/out: whether the scanner is inside a row
 * @param current_row      in/out: current row number
 * @param current_cell_col in/out: column letter of the current cell
 * @param temp_value       scratch buffer of at least MAX_CELL_CONTENT bytes
 */
void process_xml_buffer(char *buffer, ParseRange range, int *in_row, int *current_row,
                        char *current_cell_col, char *temp_value)
{
    char *pos = buffer;
    const char *buf_end = buffer + strlen(buffer);

    while (*pos) {
        if (strncmp(pos, "<row", 4) == 0) {
            /* Row number: an r="..." attribute inside this opening tag. */
            const char *tag_end = strchr(pos, '>');
            const char *row_attr = strstr(pos, "r=\"");
            if (row_attr && (!tag_end || row_attr < tag_end)) {
                *current_row = atoi(row_attr + 3);
            }
            *in_row = 1;
            pos += 4;
        } else if (strncmp(pos, "</row>", 6) == 0) {
            if (*current_row >= range.end_row) {
                return; /* past the requested range: stop */
            }
            *in_row = 0;
            *current_row = -1;
            pos += 6;
        } else if (*in_row && strncmp(pos, "<c ", 3) == 0) {
            /* Extent of this cell: its opening tag and, unless the tag is
             * self-closing, everything up to the matching "</c>". */
            const char *open_end = strchr(pos, '>');
            if (!open_end) {
                open_end = buf_end;
            }
            const int is_self_closing = (open_end != buf_end) && (open_end[-1] == '/');
            const char *cell_end = open_end;
            if (!is_self_closing) {
                const char *close_tag = strstr(open_end, "</c>");
                cell_end = close_tag ? close_tag : buf_end;
            }

            const char *col_attr = strstr(pos, "r=\"");
            if (col_attr && col_attr < open_end) {
                int cell_has_value = 0;

                *current_cell_col = col_attr[3];
                temp_value[0] = '\0';

                if (!is_self_closing) {
                    const char *v_tag = strstr(open_end, "<v>");
                    if (v_tag && v_tag < cell_end) {
                        const char *value_start = v_tag + 3;
                        const char *v_end = strstr(value_start, "</v>");
                        if (v_end && v_end <= cell_end) {
                            size_t len = (size_t)(v_end - value_start);
                            if (len > MAX_CELL_CONTENT - 1) {
                                len = MAX_CELL_CONTENT - 1;
                            }
                            memcpy(temp_value, value_start, len);
                            temp_value[len] = '\0';
                            cell_has_value = 1;
                        }
                    }
                }

                /* A self-closing cell, or one without <v>, is empty. */
                if (is_cell_in_range(*current_row, *current_cell_col, range)) {
                    add_cell_result(*current_row, *current_cell_col, temp_value,
                                    !cell_has_value);
                }
            }
            pos += 3;
        } else if (strncmp(pos, "</c>", 4) == 0) {
            *current_cell_col = '\0';
            pos += 4;
        } else {
            pos++;
        }
    }
}
/**
 * React to one complete tag seen by the character-level parser.
 *
 * "row" tags update the row state regardless of the range. "c", "v" and "t"
 * tags are only acted on while the current row is between 1 and
 * range.end_row and the current column is inside the column range:
 *   - </c> clears the current column and any value capture;
 *   - a self-closing <c .../> is recorded as an empty cell;
 *   - <v> / <t> start capturing text, </v> / </t> record the captured text
 *     as the cell value when it is not empty.
 *
 * @param tag_name         tag name; a leading '/' marks an end tag
 * @param attr_value       unused
 * @param attr_count       unused
 * @param range            range to parse
 * @param in_row           in/out: whether the parser is inside a row
 * @param current_row      in/out: current row number, -1 when unknown
 * @param current_cell_col in/out: column letter of the current cell
 * @param temp_value       buffer holding the text captured so far
 * @param is_self_closing  non-zero when the tag ended with "/>"
 * @param value_started    in/out: whether text is being captured
 * @param value_len        in/out: number of bytes captured in temp_value
 */
void process_tag(const char *tag_name, const char *attr_value, int attr_count,
                 ParseRange range, int *in_row, int *current_row,
                 char *current_cell_col, char *temp_value, int *is_self_closing,
                 int *value_started, int *value_len)
{
    (void)attr_value;
    (void)attr_count;

    const int is_end_tag = (tag_name[0] == '/');
    const char *tag = is_end_tag ? tag_name + 1 : tag_name;

    /* Row bookkeeping happens for every row. The former out-of-range branch
     * compared the already-stripped tag against "/row" and so never saw a
     * row end. */
    if (strcmp(tag, "row") == 0) {
        if (is_end_tag) {
            *in_row = 0;
            *current_row = -1;
        } else {
            *in_row = 1;
        }
        return;
    }

    /* Everything below concerns cells; ignore cells outside the range. */
    if (*current_row < 1 || *current_row > range.end_row ||
        *current_cell_col < range.start_col || *current_cell_col > range.end_col) {
        return;
    }

    if (strcmp(tag, "c") == 0) {
        if (is_end_tag) {
            *current_cell_col = '\0';
            *value_started = 0;
            *value_len = 0;
        } else if (*is_self_closing) {
            if (is_cell_in_range(*current_row, *current_cell_col, range)) {
                add_cell_result(*current_row, *current_cell_col, "", 1);
            }
            *current_cell_col = '\0';
        }
    } else if (strcmp(tag, "v") == 0 || strcmp(tag, "t") == 0) {
        /* <v> carries a plain value, <t> the text of an inline string. */
        if (is_end_tag) {
            temp_value[*value_len] = '\0';
            if (*value_len > 0 && is_cell_in_range(*current_row, *current_cell_col, range)) {
                add_cell_result(*current_row, *current_cell_col, temp_value, 0);
            }
            *value_started = 0;
            *value_len = 0;
        } else if (!*is_self_closing) {
            /* A self-closing <v/> or <t/> has no content to capture. */
            *value_started = 1;
            *value_len = 0;
            temp_value[0] = '\0';
        }
    }
}

/**
 * Process an attribute - fix: record the string type.
 */
/**
 * React to one attribute of the tag currently being parsed.
 *
 * Only the "r" attribute changes parser state: on a row tag it sets the
 * current row number, on a cell tag it sets the current column letter. A
 * cell reference with more than one column letter (e.g. "AA5") lies beyond
 * the supported A-Z columns, so the column is set to '\0', which no valid
 * range contains. The "t" attribute value is stored but not read by
 * anything in this function.
 *
 * @param tag_name         name of the tag that owns the attribute
 * @param attr_name        attribute name; leading whitespace is ignored
 * @param attr_value       attribute value without the quotes
 * @param range            unused
 * @param in_row           unused
 * @param current_row      out: current row number
 * @param current_cell_col out: column letter of the current cell
 * @param temp_value       unused
 * @param value_started    unused
 * @param value_len        unused
 */
void process_attribute(const char *tag_name, const char *attr_name, const char *attr_value,
                       ParseRange range, int *in_row, int *current_row,
                       char *current_cell_col, char *temp_value,
                       int *value_started, int *value_len)
{
    (void)range;
    (void)in_row;
    (void)temp_value;
    (void)value_started;
    (void)value_len;

    /* The caller may hand over the separator in front of the name (" t"). */
    while (isspace((unsigned char)*attr_name)) {
        attr_name++;
    }

    const char *tag = (tag_name[0] == '/') ? tag_name + 1 : tag_name;

    /* Type of the cell seen last. */
    static char cell_type[16] = {0};

    if (strcmp(attr_name, "r") == 0) {
        if (strcmp(tag, "row") == 0) {
            *current_row = atoi(attr_value);
        } else if (strcmp(tag, "c") == 0) {
            const int multi_letter = attr_value[0] != '\0' &&
                                     isalpha((unsigned char)attr_value[1]);
            *current_cell_col = multi_letter ? '\0' : attr_value[0];
        }
    } else if (strcmp(attr_name, "t") == 0) {
        snprintf(cell_type, sizeof cell_type, "%s", attr_value);
    }
    /* Other attributes, e.g. the style attribute "s", are ignored. */
}

/**
 * Parse row data - final corrected version.
 * @param file file pointer
 * @param range range to parse
 * @param start_pos start position
 * @param end_pos end position
 */
/**
 * Parse the bytes in [start_pos, end_pos) one character at a time, so that
 * files without any line breaks are handled as well.
 *
 * A small state machine splits the input into tags, attributes and text:
 *   - every attribute is passed to process_attribute() as soon as its
 *     closing quote is read;
 *   - every tag is passed to process_tag() once, when its '>' is reached,
 *     i.e. after all of its attributes have been processed;
 *   - text between tags is appended to the value buffer while process_tag()
 *     has value capture switched on.
 * Parsing stops at end_pos, at end of file, or as soon as the current row
 * number exceeds range.end_row.
 *
 * Limitation: after "<?" or "<!" the parser simply returns to text mode; it
 * does not track the end of declarations or comments.
 *
 * @param file      file pointer
 * @param range     range to parse
 * @param start_pos offset of the first byte to parse
 * @param end_pos   offset one past the last byte to parse
 */
void parse_row_data(FILE *file, ParseRange range, long start_pos, long end_pos)
{
    enum {
        ST_TEXT,       /* between tags */
        ST_TAG_OPEN,   /* just read '<' */
        ST_TAG_NAME,   /* inside the tag name */
        ST_ATTR_NAME,  /* between or inside attribute names */
        ST_ATTR_QUOTE, /* read '=', waiting for the opening quote */
        ST_ATTR_VALUE  /* inside a quoted attribute value */
    };

    char temp_value[MAX_CELL_CONTENT] = {0};
    char tag_name[32] = {0};
    char attr_name[16] = {0};
    char attr_value[256] = {0};
    size_t tag_len = 0;
    size_t attr_len = 0;
    size_t val_len = 0;

    int in_row = 0;
    int current_row = -1;
    char current_cell_col = '\0';
    int value_started = 0;
    int value_len = 0;
    int is_self_closing = 0;
    int quote_char = 0;
    int state = ST_TEXT;

    if (!file || fseek(file, start_pos, SEEK_SET) != 0) {
        return;
    }

    /* Count the bytes ourselves instead of calling ftell() for each one. */
    long pos = start_pos;
    int c;
    while (pos < end_pos && (c = fgetc(file)) != EOF) {
        pos++;

        switch (state) {
        case ST_TEXT:
            if (c == '<') {
                state = ST_TAG_OPEN;
                tag_len = 0;
                tag_name[0] = '\0';
                is_self_closing = 0;
            } else if (value_started && value_len < MAX_CELL_CONTENT - 1) {
                temp_value[value_len++] = (char)c;
                temp_value[value_len] = '\0';
            }
            break;

        case ST_TAG_OPEN:
            if (c == '?' || c == '!') {
                state = ST_TEXT; /* skip <?xml ...> and <!-- ... */
                break;
            }
            state = ST_TAG_NAME;
            /* fallthrough */
        case ST_TAG_NAME:
            if (c == '>') {
                tag_name[tag_len] = '\0';
                process_tag(tag_name, NULL, 0, range, &in_row, &current_row,
                            &current_cell_col, temp_value, &is_self_closing,
                            &value_started, &value_len);
                state = ST_TEXT;
            } else if (isspace(c)) {
                tag_name[tag_len] = '\0';
                attr_len = 0;
                state = ST_ATTR_NAME;
            } else if (c == '/' && tag_len > 0) {
                is_self_closing = 1; /* "<tag/" - confirmed by the '>' that follows */
            } else {
                /* A '/' directly after '<' is kept: it marks an end tag. */
                is_self_closing = 0;
                if (tag_len < sizeof tag_name - 1) {
                    tag_name[tag_len++] = (char)c;
                }
            }
            break;

        case ST_ATTR_NAME:
            if (c == '=') {
                attr_name[attr_len] = '\0';
                attr_len = 0;
                state = ST_ATTR_QUOTE;
            } else if (c == '>') {
                process_tag(tag_name, NULL, 0, range, &in_row, &current_row,
                            &current_cell_col, temp_value, &is_self_closing,
                            &value_started, &value_len);
                state = ST_TEXT;
            } else if (c == '/') {
                is_self_closing = 1; /* "<c r=\"A1\"/>" is a self-closing tag too */
            } else if (isspace(c)) {
                /* separators must not become part of the attribute name */
            } else {
                is_self_closing = 0;
                if (attr_len < sizeof attr_name - 1) {
                    attr_name[attr_len++] = (char)c;
                }
            }
            break;

        case ST_ATTR_QUOTE:
            if (c == '"' || c == '\'') {
                quote_char = c;
                val_len = 0;
                state = ST_ATTR_VALUE;
            }
            break;

        case ST_ATTR_VALUE:
            if (c == quote_char) {
                attr_value[val_len] = '\0';
                process_attribute(tag_name, attr_name, attr_value, range, &in_row,
                                  &current_row, &current_cell_col, temp_value,
                                  &value_started, &value_len);
                state = ST_ATTR_NAME;
            } else if (val_len < sizeof attr_value - 1) {
                attr_value[val_len++] = (char)c;
            }
            /* Over-long values are consumed up to the closing quote, but
             * only the part that fits is kept. */
            break;

        default:
            state = ST_TEXT;
            break;
        }

        if (current_row > range.end_row) {
            return; /* everything after this row is outside the range */
        }
    }
}

/* Revised main function - supports command-line arguments */
/* Appends ".csv" to `name` unless ".csv" already occurs in it. `name` is a
 * buffer of `capacity` bytes; an over-long name is shortened so that the
 * suffix always fits. */
static void ensure_csv_extension(char *name, size_t capacity)
{
    static const char suffix[] = ".csv";

    if (capacity < sizeof suffix || strstr(name, suffix)) {
        return;
    }
    size_t len = strlen(name);
    if (len > capacity - sizeof suffix) {
        len = capacity - sizeof suffix;
    }
    memcpy(name + len, suffix, sizeof suffix);
}

/**
 * Program entry point.
 *
 * With three arguments (XML file, range, CSV file) it runs non-interactively;
 * without arguments it prompts for the same three values; any other argument
 * count prints the usage text. It parses the range, extracts the matching
 * cells from the XML file and saves them as CSV, printing a timestamped
 * progress line after each stage.
 *
 * @return 0 on success, -1 on any failure
 */
int main(int argc, char *argv[])
{
    char filename[1024] = {0};
    char csv_filename[1024] = {0};
    char range_input[64] = {0};
    ParseRange range;
    int interactive_mode = 0;

    printf("%s 程序启动\n", get_timestamped_msg(""));

    if (argc == 4) {
        /* Command-line mode: program xml_file range csv_file */
        snprintf(filename, sizeof filename, "%s", argv[1]);
        snprintf(range_input, sizeof range_input, "%s", argv[2]);
        snprintf(csv_filename, sizeof csv_filename, "%s", argv[3]);
        ensure_csv_extension(csv_filename, sizeof csv_filename);
    } else if (argc == 1) {
        /* No arguments: interactive mode */
        interactive_mode = 1;
        printf("=== Excel XML解析器 (交互模式) ===\n");

        printf("请输入XML文件路径: ");
        if (!fgets(filename, sizeof(filename), stdin)) {
            printf("错误: 无法读取文件名\n");
            return -1;
        }
        filename[strcspn(filename, "\r\n")] = 0;

        printf("请输入解析范围 (格式如 A1:H5): ");
        if (!fgets(range_input, sizeof(range_input), stdin)) {
            printf("错误: 无法读取范围\n");
            return -1;
        }
        range_input[strcspn(range_input, "\r\n")] = 0;

        printf("请输入CSV输出文件名 (默认: output.csv): ");
        if (!fgets(csv_filename, sizeof(csv_filename), stdin)) {
            csv_filename[0] = '\0';
        }
        csv_filename[strcspn(csv_filename, "\r\n")] = 0;
        if (csv_filename[0] == '\0') {
            snprintf(csv_filename, sizeof csv_filename, "%s", "output.csv");
        }
        ensure_csv_extension(csv_filename, sizeof csv_filename);
    } else {
        printf("用法:\n");
        printf(" %s <xml文件路径> <范围(A1:H5)> <csv输出文件名>\n", argv[0]);
        printf(" %s (进入交互模式)\n", argv[0]);
        printf("示例:\n");
        printf(" %s sheet.xml B3:H5 result.csv\n", argv[0]);
        return -1;
    }

    if (parse_excel_range(range_input, &range) != 0) {
        printf("失败: 范围格式错误,请使用格式如 A1:H5\n");
        return -1;
    }
    print_parse_range(range);

    if (parse_sheet_xml(filename, range) != 0) {
        printf("失败: 解析XML文件失败\n");
        free_results();
        return -1;
    }

    /* Command-line mode stays terse; interactive mode lists every cell. */
    if (interactive_mode) {
        printf("\n解析成功!\n");
        print_results();
        printf("\n正在保存到CSV...\n");
    } else {
        printf("成功: 解析完成\n");
    }
    printf("%s XML解析完成\n", get_timestamped_msg(""));

    if (save_results_to_csv(csv_filename) != 0) {
        printf("失败: 无法保存CSV文件\n");
        free_results();
        return -1;
    }
    printf("%s CSV保存完成\n", get_timestamped_msg(""));
    if (interactive_mode) {
        printf("完成!\n");
    }

    free_results();
    return 0;
}
编译和运行
gcc bsxml13.c -o bsxml -O3
time ./bsxml /shujv/par/dknyc/xl/worksheets/sheet1.xml A210000:Z211000 obig32.csv
[00:00.000] 程序启动
start_row=210000,end_row=211000解析范围: A210000:Z211000
[01:26.800] 二分查找
二分查找到row_start_pos=358064889
成功: 解析完成
[01:26.881] XML解析完成
result_count=28161
结果已保存到: obig32.csv
[01:30.019] CSV保存完成
real 1m39.296s
user 0m40.100s
sys 0m49.920s
可见对于行数多的靠后范围,二分查找比较慢,张泽鹏先生已经决定和AI PK一下,拭目以待。