当前位置: 首页 > news >正文

deepseek 的对话json导出成word和pdf

需要将 deepseek网页上的内容全部导出成 word和pdf,其流程分为三步

1.从deepseek的网页端 导出 json
2.使用一份python代码 切分总的json,导出成一个 若干个标准命名的json,一个对话一个
3.使用 nodejs 多线程并行处理,得到word和pdf

效果:
1.pdf可以,对话和 deepseek基本保持
2.word效果不是很好,建议自己修复一下

1.deepseek导出界面,得到一个conversations.json
在这里插入图片描述
2.将json切分成若干个小的 对话json
在这里插入图片描述

对应的代码是:


def printline(*args):print('-'*50,*args,'-'*50)#
# by 广都
# 1975767630
#import traceback
import os
import shutil
import sys# # dll搜索路径,修改值。
# for key, value in os.environ.items():
#     if key == 'PATH':
#         for c in value.split(';'):
#             # print(f"{key}={c}")
#             try:
#                 os.add_dll_directory(c)
#             except:
#                 passimport osdef get_path_list(file_path):directory = os.path.dirname(file_path)filename = os.path.basename(file_path)file_name, file_extension = os.path.splitext(filename)return directory, file_name, file_extensionfrom datetime import datetime
from datetime import datetime, timedeltadef days_to_date(days):# 字母转文字try:try:days = int(float(days))dt_object = datetime(1900, 1,1) + timedelta(days=days)except:passreturn daysreturn dt_object.strftime("%Y-%m-%d")except:passreturn str(days)def weite_xlsx_wps():pass# import os# import time# import win32com.client as win32# # import win32com# # excel = win32.Dispatch("Excel.Application")# excel = win32.Dispatch('Ket.Application')# excel.DisplayAlerts = False  # 关闭警告# excel.Visible = False  # 程序可见# try:#     pwd = os.getcwd()#     myexcel = excel.Workbooks.Open(new_file.replace('/','\\'))  # 打开一个已有的表格#     print('当前改写文件名 ',myexcel.Name)#     mysheet = myexcel.Sheets(1)#     ci=0#     for mi,one in enumerate(end_list):#         try:#             if ci%100==0:#                 print(ci,one)#             ci=ci+1#             # print(mi,one)#             # one[0] +=1#             mysheet.Range(f'S{one[0]}').Value = str(one[2])#             mysheet.Range(f'R{one[0]}').Value = str(one[3])#             # mysheet.Range('P'+str(vone[0])).Value = str(vone[1][1][0])#         except Exception as e0:#             pass#             print(e0)#             print('本行错误  ',vone)#     myexcel.SaveAs(new_file.replace('/','\\'))  # 保存# except Exception as e:#     pass#     print('发生错误',e)# excel.Quit()  # 退出# shutil.copy(new_file,'最新结果.xlsx')# print('计算结束')def get_all_file(path):# import osreturn_list=[]for root,dirs,files in os.walk(path,topdown=True):for file_one in files:use_path=root+'/'+file_onereturn_list.append(use_path.replace('/','\\'))return return_listdef del_file(path):#!/usr/bin/env pythonimport osimport shutilfilelist=[]rootdir=pathfilelist=os.listdir(rootdir)for f in filelist:filepath = os.path.join( rootdir, f )if os.path.isfile(filepath):os.remove(filepath)#print filepath+" removed!"elif os.path.isdir(filepath):shutil.rmtree(filepath,True)#print "dir "+filepath+" removed!"def make_file(path):try:os.makedirs(path)except:passdef get_all_txt(txt_path):return_list=[]lines  = open(txt_path,'r',encoding='utf8',errors='ignore')for line in lines:# print(line)# line=line.replace(' ','')line=line.replace('\n','')return_list.append(line)# if 'def ' in line and '(' in line and ')' in line:#     return_list.append(str(line))# return_list.append(int(line))lines.close()return return_listimport os#  获得打包以后得路径。
def get_exe_path(relative_path=''):if hasattr(sys, '_MEIPASS'):return os.path.join(sys._MEIPASS, relative_path)return os.path.join(os.path.abspath("."), relative_path)[:-1]def get_year_month_day_number():import datetimeyear=str(int(datetime.datetime.now().year))month=str(int(datetime.datetime.now().month))day=str(int(datetime.datetime.now().day))return year+'-'+month+'-'+dayimport xlsxwriter
def write_xlsx_list(path,write_list):workbook_zheng = xlsxwriter.Workbook(path)     #新建excel表worksheet_zheng = workbook_zheng.add_worksheet('sheet1')       #新建sheet(sheet的名称为"sheet1")# worksheet_zheng.set_column(0,1,50)# worksheet_zheng.set_column(1,2,20)# worksheet_zheng.set_column(2,10,50)for fx in range(0,len(write_list)):try:x_list=write_list[fx]for fy in range(0,len(x_list)):y_v=x_list[fy]worksheet_zheng.write(fx,fy,y_v)# print(x_list)except:passworkbook_zheng.close()# def ocr_func_list():
#     # from ocr_system import *
#     ocr = GetOcrApi()
#     png_json = get_png_json(read_one) # 输入图片,返回jso格式得结果
#     get_ocr_png_list(pdfPath, imagePath,num=1)
#     read_file_to_json()
#     write_json_to_file()
#     ocr_show(png_json)
#     ocr.close()
#     pass#---------------------------------------------------------------------------------------------------------------------------------------------------------------------
def get_xlsx_list(xlsx_path,sign=0):import xlrdread_one=xlsx_pathmatch_write_in_fp=xlrd.open_workbook(xlsx_path)sheet_names_list = match_write_in_fp.sheet_names()sh_read=match_write_in_fp.sheet_by_name(sheet_names_list[0])  #根据sheet索引获得第一个sheet。# print(sh_read.row_values(0))sum_list=[]for line_number in range(sign,sh_read.nrows):try:alone_line=sh_read.row_values(line_number)sum_list.append(alone_line)except:passreturn sum_listdef div_data_fuc(datalist,namelist):mlen = int(len(datalist)/len(namelist))div_len = len(datalist) - mlenprint(mlen,div_len)# print(mlen)# if mlen >= int(mlen):#     mlen=mlen+1cx_len =0re_list =[]for mi,one in enumerate(namelist):if mi<= div_len:re_list.append(datalist[cx_len:cx_len+mlen+1])cx_len= cx_len+ mlen+1else:re_list.append(datalist[cx_len:cx_len+mlen])cx_len= cx_len+ mlen# return [[]]return re_list# import shutil
# import os
# import time
# import win32com.client as win32# def get_all_sheet_content(excel_file):
#     excel = win32.Dispatch('Ket.Application')
#     excel.DisplayAlerts = False
#     excel.Visible = False
#     try:
#         p,n = os.path.split(excel_file)
#         workbook = excel.Workbooks.Open(excel_file.replace('/', '\\'))
#         worksheet = workbook.Sheets(1)
#         name_list =[x.Name for x in workbook.Sheets]
#         ss_list =[]
#         for xxx in workbook.Sheets:
#             worksheet=  xxx
#             data = worksheet.UsedRange.Value
#             re_name = worksheet.Name
#             content = [list(row) for row in data]
#             re_list =[]
#             for x in data:
#                 ts=[]
#                 for cx in x:
#                     if cx==None:
#                         ts.append('')
#                     else:
#                         ts.append(cx)
#                 re_list.append(ts)
#             ss_list.append([xxx.Name,re_list])
#         workbook.Close()
#         return ss_list#     except Exception as e:
#         print("Error:", e)
#     finally:
#         excel.Quit()
#     return ['',[]]def rgb_to_hex(r, g, b):return (r << 16) + (g << 8) + b# target_cell.Interior.Color = rgb_to_hex(255, 255, 0)  # 将颜色设置为黄色# 数据ok了,开始写
def write_xlsx_dict_list(path,write_list_2):workbook_zheng = xlsxwriter.Workbook(path)     #新建excel表worksheet_zheng = workbook_zheng.add_worksheet('Lead')       #新建sheet(sheet的名称为"sheet1")# worksheet_zheng.set_column(0,1,50)write_list= write_list_2[0]for fx in range(0,len(write_list)):try:x_list=write_list[fx]red_font = workbook_zheng.add_format(x_list[1])# print(x_list[1])for fy in range(0,len(x_list[0])):y_v=x_list[0][fy]y_v= str(y_v)if y_v[-2:]=='.0':y_v=y_v[:-2]worksheet_zheng.write(fx,fy,y_v,red_font)# print(x_list)except Exception as e:pass# print(e)worksheet_zheng = workbook_zheng.add_worksheet('更多信息')       #新建sheet(sheet的名称为"sheet1")# worksheet_zheng.set_column(0,1,50)write_list=write_list_2[1]for fx in range(0,len(write_list)):try:x_list=write_list[fx]red_font = workbook_zheng.add_format(x_list[1])# print(x_list[1])for fy in range(0,len(x_list[0])):y_v=x_list[0][fy]y_v= str(y_v)if y_v[-2:]=='.0':y_v=y_v[:-2]worksheet_zheng.write(fx,fy,y_v,red_font)# print(x_list)except Exception as e:pass# print(e)workbook_zheng.close()def get_current_time():from datetime import datetimenow = datetime.now()return str(now.strftime("%Y_%m_%d %H_%M_%S"))import json
def read_file_to_json(file_path):with open(file_path, 'r', encoding='utf-8') as file:try:json_data = json.load(file)except UnicodeDecodeError:file.seek(0)json_data = json.load(file, encoding='gbk')return json_dataimport jsondef write_json_to_file(file_path, data_list):with open(file_path, 'w', encoding='utf-8') as file:json.dump(data_list, file, ensure_ascii=False, indent=4)import pickle
def write_dict_to_file(dict_obj, file_path):with open(file_path, 'wb') as file:pickle.dump(dict_obj, file)def read_dict_from_file(file_path):with open(file_path, 'rb') as file:dict_obj = pickle.load(file)return dict_objdef json_to_string(data):"""将JSON对象转换为字符串"""return json.dumps(data)def string_to_json(string):"""将字符串转换为JSON对象"""# print('str  ',string    )return json.loads(string)def extract_numbers2(string):numbers = re.findall(r'\d+', string)return [int(num) for num in numbers]def num_col(number):number=number+1column = ""while number > 0:number -= 1column = chr(number % 26 + 65) + columnnumber //= 26return columndef col_num(column):column=str(column).upper()number = 0for i in range(len(column)):number = number * 26 + ord(column[i]) - 64return number - 1def move_file(p1,p2):xx= p1.replace('\\','/').split('/')[-1]ss= p2+'/'+xxmake_file(ss)for x in get_all_file(p1):shutil.copy(x,ss)import copy
def fill_list_fuc(lst,mlen):lst =copy.deepcopy(lst)# 如果列表长度小于6,则在列表末尾添加空字符串,直到长度为6while len(lst) < mlen:lst.append("")return lstdef split_list_fuc(l, n=2):"""把一个list切分为n份,返回n个list:param l: 要切分的列表:param n: 切分的份数:return: 切分后的列表"""if l is None or n < 1:return []if n >= len(l):return [l[i:i+1] for i in range(len(l))]return [l[i:i + len(l)//n + 1] for i in range(0, len(l), len(l)//n + 1)]import   copy
import random# 给定一个list,打乱顺序
def shuffle_list_fuc(data_list):re_list = data_list.copy()for i in range(len(re_list)-1, 0, -1):j = random.randint(0, i)re_list[i], re_list[j] = re_list[j], re_list[i]return re_listdef num_col(number):number=number+1column = ""while number > 0:number -= 1column = chr(number % 26 + 65) + columnnumber //= 26return columndef col_num(column):column=str(column).upper()number = 0for i in range(len(column)):number = number * 26 + ord(column[i]) - 64return number - 1import redef get_parse_excel_formula_fuc(formula):pattern = r'([A-Za-z]+)(\d+)'matches = re.findall(pattern, formula)results = []for match in matches:alpha = match[0]num = int(match[1])results.append((alpha, num))return resultsdef get_range_fuc(main_list, range_str):# 解析范围区域range_list = get_parse_excel_formula_fuc(range_str)[:2]if len(range_list) == 0:return Noneif len(range_list) == 1:# 单个单元格col = col_num(range_list[0][0])row = range_list[0][1] - 1if col < len(main_list[0]) and row < len(main_list):return main_list[row][col]else:return Noneelse:# 区域范围start_col = col_num(range_list[0][0])start_row = range_list[0][1] - 1end_col = col_num(range_list[1][0])end_row = range_list[1][1] - 1if (start_col < len(main_list[0]) and start_row < len(main_list) andend_col < len(main_list[0]) and end_row < len(main_list)):result = []for row_index in range(start_row, end_row + 1):row_result = []for col_index in range(start_col, end_col + 1):row_result.append(main_list[row_index][col_index])result.append(row_result)return resultelse:return Nonedef get_xlsx_all_list_fuc(xlsx_path,sign=0):import xlrdread_one=xlsx_pathmatch_write_in_fp=xlrd.open_workbook(xlsx_path)sheet_names_list = match_write_in_fp.sheet_names()re_list =[]for one in sheet_names_list[:]:sh_read=match_write_in_fp.sheet_by_name(one)  #根据sheet索引获得第一个sheet。sum_list=[]for line_number in range(sign,sh_read.nrows):try:alone_line1=sh_read.row_values(line_number)# alone_line= [int(x) for x in alone_line]alone_line=[]for x in alone_line1:if x=='':alone_line.append('')else:try:alone_line.append(int(x))except:passalone_line.append(x)sum_list.append(alone_line)except Exception as e:passprint(e)# return sum_listre_list.append([one,sum_list])return re_listimport datetimeclass TimeLogger:def __init__(self):import datetimeself.start_time = datetime.datetime.now()self.previous_time = self.start_timeself.time_log = []self.time_number = 0def get_now_time(self,show_str = ''):current_time = datetime.datetime.now()time_diff = current_time - self.previous_timeself.previous_time = current_timeself.time_log.append((current_time, time_diff))current_time_str = current_time.strftime("%Y-%m-%d %H:%M:%S.%f")time_diff_str = str(time_diff)# time_log,append()if str(show_str)=='':print(["时间差值: ",self.time_number,time_diff_str, current_time_str])else:print(["时间差值: ",self.time_number,time_diff_str, current_time_str,show_str])self.time_number=self.time_number+1# from basci_fuc import *# 示例用法
logger = TimeLogger()logger.get_now_time()def get_sanitize_filename_fuc(input_str: str) -> str:"""将输入字符串转换为合法的Windows 10文件名:1. 替换所有禁止字符(\ / : * ? " < > |)为空格2. 合并连续空格为单个,并去除首尾空格"""# 定义Windows 10禁止的文件名字符集合forbidden_chars = {'\\', '/', ':', '*', '?', '"', '<', '>', '|'}# 步骤1:替换禁止字符为空格temp_str = ''.join(' ' if char in forbidden_chars else char for char in input_str)# 步骤2:合并连续空格并去除首尾空格(split默认分割所有空白,join用单空格连接)return ' '.join(temp_str.split())def get_sanitize_filename_fuc(input_str: str) -> str:"""将输入字符串转换为合法的Windows文件名:1. 替换Windows禁止字符(\ / : * ? " < > |)和中文标点为空格2. 合并连续空格为单个,并去除首尾空格"""# 1. 定义需要替换的字符集合(Windows禁止字符 + 常见中文标点)forbidden_chars = {'\\', '/', ':', '*', '?', '"', '<', '>', '|',  # Windows原生禁止字符',', '。', '!', '?', '、', ';', ':', '“', '”', '‘', '’',  # 中文常用标点'(', ')', '【', '】', '《', '》', '…', '—', '~', '·'       # 扩展中文标点(可根据需求增减)}# 2. 替换所有禁止字符/中文标点为空格cleaned_str = ''.join(' ' if char in forbidden_chars else char for char in input_str)# 3. 合并连续空格 + 去除首尾空格(split默认按所有空白分割,join用单空格连接)return ' '.join(cleaned_str.split())from datetime import datetimedef generate_file_name(data: dict, lang: str = "zh", compact_date: bool = False) -> str:"""从JSON数据中生成包含「创建时间」和「对话时长」的合法文件名:- 创建时间:提取inserted_at的年月日(支持紧凑格式如YYYYMMDD)- 对话时长:updated_at - inserted_at 的「天数+小时」- 参数:- lang: 时长语言("zh"=中文,"en"=英文)- compact_date: 是否用紧凑日期格式(默认False,即YYYY-MM-DD)- 返回:合法文件名(无Windows禁止字符)"""# 1. 提取时间字段(处理缺失情况)inserted_at_str = data.get("inserted_at", "")updated_at_str = data.get("updated_at", "")if not inserted_at_str or not updated_at_str:return "missing_time_info"  # 时间缺失的默认值# 2. 解析时间字符串为datetime对象(支持时区)try:inserted_dt = datetime.fromisoformat(inserted_at_str)updated_dt = datetime.fromisoformat(updated_at_str)except ValueError:return "invalid_time_format"  # 时间格式错误的默认值# 3. 计算时长(确保updated在inserted之后)delta = updated_dt - inserted_dttotal_seconds = delta.total_seconds()# 处理时间倒序(如updated比inserted早)if total_seconds < 0:days = 0hours = 0else:days = int(total_seconds // 86400)  # 总天数(向下取整)hours = int((total_seconds % 86400) // 3600)  # 剩余小时数(向下取整)# 4. 格式化创建时间(年月日)date_format = "%Y%m%d" if compact_date else "%Y-%m-%d"created_date_str = inserted_dt.strftime(date_format)  # 如20250210 或 2025-02-10# 5. 生成时长字符串(支持中英文)duration_str = (f"{days}d{hours}h" if lang == "en" else f"{days}{hours}小时")  # 如0d2h 或 0天2小时# 6. 组合文件名(合法字符:中文/英文/数字/-/_/空格)file_name = f"{created_date_str}_{duration_str}"# (可选)去除可能的法律字符(如多余的空格或符号,此处已处理)return file_name# # ------------------- 测试用例 -------------------
# data = {
#     "id": "f6952608-3a92-4b66-8400-cc2aedab0b42",
#     "title": "矩形下料的算法都有哪学",
#     "inserted_at": "2025-02-10T19:35:14.921000+08:00",
#     "updated_at": "2025-02-10T22:07:14.030000+08:00"  # 时长:2小时31分 → 0天2小时
# }# # 测试1:中文+默认日期格式
# print(generate_file_name(data))  # 输出:2025-02-10_0天2小时# # 测试2:英文+紧凑日期格式
# print(generate_file_name(data, lang="en", compact_date=True))  # 输出:20250210_0d2h# # 测试3:长时间对话(假设updated_at是2天后)
# data["updated_at"] = "2025-02-12T10:00:00+08:00"  # 时长:1天14小时34分 → 1天14小时
# print(generate_file_name(data))  # 输出:2025-02-10_1天14小时# w文件区域。
main_path=os.getcwd()  # exe文件存放的路径。
exe_path=get_exe_path()  #  打包以后资源文件存放的路径。read_path=main_path+'/读取文件'
write_path=main_path+'/写入文件'
# temp_path=main_path+'/临时文件'# make_file(read_path)make_file(write_path)json_dict = read_file_to_json("conversations.json")write_json_to_file("格式化导出文件.json",json_dict)del_file(write_path)for mi,chat in enumerate(json_dict):name = chat['title']name_new= get_sanitize_filename_fuc(name)print([mi,name,name_new])time_str=generate_file_name(chat)print(time_str)  # 输出:2025-02-10_0天2小时# write_json_to_file(f'{write_path    }/{mi}.json',chat)write_json_to_file(f'{write_path    }/{time_str} {name_new}.json',chat)'''3.导出得到 小json了,自己用nodejs的代码处理一下即可![在这里插入图片描述](https://i-blog.csdnimg.cn/direct/2d279d44cb3b4b878c86f22c56a581e6.png)对应的js代码如下```javascript// const fs = require('fs-extra');
// const path = require('path');
// const puppeteer = require('puppeteer-core');
// const { Document, Paragraph, TextRun, HeadingLevel, AlignmentType, Table, TableCell, TableRow, BorderStyle, ShadingType, convertInchesToTwip, Packer } = require('docx');
// const markdownIt = require('markdown-it');
// const hljs = require('highlight.js');// class DeepSeekRenderer {
//     constructor() {
//         this.md = new markdownIt({
//             html: true,
//             linkify: true,
//             typographer: true,
//             highlight: function (str, lang) {
//                 if (lang && hljs.getLanguage(lang)) {
//                     try {
//                         return '<pre class="hljs"><code>' +
//                                hljs.highlight(str, { language: lang, ignoreIllegals: true }).value +
//                                '</code></pre>';
//                     } catch (__) {}
//                 }
//                 return '<pre class="hljs"><code>' + this.md.utils.escapeHtml(str) + '</code></pre>';
//             }
//         });//         // Chrome 路径配置(Windows)
//         this.chromePaths = [
//             'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe',
//             'C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe',
//             process.env.LOCALAPPDATA + '\\Google\\Chrome\\Application\\chrome.exe',
//             process.env.PROGRAMFILES + '\\Google\\Chrome\\Application\\chrome.exe',
//             process.env['PROGRAMFILES(X86)'] + '\\Google\\Chrome\\Application\\chrome.exe'
//         ];
//     }//     /**
//      * 主函数:处理 JSON 文件并生成 PDF 和 Word 文档
//      */
//     async main(jsonFile, pdfOutput, docxOutput) {
//         try {
//             console.log('🚀 开始处理对话文件...');//             // 读取和解析 JSON
//             const data = await this.loadJSON(jsonFile);
//             const conversation = this.flattenConversation(data);//             console.log(`📊 解析完成:共 ${conversation.length} 条消息`);//             // 并行生成 PDF 和 Word
//             await Promise.all([
//                 this.generatePDF(conversation, data.title || 'DeepSeek对话', pdfOutput),
//                 this.generateWord(conversation, data.title || 'DeepSeek对话', docxOutput)
//             ]);//             console.log('✅ 文档生成完成!');
//             console.log(`📄 PDF: ${pdfOutput}`);
//             console.log(`📝 Word: ${docxOutput}`);//         } catch (error) {
//             console.error('❌ 处理失败:', error);
//             throw error;
//         }
//     }//     /**
//      * 读取 JSON 文件
//      */
//     async loadJSON(filePath) {
//         try {
//             const data = await fs.readFile(filePath, 'utf8');
//             return JSON.parse(data);
//         } catch (error) {
//             throw new Error(`读取 JSON 文件失败: ${error.message}`);
//         }
//     }//     /**
//      * 扁平化对话树结构为线性消息列表
//      */
//     flattenConversation(data) {
//         const messages = [];
//         const mapping = data.mapping || {};//         // 从 root 开始遍历对话树
//         let currentNode = 'root';
//         while (currentNode && mapping[currentNode]) {
//             const node = mapping[currentNode];
//             const messageData = node.message;//             if (messageData) {
//                 const fragments = messageData.fragments || [];//                 fragments.forEach(fragment => {
//                     if (fragment.content && fragment.content.trim()) {
//                         messages.push({
//                             type: fragment.type,
//                             content: fragment.content.trim(),
//                             timestamp: messageData.inserted_at,
//                             model: messageData.model || 'unknown',
//                             nodeId: currentNode
//                         });
//                     }
//                 });
//             }//             // 移动到下一个节点
//             const children = node.children || [];
//             currentNode = children[0] || null;
//         }//         return messages;
//     }//     /**
//      * 查找 Chrome 浏览器路径
//      */
//     async findChromePath() {
//         for (const chromePath of this.chromePaths) {
//             try {
//                 await fs.access(chromePath);
//                 console.log(`🔍 找到 Chrome: ${chromePath}`);
//                 return chromePath;
//             } catch (e) {
//                 // 继续查找
//             }
//         }
//         throw new Error('未找到 Chrome 浏览器,请确保已安装 Chrome');
//     }//     /**
//      * 生成 PDF 文档
//      */
//     async generatePDF(conversation, title, outputPath) {
//         console.log('📄 开始生成 PDF...');//         const browser = await puppeteer.launch({
//             executablePath: await this.findChromePath(),
//             headless: true,
//             args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage']
//         });//         try {
//             const page = await browser.newPage();
//             const htmlContent = this.generateHTML(conversation, title);//             await page.setContent(htmlContent, { waitUntil: 'networkidle0' });//             await page.pdf({
//                 path: outputPath,
//                 format: 'A4',
//                 printBackground: true,
//                 margin: {
//                     top: '20mm',
//                     right: '20mm',
//                     bottom: '20mm',
//                     left: '20mm'
//                 }
//             });//             await page.close();
//         } finally {
//             await browser.close();
//         }
//     }//     /**
//      * 生成 HTML 内容(用于 PDF 转换)
//      */
//     generateHTML(conversation, title) {
//         const messagesHTML = conversation.map(msg => {
//             const content = this.formatContent(msg.content);
//             const timestamp = this.formatTimestamp(msg.timestamp);//             switch (msg.type) {
//                 case 'REQUEST':
//                     return `
//                     <div class="message user-message">
//                         <div class="message-content">
//                             <div class="message-text">${content}</div>
//                             <div class="message-time">${timestamp}</div>
//                         </div>
//                         <div class="message-avatar">👤</div>
//                     </div>
//                     `;//                 case 'THINK':
//                     return `
//                     <div class="message think-message">
//                         <div class="message-avatar">💭</div>
//                         <div class="message-content">
//                             <div class="message-header">思考过程</div>
//                             <div class="message-text think-text">${content}</div>
//                         </div>
//                     </div>
//                     `;//                 case 'RESPONSE':
//                     return `
//                     <div class="message ai-message">
//                         <div class="message-avatar">🤖</div>
//                         <div class="message-content">
//                             <div class="message-header">AI 回复</div>
//                             <div class="message-text">${content}</div>
//                             <div class="message-time">${timestamp}</div>
//                         </div>
//                     </div>
//                     `;//                 default:
//                     return '';
//             }
//         }).join('');//         return `
//         <!DOCTYPE html>
//         <html>
//         <head>
//             <meta charset="UTF-8">
//             <title>${title}</title>
//             <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.0.0/styles/github-dark.min.css">
//             <style>
//                 @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');//                 * {
//                     margin: 0;
//                     padding: 0;
//                     box-sizing: border-box;
//                 }//                 body {
//                     font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
//                     line-height: 1.6;
//                     color: #1a1a1a;
//                     background: #f8f9fa;
//                     padding: 20px;
//                 }//                 .chat-container {
//                     max-width: 800px;
//                     margin: 0 auto;
//                     background: white;
//                     border-radius: 12px;
//                     box-shadow: 0 4px 20px rgba(0, 0, 0, 0.1);
//                     overflow: hidden;
//                 }//                 .chat-header {
//                     background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
//                     color: white;
//                     padding: 30px;
//                     text-align: center;
//                 }//                 .chat-title {
//                     font-size: 24px;
//                     font-weight: 700;
//                     margin-bottom: 8px;
//                 }//                 .chat-subtitle {
//                     font-size: 14px;
//                     opacity: 0.9;
//                 }//                 .messages-container {
//                     padding: 30px;
//                 }//                 .message {
//                     display: flex;
//                     margin-bottom: 20px;
//                     animation: fadeIn 0.3s ease-in;
//                 }//                 .user-message {
//                     justify-content: flex-end;
//                 }//                 .message-avatar {
//                     width: 40px;
//                     height: 40px;
//                     border-radius: 50%;
//                     display: flex;
//                     align-items: center;
//                     justify-content: center;
//                     font-size: 18px;
//                     flex-shrink: 0;
//                 }//                 .user-message .message-avatar {
//                     order: 2;
//                     margin-left: 12px;
//                     background: #007AFF;
//                 }//                 .ai-message .message-avatar,
//                 .think-message .message-avatar {
//                     margin-right: 12px;
//                     background: #34C759;
//                 }//                 .think-message .message-avatar {
//                     background: #FF9500;
//                 }//                 .message-content {
//                     max-width: 85%;
//                     padding: 16px 20px;
//                     border-radius: 18px;
//                     position: relative;
//                 }//                 .user-message .message-content {
//                     background: #007AFF;
//                     color: white;
//                     border-bottom-right-radius: 4px;
//                 }//                 .ai-message .message-content {
//                     background: white;
//                     border: 1px solid #E5E5EA;
//                     border-bottom-left-radius: 4px;
//                     box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1);
//                 }//                 .think-message .message-content {
//                     background: #FFF3CD;
//                     border: 1px solid #FFEAA7;
//                     border-bottom-left-radius: 4px;
//                     border-left: 4px solid #FFA500;
//                 }//                 .message-header {
//                     font-size: 12px;
//                     font-weight: 600;
//                     margin-bottom: 6px;
//                     opacity: 0.8;
//                     text-transform: uppercase;
//                 }//                 .think-message .message-header {
//                     color: #856404;
//                 }//                 .message-text {
//                     font-size: 14px;
//                     line-height: 1.5;
//                 }//                 .think-text {
//                     font-style: italic;
//                     color: #856404;
//                     font-size: 13px;
//                 }//                 .message-time {
//                     font-size: 11px;
//                     opacity: 0.7;
//                     margin-top: 6px;
//                 }//                 .user-message .message-time {
//                     text-align: right;
//                 }//                 /* 代码高亮样式 */
//                 .hljs {
//                     background: #1a1a1a !important;
//                     border-radius: 8px;
//                     padding: 16px !important;
//                     font-family: 'Consolas', 'Monaco', 'Courier New', monospace;
//                     font-size: 13px;
//                     line-height: 1.4;
//                     margin: 12px 0;
//                 }//                 .hljs-keyword {
//                     color: #ff79c6 !important;
//                 }//                 .hljs-string {
//                     color: #f1fa8c !important;
//                 }//                 .hljs-function {
//                     color: #50fa7b !important;
//                 }//                 .hljs-comment {
//                     color: #6272a4 !important;
//                 }//                 .inline-code {
//                     background: #f1f3f4;
//                     color: #e91e63;
//                     padding: 2px 6px;
//                     border-radius: 4px;
//                     font-family: 'Consolas', 'Monaco', 'Courier New', monospace;
//                     font-size: 12px;
//                     border: 1px solid #e1e5e9;
//                 }//                 /* Markdown 样式 */
//                 .message-text h1, .message-text h2, .message-text h3 {
//                     margin: 16px 0 8px 0;
//                     font-weight: 600;
//                 }//                 .message-text h1 { font-size: 18px; }
//                 .message-text h2 { font-size: 16px; }
//                 .message-text h3 { font-size: 14px; }//                 .message-text ul, .message-text ol {
//                     margin: 8px 0;
//                     padding-left: 24px;
//                 }//                 .message-text li {
//                     margin: 4px 0;
//                 }//                 .message-text blockquote {
//                     border-left: 4px solid #007AFF;
//                     padding-left: 16px;
//                     margin: 12px 0;
//                     color: #666;
//                     font-style: italic;
//                 }//                 @keyframes fadeIn {
//                     from { opacity: 0; transform: translateY(10px); }
//                     to { opacity: 1; transform: translateY(0); }
//                 }//                 @media print {
//                     body {
//                         background: white !important;
//                         padding: 0 !important;
//                     }//                     .chat-container {
//                         box-shadow: none !important;
//                         border-radius: 0 !important;
//                     }
//                 }
//             </style>
//         </head>
//         <body>
//             <div class="chat-container">
//                 <div class="chat-header">
//                     <h1 class="chat-title">${title}</h1>
//                     <div class="chat-subtitle">AI对话记录 · ${new Date().toLocaleDateString('zh-CN')}</div>
//                 </div>
//                 <div class="messages-container">
//                     ${messagesHTML}
//                 </div>
//             </div>
//             <script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.0.0/highlight.min.js"></script>
//             <script>hljs.highlightAll();</script>
//         </body>
//         </html>
//         `;
//     }//     /**
//      * 格式化内容(Markdown 转 HTML)
//      */
//     formatContent(content) {
//         // 先处理代码块,避免被 Markdown 解析干扰
//         let processed = content.replace(/```(\w+)?\n([\s\S]*?)```/g, (match, lang, code) => {
//             return `\n\n\`\`\`${lang || ''}\n${code}\n\`\`\`\n\n`;
//         });//         // 使用 markdown-it 转换
//         processed = this.md.render(processed);//         return processed;
//     }//     /**
//      * 格式化时间戳
//      */
//     formatTimestamp(timestamp) {
//         try {
//             const date = new Date(timestamp);
//             return date.toLocaleString('zh-CN', {
//                 year: 'numeric',
//                 month: '2-digit',
//                 day: '2-digit',
//                 hour: '2-digit',
//                 minute: '2-digit'
//             });
//         } catch (e) {
//             return '';
//         }
//     }//     /**
//      * 生成 Word 文档
//      */
//     async generateWord(conversation, title, outputPath) {
//         console.log('📝 开始生成 Word 文档...');//         const children = [];//         // 添加标题
//         children.push(
//             new Paragraph({
//                 text: title,
//                 heading: HeadingLevel.HEADING_1,
//                 alignment: AlignmentType.CENTER,
//                 spacing: { after: 400 }
//             })
//         );//         // 添加元信息
//         children.push(
//             new Paragraph({
//                 text: `生成时间: ${new Date().toLocaleString('zh-CN')}`,
//                 alignment: AlignmentType.CENTER,
//                 spacing: { after: 600 }
//             })
//         );//         // 添加对话内容
//         for (const msg of conversation) {
//             children.push(...this.createWordMessage(msg));
//             children.push(new Paragraph({ text: '', spacing: { after: 200 } }));
//         }//         // 创建文档
//         const doc = new Document({
//             sections: [{
//                 properties: {},
//                 children: children
//             }]
//         });//         // 保存文档 - 修复这里,使用 Packer 而不是 docx.Packer
//         const buffer = await Packer.toBuffer(doc);
//         await fs.writeFile(outputPath, buffer);
//     }//     /**
//      * 创建 Word 消息元素
//      */
//     createWordMessage(msg) {
//         const elements = [];
//         const timestamp = this.formatTimestamp(msg.timestamp);//         switch (msg.type) {
//             case 'REQUEST':
//                 // 用户消息 - 右侧蓝色
//                 elements.push(
//                     new Paragraph({
//                         alignment: AlignmentType.RIGHT,
//                         spacing: { after: 200 },
//                         children: [
//                             new TextRun({
//                                 text: msg.content,
//                                 color: 'FFFFFF',
//                                 size: 20
//                             })
//                         ],
//                         border: {
//                             bottom: { style: BorderStyle.NONE },
//                             top: { style: BorderStyle.NONE },
//                             left: { style: BorderStyle.NONE },
//                             right: { style: BorderStyle.NONE }
//                         },
//                         shading: {
//                             type: ShadingType.SOLID,
//                             color: '007AFF',
//                             fill: '007AFF'
//                         }
//                     }),
//                     new Paragraph({
//                         alignment: AlignmentType.RIGHT,
//                         children: [
//                             new TextRun({
//                                 text: `👤 用户 · ${timestamp}`,
//                                 color: '666666',
//                                 size: 16,
//                                 italics: true
//                             })
//                         ]
//                     })
//                 );
//                 break;//             case 'THINK':
//                 // 思考过程 - 左侧灰色
//                 elements.push(
//                     new Paragraph({
//                         alignment: AlignmentType.LEFT,
//                         spacing: { after: 200 },
//                         children: [
//                             new TextRun({
//                                 text: '💭 思考过程: ',
//                                 color: 'FF9500',
//                                 size: 18,
//                                 bold: true
//                             }),
//                             new TextRun({
//                                 text: msg.content,
//                                 color: '8B8000',
//                                 size: 18,
//                                 italics: true
//                             })
//                         ]
//                     })
//                 );
//                 break;//             case 'RESPONSE':
//                 // AI 回复 - 左侧带样式
//                 elements.push(
//                     new Paragraph({
//                         alignment: AlignmentType.LEFT,
//                         spacing: { after: 200 },
//                         children: [
//                             new TextRun({
//                                 text: '🤖 AI回复',
//                                 color: '34C759',
//                                 size: 18,
//                                 bold: true
//                             })
//                         ]
//                     }),
//                     new Paragraph({
//                         alignment: AlignmentType.LEFT,
//                         spacing: { after: 200 },
//                         children: [
//                             new TextRun({
//                                 text: msg.content,
//                                 color: '000000',
//                                 size: 20
//                             })
//                         ],
//                         border: {
//                             bottom: { style: BorderStyle.SINGLE, color: 'E5E5EA', size: 1 },
//                             top: { style: BorderStyle.SINGLE, color: 'E5E5EA', size: 1 },
//                             left: { style: BorderStyle.SINGLE, color: 'E5E5EA', size: 1 },
//                             right: { style: BorderStyle.SINGLE, color: 'E5E5EA', size: 1 }
//                         },
//                         shading: {
//                             type: ShadingType.SOLID,
//                             color: 'F2F2F7',
//                             fill: 'F2F2F7'
//                         }
//                     }),
//                     new Paragraph({
//                         alignment: AlignmentType.LEFT,
//                         children: [
//                             new TextRun({
//                                 text: timestamp,
//                                 color: '666666',
//                                 size: 16,
//                                 italics: true
//                             })
//                         ]
//                     })
//                 );
//                 break;
//         }//         return elements;
//     }
// }// // 并发处理函数
// async function processAllFiles() {
//     const renderer = new DeepSeekRenderer();//     try {
//         // 确保输出目录存在
//         await fs.mkdir('./output', { recursive: true });//         // 读取 data 目录下的所有 JSON 文件
//         const dataDir = './data';
//         const files = await fs.readdir(dataDir);
//         const jsonFiles = files.filter(file => file.endsWith('.json'));//         if (jsonFiles.length === 0) {
//             console.log('在 ./data 目录中没有找到 JSON 文件');
//             return;
//         }//         console.log(`找到 ${jsonFiles.length} 个 JSON 文件,开始并发处理...`);//         // 设置并发数(根据你的CPU核心数调整)
//         const CONCURRENCY = 12;//         // 创建任务数组
//         const tasks = jsonFiles.map(jsonFile => {
//             const jsonPath = path.join(dataDir, jsonFile);
//             const baseName = path.basename(jsonFile, '.json');
//             const PDF_OUTPUT = path.join('./output', `${baseName}.pdf`);
//             const DOCX_OUTPUT = path.join('./output', `${baseName}.docx`);//             return {
//                 jsonFile,
//                 jsonPath,
//                 PDF_OUTPUT,
//                 DOCX_OUTPUT
//             };
//         });//         // 并发执行函数
//         const processFile = async (task) => {
//             try {
//                 console.log(`开始处理: ${task.jsonFile}`);
//                 await renderer.main(task.jsonPath, task.PDF_OUTPUT, task.DOCX_OUTPUT);
//                 console.log(`完成: ${task.jsonFile}`);
//                 return { success: true, file: task.jsonFile };
//             } catch (error) {
//                 console.error(`处理文件 ${task.jsonFile} 时出错:`, error);
//                 return { success: false, file: task.jsonFile, error };
//             }
//         };//         // 使用并发控制执行所有任务
//         const results = await runConcurrent(tasks, processFile, CONCURRENCY);//         // 统计结果
//         const successful = results.filter(r => r.success).length;
//         const failed = results.filter(r => !r.success).length;//         console.log(`\n处理完成! 成功: ${successful}, 失败: ${failed}`);//     } catch (error) {
//         console.error('程序执行失败:', error);
//         process.exit(1);
//     }
// }// // 并发控制函数
// async function runConcurrent(tasks, worker, concurrency) {
//     const results = [];
//     const executing = [];//     for (const task of tasks) {
//         // 创建 Promise
//         const p = worker(task).then(result => {
//             results.push(result);
//             // 任务完成后从执行队列中移除
//             executing.splice(executing.indexOf(p), 1);
//         });//         executing.push(p);//         // 如果达到并发限制,等待一个任务完成
//         if (executing.length >= concurrency) {
//             await Promise.race(executing);
//         }
//     }//     // 等待所有剩余任务完成
//     await Promise.all(executing);
//     return results;
// }// // 运行程序
// if (require.main === module) {
//     processAllFiles();
// }// module.exports = DeepSeekRenderer;const fs = require('fs-extra');
const path = require('path');
const puppeteer = require('puppeteer-core');
const { Document, Paragraph, TextRun, HeadingLevel, AlignmentType, BorderStyle, ShadingType, Packer } = require('docx');
const markdownIt = require('markdown-it');
const hljs = require('highlight.js');class DeepSeekRenderer {constructor() {this.md = new markdownIt({html: true,linkify: true,typographer: true,highlight: (str, lang) => {if (lang && hljs.getLanguage(lang)) {try {return '<pre class="hljs"><code>' +hljs.highlight(str, { language: lang, ignoreIllegals: true }).value +'</code></pre>';} catch (__) {}}return '<pre class="hljs"><code>' + this.md.utils.escapeHtml(str) + '</code></pre>';}});// Chrome 路径配置this.chromePaths = ['C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe','C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe',process.env.LOCALAPPDATA + '\\Google\\Chrome\\Application\\chrome.exe',process.env.PROGRAMFILES + '\\Google\\Chrome\\Application\\chrome.exe',process.env['PROGRAMFILES(X86)'] + '\\Google\\Chrome\\Application\\chrome.exe'];this.browser = null;}/*** 初始化浏览器实例(单例模式)*/async getBrowser() {if (!this.browser) {const chromePath = await this.findChromePath();this.browser = await puppeteer.launch({executablePath: chromePath,headless: true,args: ['--no-sandbox','--disable-setuid-sandbox','--disable-dev-shm-usage','--disable-gpu','--disable-web-security','--disable-features=VizDisplayCompositor'],timeout: 120000 // 120秒超时});}return this.browser;}/*** 关闭浏览器*/async closeBrowser() {if (this.browser) {await this.browser.close();this.browser = null;}}/*** 主函数:处理 JSON 文件并生成 PDF 和 Word 文档*/async main(jsonFile, pdfOutput, docxOutput, retryCount = 0) {const maxRetries = 2;try {console.log('🚀 开始处理对话文件...');// 读取和解析 JSONconst data = await this.loadJSON(jsonFile);const conversation = this.flattenConversation(data);console.log(`📊 解析完成:共 ${conversation.length} 条消息`);// 生成 PDF 和 Wordawait this.generatePDF(conversation, data.title || 'DeepSeek对话', pdfOutput);await this.generateWord(conversation, data.title || 'DeepSeek对话', docxOutput);console.log('✅ 文档生成完成!');console.log(`📄 PDF: ${pdfOutput}`);console.log(`📝 Word: ${docxOutput}`);} catch (error) {console.error('❌ 处理失败:', error.message);if (retryCount < maxRetries) {console.log(`🔄 重试中... (${retryCount + 1}/${maxRetries})`);await this.delay(2000); // 等待2秒后重试return await this.main(jsonFile, pdfOutput, docxOutput, retryCount + 1);}throw error;}}/*** 延迟函数*/delay(ms) {return new Promise(resolve => setTimeout(resolve, ms));}/*** 读取 JSON 文件*/async loadJSON(filePath) {try {const data = await fs.readFile(filePath, 'utf8');return JSON.parse(data);} catch (error) {throw new Error(`读取 JSON 文件失败: ${error.message}`);}}/*** 扁平化对话树结构为线性消息列表*/flattenConversation(data) {const messages = [];const mapping = data.mapping || {};let currentNode = 'root';while (currentNode && mapping[currentNode]) {const node = mapping[currentNode];const messageData = node.message;if (messageData) {const fragments = messageData.fragments || [];fragments.forEach(fragment => {if (fragment.content && fragment.content.trim()) {messages.push({type: fragment.type,content: fragment.content.trim(),timestamp: messageData.inserted_at,model: messageData.model || 'unknown',nodeId: currentNode});}});}// 移动到下一个节点const children = node.children || [];currentNode = children[0] || null;}return messages;}/*** 查找 Chrome 浏览器路径*/async findChromePath() {for (const chromePath of this.chromePaths) {try {await fs.access(chromePath);console.log(`🔍 找到 Chrome: ${chromePath}`);return chromePath;} catch (e) {// 继续查找}}throw new Error('未找到 Chrome 浏览器,请确保已安装 Chrome');}/*** 生成 PDF 文档*/async generatePDF(conversation, title, outputPath) {console.log('📄 开始生成 PDF...');const browser = await this.getBrowser();const page = await browser.newPage();try {// 设置页面超时page.setDefaultTimeout(120000);page.setDefaultNavigationTimeout(120000);const htmlContent = this.generateHTML(conversation, title);await page.setContent(htmlContent, {waitUntil: ['networkidle0', 'domcontentloaded'],timeout: 1200000});// 等待所有资源加载完成await page.evaluate(async () => {await new Promise((resolve) => {if (document.readyState === 'complete') {resolve();} else {window.addEventListener('load', resolve, { once: true });}});});await page.pdf({path: outputPath,format: 'A4',printBackground: true,margin: {top: '20mm',right: '20mm',bottom: '20mm',left: '20mm'},timeout: 1200000});} finally {await page.close().catch(() => {}); // 忽略页面关闭错误}}/*** 生成 HTML 内容(用于 PDF 转换)*/generateHTML(conversation, title) {const messagesHTML = conversation.map(msg => {const content = this.formatContent(msg.content);const timestamp = this.formatTimestamp(msg.timestamp);switch (msg.type) {case 'REQUEST':return `<div class="message user-message"><div class="message-content"><div class="message-text">${content}</div><div class="message-time">${timestamp}</div></div><div class="message-avatar">👤</div></div>`;case 'THINK':return `<div class="message think-message"><div class="message-avatar">💭</div><div class="message-content"><div class="message-header">思考过程</div><div class="message-text think-text">${content}</div></div></div>`;case 'RESPONSE':return `<div class="message ai-message"><div class="message-avatar">🤖</div><div class="message-content"><div class="message-header">AI 回复</div><div class="message-text">${content}</div><div class="message-time">${timestamp}</div></div></div>`;default:return '';}}).join('');return `<!DOCTYPE html><html><head><meta charset="UTF-8"><title>${title}</title><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.0.0/styles/github-dark.min.css"><style>@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');* {margin: 0;padding: 0;box-sizing: border-box;}body {font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;line-height: 1.6;color: #1a1a1a;background: #f8f9fa;padding: 20px;}.chat-container {max-width: 800px;margin: 0 auto;background: white;border-radius: 12px;box-shadow: 0 4px 20px rgba(0, 0, 0, 0.1);overflow: hidden;}.chat-header {background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);color: white;padding: 30px;text-align: center;}.chat-title {font-size: 24px;font-weight: 700;margin-bottom: 8px;}.chat-subtitle {font-size: 14px;opacity: 0.9;}.messages-container {padding: 30px;}.message {display: flex;margin-bottom: 20px;animation: fadeIn 0.3s ease-in;}.user-message {justify-content: flex-end;}.message-avatar {width: 40px;height: 40px;border-radius: 50%;display: flex;align-items: center;justify-content: center;font-size: 18px;flex-shrink: 0;}.user-message .message-avatar {order: 2;margin-left: 12px;background: #007AFF;}.ai-message .message-avatar,.think-message .message-avatar {margin-right: 12px;background: #34C759;}.think-message .message-avatar {background: #FF9500;}.message-content {max-width: 85%;padding: 16px 20px;border-radius: 18px;position: relative;}.user-message .message-content {background: #007AFF;color: white;border-bottom-right-radius: 4px;}.ai-message .message-content {background: white;border: 1px solid #E5E5EA;border-bottom-left-radius: 4px;box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1);}.think-message .message-content {background: #FFF3CD;border: 1px solid #FFEAA7;border-bottom-left-radius: 4px;border-left: 4px solid #FFA500;}.message-header {font-size: 12px;font-weight: 600;margin-bottom: 6px;opacity: 0.8;text-transform: uppercase;}.think-message .message-header {color: #856404;}.message-text {font-size: 14px;line-height: 1.5;}.think-text {font-style: italic;color: #856404;font-size: 13px;}.message-time {font-size: 11px;opacity: 0.7;margin-top: 6px;}.user-message .message-time {text-align: right;}/* 代码高亮样式 */.hljs {background: #1a1a1a !important;border-radius: 8px;padding: 16px !important;font-family: 'Consolas', 'Monaco', 'Courier New', monospace;font-size: 13px;line-height: 1.4;margin: 12px 0;}.hljs-keyword {color: #ff79c6 !important;}.hljs-string {color: #f1fa8c !important;}.hljs-function {color: #50fa7b !important;}.hljs-comment {color: #6272a4 !important;}.inline-code {background: #f1f3f4;color: #e91e63;padding: 2px 6px;border-radius: 4px;font-family: 'Consolas', 'Monaco', 'Courier New', monospace;font-size: 12px;border: 1px solid #e1e5e9;}/* Markdown 样式 */.message-text h1, .message-text h2, .message-text h3 {margin: 16px 0 8px 0;font-weight: 600;}.message-text h1 { font-size: 18px; }.message-text h2 { font-size: 16px; }.message-text h3 { font-size: 14px; }.message-text ul, .message-text ol {margin: 8px 0;padding-left: 24px;}.message-text li {margin: 4px 0;}.message-text blockquote {border-left: 4px solid #007AFF;padding-left: 16px;margin: 12px 0;color: #666;font-style: italic;}@keyframes fadeIn {from { opacity: 0; transform: translateY(10px); }to { opacity: 1; transform: translateY(0); }}@media print {body {background: white !important;padding: 0 !important;}.chat-container {box-shadow: none !important;border-radius: 0 !important;}}</style></head><body><div class="chat-container"><div class="chat-header"><h1 class="chat-title">${title}</h1><div class="chat-subtitle">AI对话记录 · ${new Date().toLocaleDateString('zh-CN')}</div></div><div class="messages-container">${messagesHTML}</div></div><script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.0.0/highlight.min.js"></script><script>document.addEventListener('DOMContentLoaded', function() {hljs.highlightAll();});</script></body></html>`;}/*** 格式化内容(Markdown 转 HTML)*/formatContent(content) {if (!content) return '';try {return this.md.render(content);} catch (error) {console.warn('Markdown 渲染失败,返回原始内容:', error.message);return content;}}/*** 格式化时间戳*/formatTimestamp(timestamp) {try {const date = new Date(timestamp);return date.toLocaleString('zh-CN', {year: 'numeric',month: '2-digit',day: '2-digit',hour: '2-digit',minute: '2-digit'});} catch (e) {return '';}}/*** 生成 Word 文档*/async generateWord(conversation, title, outputPath) {console.log('📝 开始生成 Word 文档...');const children = [];// 添加标题children.push(new Paragraph({text: title,heading: HeadingLevel.HEADING_1,alignment: AlignmentType.CENTER,spacing: { after: 400 }}));// 添加元信息children.push(new Paragraph({text: `生成时间: ${new Date().toLocaleString('zh-CN')}`,alignment: AlignmentType.CENTER,spacing: { after: 600 }}));// 添加对话内容for (const msg of conversation) {children.push(...this.createWordMessage(msg));children.push(new Paragraph({ text: '', spacing: { after: 200 } }));}// 创建文档const doc = new Document({sections: [{properties: {},children: children}]});// 保存文档const buffer = await Packer.toBuffer(doc);await fs.writeFile(outputPath, buffer);}/*** 创建 Word 消息元素*/createWordMessage(msg) {const elements = [];const timestamp = this.formatTimestamp(msg.timestamp);// 处理内容中的代码块const processedContent = this.processWordContent(msg.content);switch (msg.type) {case 'REQUEST':elements.push(new Paragraph({alignment: AlignmentType.RIGHT,spacing: { after: 100 },children: [new TextRun({text: `👤 用户 · ${timestamp}`,color: '666666',size: 16,italics: true})]}),new Paragraph({alignment: AlignmentType.RIGHT,spacing: { after: 200 },children: processedContent}));break;case 'THINK':elements.push(new Paragraph({alignment: AlignmentType.LEFT,spacing: { after: 100 },children: [new TextRun({text: '💭 思考过程 ',color: 'FF9500',size: 18,bold: true}),new TextRun({text: `· ${timestamp}`,color: '666666',size: 16,italics: true})]}),new Paragraph({alignment: AlignmentType.LEFT,spacing: { after: 200 },children: processedContent,border: {bottom: { style: BorderStyle.SINGLE, color: 'FFEAA7', size: 1 },top: { style: BorderStyle.SINGLE, color: 'FFEAA7', size: 1 },left: { style: BorderStyle.SINGLE, color: 'FFEAA7', size: 1 },right: { style: BorderStyle.SINGLE, color: 'FFEAA7', size: 1 }},shading: {type: ShadingType.SOLID,color: 'FFF3CD',fill: 'FFF3CD'}}));break;case 'RESPONSE':elements.push(new Paragraph({alignment: AlignmentType.LEFT,spacing: { after: 100 },children: [new TextRun({text: '🤖 AI回复 ',color: '34C759',size: 18,bold: true}),new TextRun({text: `· ${timestamp}`,color: '666666',size: 16,italics: true})]}),new Paragraph({alignment: AlignmentType.LEFT,spacing: { after: 200 },children: processedContent,border: {bottom: { style: BorderStyle.SINGLE, color: 'E5E5EA', size: 1 },top: { style: BorderStyle.SINGLE, color: 'E5E5EA', size: 1 },left: { style: BorderStyle.SINGLE, color: 'E5E5EA', size: 1 },right: { style: BorderStyle.SINGLE, color: 'E5E5EA', size: 1 }},shading: {type: ShadingType.SOLID,color: 'F2F2F7',fill: 'F2F2F7'}}));break;}return elements;}/*** 处理 Word 文档内容,特别处理代码块*/processWordContent(content) {if (!content) return [new TextRun({ text: '', size: 20 })];const textRuns = [];let currentText = '';let inCodeBlock = false;let codeLanguage = '';// 简单的代码块检测和处理const lines = content.split('\n');for (let i = 0; i < lines.length; i++) {const line = lines[i];// 检测代码块开始if (line.startsWith('```')) {if (!inCodeBlock) {// 开始代码块if (currentText) {textRuns.push(new TextRun({text: currentText,size: 20}));currentText = '';}inCodeBlock = true;codeLanguage = line.replace(/```/, '').trim();// 添加代码块标题textRuns.push(new TextRun({text: '\n代码块',size: 16,bold: true,color: '555555'}),new TextRun({text: codeLanguage ? ` (${codeLanguage})` : '',size: 14,color: '888888',italics: true}),new TextRun({text: '\n',size: 12}));} else {// 结束代码块if (currentText) {textRuns.push(new TextRun({text: currentText,size: 16,font: 'Consolas',color: '333333'}));currentText = '';}textRuns.push(new TextRun({text: '\n',size: 12}));inCodeBlock = false;codeLanguage = '';}continue;}if (inCodeBlock) {// 在代码块中,使用等宽字体currentText += line + '\n';} else {// 普通文本// 处理内联代码const parts = line.split('`');for (let j = 0; j < parts.length; j++) {if (j % 2 === 0) {// 普通文本currentText += parts[j];} else {// 内联代码if (currentText) {textRuns.push(new TextRun({text: currentText,size: 20}));currentText = '';}textRuns.push(new TextRun({text: parts[j],size: 18,font: 'Consolas',color: 'D63384',shading: {type: ShadingType.SOLID,color: 'F8F9FA',fill: 'F8F9FA'}}));}}currentText += '\n';}}// 添加剩余文本if (currentText) {textRuns.push(new TextRun({text: currentText.trim(),size: inCodeBlock ? 16 : 20,font: inCodeBlock ? 'Consolas' : undefined,color: inCodeBlock ? '333333' : undefined}));}return textRuns.length > 0 ? textRuns : [new TextRun({ text: content, size: 20 })];}
}// 并发处理函数
async function processAllFiles() {const renderer = new DeepSeekRenderer();try {// 确保输出目录存在await fs.mkdir('./output', { recursive: true });// 读取 data 目录下的所有 JSON 文件const dataDir = './data';const files = await fs.readdir(dataDir);const jsonFiles = files.filter(file => file.endsWith('.json'));if (jsonFiles.length === 0) {console.log('在 ./data 目录中没有找到 JSON 文件');return;}console.log(`找到 ${jsonFiles.length} 个 JSON 文件,开始处理...`);// 设置更保守的并发数const CONCURRENCY = 6;// 创建任务数组const tasks = jsonFiles.map(jsonFile => {const jsonPath = path.join(dataDir, jsonFile);const baseName = path.basename(jsonFile, '.json');const PDF_OUTPUT = path.join('./output', `${baseName}.pdf`);const DOCX_OUTPUT = path.join('./output', `${baseName}.docx`);return {jsonFile,jsonPath,PDF_OUTPUT,DOCX_OUTPUT};});// 并发执行函数const processFile = async (task) => {try {console.log(`开始处理: ${task.jsonFile}`);await renderer.main(task.jsonPath, task.PDF_OUTPUT, task.DOCX_OUTPUT);console.log(`完成: ${task.jsonFile}`);return { success: true, file: task.jsonFile };} catch (error) {console.error(`处理文件 ${task.jsonFile} 时出错:`, error.message);return { success: false, file: task.jsonFile, error: error.message };}};// 使用并发控制执行所有任务const results = await runConcurrent(tasks, processFile, CONCURRENCY);// 统计结果const successful = results.filter(r => r.success).length;const failed = results.filter(r => !r.success).length;console.log(`\n处理完成! 成功: ${successful}, 失败: ${failed}`);// 输出失败的文件const failedFiles = results.filter(r => !r.success).map(r => r.file);if (failedFiles.length > 0) {console.log('失败的文件:');failedFiles.forEach(file => console.log(`  - ${file}`));}} catch (error) {console.error('程序执行失败:', error);} finally {// 确保关闭浏览器await renderer.closeBrowser();}
}// 并发控制函数
async function runConcurrent(tasks, worker, concurrency) {const results = [];const executing = [];for (const task of tasks) {// 创建 Promiseconst p = worker(task).then(result => {results.push(result);// 任务完成后从执行队列中移除executing.splice(executing.indexOf(p), 1);});executing.push(p);// 如果达到并发限制,等待一个任务完成if (executing.length >= concurrency) {await Promise.race(executing);}}// 等待所有剩余任务完成await Promise.all(executing);return results;
}// 运行程序
if (require.main === module) {processAllFiles();
}module.exports = DeepSeekRenderer;

这样就导出ok了,渲染得到一些pdf
如图
在这里插入图片描述

剩下的自己捣鼓吧,如果实在不会,可以找我帮忙弄一下,完事

http://www.dtcms.com/a/442085.html

相关文章:

  • php 网站 项目如何用wordpress搭建个人博客
  • Prometheus监控K8S集群-ExternalName-endpoints-ElasticStack采集K8S集群日志实战
  • 解读DeepSeek-V3.2-Exp:基于MLA架构的Lightning Index如何重塑长上下文效率
  • 视频网站开发公司有哪些公司国家新闻出版
  • GitHub 热榜项目 - 日榜(2025-10-04)
  • datawhale RAG技术全栈指南 202509 第6次作业
  • 电影网站建设成本百怎么做网站
  • e语言可以做网站吗西安网站建设 翼驰
  • Redis 热点数据与冷数据解析
  • 【计算机视觉】车牌分割定位识别
  • wordpress做网站容易吗用lls建设一个网站
  • 从 3.6 亿订单表到毫秒级查询:分库分表指南
  • 网站怎样设计网页做黄金期货的网站
  • 无线网卡——WIFI7无法在Ubuntu22.04系统中使用
  • Ubuntu20.04下的Pytorch2.7.1安装
  • MySQL:C语言链接
  • 合肥市门户网站中国纪检监察报社长
  • 黑马点评秒杀优化和场景补充
  • 嵌入式硬件——基于IMX6ULL的UART(通用异步收发传输器)
  • Spark Shuffle:分布式计算的数据重分布艺术
  • 网站能看出建设时间吗网页设计工资统计
  • Postgres数据库truncate表无有效备份恢复---惜分飞
  • 【邪修玩法】如何在WPF中开放 RESTful API 服务
  • 开源 C++ QT QML 开发(二)工程结构
  • 2025生成式AI部署避坑指南:芯片卡脖子与依赖链爆炸的实战解决方案
  • 互联网新热土视角下开源AI大模型与S2B2C商城小程序的县域市场渗透策略研究
  • 外文网站制作佛山做企业网站
  • 线上网站建设需求西安做网站 怎样备案
  • 《数据密集型应用系统设计2》--OLTP/OLAP/全文搜索的数据存储与查询
  • 【ROS2学习笔记】RViz 三维可视化