Zotero中进行文献翻译【Windows11】
zotero官网:https://www.zotero.org/
1 在Zotero软件中安装插件
进入Zotero百科全书,依次点击:插件→翻译插件→插件介绍→Zotero 中文社区插件商店
进去后搜索pdf2zh,下载该插件,并把下载得到的文件放入一个新建的空白文件夹zotero-pdf2zh
打开Zotero软件后,依次点击:工具→插件→齿轮符号↘。将刚刚下载的插件放进去
2 zotero-pdf2zh配置
下述步骤是依照【zotero-pdf2zh】教程页面与B站视频教程【zotero PDF文献全文翻译,无损排版,升级版】以及本人下载安装流程记录,且是我安装过后才写的,所以有些命令我没有执行。如果更喜欢看视频教程,可以直接点击上面蓝字进入B站观看
Zotero软件中依次点击:编辑→设置:照着下图修改
2.1 server.py
回到文件夹zotero-pdf2zh内新建一个txt文件
用记事本打开该文件,粘贴下面代码后Ctrl+S保存
import os
from flask import Flask, request, jsonify, send_file
import base64
import subprocess
from pypdf import PdfWriter, PdfReader
from pypdf.generic import RectangleObject
import sys

# Known translation service names. Config uses this list to detect a service
# name that was supplied as the engine by mistake.
services = [
    'bing', 'google',
    'deepl', 'deeplx',
    'ollama', 'xinference',
    'openai', 'azure-openai',
    'zhipu', 'ModelScope',
    'silicon', 'gemini', 'azure',
    'tencent', 'dify', 'anythingllm',
    'argos', 'grok', 'groq',
    'deepseek', 'openailiked', 'qwen-mt',
]


class PDFTranslator:
    """Flask server that receives PDFs over HTTP and post-processes translations.

    Routes (all registered in ``setup_routes``):
      POST /translate       translate a PDF, optionally cut / compare the result
      POST /cut             split each page of a PDF into left and right halves
      POST /compare         build a side-by-side comparison from a dual PDF
      POST /singlecompare   merge consecutive pages of a dual PDF side by side
      GET  /translatedFile/<filename>   download a file from the output folder

    The translation itself is delegated to the external ``pdf2zh`` or
    ``pdf2zh_next`` command, started with ``subprocess.run``.
    """

    DEFAULT_CONFIG = {
        'port': 8888,
        'engine': 'pdf2zh',
        'service': 'bing',
        'threadNum': 4,
        'outputPath': './translated/',
        'configPath': './config.json',
        'sourceLang': 'en',
        'targetLang': 'zh',
    }

    # Folder served by download_file. The class-level default keeps a download
    # request that arrives before any POST from failing with AttributeError;
    # process_request overwrites it per instance with the configured folder.
    translated_dir = os.path.abspath(DEFAULT_CONFIG['outputPath'])

    def __init__(self):
        self.app = Flask(__name__)
        self.setup_routes()

    def setup_routes(self):
        """Register every HTTP endpoint on the Flask application."""
        add_rule = self.app.add_url_rule
        add_rule('/translate', 'translate', self.translate, methods=['POST'])
        add_rule('/cut', 'cut', self.cut_pdf, methods=['POST'])
        add_rule('/compare', 'compare', self.compare, methods=['POST'])
        add_rule('/singlecompare', 'singlecompare', self.single_compare, methods=['POST'])
        add_rule('/translatedFile/<filename>', 'download', self.download_file)

    class Config:
        """Per-request settings read from the JSON body, with defaults applied.

        A key that is missing, ``None`` or ``''`` falls back to
        ``PDFTranslator.DEFAULT_CONFIG``. Creating a Config also creates the
        output directory if it does not exist and prints the resolved values.
        """

        def __init__(self, data):
            defaults = PDFTranslator.DEFAULT_CONFIG

            def pick(key, fallback):
                value = data.get(key)
                return fallback if value in (None, '') else value

            self.threads = pick('threadNum', defaults['threadNum'])
            self.service = pick('service', defaults['service'])
            self.engine = pick('engine', defaults['engine'])
            self.outputPath = pick('outputPath', defaults['outputPath'])
            self.configPath = pick('configPath', defaults['configPath'])
            self.sourceLang = pick('sourceLang', defaults['sourceLang'])
            self.targetLang = pick('targetLang', defaults['targetLang'])

            # Anything that is not a plain non-negative integer counts as 0.
            skip = pick('skip_last_pages', 0)
            self.skip_last_pages = int(skip) if str(skip).isdigit() else 0

            self.babeldoc = data.get('babeldoc', False)
            self.mono_cut = data.get('mono_cut', False)
            self.dual_cut = data.get('dual_cut', False)
            self.compare = data.get('compare', False)  # side-by-side view for two-column PDFs
            self.single_compare = data.get('single_compare', False)  # side-by-side view for single-column PDFs
            self.skip_subset_fonts = data.get('skip_subset_fonts', False)

            self.outputPath = self.get_abs_path(self.outputPath)
            self.configPath = self.get_abs_path(self.configPath)
            os.makedirs(self.outputPath, exist_ok=True)

            if self.engine == 'pdf2zh_next':
                self.babeldoc = True
            # A service name in the engine field is treated as a mistake and
            # replaced by the default engine.
            if self.engine != 'pdf2zh' and self.engine in services:
                print('Engine only support PDF2zh')
                self.engine = 'pdf2zh'

            print("[config]: ", self.__dict__)

        @staticmethod
        def get_abs_path(path):
            """Return *path* unchanged if absolute, else resolved against the cwd."""
            return path if os.path.isabs(path) else os.path.abspath(path)

    # ------------------------------------------------------------------ helpers

    @staticmethod
    def _is_true(value):
        """Return True for the boolean ``True`` or the string ``'true'``."""
        return value in (True, 'true')

    @staticmethod
    def _is_false(value):
        """Return True for the boolean ``False`` or the string ``'false'``."""
        return value in (False, 'false')

    @staticmethod
    def _safe_file_name(name):
        """Reduce a client-supplied file name to its final path component.

        The name comes straight from the request body, so directory parts
        (with either slash style) are dropped to keep the upload inside the
        output folder.

        Raises:
            ValueError: if nothing usable is left (empty, ``.`` or ``..``).
        """
        base = os.path.basename(str(name).replace('\\', '/'))
        if base in ('', '.', '..'):
            raise ValueError(f"Invalid file name: {name!r}")
        return base

    @staticmethod
    def _swap_suffix(path, old_suffix, new_suffix):
        """Return *path* with a trailing *old_suffix* replaced by *new_suffix*.

        When *path* does not end with *old_suffix*, the extension is replaced
        instead, so the result never equals the input path and the source file
        is not overwritten.
        """
        if path.endswith(old_suffix):
            return path[:-len(old_suffix)] + new_suffix
        return os.path.splitext(path)[0] + new_suffix

    @staticmethod
    def _page_range(total_pages, skip_last_pages):
        """Return the ``'1-N'`` page range that omits the last pages, or None.

        None means "no page restriction": either nothing should be skipped or
        skipping would leave no page at all (a warning is printed in that case).
        """
        if not skip_last_pages or skip_last_pages <= 0:
            return None
        last = total_pages - skip_last_pages
        if last < 1:
            print(f"[warning]: skip_last_pages={skip_last_pages} covers all "
                  f"{total_pages} pages; translating the whole document")
            return None
        return f'1-{last}'

    @staticmethod
    def _crop_boxes(page, rect):
        """Set every page box of *page* to *rect* and return the page."""
        for box in ('mediabox', 'cropbox', 'bleedbox', 'trimbox', 'artbox'):
            setattr(page, box, RectangleObject(rect))
        return page

    def _move_outputs(self, config, stem, output_files):
        """Move ``<stem>.<targetLang>.{mono,dual}.pdf`` to the server's names.

        os.replace is used instead of os.rename because os.rename fails on
        Windows when the destination already exists, which happens as soon as
        the same document is translated a second time.
        """
        for kind in ('mono', 'dual'):
            produced = os.path.join(config.outputPath, f"{stem}.{config.targetLang}.{kind}.pdf")
            os.replace(produced, output_files[kind])

    # --------------------------------------------------------- request handling

    def process_request(self):
        """Decode the uploaded PDF from the JSON body and store it on disk.

        Returns:
            (input_path, config): path of the stored PDF and the parsed Config.
        """
        data = request.get_json()
        config = self.Config(data)
        self.translated_dir = config.outputPath

        file_content = data.get('fileContent', '')
        data_url_prefix = 'data:application/pdf;base64,'
        if file_content.startswith(data_url_prefix):
            file_content = file_content[len(data_url_prefix):]

        input_path = os.path.join(config.outputPath, self._safe_file_name(data['fileName']))
        with open(input_path, 'wb') as f:
            f.write(base64.b64decode(file_content))
        return input_path, config

    def translate_pdf(self, input_path, config):
        """Run the configured engine on *input_path*.

        Returns:
            (mono_path, dual_path) inside ``config.outputPath``.

        Raises:
            ValueError: for an engine other than 'pdf2zh' / 'pdf2zh_next'.
            subprocess.CalledProcessError: if the engine exits with an error.
        """
        # splitext strips only the extension; a blanket replace('.pdf', '')
        # would also eat '.pdf' occurring in the middle of the name.
        base_name = os.path.splitext(os.path.basename(input_path))[0]
        output_files = {
            'mono': os.path.join(config.outputPath, f"{base_name}-mono.pdf"),
            'dual': os.path.join(config.outputPath, f"{base_name}-dual.pdf"),
        }
        use_babeldoc = self._is_true(config.babeldoc)

        if config.engine == 'pdf2zh':
            cmd = [
                config.engine,
                input_path,
                '--t', str(config.threads),
                '--output', config.outputPath,
                '--service', config.service,
                '--lang-in', config.sourceLang,
                '--lang-out', config.targetLang,
                '--config', config.configPath,
            ]
            if config.skip_last_pages and config.skip_last_pages > 0:
                pages = self._page_range(len(PdfReader(input_path).pages), config.skip_last_pages)
                if pages is not None:
                    # Option and value are separate argv entries.
                    cmd.extend(['-p', pages])
            if self._is_true(config.skip_subset_fonts):
                cmd.append('--skip-subset-fonts')
            if use_babeldoc:
                cmd.append('--babeldoc')
            subprocess.run(cmd, check=True)
            if use_babeldoc:
                self._move_outputs(config, base_name, output_files)
            return output_files['mono'], output_files['dual']

        if config.engine == 'pdf2zh_next':
            # pdf2zh_next selects the service with a flag whose name differs
            # from the plugin's name for a few services.
            flag_names = {
                'openailiked': 'openaicompatible',
                'tencent': 'tencentmechinetranslation',
                'ModelScope': 'modelscope',
                'silicon': 'siliconflow',
                'qwen-mt': 'qwenmt',
            }
            service = flag_names.get(config.service, config.service)
            cmd = [
                config.engine,
                input_path,
                '--output', config.outputPath,
                '--' + service,
                '--lang-in', config.sourceLang,
                '--lang-out', config.targetLang,
                '--qps', str(config.threads),
            ]
            # The config file is forwarded only if it exists and its path does
            # not contain 'json'.
            if (os.path.exists(config.configPath) and config.configPath != ''
                    and len(config.configPath) > 4 and 'json' not in config.configPath):
                cmd.append('--config')
                cmd.append(config.configPath)
            if config.skip_last_pages and config.skip_last_pages > 0:
                pages = self._page_range(len(PdfReader(input_path).pages), config.skip_last_pages)
                if pages is not None:
                    cmd.append('--pages')
                    cmd.append(pages)
            print("pdf2zh_next command: ", cmd)
            subprocess.run(cmd, check=True)

            # Prefer the "no_watermark" pair when both files were produced.
            no_watermark_stem = f"{base_name}.no_watermark"
            no_watermark_mono = os.path.join(
                config.outputPath, f"{no_watermark_stem}.{config.targetLang}.mono.pdf")
            no_watermark_dual = os.path.join(
                config.outputPath, f"{no_watermark_stem}.{config.targetLang}.dual.pdf")
            if os.path.exists(no_watermark_mono) and os.path.exists(no_watermark_dual):
                self._move_outputs(config, no_watermark_stem, output_files)
            else:
                self._move_outputs(config, base_name, output_files)
            return output_files['mono'], output_files['dual']

        raise ValueError(f"Unsupported engine: {config.engine}")

    # ------------------------------------------------------------ PDF utilities

    def merge_pages_side_by_side(self, input_pdf, output_pdf):
        """Write *output_pdf* with pages (0,1), (2,3), ... placed side by side.

        A trailing unpaired page is put on the left of a sheet whose right
        half stays empty.
        """
        reader = PdfReader(input_pdf)
        writer = PdfWriter()
        num_pages = len(reader.pages)
        for i in range(0, num_pages, 2):
            left_page = reader.pages[i]
            left_width = left_page.mediabox.width
            height = left_page.mediabox.height
            if i + 1 < num_pages:
                right_page = reader.pages[i + 1]
                right_width = right_page.mediabox.width
            else:
                right_page = None
                right_width = left_width  # assume the same width
            new_page = writer.add_blank_page(width=left_width + right_width, height=height)
            new_page.merge_transformed_page(left_page, (1, 0, 0, 1, 0, 0))
            if right_page:
                new_page.merge_transformed_page(right_page, (1, 0, 0, 1, left_width, 0))
        with open(output_pdf, "wb") as f:
            writer.write(f)

    def split_pdf(self, input_pdf, output_pdf, compare=False, babeldoc=False):
        """Cut two-column pages into column halves and write *output_pdf*.

        Paired mode is used when the file name contains 'dual' or *compare* is
        true, and *babeldoc* is false. Pages are then handled two at a time
        (i, i+1): with *compare* the two left columns share one sheet and the
        two right columns the next; without it the four halves are emitted as
        separate pages. Otherwise every page is simply split into a left and a
        right page.
        """
        writer = PdfWriter()
        wants_compare = compare == True  # noqa: E712 - keep the loose comparison
        # Only the file name is inspected, so a directory whose name happens to
        # contain 'dual' cannot switch the mode.
        named_dual = 'dual' in os.path.basename(input_pdf)

        if (named_dual or wants_compare) and babeldoc == False:  # noqa: E712
            # Cropping mutates the page object, and every page is needed with
            # two different crops, so independent readers are opened.
            readers = [PdfReader(input_pdf) for _ in range(4)]
            total = len(readers[0].pages)
            for i in range(0, total, 2):
                media_box = readers[0].pages[i].mediabox
                width = media_box.width
                height = media_box.height
                offset = width / 20
                ratio = 4.7
                left_rect = (offset, 0, width / 2 + offset / ratio, height)
                right_rect = (width / 2 - offset / ratio, 0, width - offset, height)
                # An odd page count leaves the last page without a partner;
                # it is processed on its own instead of raising IndexError.
                has_partner = i + 1 < total

                left_page_1 = self._crop_boxes(readers[0].pages[i], left_rect)
                right_page_1 = self._crop_boxes(readers[2].pages[i], right_rect)
                left_page_2 = right_page_2 = None
                if has_partner:
                    left_page_2 = self._crop_boxes(readers[1].pages[i + 1], left_rect)
                    right_page_2 = self._crop_boxes(readers[3].pages[i + 1], right_rect)

                if wants_compare:
                    sheet_1 = writer.add_blank_page(width, height)
                    sheet_1.merge_transformed_page(left_page_1, (1, 0, 0, 1, 0, 0))
                    if left_page_2 is not None:
                        sheet_1.merge_transformed_page(left_page_2, (1, 0, 0, 1, width / 2, 0))
                    sheet_2 = writer.add_blank_page(width, height)
                    sheet_2.merge_transformed_page(right_page_1, (1, 0, 0, 1, -width / 2, 0))
                    if right_page_2 is not None:
                        sheet_2.merge_transformed_page(right_page_2, (1, 0, 0, 1, 0, 0))
                else:
                    writer.add_page(left_page_1)
                    if left_page_2 is not None:
                        writer.add_page(left_page_2)
                    writer.add_page(right_page_1)
                    if right_page_2 is not None:
                        writer.add_page(right_page_2)
        else:
            readers = [PdfReader(input_pdf) for _ in range(2)]
            for i in range(len(readers[0].pages)):
                left_page = readers[0].pages[i]
                media_box = left_page.mediabox
                width = media_box.width
                height = media_box.height
                w_offset = width / 20
                w_ratio = 4.7
                h_offset = height / 20
                left_page.mediabox = RectangleObject(
                    (w_offset, h_offset, width / 2 + w_offset / w_ratio, height - h_offset))
                right_page = readers[1].pages[i]
                right_page.mediabox = RectangleObject(
                    (width / 2 - w_offset / w_ratio, h_offset, width - w_offset, height - h_offset))
                writer.add_page(left_page)
                writer.add_page(right_page)

        with open(output_pdf, "wb") as output_file:
            writer.write(output_file)

    # ------------------------------------------------------------------- routes

    def translate(self):
        """POST /translate: translate the upload and build the requested extras."""
        print("\n########## translating ##########")
        try:
            input_path, config = self.process_request()
            mono, dual = self.translate_pdf(input_path, config)
            processed_files = []
            if self._is_true(config.mono_cut):
                output = self._swap_suffix(mono, '-mono.pdf', '-mono-cut.pdf')
                self.split_pdf(mono, output)
                processed_files.append(output)
            if self._is_true(config.dual_cut):
                output = self._swap_suffix(dual, '-dual.pdf', '-dual-cut.pdf')
                self.split_pdf(dual, output, False, self._is_true(config.babeldoc))
                processed_files.append(output)
            # NOTE(review): both comparison outputs are assumed to be nested
            # under the babeldoc check; confirm against the original layout.
            if self._is_false(config.babeldoc):
                if self._is_true(config.compare):
                    output = self._swap_suffix(dual, '-dual.pdf', '-compare.pdf')
                    self.split_pdf(dual, output, compare=True, babeldoc=False)
                    processed_files.append(output)
                if self._is_true(config.single_compare):
                    output = self._swap_suffix(dual, '-dual.pdf', '-single-compare.pdf')
                    self.merge_pages_side_by_side(dual, output)
                    processed_files.append(output)
            return jsonify({'status': 'success', 'processed': processed_files}), 200
        except Exception as e:
            print("[translate error]: ", e)
            return jsonify({'status': 'error', 'message': str(e)}), 500

    def cut_pdf(self):
        """POST /cut: split the uploaded PDF and report the output path."""
        print("\n########## cutting ##########")
        try:
            input_path, config = self.process_request()
            output_path = self._swap_suffix(input_path, '.pdf', '-cut.pdf')
            self.split_pdf(input_path, output_path)  # keeps the original logic
            return jsonify({'status': 'success', 'path': output_path}), 200
        except Exception as e:
            print("[cut error]: ", e)
            return jsonify({'status': 'error', 'message': str(e)}), 500

    def single_compare(self):
        """POST /singlecompare: side-by-side merge; translates first if needed."""
        print("\n########## single compare ##########")
        try:
            input_path, config = self.process_request()
            file_name = os.path.basename(input_path)
            if '-mono.pdf' in file_name:
                raise Exception('Please provide dual PDF or origial PDF for dual-comparison')
            if 'dual' not in file_name:
                _, dual = self.translate_pdf(input_path, config)
                input_path = dual
            output_path = self._swap_suffix(input_path, '-dual.pdf', '-single-compare.pdf')
            self.merge_pages_side_by_side(input_path, output_path)
            return jsonify({'status': 'success', 'path': output_path}), 200
        except Exception as e:
            print("[compare error]: ", e)
            return jsonify({'status': 'error', 'message': str(e)}), 500

    def compare(self):
        """POST /compare: column-wise comparison; translates first if needed."""
        print("\n########## compare ##########")
        try:
            input_path, config = self.process_request()
            file_name = os.path.basename(input_path)
            if 'mono' in file_name:
                raise Exception('Please provide dual PDF or origial PDF for dual-comparison')
            if 'dual' not in file_name:
                _, dual = self.translate_pdf(input_path, config)
                input_path = dual
            output_path = self._swap_suffix(input_path, '-dual.pdf', '-compare.pdf')
            self.split_pdf(input_path, output_path, compare=True)
            return jsonify({'status': 'success', 'path': output_path}), 200
        except Exception as e:
            print("[compare error]: ", e)
            return jsonify({'status': 'error', 'message': str(e)}), 500

    def download_file(self, filename):
        """GET /translatedFile/<filename>: send a file from the output folder.

        Only the final path component of *filename* is used, and only regular
        files are served; anything else yields a 404.
        """
        safe_name = os.path.basename(str(filename).replace('\\', '/'))
        file_path = os.path.join(self.translated_dir, safe_name)
        if not os.path.isfile(file_path):
            return ('File not found', 404)
        return send_file(file_path, as_attachment=True)

    def run(self):
        """Start the server on the port given as argv[1], else the default port."""
        port = int(sys.argv[1]) if len(sys.argv) > 1 else self.DEFAULT_CONFIG['port']
        # NOTE(review): 0.0.0.0 accepts connections from every network
        # interface; if only a local Zotero uses the server, 127.0.0.1 is safer.
        self.app.run(host='0.0.0.0', port=port)


if __name__ == '__main__':
    translator = PDFTranslator()
    translator.run()
并重命名为server.py
若是没有显示文件后缀,就依次点击:查看→显示→文件扩展名
2.2 translated文件夹
新建一个空文件夹translated
2.3 进入终端
2.3.1 安装uv
# 方法一: 使用pip安装uv
pip install uv
# 方法二: 下载脚本安装
# macOS/Linux
wget -qO- https://astral.sh/uv/install.sh | sh
# windows
powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex"
我尝试过方法一,没有成功,所以改用方法二(下载安装脚本需要能够访问外网,我是通过代理完成的)
安装好后提示需要配置环境变量
1. 右键点击“桌面” → “个性化” → 搜索“高级系统设置” → “环境变量”。
2. 在“用户变量”或“系统变量”中找到 `Path`,点击“编辑”。
3. 点击“新建”,输入 `C:\Users\FiveTNNO\.local\bin`,然后确认。【FiveTNNO是我的电脑用户名,请换成你自己的用户名】
4. 关闭并重新打开命令行窗口,运行 `uv --version` 测试。
2.3.2 uv安装Python 3.13
依次运行下面两行代码
uv python install 3.13 # 安装3.13版本python
uv venv --python 3.13 # 创建3.13版本python虚拟环境
2.3.3 启动虚拟环境
.\.venv\Scripts\activate
安装需要的包
uv pip install pdf2zh_next pypdf flask -i https://pypi.tuna.tsinghua.edu.cn/simple
粘贴进去后enter,等待就好
2.3.4 测试安装并启动gui
在命令行输入pdf2zh_next --gui
进入图形界面
拖一个英文文献进去,照着下图改【选bing,别选Google】,最后Translate,等待几分钟
翻译完后是这个样子
回到终端,连续Ctrl+C几次,终止程序。然后粘贴下述代码
copy "%USERPROFILE%\.config\pdf2zh\config.v3.toml" config.toml
运行后会发现多了个config.toml文件。还有个pdf2zh_files文件夹应该是刚刚的翻译操作得到的,不用管它
2.3.5 Zotero软件中进行英文文献翻译
在终端执行
uv run python server.py #注意了:这行命令执行了之后才能在Zotero软件中进行翻译操作
2.4 回到Zotero软件进行翻译
随便选择一篇英文文献,右键后单击翻译PDF,等待即可。
翻译结束后我的终端是这样的,不晓得为啥我的translate是77。但是不影响翻译结果我就没管了
没有文献需要翻译了就回到终端Ctrl+C结束就好了
2.5 补充
如果不想每次要翻译文献的时候都进入终端输入那行代码,可以移步这个视频的10:53开始观看