3 分钟学会使用 Puppeteer 将 HTML 转 PDF
方案一、wkhtmltopdf + python
- https://wkhtmltopdf.org/
不推荐:实测效果不佳,且该项目自 2020 年起已停止维护。
sudo apt-get install xfonts-75dpi
sudo dpkg -i wkhtmltox_0.12.6.1-2.jammy_amd64.deb
# 使用示例
wkhtmltopdf http://google.com google.pdf
方案二、Puppeteer
推荐方案,效果最佳,高度还原浏览器打印效果
pnpm install puppeteer-core
# which google-chrome
/usr/bin/google-chrome
const puppeteer = require('puppeteer-core');
const fs = require('fs');
const path = require('path');// 自动检测 Chrome 安装路径(支持 Linux/Windows)
const { pathToFileURL } = require('url');

// Candidate Chrome executables, probed in order; the first one that exists is used.
const CHROME_PATHS = [
  '/usr/bin/google-chrome', // Debian/Ubuntu default location
  '/opt/google/chrome/chrome', // where the binary actually lives
  'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe',
];

/**
 * Renders one local HTML file to `report.pdf` (A4, backgrounds printed) using
 * an installed Chrome driven by puppeteer-core.
 *
 * On failure the error message is logged and the process exit code is set to 1.
 * The browser is closed in `finally`, so a failed navigation or render does not
 * leave a Chrome process behind.
 */
(async () => {
  let browser;
  try {
    // Pick the first Chrome binary that exists on this machine.
    const executablePath = CHROME_PATHS.find((candidate) => fs.existsSync(candidate));
    if (!executablePath) throw new Error('未找到 Chrome 浏览器');

    browser = await puppeteer.launch({
      headless: 'new', // opt into the new headless mode
      executablePath,
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-dev-shm-usage',
        '--font-render-hinting=medium', // author's note: improves Chinese font rendering
      ],
    });
    const page = await browser.newPage();

    // Load the local HTML file. pathToFileURL percent-encodes characters such as
    // '#', '?' and '%' that a hand-built `file://${path}` string would misinterpret.
    const htmlPath = path.resolve(__dirname, '/home/sam/下载/blender_python_reference_4_4/aud.html');
    await page.goto(pathToFileURL(htmlPath).href, {
      waitUntil: 'networkidle2',
      timeout: 30000,
    });

    // Belt and braces: hide the "skip to content" link via CSS, then remove it from the DOM.
    await page.addStyleTag({
      content: '.skip-to-content { display: none !important; }',
    });
    await page.evaluate(() => {
      const skipLink = document.querySelector('a.skip-to-content');
      if (skipLink) skipLink.remove();
    });

    // PDF output settings.
    await page.pdf({
      path: 'report.pdf',
      format: 'A4',
      printBackground: true,
      displayHeaderFooter: false,
      margin: {
        top: '25mm',
        right: '15mm',
        bottom: '25mm',
        left: '15mm',
      },
    });
    console.log('PDF 导出完成');
  } catch (error) {
    console.error('运行失败:', error.message);
    // Set the exit code instead of calling process.exit() so the cleanup below still runs.
    process.exitCode = 1;
  } finally {
    if (browser) {
      await browser.close();
    }
  }
})();
浏览器导出效果:
Puppeteer 导出效果:
补充:多线程处理导出
pnpm add tinypool
// main.mjs
import Tinypool from 'tinypool';
import fs from 'fs/promises'
import path from 'path'// 创建一个 Tinypool 实例
// Worker pool; every task is executed by worker.mjs.
const pool = new Tinypool({
  filename: new URL('./worker.mjs', import.meta.url).href, // worker thread entry file
  minThreads: 10, // minimum number of threads
  maxThreads: 20, // maximum number of threads
});

// Root directory whose files are handed to the workers.
const folderPath = '/home/sam/MyWorkSpace/SOLIDWORKS/html/sldworksapi';
// Alternative input roots:
// const folderPath = '/home/sam/MyWorkSpace/SOLIDWORKS/html/swcommands';
// const folderPath = '/home/sam/MyWorkSpace/SOLIDWORKS/html/swconst';

/**
 * Recursively collects the paths of all regular files below a directory.
 *
 * @param {string} dirPath - Directory to walk.
 * @returns {Promise<string[]>} Full paths (dirPath joined with each entry) of every file found.
 */
async function getAllFilesAsync(dirPath) {
  const filesList = [];

  async function traverse(currentPath) {
    const entries = await fs.readdir(currentPath);
    for (const entry of entries) {
      const fullPath = path.join(currentPath, entry);
      const stats = await fs.stat(fullPath);
      if (stats.isDirectory()) {
        await traverse(fullPath); // descend into sub-directory
      } else if (stats.isFile()) {
        filesList.push(fullPath);
      }
    }
  }

  await traverse(dirPath);
  return filesList;
}

/**
 * Dispatches every file under `folderPath` to the worker pool and waits for
 * all of them to settle.
 *
 * Promise.allSettled is used instead of Promise.all so that a single failed
 * conversion does not reject the whole batch and hide the outcome of the
 * remaining tasks. Failures are reported per file and reflected in the
 * process exit code. The pool is destroyed in `finally`, even when directory
 * traversal itself throws.
 *
 * @returns {Promise<void>}
 */
async function run() {
  try {
    const filesList = await getAllFilesAsync(folderPath);

    const results = await Promise.allSettled(
      filesList.map((file) => pool.run({ htmlPath: file, folderPath })),
    );

    let failureCount = 0;
    results.forEach((result, index) => {
      if (result.status === 'rejected') {
        failureCount += 1;
        console.error(`-> FAILED: ${filesList[index]}:`, result.reason);
      }
    });
    if (failureCount > 0) {
      console.error(`${failureCount} of ${filesList.length} tasks failed`);
      process.exitCode = 1;
    }
  } finally {
    await pool.destroy();
  }
}

await run();
// worker.mjs
import puppeteer from 'puppeteer-core'
import fs1 from 'fs'
import path from 'path'
import { fileURLToPath, pathToFileURL } from 'url';

const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);// 自动检测 Chrome 安装路径(支持 Linux/Windows)
const CHROME_PATHS = [
  '/usr/bin/google-chrome', // Debian/Ubuntu default location
  '/opt/google/chrome/chrome', // where the binary actually lives
  'C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe',
];

// Matches a trailing ".htm" or ".html" extension (case-insensitive).
const HTML_EXTENSION = /\.html?$/i;

// Default root directory for generated PDFs. Roots for the other doc sets:
//   '/home/sam/MyWorkSpace/PDF/SW/swcommands'
//   '/home/sam/MyWorkSpace/PDF/SW/swconst'
const DEFAULT_OUTPUT_ROOT = '/home/sam/MyWorkSpace/PDF/SW/sldworksapi';

/**
 * Maps a source HTML path to its PDF path, mirroring the layout below
 * `folderPath` underneath `outputRoot`.
 *
 * Only the trailing extension is stripped. (Chained
 * `.replace('.htm', '').replace('.html', '')` turns "foo.html" into "fool"
 * and also hits the first ".htm" anywhere in the path.)
 *
 * @param {string} file - Path of the source HTML file.
 * @param {string} folderPath - Input root the file was found under.
 * @param {string} outputRoot - Directory that receives the PDFs.
 * @returns {string} Destination path ending in ".pdf".
 */
function buildOutputPath(file, folderPath, outputRoot) {
  let relative = path.relative(folderPath, file);
  const escapesRoot =
    relative === '..' || relative.startsWith(`..${path.sep}`) || path.isAbsolute(relative);
  if (escapesRoot) {
    // File is not under folderPath: mirror its full path below outputRoot
    // rather than writing outside of it.
    const absolute = path.resolve(file);
    relative = absolute.slice(path.parse(absolute).root.length);
  }
  return path.join(outputRoot, `${relative.replace(HTML_EXTENSION, '')}.pdf`);
}

/**
 * Converts one HTML file to a landscape A4 PDF.
 *
 * Files without an .htm/.html extension and files whose PDF already exists
 * are skipped with a log line. The destination directory is created when
 * missing, and the browser is closed even when navigation or rendering fails.
 *
 * @param {string} file - Path of the HTML file to convert.
 * @param {string} folderPath - Input root used to derive the relative output path.
 * @param {string} executablePath - Chrome binary to launch.
 * @param {string} [outputRoot] - Directory that receives the PDFs.
 * @returns {Promise<void>}
 */
async function html2pdf(file, folderPath, executablePath, outputRoot = DEFAULT_OUTPUT_ROOT) {
  // Only HTML documents are converted.
  if (!HTML_EXTENSION.test(file)) {
    console.log('--> DOC IS NOT A TYPE OF HTML. SKIP');
    return;
  }

  // Skip documents that were already exported.
  const outputPath = buildOutputPath(file, folderPath, outputRoot);
  if (fs1.existsSync(outputPath)) {
    console.log('-> Exists. SKIP');
    return;
  }

  // The input tree is walked recursively, so nested output directories may not exist yet.
  fs1.mkdirSync(path.dirname(outputPath), { recursive: true });

  const browser = await puppeteer.launch({
    headless: 'new', // opt into the new headless mode
    executablePath,
    args: [
      '--no-sandbox',
      '--disable-setuid-sandbox',
      '--disable-dev-shm-usage',
      '--font-render-hinting=medium', // author's note: improves Chinese font rendering
    ],
  });

  try {
    const page = await browser.newPage();

    // pathToFileURL percent-encodes characters ('#', '?', '%') that would
    // otherwise be parsed as URL syntax in a hand-built file:// string.
    const htmlPath = path.resolve(__dirname, file);
    await page.goto(pathToFileURL(htmlPath).href, {
      waitUntil: 'networkidle2',
      timeout: 30000,
    });

    // PDF output settings.
    await page.pdf({
      path: outputPath,
      format: 'A4',
      printBackground: false,
      displayHeaderFooter: false,
      landscape: true, // landscape orientation
      margin: {
        top: '25mm',
        right: '15mm',
        bottom: '25mm',
        left: '15mm',
      },
    });
    console.log('PDF导出完成');
  } finally {
    await browser.close();
  }
}

/**
 * Pool task entry point: locates Chrome and converts a single file.
 *
 * @param {object} task
 * @param {string} task.htmlPath - HTML file to convert.
 * @param {string} task.folderPath - Input root the file was found under.
 * @param {string} [task.outputRoot] - Directory that receives the PDFs.
 * @returns {Promise<void>}
 * @throws {Error} When none of CHROME_PATHS exists.
 */
export default async ({ htmlPath, folderPath, outputRoot = DEFAULT_OUTPUT_ROOT }) => {
  // Pick the first Chrome binary that exists on this machine.
  const executablePath = CHROME_PATHS.find((candidate) => fs1.existsSync(candidate));
  if (!executablePath) throw new Error('未找到 Chrome 浏览器');

  await html2pdf(htmlPath, folderPath, executablePath, outputRoot);
};
横向打印 PDF,效果不错,直接转了一万多个文档。