用本地代理 + ZIP 打包 + Excel 命名,优雅批量下载跨域 PDF
本文提供一套可直接落地的方案:前端一键把大量 PDF 批量下载,自动避开跨域限制,并按照 Excel 中的“名称 + pdf名称”精准命名,最终一次性打包成 ZIP 保存,避免多次“另存为”。示例中不包含任何真实域名、文件名或截图,请将占位符替换为你的实际数据。
你将获得
- 一次性打包 ZIP,避免多次保存弹窗
- 跨域无感代理,返回 `Content-Disposition: attachment`
- Excel 命名:文件名为“承办校名称-pdf名称.pdf”
- 默认仅绑定本机 127.0.0.1,安全发布
最小项目结构
你的项目/
├─ server.mjs # 本地下载/打包代理(Node 18+)
├─ test.html # 前端触发页面(也可嵌入你的系统)
└─ 承办校设备清单.xlsx # Excel(默认读取根目录此文件)
Excel 第一张工作表包含列(列名不必完全一致,程序会智能匹配并兜底):
- 第1列:承办校名称(用于前缀)
- 另一列:pdf名称(用于命名)
- 另一列:pdf地址(URL,如 `https://example.org/portal/api/public/view?url=...`)
示例(CSV 展示意图):
承办校名称,pdf名称,pdf地址
示例职业技术学院-中职1组,设备与场地信息清单.pdf,https://example.org/portal/api/public/view?url=...
...
后端:本地代理与 ZIP 打包(更安全默认值)
安装与启动(你执行):
npm i archiver xlsx
node server.mjs
- 仅监听 `127.0.0.1`,避免局域网可达
- 固定读取根目录的 `承办校设备清单.xlsx`,不开放任意路径
- CORS 仅允许本地来源
// server.mjs (Node 18+)
import http from 'node:http';
import { Readable } from 'node:stream';
import archiver from 'archiver';
import fs from 'node:fs';
import path from 'node:path';
import xlsx from 'xlsx';

const PORT = 8787;
const HOST = '127.0.0.1'; // loopback only, so the proxy is not reachable from the LAN
const EXCEL_FILE = path.join(process.cwd(), '承办校设备清单.xlsx');

/**
 * Build the CORS headers for a response.
 *
 * Allowed callers: requests without an Origin header, the opaque "null"
 * origin (e.g. a page opened from file://) and this server's own origin.
 * Any other origin receives the literal value "null", which never equals
 * that caller's own origin, so the browser refuses to expose the response.
 *
 * @param {import('node:http').IncomingMessage} req
 * @returns {Record<string, string>}
 */
function corsHeaders(req) {
  const { origin } = req.headers;
  const allowed = new Set([undefined, 'null', `http://${HOST}:${PORT}`]);
  return {
    'Access-Control-Allow-Origin': allowed.has(origin) ? (origin || '*') : 'null',
    // The value above depends on the request's Origin header.
    Vary: 'Origin',
  };
}

/**
 * Check that a value is a string starting with http:// or https://.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isHttpUrl(value) {
  return typeof value === 'string'
    && (value.startsWith('http://') || value.startsWith('https://'));
}

/**
 * Derive a file name from a URL: the last path segment of its `url` query
 * parameter if present, otherwise of its own path. Always ends in ".pdf".
 *
 * @param {string} u
 * @returns {string} 'file.pdf' when nothing usable can be derived.
 */
function inferFilenameFromUrl(u) {
  try {
    const parsed = new URL(u);
    const inner = parsed.searchParams.get('url');
    const segment = (inner || parsed.pathname).split('/').filter(Boolean).pop() || 'file.pdf';
    return segment.toLowerCase().endsWith('.pdf') ? segment : `${segment}.pdf`;
  } catch {
    // Not a parseable URL.
    return 'file.pdf';
  }
}

/**
 * Extract the file name from a Content-Disposition header.
 * Prefers the extended `filename*=` form (charset'lang'percent-encoded)
 * over the plain `filename=` form.
 *
 * @param {Headers} headers
 * @param {string} fallback returned when the header is absent or unusable.
 * @returns {string}
 */
function filenameFromHeaders(headers, fallback) {
  const cd = headers.get('content-disposition');
  if (!cd) return fallback;

  const extended = cd.match(/filename\*\s*=\s*([^;]+)/i);
  if (extended) {
    const raw = extended[1]
      .trim()
      .replace(/^"|"$/g, '')
      .replace(/^utf-8'[^']*'/i, ''); // charset is case-insensitive; language tag is optional
    if (raw) {
      try {
        return decodeURIComponent(raw);
      } catch {
        // Malformed percent-encoding: fall through to the plain form.
      }
    }
  }

  const plain = cd.match(/filename\s*=\s*"?([^";]+)"?/i);
  if (plain && plain[1].trim()) return plain[1].trim();
  return fallback;
}

/**
 * Make a string safe to use as a file name and ensure it ends in ".pdf".
 * Path separators, characters reserved on common file systems and control
 * characters are replaced with "_".
 *
 * @param {unknown} name
 * @returns {string} 'file.pdf' for empty input.
 */
function normalizeFilename(name) {
  const base = String(name || '').trim().replace(/[\/:*?"<>|\\\x00-\x1f]+/g, '_');
  if (/\.pdf$/i.test(base)) return base;
  return base ? `${base}.pdf` : 'file.pdf';
}

/**
 * Build a Content-Disposition value that is always a valid header.
 *
 * Node rejects header values containing non-Latin-1 characters, so the
 * quoted `filename=` carries an ASCII-only fallback and the real name goes
 * into the percent-encoded `filename*=` parameter.
 *
 * @param {string} filename
 * @returns {string}
 */
function contentDisposition(filename) {
  // Quotes, backslashes and line breaks would break out of the quoted string.
  const clean = String(filename).replace(/[\r\n"\\]/g, '_');
  const ascii = clean.replace(/[^\x20-\x7e]/g, '_');
  try {
    // encodeURIComponent leaves ' ( ) * unescaped; escape them as well.
    const encoded = encodeURIComponent(clean).replace(
      /['()*]/g,
      (c) => `%${c.charCodeAt(0).toString(16).toUpperCase()}`,
    );
    return `attachment; filename="${ascii}"; filename*=UTF-8''${encoded}`;
  } catch {
    // encodeURIComponent throws on lone surrogates; send the ASCII form only.
    return `attachment; filename="${ascii}"`;
  }
}

/**
 * Read the first worksheet and map each PDF URL to its target file name
 * ("<organizer>-<pdf name>.pdf").
 *
 * Columns are located by header text: the first column is the organizer,
 * the URL column matches 地址/链接/url and the name column matches 名,
 * with headers that also contain "pdf" preferred.
 *
 * @param {string} excelPath
 * @returns {Map<string, string>} URL -> file name; empty for an empty sheet.
 */
function loadNameMapFromExcel(excelPath) {
  const workbook = xlsx.readFile(excelPath);
  const sheet = workbook.Sheets[workbook.SheetNames[0]];
  const rows = xlsx.utils.sheet_to_json(sheet, { defval: '' });
  const map = new Map();
  if (!rows.length) return map;

  const keys = Object.keys(rows[0]);
  const organizerKey = keys[0]; // first column: organizer (school) name
  const looksLikeUrl = (k) => /(地址|链接|url)/i.test(k);
  const looksLikeName = (k) => /(名|名称)/.test(k);

  const urlKey = keys.find((k) => /pdf/i.test(k) && looksLikeUrl(k))
    || keys.find(looksLikeUrl)
    || keys[0];
  // The organizer header usually contains "名" too, so prefer any other
  // name-like column before falling back to it.
  const nameKey = keys.find((k) => /pdf/i.test(k) && looksLikeName(k))
    || keys.find((k) => k !== organizerKey && k !== urlKey && looksLikeName(k))
    || keys.find(looksLikeName)
    || keys[1]
    || keys[0];

  for (const row of rows) {
    const url = String(row[urlKey] || '').trim();
    if (!url) continue;
    const baseName = String(row[nameKey] || '').trim();
    // Skip the prefix when the name column IS the organizer column.
    const organizer = nameKey === organizerKey ? '' : String(row[organizerKey] || '').trim();
    const joined = [organizer, baseName].filter(Boolean).join('-');
    map.set(url, normalizeFilename(joined));
  }
  return map;
}

/**
 * Create a function that returns a name not handed out before, appending
 * " (2)", " (3)", ... before the ".pdf" extension on collisions.
 *
 * @returns {(name: string) => string}
 */
function createUniqueNamer() {
  const used = new Set();
  return (name) => {
    const base = /\.pdf$/i.test(name) ? name : `${name}.pdf`;
    let candidate = base;
    for (let i = 2; used.has(candidate); i += 1) {
      candidate = base.replace(/\.pdf$/i, ` (${i}).pdf`);
    }
    used.add(candidate);
    return candidate;
  };
}

/**
 * Discard an unused fetch response body so its connection is released.
 *
 * @param {Response} response
 */
async function discardBody(response) {
  try {
    await response.body?.cancel();
  } catch {
    // Nothing useful to do if cancelling fails; the response is unused anyway.
  }
}

/**
 * Single-file proxy: GET /dl?url=...
 * Streams the upstream response back as an attachment.
 *
 * @param {import('node:http').IncomingMessage} req
 * @param {import('node:http').ServerResponse} res
 * @param {URL} u parsed request URL.
 */
async function handleDownload(req, res, u) {
  const target = u.searchParams.get('url');
  if (!isHttpUrl(target)) {
    res.writeHead(400, corsHeaders(req));
    res.end('Bad url');
    return;
  }

  const upstream = await fetch(target, { redirect: 'follow' });
  if (!upstream.ok) {
    await discardBody(upstream);
    res.writeHead(502, corsHeaders(req));
    res.end(`Upstream ${upstream.status}`);
    return;
  }

  const name = filenameFromHeaders(upstream.headers, inferFilenameFromUrl(target));
  const type = upstream.headers.get('content-type') || 'application/octet-stream';
  res.writeHead(200, {
    ...corsHeaders(req),
    'Content-Type': type,
    'Content-Disposition': contentDisposition(name),
    'Cache-Control': 'no-store',
    'Access-Control-Expose-Headers': 'Content-Disposition',
  });

  if (Readable.fromWeb && upstream.body && typeof upstream.body.getReader === 'function') {
    const source = Readable.fromWeb(upstream.body);
    // pipe() does not forward errors; without a listener an upstream
    // failure mid-transfer would be an uncaught exception.
    source.on('error', (err) => {
      console.warn(`Download aborted for ${target}: ${err?.message || err}`);
      res.destroy();
    });
    // Stop reading from upstream once the client is gone.
    res.on('close', () => source.destroy());
    source.pipe(res);
  } else {
    res.end(Buffer.from(await upstream.arrayBuffer()));
  }
}

/**
 * ZIP endpoint: POST /zip-excel { urls: string[], name?: string }
 * Fetches every URL and streams a ZIP whose entries are named from the
 * Excel sheet, falling back to a name inferred from the URL.
 *
 * @param {import('node:http').IncomingMessage} req
 * @param {import('node:http').ServerResponse} res
 */
async function handleZipExcel(req, res) {
  const body = await readJson(req);
  const inputUrls = Array.isArray(body?.urls) ? body.urls.filter(Boolean) : [];
  const zipName = (typeof body?.name === 'string' && body.name.trim()) || 'pdf-batch.zip';

  if (!inputUrls.length) {
    res.writeHead(400, corsHeaders(req));
    res.end('No urls');
    return;
  }
  if (!fs.existsSync(EXCEL_FILE)) {
    res.writeHead(404, corsHeaders(req));
    res.end('Excel not found');
    return;
  }

  const nameMap = loadNameMapFromExcel(EXCEL_FILE);
  res.writeHead(200, {
    ...corsHeaders(req),
    'Content-Type': 'application/zip',
    'Content-Disposition': contentDisposition(zipName),
    'Cache-Control': 'no-store',
  });

  const archive = archiver('zip', { zlib: { level: 9 } });
  let clientGone = false;
  archive.on('error', (err) => {
    console.error(`ZIP failed: ${err?.message || err}`);
    res.destroy(err);
  });
  res.on('close', () => {
    if (!res.writableFinished) {
      // Client disconnected early: stop fetching and drop queued entries.
      clientGone = true;
      archive.abort();
    }
  });
  archive.pipe(res);

  const ensureUnique = createUniqueNamer();
  for (const entry of inputUrls) {
    if (clientGone) break;
    const target = typeof entry === 'string' ? entry.trim() : '';
    if (!isHttpUrl(target)) {
      console.warn(`Skipped (not an http/https URL): ${String(entry)}`);
      continue;
    }
    try {
      const upstream = await fetch(target, { redirect: 'follow' });
      if (!upstream.ok) {
        await discardBody(upstream);
        console.warn(`Skipped ${target}: upstream ${upstream.status}`);
        continue;
      }
      const preferred = nameMap.get(target) || inferFilenameFromUrl(target);
      const name = ensureUnique(preferred);
      const stream = (Readable.fromWeb && upstream.body && typeof upstream.body.getReader === 'function')
        ? Readable.fromWeb(upstream.body)
        : Readable.from(Buffer.from(await upstream.arrayBuffer()));
      archive.append(stream, { name });
    } catch (err) {
      // Best effort: one failing URL must not abort the whole batch.
      console.warn(`Skipped ${target}: ${err?.message || err}`);
    }
  }

  if (!clientGone) await archive.finalize();
}

const server = http.createServer(async (req, res) => {
  try {
    const u = new URL(req.url, `http://${HOST}:${PORT}`);

    // CORS preflight
    if (req.method === 'OPTIONS') {
      res.writeHead(204, {
        ...corsHeaders(req),
        'Access-Control-Allow-Methods': 'GET,POST,OPTIONS',
        'Access-Control-Allow-Headers': 'Content-Type',
        'Access-Control-Max-Age': '86400',
      });
      res.end();
      return;
    }

    if (u.pathname === '/dl') {
      await handleDownload(req, res, u);
      return;
    }

    if (u.pathname === '/zip-excel' && req.method === 'POST') {
      await handleZipExcel(req, res);
      return;
    }

    res.writeHead(404, corsHeaders(req));
    res.end('Not Found');
  } catch (e) {
    if (res.headersSent) {
      // Too late for a status line; writeHead would throw. Drop the connection.
      console.error(`Request failed after headers were sent: ${e?.message || e}`);
      res.destroy();
      return;
    }
    res.writeHead(500, corsHeaders(req));
    res.end('Error: ' + (e?.message || String(e)));
  }
});

server.listen(PORT, HOST, () => {
  console.log(`ZIP by Excel http://${HOST}:${PORT}/zip-excel (POST {"urls":[...]})`);
  console.log(`Single file http://${HOST}:${PORT}/dl?url=<encoded>`);
});

/**
 * Read the request body and parse it as JSON.
 *
 * @param {import('node:http').IncomingMessage} req
 * @returns {Promise<any>} the parsed value; `{}` for an empty body, invalid
 *   JSON or a stream error (never rejects).
 */
function readJson(req) {
  return new Promise((resolve) => {
    let raw = '';
    req.setEncoding('utf8');
    req.on('data', (chunk) => {
      raw += chunk;
    });
    req.on('end', () => {
      try {
        resolve(JSON.parse(raw || '{}'));
      } catch {
        // Invalid JSON is treated like an empty body.
        resolve({});
      }
    });
    req.on('error', () => resolve({}));
  });
}
前端:一键打包(按 Excel 命名)
你可以在任意页面发起请求,也可用一个独立 HTML 页。核心调用如下:
<button id="zipExcel">按Excel命名ZIP下载</button>
<script>
  // Replace with your real PDF URL list, for example:
  //   'https://example.org/portal/api/public/view?url=encoded-key-1',
  //   'https://example.org/portal/api/public/view?url=encoded-key-2',
  const RAW_URLS = [];

  /**
   * Ask the local proxy for a ZIP of RAW_URLS and save it through a
   * temporary <a download> link.
   * Throws when the proxy answers with a non-2xx status.
   */
  async function downloadZipByExcel() {
    // Timestamped name, e.g. pdf-by-excel-20240131120000.zip
    const stamp = new Date().toISOString().slice(0, 19).replace(/[-:T]/g, '');
    const name = `pdf-by-excel-${stamp}.zip`;

    const resp = await fetch('http://127.0.0.1:8787/zip-excel', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ urls: RAW_URLS, name }),
    });
    if (!resp.ok) throw new Error('HTTP ' + resp.status);

    const blob = await resp.blob();
    const objectUrl = URL.createObjectURL(blob);
    const a = document.createElement('a');
    a.href = objectUrl;
    a.download = name;
    document.body.appendChild(a);
    a.click();
    a.remove();
    // Release the blob later; revoking right away can cancel the download.
    setTimeout(() => URL.revokeObjectURL(objectUrl), 60000);
  }

  document.getElementById('zipExcel').onclick = () => {
    // Surface failures instead of leaving an unhandled rejection.
    downloadZipByExcel().catch((err) => {
      console.error(err);
      alert('下载失败: ' + (err?.message || String(err)));
    });
  };
</script>
使用步骤
- 在项目根目录放置 `承办校设备清单.xlsx`(第一列承办校名称,另有 pdf名称、pdf地址列)
- 启动代理(你执行,两条命令分别运行):
npm i archiver xlsx
node server.mjs
- 前端点击“按Excel命名ZIP下载”,保存返回的 ZIP
安全与隐私(发布建议)
- 文章与代码不包含真实域名、URL、Excel 文件名(均为通用占位)
- 服务仅绑定 `127.0.0.1`,默认不可被局域网/公网访问
- Excel 路径固定为根目录 `承办校设备清单.xlsx`,未开放任意路径
- 发布时不要附上真实 URL 列表与含真实名称的截图;示例中使用 `https://example.org/...`
如需进一步强化(可选):
- 给代理加限流/超时
- 给 URL 拉取增加并发数控制与失败日志
- ZIP 中附加 `errors.txt` 记录失败条目