当前位置：首页 > news >正文

用本地代理 + ZIP 打包 + Excel 命名，优雅批量下载跨域 PDF

news 2025/8/19 9:05:57

本文提供一套可直接落地的方案：前端一键把大量 PDF 批量下载，自动避开跨域限制，并按照 Excel 中的“承办校名称 + pdf名称”精准命名，最终一次性打包成 ZIP 保存，避免多次“另存为”。示例中不包含任何真实域名、文件名或截图，请将占位符替换为你的实际数据。

你将获得

一次性打包 ZIP，避免多次保存弹窗
跨域无感代理，返回 Content-Disposition: attachment
Excel 命名：文件名为“承办校名称-pdf名称.pdf”
默认仅绑定本机 127.0.0.1，安全发布

最小项目结构

你的项目/
├─ server.mjs                  # 本地下载/打包代理（Node 18+）
├─ test.html                   # 前端触发页面（也可嵌入你的系统）
└─ 承办校设备清单.xlsx           # Excel（默认读取根目录此文件）

Excel 第一张工作表包含列（列名不必完全一致，程序会智能匹配并兜底）：

第1列：承办校名称（用于前缀）
另一列：pdf名称（用于命名）
另一列：pdf地址（URL，如 https://example.org/portal/api/public/view?url=...）

示例（以 CSV 形式示意）：

承办校名称,pdf名称,pdf地址
示例职业技术学院-中职1组,设备与场地信息清单.pdf,https://example.org/portal/api/public/view?url=...
...

后端：本地代理与 ZIP 打包（更安全默认值）

安装与启动（在项目根目录执行以下命令；程序按当前工作目录查找 Excel 文件）：

npm i archiver xlsx
node server.mjs

仅监听 127.0.0.1，避免局域网可达
固定读取根目录的 承办校设备清单.xlsx，不开放任意路径
CORS 仅允许本地来源

// server.mjs (Node 18+)
import http from 'node:http';
import { Readable } from 'node:stream';
import archiver from 'archiver';
import fs from 'node:fs';
import path from 'node:path';
import xlsx from 'xlsx';

const PORT = 8787;
const HOST = '127.0.0.1'; // loopback only, so the proxy is not reachable from the LAN
// Workbook that supplies the ZIP entry names; resolved against the working directory.
const EXCEL_FILE = path.join(process.cwd(), '承办校设备清单.xlsx');
// Origins allowed to read responses: requests without an Origin header,
// the opaque "null" origin, and this server's own origin.
const ALLOWED_ORIGINS = new Set([undefined, 'null', `http://${HOST}:${PORT}`]);

/**
 * Build the CORS response headers for a request.
 *
 * An allowed origin is echoed back ('*' when the request carries no Origin
 * header); any other origin receives the literal value 'null'.
 *
 * @param {import('node:http').IncomingMessage} req
 * @returns {Record<string, string>}
 */
function corsHeaders(req) {
  const { origin } = req.headers;
  return {
    'Access-Control-Allow-Origin': ALLOWED_ORIGINS.has(origin) ? (origin || '*') : 'null',
    // The value above depends on the request's Origin, so caches must key on it.
    Vary: 'Origin',
  };
}

/**
 * Derive a ".pdf" file name from a URL: the last path segment of its `url`
 * query parameter when present, otherwise of its own path.
 *
 * @param {string} u
 * @returns {string} A name ending in ".pdf"; 'file.pdf' when nothing usable is found.
 */
function inferFilenameFromUrl(u) {
  try {
    const parsed = new URL(u);
    const inner = parsed.searchParams.get('url');
    const segment = (inner || parsed.pathname).split('/').filter(Boolean).pop() || 'file.pdf';
    return segment.toLowerCase().endsWith('.pdf') ? segment : `${segment}.pdf`;
  } catch {
    // Not a parseable URL: fall back to a generic name.
    return 'file.pdf';
  }
}

/**
 * Extract a file name from a Content-Disposition header.
 *
 * The extended `filename*=` form is preferred (charset prefix matched
 * case-insensitively, value percent-decoded); the plain `filename=` form is
 * used next; `fallback` is returned when neither yields a name.
 *
 * @param {Headers} headers
 * @param {string} fallback
 * @returns {string}
 */
function filenameFromHeaders(headers, fallback) {
  const cd = headers.get('content-disposition');
  if (!cd) return fallback;

  const extended = cd.match(/filename\*\s*=\s*([^;]+)/i);
  if (extended?.[1]) {
    const raw = extended[1]
      .trim()
      .replace(/^"|"$/g, '')
      .replace(/^utf-8'[^']*'/i, '');
    try {
      return decodeURIComponent(raw);
    } catch {
      // Malformed percent-encoding: try the plain form below.
    }
  }

  const plain = cd.match(/filename\s*=\s*"?([^";]+)"?/i);
  if (plain?.[1]) return plain[1].trim();
  return fallback;
}

/**
 * Replace characters that are invalid in file names with "_" and make sure
 * the result ends in ".pdf".
 *
 * @param {unknown} name
 * @returns {string} 'file.pdf' when the input is empty.
 */
function normalizeFilename(name) {
  const base = String(name || '').trim().replace(/[\/:*?"<>|\\]+/g, '_');
  if (/\.pdf$/i.test(base)) return base;
  return base ? `${base}.pdf` : 'file.pdf';
}

/**
 * Read the first worksheet and build a map from PDF URL to target file name
 * ("<first column>-<pdf name>.pdf").
 *
 * Column headers are matched loosely: the URL column is a header containing
 * "pdf" plus 地址/链接/url (then any 地址/链接/url header, then the first
 * column); the name column is a header containing "pdf" plus 名 (then any
 * header containing 名, then the second or first column).
 *
 * @param {string} excelPath
 * @returns {Map<string, string>}
 */
function loadNameMapFromExcel(excelPath) {
  const wb = xlsx.readFile(excelPath);
  const sheet = wb.Sheets[wb.SheetNames[0]];
  const rows = xlsx.utils.sheet_to_json(sheet, { defval: '' });
  const map = new Map();
  if (!rows.length) return map;

  const keys = Object.keys(rows[0]);
  const organizerKey = keys[0]; // first column: organizer (host school) name
  let urlKey = keys.find((k) => /pdf/i.test(k) && /(地址|链接|url)/i.test(k));
  let nameKey = keys.find((k) => /pdf/i.test(k) && /(名|名称)/.test(k));
  if (!urlKey) urlKey = keys.find((k) => /(地址|链接|url)/i.test(k)) || keys[0];
  if (!nameKey) nameKey = keys.find((k) => /(名|名称)/.test(k)) || keys[1] || keys[0];

  for (const row of rows) {
    const url = String(row[urlKey] || '').trim();
    const organizer = String(row[organizerKey] || '').trim();
    const baseName = String(row[nameKey] || '').trim();
    const joined = organizer ? `${organizer}-${baseName}` : baseName;
    if (url) map.set(url, normalizeFilename(joined));
  }
  return map;
}

/** True when `value` is a string starting with http:// or https://. */
function isHttpUrl(value) {
  return typeof value === 'string' && (value.startsWith('http://') || value.startsWith('https://'));
}

/**
 * Build a Content-Disposition value that is always a legal header.
 *
 * Node rejects header values containing characters above U+00FF, so the
 * quoted `filename` carries an ASCII-only fallback and the real name travels
 * percent-encoded in `filename*`.
 *
 * @param {string} name
 * @returns {string}
 */
function contentDisposition(name) {
  const clean = String(name).replace(/[\r\n]/g, '_');
  const ascii = clean.replace(/[^\x20-\x7e]|["\\]/g, '_');
  try {
    // encodeURIComponent leaves ' ( ) * unescaped; they are not allowed in filename*.
    const encoded = encodeURIComponent(clean).replace(
      /['()*]/g,
      (c) => `%${c.charCodeAt(0).toString(16).toUpperCase()}`,
    );
    return `attachment; filename="${ascii}"; filename*=UTF-8''${encoded}`;
  } catch {
    // encodeURIComponent throws on lone surrogates; send the ASCII form only.
    return `attachment; filename="${ascii}"`;
  }
}

/** Turn a fetch Response body into a Node Readable (buffering when it cannot be streamed). */
async function bodyToReadable(upstream) {
  if (Readable.fromWeb && upstream.body && typeof upstream.body.getReader === 'function') {
    return Readable.fromWeb(upstream.body);
  }
  return Readable.from(Buffer.from(await upstream.arrayBuffer()));
}

/** Best-effort release of a response whose body will not be read. */
async function discardBody(response) {
  try {
    await response.body?.cancel();
  } catch {
    // Body already closed or errored; nothing left to release.
  }
}

/** Return a function mapping a preferred name to one not handed out before ("x (2).pdf", ...). */
function createUniqueNamer() {
  const used = new Set();
  return (preferred) => {
    const base = /\.pdf$/i.test(preferred) ? preferred : `${preferred}.pdf`;
    let candidate = base;
    for (let i = 2; used.has(candidate); i += 1) {
      candidate = base.replace(/\.pdf$/i, ` (${i}).pdf`);
    }
    used.add(candidate);
    return candidate;
  };
}

/** Single-file proxy: GET /dl?url=... streams the upstream file back as an attachment. */
async function handleSingleFile(req, res, target) {
  if (!isHttpUrl(target)) {
    res.writeHead(400, corsHeaders(req));
    res.end('Bad url');
    return;
  }

  const upstream = await fetch(target, { redirect: 'follow' });
  if (!upstream.ok) {
    await discardBody(upstream);
    res.writeHead(502, corsHeaders(req));
    res.end('Upstream ' + upstream.status);
    return;
  }

  const name = filenameFromHeaders(upstream.headers, inferFilenameFromUrl(target));
  const type = upstream.headers.get('content-type') || 'application/octet-stream';
  // Obtain the stream before sending headers so a failure can still become a 500.
  const source = await bodyToReadable(upstream);

  res.writeHead(200, {
    ...corsHeaders(req),
    'Content-Type': type,
    'Content-Disposition': contentDisposition(name),
    'Cache-Control': 'no-store',
    'Access-Control-Expose-Headers': 'Content-Disposition',
  });

  // Without an 'error' listener an upstream failure would be an uncaught exception.
  source.on('error', (err) => {
    console.error(`Upstream stream failed for ${target}: ${err?.message || err}`);
    res.destroy(err);
  });
  // Stop downloading when the client goes away.
  res.on('close', () => source.destroy());
  source.pipe(res);
}

/** ZIP endpoint: POST /zip-excel { urls: string[], name?: string } */
async function handleZipByExcel(req, res) {
  const body = await readJson(req);
  const inputUrls = Array.isArray(body?.urls) ? body.urls.filter(Boolean) : [];
  const zipName = (typeof body?.name === 'string' && body.name.trim()) || 'pdf-batch.zip';

  if (!inputUrls.length) {
    res.writeHead(400, corsHeaders(req));
    res.end('No urls');
    return;
  }
  if (!fs.existsSync(EXCEL_FILE)) {
    res.writeHead(404, corsHeaders(req));
    res.end('Excel not found');
    return;
  }

  // Loaded before the headers are sent so a broken workbook still yields a 500.
  const nameMap = loadNameMapFromExcel(EXCEL_FILE);

  res.writeHead(200, {
    ...corsHeaders(req),
    'Content-Type': 'application/zip',
    'Content-Disposition': contentDisposition(zipName),
    'Cache-Control': 'no-store',
  });

  const archive = archiver('zip', { zlib: { level: 9 } });
  archive.on('error', (err) => {
    console.error(`Archive failed: ${err?.message || err}`);
    res.destroy(err);
  });

  let clientGone = false;
  res.on('close', () => {
    if (!res.writableFinished) {
      clientGone = true;
      archive.abort();
    }
  });
  archive.pipe(res);

  const uniqueName = createUniqueNamer();
  for (const target of inputUrls) {
    if (clientGone) break;
    if (!isHttpUrl(target)) {
      console.warn(`Skipping non-http(s) entry: ${String(target)}`);
      continue;
    }
    try {
      const upstream = await fetch(target, { redirect: 'follow' });
      if (!upstream.ok) {
        console.warn(`Skipping ${target}: upstream status ${upstream.status}`);
        await discardBody(upstream);
        continue;
      }
      // The URL-derived fallback is sanitised too, so it cannot carry path separators.
      const preferred = normalizeFilename(nameMap.get(target) || inferFilenameFromUrl(target));
      const source = await bodyToReadable(upstream);
      archive.append(source, { name: uniqueName(preferred) });
    } catch (err) {
      // One failed download must not abort the whole batch.
      console.warn(`Skipping ${target}: ${err?.message || err}`);
    }
  }

  if (clientGone) return;
  archive.finalize().catch((err) => {
    console.error(`Finalizing archive failed: ${err?.message || err}`);
  });
}

const server = http.createServer(async (req, res) => {
  try {
    const u = new URL(req.url, `http://${HOST}:${PORT}`);

    // CORS preflight
    if (req.method === 'OPTIONS') {
      res.writeHead(204, {
        ...corsHeaders(req),
        'Access-Control-Allow-Methods': 'GET,POST,OPTIONS',
        'Access-Control-Allow-Headers': 'Content-Type',
        'Access-Control-Max-Age': '86400',
      });
      res.end();
      return;
    }

    if (u.pathname === '/dl') {
      await handleSingleFile(req, res, u.searchParams.get('url'));
      return;
    }

    if (u.pathname === '/zip-excel' && req.method === 'POST') {
      await handleZipByExcel(req, res);
      return;
    }

    res.writeHead(404, corsHeaders(req));
    res.end('Not Found');
  } catch (e) {
    console.error(`Request failed: ${e?.message || e}`);
    if (res.headersSent) {
      // A status line is already on the wire; the only honest signal left is to cut the response.
      res.destroy();
      return;
    }
    res.writeHead(500, corsHeaders(req));
    res.end('Error: ' + (e?.message || String(e)));
  }
});

server.listen(PORT, HOST, () => {
  console.log(`ZIP by Excel  http://${HOST}:${PORT}/zip-excel  (POST {"urls":[...]})`);
  console.log(`Single file   http://${HOST}:${PORT}/dl?url=<encoded>`);
});

/**
 * Read the request body and parse it as JSON.
 *
 * @param {import('node:http').IncomingMessage} req
 * @returns {Promise<any>} The parsed value; `{}` when the body is empty, is
 *   not valid JSON, or the request stream errors.
 */
function readJson(req) {
  return new Promise((resolve) => {
    let raw = '';
    req.setEncoding('utf8');
    req.on('data', (chunk) => {
      raw += chunk;
    });
    req.on('end', () => {
      try {
        resolve(JSON.parse(raw || '{}'));
      } catch {
        // Invalid JSON is treated as an empty payload.
        resolve({});
      }
    });
    req.on('error', () => resolve({}));
  });
}

前端：一键打包（按 Excel 命名）

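你可以用一个独立的 HTML 页发起请求：直接从本地以 file:// 打开时，浏览器发送的 Origin 为 null，已在服务端的 CORS 允许列表中。若要嵌入到其他来源的页面（例如你的系统），需先把该页面的来源加入 server.mjs 中 corsHeaders 的允许列表，否则浏览器会拦截响应。核心调用如下：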

<button id="zipExcel">按Excel命名ZIP下载</button>
<script>
  // Replace with your real PDF URLs, for example:
  //   'https://example.org/portal/api/public/view?url=encoded-key-1',
  //   'https://example.org/portal/api/public/view?url=encoded-key-2',
  const RAW_URLS = [];

  /**
   * Ask the local proxy to bundle RAW_URLS into one ZIP and save it through a
   * temporary <a download> link.
   *
   * @returns {Promise<void>}
   * @throws {Error} When the proxy answers with a non-2xx status.
   */
  async function downloadZipByExcel() {
    const stamp = new Date().toISOString().slice(0, 19).replace(/[-:T]/g, '');
    const name = `pdf-by-excel-${stamp}.zip`;

    const resp = await fetch('http://127.0.0.1:8787/zip-excel', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ urls: RAW_URLS, name }),
    });
    if (!resp.ok) throw new Error(`HTTP ${resp.status}`);

    const blob = await resp.blob();
    const objectUrl = URL.createObjectURL(blob);
    const a = document.createElement('a');
    a.href = objectUrl;
    a.download = name;
    document.body.appendChild(a);
    a.click();
    a.remove();
    // Release the blob, but only after the browser has had time to start the download.
    setTimeout(() => URL.revokeObjectURL(objectUrl), 40000);
  }

  const zipButton = document.getElementById('zipExcel');
  zipButton.onclick = async () => {
    // Block repeat clicks while a batch is in flight and surface failures to the user.
    zipButton.disabled = true;
    try {
      await downloadZipByExcel();
    } catch (err) {
      console.error(err);
      alert(`下载失败：${err?.message || err}`);
    } finally {
      zipButton.disabled = false;
    }
  };
</script>