当前位置: 首页 > news >正文

将 Word(doc/docx)和 PDF 转成一张垂直拼接的长 PNG 图片的工具类

支持Word (doc/docx) 和 PDF 转成一张垂直拼接的长PNG图片

1.依赖

<dependency><groupId>org.apache.poi</groupId><artifactId>poi</artifactId><version>5.2.5</version>
</dependency>
<dependency><groupId>org.apache.poi</groupId><artifactId>poi-scratchpad</artifactId><version>5.2.5</version>
</dependency>
<dependency><groupId>org.apache.poi</groupId><artifactId>poi-ooxml</artifactId><version>5.2.5</version> <!-- 如果有 DOCX 相关 -->
</dependency>
<dependency><groupId>org.apache.pdfbox</groupId><artifactId>pdfbox</artifactId><version>3.0.5</version>
</dependency>
<!-- Word转PDF:Docx4J核心 -->
<dependency><groupId>org.docx4j</groupId><artifactId>docx4j-JAXB-ReferenceImpl</artifactId><version>8.3.10</version>
</dependency>
<!-- Word转PDF:FO导出支持(用于PDF渲染) -->
<dependency><groupId>org.docx4j</groupId><artifactId>docx4j-export-fo</artifactId><version>8.3.10</version>
</dependency>
<dependency><groupId>javax.xml.bind</groupId><artifactId>jaxb-api</artifactId><version>2.3.1</version>
</dependency>
<dependency><groupId>org.glassfish.jaxb</groupId><artifactId>jaxb-runtime</artifactId><version>2.3.3</version>
</dependency>
<!-- 如果 Docx4J 版本较旧,可能还需要引入 javax.activation 依赖 -->
<dependency><groupId>com.sun.activation</groupId><artifactId>javax.activation</artifactId><version>1.2.0</version>
</dependency>

2.代码

import lombok.extern.slf4j.Slf4j;
import org.apache.fop.apps.Fop;
import org.apache.fop.apps.FopFactory;
import org.apache.fop.apps.MimeConstants;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.io.RandomAccessReadBuffer;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.WordToFoConverter;
import org.docx4j.Docx4J;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.w3c.dom.Document;import javax.imageio.ImageIO;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.*;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.sax.SAXResult;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;/*** @Description 文档转长图片工具类* 支持Word (doc/docx) 和 PDF 转成一张垂直拼接的长PNG图片*/@Slf4j
public class DocumentToImageUtil {/*** 将文档(Word或PDF)转换为长图片字节数组* @param content 文档字节数组* @param fileType 文件类型(doc, docx, pdf)* @return PNG图片字节数组* @throws IOException 转换失败*/public byte[] convertToStitchedImage(byte[] content, String fileType) throws IOException {byte[] pdfContent = content;if (fileType.toLowerCase().contains("doc") || fileType.toLowerCase().contains("docx")) {pdfContent = convertWordToPdf(content, fileType.toLowerCase());}return convertPdfToStitchedImage(pdfContent);}/*** Word转PDF(支持doc和docx)*/private byte[] convertWordToPdf(byte[] wordContent, String fileType) throws IOException {if (fileType.toLowerCase().contains("docx")) {// DOCX转PDF使用Docx4Jtry (ByteArrayInputStream bais = new ByteArrayInputStream(wordContent)) {WordprocessingMLPackage loadedPackage = WordprocessingMLPackage.load(bais);ByteArrayOutputStream baos = new ByteArrayOutputStream();Docx4J.toPDF(loadedPackage, baos);return baos.toByteArray();} catch (Exception e) {throw new IOException("DOCX转PDF失败", e);}} else if (fileType.toLowerCase().contains("doc")) {// DOC转PDF使用POI HWPF + Transformer + FOPtry (ByteArrayInputStream bais = new ByteArrayInputStream(wordContent);HWPFDocument document = new HWPFDocument(bais)) {// 1. 使用 WordToFoConverter 生成 FO Document(移除 setFontResolver,使用默认)Document foDoc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();WordToFoConverter converter = new WordToFoConverter(foDoc);// 注意:在 POI 5.2.5 中,setFontResolver 已弃用或移除,使用默认字体解析器converter.processDocument(document);// 2. 
Transformer 将 FO Document 序列化为字节流ByteArrayOutputStream foBaos = new ByteArrayOutputStream();TransformerFactory tf = TransformerFactory.newInstance();Transformer serializer = tf.newTransformer();serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");serializer.setOutputProperty(OutputKeys.INDENT, "yes");serializer.setOutputProperty(OutputKeys.METHOD, "xml"); // 输出 XML-FOserializer.transform(new DOMSource(foDoc), new StreamResult(foBaos));byte[] foBytes = foBaos.toByteArray();// 3. FOP 将 FO 转 PDF(使用带URI参数的newInstance,避免解析问题)FopFactory fopFactory = FopFactory.newInstance(new File(".").toURI()); // 标准方式,设置base URIByteArrayOutputStream pdfBaos = new ByteArrayOutputStream();Fop fop = fopFactory.newFop(MimeConstants.MIME_PDF, pdfBaos);Transformer transformer = tf.newTransformer(); // 身份转换器Source src = new StreamSource(new ByteArrayInputStream(foBytes));Result res = new SAXResult(fop.getDefaultHandler());transformer.transform(src, res);return pdfBaos.toByteArray();} catch (Exception e) {throw new IOException("DOC转PDF失败", e);}} else {throw new IOException("不支持的文件类型: " + fileType);}}/*** PDF转垂直拼接的长图片*/private byte[] convertPdfToStitchedImage(byte[] pdfContent) throws IOException {try (ByteArrayInputStream bais = new ByteArrayInputStream(pdfContent);RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(bais);PDDocument document = Loader.loadPDF(buffer)) {PDFRenderer renderer = new PDFRenderer(document);int numPages = document.getNumberOfPages();if (numPages == 0) {return null;}List<BufferedImage> pageImages = new ArrayList<>();int totalHeight = 0;int maxWidth = 0;for (int i = 0; i < numPages; i++) {BufferedImage pageImage = renderer.renderImageWithDPI(i, 150);pageImages.add(pageImage);totalHeight += pageImage.getHeight();maxWidth = Math.max(maxWidth, pageImage.getWidth());}BufferedImage stitchedImage = new BufferedImage(maxWidth, totalHeight, BufferedImage.TYPE_INT_RGB);Graphics2D g2d = 
stitchedImage.createGraphics();g2d.setBackground(java.awt.Color.WHITE);g2d.clearRect(0, 0, maxWidth, totalHeight);int yOffset = 0;for (BufferedImage pageImage : pageImages) {g2d.drawImage(pageImage, 0, yOffset, null);yOffset += pageImage.getHeight();}g2d.dispose();ByteArrayOutputStream baos = new ByteArrayOutputStream();ImageIO.write(stitchedImage, "png", baos);return baos.toByteArray();} catch (Exception e) {throw new IOException("PDF转图片失败", e);}}
}

3.如何使用

byte[] imageBytes = new DocumentToImageUtil().convertToStitchedImage(content, fileType.toLowerCase());
http://www.dtcms.com/a/482205.html

相关文章:

  • JAVA同城预约服务家政服务美容美发洗车保洁搬家维修家装系统源码小程序+公众号+h5
  • 正规拼多多代运营公司如何优化网站结构
  • 三层前馈神经网络实战:MNIST手写数字识别
  • 深度学习(四)
  • 学习HAL库STM32F103C8T6(MQTT报文)
  • 【C++】C++11特性学习(1)——列表初始化 | 右值引用与移动语义
  • 网站布局 种类手机商城页面设计
  • 如何建设手机端网站电力公司建设安全文化
  • 红色 VR 大空间:技术赋能红色文化传承的运营价值与实践路径
  • 网络协议工程 - eNSP及相关软件安装 - [eNSP, VirtualBox, WinPcap, Wireshark, Win7]
  • WHAT - 前端性能指标(交互和响应性能指标)
  • 专业的媒体发稿网
  • dede旅游网站模板wordpress教学主题
  • 做网站的技术性说明怎么自己做微网站吗
  • VScode安装以及C/C++环境配置20251014
  • 黄页网站大全通俗易懂wordpress 数据库配置错误
  • 常规的红外工业镜头有哪些?能做什么?
  • 一文读懂分子结合位点的预测:为双荧光素酶实验铺路
  • SM4密码核心知识点
  • 当代社会情绪分类及其改善方向深度解析
  • Python 求圆柱体的周长(Find the perimeter of a cylinder)
  • 攻防世界-Web-unseping
  • Python 第十三节 Python中各种输入输出方案详解及注意事项
  • 优秀的网站设计分析西电信息化建设处网站
  • 网页设计第6次课后作业
  • 算法---双指针一
  • ubuntu2404系统安装nocobase的方法
  • FFmpeg 播放播放 HTTP网络流读取数据过程分析
  • 使用Spring Boot构建系统安全层
  • 项目1:高分辨率(1920 * 1080)编码码流推送流媒体讲解