支持将 Word（doc/docx）和 PDF 文档转换为一张垂直拼接的 PNG 长图
1. 依赖（Maven；代码中使用了 Lombok 的 @Slf4j，还需另行引入 Lombok）
<dependency><groupId>org.apache.poi</groupId><artifactId>poi</artifactId><version>5.2.5</version>
</dependency>
<dependency><groupId>org.apache.poi</groupId><artifactId>poi-scratchpad</artifactId><version>5.2.5</version>
</dependency>
<dependency><groupId>org.apache.poi</groupId><artifactId>poi-ooxml</artifactId><version>5.2.5</version>
</dependency>
<dependency><groupId>org.apache.pdfbox</groupId><artifactId>pdfbox</artifactId><version>3.0.5</version>
</dependency>
<dependency><groupId>org.docx4j</groupId><artifactId>docx4j-JAXB-ReferenceImpl</artifactId><version>8.3.10</version>
</dependency>
<dependency><groupId>org.docx4j</groupId><artifactId>docx4j-export-fo</artifactId><version>8.3.10</version>
</dependency>
<dependency><groupId>javax.xml.bind</groupId><artifactId>jaxb-api</artifactId><version>2.3.1</version>
</dependency>
<dependency><groupId>org.glassfish.jaxb</groupId><artifactId>jaxb-runtime</artifactId><version>2.3.3</version>
</dependency>
<dependency><groupId>com.sun.activation</groupId><artifactId>javax.activation</artifactId><version>1.2.0</version>
</dependency>
2.代码
import lombok.extern.slf4j.Slf4j;
import org.apache.fop.apps.Fop;
import org.apache.fop.apps.FopFactory;
import org.apache.fop.apps.MimeConstants;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.io.RandomAccessReadBuffer;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.WordToFoConverter;
import org.docx4j.Docx4J;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.w3c.dom.Document;import javax.imageio.ImageIO;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.*;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.sax.SAXResult;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;@Slf4j
// Converts Word (.doc/.docx) and PDF documents into a single PNG in which all pages
// are stacked vertically. Word input is first converted to PDF (docx4j for .docx,
// POI's WordToFoConverter plus Apache FOP for .doc); the PDF is then rasterized
// page by page with PDFBox and the page images are stitched together.
public class DocumentToImageUtil {

    /** Substring that routes a file type to the Word pipeline (also matches "docx"). */
    private static final String DOC_MARKER = "doc";

    /** Substring that selects the docx4j converter instead of the legacy .doc converter. */
    private static final String DOCX_MARKER = "docx";

    /** Resolution used when rasterizing each PDF page. */
    private static final float RENDER_DPI = 150f;

    /**
     * Renders a document as one vertically stitched PNG.
     *
     * <p>Any {@code fileType} whose lower-cased form contains {@code "doc"} is treated as
     * Word and converted to PDF first; every other type is passed to the PDF renderer as-is.
     *
     * @param content  raw bytes of the source document
     * @param fileType string identifying the document type; matched case-insensitively
     * @return the PNG bytes, or {@code null} when the PDF contains no pages
     * @throws IOException          if {@code content} is null or empty, or any conversion step fails
     * @throws NullPointerException if {@code fileType} is null
     */
    public byte[] convertToStitchedImage(byte[] content, String fileType) throws IOException {
        java.util.Objects.requireNonNull(fileType, "fileType must not be null");
        if (content == null || content.length == 0) {
            // Fail early with a precise message instead of a generic failure from a parser.
            throw new IOException("文件内容不能为空");
        }

        // Locale.ROOT keeps the match independent of the JVM's default locale.
        String normalizedType = fileType.toLowerCase(java.util.Locale.ROOT);
        byte[] pdfContent = normalizedType.contains(DOC_MARKER)
                ? convertWordToPdf(content, normalizedType)
                : content;
        return convertPdfToStitchedImage(pdfContent);
    }

    /**
     * Converts Word bytes to PDF bytes, choosing the converter from {@code fileType}.
     *
     * @param wordContent raw bytes of the Word document
     * @param fileType    type string; "docx" is checked before "doc" because the former contains the latter
     * @return the PDF bytes
     * @throws IOException if the type is neither docx nor doc, or the conversion fails
     */
    private byte[] convertWordToPdf(byte[] wordContent, String fileType) throws IOException {
        String normalizedType = fileType.toLowerCase(java.util.Locale.ROOT);
        if (normalizedType.contains(DOCX_MARKER)) {
            return convertDocxToPdf(wordContent);
        }
        if (normalizedType.contains(DOC_MARKER)) {
            return convertDocToPdf(wordContent);
        }
        throw new IOException("不支持的文件类型: " + fileType);
    }

    /** Converts .docx bytes to PDF bytes via docx4j. */
    private byte[] convertDocxToPdf(byte[] docxContent) throws IOException {
        try (ByteArrayInputStream input = new ByteArrayInputStream(docxContent)) {
            WordprocessingMLPackage wordPackage = WordprocessingMLPackage.load(input);
            ByteArrayOutputStream pdfOutput = new ByteArrayOutputStream();
            Docx4J.toPDF(wordPackage, pdfOutput);
            return pdfOutput.toByteArray();
        } catch (Exception e) {
            throw new IOException("DOCX转PDF失败", e);
        }
    }

    /** Converts legacy .doc bytes to PDF bytes: .doc -> XSL-FO (POI) -> PDF (FOP). */
    private byte[] convertDocToPdf(byte[] docContent) throws IOException {
        try (ByteArrayInputStream input = new ByteArrayInputStream(docContent);
             HWPFDocument document = new HWPFDocument(input)) {
            // Step 1: let POI populate an empty DOM with the XSL-FO representation.
            Document foDoc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
            WordToFoConverter converter = new WordToFoConverter(foDoc);
            converter.processDocument(document);

            // Step 2: serialize the FO DOM to UTF-8 XML bytes.
            TransformerFactory transformerFactory = TransformerFactory.newInstance();
            Transformer serializer = transformerFactory.newTransformer();
            serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
            serializer.setOutputProperty(OutputKeys.INDENT, "yes");
            serializer.setOutputProperty(OutputKeys.METHOD, "xml");
            ByteArrayOutputStream foOutput = new ByteArrayOutputStream();
            serializer.transform(new DOMSource(foDoc), new StreamResult(foOutput));

            // Step 3: feed the FO XML through FOP, which writes the PDF into pdfOutput.
            // The current working directory is handed to FOP as its base URI.
            FopFactory fopFactory = FopFactory.newInstance(new File(".").toURI());
            ByteArrayOutputStream pdfOutput = new ByteArrayOutputStream();
            Fop fop = fopFactory.newFop(MimeConstants.MIME_PDF, pdfOutput);
            Source foSource = new StreamSource(new ByteArrayInputStream(foOutput.toByteArray()));
            Result pdfResult = new SAXResult(fop.getDefaultHandler());
            transformerFactory.newTransformer().transform(foSource, pdfResult);
            return pdfOutput.toByteArray();
        } catch (Exception e) {
            throw new IOException("DOC转PDF失败", e);
        }
    }

    /**
     * Rasterizes every page of a PDF and stacks the page images top to bottom.
     *
     * @param pdfContent raw bytes of the PDF
     * @return the PNG bytes, or {@code null} when the PDF contains no pages
     * @throws IOException if loading, rendering or encoding fails
     */
    private byte[] convertPdfToStitchedImage(byte[] pdfContent) throws IOException {
        try (ByteArrayInputStream input = new ByteArrayInputStream(pdfContent);
             RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(input);
             PDDocument document = Loader.loadPDF(buffer)) {
            int numPages = document.getNumberOfPages();
            if (numPages == 0) {
                return null;
            }

            PDFRenderer renderer = new PDFRenderer(document);
            List<BufferedImage> pageImages = new ArrayList<>(numPages);
            int totalHeight = 0;
            int maxWidth = 0;
            for (int pageIndex = 0; pageIndex < numPages; pageIndex++) {
                BufferedImage pageImage = renderer.renderImageWithDPI(pageIndex, RENDER_DPI);
                pageImages.add(pageImage);
                // addExact turns a silent int wrap-around on huge documents into an error.
                totalHeight = Math.addExact(totalHeight, pageImage.getHeight());
                maxWidth = Math.max(maxWidth, pageImage.getWidth());
            }

            BufferedImage stitchedImage = stitchVertically(pageImages, maxWidth, totalHeight);
            ByteArrayOutputStream pngOutput = new ByteArrayOutputStream();
            ImageIO.write(stitchedImage, "png", pngOutput);
            return pngOutput.toByteArray();
        } catch (Exception e) {
            throw new IOException("PDF转图片失败", e);
        }
    }

    /** Draws the pages one below another, left-aligned, on a white canvas of the given size. */
    private BufferedImage stitchVertically(List<BufferedImage> pageImages, int width, int height) {
        BufferedImage canvas = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);
        Graphics2D g2d = canvas.createGraphics();
        try {
            // White fill so that pages narrower than the widest one do not leave black bands.
            g2d.setBackground(java.awt.Color.WHITE);
            g2d.clearRect(0, 0, width, height);
            int yOffset = 0;
            for (BufferedImage pageImage : pageImages) {
                g2d.drawImage(pageImage, 0, yOffset, null);
                yOffset += pageImage.getHeight();
            }
        } finally {
            // Release the graphics context even when drawing fails.
            g2d.dispose();
        }
        return canvas;
    }
}
3.如何使用
// content: raw bytes of the source document; fileType: a string identifying its type
// (lower-cased here before being passed in). A fileType containing "doc" goes through the
// Word-to-PDF step first; any other type is handed to the PDF renderer directly.
// The result holds the stitched PNG bytes, and is null when the PDF has no pages.
byte[] imageBytes = new DocumentToImageUtil().convertToStitchedImage(content, fileType.toLowerCase());