libreoffice容器word转pdf
先说结论:市面上不花钱、简单且效果好的方案就是这种方式。实测下来,在线(LibreOffice Online)转换的效果不如命令行转换的效果好。AsposeWords 和 SpireDoc 的效果都不错,但如果只是为了 Word 转 PDF 这一个功能,花 3-5 万感觉不划算。
下载容器路径 https://docker.aityp.com/i/search?search=libreoffice
部署LibreOffice容器
使用Docker运行LibreOffice的无头模式(headless),提供文档转换服务:
#需要挂载输入输出路径和安装字体路径
docker run -d \
--name libreoffice1 \
-v /opt/libreoffice1/input:/app/input \
-v /opt/libreoffice1/output:/app/output \
-v /usr/share/fonts/:/usr/share/fonts/ \
-p 3000:3000 \
linuxserver/libreoffice:latest

# 如果使用的是 LibreOffice Online,则用下面的命令启动;需要注意容器内的配置文件有一处需要修改,否则 HTTP 访问不通
docker run -t -d -p 9980:9980 -e "username=admin" -e "password=123456" --restart always --cap-add SYS_ADMIN libreofficeonline:telecom
第一条命令启动一个LibreOffice容器,映射3000端口(对应命令中的 -p 3000:3000),并将宿主机目录挂载到容器内以便文件交换。
Java调用REST API转换文档
若容器提供REST API(如libreserver/office-api),可通过Java的HTTP客户端发送请求:
package cn.zjtele.pubinfo.demo.api.controller;import org.apache.http.HttpEntity;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.conn.ssl.NoopHostnameVerifier;
import org.apache.http.entity.ContentType;
import org.apache.http.entity.mime.MultipartEntityBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.ssl.SSLContexts;
import org.apache.http.util.EntityUtils;
import org.slf4j.MDC;import javax.net.ssl.SSLContext;
import java.io.File;
import java.io.FileOutputStream;
import java.nio.charset.StandardCharsets;
import java.security.KeyManagementException;
import java.security.KeyStoreException;
import java.security.NoSuchAlgorithmException;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;import static com.sun.javafx.runtime.async.BackgroundExecutor.getExecutor;public class LibreOfficeOnlineMasterConverter {// 正确的API端点路径(根据您的服务器配置可能需要调整)private static final String LOOL_CONVERT_URL = "http://localhost:9980/lool/convert-to/pdf";// 如果需要忽略SSL证书验证static SSLContext sslContext;static {try {sslContext = SSLContexts.custom().loadTrustMaterial((chain, authType) -> true).build();} catch (NoSuchAlgorithmException e) {throw new RuntimeException(e);} catch (KeyManagementException e) {throw new RuntimeException(e);} catch (KeyStoreException e) {throw new RuntimeException(e);}}// 在类初始化时创建共享的HttpClientprivate static final CloseableHttpClient sharedHttpClient = HttpClients.custom().setSSLContext(sslContext).setSSLHostnameVerifier(NoopHostnameVerifier.INSTANCE).setMaxConnTotal(100) // 最大连接数.setMaxConnPerRoute(20) // 每个路由最大连接数.build();public static void printPoolStatus() {ThreadPoolExecutor executor = (ThreadPoolExecutor) getExecutor();System.out.println("活跃线程: " + executor.getActiveCount() +" / 队列任务: " + executor.getQueue().size());}public static boolean convertToPdf(String inputFile, String outputFile) throws NoSuchAlgorithmException, KeyStoreException, KeyManagementException {MDC.put("traceId", UUID.randomUUID().toString().substring(0,8));System.out.println("开始处理文件: " + inputFile);// 如果需要忽略SSL证书验证
// SSLContext sslContext = SSLContexts.custom()
// .loadTrustMaterial((chain, authType) -> true)
// .build();// 修改convertToPdf方法中的httpClient获取方式
// CloseableHttpClient httpClient = sharedHttpClient;// 调整HttpClient配置,增加超时控制RequestConfig config = RequestConfig.custom().setConnectTimeout(5000) // 连接超时5秒.setSocketTimeout(30000) // 数据传输超时30秒.build();CloseableHttpClient httpClient = HttpClients.custom().setDefaultRequestConfig(config).setConnectionManager(new PoolingHttpClientConnectionManager()) // 使用连接池.build();try {// 1. 创建POST请求HttpPost httpPost = new HttpPost(LOOL_CONVERT_URL);// 2. 构建Multipart请求体(尝试不同字段名)MultipartEntityBuilder builder = MultipartEntityBuilder.create();builder.addBinaryBody("file", // 先尝试"file",如果失败再尝试"data"new File(inputFile),getContentType(inputFile),new File(inputFile).getName());// 3. 设置必要的头信息(master分支特定头)httpPost.setHeader("X-WOPI-Override", "CONVERT_TO");httpPost.setHeader("X-WOPI-FileExtension", getFileExtension(inputFile));httpPost.setHeader("X-WOPI-SuggestedTarget", getOutputFilename(outputFile));httpPost.setHeader("X-LOOL-WOPI-ConvertTo", "pdf"); // master分支特有httpPost.setHeader("Accept", "application/pdf");// 4. 添加其他可能的必要头httpPost.setHeader("User-Agent", "Java LibreOffice Converter");httpPost.setHeader("Cache-Control", "no-cache");httpPost.setEntity(builder.build());System.out.println("发送请求到: " + LOOL_CONVERT_URL);System.out.println("使用头信息: " + httpPost.getAllHeaders());// 5. 执行请求try (CloseableHttpResponse response = httpClient.execute(httpPost)) {int statusCode = response.getStatusLine().getStatusCode();HttpEntity entity = response.getEntity();System.out.println("响应状态: " + response.getStatusLine());System.out.println("响应头: " + response.getAllHeaders());if (statusCode == 200 && entity != null) {try (FileOutputStream fos = new FileOutputStream(outputFile)) {entity.writeTo(fos);}return true;} else {String responseBody = entity != null ?EntityUtils.toString(entity, StandardCharsets.UTF_8) : "无响应体";System.err.println("转换失败. 
状态码: " + statusCode);System.err.println("响应体: " + responseBody);// 如果400错误,尝试使用"data"作为字段名if (statusCode == 400) {System.out.println("尝试使用'data'作为字段名重试...");return retryWithDataField(inputFile, outputFile);}}}} catch (Exception e) {System.err.println("转换过程中发生错误: " + e.getMessage());e.printStackTrace();} finally {try {httpClient.close();} catch (Exception e) {System.err.println("关闭HTTP客户端时出错: " + e.getMessage());}}return false;}/*** 使用"data"作为字段名重试*/private static boolean retryWithDataField(String inputFile, String outputFile) {CloseableHttpClient httpClient = HttpClients.createDefault();try {HttpPost httpPost = new HttpPost(LOOL_CONVERT_URL);MultipartEntityBuilder builder = MultipartEntityBuilder.create();builder.addBinaryBody("data", // 使用"data"作为字段名new File(inputFile),getContentType(inputFile),new File(inputFile).getName());// 设置相同的头信息httpPost.setHeader("X-WOPI-Override", "CONVERT_TO");httpPost.setHeader("X-WOPI-FileExtension", getFileExtension(inputFile));httpPost.setHeader("X-WOPI-SuggestedTarget", getOutputFilename(outputFile));httpPost.setHeader("X-LOOL-WOPI-ConvertTo", "pdf");httpPost.setHeader("Accept", "application/pdf");httpPost.setEntity(builder.build());try (CloseableHttpResponse response = httpClient.execute(httpPost)) {if (response.getStatusLine().getStatusCode() == 200) {try (FileOutputStream fos = new FileOutputStream(outputFile)) {response.getEntity().writeTo(fos);}return true;}}} catch (Exception e) {System.err.println("重试失败: " + e.getMessage());}return false;}// 新增异步转换方法public static Future<Boolean> convertToPdfAsync(String inputFile, String outputFile) {return ConverterThreadPool.getExecutor().submit(() -> {try {return convertToPdf(inputFile, outputFile);} catch (Exception e) {System.err.println("异步任务执行异常: " + e.getMessage());return false;}});}// 新增批量处理方法public static Map<String, Future<Boolean>> batchConvert(Map<String, String> filePairs) {Map<String, Future<Boolean>> results = new ConcurrentHashMap<>();filePairs.forEach((input, output) 
->results.put(input, convertToPdfAsync(input, output)));return results;}/*** 获取正确的内容类型*/private static ContentType getContentType(String filePath) {String ext = getFileExtension(filePath).toLowerCase();switch (ext) {case "docx": return ContentType.create("application/vnd.openxmlformats-officedocument.wordprocessingml.document");case "doc": return ContentType.create("application/msword");case "odt": return ContentType.create("application/vnd.oasis.opendocument.text");default: return ContentType.APPLICATION_OCTET_STREAM;}}private static String getFileExtension(String filePath) {int lastDotIndex = filePath.lastIndexOf('.');return lastDotIndex > 0 ? filePath.substring(lastDotIndex + 1) : "";}private static String getOutputFilename(String filePath) {return new File(filePath).getName();}public static void main(String[] args) throws NoSuchAlgorithmException, KeyStoreException, KeyManagementException {String inputFile = "C:\\Users\\sheng\\Desktop\\chongqing.docx";String outputFile = "C:\\Users\\sheng\\Desktop\\chongqing.pdf";System.out.println("开始转换: " + inputFile + " → " + outputFile);boolean b = convertToPdf(inputFile, outputFile);System.out.println("转换结果: " + b);}
}
通过命令行调用容器内工具
若容器仅包含LibreOffice命令行工具,可通过Java执行Docker命令完成转换:
package cn.zjtele.pubinfo.demo.wordtopdf;import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;

/**
 * Converts a document to PDF by running LibreOffice in headless mode inside an
 * already-running Docker container via {@code docker exec}.
 *
 * <p>The input file is looked up in {@link #INPUT_DIR} on the host and is passed to the
 * container as {@code /app/input/<name>}; the container writes to {@code /app/output}.
 * NOTE(review): this assumes INPUT_DIR and OUTPUT_DIR are the host sides of those two
 * container mounts — confirm against the container's volume configuration.
 */
public class LibreOfficeConverter {

    /** Host directory holding the documents to convert. */
    private static final String INPUT_DIR = "D:/docker/input";

    /** Host directory in which the generated PDF is expected. */
    private static final String OUTPUT_DIR = "D:/docker/output";

    /** Name of the running LibreOffice container. */
    private static final String CONTAINER_NAME = "libreoffice767";

    /** File converted when no file name is given on the command line. */
    private static final String DEFAULT_FILE_NAME = "11.docx";

    /**
     * Converts one file from {@link #INPUT_DIR} and reports the outcome and elapsed time.
     *
     * @param args optional; {@code args[0]} is the name of the file to convert,
     *             defaulting to {@code 11.docx}
     */
    public static void main(String[] args) {
        long startedAt = System.currentTimeMillis();
        String fileName = (args != null && args.length > 0) ? args[0] : DEFAULT_FILE_NAME;

        Path inputFilePath = Paths.get(INPUT_DIR, fileName);
        File inputFile = inputFilePath.toFile();
        if (!inputFile.exists()) {
            System.out.println("文件不存在:" + inputFilePath);
            return;
        }

        try {
            Files.createDirectories(Paths.get(OUTPUT_DIR));
            Path outputFilePath = Paths.get(OUTPUT_DIR, toPdfFileName(fileName));

            convertFileUsingLibreOffice(inputFile.getAbsolutePath(), outputFilePath.toString());

            System.out.println("文件转换成功!PDF文件已保存到:" + outputFilePath);
            System.out.println("转换耗时:" + (System.currentTimeMillis() - startedAt) + "ms");
        } catch (InterruptedException e) {
            // Restore the interrupt flag so the caller can still observe the interruption.
            Thread.currentThread().interrupt();
            e.printStackTrace();
            System.out.println("文件转换失败!");
        } catch (Exception e) {
            e.printStackTrace();
            System.out.println("文件转换失败!");
        }
    }

    /**
     * Derives the PDF file name by replacing the last extension of {@code fileName} with
     * {@code .pdf}; a name without an extension simply gets {@code .pdf} appended.
     *
     * @param fileName source file name, e.g. {@code report.docx}
     * @return the corresponding PDF name, e.g. {@code report.pdf}
     */
    static String toPdfFileName(String fileName) {
        int lastDot = fileName.lastIndexOf('.');
        String baseName = lastDot > 0 ? fileName.substring(0, lastDot) : fileName;
        return baseName + ".pdf";
    }

    /**
     * Runs the conversion inside the container and waits for it to finish.
     *
     * @param inputFilePath  host path of the source file; only its file name is passed to
     *                       the container
     * @param outputFilePath host path where the PDF is expected after conversion
     * @throws IOException          if the process cannot be started or no file exists at
     *                              {@code outputFilePath} afterwards
     * @throws InterruptedException if interrupted while waiting for the process
     * @throws RuntimeException     if the process exits with a non-zero code
     */
    private static void convertFileUsingLibreOffice(String inputFilePath, String outputFilePath)
            throws IOException, InterruptedException {
        // An argument list (not one command string) keeps file names with spaces intact.
        ProcessBuilder builder = new ProcessBuilder(
                "docker", "exec", "-i", CONTAINER_NAME,
                "libreoffice", "--headless",
                "--convert-to", "pdf",
                "--outdir", "/app/output",
                "/app/input/" + new File(inputFilePath).getName());
        // Forward the tool's output so the child never blocks on a full pipe buffer.
        builder.redirectOutput(ProcessBuilder.Redirect.INHERIT);
        builder.redirectError(ProcessBuilder.Redirect.INHERIT);

        Process process = builder.start();
        int exitCode = process.waitFor();
        if (exitCode != 0) {
            throw new RuntimeException("LibreOffice 转换失败,退出码:" + exitCode);
        }
        // A zero exit code alone does not prove that a PDF was written.
        if (!Files.exists(Paths.get(outputFilePath))) {
            throw new IOException("LibreOffice 转换失败,未生成输出文件:" + outputFilePath);
        }
    }
}
文件路径处理注意事项
确保Java应用有权限访问宿主机和容器的挂载目录。
传给容器内命令的输入/输出路径需使用容器内的映射路径(如上文挂载的 /app/input、/app/output),而不是宿主机路径。
转换完成后从挂载目录提取PDF文件。