当前位置: 首页 > news >正文

PDFium 导出 PDF 中的图像

✅ 修正版:支持递归提取表单中的图片对象

我在你的基础上只增加了一个 递归遍历函数,不影响你现有逻辑和日志输出。


#include <iostream>
#include <fstream>
#include <vector>
#include <string>
#include <sstream>
#include <iomanip>
#include <filesystem>
#include <cstddef>

#include "fpdfview.h"
#include "fpdf_edit.h"
#include "fpdf_save.h"

// stb_image_write supplies the JPEG encoder (single-header implementation).
#define STB_IMAGE_WRITE_IMPLEMENTATION
#include "stb_image_write.h"

namespace {

// Owns a PDFium bitmap handle and destroys it when the scope ends, so every
// exit path of the extraction routine releases the bitmap exactly once.
struct ScopedBitmap {
    explicit ScopedBitmap(FPDF_BITMAP handle) : handle_(handle) {}
    ~ScopedBitmap() {
        if (handle_) FPDFBitmap_Destroy(handle_);
    }
    ScopedBitmap(const ScopedBitmap&) = delete;
    ScopedBitmap& operator=(const ScopedBitmap&) = delete;

    FPDF_BITMAP get() const { return handle_; }

private:
    FPDF_BITMAP handle_;
};

}  // namespace

/// Converts a PDFium bitmap into a tightly packed 8-bit RGB buffer.
///
/// @param bitmap    Bitmap to read; its format, size, stride and buffer are
///                  queried through the FPDFBitmap_* accessors.
/// @param rgb_data  Receives width * height * 3 bytes in R, G, B order.
/// @return true on success; false if the format is not one of Gray, BGR,
///         BGRx, BGRA or BGRA_Premul, or if the bitmap has no usable pixels.
///
/// The alpha byte of 4-byte formats is discarded; colour bytes are copied as
/// stored (no un-premultiplication), matching JPEG output which has no alpha.
bool convert_to_rgb(FPDF_BITMAP bitmap, std::vector<unsigned char>& rgb_data) {
    const int format = FPDFBitmap_GetFormat(bitmap);
    const int width = FPDFBitmap_GetWidth(bitmap);
    const int height = FPDFBitmap_GetHeight(bitmap);
    const int stride = FPDFBitmap_GetStride(bitmap);
    const unsigned char* buffer =
        static_cast<const unsigned char*>(FPDFBitmap_GetBuffer(bitmap));

    // Bytes per source pixel, derived from the bitmap format.
    int src_bpp = 0;
    switch (format) {
        case FPDFBitmap_Gray:
            src_bpp = 1;
            break;
        case FPDFBitmap_BGR:
            src_bpp = 3;
            break;
        case FPDFBitmap_BGRx:
        case FPDFBitmap_BGRA:
        case FPDFBitmap_BGRA_Premul:
            src_bpp = 4;
            break;
        default:
            std::cerr << "❌ Unsupported bitmap format: " << format << std::endl;
            return false;
    }

    if (!buffer || width <= 0 || height <= 0) {
        std::cerr << "❌ Bitmap has no pixel data." << std::endl;
        return false;
    }

    // Size arithmetic is done in size_t so large bitmaps cannot overflow int.
    rgb_data.resize(static_cast<std::size_t>(width) *
                    static_cast<std::size_t>(height) * 3u);

    unsigned char* dst = rgb_data.data();
    for (int y = 0; y < height; ++y) {
        // Rows are `stride` bytes apart, which may exceed width * src_bpp.
        const unsigned char* src = buffer + static_cast<std::ptrdiff_t>(y) * stride;
        for (int x = 0; x < width; ++x, src += src_bpp, dst += 3) {
            if (src_bpp == 1) {
                // Grayscale: replicate the single channel.
                dst[0] = src[0];
                dst[1] = src[0];
                dst[2] = src[0];
            } else {
                // Source byte order is B, G, R; swap to R, G, B.
                dst[0] = src[2];
                dst[1] = src[1];
                dst[2] = src[0];
            }
        }
    }
    return true;
}

/// Returns a human-readable name for a PDFium bitmap format constant.
std::string format_to_string(int format) {
    switch (format) {
        case FPDFBitmap_Unknown: return "Unknown";
        case FPDFBitmap_Gray: return "Gray";
        case FPDFBitmap_BGR: return "BGR";
        case FPDFBitmap_BGRx: return "BGRx";
        case FPDFBitmap_BGRA: return "BGRA";
        case FPDFBitmap_BGRA_Premul: return "BGRA_Premul";
        default: return "Unknown(" + std::to_string(format) + ")";
    }
}

/// Exports the image held by a page object as a JPEG file, descending
/// recursively into Form objects so nested images are exported as well.
///
/// @param doc            Document owning the page.
/// @param page           Page the object belongs to.
/// @param obj            Page object to inspect; null is ignored.
/// @param output_dir     Directory that receives the JPEG files.
/// @param page_index     Page number used in the file name.
/// @param image_counter  Running counter used in the file name; incremented
///                       after every write attempt (successful or not).
void ExtractImagesFromObject(FPDF_DOCUMENT doc, FPDF_PAGE page, FPDF_PAGEOBJECT obj,
                             const std::string& output_dir, int page_index,
                             int& image_counter) {
    if (!obj) return;

    const int type = FPDFPageObj_GetType(obj);

    // Form object: visit every child object.
    if (type == FPDF_PAGEOBJ_FORM) {
        const int sub_count = FPDFFormObj_CountObjects(obj);
        for (int k = 0; k < sub_count; ++k) {
            FPDF_PAGEOBJECT sub_obj = FPDFFormObj_GetObject(obj, k);
            ExtractImagesFromObject(doc, page, sub_obj, output_dir, page_index,
                                    image_counter);
        }
        return;
    }

    if (type != FPDF_PAGEOBJ_IMAGE) return;

    unsigned int logical_w = 0, logical_h = 0;
    FPDFImageObj_GetImagePixelSize(obj, &logical_w, &logical_h);

    const ScopedBitmap bitmap(FPDFImageObj_GetRenderedBitmap(doc, page, obj));
    if (!bitmap.get()) return;

    const int bmp_w = FPDFBitmap_GetWidth(bitmap.get());
    const int bmp_h = FPDFBitmap_GetHeight(bitmap.get());
    const int bmp_format = FPDFBitmap_GetFormat(bitmap.get());

    std::cout << "🖼️  Image " << image_counter
              << ": logical(" << logical_w << "x" << logical_h
              << "), bitmap(" << bmp_w << "x" << bmp_h
              << "), format=" << format_to_string(bmp_format) << std::endl;

    if (bmp_w <= 0 || bmp_h <= 0) {
        std::cerr << "⚠️  Invalid bitmap size, skipping.\n";
        return;
    }

    std::vector<unsigned char> rgb_data;
    if (!convert_to_rgb(bitmap.get(), rgb_data)) return;

    // Output path: <dir>/page_NN_img_NNN.jpg
    std::ostringstream oss;
    oss << output_dir << "/page_" << std::setw(2) << std::setfill('0') << page_index
        << "_img_" << std::setw(3) << std::setfill('0') << image_counter << ".jpg";
    const std::string output_path = oss.str();

    if (stbi_write_jpg(output_path.c_str(), bmp_w, bmp_h, 3, rgb_data.data(), 90)) {
        std::cout << "✅ Saved: " << output_path << std::endl;
    } else {
        std::cerr << "❌ Failed to write JPEG: " << output_path << std::endl;
    }
    image_counter++;
}

/// Loads the hard-coded PDF and exports every image found on every page.
int main() {
    FPDF_InitLibrary();

    const std::string input_pdf = "D:/BugPdf/bug.pdf";
    const std::string output_dir = "D:/image_out";

    // stb_image_write cannot create missing directories, so make sure the
    // target exists; otherwise every write below would fail.
    std::error_code dir_error;
    std::filesystem::create_directories(output_dir, dir_error);
    if (dir_error) {
        std::cerr << "❌ Failed to create directory: " << output_dir
                  << " - " << dir_error.message() << std::endl;
    }

    FPDF_DOCUMENT document = FPDF_LoadDocument(input_pdf.c_str(), nullptr);
    if (!document) {
        unsigned long error = FPDF_GetLastError();
        std::cerr << "❌ Failed to load PDF document: " << input_pdf
                  << " (Error: " << error << ")" << std::endl;
        FPDF_DestroyLibrary();
        return -1;
    }

    const int page_count = FPDF_GetPageCount(document);
    std::cout << "📄 Document has " << page_count << " pages." << std::endl;

    int total_images_exported = 0;
    for (int i = 0; i < page_count; i++) {
        FPDF_PAGE page = FPDF_LoadPage(document, i);
        if (!page) continue;

        const int obj_count = FPDFPage_CountObjects(page);
        std::cout << "📄 Page " << i << " has " << obj_count << " objects." << std::endl;

        for (int j = 0; j < obj_count; j++) {
            FPDF_PAGEOBJECT obj = FPDFPage_GetObject(page, j);
            ExtractImagesFromObject(document, page, obj, output_dir, i,
                                    total_images_exported);
        }
        FPDF_ClosePage(page);
    }

    FPDF_CloseDocument(document);
    FPDF_DestroyLibrary();

    std::cout << "\n🎉 Done! Exported " << total_images_exported << " images.\n";
    return 0;
}

✅ 新功能说明

| 功能 | 说明 |
| --- | --- |
| 🧩 支持递归 Form 对象 | 自动深入表单对象中的嵌套图像 |
| 🖼️ 图像计数全局递增 | 输出的文件名连续编号,不会覆盖 |
| 💪 完全兼容旧 PDFium 接口 | 不需要修改库,只调用公开 API |
| 🚫 不崩溃 | 防护保留,异常位图尺寸仍会安全跳过 |

自动检测透明度的版本

完美 👍 那我来给你一个最终升级版:
✅ 支持 递归提取表单中的图像
✅ 自动判断:带透明通道 → PNG,不透明 → JPEG
✅ 保留安全检查与日志输出
✅ 命名连续编号,防止覆盖


💎 最终版本:自动透明检测输出(PNG/JPEG)

#include <iostream>
#include <fstream>
#include <vector>
#include <string>
#include <sstream>
#include <iomanip>
#include <filesystem>
#include <cstddef>

#include "fpdfview.h"
#include "fpdf_edit.h"
#include "fpdf_save.h"

// ---------------- STB Image Write ----------------
#define STB_IMAGE_WRITE_IMPLEMENTATION
#include "stb_image_write.h"

namespace {

// Owns a PDFium bitmap handle and destroys it when the scope ends, so every
// exit path of the extraction routine releases the bitmap exactly once.
struct ScopedBitmap {
    explicit ScopedBitmap(FPDF_BITMAP handle) : handle_(handle) {}
    ~ScopedBitmap() {
        if (handle_) FPDFBitmap_Destroy(handle_);
    }
    ScopedBitmap(const ScopedBitmap&) = delete;
    ScopedBitmap& operator=(const ScopedBitmap&) = delete;

    FPDF_BITMAP get() const { return handle_; }

private:
    FPDF_BITMAP handle_;
};

// Recovers a straight (non-premultiplied) colour value from a premultiplied
// one, rounding to nearest and clamping to 255. Fully transparent pixels
// carry no colour information and yield 0.
unsigned char unpremultiply(unsigned char value, unsigned char alpha) {
    if (alpha == 0) return 0;
    if (alpha == 255) return value;
    const unsigned int straight = (value * 255u + alpha / 2u) / alpha;
    return static_cast<unsigned char>(straight > 255u ? 255u : straight);
}

}  // namespace

/// Ensures that the directory `path` exists, creating missing parents.
///
/// @return true when the directory exists after the call (whether it was just
///         created or was already there); false when it could not be created.
bool create_directory(const std::string& path) {
    try {
        // create_directories() returns false when nothing had to be created,
        // so its result cannot be used as a success flag; check the outcome.
        std::filesystem::create_directories(path);
        return std::filesystem::is_directory(path);
    } catch (const std::exception& e) {
        std::cerr << "❌ Failed to create directory: " << path
                  << " - " << e.what() << std::endl;
        return false;
    }
}

/// Converts a PDFium bitmap into a tightly packed RGB or RGBA buffer.
///
/// @param bitmap     Bitmap to read; its format, size, stride and buffer are
///                   queried through the FPDFBitmap_* accessors.
/// @param out_data   Receives width * height * 3 bytes (R, G, B) for Gray,
///                   BGR and BGRx input, or width * height * 4 bytes
///                   (R, G, B, A) for BGRA and BGRA_Premul input.
/// @param has_alpha  Set to true exactly when out_data holds RGBA pixels.
/// @return true on success; false for an unsupported format or a bitmap
///         without usable pixels (has_alpha is then false).
///
/// BGRA_Premul pixels are un-premultiplied so the RGBA output always carries
/// straight alpha, which is what PNG expects.
bool convert_to_rgb(FPDF_BITMAP bitmap, std::vector<unsigned char>& out_data,
                    bool& has_alpha) {
    has_alpha = false;

    const int format = FPDFBitmap_GetFormat(bitmap);
    const int width = FPDFBitmap_GetWidth(bitmap);
    const int height = FPDFBitmap_GetHeight(bitmap);
    const int stride = FPDFBitmap_GetStride(bitmap);
    const unsigned char* buffer =
        static_cast<const unsigned char*>(FPDFBitmap_GetBuffer(bitmap));

    int src_bpp = 0;           // bytes per source pixel
    bool with_alpha = false;   // source carries a meaningful alpha byte
    bool premultiplied = false;
    switch (format) {
        case FPDFBitmap_Gray:
            src_bpp = 1;
            break;
        case FPDFBitmap_BGR:
            src_bpp = 3;
            break;
        case FPDFBitmap_BGRx:
            src_bpp = 4;  // fourth byte is padding, not alpha
            break;
        case FPDFBitmap_BGRA_Premul:
            premultiplied = true;
            [[fallthrough]];
        case FPDFBitmap_BGRA:
            src_bpp = 4;
            with_alpha = true;
            break;
        default:
            std::cerr << "❌ Unsupported bitmap format: " << format << std::endl;
            return false;
    }

    if (!buffer || width <= 0 || height <= 0) {
        std::cerr << "❌ Bitmap has no pixel data." << std::endl;
        return false;
    }

    // Size arithmetic is done in size_t so large bitmaps cannot overflow int.
    const std::size_t dst_bpp = with_alpha ? 4u : 3u;
    out_data.resize(static_cast<std::size_t>(width) *
                    static_cast<std::size_t>(height) * dst_bpp);

    unsigned char* dst = out_data.data();
    for (int y = 0; y < height; ++y) {
        // Rows are `stride` bytes apart, which may exceed width * src_bpp.
        const unsigned char* src = buffer + static_cast<std::ptrdiff_t>(y) * stride;
        for (int x = 0; x < width; ++x, src += src_bpp, dst += dst_bpp) {
            if (src_bpp == 1) {
                // Grayscale: replicate the single channel.
                dst[0] = src[0];
                dst[1] = src[0];
                dst[2] = src[0];
                continue;
            }

            // Source byte order is B, G, R[, A].
            unsigned char blue = src[0];
            unsigned char green = src[1];
            unsigned char red = src[2];

            if (with_alpha) {
                const unsigned char alpha = src[3];
                if (premultiplied) {
                    red = unpremultiply(red, alpha);
                    green = unpremultiply(green, alpha);
                    blue = unpremultiply(blue, alpha);
                }
                dst[3] = alpha;
            }
            dst[0] = red;
            dst[1] = green;
            dst[2] = blue;
        }
    }

    has_alpha = with_alpha;
    return true;
}

/// Returns a human-readable name for a PDFium bitmap format constant.
std::string format_to_string(int format) {
    switch (format) {
        case FPDFBitmap_Unknown: return "Unknown";
        case FPDFBitmap_Gray: return "Gray";
        case FPDFBitmap_BGR: return "BGR";
        case FPDFBitmap_BGRx: return "BGRx";
        case FPDFBitmap_BGRA: return "BGRA";
        case FPDFBitmap_BGRA_Premul: return "BGRA_Premul";
        default: return "Unknown(" + std::to_string(format) + ")";
    }
}

/// Exports the image held by a page object, descending recursively into Form
/// objects so nested images are exported as well.
///
/// Bitmaps in an alpha-carrying format are written as PNG, all others as JPEG.
///
/// @param doc            Document owning the page.
/// @param page           Page the object belongs to.
/// @param obj            Page object to inspect; null is ignored.
/// @param output_dir     Directory that receives the image files.
/// @param page_index     Page number used in the file name.
/// @param image_counter  Running counter used in the file name; incremented
///                       after every write attempt (successful or not).
void ExtractImagesFromObject(FPDF_DOCUMENT doc, FPDF_PAGE page, FPDF_PAGEOBJECT obj,
                             const std::string& output_dir, int page_index,
                             int& image_counter) {
    if (!obj) return;

    const int type = FPDFPageObj_GetType(obj);

    // Form object: visit every child object.
    if (type == FPDF_PAGEOBJ_FORM) {
        const int sub_count = FPDFFormObj_CountObjects(obj);
        for (int k = 0; k < sub_count; ++k) {
            FPDF_PAGEOBJECT sub_obj = FPDFFormObj_GetObject(obj, k);
            ExtractImagesFromObject(doc, page, sub_obj, output_dir, page_index,
                                    image_counter);
        }
        return;
    }

    if (type != FPDF_PAGEOBJ_IMAGE) return;

    unsigned int logical_w = 0, logical_h = 0;
    FPDFImageObj_GetImagePixelSize(obj, &logical_w, &logical_h);

    const ScopedBitmap bitmap(FPDFImageObj_GetRenderedBitmap(doc, page, obj));
    if (!bitmap.get()) return;

    const int bmp_w = FPDFBitmap_GetWidth(bitmap.get());
    const int bmp_h = FPDFBitmap_GetHeight(bitmap.get());
    const int bmp_format = FPDFBitmap_GetFormat(bitmap.get());

    std::cout << "🖼️  Image " << image_counter
              << ": logical(" << logical_w << "x" << logical_h
              << "), bitmap(" << bmp_w << "x" << bmp_h
              << "), format=" << format_to_string(bmp_format) << std::endl;

    if (bmp_w <= 0 || bmp_h <= 0) {
        std::cerr << "⚠️  Invalid bitmap size, skipping.\n";
        return;
    }

    std::vector<unsigned char> img_data;
    bool has_alpha = false;
    if (!convert_to_rgb(bitmap.get(), img_data, has_alpha)) return;

    // Output path: <dir>/page_NN_img_NNN.(png|jpg)
    std::ostringstream oss;
    oss << output_dir << "/page_" << std::setw(2) << std::setfill('0') << page_index
        << "_img_" << std::setw(3) << std::setfill('0') << image_counter
        << (has_alpha ? ".png" : ".jpg");
    const std::string output_path = oss.str();

    // Both stb writers return non-zero on success.
    const int written =
        has_alpha ? stbi_write_png(output_path.c_str(), bmp_w, bmp_h, 4,
                                   img_data.data(), bmp_w * 4)
                  : stbi_write_jpg(output_path.c_str(), bmp_w, bmp_h, 3,
                                   img_data.data(), 90);

    if (written != 0) {
        std::cout << "✅ Saved: " << output_path << std::endl;
    } else {
        std::cerr << "❌ Failed to write: " << output_path << std::endl;
    }
    image_counter++;
}

/// Loads the hard-coded PDF and exports every image found on every page.
int main() {
    FPDF_InitLibrary();

    const std::string input_pdf = "D:/BugPdf/bug.pdf";
    const std::string output_dir = "D:/image_out";

    // Without a usable output directory every image write would fail, so stop
    // early instead of rendering the whole document for nothing.
    if (!create_directory(output_dir)) {
        std::cerr << "❌ Output directory is not available: " << output_dir << std::endl;
        FPDF_DestroyLibrary();
        return -1;
    }

    FPDF_DOCUMENT document = FPDF_LoadDocument(input_pdf.c_str(), nullptr);
    if (!document) {
        unsigned long error = FPDF_GetLastError();
        std::cerr << "❌ Failed to load PDF document: " << input_pdf
                  << " (Error: " << error << ")" << std::endl;
        FPDF_DestroyLibrary();
        return -1;
    }

    const int page_count = FPDF_GetPageCount(document);
    std::cout << "📄 Document has " << page_count << " pages.\n";

    int total_images = 0;
    for (int i = 0; i < page_count; i++) {
        FPDF_PAGE page = FPDF_LoadPage(document, i);
        if (!page) continue;

        const int obj_count = FPDFPage_CountObjects(page);
        std::cout << "\n📄 Page " << i << " has " << obj_count << " objects.\n";

        for (int j = 0; j < obj_count; j++) {
            FPDF_PAGEOBJECT obj = FPDFPage_GetObject(page, j);
            ExtractImagesFromObject(document, page, obj, output_dir, i, total_images);
        }
        FPDF_ClosePage(page);
    }

    FPDF_CloseDocument(document);
    FPDF_DestroyLibrary();

    std::cout << "\n🎉 Done! Exported " << total_images << " images.\n";
    return 0;
}

✨ 改进要点

| 功能 | 说明 |
| --- | --- |
| 🧩 递归 Form 支持 | 可提取表单对象内的嵌套图片 |
| 🧠 透明通道检测 | BGRA / Premul → PNG |
| 🪶 普通图片 → JPEG | 自动区分输出格式 |
| 🧱 安全检查 | 所有宽高、缓冲区安全验证保留 |
| 💾 自动创建输出目录 | 避免路径不存在报错 |
| 📸 连续命名 | page_XX_img_XXX 格式,防止覆盖 |

http://www.dtcms.com/a/499200.html

相关文章:

  • C++11标准 上 (万字解析)
  • Java基础语法—字面量、变量详解、存储数据原理
  • 手工视频制作网站移动网站建设初学视频教程
  • 【shell】每日shell练习(系统服务状态监控/系统性能瓶颈分析)
  • Swift 下标脚本
  • Spring Boot 3零基础教程,WEB 开发 默认页签图标 Favicon 笔记28
  • php 网站部署杭州企业自助建站系统
  • IntelliJ IDEA 2023中为 Spring Boot 项目添加注释模板
  • Java Web安全防护:SQL注入、XSS攻击的预防与处理
  • leetcode 912.排序数组
  • 个人网站可以做商城吗seo三人行网站
  • 第3讲:Go垃圾回收机制与性能优化
  • Mac 桌面动态壁纸软件|Live Wallpaper 4K Pro v19.7 安装包使用教程(附安装包)
  • 简易网站开发网站建设的各个环节
  • 用 Selenium 搞定动态网页:模拟点击、滚动、登录全流程
  • VBA数据结构抉择战:Dictionary与Collection谁才是效率王者?
  • macos虚拟机-演示篇三配置clover引导
  • 【小白笔记】岛屿的周长(Island Perimeter)
  • 【C# OOP 入门到精通】从基础概念到 MVC 实战(含 SOLID 原则与完整代码)
  • 安徽省建设厅官网南宁seo外包要求
  • 算法实现迭代4_冒泡排序
  • uploads-labs靶场通关(1)
  • 网站建设标准合同福州做网站的公司多少钱
  • 类转函数(Class to Function)
  • Java-153 深入浅出 MongoDB 全面的适用场景分析与选型指南 场景应用指南
  • Makefile 模式规则精讲:从 ​​%.o: %.c​​ 到静态模式规则的终极自动化
  • app免费下载网站地址进入产品做网站如何谁来维护价格
  • 网站开发客户流程 6个阶段自助贸易网
  • Java前缀和算法题目练习
  • 《Python 结构化模式匹配深度解析:从语法革新到实战应用》