Java读取Excel图片技术详解:悬浮式与嵌入式图片的三种实现方案(支持WPS嵌入和Office Excel嵌入)
一、概要
在实际业务中,我们经常需要从Excel文件中提取数据,其中包括单元格数据和图片信息。本文将详细介绍三种读取Excel图片的技术方案,分别针对悬浮式图片、WPS嵌入式图片和Office嵌入式图片。
二、核心类说明
类名 | 功能描述 |
---|---|
ExcelController | 提供三个上传接口,分别对应三种图片读取方式 |
ExcelService | 抽象基类,包含通用解析逻辑和图片保存方法 |
ExcelServiceFloatImpl | 处理悬浮式图片的实现类 |
ExcelServiceImplantWpsImpl | 处理WPS嵌入式图片的实现类 |
ExcelServiceImplantOfficeImpl | 处理Office嵌入式图片的实现类 |
ExcelVO | 数据载体类,存储行号和单元格数据 |
ImagePosition | 图片位置信息类 |
ImageInfo | 图片信息辅助类 |
三、控制器接口
/**
 * REST entry point for Excel uploads.
 *
 * <p>Exposes one upload endpoint per picture-reading strategy. Each endpoint hands the uploaded
 * file to the corresponding {@link ExcelService} and returns the parsed rows.
 */
@RestController
@RequestMapping("/excel")
public class ExcelController {

    // NOTE(review): the three fields share the ExcelService type, so injection presumably
    // resolves each bean by field name — confirm against the container configuration.

    /** Service used by the floating-picture endpoint. */
    @Resource
    private ExcelService excelServiceFloatImpl;

    /** Service used by the WPS embedded-picture endpoint. */
    @Resource
    private ExcelService excelServiceImplantWpsImpl;

    /** Service used by the Office embedded-picture endpoint. */
    @Resource
    private ExcelService excelServiceImplantOfficeImpl;

    /**
     * Parses an uploaded workbook and reads its floating pictures.
     *
     * @param file the uploaded {@code .xlsx} file
     * @return the parsed rows wrapped in a 200 response
     * @throws Exception if the service rejects or fails to process the file
     */
    @PostMapping("/upload/float")
    public ResponseEntity<List<ExcelVO>> uploadExcelFloat(@RequestParam("file") MultipartFile file)
            throws Exception {
        return ResponseEntity.ok(excelServiceFloatImpl.parseExcel(file));
    }

    /**
     * Parses an uploaded workbook and reads pictures embedded by WPS.
     *
     * @param file the uploaded {@code .xlsx} file
     * @return the parsed rows wrapped in a 200 response
     * @throws Exception if the service rejects or fails to process the file
     */
    @PostMapping("/upload/implant/wps")
    public ResponseEntity<List<ExcelVO>> uploadExcelImplantWps(@RequestParam("file") MultipartFile file)
            throws Exception {
        return ResponseEntity.ok(excelServiceImplantWpsImpl.parseExcel(file));
    }

    /**
     * Parses an uploaded workbook and reads pictures embedded by Office Excel.
     *
     * @param file the uploaded {@code .xlsx} file
     * @return the parsed rows wrapped in a 200 response
     * @throws Exception if the service rejects or fails to process the file
     */
    @PostMapping("/upload/implant/office")
    public ResponseEntity<List<ExcelVO>> uploadExcelImplantOffice(@RequestParam("file") MultipartFile file)
            throws Exception {
        return ResponseEntity.ok(excelServiceImplantOfficeImpl.parseExcel(file));
    }
}
四、抽象父类
import com.central.common.exception.BusinessException;
import lombok.extern.slf4j.Slf4j;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.springframework.web.multipart.MultipartFile;import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;@Slf4j
public abstract class ExcelService {// 定义从第几行开始读取(从0开始计算,6则为第七行)public static final int START_ROW_INDEX = 6;/*** 解析上传的Excel文件** @param file 上传的Excel文件* @return 解析后的数据列表*/public List<ExcelVO> parseExcel(MultipartFile file) throws Exception {if (file == null || file.isEmpty()) {return Collections.emptyList();}// 校验文件扩展名String filename = file.getOriginalFilename();if (filename == null || !filename.toLowerCase().endsWith(".xlsx")) {throw new BusinessException("仅支持.xlsx格式文件");}if (file.getSize() > 55 * 1024 * 1024) {throw new BusinessException("文件大小不能超过55MB");}File tempFile = null;try {// 创建临时文件List<ExcelVO> result = new ArrayList<>();tempFile = File.createTempFile("excel_temp_", ".xlsx");file.transferTo(tempFile);try (OPCPackage pkg = OPCPackage.open(tempFile, PackageAccess.READ);XSSFWorkbook workbook = new XSSFWorkbook(pkg)) {XSSFSheet sheet = workbook.getSheetAt(0);// 读取单元格数据for (int i = START_ROW_INDEX; i <= sheet.getLastRowNum(); i++) {Row row = sheet.getRow(i);if (row == null) {continue;}ExcelVO vo = new ExcelVO();vo.setRowNum(i + 1);List<String> cellValues = new ArrayList<>();DataFormatter dataFormatter = new DataFormatter(Locale.CHINA);for (Cell cell : row) {cellValues.add(dataFormatter.formatCellValue(cell));}vo.setCellValues(cellValues);result.add(vo);}return readExcel(result, pkg, sheet);}} catch (Exception e) {log.error("解析失败", e);} finally {// 删除临时文件if (tempFile != null && tempFile.exists()) {boolean deleted = tempFile.delete();if (!deleted) {log.warn("临时文件删除失败: {}", tempFile.getAbsolutePath());}}}return null;}/*** 保存图片到本地** @param imageData 图片字节数组* @param rowNum 图片所在行号* @param colNum 图片所在列号*/String saveImageToLocal(byte[] imageData, int rowNum, int colNum, String extension) {try {Path saveDir = Paths.get("D:\\image");if (!Files.exists(saveDir)) {Files.createDirectories(saveDir);}String fileName = String.format("row_%d_col_%d_%s.%s",rowNum, colNum, UUID.randomUUID(), extension);Path filePath = saveDir.resolve(fileName);Files.write(filePath, 
imageData);return filePath.toString();} catch (IOException e) {log.error("保存图片到本地失败", e);return null;}}public abstract List<ExcelVO> readExcel(List<ExcelVO> result, OPCPackage pkg, XSSFSheet sheet) throws Exception;}
五、悬浮式图片读取
import lombok.extern.slf4j.Slf4j;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.usermodel.*;
import org.springframework.stereotype.Service;import java.util.*;@Service
@Slf4j
public class ExcelServiceFloatImpl extends ExcelService {@Overridepublic List<ExcelVO> readExcel(List<ExcelVO> result, OPCPackage pkg, XSSFSheet sheet) throws Exception {if (sheet.getDrawingPatriarch() == null) {return result;}// 读取图片for (XSSFShape shape : sheet.getDrawingPatriarch().getShapes()) {if (shape instanceof XSSFPicture) {XSSFPicture picture = (XSSFPicture) shape;if (picture.getAnchor() instanceof XSSFClientAnchor) {XSSFClientAnchor anchor = (XSSFClientAnchor) picture.getAnchor();int pictureRow = anchor.getRow1();if (pictureRow >= START_ROW_INDEX) {int resultIndex = pictureRow - START_ROW_INDEX;if (resultIndex < result.size()) {byte[] data = picture.getPictureData().getData();String filePath = saveImageToLocal(data,pictureRow + 1,anchor.getCol1() + 1,picture.getPictureData().suggestFileExtension());ExcelVO vo = result.get(resultIndex);ImagePosition imagePosition = new ImagePosition();imagePosition.setCol(anchor.getCol1() + 1);imagePosition.setRow(pictureRow + 1);imagePosition.setUrl(filePath);vo.setImagePositions(imagePosition);}}}}}return result;}// 获取悬浮图片private Map<Integer, ImagePosition> getFloatingPictures(XSSFSheet sheet) throws Exception {Map<Integer, ImagePosition> pictures = new HashMap<>();XSSFDrawing drawing = sheet.getDrawingPatriarch();if (drawing != null) {for (XSSFShape shape : drawing.getShapes()) {if (shape instanceof XSSFPicture) {XSSFPicture picture = (XSSFPicture) shape;if (picture.getAnchor() instanceof XSSFClientAnchor) {XSSFClientAnchor anchor = (XSSFClientAnchor) picture.getAnchor();int pictureRow = anchor.getRow1();if (pictureRow >= START_ROW_INDEX) {byte[] data = picture.getPictureData().getData();String filePath = saveImageToLocal(data,pictureRow + 1,anchor.getCol1() + 1,picture.getPictureData().suggestFileExtension());ImagePosition imagePosition = new ImagePosition();imagePosition.setCol(anchor.getCol1() + 1);imagePosition.setRow(pictureRow + 1);imagePosition.setUrl(filePath);pictures.put(anchor.getRow1() + 1, 
imagePosition);}}}}}return pictures;}}
六、Office Excel嵌入式图片读取
通过观察发现:Office 嵌入图片后,在 xlsx 压缩包内的工作表 XML 文件(xl\worksheets\sheet1.xml)中,嵌入图片所在单元格的标签(<c> 标签)会带有一个 “vm” 属性,该属性的取值恰好与图片文件名(图片存放在 xl\media 目录下,形如 image1.png)中的递增序号一致。基于这一对应关系,只要读取单元格 <c> 标签的 “vm” 属性值,即可匹配到对应的图片文件,从而完成嵌入式图片的读取。
Excel
xl\media
xl\worksheets\sheet1.xml
实现逻辑:
- 解析工作表XML(sheet1.xml),获取AB列单元格的vm属性(该属性与图片索引关联)和行号。
- 遍历Excel包中的媒体文件(位于/xl/media/),建立图片索引和图片数据的映射。
- 根据vm值匹配图片,将图片保存到本地,并设置到对应的行
存在问题
仅使用 Office Excel 编辑时,vm 属性与图片序号的对应关系不会出现问题;但如果混用 WPS 与 Office Excel 嵌入不同格式的图片,可能会导致 vm 属性值与 image 文件名中的序号对应不上,请注意!另外,下面的示例代码只匹配 png 格式的图片。
import lombok.extern.slf4j.Slf4j;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.openxml4j.opc.PackagingURIHelper;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.springframework.stereotype.Service;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import java.io.*;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;@Service
@Slf4j
public class ExcelServiceImplantOfficeImpl extends ExcelService {private static final Pattern CELL_REF_PATTERN = Pattern.compile("AB(\\d+)");private static final Pattern IMAGE_PATH_PATTERN = Pattern.compile("/xl/media/image(\\d+)\\.(\\w+)");@Overridepublic List<ExcelVO> readExcel(List<ExcelVO> result, OPCPackage pkg, XSSFSheet sheet) throws Exception {// 获取工作表XML内容PackagePart sheetPart = pkg.getPart(PackagingURIHelper.createPartName("/xl/worksheets/sheet1.xml"));InputStream sheetStream = sheetPart.getInputStream();// 解析工作表XML获取AB列单元格的vm属性Map<Integer, Integer> vmMap = parseSheetXML(sheetStream);sheetStream.close();// 获取所有图片并建立索引映射Map<Integer, ImageInfo> imageMap = getAllImages(pkg);//根据vm值匹配图片并保存for (Map.Entry<Integer, Integer> entry : vmMap.entrySet()) {int vm = entry.getKey();if (imageMap.containsKey(vm)) {//office 读到的是实际行号,list集合从0开始计算所以要减1int resultIndex = entry.getValue() - START_ROW_INDEX - 1;if (resultIndex < result.size()) {ImageInfo imageInfo = imageMap.get(vm);ImagePosition imagePosition = new ImagePosition();imagePosition.setCol(entry.getValue());imagePosition.setUrl(saveImageToLocal(imageInfo.getFileBytes(),entry.getValue(),28,imageInfo.getExtension()));ExcelVO vo = result.get(resultIndex);vo.setImagePositions(imagePosition);}}}return result;}// 解析工作表XML获取AB列单元格的vm属性 k:vm value:行号private Map<Integer, Integer> parseSheetXML(InputStream is) throws Exception {Map<Integer, Integer> vmMap = new HashMap<>();DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();DocumentBuilder builder = factory.newDocumentBuilder();Document doc = builder.parse(is);NodeList cells = doc.getElementsByTagName("c");for (int i = 0; i < cells.getLength(); i++) {Node cellNode = cells.item(i);if (cellNode.getNodeType() == Node.ELEMENT_NODE) {Element cellElement = (Element) cellNode;String cellRef = cellElement.getAttribute("r");// 只需要AB列且行号>=7Matcher matcher = CELL_REF_PATTERN.matcher(cellRef);if (matcher.matches()) {int rowNum = Integer.parseInt(matcher.group(1));if 
(rowNum >= 7) {String vmAttr = cellElement.getAttribute("vm");if (!vmAttr.isEmpty()) {int vm = Integer.parseInt(vmAttr);vmMap.put(vm, rowNum);}}}}}return vmMap;}// 获取所有图片并建立索引映射 k:图片索引 value :图片信息private Map<Integer, ImageInfo> getAllImages(OPCPackage pkg) throws Exception {Map<Integer, ImageInfo> imageMap = new HashMap<>();// 遍历所有文件for (PackagePart part : pkg.getParts()) {String partName = part.getPartName().toString();if (partName.startsWith("/xl/media/")) {Matcher matcher = IMAGE_PATH_PATTERN.matcher(partName);if (matcher.matches() && "png".equalsIgnoreCase(matcher.group(2))) {try (InputStream is = part.getInputStream();ByteArrayOutputStream bos = new ByteArrayOutputStream()) {byte[] buffer = new byte[4096];int bytesRead;while ((bytesRead = is.read(buffer)) != -1) {bos.write(buffer, 0, bytesRead);}ImageInfo imagePosition = new ImageInfo();imagePosition.setExtension(matcher.group(2).toLowerCase());imagePosition.setFileBytes(bos.toByteArray());imageMap.put(Integer.parseInt(matcher.group(1)), imagePosition);}}}}return imageMap;}}
七、WPS嵌入式图片读取
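WPS 嵌入式图片在单元格中以 DISPIMG("图片ID",1) 公式的形式存在:图片 ID 与关系 ID 的对应关系记录在 xl/cellimages.xml 中,关系 ID 指向的图片文件路径记录在 xl/_rels/cellimages.xml.rels 中。具体实现逻辑可参考其他人的博客,以下代码参考相关文章修改实现。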
实现类:
import com.alibaba.cloud.commons.lang.StringUtils;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.compress.utils.IOUtils;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.openxml4j.opc.PackagePartName;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.jdom2.Attribute;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.input.SAXBuilder;
import org.springframework.stereotype.Service;import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import java.io.File;
import java.util.*;/*** @author mashb*/
@Service
@Slf4j
public class ExcelServiceImplantWpsImpl extends ExcelService {@Overridepublic List<ExcelVO> readExcel(List<ExcelVO> result, OPCPackage pkg, XSSFSheet sheet) throws Exception {//获取所有包文件List<PackagePart> parts = pkg.getParts();//获取每个工作表中的包文件Map<Integer, List<PackagePart>> picturePath = getEmbedPictures(parts);for (Integer key : picturePath.keySet()) {List<PackagePart> rows = picturePath.get(key);if (rows.size() < START_ROW_INDEX) {return result;}for (int i = START_ROW_INDEX; i < rows.size(); i++) {int resultIndex = i - START_ROW_INDEX;if (resultIndex < result.size()) {PackagePart part = rows.get(i);// 获取文件名String partName = part.getPartName().getName();String fileName = new File(partName).getName();String extension = fileName.substring(fileName.lastIndexOf('.') + 1);ExcelVO vo = result.get(resultIndex);ImagePosition imagePosition = new ImagePosition();imagePosition.setCol(28);imagePosition.setRow(i + 1);imagePosition.setUrl(saveImageToLocal(IOUtils.toByteArray(part.getInputStream()),i + 1,28,extension.toLowerCase()));vo.setImagePositions(imagePosition);}}}return result;}private static Map<Integer, List<PackagePart>> getEmbedPictures(List<PackagePart> parts) throws Exception {Map<String, Set<String>> mapImg = new HashMap<>();Map<String, String> mapImgPath = new HashMap<>();Map<Integer, List<String>> dataMap = new HashMap<>();for (PackagePart part : parts) {PackagePartName partName = part.getPartName();String name = partName.getName();if ("/xl/cellimages.xml".equals(name)) {SAXBuilder builder = new SAXBuilder();// 获取文档Document doc = builder.build(part.getInputStream());// 获取根节点Element root = doc.getRootElement();List<Element> cellImageList = root.getChildren();for (Element imgEle : cellImageList) {Element xdrPic = imgEle.getChildren().get(0);Element xdrNvPicPr = xdrPic.getChildren().get(0);Element xdrBlipFill = xdrPic.getChildren().get(1);Element aBlip = xdrBlipFill.getChildren().get(0);Attribute attr = aBlip.getAttributes().get(0);String imgId = 
xdrNvPicPr.getChildren().get(0).getAttributeValue("name");String id = attr.getValue();if (mapImg.containsKey(id)) {mapImg.get(id).add(imgId);} else {Set<String> set = new HashSet<>();set.add(imgId);mapImg.put(id, set);}}}if ("/xl/_rels/cellimages.xml.rels".equals(name)) {SAXBuilder builder = new SAXBuilder();// 获取文档Document doc = builder.build(part.getInputStream());// 获取根节点Element root = doc.getRootElement();List<Element> relationshipList = root.getChildren();for (Element relationship : relationshipList) {String id = relationship.getAttributeValue("Id");String target = relationship.getAttributeValue("Target");mapImgPath.put(id, target);}}if (name.contains("/xl/worksheets/sheet")) {// 获取文档String sheetNoStr = name.replace("/xl/worksheets/sheet", "").replace(".xml", "");Integer sheetNo = Integer.valueOf(sheetNoStr) - 1;// 步骤1:创建SAXParserFactory实例SAXParserFactory factory = SAXParserFactory.newInstance();// 步骤2:创建SAXParser实例SAXParser parser = factory.newSAXParser();MySAXParserHandler handler = new MySAXParserHandler();parser.parse(part.getInputStream(), handler);List<String> rows = handler.getRows();dataMap.put(sheetNo, rows);}}Map<String, String> imgMap = new HashMap<>();for (String id : mapImg.keySet()) {Set<String> imgIds = mapImg.get(id);String path = mapImgPath.get(id);for (String imgId : imgIds) {imgMap.put(imgId, path);}}for (Integer key : dataMap.keySet()) {List<String> rows = dataMap.get(key);for (int i = 0; i < rows.size(); i++) {String imgId = rows.get(i);if (imgMap.containsKey(imgId)) {rows.set(i, imgMap.get(imgId));}}}Map<Integer, List<PackagePart>> map = new HashMap<>();for (Integer key : dataMap.keySet()) {List<PackagePart> list = new ArrayList<>();map.put(key, list);List<String> pathList = dataMap.get(key);for (int i = 0; i < pathList.size(); i++) {list.add(i, null);String path = pathList.get(i);if (StringUtils.isNotEmpty(path)) {for (PackagePart part : parts) {PackagePartName partName = part.getPartName();String name = partName.getName();if 
(name.contains(path)) {list.set(i, part);break;}}}}}return map;}
}
SAX解析类
import lombok.Data;
import lombok.EqualsAndHashCode;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;import java.util.ArrayList;
import java.util.List;@Data
@EqualsAndHashCode(callSuper = true)
/**
 * SAX handler that collects, for every {@code <row>} element of a worksheet, the picture id of
 * the last {@code DISPIMG("id", ...)} occurrence in the row's text.
 *
 * <p>{@link #rows} receives exactly one element per {@code <row>} element, in document order:
 * the picture id, or {@code null} when the row has no well-formed {@code DISPIMG("id"} text.
 */
public class MySAXParserHandler extends DefaultHandler {

    /** Text that immediately precedes the picture id. */
    private static final String DISPIMG_OPEN = "DISPIMG(\"";

    /** Text collected for the row being parsed; {@code null} while outside a row. */
    String value = null;

    /** One element per parsed row: the picture id, or {@code null}. */
    List<String> rows = new ArrayList<>();

    /** Number of rows parsed so far; also the index the next row is stored at. */
    int rowIndex = 0;

    /** Starts collecting text when a {@code row} element opens. */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes)
            throws SAXException {
        if ("row".equals(qName)) {
            value = "";
        }
    }

    /** Stores the picture id of the row that just closed and stops collecting text. */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        if (!"row".equals(qName)) {
            return;
        }
        rows.add(rowIndex, extractImageId(value));
        rowIndex++;
        value = null;
    }

    /** Appends character data to the current row's text; text outside a row is ignored. */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        if (value != null) {
            value += new String(ch, start, length);
        }
    }

    /**
     * Returns the id between the quotes of the last {@code DISPIMG("id"} occurrence, or
     * {@code null} when the text has no such occurrence or the closing quote is missing.
     */
    private static String extractImageId(String rowText) {
        if (rowText == null) {
            return null;
        }
        int open = rowText.lastIndexOf(DISPIMG_OPEN);
        if (open < 0) {
            return null;
        }
        int idStart = open + DISPIMG_OPEN.length();
        int idEnd = rowText.indexOf('"', idStart);
        return idEnd < 0 ? null : rowText.substring(idStart, idEnd);
    }
}
八、辅助类
ExcelVO
import lombok.Data;import java.util.List;
@Data
public class ExcelVO {// 行号private int rowNum;// 单元格值列表private List<String> cellValues;// 图片位置信息(可选)private ImagePosition imagePositions;
}
ImageInfo
import lombok.Data;@Data
/** Helper holding the content of one picture: its file extension and its bytes. */
public class ImageInfo {

    /** File extension of the picture. */
    private String extension;

    /** Raw bytes of the picture file. */
    private byte[] fileBytes;
}
ImagePosition
import lombok.Data;@Data
/** Position of a picture in the sheet together with the location it was saved to. */
public class ImagePosition {

    /** Row of the picture. */
    private int row;

    /** Column of the picture. */
    private int col;

    /** Location of the saved picture. */
    private String url;
}