当前位置: 首页 > news >正文

C# OpenXML:打开加密的 Word 文档并读取内容

using System;
using System.IO;
using System.Linq;
using System.Text;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;/// <summary>
/// Reads the main-body text of a Word document through the Open XML SDK
/// (intended as a replacement for the Aspose-based reader).
/// Only <c>MainDocumentPart.Document.Body</c> is traversed, so header and footer parts are never read.
/// </summary>
/// <param name="path">Path of the document; it is opened read-only.</param>
/// <param name="password">Password passed to the open settings.</param>
/// <returns>
/// The cleaned body text. When the marker "限公司第" occurs at an index greater than zero,
/// the text is cut before its last occurrence and trimmed.
/// <c>null</c> when the document has no body or when any exception is thrown.
/// </returns>
public static string GetWordContentByOpenXml(string path, string password)
{
    const string cutMarker = "限公司第";

    try
    {
        // NOTE(review): the published DocumentFormat.OpenXml OpenSettings type is not known to expose
        // a Password property — confirm this compiles against the SDK build the project references.
        using (var document = WordprocessingDocument.Open(path, false, new OpenSettings() { Password = password }))
        {
            var body = document.MainDocumentPart?.Document?.Body;
            if (body == null)
            {
                return null;
            }

            // Collect the body text (paragraphs and tables), then normalise it.
            var contentBuilder = new StringBuilder();
            ExtractBodyContent(body, contentBuilder);
            string content = CleanContent(contentBuilder.ToString());

            // The marker is a literal, so compare ordinally instead of with the current culture.
            int index = content.LastIndexOf(cutMarker, StringComparison.Ordinal);

            // A marker at index 0 deliberately leaves the text untouched (same as the original "> 0" test).
            return index > 0
                ? content.Substring(0, index).Trim()
                : content;
        }
    }
    catch (Exception ex)
    {
        // Log type, message and stack trace; the stack trace alone does not say what went wrong.
        LogManager.WriteError("GetWordContentByOpenXml()", ex.ToString());
        return null;
    }
}
/// <summary>
/// Appends the text of every direct child of the document body to the builder.
/// Section properties are skipped by the per-element extractor this method delegates to.
/// </summary>
/// <param name="body">Document body whose children are visited in order.</param>
/// <param name="contentBuilder">Builder that receives the extracted text.</param>
private static void ExtractBodyContent(Body body, StringBuilder contentBuilder)
{
    // Each top-level child is dispatched by its concrete type in ExtractElementContent.
    foreach (OpenXmlElement topLevelChild in body.Elements())
    {
        ExtractElementContent(topLevelChild, contentBuilder);
    }
}
/// <summary>
/// Extracts the text of one element, dispatching on its concrete type and
/// recursing into any container that is not a paragraph, table or section properties.
/// </summary>
/// <param name="element">Open XML element to process.</param>
/// <param name="contentBuilder">Builder that receives the extracted text.</param>
private static void ExtractElementContent(OpenXmlElement element, StringBuilder contentBuilder)
{
    if (element is Paragraph paragraphNode)
    {
        ExtractParagraphContent(paragraphNode, contentBuilder);
        contentBuilder.AppendLine(); // one line per paragraph
        return;
    }

    if (element is Table tableNode)
    {
        ExtractTableContent(tableNode, contentBuilder);
        return;
    }

    if (element is SectionProperties)
    {
        // Section properties carry no body text; nothing to emit.
        return;
    }

    // Any other element is treated as a container: descend into its children.
    foreach (var nested in element.Elements())
    {
        ExtractElementContent(nested, contentBuilder);
    }
}
/// <summary>
/// Appends the text of a paragraph's direct runs to the builder.
/// Text nodes are appended verbatim, a tab element becomes "\t" and a break element becomes a line break.
/// Runs nested in other elements (for example hyperlinks) are not visited.
/// </summary>
/// <param name="paragraph">Paragraph whose direct <see cref="Run"/> children are read.</param>
/// <param name="contentBuilder">Builder that receives the extracted text.</param>
private static void ExtractParagraphContent(Paragraph paragraph, StringBuilder contentBuilder)
{
    foreach (var run in paragraph.Elements<Run>())
    {
        // Walk the run's children once, in document order. Emitting all texts first and
        // tabs/breaks afterwards would move a separator that sits between two text nodes
        // to the end of the run and glue the two texts together.
        foreach (var child in run.Elements())
        {
            switch (child)
            {
                case Text text:
                    contentBuilder.Append(text.Text);
                    break;
                case TabChar _:
                    contentBuilder.Append("\t");
                    break;
                case Break _:
                    contentBuilder.AppendLine();
                    break;
            }
        }
    }
}
/// <summary>
/// Appends the text of a table to the builder: every cell is followed by a tab
/// and every row by a line break.
/// Several paragraphs inside one cell are separated by a line break, and tables
/// nested inside a cell are extracted recursively.
/// </summary>
/// <param name="table">Table to read.</param>
/// <param name="contentBuilder">Builder that receives the extracted text.</param>
private static void ExtractTableContent(Table table, StringBuilder contentBuilder)
{
    foreach (var row in table.Elements<TableRow>())
    {
        foreach (var cell in row.Elements<TableCell>())
        {
            bool firstParagraph = true;

            foreach (var child in cell.Elements())
            {
                switch (child)
                {
                    case Paragraph paragraph:
                        // Separate consecutive paragraphs so their words do not run together.
                        // A single-paragraph cell produces exactly the same output as before.
                        if (!firstParagraph)
                        {
                            contentBuilder.AppendLine();
                        }

                        ExtractParagraphContent(paragraph, contentBuilder);
                        firstParagraph = false;
                        break;

                    case Table nestedTable:
                        // Text inside a nested table used to be dropped silently.
                        ExtractTableContent(nestedTable, contentBuilder);
                        break;
                }
            }

            contentBuilder.Append("\t"); // cell separator
        }

        contentBuilder.AppendLine(); // row terminator
    }
}
/// <summary>
/// Normalises extracted text: removes non-whitespace control characters and the
/// Aspose evaluation watermark, collapses every whitespace run to a single space and trims the result.
/// </summary>
/// <param name="content">Raw extracted text; may be null or empty.</param>
/// <returns>The cleaned text, or <see cref="string.Empty"/> for null or empty input.</returns>
private static string CleanContent(string content)
{
    if (string.IsNullOrEmpty(content))
    {
        return string.Empty;
    }

    // Strip control characters BEFORE collapsing whitespace; doing it afterwards leaves
    // two adjacent spaces wherever a control character sat between blanks.
    // \x0B and \x0C are left out on purpose: \s matches them, so they keep acting as separators.
    content = System.Text.RegularExpressions.Regex.Replace(content, @"[\x00-\x08\x0E-\x1F\x7F]", "");

    // Kept for compatibility with the Aspose-based reader; Open XML output should never contain it.
    // Removed before the collapse for the same double-space reason as above.
    content = content.Replace("EvaluationOnly.CreatedwithAspose.Words.Copyright2003-2024AsposePtyLtd.", "");

    // Collapse every whitespace run (including line breaks and tabs) to one space.
    // The former second pass for blank lines could never match after this step and was removed.
    content = System.Text.RegularExpressions.Regex.Replace(content, @"\s+", " ");

    return content.Trim();
}
/// <summary>
/// Probes whether a document can be opened without a password by trying to open it read-only.
/// </summary>
/// <param name="path">Path of the document to probe.</param>
/// <returns>
/// <c>false</c> when the path is null/empty, the file does not exist, or the document opens normally.
/// For an <see cref="OpenXmlPackageException"/>, <c>true</c> only if its message mentions
/// "password", "encrypted" or "protected" (case-insensitive).
/// <c>true</c> for any other exception. This method never throws.
/// </returns>
public static bool IsPasswordRequired(string path)
{
    // A path that does not point at an existing file cannot be password protected;
    // without this guard the catch-all below would report true for it.
    if (string.IsNullOrEmpty(path) || !File.Exists(path))
    {
        return false;
    }

    try
    {
        // Opening succeeded, so no password is needed; the document is disposed immediately.
        using (WordprocessingDocument.Open(path, false))
        {
            return false;
        }
    }
    catch (OpenXmlPackageException ex)
    {
        // Match case-insensitively so a capitalised "Password"/"Encrypted" is not missed.
        string message = ex.Message ?? string.Empty;
        return message.IndexOf("password", StringComparison.OrdinalIgnoreCase) >= 0
            || message.IndexOf("encrypted", StringComparison.OrdinalIgnoreCase) >= 0
            || message.IndexOf("protected", StringComparison.OrdinalIgnoreCase) >= 0;
    }
    catch
    {
        // Deliberate best effort: any other failure to open an existing file is treated as
        // "may need a password". NOTE(review): this also covers corrupt or non-OOXML files — confirm
        // callers are fine with that.
        return true;
    }
}
/// <summary>
/// Extended reader: extracts the main-body text and can optionally include hyperlink text and footnotes.
/// </summary>
/// <param name="path">Path of the document; it is opened read-only.</param>
/// <param name="password">Password passed to the open settings.</param>
/// <param name="includeHyperlinks">When true, text of runs inside hyperlinks is included.</param>
/// <param name="includeFootnotes">When true and the document has a footnotes part, footnote text is appended after the body.</param>
/// <returns>
/// The cleaned text. When the marker "公司第" occurs at an index greater than zero,
/// the text is cut before its last occurrence and trimmed.
/// <c>null</c> when the document has no body or when any exception is thrown.
/// </returns>
public static string GetWordContentByOpenXmlAdvanced(string path, string password, bool includeHyperlinks = false, bool includeFootnotes = false)
{
    // NOTE(review): GetWordContentByOpenXml cuts at "限公司第" while this method cuts at "公司第" —
    // confirm the difference is intentional.
    const string cutMarker = "公司第";

    try
    {
        // NOTE(review): the published DocumentFormat.OpenXml OpenSettings type is not known to expose
        // a Password property — confirm this compiles against the SDK build the project references.
        using (var document = WordprocessingDocument.Open(path, false, new OpenSettings() { Password = password }))
        {
            var mainPart = document.MainDocumentPart;
            var body = mainPart?.Document?.Body;
            if (body == null)
            {
                return null;
            }

            var contentBuilder = new StringBuilder();
            ExtractBodyContentAdvanced(body, contentBuilder, includeHyperlinks);

            // Footnotes are appended after the body text, only on request.
            if (includeFootnotes && mainPart.FootnotesPart != null)
            {
                ExtractFootnotesContent(mainPart.FootnotesPart, contentBuilder);
            }

            string content = CleanContent(contentBuilder.ToString());

            // The marker is a literal, so compare ordinally instead of with the current culture.
            int index = content.LastIndexOf(cutMarker, StringComparison.Ordinal);

            // A marker at index 0 deliberately leaves the text untouched (same as the original "> 0" test).
            return index > 0
                ? content.Substring(0, index).Trim()
                : content;
        }
    }
    catch (Exception ex)
    {
        // Log type, message and stack trace; the stack trace alone does not say what went wrong.
        LogManager.WriteError("GetWordContentByOpenXmlAdvanced()", ex.ToString());
        return null;
    }
}
/// <summary>
/// Appends the text of every direct child of the body to the builder,
/// optionally including hyperlink text.
/// </summary>
/// <param name="body">Document body whose children are visited in order.</param>
/// <param name="contentBuilder">Builder that receives the extracted text.</param>
/// <param name="includeHyperlinks">Passed through to the paragraph and table extractors.</param>
private static void ExtractBodyContentAdvanced(Body body, StringBuilder contentBuilder, bool includeHyperlinks)
{
    foreach (var element in body.Elements())
    {
        ExtractElementContentAdvanced(element, contentBuilder, includeHyperlinks);
    }
}

/// <summary>
/// Extracts one element for the advanced reader: paragraphs and tables are handled directly,
/// section properties are skipped, and any other element is treated as a container and recursed into.
/// </summary>
/// <param name="element">Open XML element to process.</param>
/// <param name="contentBuilder">Builder that receives the extracted text.</param>
/// <param name="includeHyperlinks">Passed through to the paragraph and table extractors.</param>
private static void ExtractElementContentAdvanced(OpenXmlElement element, StringBuilder contentBuilder, bool includeHyperlinks)
{
    switch (element)
    {
        case Paragraph paragraph:
            ExtractParagraphContentAdvanced(paragraph, contentBuilder, includeHyperlinks);
            contentBuilder.AppendLine();
            break;

        case Table table:
            ExtractTableContentAdvanced(table, contentBuilder, includeHyperlinks);
            break;

        case SectionProperties _:
            // No body text here.
            break;

        default:
            // Recurse on the child itself. The previous code wrapped each child in "new Body(child)",
            // which tries to append an element that already belongs to the document tree; the SDK
            // rejects that with an InvalidOperationException, making the whole extraction return null.
            foreach (var childElement in element.Elements())
            {
                ExtractElementContentAdvanced(childElement, contentBuilder, includeHyperlinks);
            }

            break;
    }
}
/// <summary>
/// Appends the text of a paragraph to the builder: direct runs always, and runs that are
/// direct children of a hyperlink only when <paramref name="includeHyperlinks"/> is true.
/// Tabs and line breaks inside runs are emitted the same way as in ExtractParagraphContent.
/// </summary>
/// <param name="paragraph">Paragraph to read.</param>
/// <param name="contentBuilder">Builder that receives the extracted text.</param>
/// <param name="includeHyperlinks">Whether hyperlink text is included.</param>
private static void ExtractParagraphContentAdvanced(Paragraph paragraph, StringBuilder contentBuilder, bool includeHyperlinks)
{
    foreach (var element in paragraph.Elements())
    {
        switch (element)
        {
            case Run run:
                AppendRunContent(run, contentBuilder);
                break;

            case Hyperlink hyperlink when includeHyperlinks:
                foreach (var linkRun in hyperlink.Elements<Run>())
                {
                    AppendRunContent(linkRun, contentBuilder);
                }

                break;
        }
    }
}

/// <summary>
/// Appends one run's text, tabs ("\t") and breaks (line break) to the builder in document order.
/// </summary>
/// <param name="sourceRun">Run whose direct children are read.</param>
/// <param name="contentBuilder">Builder that receives the extracted text.</param>
private static void AppendRunContent(Run sourceRun, StringBuilder contentBuilder)
{
    // The advanced reader used to copy only text nodes, so "a<tab>b" came out as "ab"
    // while the basic reader kept the separator.
    foreach (var child in sourceRun.Elements())
    {
        switch (child)
        {
            case Text text:
                contentBuilder.Append(text.Text);
                break;
            case TabChar _:
                contentBuilder.Append("\t");
                break;
            case Break _:
                contentBuilder.AppendLine();
                break;
        }
    }
}
/// <summary>
/// Appends the text of a table to the builder for the advanced reader: every cell is followed
/// by a tab and every row by a line break.
/// Several paragraphs inside one cell are separated by a line break, and tables
/// nested inside a cell are extracted recursively.
/// </summary>
/// <param name="table">Table to read.</param>
/// <param name="contentBuilder">Builder that receives the extracted text.</param>
/// <param name="includeHyperlinks">Passed through to the paragraph extractor.</param>
private static void ExtractTableContentAdvanced(Table table, StringBuilder contentBuilder, bool includeHyperlinks)
{
    foreach (var row in table.Elements<TableRow>())
    {
        foreach (var cell in row.Elements<TableCell>())
        {
            bool firstParagraph = true;

            foreach (var child in cell.Elements())
            {
                switch (child)
                {
                    case Paragraph paragraph:
                        // Separate consecutive paragraphs so their words do not run together.
                        // A single-paragraph cell produces exactly the same output as before.
                        if (!firstParagraph)
                        {
                            contentBuilder.AppendLine();
                        }

                        ExtractParagraphContentAdvanced(paragraph, contentBuilder, includeHyperlinks);
                        firstParagraph = false;
                        break;

                    case Table nestedTable:
                        // Text inside a nested table used to be dropped silently.
                        ExtractTableContentAdvanced(nestedTable, contentBuilder, includeHyperlinks);
                        break;
                }
            }

            contentBuilder.Append("\t"); // cell separator
        }

        contentBuilder.AppendLine(); // row terminator
    }
}
/// <summary>
/// Appends a footnotes heading followed by the text of every footnote paragraph, one per line.
/// Does nothing when the part has no footnotes root element.
/// </summary>
/// <param name="footnotesPart">Footnotes part of the document.</param>
/// <param name="contentBuilder">Builder that receives the extracted text.</param>
private static void ExtractFootnotesContent(FootnotesPart footnotesPart, StringBuilder contentBuilder)
{
    if (footnotesPart.Footnotes == null)
    {
        return;
    }

    // Heading literal ("脚注" = "footnotes") that separates body text from footnote text.
    contentBuilder.AppendLine("\n--- 脚注 ---");

    // Flatten footnote -> paragraph while keeping document order.
    var footnoteParagraphs = footnotesPart.Footnotes
        .Elements<Footnote>()
        .SelectMany(note => note.Elements<Paragraph>());

    foreach (var footnoteParagraph in footnoteParagraphs)
    {
        ExtractParagraphContent(footnoteParagraph, contentBuilder);
        contentBuilder.AppendLine();
    }
}
http://www.dtcms.com/a/297709.html

相关文章:

  • (笔记)U-boot 2012.10 armv7启动汇编解析
  • C++STL系列之unordered_set和unordered_map
  • gig-gitignore工具实战开发(四):使用ai辅助生成gitignore
  • 开讲啦|MBSE公开课:第二集 MBSE远景设想
  • cocos creator 3.8.6 WebSocket 一直报错 WebSocket is not a constructor
  • Logstash 多表增量同步 MySQL 到 Elasticsearch:支持逻辑删除与热加载,Docker 快速部署实战
  • vue项目创建流程
  • XML的简略知识点
  • PyCharm高效开发全攻略
  • RHCA - CL260 | Day01:Ceph 架构及环境介绍
  • leetcode102:二叉树的层序遍历(队列实现)
  • 栈----1.有效的括号
  • iOS WebView 调试实战,文件上传与权限弹窗异常的排查路径
  • 三维模型驱动下的光伏组件智能排列方案
  • 【深入底层】C++开发简历4+4技能描述6
  • 百特搭AI低代码平台助力企业国际化业务敏捷拓展
  • 《一种采用分布式多模态传感模块的身体尺度机器人皮肤的设计、评估与应用》论文解读
  • 【日志】unity俄罗斯方块——边界限制检测
  • 如何在离线电脑win11上安装VS2022
  • PI 思维升级 PI设计的典范转移:从阻抗思维到谐振控制
  • 在 Ansys CFX Pre 中配置 RGP 表的分步指南
  • C++中的反向迭代器
  • java面试题(中级题汇总)
  • k8s 中的 deployment,statefulset,daemonset 控制器的区别
  • 特征值与特征向量
  • 腾讯云CodeBuddy+微信小程序:5分钟开发番茄小闹钟
  • 电科金仓新一代数据库一体机:国产化方案替换优质选择
  • 本地内网IP映射到公网访问如何实现?内网端口映射外网工具有哪些?
  • python学习DAY22打卡
  • 深入解析Hadoop高频面试题:HDFS读/写流程的RPC调用链