Python 综合运用:MD 转 DOCX 工具
下面是一个使用 Python 和 PyQt5 实现的 Markdown 转 DOCX 工具,具有美观的图形界面,支持表格和代码块转换,并提供转换预览功能。
功能特点
- 将 Markdown 的标题、段落、列表、表格、代码块、引用和分隔线转换为对应的 DOCX 元素(加粗、链接等行内样式按纯文本处理)
- 支持表格、代码块等复杂元素转换
- 美观的图形界面
- 选择文件后可预览 Markdown 源文本,转换时通过进度条和状态栏显示进度
- 文件选择对话框
代码实现
import sys
import os
from markdown import markdown
from docx import Document
from docx.shared import Pt, RGBColor, Inches
from docx.oxml.ns import qn
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from PyQt5.QtWidgets import (QApplication, QMainWindow, QVBoxLayout, QHBoxLayout, QPushButton, QFileDialog, QTextEdit, QLabel, QWidget, QMessageBox, QProgressBar)
from PyQt5.QtCore import Qt
from PyQt5.QtGui import QFont, QIcon


class MarkdownToDocxConverter(QMainWindow):
    """Main window of the Markdown-to-DOCX converter.

    The window lets the user pick a Markdown file, shows its raw text in a
    read-only preview pane and converts it to a Word document.  Conversion
    renders the Markdown to HTML, then walks the block-level HTML elements
    (headings, paragraphs, lists, tables, code blocks, quotes, rules) and
    emits the matching python-docx constructs.  Inline formatting is
    flattened to plain text.
    """

    # Typeface for document body text (Latin and East Asian runs alike).
    BODY_FONT = '微软雅黑'
    # Typeface for the Qt widgets.
    UI_FONT = 'Microsoft YaHei'

    # HTML heading tag -> Word heading level ("Heading 1" .. "Heading 6").
    HEADING_LEVELS = {f'h{n}': n for n in range(1, 7)}
    # Wrapper tags whose children are processed as if they were top-level.
    CONTAINER_TAGS = ('div', 'section', 'article')

    # WordprocessingML requires the children of <w:pPr> to appear in a fixed
    # order.  These tuples list everything that must come AFTER <w:shd> and
    # <w:pBdr> so the new elements can be inserted at a schema-valid position.
    _PPR_AFTER_SHD = (
        'w:tabs', 'w:suppressAutoHyphens', 'w:kinsoku', 'w:wordWrap',
        'w:overflowPunct', 'w:topLinePunct', 'w:autoSpaceDE',
        'w:autoSpaceDN', 'w:bidi', 'w:adjustRightInd', 'w:snapToGrid',
        'w:spacing', 'w:ind', 'w:contextualSpacing', 'w:mirrorIndents',
        'w:suppressOverlap', 'w:jc', 'w:textDirection', 'w:textAlignment',
        'w:textboxTightWrap', 'w:outlineLvl', 'w:divId', 'w:cnfStyle',
        'w:rPr', 'w:sectPr', 'w:pPrChange',
    )
    _PPR_AFTER_PBDR = ('w:shd',) + _PPR_AFTER_SHD

    def __init__(self):
        """Build the UI and set the window icon."""
        super().__init__()
        self.initUI()
        # A missing icon file simply yields an empty icon; provide
        # 'icon.png' next to the script or remove this line.
        self.setWindowIcon(QIcon('icon.png'))

    # ------------------------------------------------------------------
    # UI construction
    # ------------------------------------------------------------------
    def initUI(self):
        """Create and lay out all widgets of the main window."""
        self.setWindowTitle('Markdown 转 DOCX 工具')
        self.setGeometry(300, 300, 800, 600)

        # Central widget and its vertical layout.
        main_widget = QWidget()
        self.setCentralWidget(main_widget)
        main_layout = QVBoxLayout()
        main_widget.setLayout(main_layout)

        # Title.
        title_label = QLabel('Markdown 转 DOCX 转换器')
        title_label.setFont(QFont(self.UI_FONT, 16, QFont.Bold))
        title_label.setAlignment(Qt.AlignCenter)
        title_label.setStyleSheet('color: #2c3e50; margin-bottom: 20px;')
        main_layout.addWidget(title_label)

        # File selection row: path label + "browse" button.
        file_layout = QHBoxLayout()
        self.md_file_label = QLabel('未选择文件')
        self.md_file_label.setFont(QFont(self.UI_FONT, 10))
        self.md_file_label.setStyleSheet(
            'border: 1px solid #ddd; padding: 5px;')
        self.md_file_label.setFixedHeight(30)

        select_btn = QPushButton('选择 Markdown 文件')
        select_btn.setFont(QFont(self.UI_FONT, 10))
        select_btn.setStyleSheet('''
            QPushButton {
                background-color: #3498db;
                color: white;
                border: none;
                padding: 8px 15px;
                border-radius: 4px;
            }
            QPushButton:hover {
                background-color: #2980b9;
            }
        ''')
        select_btn.clicked.connect(self.select_md_file)

        file_layout.addWidget(self.md_file_label, stretch=4)
        file_layout.addWidget(select_btn, stretch=1)
        main_layout.addLayout(file_layout)

        # Preview pane (shows the raw Markdown source).
        preview_label = QLabel('预览内容:')
        preview_label.setFont(QFont(self.UI_FONT, 10, QFont.Bold))
        main_layout.addWidget(preview_label)

        self.preview_text = QTextEdit()
        self.preview_text.setFont(QFont('Consolas', 10))
        self.preview_text.setReadOnly(True)
        self.preview_text.setStyleSheet('''
            QTextEdit {
                border: 1px solid #ddd;
                padding: 10px;
                background-color: #f9f9f9;
            }
        ''')
        main_layout.addWidget(self.preview_text, stretch=3)

        # Progress bar.
        self.progress_bar = QProgressBar()
        self.progress_bar.setRange(0, 100)
        self.progress_bar.setValue(0)
        self.progress_bar.setTextVisible(True)
        self.progress_bar.setStyleSheet('''
            QProgressBar {
                border: 1px solid #ddd;
                border-radius: 3px;
                text-align: center;
                height: 20px;
            }
            QProgressBar::chunk {
                background-color: #2ecc71;
                width: 10px;
            }
        ''')
        main_layout.addWidget(self.progress_bar)

        # Convert button; disabled until a file has been loaded.
        convert_btn = QPushButton('转换为 DOCX')
        convert_btn.setFont(QFont(self.UI_FONT, 12, QFont.Bold))
        convert_btn.setStyleSheet('''
            QPushButton {
                background-color: #2ecc71;
                color: white;
                border: none;
                padding: 10px 20px;
                border-radius: 4px;
                margin-top: 15px;
            }
            QPushButton:hover {
                background-color: #27ae60;
            }
            QPushButton:disabled {
                background-color: #95a5a6;
            }
        ''')
        convert_btn.clicked.connect(self.convert_to_docx)
        convert_btn.setEnabled(False)
        self.convert_btn = convert_btn
        main_layout.addWidget(convert_btn, alignment=Qt.AlignCenter)

        # Status bar.
        self.statusBar().showMessage('准备就绪')

        # Path of the currently selected Markdown file ('' = none).
        self.md_file_path = ''

    # ------------------------------------------------------------------
    # Slots
    # ------------------------------------------------------------------
    def select_md_file(self):
        """Ask the user for a Markdown file and load it into the preview.

        The selection is only committed (path stored, convert button
        enabled) after the file has been read successfully, so a failed
        read never leaves the window pointing at an unusable file.
        """
        options = QFileDialog.Options()
        file_path, _ = QFileDialog.getOpenFileName(
            self, "选择 Markdown 文件", "",
            "Markdown Files (*.md *.markdown);;All Files (*)",
            options=options)
        if not file_path:
            return

        try:
            # 'utf-8-sig' reads plain UTF-8 too, but strips a leading BOM
            # that would otherwise stop the first line being parsed.
            with open(file_path, 'r', encoding='utf-8-sig') as f:
                content = f.read()
        except (OSError, UnicodeDecodeError) as e:
            QMessageBox.warning(self, '错误', f'无法读取文件: {str(e)}')
            self.statusBar().showMessage('文件读取失败')
            return

        self.md_file_path = file_path
        self.md_file_label.setText(file_path)
        self.preview_text.setPlainText(content)
        self.convert_btn.setEnabled(True)
        self.statusBar().showMessage('文件加载成功')

    def convert_to_docx(self):
        """Convert the selected Markdown file and save it as a .docx file.

        Prompts for the destination path, then reads the Markdown, renders
        it to HTML, builds the Word document and saves it.  Any failure is
        reported in a message box; the progress bar is always reset.
        """
        if not self.md_file_path:
            QMessageBox.warning(self, '警告', '请先选择 Markdown 文件')
            return

        # Ask where to save; default to the source name with .docx suffix.
        options = QFileDialog.Options()
        save_path, _ = QFileDialog.getSaveFileName(
            self, "保存 DOCX 文件",
            os.path.splitext(self.md_file_path)[0] + '.docx',
            "Word Documents (*.docx);;All Files (*)",
            options=options)
        if not save_path:
            return

        self.progress_bar.setValue(10)
        self.statusBar().showMessage('正在转换...')
        QApplication.processEvents()  # let the UI repaint before working

        try:
            # Read the Markdown source.
            with open(self.md_file_path, 'r', encoding='utf-8-sig') as f:
                md_content = f.read()
            self.progress_bar.setValue(30)

            # Create the document and set its default font.
            doc = Document()
            normal_style = doc.styles['Normal']
            normal_style.font.name = self.BODY_FONT
            normal_style._element.rPr.rFonts.set(
                qn('w:eastAsia'), self.BODY_FONT)
            normal_style.font.size = Pt(10.5)

            # Render Markdown to HTML.  'extra' already bundles tables and
            # fenced code blocks.  'codehilite' is deliberately not used:
            # it wraps code in <div class="codehilite"> and adds markup that
            # is useless for DOCX output, where only the text is extracted.
            html_content = markdown(md_content, extensions=['extra'])
            self.progress_bar.setValue(50)

            # Translate the HTML into document content.
            self.add_html_to_doc(html_content, doc)
            self.progress_bar.setValue(80)

            # Save.
            doc.save(save_path)
            self.progress_bar.setValue(100)
            self.statusBar().showMessage('转换完成!')
            QMessageBox.information(self, '成功', '文件转换完成!')
        except Exception as e:
            # Top-level UI boundary: report whatever went wrong to the user.
            QMessageBox.critical(self, '错误', f'转换过程中出错: {str(e)}')
            self.statusBar().showMessage('转换失败')
        finally:
            self.progress_bar.setValue(0)

    # ------------------------------------------------------------------
    # HTML -> DOCX
    # ------------------------------------------------------------------
    def add_html_to_doc(self, html, doc):
        """Parse *html* and append its block-level elements to *doc*."""
        from bs4 import BeautifulSoup

        soup = BeautifulSoup(html, 'html.parser')
        self._add_elements(soup.children, doc)

    def _add_elements(self, nodes, doc):
        """Dispatch each node in *nodes* to the matching ``add_*`` method."""
        for element in nodes:
            name = getattr(element, 'name', None)
            if name is None:
                continue  # bare text / whitespace between blocks
            if name in self.HEADING_LEVELS:
                self.add_heading(doc, element.text, self.HEADING_LEVELS[name])
            elif name == 'p':
                self.add_paragraph(doc, element.text)
            elif name == 'ul':
                self.add_list(doc, element, False)
            elif name == 'ol':
                self.add_list(doc, element, True)
            elif name == 'table':
                self.add_table(doc, element)
            elif name == 'pre':
                self.add_code_block(doc, element)
            elif name == 'blockquote':
                self.add_quote(doc, element)
            elif name == 'hr':
                self.add_horizontal_rule(doc)
            elif name in self.CONTAINER_TAGS:
                # Wrappers (e.g. from raw HTML or Markdown extensions) hide
                # real blocks one level down; descend instead of dropping.
                self._add_elements(element.children, doc)

    def _set_run_font(self, run, font_name=None):
        """Apply *font_name* (default: body font) to *run*.

        Word uses a separate East Asian font slot for CJK characters, so
        both the regular and the ``w:eastAsia`` font are set.
        """
        font_name = font_name or self.BODY_FONT
        run.font.name = font_name
        run._element.rPr.rFonts.set(qn('w:eastAsia'), font_name)

    def add_heading(self, doc, text, level):
        """Append a heading of the given Word *level* (0 = Title)."""
        heading = doc.add_heading(text, level)
        for run in heading.runs:
            self._set_run_font(run)

    def add_paragraph(self, doc, text):
        """Append a plain body paragraph containing *text*."""
        p = doc.add_paragraph(text)
        for run in p.runs:
            self._set_run_font(run)

    @staticmethod
    def _list_item_text(li):
        """Return the text of *li* without the text of nested lists."""
        parts = []
        for child in li.children:
            name = getattr(child, 'name', None)
            if name in ('ul', 'ol'):
                continue
            parts.append(child.get_text() if name else str(child))
        return ''.join(parts).strip()

    def add_list(self, doc, element, ordered, level=1):
        """Append the items of a ``<ul>``/``<ol>`` element.

        Args:
            doc: Target document.
            element: The list element.
            ordered: True for numbered lists, False for bullets.
            level: Nesting depth, starting at 1.  Depths 2 and 3 use the
                built-in "List Bullet 2/3" / "List Number 2/3" styles;
                deeper levels reuse the level-3 style.
        """
        base_style = 'List Number' if ordered else 'List Bullet'
        style = base_style if level <= 1 else f'{base_style} {min(level, 3)}'

        for li in element.find_all('li', recursive=False):
            # Use only the item's own text; nested lists are emitted by the
            # recursion below and must not be duplicated in the parent.
            doc.add_paragraph(self._list_item_text(li), style=style)
            for child in li.children:
                if child.name in ('ul', 'ol'):
                    self.add_list(doc, child, child.name == 'ol', level + 1)

    def add_table(self, doc, element):
        """Append a bordered table mirroring the ``<table>`` element."""
        rows = element.find_all('tr')
        if not rows:
            return

        cells_by_row = [row.find_all(['th', 'td']) for row in rows]
        # Size the grid by the widest row so ragged tables cannot index
        # past the last column.
        col_count = max(len(cells) for cells in cells_by_row)
        if col_count == 0:
            return

        table = doc.add_table(rows=len(rows), cols=col_count)
        table.style = 'Table Grid'  # visible borders

        for i, cells in enumerate(cells_by_row):
            for j, cell in enumerate(cells):
                target = table.cell(i, j)
                target.text = cell.get_text()
                is_header = cell.name == 'th'
                for paragraph in target.paragraphs:
                    for run in paragraph.runs:
                        self._set_run_font(run)
                        if is_header:
                            run.font.bold = True

    def _shade_paragraph(self, paragraph, fill):
        """Give *paragraph* a solid background of hex colour *fill*."""
        from docx.oxml import OxmlElement

        pPr = paragraph._element.get_or_add_pPr()
        for old in pPr.findall(qn('w:shd')):
            pPr.remove(old)
        shd = OxmlElement('w:shd')
        shd.set(qn('w:val'), 'clear')
        shd.set(qn('w:color'), 'auto')
        shd.set(qn('w:fill'), fill)
        pPr.insert_element_before(shd, *self._PPR_AFTER_SHD)

    def _add_left_border(self, paragraph, size=4, color='auto'):
        """Draw a single-line left border on *paragraph*.

        *size* is the line width in eighths of a point.
        """
        from docx.oxml import OxmlElement

        pPr = paragraph._element.get_or_add_pPr()
        for old in pPr.findall(qn('w:pBdr')):
            pPr.remove(old)
        borders = OxmlElement('w:pBdr')
        left = OxmlElement('w:left')
        left.set(qn('w:val'), 'single')
        left.set(qn('w:sz'), str(size))
        left.set(qn('w:color'), color)
        borders.append(left)
        pPr.insert_element_before(borders, *self._PPR_AFTER_PBDR)

    def add_code_block(self, doc, element):
        """Append a ``<pre>`` block as an indented, shaded monospace paragraph."""
        source = element.find('code')
        if source is None:
            source = element  # raw <pre> without an inner <code>
        # Drop the trailing newline the renderer adds so the block does not
        # end with an empty line.
        code_text = source.get_text().rstrip('\n')

        p = doc.add_paragraph()
        p.paragraph_format.left_indent = Inches(0.5)
        p.paragraph_format.space_before = Pt(6)
        p.paragraph_format.space_after = Pt(6)

        run = p.add_run(code_text)
        run.font.name = 'Consolas'
        run.font.size = Pt(10)
        run.font.color.rgb = RGBColor(0x36, 0x36, 0x36)

        # Light grey background.
        self._shade_paragraph(p, 'F0F0F0')

    def add_quote(self, doc, element):
        """Append a ``<blockquote>`` as an indented italic grey paragraph."""
        p = doc.add_paragraph()
        p.paragraph_format.left_indent = Inches(0.5)
        p.paragraph_format.first_line_indent = Inches(-0.25)
        p.paragraph_format.space_before = Pt(6)
        p.paragraph_format.space_after = Pt(6)

        # The rendered quote is padded with newlines around its inner <p>.
        run = p.add_run(element.get_text().strip())
        self._set_run_font(run)
        run.font.italic = True
        run.font.color.rgb = RGBColor(0x66, 0x66, 0x66)

        # Vertical bar on the left.
        self._add_left_border(p, size=4, color='auto')

    def add_horizontal_rule(self, doc):
        """Append a centred row of horizontal-bar characters as a divider."""
        p = doc.add_paragraph()
        p.paragraph_format.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
        run = p.add_run('―' * 30)
        run.font.color.rgb = RGBColor(0xCC, 0xCC, 0xCC)


def main():
    """Create the Qt application, show the converter window and run it."""
    app = QApplication(sys.argv)

    # Application-wide font.
    app.setFont(QFont('Microsoft YaHei', 10))

    # Application-wide style sheet.
    app.setStyleSheet('''
        QMainWindow {
            background-color: #f5f7fa;
        }
        QLabel {
            color: #34495e;
        }
    ''')

    converter = MarkdownToDocxConverter()
    converter.show()
    sys.exit(app.exec_())


if __name__ == '__main__':
    main()
使用说明
- 运行程序后,点击"选择 Markdown 文件"按钮选择要转换的.md文件
- 文件内容将显示在预览框中
- 点击"转换为 DOCX"按钮选择保存位置并开始转换
- 转换过程中会显示进度条和状态信息
- 转换完成后会弹出提示框
依赖安装
在运行此程序前,需要安装以下依赖:
pip install PyQt5 markdown python-docx beautifulsoup4
功能扩展建议
- 可以添加批量转换功能
- 可以增加对更多Markdown扩展语法的支持
- 可以添加主题切换功能
- 可以增加转换历史记录功能
这个工具提供了美观的界面和 Markdown 到 DOCX 的转换功能,支持表格、代码块等复杂元素的转换,并提供了源文件预览和转换进度显示。