项目一:Python实现PDF增删改查编辑保存功能的全栈解决方案
1、需求:
公司文档中PDF类型的文件较多,需要能够编辑PDF内容,并提供可批量、可定制的自动化处理功能,以灵活自主地满足业务需求。
2、产品设计
提供一个完整的全栈解决方案,包含前端界面和后端服务,实现PDF文档的增删改查(CRUD)和编辑保存功能。
- 完整的CRUD功能:
  - 创建:上传PDF文件
  - 读取:查看PDF列表和详情
  - 更新:编辑PDF(旋转、水印、提取页面)
  - 删除:删除PDF文件
- 用户认证:
  - 注册、登录、注销功能
  - 用户只能访问自己的PDF文件
- PDF编辑功能:
  - 旋转PDF页面
  - 添加水印
  - 提取特定页面
  - 下载处理后的文件
- 响应式设计:
  - 适配不同屏幕尺寸
  - 现代化的用户界面
- 安全特性:
  - 文件类型验证
  - 文件安全存储
  - CSRF保护
3. 系统架构
pdf_manager/
├── app/                      # 应用主目录
│   ├── __init__.py           # Flask应用工厂
│   ├── models.py             # 数据库模型
│   ├── routes.py             # 路由和视图函数
│   ├── utils/                # 实用工具
│   │   ├── pdf_utils.py      # PDF处理工具
│   │   └── auth.py           # 认证工具
│   ├── templates/            # HTML模板
│   │   ├── base.html         # 基础模板
│   │   ├── index.html        # 主页
│   │   ├── view.html         # 查看PDF
│   │   ├── edit.html         # 编辑PDF
│   │   └── auth/             # 认证相关模板
│   │       ├── login.html
│   │       └── register.html
│   └── static/               # 静态文件
│       ├── css/
│       │   └── main.css      # 主样式
│       └── js/
│           ├── main.js       # 主JavaScript
│           └── pdf.js        # PDF操作相关JS
├── config.py                 # 配置文件
├── requirements.txt          # 依赖文件
└── run.py                    # 启动脚本
2. 后端实现 (Flask)
2.1 配置文件 (config.py)
import os

from dotenv import load_dotenv

# Populate the process environment from a local .env file (if any) so the
# os.getenv() lookups in Config below can see those values.
load_dotenv()
class Config:
    """Application settings, taken from environment variables where available.

    Each environment-backed value falls back to a development default when
    the corresponding variable is not set.
    """

    # NOTE(review): the 'dev-key' fallback is a fixed, publicly known value;
    # make sure SECRET_KEY is set in the environment for real deployments.
    SECRET_KEY = os.getenv('SECRET_KEY', 'dev-key')

    # Database connection string; defaults to a local SQLite file.
    SQLALCHEMY_DATABASE_URI = os.getenv('DATABASE_URL', 'sqlite:///pdf_manager.db')
    SQLALCHEMY_TRACK_MODIFICATIONS = False

    # Storage directories, resolved relative to the directory of this file.
    UPLOAD_FOLDER = os.path.join(os.path.dirname(__file__), 'uploads')
    PROCESSED_FOLDER = os.path.join(os.path.dirname(__file__), 'processed')

    # Upper bound on request body size, in bytes.
    MAX_CONTENT_LENGTH = 16 * 1024 * 1024  # 16MB

    # File extensions accepted for upload.
    ALLOWED_EXTENSIONS = {'pdf'}
2.2 PDF工具类 (app/utils/pdf_utils.py)
import os
from PyPDF2 import PdfFileReader, PdfFileWriter
from reportlab.pdfgen import canvas
import io


class PDFEditor:
    """Stateless helpers that read a PDF, transform it and write the result.

    Every public method is a static method that takes file paths and returns
    the path it wrote to (or, for ``get_page_count``, an integer).

    NOTE(review): the camelCase reader/writer/page calls used below belong to
    the legacy PyPDF2 API; confirm the pinned PyPDF2 version still ships them.
    """

    @staticmethod
    def _write_pdf(pdf_writer, output_path):
        """Serialize ``pdf_writer`` to ``output_path`` and return that path."""
        with open(output_path, 'wb') as out_pdf:
            pdf_writer.write(out_pdf)
        return output_path

    @staticmethod
    def rotate_pdf(input_path, output_path, angle=90):
        """Rotate every page of a PDF clockwise.

        Args:
            input_path: Path of the PDF to read.
            output_path: Path the rotated PDF is written to.
            angle: Clockwise rotation in degrees, passed to
                ``rotateClockwise`` for each page.

        Returns:
            ``output_path``.
        """
        pdf_reader = PdfFileReader(input_path)
        pdf_writer = PdfFileWriter()
        for page_num in range(pdf_reader.getNumPages()):
            page = pdf_reader.getPage(page_num)
            page.rotateClockwise(angle)
            pdf_writer.addPage(page)
        return PDFEditor._write_pdf(pdf_writer, output_path)

    @staticmethod
    def add_watermark(input_path, output_path, watermark_text):
        """Stamp a diagonal, semi-transparent text watermark on every page.

        A one-page overlay PDF sized to each page is generated in memory and
        merged onto that page.

        Args:
            input_path: Path of the PDF to read.
            output_path: Path the watermarked PDF is written to.
            watermark_text: Text drawn centred on each page, rotated 45 degrees.

        Returns:
            ``output_path``.
        """
        pdf_reader = PdfFileReader(input_path)
        pdf_writer = PdfFileWriter()
        for page_num in range(pdf_reader.getNumPages()):
            page = pdf_reader.getPage(page_num)
            width = float(page.mediaBox.getWidth())
            height = float(page.mediaBox.getHeight())

            packet = io.BytesIO()
            # The canvas expects a (width, height) pair; the media box itself
            # is a four-number rectangle, so it must not be passed directly.
            can = canvas.Canvas(packet, pagesize=(width, height))
            can.setFillColorRGB(0.8, 0.8, 0.8, alpha=0.3)
            can.setFont("Helvetica", 50)
            can.saveState()
            # Move the origin to the page centre so the rotated text stays
            # centred on the page.
            can.translate(width / 2, height / 2)
            can.rotate(45)
            can.drawCentredString(0, 0, watermark_text)
            can.restoreState()
            can.save()

            # Rewind so the overlay can be read back as a PDF.
            packet.seek(0)
            watermark_pdf = PdfFileReader(packet)
            watermark_page = watermark_pdf.getPage(0)
            page.mergePage(watermark_page)
            pdf_writer.addPage(page)
        return PDFEditor._write_pdf(pdf_writer, output_path)

    @staticmethod
    def extract_pages(input_path, output_path, pages):
        """Copy the selected pages of a PDF into a new file.

        Args:
            input_path: Path of the PDF to read.
            output_path: Path the extracted pages are written to.
            pages: Iterable of 1-based page numbers, emitted in the given
                order. Numbers outside the document are silently skipped.

        Returns:
            ``output_path``.
        """
        pdf_reader = PdfFileReader(input_path)
        pdf_writer = PdfFileWriter()
        total_pages = pdf_reader.getNumPages()
        for page_num in pages:
            if 0 <= page_num - 1 < total_pages:
                pdf_writer.addPage(pdf_reader.getPage(page_num - 1))
        return PDFEditor._write_pdf(pdf_writer, output_path)

    @staticmethod
    def merge_pdfs(pdf_paths, output_path):
        """Concatenate several PDFs into one file.

        Args:
            pdf_paths: Iterable of PDF paths, appended in the given order.
            output_path: Path the merged PDF is written to.

        Returns:
            ``output_path``.
        """
        pdf_writer = PdfFileWriter()
        for path in pdf_paths:
            pdf_reader = PdfFileReader(path)
            for page_num in range(pdf_reader.getNumPages()):
                pdf_writer.addPage(pdf_reader.getPage(page_num))
        return PDFEditor._write_pdf(pdf_writer, output_path)

    @staticmethod
    def get_page_count(pdf_path):
        """Return the number of pages in the PDF at ``pdf_path``."""
        with open(pdf_path, 'rb') as f:
            pdf = PdfFileReader(f)
            return pdf.getNumPages()
2.3 数据库模型 (app/models.py)
from datetime import datetime
from app import db
from werkzeug.security import generate_password_hash, check_password_hash
from flask_login import UserMixinclass User(UserMixin, db.Model):
id = db.Column(db.Integer, primary_key=True)
username = db.Column(db.String(64), index=True, unique=True)
email = db.Column(db.String(120), index=True, unique=True)
password_hash = db.Column(db.String(128))
pdfs = db.relationship('PDFDocument', backref='owner', lazy='dynamic')def set_password(self, password):
self.password_hash = generate_password_hash(password)def check_password(self, password):
return check_password_hash(self.password_hash, password)class PDFDocument(db.Model):
id = db.Column(db.Integer, primary_key=True)
filename = db.Column(db.String(256))
original_name = db.Column(db.String(256))
filepath = db.Column(db.String(512))
pages = db.Column(db.Integer)
created_at = db.Column(db.DateTime, index=True, default=datetime.utcnow)
user_id = db.Column(db.Integer, db.ForeignKey('user.id'))
def __repr__(self):