安装环境
conda create -n marker-pdf python=3.12
conda activate marker-pdf
pip install modelscope
pip install marker-pdf -U
下载模型
from modelscope import snapshot_download

# Local directory that receives the downloaded model files.
model_root = "models"

# Fetch the "Lixiang/marker-pdf" snapshot from ModelScope into model_root.
snapshot_download("Lixiang/marker-pdf", local_dir=model_root)
开始运行
from marker.converters.pdf import PdfConverter
from marker.models import create_model_dict
from marker.output import text_from_rendered
from surya.settings import settings
# Directory that holds the locally downloaded model files.
model_root = "models"
settings.MODEL_CACHE_DIR = model_root

# Names of the surya settings attributes that hold model checkpoint locations.
checkpoint_settings = (
    "LAYOUT_MODEL_CHECKPOINT",
    "DETECTOR_MODEL_CHECKPOINT",
    "OCR_ERROR_MODEL_CHECKPOINT",
    "TABLE_REC_MODEL_CHECKPOINT",
    "RECOGNITION_MODEL_CHECKPOINT",
)

# Point every checkpoint that references an "s3://" location at the matching
# sub-path under model_root, e.g. "s3://a/b" -> "models/a/b".
# NOTE(review): values without "s3://" are left untouched; prefixing them with
# model_root (no separator) would produce a broken path -- confirm this matches
# the intended behaviour for the installed surya version.
for checkpoint in checkpoint_settings:
    value = getattr(settings, checkpoint)
    if "s3://" in value:
        local_suffix = value.replace("s3://", "/")
        setattr(settings, checkpoint, model_root + local_suffix)

# Build the converter after the settings are patched, then convert the PDF
# and print the extracted text.
converter = PdfConverter(artifact_dict=create_model_dict())
rendered = converter("test.pdf")
text, _, images = text_from_rendered(rendered)
print(text)