我有一个 Python 脚本，它使用 docx2pdf 库将 DOCX 文件转换为 PDF，但该库依赖 Microsoft Word，因此无法在 Linux 上运行。我正在寻找一种跨平台的解决方案，能够在 Windows 和 Linux（或者至少在 Linux）上将 DOCX 文件转换为 PDF。
import os
import json
import pymorphy2
import click
from docx2pdf import convert
from docxtpl import DocxTemplate
# Module-level morphological analyzer, created once at import time and shared
# by the render_* helpers below for word inflection.
morph = pymorphy2.MorphAnalyzer()
def render_court_name(context):
    """Return the court name with its first word put into the genitive case.

    Only the first whitespace-separated word of ``context['court_name']`` is
    inflected; the remaining words are appended unchanged, separated by
    single spaces.

    Args:
        context: Mapping that contains the key ``'court_name'``.

    Returns:
        The transformed court name. An empty or whitespace-only name is
        returned unchanged, and a first word that cannot be inflected is
        kept as written.

    Raises:
        KeyError: If ``context`` has no ``'court_name'`` entry.
    """
    court_name = context['court_name']
    words = court_name.split()
    if not words:
        # Nothing to inflect; avoids an IndexError on words[0].
        return court_name

    first_word = words[0]
    inflected = morph.parse(first_word)[0].inflect({'gent'})
    # inflect() returns None when the requested form cannot be built, so
    # fall back to the original word instead of failing on ``None.word``.
    first_word_gent = inflected.word if inflected is not None else first_word

    # Joining the list (rather than concatenating with ' ') avoids a trailing
    # space when the name consists of a single word.
    return ' '.join([first_word_gent] + words[1:])
def render_debtor_name(context, case):
    """Return the debtor's full name inflected into the given grammatical case.

    Every whitespace-separated part of ``context['debtor_name']`` is inflected
    on its own and title-cased; the parts are then joined with single spaces.
    Names with any number of parts are supported (not only exactly three).

    Args:
        context: Mapping that contains the key ``'debtor_name'``.
        case: Grammeme passed to ``inflect`` (the caller in this file uses
            ``'gent'`` and ``'ablt'``).

    Returns:
        The inflected name. A part that cannot be inflected is kept exactly
        as it was written in the input.

    Raises:
        KeyError: If ``context`` has no ``'debtor_name'`` entry.
    """
    def inflect_part(part):
        # Use the first parse, as the rest of this file does.
        inflected = morph.parse(part)[0].inflect({case})
        # inflect() returns None when the form cannot be built; keep the
        # original spelling rather than crashing on ``None.word``.
        if inflected is None:
            return part
        return inflected.word.title()

    name_parts = context['debtor_name'].split()
    return ' '.join(inflect_part(part) for part in name_parts)
@click.command()
@click.argument('template_path')
@click.argument('json_data')
@click.argument('output_path')
def main(template_path, json_data, output_path):
    """Render a DOCX template with JSON data and convert the result to PDF.

    TEMPLATE_PATH is the .docx template file, JSON_DATA is a JSON object
    given as a string (it must provide 'debtor_name' and 'court_name'), and
    OUTPUT_PATH is the directory that receives the rendered .docx and .pdf,
    both named after the template file.
    """
    # Report malformed input as a CLI usage error instead of a raw traceback.
    try:
        data = json.loads(json_data)
    except json.JSONDecodeError as err:
        raise click.BadParameter(
            'invalid JSON: {}'.format(err), param_hint='json_data'
        ) from err
    if not isinstance(data, dict):
        raise click.BadParameter(
            'expected a JSON object', param_hint='json_data'
        )

    # Derived name forms come first so that explicit values supplied in the
    # JSON data take precedence when the keys collide.
    context = {
        'debtor_name_genitive': render_debtor_name(data, 'gent'),
        'debtor_name_instrumental': render_debtor_name(data, 'ablt'),
    }
    context.update(data)
    # The court name is always replaced by its inflected form.
    context['court_name'] = render_court_name(context)

    doc = DocxTemplate(template_path)
    doc.render(context)

    # Both output files reuse the template's base name.
    output_base_name = os.path.splitext(os.path.basename(template_path))[0]
    output_doc_path = os.path.join(output_path, output_base_name + '.docx')
    doc.save(output_doc_path)

    pdf_path = os.path.join(output_path, output_base_name + '.pdf')
    # NOTE: docx2pdf relies on Microsoft Word, so this step is not available
    # on Linux.
    convert(output_doc_path, pdf_path)
    print('Путь к файлу pdf ->', pdf_path)
# Invoke the entry point only when this file is executed directly as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
您可以使用 `pandoc` 及其 Python 包装器 `pypandoc`：
# Install the pandoc command-line tool.
sudo apt-get install pandoc
# Install LaTeX (the XeTeX package): pandoc requires LaTeX to produce PDF output.
sudo apt-get install texlive-xetex
# Install pypandoc, the Python wrapper around pandoc.
pip install pypandoc
import pypandoc
...
# Convert the DOCX to PDF using pypandoc. pandoc's default PDF engine is
# pdflatex, so select XeLaTeX explicitly: it is the engine provided by the
# texlive-xetex package and it accepts Unicode input directly.
# NOTE(review): the chosen LaTeX font must cover the document's script
# (e.g. pass '-V', 'mainfont=<font>' in extra_args) - confirm on the target
# system.
pypandoc.convert_file(
    output_doc_path,
    'pdf',
    outputfile=pdf_path,
    extra_args=['--pdf-engine=xelatex'],
)