如何使用reportlab生成可搜索的PDF?

问题描述 投票:0回答:1

下面是一段生成 pdf 的代码,已经稳定使用了好几年——但是,我刚刚注意到生成的 pdf 在 Acrobat Reader 中无法搜索。如何才能让生成的 pdf 可搜索?

请注意,包含要搜索的内容的元素是一个表格 - 也许这就是问题所在?

from reportlab.lib import colors,utils
from reportlab.lib.pagesizes import letter,landscape,portrait
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Image, Spacer
from reportlab.lib.styles import getSampleStyleSheet,ParagraphStyle
from reportlab.lib.units import inch

... ...

        doc = SimpleDocTemplate(pdfName, pagesize=landscape(letter),leftMargin=0.5*inch,rightMargin=0.5*inch,topMargin=1.03*inch,bottomMargin=0.5*inch) # or pagesize=letter
#       self.logMsgBox.show()
#       QTimer.singleShot(5000,self.logMsgBox.close)
        QCoreApplication.processEvents()
        elements=[]
        for team in teamFilterList:
            extTeamNameLower=getExtTeamName(team).lower()
            radioLogPrint=[]
            styles = getSampleStyleSheet()
            styles.add(ParagraphStyle(
                name='operator',
                parent=styles['Normal'],
                backColor='lightgrey'
                ))
            headers=MyTableModel.header_labels[0:6]
            if self.useOperatorLogin:
                operatorImageFile=os.path.join(iconsDir,'user_icon_80px.png')
                if os.path.isfile(operatorImageFile):
                    rprint('operator image file found: '+operatorImageFile)
                    headers.append(Image(operatorImageFile,width=0.16*inch,height=0.16*inch))
                else:
                    rprint('operator image file not found: '+operatorImageFile)
                    headers.append('Op.')
            radioLogPrint.append(headers)
##          if teams and opPeriod==1: # if request op period = 1, include 'Radio Log Begins' in all team tables
##              radioLogPrint.append(self.radioLog[0])
            entryOpPeriod=1 # update this number when 'Operational Period <x> Begins' lines are found
##          hits=False # flag to indicate whether this team has any entries in the requested op period; if not, don't make a table for this team
            for row in self.radioLog:
                opStartRow=False
##              rprint("message:"+row[3]+":"+str(row[3].split()))
                if row[3].startswith("Radio Log Begins:"):
                    opStartRow=True
                if row[3].startswith("Operational Period") and row[3].split()[3] == "Begins:":
                    opStartRow=True
                    entryOpPeriod=int(row[3].split()[2])
                # #523: handled continued incidents
                if row[3].startswith('Radio Log Begins - Continued incident'):
                    opStartRow=True
                    entryOpPeriod=int(row[3].split(': Operational Period ')[1].split()[0])
##              rprint("desired op period="+str(opPeriod)+"; this entry op period="+str(entryOpPeriod))
                if entryOpPeriod == opPeriod:
                    if team=="" or extTeamNameLower==getExtTeamName(row[2]).lower() or opStartRow: # filter by team name if argument was specified
                        style=styles['Normal']
                        if 'RADIO OPERATOR LOGGED IN' in row[3]:
                            style=styles['operator']
                        printRow=[row[0],row[1],row[2],Paragraph(row[3],style),Paragraph(row[4],styles['Normal']),Paragraph(row[5],styles['Normal'])]
                        if self.useOperatorLogin:
                            if len(row)>10:
                                printRow.append(row[10])
                            else:
                                printRow.append('')
                        radioLogPrint.append(printRow)
##                      hits=True
            if not teams:
                # #523: avoid exception 
                try:
                    radioLogPrint[1][4]=self.datum
                except:
                    rprint('Nothing to print for specified operational period '+str(opPeriod))
                    return
            rprint("length:"+str(len(radioLogPrint)))
            if not teams or len(radioLogPrint)>2: # don't make a table for teams that have no entries during the requested op period
                if self.useOperatorLogin:
                    colWidths=[x*inch for x in [0.5,0.6,1.25,5.2,1.25,0.9,0.3]]
                else:
                    colWidths=[x*inch for x in [0.5,0.6,1.25,5.5,1.25,0.9]]
                t=Table(radioLogPrint,repeatRows=1,colWidths=colWidths)
                t.setStyle(TableStyle([('FONT',(0,0),(-1,-1),'Helvetica'),
                                        ('FONT',(0,0),(-1,1),'Helvetica-Bold'),
                                        ('INNERGRID', (0,0), (-1,-1), 0.25, colors.black),
                                     ('BOX', (0,0), (-1,-1), 2, colors.black),
                                      ('BOX', (0,0), (-1,0), 2, colors.black)]))
                elements.append(t)
                if teams and team!=teamFilterList[-1]: # don't add a spacer after the last team - it could cause another page!
                    elements.append(Spacer(0,0.25*inch))
        doc.build(elements,onFirstPage=functools.partial(self.printLogHeaderFooter,opPeriod=opPeriod,teams=teams),onLaterPages=functools.partial(self.printLogHeaderFooter,opPeriod=opPeriod,teams=teams))
#       self.logMsgBox.setInformativeText("Finalizing and Printing...")
        self.printPDF(pdfName)

... ...

def printPDF(self,pdfName):
    """Send the file at ``pdfName`` to the system default printer.

    Invokes the Windows shell "print" verb on ``pdfName`` through
    ``win32api.ShellExecute``, passing the default printer name (from
    ``win32print.GetDefaultPrinter()``) as a ``/d:"<printer>"`` parameter.

    Any exception raised by the call is caught and its text is stored in
    the local ``estr``.
    NOTE(review): the use of ``estr`` is not visible in this excerpt;
    presumably it is logged or shown to the user further down - confirm.
    NOTE(review): if the default printer is a "print to PDF" driver, the
    output is a new file produced by that driver, not ``pdfName`` itself,
    and may not be searchable even though ``pdfName`` is.
    """
    try:
        # Arguments: parent window handle 0, verb "print", file to print,
        # parameter string naming the default printer, working directory ".",
        # and show-command 0.
        win32api.ShellExecute(0,"print",pdfName,'/d:"%s"' % win32print.GetDefaultPrinter(),".",0)
    except Exception as e:
        # Keep the error text instead of letting the exception propagate.
        estr=str(e)

... ...

python pdf reportlab searchable
1个回答
0
投票

谢谢 Marijn,你说对了,是我弄错了:reportlab 生成的 pdf(即代码末尾 .build 调用生成的文件)本身**已经是**可搜索的;而随后通过 Windows“打印到 PDF”生成的 pdf 才是不可搜索的。这一点很有意思,不过这个具体问题已经解决了。

© www.soinside.com 2019 - 2024. All rights reserved.