# Original, non-working version of the PDF text extractor. It is kept exactly
# as posted (indentation was lost in the paste) so that the explanation that
# follows it still applies to the code shown here.
def input_pdf_text(上传的文件):
# NOTE(review): the parameter is named `上传的文件`, but the body below reads
# `uploaded_file`; unless a global of that name exists this raises NameError.
# Use the supplied file object as a context manager so it is closed on exit
with uploaded_file as file:
# Create a PdfReader object to read the PDF
# NOTE(review): `pdf` is presumably an alias for the PDF library imported
# elsewhere (not visible in this snippet) - confirm.
reader = pdf.PdfReader(file)
# Accumulates the extracted text of all pages
text=""
# BUG: `reader(...)` calls the reader object instead of iterating over page
# indices; the next line indexes `reader.pages[page]`, so the intended
# iterable was presumably `range(len(reader.pages))`.
for page in reader(len(reader.pages)):
# Rebinds `page` from the loop value to the page object at that index
page=reader.pages[page]
# str() coerces whatever extract_text() returns before concatenating
text+= str(page.extract_text())
return text
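上面的代码在遍历页面时存在错误,缩进格式也有问题。问题出在这一行:
for page in reader(len(reader.pages)):
这里把 reader 当作函数来调用,而 PdfReader 对象是不可调用的,因此遍历页面的写法不正确。应当直接遍历 reader.pages(或者用 range(len(reader.pages)) 按索引访问)。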
使用 pip 安装 PyPDF2:
pip install PyPDF2
下面是修正后的代码示例:
import PyPDF2
def input_pdf_text(uploaded_file):
    """Return the concatenated text of every page in a PDF.

    Args:
        uploaded_file: Either a filesystem path (``str``, ``bytes`` or
            ``os.PathLike``) or an already-open binary file-like object,
            for example the result of ``open(path, 'rb')`` or an
            ``io.BytesIO``. A stream must be opened in binary mode.

    Returns:
        The text extracted from all pages, joined in page order. Pages for
        which ``extract_text()`` yields nothing contribute an empty string.

    Raises:
        OSError: If ``uploaded_file`` is a path that cannot be opened.
    """
    import os  # local import so the function does not rely on a file-level one

    if isinstance(uploaded_file, (str, bytes, os.PathLike)):
        # This handle is opened here, so the ``with`` block closes it even
        # if parsing raises.
        with open(uploaded_file, 'rb') as stream:
            return input_pdf_text(stream)

    # A stream supplied by the caller is left open: closing it is the
    # caller's responsibility (typically via their own ``with`` block).
    reader = PyPDF2.PdfReader(uploaded_file)

    # Call extract_text() only once per page (extraction is the expensive
    # step) and join once instead of growing a string with ``+=``.
    return "".join(page.extract_text() or "" for page in reader.pages)
要使用它,只需:
# Usage example: extract the text of a PDF stored on disk and print it.
file_path = 'path_to_your_pdf_file.pdf'

# Open in binary mode; the handle is closed as soon as extraction finishes.
with open(file_path, 'rb') as pdf_file:
    pdf_text = input_pdf_text(pdf_file)

print(pdf_text)