我正在使用 llama-index，代码如下：
import boto3
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
def retrieve_pdf_files_from_s3(bucket_name):
    """Return the keys of all objects in an S3 bucket whose key ends in ``.pdf``.

    The match is a case-sensitive suffix test on the object key, so a key
    such as ``REPORT.PDF`` is not included.

    Args:
        bucket_name: Name of the S3 bucket to list.

    Returns:
        A list of matching object keys, in the order S3 returns them.
        The list is empty when the bucket contains no matching objects.
    """
    # NOTE(review): the credentials are hard-coded (masked in this paste).
    # Consider letting boto3 resolve them from its default credential chain
    # (environment variables, shared config, instance role) instead.
    s3 = boto3.client(
        's3',
        aws_access_key_id='****',
        aws_secret_access_key='****',
        region_name="****",
    )
    # A single list_objects_v2 call returns at most 1000 keys; the paginator
    # follows the continuation tokens so larger buckets are listed in full.
    paginator = s3.get_paginator('list_objects_v2')
    return [
        obj['Key']
        for page in paginator.paginate(Bucket=bucket_name)
        # 'Contents' is absent from a page that holds no objects.
        for obj in page.get('Contents', [])
        if obj['Key'].endswith('.pdf')
    ]
# Bucket to scan (the real name is masked in this paste).
bucket_name = "****"
# Keys of the bucket's objects whose key ends in '.pdf'.
# NOTE(review): in the visible code, pdf_files is referenced only inside the
# disabled download loop below, so this listing does not affect the index.
pdf_files = retrieve_pdf_files_from_s3(bucket_name)
# The triple-quoted string below is a bare expression statement, not running
# code: the download loop inside it is never executed, so nothing is copied
# from S3 into ./tmp by this script.
# NOTE(review): if it is re-enabled, it creates a new S3 client on every
# iteration and writes to './tmp/' + key; presumably ./tmp (and any
# sub-directories implied by '/' in a key) must already exist - verify.
"""
for pdf_data in pdf_files:
s3 = boto3.client('s3',aws_access_key_id='****',aws_secret_access_key='****',region_name="****")
s3.download_file(bucket_name, pdf_data, './tmp/' + pdf_data)
"""
# Load documents from the local ./tmp directory; because the download step
# above is disabled, this reads whatever files are already present there.
reader = SimpleDirectoryReader(input_dir="./tmp")
documents = reader.load_data()
# Build a vector store index from the loaded documents and print the
# resulting index object.
index = VectorStoreIndex.from_documents(documents)
print(index)
这是一个非常简单的例子。
执行时出现以下错误:
Traceback (most recent call last): File "D:\NusreGrowth\RAG 为自己的数据 env\lib\site-packages\llama_inde