我使用 langchain Python 库创建向量存储,并根据用户查询检索相关文档。如何获取向量存储中各个文档的嵌入向量(embedding)?
例如,在这段代码中:
import pprint
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.docstore.document import Document
# Hugging Face model identifier handed to HuggingFaceEmbeddings below.
model = "sentence-transformers/multi-qa-MiniLM-L6-cos-v1"

# Shared embedding object: used by main() both to build the FAISS index and
# to reload it from disk.
embeddings = HuggingFaceEmbeddings(model_name=model)
def main():
    """Index two sample documents in FAISS, reload the index, and query it.

    Steps, in order:
      1. create two ``Document`` objects and tag each with a ``summary``
         metadata entry;
      2. pretty-print the documents;
      3. build a FAISS store from them with the module-level ``embeddings``
         and save it to the ``faiss_index`` folder;
      4. load the store back from that folder and run a scored similarity
         search for a fixed question;
      5. print the raw search result and the metadata of its first hit.
    """
    sky_doc = Document(
        page_content="The sky is blue.",
        metadata={"document_id": "10"},
    )
    forest_doc = Document(
        page_content="The forest is green",
        metadata={"document_id": "62"},
    )
    corpus = [sky_doc, forest_doc]

    # Every document receives the same placeholder summary.
    for entry in corpus:
        entry.metadata.update({'summary': 'hello'})

    pprint.pprint(corpus)

    # Build the index and persist it in one go, then read it back from disk.
    FAISS.from_documents(corpus, embeddings).save_local("faiss_index")
    reloaded_store = FAISS.load_local("faiss_index", embeddings)

    question = "Which color is the sky?"
    scored_hits = reloaded_store.similarity_search_with_score(question)
    print('Retrieved docs:', scored_hits)

    # Each hit is unpacked as a (document, score) pair; only the first hit's
    # document is needed here.
    top_doc, _top_score = scored_hits[0]
    print('Metadata of the most relevant document:', top_doc.metadata)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
如何获得文档 `doc1` 和 `doc2` 的嵌入向量?
以上代码已在 Python 3.11 下测试通过,依赖的安装命令如下:
pip install langchain==0.1.1 langchain_openai==0.0.2.post1 sentence-transformers==2.2.2 langchain_community==0.0.13 faiss-cpu==1.7.4
# Compute the embedding vectors of doc1 and doc2.
#
# doc1 and doc2 are local variables of main(), so these lines have to run
# inside main() after both documents have been created.
#
# embed_documents() is the document-side method of the LangChain Embeddings
# interface (embed_query() is meant for search queries), and it takes a list,
# so both texts are embedded in a single batched call. It returns one vector
# per input text, in input order.
emb1, emb2 = embeddings.embed_documents([doc1.page_content, doc2.page_content])