在下面的代码中,我在 ConversationalRetrievalChain 检索器中遇到了一个大问题,当尝试增加“k”返回的文档数量时,我已经超出了最大令牌数量,问题是默认情况下它只返回 4,而我的csv数据有1000行
`
from googletrans import Translator
import re
from deep_translator import GoogleTranslator
from langchain.embeddings import HuggingFaceEmbeddings
import os
from transformers import AutoConfig
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.llms import CTransformers
from langchain.chains import ConversationalRetrievalChain
import pandas as pd
from transformers import AutoModelForCausalLM
from langchain.memory import ConversationBufferMemory
from langchain_chroma import Chroma
from langchain_community.document_loaders import TextLoader
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain_text_splitters import CharacterTextSplitter
def translate_to_portuguese(text):
    """Translate *text* from Portuguese to English, preserving double-quoted spans.

    NOTE(review): despite the name, the translator is built with
    source="pt", target="en", so this translates *to English*. The name is
    kept unchanged for caller compatibility.

    Args:
        text: Input string, possibly containing "quoted" segments that must
            be passed through verbatim.

    Returns:
        The translated string; quoted segments and untranslatable segments
        are kept unchanged.
    """
    translator = GoogleTranslator(source="pt", target="en")
    translated_segments = []
    # The capture group makes re.split keep each quoted span as its own segment.
    segments = re.split(r'(".*?")', text)
    for segment in segments:
        if segment.startswith('"') and segment.endswith('"'):
            # Quoted segment: keep unchanged.
            translated_segments.append(segment)
        elif not segment.strip():
            # FIX: re.split emits empty strings around quoted spans, and
            # deep_translator raises on empty/whitespace-only payloads —
            # pass these through instead of calling the API.
            translated_segments.append(segment)
        else:
            translated_text = translator.translate(segment)
            # Fall back to the original segment if translation returns None.
            if translated_text is not None:
                translated_segments.append(translated_text)
            else:
                translated_segments.append(segment)
    return ' '.join(translated_segments)
def main():
    """Interactive Portuguese QA loop over a CSV, backed by a local Llama model.

    Loads the CSV, chunks it, indexes the chunks in an in-memory Chroma
    store, and answers user questions until the user types 'quit'.
    """
    # Load the CSV: CSVLoader produces one document per row.
    file_path = "/content/sample_data/california_housing_test.csv"
    loader = CSVLoader(file_path=file_path, encoding="utf-8", csv_args={'delimiter': ','})
    documents = loader.load()

    # Split row-documents into small chunks for embedding.
    text_splitter = CharacterTextSplitter(chunk_size=200, chunk_overlap=0)
    docs = text_splitter.split_documents(documents)
    print(docs)

    # Open-source sentence-transformer embeddings + Chroma index.
    embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
    db2 = Chroma.from_documents(docs, embedding_function)

    # FIX: ctransformers keeps its 512-token default context window unless
    # 'context_length' is set, which caused
    # "Number of tokens (1554) exceeded maximum context length (512)".
    # Generation settings must be passed in the `config` dict; bare kwargs
    # like max_new_tokens=8145 are not applied as model config, and 8145
    # would not fit any context anyway.
    llm_model_name = "TheBloke/Llama-2-7B-GGUF"
    llm = CTransformers(
        model=llm_model_name,
        model_type='llama',
        config={
            'context_length': 4096,  # prompt + generated tokens must fit here
            'max_new_tokens': 1024,  # must be well below context_length
            'temperature': 1.0,
        },
    )

    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=False)

    # max_tokens_limit truncates the k retrieved documents so that, together
    # with the question/history and max_new_tokens, the prompt stays inside
    # context_length (3000 + question + 1024 < 4096).
    qa = ConversationalRetrievalChain.from_llm(
        llm,
        retriever=db2.as_retriever(search_kwargs={'k': 10}),
        memory=memory,
        output_key="answer",
        verbose=True,
        max_tokens_limit=3000,
    )

    # Interactive loop: PT question -> EN -> model -> EN answer -> PT.
    while True:
        print(" ")
        query = input('Pergunta: ').lower()
        if query == 'quit':
            break
        print(" ")
        traducao = translate_to_portuguese(query)
        # NOTE(review): chat_history is passed explicitly even though the
        # chain already has `memory`; kept as-is for compatibility.
        chat_history = []
        result = qa({"question": traducao, "chat_history": chat_history})
        response = GoogleTranslator(source="en", target="pt")
        traducao_response = response.translate(result['answer'])
        print("Resposta Bot:", traducao_response)
# Script entry point (the trailing backtick closes the post's Markdown code fence).
if __name__ == "__main__":
main() `
这个错误:
WARNING:ctransformers:Number of tokens (1554) exceeded maximum context length (512).
使用特定的 `config` 字典来配置 CTransformers 模型,手动控制上下文长度和令牌数量。
例如:
llm = CTransformers(model=llm_model_name,
model_type='llama',
config={'max_new_tokens': 600, 'temperature': 0.01, 'context_length': 700})