我开发了一个 Streamlit 应用程序,它利用 Cohere API 进行自然语言处理。该应用程序允许用户提出问题,然后使用 Cohere API 处理这些问题。但是,当提示中的令牌总数超过 Cohere API 的最大限制 4,081 个令牌时,我遇到了问题。
这并不是一个答案,而是一条相关的补充说明……
我遇到过类似的问题:程序一直运行正常,直到某个时刻突然失效……我怀疑是触发了某种配额限制,因为它之前已经正常工作了一段时间……
这是我的代码:
import streamlit as st
from dotenv import load_dotenv
from langchain.chains import (
    create_history_aware_retriever,
    create_retrieval_chain,
)
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.embeddings import OCIGenAIEmbeddings
from langchain_community.llms import OCIGenAI
from langchain_community.vectorstores import Chroma
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
import chromadb

# cohere.command-light has a hard context window (~4k tokens). max_tokens
# below caps only the *generated* output; the input side (retrieved docs +
# chat history) must be bounded separately — which is why the history passed
# to the chain is windowed (MAX_HISTORY_TURNS) instead of growing forever.
MAX_HISTORY_TURNS = 6  # most recent user/assistant exchanges forwarded to the LLM

# Streamlit re-runs this whole script top-to-bottom on every interaction, so
# anything that must survive between turns (the conversation) lives in
# session_state. A plain module-level `chat_history = []` would be reset on
# every re-run and the chain would never see any history.
if "history" not in st.session_state:
    st.session_state.history = []

load_dotenv()

llm = OCIGenAI(
    model_id="cohere.command-light",
    service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com",
    compartment_id="ocid1.compartment.oc1..aaaaaaaacz66k7qusk5kg5wc4keajwvi2meiauw6wmyztmrb2tm6gt7tzqsa",
    model_kwargs={"max_tokens": 1000},
)
embeddings = OCIGenAIEmbeddings(
    model_id="cohere.embed-english-v3.0",
    service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com",
    compartment_id="ocid1.compartment.oc1..aaaaaaaacz66k7qusk5kg5wc4keajwvi2meiauw6wmyztmrb2tm6gt7tzqsa",
)

client = chromadb.HttpClient(host="localhost", port=8000)
db = Chroma(client=client, embedding_function=embeddings, collection_name="ncert-eng-chromadb")
retv = db.as_retriever(search_type="similarity", search_kwargs={"k": 5})

# Prompt used ONLY to rewrite the latest question into a standalone search
# query. It must NOT contain {context}: the history-aware retriever step
# receives just `input` and `chat_history`, so a prompt that also demands
# {context} (as the old code passed here) cannot be filled at that stage.
contextualize_prompt = ChatPromptTemplate.from_messages([
    ("system",
     "Given the chat history and the latest user question, "
     "rephrase the question as a standalone search query. Do NOT answer it."),
    MessagesPlaceholder(variable_name="chat_history"),
    ("user", "{input}"),
])
retriever_chain = create_history_aware_retriever(llm, retv, contextualize_prompt)

# Prompt used to answer the question from the retrieved documents.
qa_prompt = ChatPromptTemplate.from_messages([
    ("system", "Answer the user's questions based on the below context:\n\n{context}"),
    MessagesPlaceholder(variable_name="chat_history"),
    ("user", "{input}"),
])
# https://python.langchain.com/v0.1/docs/get_started/quickstart/
document_chain = create_stuff_documents_chain(llm, qa_prompt)
chain = create_retrieval_chain(retriever_chain, document_chain)

st.title('🤖 Welcome to the ChatBot')

# Replay the stored conversation so the transcript survives Streamlit re-runs.
for msg in st.session_state.history:
    with st.chat_message(msg['role']):
        st.markdown(msg['content'])


def _recent_chat_history():
    """Rebuild the LangChain message list from session_state.

    Keeps only the most recent MAX_HISTORY_TURNS exchanges so the prompt
    stays within the model's token limit instead of growing unboundedly.
    """
    messages = []
    for msg in st.session_state.history[-2 * MAX_HISTORY_TURNS:]:
        if msg['role'] == 'user':
            messages.append(HumanMessage(content=msg['content']))
        else:
            messages.append(AIMessage(content=msg['content']))
    return messages


# Named `user_input` (not `prompt`) so it does not shadow the prompt
# templates defined above.
user_input = st.chat_input("Say something")
if user_input:
    # Snapshot the history BEFORE appending the new question, so the latest
    # question is supplied once via `input` and not duplicated in history.
    previous_history = _recent_chat_history()

    st.session_state.history.append({'role': 'user', 'content': user_input})
    with st.chat_message("user"):
        st.markdown(user_input)

    with st.spinner('💡Thinking'):
        response = chain.invoke({
            "chat_history": previous_history,
            "input": user_input,
        })

    # Use Streamlit's built-in lowercase "assistant" role so the default
    # avatar and styling apply (matching the lowercase "user" role).
    st.session_state.history.append({'role': 'assistant', 'content': response["answer"]})
    with st.chat_message("assistant"):
        st.markdown(response["answer"])