我正在为 Llama2 13b 使用 AWS Sagemaker Jumpstart 模型:meta-textgeneration-llama-2-13b-f
在使用 chain_type="map_reduce" 运行 Langchain 总结链时,出现以下错误。我无法从我的环境访问 https://huggingface.co。有没有办法在本地目录中设置 gpt2 标记生成器?
# Generation parameters forwarded to the SageMaker endpoint via model_kwargs.
parameters = {
    "properties": dict(
        min_length=100,           # minimum number of tokens to generate
        max_length=1024,          # hard cap on generated length
        do_sample=True,           # sample instead of greedy decoding
        top_p=0.9,                # nucleus-sampling threshold
        repetition_penalty=1.03,  # >1.0 discourages repeated tokens
        temperature=0.8,          # softmax temperature for sampling
    )
}
class ContentHandler(LLMContentHandler):
    """Serialize prompts to and deserialize completions from the endpoint.

    Both directions speak JSON, hence identical content_type / accepts.
    """

    content_type = "application/json"
    accepts = "application/json"

    def transform_input(self, prompt: str, model_kwargs: Dict) -> bytes:
        # Merge the prompt with the generation kwargs into one JSON payload.
        payload = {"inputs": prompt, **model_kwargs}
        body = json.dumps(payload)
        print(body)  # echo the request body for debugging
        return body.encode("utf-8")

    def transform_output(self, output: bytes) -> str:
        # NOTE(review): `output.read()` implies a streaming body object, not
        # raw bytes as the annotation suggests — confirm against the caller.
        decoded = output.read().decode("utf-8")
        parsed = json.loads(decoded)
        # Assumes a chat-style response: list whose first element carries
        # {"generation": {"content": ...}} — verify for this model variant.
        return parsed[0]["generation"]["content"]
# Wire the SageMaker endpoint into LangChain and run map-reduce summarization.
content_handler = ContentHandler()

endpoint_name = 'xxxxxxxxxxxxxxxxxx'  # name of the deployed JumpStart endpoint

llm = SagemakerEndpoint(
    endpoint_name=endpoint_name,
    region_name="us-east-1",
    content_handler=content_handler,
    model_kwargs=parameters,
    # JumpStart Llama2 models require the EULA flag on every invocation.
    endpoint_kwargs={"CustomAttributes": 'accept_eula=true'},
)

# map_reduce: summarize each document chunk independently, then combine
# the partial summaries into a final one.
chain = load_summarize_chain(llm, chain_type="map_reduce")
chain.run(docs)
错误:
File /usr/local/lib/python3.9/site-packages/transformers/tokenization_utils_base.py:1788, in PreTrainedTokenizerBase.from_pretrained(cls, pretrained_model_name_or_path, *init_inputs, **kwargs)
1782 logger.info(
1783 f"Can't load following files from cache: {unresolved_files} and cannot check if these "
1784 "files are necessary for the tokenizer to operate."
1785 )
1787 if all(full_file_name is None for full_file_name in resolved_vocab_files.values()):
-> 1788 raise EnvironmentError(
1789 f"Can't load tokenizer for '{pretrained_model_name_or_path}'. If you were trying to load it from "
1790 "'https://huggingface.co/models', make sure you don't have a local directory with the same name. "
1791 f"Otherwise, make sure '{pretrained_model_name_or_path}' is the correct path to a directory "
1792 f"containing all relevant files for a {cls.__name__} tokenizer."
1793 )
1795 for file_id, file_path in vocab_files.items():
1796 if file_id not in resolved_vocab_files:
OSError: Can't load tokenizer for 'gpt2'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure 'gpt2' is the correct path to a directory containing all relevant files for a GPT2TokenizerFast tokenizer.
您可以使用下面的代码把 gpt2 分词器保存到本地目录,之后在无法访问 huggingface.co 的环境中直接从该目录加载它。
# Run this once on a machine that CAN reach huggingface.co, then copy the
# saved folder into the offline environment and load the tokenizer from it.
from transformers import GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")  # downloads vocab + merges
tokenizer.save_pretrained('/SOMEFOLDER/')          # writes tokenizer files to disk