I'm using the HuggingFace Transformers pipeline to generate multiple text completions for a given prompt. My goal is to have a model like GPT-2 produce several different possible completions, similar to the default behavior in vLLM. However, when I try to specify parameters such as max_length and num_return_sequences, I run into an issue with unused model_kwargs.
Here is the code snippet I'm using:
from transformers import GPT2Tokenizer, GPT2LMHeadModel, pipeline
from typing import List, Dict

def process_prompts(prompts: List[str], model: GPT2LMHeadModel, tokenizer: GPT2Tokenizer, num_completions: int = 3) -> List[List[str]]:
    device = 0 if model.device.type == 'cuda' else -1
    text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)
    outputs = []
    for prompt in prompts:
        try:
            results = text_generator(prompt, max_length=50, num_return_sequences=num_completions, num_beams=num_completions)
            completions = [result['generated_text'] for result in results]
            outputs.append(completions)
        except Exception as e:
            print(f"Error processing prompt {prompt}: {str(e)}")
    return outputs

if __name__ == "__main__":
    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
    model = GPT2LMHeadModel.from_pretrained("gpt2")
    model.to("cuda" if torch.cuda.is_available() else "cpu")
    example_prompts = ["Hello, how are you?"]
    processed_outputs = process_prompts(example_prompts, model, tokenizer, num_completions=3)
    for output in processed_outputs:
        print(output)
I also tried:
results = text_generator(prompt, max_length=50, num_return_sequences=num_completions)
When I run this, I get the following error:
The following `model_kwargs` are not used by the model: ['max_len']
Note: I am aware that typos in the generate arguments can also trigger this warning, but I've checked and rechecked the argument names.
and
raise ValueError(
ValueError: Greedy methods without beam search do not support `num_return_sequences` different than 1 (got 4).
What could be causing these errors, and how can I fix them so I can effectively generate multiple completions with the model?
It looks to me like there are only a few minor issues in your code: the script never imports torch, and asking for num_return_sequences greater than 1 only works with beam search (num_beams) or sampling, so with the default greedy decoding you get exactly the ValueError you quoted. The version below adds the import, drops max_length in favour of truncation=True, and keeps num_beams=num_completions:
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel, pipeline
from typing import List, Dict

def process_prompts(prompts: List[str], model: GPT2LMHeadModel, tokenizer: GPT2Tokenizer, num_completions: int) -> List[List[str]]:
    device = 0 if model.device.type == 'cuda' else -1
    text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)
    outputs = []
    for prompt in prompts:
        try:
            # Beam search (num_beams) is what allows num_return_sequences > 1 here;
            # truncation=True avoids the tokenizer length warning.
            results = text_generator(prompt, truncation=True,
                                     num_return_sequences=num_completions, num_beams=num_completions)
            completions = [result['generated_text'] for result in results]
            outputs.append(completions)
        except Exception as e:
            print(f"Error processing prompt {prompt}: {str(e)}")
    return outputs

if __name__ == "__main__":
    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
    model = GPT2LMHeadModel.from_pretrained("gpt2")
    model.to("cuda" if torch.cuda.is_available() else "cpu")
    example_prompts = ["Hello, how are you?"]
    num_completions = 2
    processed_outputs = process_prompts(example_prompts, model, tokenizer, num_completions)
    res = ' '.join(processed_outputs[0])
    print(res)
Hello, how are you? How are you?"
"I'm fine," I said.
"Well, I guess I'm just fine," he said, turning to me.
"Well, I guess I'm just fine Hello, how are you? How are you?"
"I'm fine," I said.
"Well, I guess I'm just fine," he said, turning to me.
"Well, I guess I'm just not