When I run my code with python -m torch.distributed.launch --nproc_per_node=8 main3.py, trying to use 8 GPUs, does this create 8 separate runs of my script, so the training progress from the other 7 GPUs is lost? If so, why do I get only a single progress bar, with a reduced training time, while the printed "file created" message appears 8 times?

My code:
import os
import math
import numpy as np
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel, TextDataset, DataCollatorForLanguageModeling
from transformers import Trainer, TrainingArguments
import random
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
batch_size = 32 # Per-device batch size for training.
epochs = 3 # Number of epochs to train on each chunk.
chunk_size = 250000 # Number of lines per training chunk.
data_path = "data.txt"
start_token = "<start>"
end_token = "<end>"
# Read the number of lines in the data file
with open(data_path, "r", encoding="utf-8") as f:
num_lines = sum(1 for line in f)
# Calculate the number of chunks needed
num_chunks = math.ceil(num_lines / chunk_size)
# Use GPT-2's tokenizer
tokenizer = GPT2Tokenizer.from_pretrained("./gpt2_model")
# Add the start and end tokens to the tokenizer
tokenizer.add_tokens([start_token, end_token])
# Load GPT-2's pre-trained model
model = GPT2LMHeadModel.from_pretrained("./gpt2_model")
# Resize the model's token embeddings to include the new tokens
model.resize_token_embeddings(len(tokenizer))
for chunk in range(num_chunks):
    # Clear previous data
    input_texts = []
    filename = ""
    # ...
    with open(data_path, "r", encoding="utf-8") as f:
        # Skip lines that have already been read
        for _ in range(chunk * chunk_size):
            next(f)
        # Read the lines for this chunk
        for _ in range(chunk_size):
            line = f.readline().strip()
            if not line:
                break
            # maxsplit=1 keeps any extra ":" inside target_text from breaking the unpack
            input_text, target_text = line.split(":", 1)
            input_texts.append(start_token + input_text + ":" + target_text + end_token)
    filename = "processed_data.txt"
    # Save the processed data into a new file
    with open(str(chunk) + filename, "w", encoding="utf-8") as f:
        for text in input_texts:
            f.write(text + "\n")
    print("file created:" + str(chunk) + filename)
    # Create the dataset using the tokenizer
    dataset = TextDataset(tokenizer=tokenizer, file_path=str(chunk) + filename, block_size=128)
    # Create a data collator for language modeling
    data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
    # Define training arguments
    training_args = TrainingArguments(
        output_dir="./gpt2_model",
        overwrite_output_dir=True,
        num_train_epochs=epochs,
        per_device_train_batch_size=batch_size,
        save_steps=10_000,
        save_total_limit=2,
        # Add these arguments to enable multi-GPU training
    )
    # Create a Trainer instance with the model, dataset, and training arguments
    trainer = Trainer(
        model=model,
        args=training_args,
        data_collator=data_collator,
        train_dataset=dataset,
    )
    # Train the model on this chunk
    trainer.train()
    model.save_pretrained("./gpt2_model")
    tokenizer.save_pretrained("./gpt2_model")
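To see what the launcher is actually doing, I put together a minimal diagnostic that could go at the top of main3.py. This is a sketch, not part of my original script: it assumes a PyTorch version where torch.distributed.launch (or torchrun) sets the LOCAL_RANK and WORLD_SIZE environment variables for each process it spawns; older versions pass a --local_rank command-line argument instead.

import os

# The launcher starts one Python process per GPU, so every top-level
# statement in the script (including the preprocessing and the print)
# runs once per process. These variables identify the current process;
# the defaults also let the snippet run in a plain single-GPU session.
local_rank = int(os.environ.get("LOCAL_RANK", 0))
world_size = int(os.environ.get("WORLD_SIZE", 1))
print(f"started: local_rank={local_rank}, world_size={world_size}, pid={os.getpid()}")

# Guarding side effects on the main process would keep "file created"
# from being printed once per GPU:
if local_rank == 0:
    print("only the main process reaches this line")

If the actual chunk-file writing were guarded like this, the other ranks would still need a synchronization point (for example torch.distributed.barrier(), once the process group is initialized) before opening the file, since in distributed mode each process builds its own TextDataset from it.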