我正在尝试训练连续词袋架构,但每当我尝试使用 DataLoader 训练模型时,都会不断出现此错误。如果我不使用任何批次,损失就会减少。
class CBOW(nn.Module):
def __init__(self, vocab_size, emb_dim, hidden_dim):
super(CBOW, self).__init__()
self.embedding = nn.Embedding(vocab_size, emb_dim)
self.linear1 = nn.Linear(emb_dim, hidden_dim)
self.activation = nn.ReLU()
self.linear2 = nn.Linear(hidden_dim, vocab_size)
self.activation_function2 = nn.LogSoftmax(dim = -1)
def forward(self, inputs):
embeds = self.embedding(inputs).mean(dim=0)
out = self.activation(self.linear1(embeds))
out = self.linear2(out)
return out
# Define hyperparameters
vocab_size = len(vocab)
emb_dim = 12
hidden_dim = 128
lr = 0.01
num_epochs = 2
# Initialize model and optimizer
model = CBOW(vocab_size, emb_dim, hidden_dim)
#model = model.to(device)
optimizer = optim.Adam(model.parameters(), lr=lr)
loss_fn = nn.CrossEntropyLoss()
def train_one_epoch(model, loader, num_epochs):
running_loss = 0
last_loss = 0
for epoch in range(num_epochs):
loss = 0.0
for inputs, labels in loader:
optimizer.zero_grad()
outputs = model(inputs)
loss = loss_fn(outputs, labels.unsqueeze())
loss.backward()
optimizer.step()
running_loss += loss.item()
return last_loss
加载器中的样本由元组组成。
val_loader.dataset[0][0].shape 输出 torch.Size([6]) 这是context.
val_loader.dataset[0][1].shape 输出 torch.Size([]) 这是target.
我真的不明白到底发生了什么。