I am building a feed-forward neural network (FFNN) for binary sentiment classification on a movie review dataset. It is a simplified task with only two class labels: positive (1) and negative (0). The FFNN looks up word embeddings, averages them, runs the result through two linear layers with an activation in between, and then applies a sigmoid to get a prediction probability.
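Concretely, these are the tensor shapes I expect at each stage (a minimal sketch with made-up sizes, just to illustrate the architecture):

import torch
import torch.nn as nn

emb = nn.Embedding(10, 8)                       # vocab of 10, embedding dim 8
x = torch.tensor([[1, 2, 3, 0], [4, 5, 0, 0]])  # 2 sentences padded to length 4
e = emb(x)                                      # (2, 4, 8)
avg = e.mean(dim=1)                             # (2, 8): average over the sequence
h = torch.relu(nn.Linear(8, 6)(avg))            # (2, 6): hidden layer + activation
logit = nn.Linear(6, 1)(h)                      # (2, 1): one logit per example
prob = torch.sigmoid(logit)                     # (2, 1): prediction probability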
The model runs, but I keep getting all zeros as predictions. I could really use some help. Here is the link to the Colab.
Here is what I have tried:
import torch
import torch.nn as nn
from typing import List

class FeedForwardNeuralNetClassifier(nn.Module):
    """
    The Feed-Forward Neural Net sentiment classifier.
    """
    def __init__(self, vocab_size, emb_dim, n_hidden_units):
        """
        Define the modules of the FFNN.
        :param vocab_size: size of vocabulary
        :param emb_dim: dimension of the embedding vectors
        :param n_hidden_units: dimension of the hidden units
        """
        super(FeedForwardNeuralNetClassifier, self).__init__()
        self.vocab_size = vocab_size
        self.emb_dim = emb_dim
        self.n_hidden_units = n_hidden_units
        # Randomly initialized word embedding matrix of size (vocab_size x emb_dim)
        self.word_embeddings = nn.Embedding(self.vocab_size, self.emb_dim)
        # The rest of the model: two linear layers with a ReLU in between,
        # plus a sigmoid for turning logits into probabilities at predict time
        self.layer1 = nn.Linear(self.emb_dim, self.n_hidden_units)
        self.layer2 = nn.Linear(self.n_hidden_units, 1)
        self.activation = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
    def forward(self, batch_inputs: torch.Tensor, batch_lengths: torch.Tensor) -> torch.Tensor:
        """
        The forward pass, given a batch of inputs and their actual sentence lengths (i.e., before PAD).
        :param batch_inputs: a torch.Tensor of size (n_examples, max_sent_length_in_this_batch), which is the *indexed* inputs
        :param batch_lengths: a torch.Tensor of size (n_examples), which describes the actual sentence length of each example (i.e., before PAD)
        :return: the logits of the FFNN (i.e., the unnormalized outputs before sigmoid), of shape (n_examples, 1)
        """
        # Look up word embeddings: (n_examples, max_len, emb_dim)
        embeddings = self.word_embeddings(batch_inputs)
        # Element-wise averaging over the sequence dimension.
        # Note: batch_lengths is unused here, so PAD positions are included in the average.
        averaged = torch.mean(embeddings, dim=1)
        # Hidden layer with ReLU
        hidden = self.activation(self.layer1(averaged))
        # Output layer: one logit per example
        return self.layer2(hidden)
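
    def forward_masked(self, batch_inputs: torch.Tensor, batch_lengths: torch.Tensor) -> torch.Tensor:
        # A hedged sketch, not part of the original model and not called anywhere
        # below: same as forward, but the average uses batch_lengths to exclude
        # PAD positions, in case averaging over padding turns out to be a problem.
        embeddings = self.word_embeddings(batch_inputs)
        # mask of shape (n_examples, max_len): 1.0 for real tokens, 0.0 for PAD
        positions = torch.arange(batch_inputs.size(1), device=batch_inputs.device)
        mask = (positions[None, :] < batch_lengths[:, None]).float()
        summed = (embeddings * mask.unsqueeze(-1)).sum(dim=1)
        averaged = summed / batch_lengths.unsqueeze(1).float()
        hidden = self.activation(self.layer1(averaged))
        return self.layer2(hidden)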
    def batch_predict(self, batch_inputs: torch.Tensor, batch_lengths: torch.Tensor) -> List[int]:
        """
        Make predictions for a batch of inputs. This function may directly invoke forward (which passes the input through the FFNN and returns the output logits).
        :param batch_inputs: a torch.Tensor of size (n_examples, max_sent_length_in_this_batch), which is the *indexed* inputs
        :param batch_lengths: a torch.Tensor of size (n_examples), which describes the actual sentence length of each example (i.e., before PAD)
        :return: a list of predicted classes for this batch of data, either 0 for the negative class or 1 for the positive class
        """
        logits = self.forward(batch_inputs, batch_lengths)  # shape (n_examples, 1)
        # Sigmoid turns the logits into probabilities in (0, 1)
        probability = self.sigmoid(logits)
        # Threshold at 0.5 and squeeze the trailing dimension, so this returns a
        # flat list of ints ([1, 0, ...]) rather than nested lists ([[1], [0], ...])
        predicted_labels = (probability > 0.5).int().squeeze(1).tolist()
        return predicted_labels
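
# Quick sanity check on a toy batch (my own made-up sizes and indices, not part
# of the original pipeline): forward should return shape (n_examples, 1) and
# batch_predict a flat list of 0/1 ints.
_toy_model = FeedForwardNeuralNetClassifier(vocab_size=10, emb_dim=8, n_hidden_units=6)
_toy_inputs = torch.tensor([[1, 2, 3, 0], [4, 5, 0, 0]])  # 2 sentences padded to length 4
_toy_lengths = torch.tensor([3, 2])
print(_toy_model(_toy_inputs, _toy_lengths).shape)          # torch.Size([2, 1])
print(_toy_model.batch_predict(_toy_inputs, _toy_lengths))  # e.g. [0, 1]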
model = FeedForwardNeuralNetClassifier(vocab_size=len(vocab), emb_dim=300, n_hidden_units=300)
# FeedForwardNeuralNetClassifier(
# (word_embeddings): Embedding(4818, 300)
# (layer1): Linear(in_features=300, out_features=300, bias=True)
# (layer2): Linear(in_features=300, out_features=1, bias=True)
# (activation): ReLU()
# (sigmoid): Sigmoid()
# )
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
model = model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
import time

BATCH_SIZE = 32
N_EPOCHS = 20

# create a batch iterator for the training data
batch_iterator = SentimentExampleBatchIterator(
    train_exs, batch_size=BATCH_SIZE, PAD_idx=PAD_IDX, shuffle=True)
# loss function: BCEWithLogitsLoss applies the sigmoid internally, so the model
# should feed it raw logits during training
loss_func = nn.BCEWithLogitsLoss()
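# Side note on shapes (my own toy check, not from the original code):
# BCEWithLogitsLoss requires logits and targets of identical shape and raises a
# ValueError otherwise, which is why batch_labels is unsqueezed to
# (n_examples, 1) in the loop below to match the model's (n_examples, 1) output.
_check_logits = torch.tensor([[0.2], [-1.3]])
_check_targets = torch.tensor([[1.0], [0.0]])
print(loss_func(_check_logits, _check_targets))  # a single scalar loss tensor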
# training
best_epoch = -1
best_acc = -1
start_time = time.time()
for epoch in range(N_EPOCHS):
    print("Epoch %i" % epoch)
    batch_iterator.refresh()  # initiate a new iterator for this epoch

    model.train()  # turn on the "training mode"
    batch_loss = 0.0
    batch_example_count = 0
    batch_data = batch_iterator.get_next_batch()
    while batch_data is not None:
        batch_inputs, batch_lengths, batch_labels = batch_data
        # move the batch to the device
        batch_inputs = batch_inputs.to(device)
        batch_lengths = batch_lengths.to(device)
        # unsqueeze the labels to (n_examples, 1) so they match the model's
        # output shape, and make them float for BCEWithLogitsLoss
        batch_labels = batch_labels.unsqueeze(1).float().to(device)
        # clean up the gradients for this batch
        optimizer.zero_grad()
        # call the model and compute the loss on the raw logits
        outputs = model(batch_inputs, batch_lengths)
        loss = loss_func(outputs, batch_labels)
        # record the loss and number of examples, so we can report some stats
        batch_example_count += len(batch_labels)
        batch_loss += loss.item() * len(batch_labels)
        # backpropagation
        loss.backward()
        # update the model parameters
        optimizer.step()
        # get another batch
        batch_data = batch_iterator.get_next_batch()
    print("Avg loss: %.5f" % (batch_loss / batch_example_count))

    # evaluate on the dev set
    model.eval()  # turn on the "evaluation mode"
    acc, _, _, _ = evaluate(model, dev_exs, return_metrics=True)
    if acc > best_acc:
        best_acc = acc
        best_epoch = epoch
        print("New best accuracy %.3f in epoch %d!" % (best_acc, best_epoch))
        # save the current best model parameters
        print("Saving the best model checkpoint as best_model.ckpt!")
        torch.save(model.state_dict(), "best_model.ckpt")
    print("Time elapsed: %s" % time.strftime("%Hh%Mm%Ss", time.gmtime(time.time() - start_time)))
    print("-" * 10)

print("End of training! The best accuracy %.3f was obtained in epoch %d." % (best_acc, best_epoch))

# load back the best checkpoint on the dev set
model.load_state_dict(torch.load("best_model.ckpt"))
Honestly, I'm not sure. The problem looks simple, but have you read the documentation for each function the model uses?
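For instance, one thing the docs reveal: Tensor.tolist preserves the trailing dimension, so thresholded probabilities of shape (n_examples, 1) come back as nested lists unless you squeeze first. A toy demonstration (values made up):

import torch
probs = torch.tensor([[0.7], [0.2]])
print((probs > 0.5).int().tolist())             # [[1], [0]] -- nested lists
print((probs > 0.5).int().squeeze(1).tolist())  # [1, 0] -- flat 0/1 labels

If the evaluation code compares those nested lists against plain 0/1 labels, every comparison fails, and the reported predictions can look like all zeros.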