情感分析 FFNN 模型（Pytorch）中的零精度

Question

我正在构建一个前馈神经网络 (FFNN)，用于使用电影评论数据集进行二元情感分类。这是一个简化的任务，只有两个类标签，即正 (1) 和负 (0)。它 FFNN 进行词嵌入，对它们进行平均，通过两层运行它们并在稍后激活，然后应用 sigmoid 来获得预测概率。

模型运行，但我不断得到零。我非常需要帮助。这是 colab 的链接

我试过以下方法：

改变损失函数
重新排列模型顺序
改变嵌入维度

以下内容：

模特
FFNN的赋值
培训

代码如下：

模特

from collections import OrderedDict

import torch
from typing import List

import torch.nn as nn

class FeedForwardNeuralNetClassifier(nn.Module):
    """
    The Feed-Forward Neural Net sentiment classifier.
    """
    def __init__(self, vocab_size, emb_dim, n_hidden_units):
        """
        In the __init__ function, you will define modules in FFNN.
        :param vocab_size: size of vocabulary
        :param emb_dim: dimension of the embedding vectors
        :param n_hidden_units: dimension of the hidden units
        """
        super(FeedForwardNeuralNetClassifier, self).__init__()
        self.vocab_size = vocab_size
        self.emb_dim = emb_dim
        self.n_hidden_units = n_hidden_units
       
        # TODO: implement a randomly initialized word embedding matrix using nn.Embedding
        # It should have a size of (vocab_size x emb_dim)
        self.word_embeddings = nn.Embedding(self.vocab_size, self.emb_dim) # replace me

        # Define the rest of the model
        self.layer1 = nn.Linear(self.emb_dim, self.n_hidden_units)
        self.layer2 = nn.Linear(self.n_hidden_units, 1)
        self.activation = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, batch_inputs: torch.Tensor, batch_lengths: torch.Tensor) -> torch.Tensor:
        """
        The forward function, which defines how FFNN should work when given a batch of inputs and their actual sent lengths (i.e., before PAD)
        :param batch_inputs: a torch.Tensor object of size (n_examples, max_sent_length_in_this_batch), which is the *indexed* inputs
        :param batch_lengths: a torch.Tensor object of size (n_examples), which describes the actual sentence length of each example (i.e., before PAD)
        :return the logits of FFNN (i.e., the unnormalized hidden units before sigmoid) of shape (n_examples)
        """

        # Lookup word embeddings
        embeddings = self.word_embeddings(batch_inputs)

        # Element-wise averaging layer
        averaged = torch.mean(embeddings, dim=1)

        # Hidden layer ReLU
        hidden = self.activation(self.layer1(averaged))

        # Output layer
        return self.layer2(hidden)

        

    def batch_predict(self, batch_inputs: torch.Tensor, batch_lengths: torch.Tensor) -> List[int]:
        """
        Make predictions for a batch of inputs. This function may directly invoke forward (which passes the input through FFNN and returns the output logits)

        :param batch_inputs: a torch.Tensor object of size (n_examples, max_sent_length_in_this_batch), which is the *indexed* inputs
        :param batch_lengths: a torch.Tensor object of size (n_examples), which describes the actual sentence length of each example (i.e., before PAD)
        :return: a list of predicted classes for this batch of data, either 0 for negative class or 1 for positive class
        """
        output = self.forward(batch_inputs, batch_lengths)
         
        # Sigmoid
        probability = self.sigmoid(output)

        # Convert logits to predicted labels
        predicted_labels = (probability > 0.5).int().tolist()

        return predicted_labels

作业


model = FeedForwardNeuralNetClassifier(vocab_size=len(vocab), emb_dim=300, n_hidden_units=300)

# FeedForwardNeuralNetClassifier(
# (word_embeddings): Embedding(4818, 300)
# (layer1): Linear(in_features=300, out_features=300, bias=True)
# (layer2): Linear(in_features=300, out_features=1, bias=True)
#  (activation): ReLU()
#  (sigmoid): Sigmoid()
# )



device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
model = model.to(device)



optimizer = torch.optim.Adam(model.parameters(), lr=0.001) # replace me

培训


import time

BATCH_SIZE=32
N_EPOCHS=20

# create a batch iterator for the training data
batch_iterator = SentimentExampleBatchIterator(
    train_exs, batch_size=BATCH_SIZE, PAD_idx=PAD_IDX, shuffle=True)

#loss function
loss_func = nn.BCEWithLogitsLoss()

# training
best_epoch = -1
best_acc = -1
start_time = time.time()
for epoch in range(N_EPOCHS):
    print("Epoch %i" % epoch)

    batch_iterator.refresh() # initiate a new iterator for this epoch

    model.train() # turn on the "training mode"
    batch_loss = 0.0
    batch_example_count = 0
    batch_data = batch_iterator.get_next_batch()
    while batch_data is not None:
        batch_inputs, batch_lengths, batch_labels = batch_data
        # project to the device
        batch_inputs = batch_inputs.to(device)
        batch_lengths = batch_lengths.to(device)
        batch_labels = batch_labels.unsqueeze(1)
        batch_labels = batch_labels.to(device)
        

        
        # TODO: clean up the gradients for this batch
        optimizer.zero_grad()
        
        # TODO: call the model and get the loss
        outputs = model(batch_inputs, batch_lengths).float()
        
        batch_labels = batch_labels.float()
        loss = loss_func(outputs, batch_labels)
        # record the loss and number of examples, so we could report some stats
        batch_example_count += len(batch_labels)
        batch_loss += loss.item() * len(batch_labels)

        # TODO: backpropagation using loss
        # backpropagation using loss
        loss.backward()

        # update the model parameters
        optimizer.step()

        # get another batch
        batch_data = batch_iterator.get_next_batch()

    print("Avg loss: %.5f" % (batch_loss / batch_example_count))

    # evaluate on dev set
    model.eval() # turn on the "evaluation mode"
    acc, _, _, _ = evaluate(model, dev_exs, return_metrics=True)
    if acc > best_acc:
        best_acc = acc
        best_epoch = epoch
        print("Secure a new best accuracy %.3f in epoch %d!" % (best_acc, best_epoch))
        
        # Save the current best model parameters
        print("Save the best model checkpoint as best_model.ckpt!")
        torch.save(model.state_dict(), "best_model.ckpt")
    
    print("Time elapsed: %s" % time.strftime("%Hh%Mm%Ss", time.gmtime(time.time()-start_time)))
    print("-" * 10)

print("End of training! The best accuracy %.3f was obtained in epoch %d." % (best_acc, best_epoch))

# Load back the best checkpoint on dev set
model.load_state_dict(torch.load("best_model.ckpt"))

Answer 1

老实说，我不太确定。问题看似简单，但是你看过模型每个函数的文档了吗？

情感分析 FFNN 模型（Pytorch）中的零精度

问题描述投票：0回答：1

以下内容：

代码如下：

模特

作业

培训

1个回答

最新问题

情感分析 FFNN 模型（Pytorch）中的零精度

问题描述 投票：0回答：1

以下内容：

代码如下：

模特

作业

培训

1个回答

最新问题

问题描述投票：0回答：1