低 f1 分数和低损失函数分数

问题描述 投票:0回答:2

我正在尝试建立一个多标签文本分类模型来对有毒评论进行分类。 我从此链接中关注了一篇中等文章:使用 Pytorch 使用 BERT 进行多标签文本分类

我还使用了来自kaggle的这个数据集:jigsaw-有毒-评论-分类-挑战

我正在使用 google colab 通过 V100 GPU 运行时设置来运行我的模型。

不幸的是,经过几个小时的训练(4 个时期),我的 f1 分数仅为 0.04214842148421484。我的最终损失分数是0.00354736

我知道损失函数和 f1 分数是两个不同的东西,但据我了解,低成本函数分数应该会影响 f1 分数。我哪里出错了?

这是代码:

import torch
import numpy as np
import pandas as pd
import shutil, sys
import transformers
from sklearn import metrics
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler
from transformers import BertTokenizer, BertModel, BertConfig

val_targets=[]
val_outputs=[]

class CustomDataset(Dataset):

    def __init__(self, dataframe, tokenizer, max_len,):
        self.tokenizer = tokenizer
        self.data = dataframe
        self.title = dataframe['comment_text']
        self.targets = self.data.target_list
        self.max_len = max_len

    def __len__(self):
        return len(self.title)

    def __getitem__(self, index):
        title = str(self.title[index])
        title = " ".join(title.split(" "))

        inputs = self.tokenizer.encode_plus(
            title,
            None,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
            return_token_type_ids=True,
            truncation=True
        )
        ids = inputs['input_ids']
        mask = inputs['attention_mask']
        token_type_ids = inputs["token_type_ids"]


        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            'targets': torch.tensor(self.targets[index], dtype=torch.float)
        }
    

class BERTClass(torch.nn.Module):
    def __init__(self):
        super(BERTClass, self).__init__()
        self.l1 = transformers.BertModel.from_pretrained('bert-base-uncased', return_dict=False)
        self.l2 = torch.nn.Dropout(0.3)
        self.l3 = torch.nn.Linear(768, 6)

    def forward(self, ids, mask, token_type_ids):
        _, output_1= self.l1(ids, attention_mask = mask, token_type_ids = token_type_ids)
        output_2 = self.l2(output_1)
        output = self.l3(output_2)
        return output
    
def loss_fn(outputs, targets):
    return torch.nn.BCEWithLogitsLoss()(outputs, targets)


def save_ckp(state, is_best, checkpoint_path, best_model_path):
    """
    state: checkpoint we want to save
    is_best: is this the best checkpoint; min validation loss
    checkpoint_path: path to save checkpoint
    best_model_path: path to save best model
    """
    f_path = checkpoint_path
    # save checkpoint data to the path given, checkpoint_path
    torch.save(state, f_path)
    # if it is a best model, min validation loss
    if is_best:
        best_fpath = best_model_path
        # copy that checkpoint file to best path given, best_model_path
        shutil.copyfile(f_path, best_fpath)

def load_ckp(checkpoint_fpath, model, optimizer):
    """
    checkpoint_path: path to save checkpoint
    model: model that we want to load checkpoint parameters into
    optimizer: optimizer we defined in previous training
    """
    # load checkpoint
    checkpoint = torch.load(checkpoint_fpath)

    # initialize state_dict from checkpoint to model
    model.load_state_dict(checkpoint['state_dict'])

    # initialize optimizer from checkpoint to optimizer
    optimizer.load_state_dict(checkpoint['optimizer'])

    # handle valid_loss_min based on its type
    valid_loss_min = checkpoint['valid_loss_min']
    if isinstance(valid_loss_min, torch.Tensor):
        valid_loss_min = valid_loss_min.item()

    # return model, optimizer, epoch value, min validation loss
    return model, optimizer, checkpoint['epoch'], valid_loss_min


def train_model(start_epochs,  n_epochs, valid_loss_min_input,
                training_loader, validation_loader, model,
                optimizer, checkpoint_path, best_model_path):

  # initialize tracker for minimum validation loss
  valid_loss_min = valid_loss_min_input


  for epoch in range(start_epochs, n_epochs+1):
    train_loss = 0
    valid_loss = 0

    model.train()
    print('############# Epoch {}: Training Start   #############'.format(epoch))
    for batch_idx, data in enumerate(training_loader):
        #print('yyy epoch', batch_idx)
        ids = data['ids'].to(device, dtype = torch.long)
        mask = data['mask'].to(device, dtype = torch.long)
        token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
        targets = data['targets'].to(device, dtype = torch.float)

        optimizer.zero_grad()
        outputs = model(ids, mask, token_type_ids)
        print(outputs.shape)

        loss = loss_fn(outputs, targets)
        if batch_idx%100==0:
           print(f'Epoch: {epoch}, Training Loss:  {loss.item()}')

        loss.backward()
        optimizer.step()
        #print('before loss data in training', loss.item(), train_loss)
        train_loss = train_loss + ((1 / (batch_idx + 1)) * (loss.item() - train_loss))
        #print('after loss data in training', loss.item(), train_loss)

    print('############# Epoch {}: Training End     #############'.format(epoch))

    print('############# Epoch {}: Validation Start   #############'.format(epoch))
    ######################
    # validate the model #
    ######################

    model.eval()


    outputs, targets = do_validation(validation_loader)
    val_preds = (np.array(outputs) > 0.5).astype(int)
    val_targets = (np.array(targets) > 0.5).astype(int)
    accuracy = metrics.accuracy_score(val_targets, val_preds)
    f1_score_micro = metrics.f1_score(val_targets, val_preds, average='micro')
    f1_score_macro = metrics.f1_score(val_targets, val_preds, average='macro')
    print(f"Accuracy Score = {accuracy}")
    print(f"F1 Score (Micro) = {f1_score_micro}")
    print(f"F1 Score (Macro) = {f1_score_macro}")

          
    print('############# Epoch {}: Validation End     #############'.format(epoch))
    # calculate average losses
    #print('before cal avg train loss', train_loss)
    train_loss = train_loss/len(training_loader)
    valid_loss = valid_loss/len(validation_loader)
    # print training/validation statistics
    print('Epoch: {} \tAvgerage Training Loss: {:.6f} \tAverage Validation Loss: {:.6f}'.format(
          epoch,
          train_loss,
          valid_loss
          ))

    # create checkpoint variable and add important data
    checkpoint = {
          'epoch': epoch + 1,
          'valid_loss_min': valid_loss,
          'state_dict': model.state_dict(),
          'optimizer': optimizer.state_dict()
    }

      # save checkpoint
    save_ckp(checkpoint, False, checkpoint_path, best_model_path)

    ## TODO: save the model if validation loss has decreased
    if valid_loss <= valid_loss_min:
      print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(valid_loss_min,valid_loss))
      # save checkpoint as best model
      save_ckp(checkpoint, True, checkpoint_path, best_model_path)
      valid_loss_min = valid_loss

    print('############# Epoch {}  Done   #############\n'.format(epoch))


  return model


def do_validation(dataloader):
    model.eval()
    fin_targets=[]
    fin_outputs=[]
    with torch.no_grad():
        for _, data in enumerate(dataloader, 0):
            ids = data['ids'].to(device, dtype = torch.long)
            mask = data['mask'].to(device, dtype = torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
            targets = data['targets'].to(device, dtype = torch.float)
            outputs = model(ids, mask, token_type_ids)
            fin_targets.extend(targets.cpu().detach().numpy().tolist())
            fin_outputs.extend(torch.sigmoid(outputs).cpu().detach().numpy().tolist())
    return fin_outputs, fin_targets


if __name__ == '__main__':

    # If there's a GPU available...
    if torch.cuda.is_available():

        # Tell PyTorch to use the GPU.
        device = torch.device("cuda")

        print('There are %d GPU(s) available.' % torch.cuda.device_count())

        print('We will use the GPU:', torch.cuda.get_device_name(0))

    # If not...
    else:
        print('No GPU available, using the CPU instead.')
        device = torch.device("cpu")

    train_df =  pd.read_csv(train_data_location,on_bad_lines='skip')
    test_df = pd.read_csv(test_data_location,on_bad_lines='skip')
    select_labels = train_df.columns.values.tolist()[2:]
    train_df['target_list'] = train_df[select_labels].values.tolist()
    test_df['target_list'] = test_df[select_labels].values.tolist()
    MAX_LEN = 64
    TRAIN_BATCH_SIZE = 8
    VALID_BATCH_SIZE = 8
    EPOCHS = 10
    LEARNING_RATE = 1e-05
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    training_set = CustomDataset(train_df, tokenizer, MAX_LEN)
    validation_set = CustomDataset(test_df, tokenizer, MAX_LEN)
    train_params = {'batch_size': TRAIN_BATCH_SIZE,
                    'shuffle': True,
                    'num_workers': 0
                    }

    test_params = {'batch_size': VALID_BATCH_SIZE,
                    'shuffle': False,
                    'num_workers': 0
                    }

    training_loader = DataLoader(training_set, **train_params)
    validation_loader = DataLoader(validation_set, **test_params)
    model = BERTClass()
    model.to(device)
    optimizer = torch.optim.Adam(params =  model.parameters(), lr=LEARNING_RATE)
    checkpoint_path = '/content/checkpoints/current_checkpoint.pt'
    best_model = '/content/checkpoints/best_model.pt'
    trained_model = train_model(1, EPOCHS, np.Inf, training_loader, validation_loader, model,
                        optimizer,checkpoint_path,best_model)
    

 
machine-learning pytorch bert-language-model text-classification
2个回答
2
投票

F1 分数是精确率和召回率的调和平均值。它允许程序员看到单个数字的精度和召回率。损失分数与其他绩效指标不直接相关。

对于多标签文本分类,准确率、精确度和召回率是重要的指标。具体来说,检查您的总体准确度分数,然后检查每个类别的精确度和召回率分数。除了研究和商业目的之外,F1 分数并不是特别有用。

至于为什么你的 F1 分数较低,你是否分割了你的数据集?我看到你导入了 Sklearn 的 train_test_split 库,但你从未在代码中调用它。看起来您只是将整个原始数据集传递给您的训练函数。


0
投票

您正在使用 BCEWithLogitsLoss,事实是这个函数或其他众所周知的损失函数都不一定反映 f-score。更多分析这里这里

可以在here找到所谓的“soft-f1-score”的实现。

相关问题解答也发生了这里

希望大家帮忙!

© www.soinside.com 2019 - 2024. All rights reserved.