Sentence classification with HuggingFace BERT in TensorFlow

Problem description

I am trying to train a real-disaster tweet prediction model (a Kaggle competition) by classifying tweets with a Hugging Face BERT model.

I have followed many tutorials and tried several BERT models, but none of them runs in Colab without hitting an error.

My code is:

!pip install transformers
import tensorflow as tf
import numpy as np
import pandas as pd
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.callbacks import ModelCheckpoint
from transformers import DistilBertTokenizer, RobertaTokenizer


train = pd.read_csv("/content/drive/My Drive/Kaggle_disaster/train.csv")
test = pd.read_csv("/content/drive/My Drive/Kaggle_disaster/test.csv")


roberta = 'distilbert-base-uncased'
tokenizer = DistilBertTokenizer.from_pretrained(roberta, do_lower_case = True, add_special_tokens = True, max_length = 128, pad_to_max_length = True)


def tokenize(sentences, tokenizer):
  input_ids, input_masks, input_segments = [], [], []
  for sentence in sentences:
    inputs = tokenizer.encode_plus(sentence, add_special_tokens = True, max_length = 128, pad_to_max_length = True, return_attention_mask = True, return_token_type_ids = True)
    input_ids.append(inputs['input_ids'])
    input_masks.append(inputs['attention_mask'])
    input_segments.append(inputs['token_type_ids'])
  return np.asarray(input_ids, dtype = "int32"), np.asarray(input_masks, dtype = "int32"), np.asarray(input_segments, dtype = "int32")


input_ids, input_masks, input_segments = tokenize(train.text.values, tokenizer)

from transformers import TFDistilBertForSequenceClassification, DistilBertConfig, TFDistilBertModel

distil_bert = 'distilbert-base-uncased'

config = DistilBertConfig(dropout=0.2, attention_dropout=0.2)
config.output_hidden_states = False
transformer_model = TFDistilBertModel.from_pretrained(distil_bert, config = config)

input_ids_in = tf.keras.layers.Input(shape=(128,), name='input_token', dtype=tf.int32)
input_masks_in = tf.keras.layers.Input(shape=(128,), name='masked_token', dtype=tf.int32) 
embedding_layer = transformer_model(input_ids_in, attention_mask=input_masks_in)[0]
X = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(50, return_sequences=True, dropout=0.1, recurrent_dropout=0.1))(embedding_layer)
X = tf.keras.layers.GlobalMaxPool1D()(X)
X = tf.keras.layers.Dense(50, activation='relu')(X)
X = tf.keras.layers.Dropout(0.2)(X)
X = tf.keras.layers.Dense(1, activation='sigmoid')(X)
model = tf.keras.Model(inputs=[input_ids_in, input_masks_in], outputs = X)
model.compile(Adam(lr = 1e-5), loss = 'binary_crossentropy', metrics = ['accuracy'])
for layer in model.layers[:3]:
  layer.trainable = False

bert_input = [
    input_ids,
    input_masks
]


checkpoint = ModelCheckpoint('/content/drive/My Drive/disaster_model/model_hugging_face.h5', monitor = 'val_loss', save_best_only= True)


train_history = model.fit(
    bert_input,
    validation_split = 0.2,
    batch_size = 16,
    epochs = 10,
    callbacks = [checkpoint]
)

When I run the above code in Colab, I get the following error:

Epoch 1/10
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-91-9df711c91040> in <module>()
      9     batch_size = 16,
     10     epochs = 10,
---> 11     callbacks = [checkpoint]
     12 )

10 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
    966           except Exception as e:  # pylint:disable=broad-except
    967             if hasattr(e, "ag_error_metadata"):
--> 968               raise e.ag_error_metadata.to_exception(e)
    969             else:
    970               raise

ValueError: in user code:

    /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:571 train_function  *
        outputs = self.distribute_strategy.run(
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:951 run  **
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2290 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2649 _call_for_each_replica
        return fn(*args, **kwargs)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:541 train_step  **
        self.trainable_variables)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:1804 _minimize
        trainable_variables))
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/optimizer_v2/optimizer_v2.py:521 _aggregate_gradients
        filtered_grads_and_vars = _filter_grads(grads_and_vars)
    /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/optimizer_v2/optimizer_v2.py:1219 _filter_grads
        ([v.name for _, v in grads_and_vars],))

    ValueError: No gradients provided for any variable: ['tf_distil_bert_model_23/distilbert/embeddings/word_embeddings/weight:0', 'tf_distil_bert_model_23/distilbert/embeddings/position_embeddings/embeddings:0', 'tf_distil_bert_model_23/distilbert/embeddings/LayerNorm/gamma:0', 'tf_distil_bert_model_23/distilbert/embeddings/LayerNorm/beta:0', 'tf_distil_bert_model_23/distilbert/transformer/layer_._0/attention/q_lin/kernel:0', 'tf_distil_bert_model_23/distilbert/transformer/layer_._0/attention/q_lin/bias:0', 'tf_distil_bert_model_23/distilbert/transformer/layer_._0/attention/k_lin/kernel:0', 'tf_distil_bert_model_23/distilbert/transformer/layer_._0/attention/k_lin/bias:0', 'tf_distil_bert_model_23/distilbert/transformer/layer_._0/attention/v_lin/kernel:0', 'tf_distil_bert_model_23/distilbert/transformer/layer_._0/attention/v_lin/bias:0', 'tf_distil_bert_model_23/distilbert/transformer/layer_._0/attention/out_lin/kernel:0', 'tf_distil_bert_model_23/distilbert/transformer/layer_._0/attention/out_lin/bias:0', 'tf_distil_bert_model_23/distilbert/transformer/layer_._0/sa_layer_norm/gamma:0', 'tf_distil_bert_model_23/distilbert/transformer/layer_._0/sa_layer_norm/beta:0', 'tf_distil_bert_model_23/distilbert/transformer/layer_._0/ffn/lin1/kernel:0', 'tf_distil_bert_model_23/distilbert/transformer/layer_._0/ffn/lin1/bias:0', 'tf_distil_bert_model_23/distilbert/transformer/layer_._0/ffn/lin2/kernel:0', 'tf_distil_bert_model_23/distilbert/transformer/layer_._0/ffn/lin2/bias:0', 'tf_...
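
The traceback ends with "No gradients provided for any variable", which in Keras typically means no loss could be computed, so there was nothing to differentiate. In the call above, model.fit receives only the inputs (bert_input) and no labels. Below is a minimal sketch of one possible fix, assuming the training labels live in the target column of train.csv (as in the Kaggle disaster-tweets data); it also freezes only the DistilBERT layer and recompiles afterwards, since Keras only applies trainable changes at compile time:

labels = train.target.values  # assumed label column: 0 = not a disaster, 1 = disaster

# Freeze only the DistilBERT weights; note that model.layers[:3] above also includes the two Input layers.
for layer in model.layers:
    if isinstance(layer, TFDistilBertModel):
        layer.trainable = False

# Recompile so the trainable change takes effect.
model.compile(Adam(lr = 1e-5), loss = 'binary_crossentropy', metrics = ['accuracy'])

train_history = model.fit(
    bert_input,            # x: [input_ids, input_masks]
    labels,                # y: the targets missing from the original call
    validation_split = 0.2,
    batch_size = 16,
    epochs = 10,
    callbacks = [checkpoint]
)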

    


tensorflow text-classification huggingface-transformers bert