我正在尝试使用 Hugging Face 的 BERT 模型对推文进行分类,以训练一个真实灾难推文预测模型(Kaggle 竞赛)。
我参考了许多教程,并尝试了多种 BERT 模型,但没有一个能在 Colab 中正常运行,每次都会报错。
我的代码是:
!pip install transformers import tensorflow as tf import numpy as np import pandas as pd from tensorflow.keras.layers import Dense, Dropout from tensorflow.keras.optimizers import Adam, SGD from tensorflow.keras.callbacks import ModelCheckpoint from transformers import DistilBertTokenizer, RobertaTokenizer train = pd.read_csv("/content/drive/My Drive/Kaggle_disaster/train.csv") test = pd.read_csv("/content/drive/My Drive/Kaggle_disaster/test.csv") roberta = 'distilbert-base-uncased' tokenizer = DistilBertTokenizer.from_pretrained(roberta, do_lower_case = True, add_special_tokens = True, max_length = 128, pad_to_max_length = True) def tokenize(sentences, tokenizer): input_ids, input_masks, input_segments = [], [], [] for sentence in sentences: inputs = tokenizer.encode_plus(sentence, add_special_tokens = True, max_length = 128, pad_to_max_length = True, return_attention_mask = True, return_token_type_ids = True) input_ids.append(inputs['input_ids']) input_masks.append(inputs['attention_mask']) input_segments.append(inputs['token_type_ids']) return np.asarray(input_ids, dtype = "int32"), np.asarray(input_masks, dtype = "int32"), np.asarray(input_segments, dtype = "int32") input_ids, input_masks, input_segments = tokenize(train.text.values, tokenizer) from transformers import TFDistilBertForSequenceClassification, DistilBertConfig, TFDistilBertModel distil_bert = 'distilbert-base-uncased' config = DistilBertConfig(dropout=0.2, attention_dropout=0.2) config.output_hidden_states = False transformer_model = TFDistilBertModel.from_pretrained(distil_bert, config = config) input_ids_in = tf.keras.layers.Input(shape=(128,), name='input_token', dtype=tf.int32) input_masks_in = tf.keras.layers.Input(shape=(128,), name='masked_token', dtype=tf.int32) embedding_layer = transformer_model(input_ids_in, attention_mask=input_masks_in)[0] X = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(50, return_sequences=True, dropout=0.1, recurrent_dropout=0.1))(embedding_layer) X = 
tf.keras.layers.GlobalMaxPool1D()(X) X = tf.keras.layers.Dense(50, activation='relu')(X) X = tf.keras.layers.Dropout(0.2)(X) X = tf.keras.layers.Dense(1, activation='sigmoid')(X) model = tf.keras.Model(inputs=[input_ids_in, input_masks_in], outputs = X) model.compile(Adam(lr = 1e-5), loss = 'binary_crossentropy', metrics = ['accuracy']) for layer in model.layers[:3]: layer.trainable = False bert_input = [ input_ids, input_masks ] checkpoint = ModelCheckpoint('/content/drive/My Drive/disaster_model/model_hugging_face.h5', monitor = 'val_loss', save_best_only= True) train_history = model.fit( bert_input, validation_split = 0.2, batch_size = 16, epochs = 10, callbacks = [checkpoint] )
在 Colab 中运行上述代码时,出现以下错误:
Epoch 1/10
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-91-9df711c91040> in <module>()
9 batch_size = 16,
10 epochs = 10,
---> 11 callbacks = [checkpoint]
12 )
10 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/func_graph.py in wrapper(*args, **kwargs)
966 except Exception as e: # pylint:disable=broad-except
967 if hasattr(e, "ag_error_metadata"):
--> 968 raise e.ag_error_metadata.to_exception(e)
969 else:
970 raise
ValueError: in user code:
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:571 train_function *
outputs = self.distribute_strategy.run(
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:951 run **
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2290 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2649 _call_for_each_replica
return fn(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:541 train_step **
self.trainable_variables)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:1804 _minimize
trainable_variables))
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/optimizer_v2/optimizer_v2.py:521 _aggregate_gradients
filtered_grads_and_vars = _filter_grads(grads_and_vars)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/optimizer_v2/optimizer_v2.py:1219 _filter_grads
([v.name for _, v in grads_and_vars],))
ValueError: No gradients provided for any variable: ['tf_distil_bert_model_23/distilbert/embeddings/word_embeddings/weight:0', 'tf_distil_bert_model_23/distilbert/embeddings/position_embeddings/embeddings:0', 'tf_distil_bert_model_23/distilbert/embeddings/LayerNorm/gamma:0', 'tf_distil_bert_model_23/distilbert/embeddings/LayerNorm/beta:0', 'tf_distil_bert_model_23/distilbert/transformer/layer_._0/attention/q_lin/kernel:0', 'tf_distil_bert_model_23/distilbert/transformer/layer_._0/attention/q_lin/bias:0', 'tf_distil_bert_model_23/distilbert/transformer/layer_._0/attention/k_lin/kernel:0', 'tf_distil_bert_model_23/distilbert/transformer/layer_._0/attention/k_lin/bias:0', 'tf_distil_bert_model_23/distilbert/transformer/layer_._0/attention/v_lin/kernel:0', 'tf_distil_bert_model_23/distilbert/transformer/layer_._0/attention/v_lin/bias:0', 'tf_distil_bert_model_23/distilbert/transformer/layer_._0/attention/out_lin/kernel:0', 'tf_distil_bert_model_23/distilbert/transformer/layer_._0/attention/out_lin/bias:0', 'tf_distil_bert_model_23/distilbert/transformer/layer_._0/sa_layer_norm/gamma:0', 'tf_distil_bert_model_23/distilbert/transformer/layer_._0/sa_layer_norm/beta:0', 'tf_distil_bert_model_23/distilbert/transformer/layer_._0/ffn/lin1/kernel:0', 'tf_distil_bert_model_23/distilbert/transformer/layer_._0/ffn/lin1/bias:0', 'tf_distil_bert_model_23/distilbert/transformer/layer_._0/ffn/lin2/kernel:0', 'tf_distil_bert_model_23/distilbert/transformer/layer_._0/ffn/lin2/bias:0', 'tf_...
我正在尝试使用Hugging face bert模型对推文进行分类来训练实际灾难推文预测模型(Kaggle竞赛)。我遵循了许多教程,并使用了...