我正在尝试修改 https://www.tensorflow.org/text/guide/word_embeddings 教程中的步骤,使其支持序列到序列(seq2seq)任务:输入序列由上下文、问题和答案拼接而成,输出是答案序列。
我尝试使用函数式(Functional API)模型:每个输入各自经过独立的分支进行向量化和嵌入,然后将结果拼接起来用于预测:
# First attempt: a three-input functional model fed with a list of datasets.

# Build one tf.data.Dataset per column of `train`.
train_cont_ds = tf.data.Dataset.from_tensor_slices(train["clean_context"].values)
train_q_ds = tf.data.Dataset.from_tensor_slices(train["clean_question"].values)
train_a_ds = tf.data.Dataset.from_tensor_slices(train["clean_answer"].values)
# some text vectorisation steps

# One model input per text field (context, question, answer).
# NOTE(review): if the *_vec_layer objects are string vectorisation layers,
# these inputs presumably need dtype=tf.string -- confirm.
c_input = Input(shape=(1,))
q_input = Input(shape=(1,))
a_input = Input(shape=(1,))

# Each input goes through its own vectorisation layer...
c_vec = context_vec_layer(c_input)
q_vec = q_vec_layer(q_input)
a_vec = a_vec_layer(a_input)

# ...then through a frozen embedding initialised from `embedding_matrix`.
c_emb = Embedding(VOCAB_SIZE, GLOVE, weights=[embedding_matrix],
                  trainable=False, mask_zero=True)(c_vec)
q_emb = Embedding(VOCAB_SIZE, GLOVE, weights=[embedding_matrix],
                  trainable=False, mask_zero=True)(q_vec)
a_emb = Embedding(VOCAB_SIZE, GLOVE, weights=[embedding_matrix],
                  trainable=False, mask_zero=True)(a_vec)

# Average over the sequence axis so each branch becomes one fixed-size vector.
c_pool = GlobalAveragePooling1D()(c_emb)
q_pool = GlobalAveragePooling1D()(q_emb)
a_pool = GlobalAveragePooling1D()(a_emb)

# concatenate processed input
concat = concatenate([c_pool, q_pool, a_pool])

# NOTE(review): a relu output combined with BinaryCrossentropy(from_logits=True)
# looks inconsistent -- confirm the intended output/loss pairing.
Y_pred = Dense(QUES_LEN, activation='relu')(concat)

model = Model(inputs=[c_input, q_input, a_input], outputs=Y_pred)
model.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy'])
# Fixed: the Keras method is `summary` (lower-case, no space); `model. Summary()`
# raises AttributeError before training is ever reached.
model.summary()

# NOTE: this is the failing call the question is about, kept unchanged on
# purpose. The inputs are passed as a Python *list of Dataset objects*, which
# is what the "Failed to find data adapter" ValueError quoted after this
# snippet complains about.
model.fit_generator(
    [train_cont_ds, train_q_ds, train_a_ds],
    train_a_ds,
    validation_data=(
        [cv_cont_ds, cv_q_ds, cv_a_ds],
        cv_a_ds),
    verbose=1, epochs=epochs, callbacks=[tensorboard_callback, cp_callback])
这给出了错误:
ValueError: Failed to find data adapter that can handle input: (<class 'list'> containing values of types {"<class 'tensorflow.python.data.ops.dataset_ops.BatchDataset'>"}), <class 'NoneType'>
然后我尝试了:
# Second attempt: one dataset per split whose elements are structured as
# ((context, question, answer), answer), i.e. a tuple of the three model
# inputs paired with the target.
train_ds = tf.data.Dataset.from_tensor_slices((
    (train["clean_context"].values,
     train["clean_question"].values,
     train["clean_answer"].values),
    train["clean_answer"].values,
))
train_ds = train_ds.batch(batch_size)

cv_ds = tf.data.Dataset.from_tensor_slices((
    (cv["clean_context"].values,
     cv["clean_question"].values,
     cv["clean_answer"].values),
    cv["clean_answer"].values,
))
# Fixed: batch the validation split itself. The original re-batched
# `train_ds` here, discarding the cv data and double-batching the train data.
cv_ds = cv_ds.batch(batch_size)

dev_ds = tf.data.Dataset.from_tensor_slices((
    (dev["clean_context"].values,
     dev["clean_question"].values,
     dev["clean_answer"].values),
    dev["clean_answer"].values,
))
# Fixed: same copy-paste slip as above -- batch `dev_ds`, not `train_ds`.
dev_ds = dev_ds.batch(batch_size)
结果文本向量化层报错。
然后我试了:
# Third attempt: concatenate the per-column datasets and pass numpy iterators.
#
# NOTE(review): Dataset.concatenate appends one dataset after another (end to
# end); it does not pair up corresponding elements. Also the answer dataset is
# batched before concatenation while the other two are not, and the result is
# batched again. Presumably a tuple/zip structure was intended -- confirm.

# --- training split ---
train_cont_ds = tf.data.Dataset.from_tensor_slices(train["clean_context"].values)
train_q_ds = tf.data.Dataset.from_tensor_slices(train["clean_question"].values)
train_a_ds = tf.data.Dataset.from_tensor_slices(train["clean_answer"].values)
train_a_ds = train_a_ds.batch(batch_size)
train_ds = train_cont_ds.concatenate(train_q_ds).concatenate(train_a_ds)
train_ds = train_ds.batch(batch_size)

# --- cross-validation split ---
cv_cont_ds = tf.data.Dataset.from_tensor_slices(cv["clean_context"].values)
cv_q_ds = tf.data.Dataset.from_tensor_slices(cv["clean_question"].values)
cv_a_ds = tf.data.Dataset.from_tensor_slices(cv["clean_answer"].values)
cv_a_ds = cv_a_ds.batch(batch_size)
cv_ds = cv_cont_ds.concatenate(cv_q_ds).concatenate(cv_a_ds)
cv_ds = cv_ds.batch(batch_size)

# --- dev split ---
dev_cont_ds = tf.data.Dataset.from_tensor_slices(dev["clean_context"].values)
dev_q_ds = tf.data.Dataset.from_tensor_slices(dev["clean_question"].values)
dev_a_ds = tf.data.Dataset.from_tensor_slices(dev["clean_answer"].values)
dev_a_ds = dev_a_ds.batch(batch_size)
dev_ds = dev_cont_ds.concatenate(dev_q_ds).concatenate(dev_a_ds)
# Fixed: batch `dev_ds` itself; the original assigned `train_ds.batch(...)`
# here, silently replacing the dev data with re-batched training data.
dev_ds = dev_ds.batch(batch_size)

# NOTE: this is the failing call the question is about, kept unchanged on
# purpose. Wrapping a numpy iterator in a list is what the "Failed to find
# data adapter" ValueError quoted after this snippet complains about.
hist = model.fit(
    [train_ds.as_numpy_iterator()], train_a_ds.as_numpy_iterator(),
    validation_data=(
        [cv_ds.as_numpy_iterator()], cv_a_ds.as_numpy_iterator()),
    verbose=1, epochs=epochs, callbacks=[tensorboard_callback])
同样的错误:
ValueError: Failed to find data adapter that can handle input: (<class 'list'> containing values of types {"<class 'tensorflow.python.data.ops.dataset_ops._NumpyIterator'>"}), <class 'tensorflow.python.data.ops.dataset_ops._NumpyIterator'>
然后我试了:
# Fourth attempt: wrap zipped datasets in generator-backed datasets.
# Fixed: the function bodies had lost their indentation, so the snippet was
# not valid Python.

def gen():
    """Yield pairs of corresponding elements from train_ds and train_a_ds."""
    for element in zip(train_ds, train_a_ds):
        yield element


# NOTE(review): `output_types` is a single dtype here, which declares each
# element as one float32 tensor rather than an (inputs, targets) pair. That is
# presumably why Keras reports "Target data is missing" -- confirm. It is kept
# unchanged because this is the failing attempt the question is about.
ds = tf.data.Dataset.from_generator(gen, output_types=tf.dtypes.float32)


def v_gen():
    """Yield pairs of corresponding elements from cv_ds and cv_a_ds."""
    for element in zip(cv_ds, cv_a_ds):
        yield element


v_ds = tf.data.Dataset.from_generator(v_gen, output_types=tf.dtypes.float32)

hist = model.fit_generator(
    ds, validation_data=(v_ds),
    verbose=1, epochs=epochs, callbacks=[tensorboard_callback])
ValueError: Target data is missing. Your model was compiled with loss=<keras.losses.BinaryCrossentropy object at 0x00000139D15BA610>, and therefore expects target data to be provided in `fit()`.
如果有人能解释一下,使用这些层构建此 seq2seq 模型时数据集应采用的正确格式,我将不胜感激。
提前谢谢大家!