在过去的几周中,我一直试图让输入管道在 TensorFlow(tf 2.0.1)下配合 TFRecord 运行。下面的代码从 CSV 加载句子并生成记录:
import tensorflow as tf
import pathlib
import sys
import csv
# Resolve the project root: the directory that holds this script, or its
# parent when the script itself lives in a folder named exactly "models".
_SCRIPT_DIR = pathlib.Path(__file__).parent.absolute()
_PROJECT_ROOT = _SCRIPT_DIR.parent if _SCRIPT_DIR.name == 'models' else _SCRIPT_DIR
# Forward slashes only, and always a trailing "/", so that the later
# `PATH_PARENT + "models/..."` concatenations form a valid path even when
# the script is not located inside the "models" folder.
PATH_PARENT = _PROJECT_ROOT.as_posix().rstrip('/') + '/'
sys.path.append(PATH_PARENT)
def create_tf_example(features, label):
    """Build a tf.train.Example from a sentence and its class label.

    Args:
        features: sentence text (str); stored UTF-8 encoded under 'Sentence'.
        label: class name (str); stored UTF-8 encoded under 'Class'.

    Returns:
        A tf.train.Example with the two bytes features 'Sentence' and 'Class'.
    """
    def _utf8_bytes_feature(text):
        # Wrap one string as a single-element bytes_list feature.
        encoded = text.encode('utf-8')
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[encoded]))

    feature_map = {
        'Sentence': _utf8_bytes_feature(features),
        'Class': _utf8_bytes_feature(label),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature_map))
# Example CSV row: musik,<slot_0>,play,<slot_music_controle>
intent_load_list = ["training_data_intent_Music_controler_0.csv"]

# Gather the (sentence, label) pairs of every intent file first, so the
# output file is only touched once all inputs have been read successfully.
labelled_sentences = []
for load_intent in intent_load_list:
    # The label is the file name without its 14-character "training_data_"
    # prefix and without the trailing "_<index>.csv" part.
    start = 14
    end = load_intent.rfind("_")
    label = load_intent[start:end]
    print("loading intent " + label)
    # newline='' is the mode the csv module documents for files it reads.
    with open(PATH_PARENT + "models/" + load_intent, 'r', newline='') as csv_file:
        for row in csv.reader(csv_file):
            # Drop slot placeholders such as "<slot_0>". Every kept word is
            # followed by one space, so each sentence ends with a space.
            sentence = ''.join(word + ' ' for word in row if '<' not in word)
            labelled_sentences.append((sentence, label))

# Open the output exactly once: re-opening it per intent file would truncate
# it and keep only the last intent. The context manager closes the writer,
# so no explicit close() call is needed.
with tf.io.TFRecordWriter(PATH_PARENT + "models/dataset.tfrecords") as writer:
    for sentence, label in labelled_sentences:
        example = create_tf_example(sentence, label)
        writer.write(example.SerializeToString())
到目前为止一切正常。之后应加载该记录,用 tf-hub 模型进行处理,然后用它进行训练。但是,我要么得到 IndexError,要么就彻底失败:
import numpy as np
import tensorflow as tf
from tensorflow import keras
import tensorflow_hub as hub
import sys
import pathlib
# Resolve the project root: the directory that holds this script, or its
# parent when the script itself lives in a folder named exactly "models".
_SCRIPT_DIR = pathlib.Path(__file__).parent.absolute()
_PROJECT_ROOT = _SCRIPT_DIR.parent if _SCRIPT_DIR.name == 'models' else _SCRIPT_DIR
# Forward slashes only, and always a trailing "/", so that the later
# `PATH_PARENT + "models/..."` concatenation forms a valid path even when
# the script is not located inside the "models" folder.
PATH_PARENT = _PROJECT_ROOT.as_posix().rstrip('/') + '/'
sys.path.append(PATH_PARENT)
# Pretrained German text-embedding module loaded from TF Hub
# (50 output dimensions, going by the module name).
embed = hub.load("https://tfhub.dev/google/nnlm-de-dim50-with-normalization/2")
dataset = tf.data.TFRecordDataset(filenames=[PATH_PARENT + "models/dataset.tfrecords"])

# Layout of each stored tf.train.Example: two scalar byte-string features.
FEATURE_SPEC = {
    'Sentence': tf.io.FixedLenFeature([], tf.string),
    'Class': tf.io.FixedLenFeature([], tf.string),
}


def _parse_example(serialized):
    """Decode one serialized Example into (sentence, class) string scalars."""
    parsed = tf.io.parse_single_example(serialized, FEATURE_SPEC)
    return parsed['Sentence'], parsed['Class']


def _collect_class_names(ds):
    """Return the sorted distinct 'Class' values in ds (one eager pass)."""
    return sorted({label.numpy() for _, label in ds.map(_parse_example)})


def prepare_for_training(ds, shuffle_buffer_size=1024, batch_size=2,
                         class_names=None, num_classes=None):
    """Turn serialized Examples into shuffled (embedding, one-hot label) batches.

    Keras `fit` needs both inputs and targets from the dataset; yielding
    only the inputs is what triggers the IndexError inside the loss code.

    Args:
        ds: dataset of serialized tf.train.Example strings that carry the
            'Sentence' and 'Class' byte features.
        shuffle_buffer_size: buffer size handed to `Dataset.shuffle`.
        batch_size: number of examples per batch.
        class_names: ordered class labels (str or bytes); position i maps to
            output unit i. When None, they are collected from `ds`.
        num_classes: width of the one-hot target. Defaults to
            `len(class_names)`.

    Returns:
        A dataset of `(embeddings, one_hot_labels)` batches.

    Raises:
        ValueError: if no class is known, or there are more classes than
            `num_classes`.
    """
    if class_names is None:
        class_names = _collect_class_names(ds)
    class_names = list(class_names)
    if not class_names:
        raise ValueError("no class labels found; is the dataset empty?")
    if num_classes is None:
        num_classes = len(class_names)
    if len(class_names) > num_classes:
        raise ValueError(
            "dataset has %d classes but num_classes is %d"
            % (len(class_names), num_classes))
    class_table = tf.constant(class_names, dtype=tf.string)
    unused_units = num_classes - len(class_names)

    def _embed_batch(sentences, labels):
        # Comparing every label against the class list gives a one-hot row
        # per example (all zeros for an unknown label); pad the row with
        # zeros for output units that have no class in the data.
        matches = tf.equal(tf.expand_dims(labels, -1), class_table)
        one_hot = tf.pad(tf.cast(matches, tf.float32), [[0, 0], [0, unused_units]])
        # The embedding module takes a 1-D batch of strings, so embedding
        # after batching needs no extra wrapping axis.
        return embed(sentences), one_hot

    ds = ds.map(_parse_example)
    ds = ds.shuffle(buffer_size=shuffle_buffer_size).batch(batch_size)
    ds = ds.map(_embed_batch)
    print(ds)
    return ds


def convert_data(data):
    """Embed `data` and return the vectors as nested Python lists."""
    # `embed` returns a tensor; go through NumPy because tensors expose
    # `.numpy()` rather than `.tolist()`.
    return embed(data).numpy().tolist()


batch_size = 64
n_intents = 2
train_ds = prepare_for_training(dataset, batch_size=batch_size, num_classes=n_intents)

build_model = keras.Sequential()
# One 50-dimensional embedding vector per example.
build_model.add(keras.layers.Input(shape=(50,)))
build_model.add(keras.layers.Dense(50, activation='relu'))
build_model.add(keras.layers.Dropout(0.2))
build_model.add(keras.layers.Dense(20, activation='relu'))
build_model.add(keras.layers.Dropout(0.2))
build_model.add(keras.layers.Dense(n_intents, activation='softmax'))
build_model.summary()
# categorical_crossentropy matches the one-hot targets produced above.
build_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
build_model.fit(train_ds, epochs=5)
print("done")
有人可能尝试过类似的想法吗?不幸的是,文档并没有太大帮助。在此先感谢
当您未在数据中传递 labels 时,您会在 tensorflow 版本 2.0.1 中收到此错误。在下面的示例中,我使用 TFRecordWriter 写入虚拟 Input 值,然后使用 TFRecordDataset 读取它并将其传递给模型。
如果在 tensorflow 版本 2.1.0 中运行相同的代码,则错误信息将变为 IndexError: tuple index out of range。
此外,如果您在 tensorflow 版本 2.2.0 中运行相同的代码,则错误信息将变为 ValueError: No gradients provided for any variable: ['dense/kernel:0', 'dense/bias:0', 'dense_1/kernel:0', 'dense_1/bias:0', 'dense_2/kernel:0', 'dense_2/bias:0'].
重现错误的代码-
%tensorflow_version 2.x
import tensorflow as tf
import numpy as np
print(tf.__version__)


def write_date_tfrecord():
    """Write one Example with ten dummy floats under 'Input' to Data.tf_record.

    No label feature is written on purpose: this file exists to reproduce
    the error raised when a dataset without targets is passed to `fit`.
    """
    Input = [20191221.123 + x for x in range(0, 10)]
    print("Writing Input - ", Input)
    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'Input': tf.train.Feature(float_list=tf.train.FloatList(value=Input)),
            }
        )
    )
    # The context manager closes the writer, so the record is on disk
    # before the file is read back.
    with tf.io.TFRecordWriter("Data.tf_record") as writer:
        writer.write(example.SerializeToString())
def parse_function(serialized_example):
    """Decode a serialized Example and return the float values of 'Input'."""
    spec = {
        'Input': tf.io.FixedLenSequenceFeature([], tf.float32, allow_missing=True),
    }
    parsed = tf.io.parse_single_example(serialized=serialized_example, features=spec)
    return parsed['Input']
def dataset_generator():
    """Return a dataset that reads Data.tf_record and maps parse_function over it."""
    return tf.data.TFRecordDataset("Data.tf_record").map(
        parse_function,
        num_parallel_calls=tf.data.experimental.AUTOTUNE,
    )
# Reproduction driver: write the label-less record file, build a small
# classifier and try to fit it on a dataset that yields inputs only.
write_date_tfrecord()
generator = dataset_generator()

build_model = tf.keras.Sequential()
for layer in (
    tf.keras.layers.Input(shape=(1,)),
    tf.keras.layers.Dense(50, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(20, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(3, activation='softmax'),
):
    build_model.add(layer)
build_model.summary()
build_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
# Expected to fail here: IndexError - list index out of range
build_model.fit(dataset_generator(), epochs=5)
print("done")
输出-
2.0.1
Writing Input - [20191221.123, 20191222.123, 20191223.123, 20191224.123, 20191225.123, 20191226.123, 20191227.123, 20191228.123, 20191229.123, 20191230.123]
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense (Dense) (None, 50) 100
_________________________________________________________________
dropout (Dropout) (None, 50) 0
_________________________________________________________________
dense_1 (Dense) (None, 20) 1020
_________________________________________________________________
dropout_1 (Dropout) (None, 20) 0
_________________________________________________________________
dense_2 (Dense) (None, 3) 63
=================================================================
Total params: 1,183
Trainable params: 1,183
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
1/Unknown - 0s 60ms/step
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-1-d1c5c463cdc2> in <module>()
47 build_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
48
---> 49 build_model.fit(dataset_generator(), epochs=5) # IndexError - list index out of range
50 print("done")
20 frames
/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/keras/engine/training_eager.py in _model_loss(model, inputs, targets, output_loss_metrics, sample_weights, training)
164
165 if hasattr(loss_fn, 'reduction'):
--> 166 per_sample_losses = loss_fn.call(targets[i], outs[i])
167 weighted_losses = losses_utils.compute_weighted_loss(
168 per_sample_losses,
IndexError: list index out of range
解决方案-我创建了虚拟 labels 变量,并使用 TFRecordWriter 写入了虚拟 labels,然后使用 TFRecordDataset 读取它并将其传递给模型。现在我们将 Input 和 labels 都传递给模型,并且可以正常工作。
固定代码-
%tensorflow_version 2.x
import tensorflow as tf
import numpy as np
print(tf.__version__)


def write_date_tfrecord():
    """Write one Example with nine dummy inputs and labels to Data.tf_record.

    'Input' holds nine floats and 'labels' holds the matching class ids
    (0, 1 or 2), both stored as float lists.
    """
    Input = [20191221.123 + x for x in range(0, 9)]
    labels = [1, 1, 0, 0, 0, 1, 1, 2, 2]
    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                'Input': tf.train.Feature(float_list=tf.train.FloatList(value=Input)),
                'labels': tf.train.Feature(float_list=tf.train.FloatList(value=labels)),
            }
        )
    )
    # The context manager closes the writer, so the record is on disk
    # before the file is read back.
    with tf.io.TFRecordWriter("Data.tf_record") as writer:
        writer.write(example.SerializeToString())
def parse_function(serialized_example):
    """Decode a serialized Example into an (Input, labels) pair of float tensors."""
    spec = {
        name: tf.io.FixedLenSequenceFeature([], tf.float32, allow_missing=True)
        for name in ('Input', 'labels')
    }
    parsed = tf.io.parse_single_example(serialized=serialized_example, features=spec)
    return parsed['Input'], parsed['labels']
def dataset_generator():
    """Return a dataset that reads Data.tf_record and maps parse_function over it."""
    return tf.data.TFRecordDataset("Data.tf_record").map(
        parse_function,
        num_parallel_calls=tf.data.experimental.AUTOTUNE,
    )
# Driver for the fixed version: the dataset now yields (Input, labels)
# pairs, so `fit` receives targets as well as inputs.
write_date_tfrecord()
generator = dataset_generator()

build_model = tf.keras.Sequential()
build_model.add(tf.keras.layers.Input(shape=(1,)))
build_model.add(tf.keras.layers.Dense(50, activation='relu'))
build_model.add(tf.keras.layers.Dropout(0.2))
build_model.add(tf.keras.layers.Dense(20, activation='relu'))
build_model.add(tf.keras.layers.Dropout(0.2))
build_model.add(tf.keras.layers.Dense(3, activation='softmax'))
build_model.summary()
# The labels are integer class ids (0, 1, 2), not one-hot vectors, so the
# sparse variant of the cross-entropy loss is the matching one.
build_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# Reuse the dataset built above instead of constructing it a second time.
build_model.fit(generator, epochs=5)
print("done")
输出-
2.1.0
Model: "sequential_4"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense_12 (Dense) (None, 50) 100
_________________________________________________________________
dropout_8 (Dropout) (None, 50) 0
_________________________________________________________________
dense_13 (Dense) (None, 20) 1020
_________________________________________________________________
dropout_9 (Dropout) (None, 20) 0
_________________________________________________________________
dense_14 (Dense) (None, 3) 63
=================================================================
Total params: 1,183
Trainable params: 1,183
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
1/1 [==============================] - 0s 362ms/step - loss: 8705754.0000 - accuracy: 0.0000e+00
Epoch 2/5
1/1 [==============================] - 0s 14ms/step - loss: 4458477.5000 - accuracy: 0.2222
Epoch 3/5
1/1 [==============================] - 0s 16ms/step - loss: 5933292.5000 - accuracy: 0.2222
Epoch 4/5
1/1 [==============================] - 0s 16ms/step - loss: 4305070.0000 - accuracy: 0.1111
Epoch 5/5
1/1 [==============================] - 0s 14ms/step - loss: 5578528.5000 - accuracy: 0.1111
done
也建议您阅读此链接,其中说明了“如何馈送 TFRecord 来训练 Keras 模型”。
希望这能回答您的问题。祝您学习愉快。