TensorFlow RNN 执行错误:rnn_mode、rnn_input_mode 和 rnn_direction_mode 配置不正确

问题描述 投票:0回答:1
# Preparing the data
# NOTE(review): verbatim question code — it reproduces the CudnnRNN
# InternalError discussed below. The one-hot input (depth=20000) gives the
# cuDNN LSTM kernel input_size=20000, matching the error's config dump.
import os, pathlib, shutil, random
from tensorflow import keras
batch_size = 32
base_dir = pathlib.Path("aclImdb")
val_dir = base_dir / "val"
train_dir = base_dir / "train"
# Carve a 20% validation split out of aclImdb/train, one class at a time.
for category in ("neg", "pos"):
    # NOTE(review): no exist_ok=True — raises FileExistsError on a second run.
    os.makedirs(val_dir / category)


    files = os.listdir(train_dir / category)
    # Fixed seed so the shuffle (and hence the split) is reproducible.
    random.Random(1337).shuffle(files)
    num_val_samples = int(0.2 * len(files))
    # Last 20% of the shuffled list becomes validation data.
    val_files = files[-num_val_samples:]
    for fname in val_files:
        shutil.move(train_dir / category / fname,
                    val_dir / category / fname)

# Directory names ("neg"/"pos") become the binary labels (0/1).
train_ds = keras.utils.text_dataset_from_directory(
    "aclImdb/train", batch_size=batch_size
)
val_ds = keras.utils.text_dataset_from_directory(
    "aclImdb/val", batch_size=batch_size
)
test_ds = keras.utils.text_dataset_from_directory(
    "aclImdb/test", batch_size=batch_size
)
# Drop the labels: TextVectorization.adapt() only needs the raw text.
text_only_train_ds = train_ds.map(lambda x, y: x)
from tensorflow.keras import layers
# Preparing integer sequence datasets
max_length = 600
max_tokens = 20000
# Map each review to a fixed-length (600) sequence of token ids in [0, 20000).
text_vectorization = layers.TextVectorization(
    max_tokens=max_tokens,
    output_mode="int",
    output_sequence_length=max_length,
)
text_vectorization.adapt(text_only_train_ds)

int_train_ds = train_ds.map(
    lambda x, y: (text_vectorization(x), y),
    num_parallel_calls=4)
int_val_ds = val_ds.map(
    lambda x, y: (text_vectorization(x), y),
    num_parallel_calls=4)
int_test_ds = test_ds.map(
    lambda x, y: (text_vectorization(x), y),
    num_parallel_calls=4)
# A sequence model built on one-hot encoded vector sequences
import tensorflow as tf
inputs = keras.Input(shape=(None,), dtype="int64")
# Each token id becomes a 20000-dim one-hot vector, so every LSTM timestep
# has 20000 input features — this is what the failing kernel config
# [..., input_size, ...] = [1, 20000, 32, ...] in the traceback reflects.
embedded = tf.one_hot(inputs, depth=max_tokens)
x = layers.Bidirectional(layers.LSTM(32))(embedded)
x = layers.Dropout(0.5)(x)
# Single sigmoid unit for binary (neg/pos) sentiment classification.
outputs = layers.Dense(1, activation="sigmoid")(x)
model = keras.Model(inputs, outputs)
model.compile(optimizer="rmsprop",
              loss="binary_crossentropy",
              metrics=["accuracy"])
model.summary()
callbacks = [
    keras.callbacks.ModelCheckpoint("one_hot_bidir_lstm.keras",
                                    save_best_only=True)
]
model.fit(int_train_ds, validation_data=int_val_ds, epochs=10, callbacks=callbacks)
# Reload the checkpointed best model (lowest val loss) before evaluating.
model = keras.models.load_model("one_hot_bidir_lstm.keras")
print(f"Test acc: {model.evaluate(int_test_ds)[1]:.3f}")

上面是我的代码,用于拟合双向 LSTM 模型,用于解决 IMDB 电影评论情感分类。 当我尝试构建双向 LSTM 模型时,出现以下错误:

--------------------------------------------
InternalError                             Traceback (most recent call last)
Cell In[5], line 18
13 model.summary()
14 callbacks = [
15     keras.callbacks.ModelCheckpoint("one_hot_bidir_lstm.keras",
16                                     save_best_only=True)
17]
--->  18 model.fit(int_train_ds, validation_data=int_val_ds, epochs=10, callbacks=callbacks)
19 model = keras.models.load_model("one_hot_bidir_lstm.keras")
20 print(f"Test acc: {model.evaluate(int_test_ds)[1]:.3f}")

File F:\Python-virtual-envir\keras-deeplearning-envir\lib\site-packages\keras\utils\traceback_utils.py:70,  in filter_traceback.< locals> .error_handler(*args, **kwargs)
67     filtered_tb = _process_traceback_frames(e.__traceback__)
68     # To get the full stack trace, call:
69     # `tf.debugging.disable_traceback_filtering()`
--->  70     raise e.with_traceback(filtered_tb) from None
71 finally:
72     del filtered_tb

File F:\Python-virtual-envir\keras-deeplearning-envir\lib\site-packages\tensorflow\python\eager\execute.py:54,  in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
52 try:
53   ctx.ensure_initialized()
--->  54   tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
55                                       inputs, attrs, num_outputs)
56 except core._NotOkStatusException as e:
57   if name is not None:

InternalError: Graph execution error:

Failed to call ThenRnnForward with model config: [rnn_mode, rnn_input_mode, rnn_direction_mode]:  2, 0, 0 , [num_layers, input_size, num_units, dir_count, max_seq_length, batch_size, cell_num_units]:  [1, 20000, 32, 1, 600, 32, 32]
[[{{node CudnnRNN}}]]
[[model/bidirectional/backward_lstm/PartitionedCall]] [Op:__inference_train_function_6753]
------------------------------------------

根据分析: 在我的模型中,我使用双向包装器来包装 LSTM 层,这意味着我预期的 rnn_direction_mode 应为 1(双向)。然而,错误消息显示 rnn_direction_mode 为 0,这表明模型在执行时期望单向 RNN。

首先,上述代码源自《Deep Learning with Python Second Edition》——FrancoisChollet 中的“chapter11_part02_sequence-models.ipynb”代码内容 我尝试运行作者的源代码,发现同样的错误; 另外,我确保将数据划分为训练集和验证集的代码是正确的。我已经成功地用一元语法训练了 keras 模型。 我的期望是模型能够成功训练

python keras lstm bidirectional bilstm
1个回答
0
投票

该问题源于在输入序列上使用 tf.one_hot:每个词元被展开成 20000 维的 one-hot 向量,于是 cuDNN 的 LSTM 内核必须以 input_size=20000 运行(这正对应错误信息中的配置 [1, 20000, 32, 1, 600, 32, 32])。如此巨大的输入维度通常会让 cuDNN 的 ThenRnnForward 在 GPU 上失败(常见原因是显存不足),从而抛出 InternalError;错误里的 rnn_direction_mode 只是底层按前向/后向两个单向内核分别执行 Bidirectional 包装器的表现,并不是配置错误。

您可以按照以下步骤操作。 1)去掉 one-hot 编码,改用 Embedding 层把整数词元序列映射为低维稠密向量(例如 64 维),再送入双向 LSTM 层——这正是下面代码的做法

2)确保输入序列具有正确的形状

(batch_size, sequence_length)
,无需使用one-hot编码。

你可以尝试这些代码。

import os, pathlib, shutil, random
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow as tf

# Preparing the data: carve a reproducible 20% validation split out of
# aclImdb/train into aclImdb/val, one class directory at a time.
batch_size = 32
base_dir = pathlib.Path("aclImdb")
val_dir = base_dir / "val"
train_dir = base_dir / "train"
for category in ("neg", "pos"):
    category_val_dir = val_dir / category
    # exist_ok=True: the questioner has typically already created these
    # directories, and the original code would crash with FileExistsError.
    os.makedirs(category_val_dir, exist_ok=True)

    # Only split once. Re-running the move loop after a previous split would
    # move *additional* training files and corrupt the 80/20 split.
    if not os.listdir(category_val_dir):
        files = os.listdir(train_dir / category)
        random.Random(1337).shuffle(files)  # fixed seed -> reproducible split
        num_val_samples = int(0.2 * len(files))
        for fname in files[-num_val_samples:]:
            shutil.move(train_dir / category / fname, category_val_dir / fname)

# Directory names ("neg"/"pos") become the binary labels (0/1).
train_ds = keras.utils.text_dataset_from_directory("aclImdb/train", batch_size=batch_size)
val_ds = keras.utils.text_dataset_from_directory("aclImdb/val", batch_size=batch_size)
test_ds = keras.utils.text_dataset_from_directory("aclImdb/test", batch_size=batch_size)
# Labels stripped: TextVectorization.adapt() only needs the raw text.
text_only_train_ds = train_ds.map(lambda x, y: x)

# Preparing integer sequence datasets: each review becomes a fixed-length
# (600) sequence of token ids in [0, 20000).
max_length = 600
max_tokens = 20000
text_vectorization = layers.TextVectorization(
    max_tokens=max_tokens,
    output_mode="int",
    output_sequence_length=max_length,
)
text_vectorization.adapt(text_only_train_ds)

int_train_ds = train_ds.map(
    lambda x, y: (text_vectorization(x), y),
    num_parallel_calls=4
)
int_val_ds = val_ds.map(
    lambda x, y: (text_vectorization(x), y),
    num_parallel_calls=4
)
int_test_ds = test_ds.map(
    lambda x, y: (text_vectorization(x), y),
    num_parallel_calls=4
)

# A sequence model built on integer vector sequences.
inputs = keras.Input(shape=(max_length,), dtype="int64")
# Embedding layer instead of one-hot: each timestep is a dense 64-dim vector,
# so the cuDNN LSTM kernel runs with input_size=64 rather than 20000.
embedded = layers.Embedding(input_dim=max_tokens, output_dim=64)(inputs)
x = layers.Bidirectional(layers.LSTM(32))(embedded)
x = layers.Dropout(0.5)(x)
# Single sigmoid unit for binary (neg/pos) sentiment classification.
outputs = layers.Dense(1, activation="sigmoid")(x)

model = keras.Model(inputs, outputs)
model.compile(optimizer="rmsprop",
              loss="binary_crossentropy",
              metrics=["accuracy"])

model.summary()

# Continue with training as before; checkpoint keeps only the best
# (lowest validation loss) weights.
callbacks = [
    keras.callbacks.ModelCheckpoint("bidir_lstm_model.keras", save_best_only=True)
]

model.fit(int_train_ds, validation_data=int_val_ds, epochs=10, callbacks=callbacks)
© www.soinside.com 2019 - 2024. All rights reserved.