我正在尝试学习 LSTM 网络并使用人工数据集进行练习。我收到一个错误:
ValueError: Can not squeeze dim[2], expected a dimension of 1, got 4 for '{{node Squeeze}} = Squeeze[T=DT_FLOAT, squeeze_dims=[-1]](Cast)' with input shapes: [2,12,4].
我的目标是创建一个包含不同时间长度的输入向量。例如,我可能有特定公司 4 个月的数据,但对于另一家公司,我可能有 12 个月的数据。然后,我使用填充和掩码来使用 LSTM 训练我的网络。让我分享一个小例子来展示我在做什么。
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Masking
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
#I generate random data as input and output
X = []
y = []
for i in range(10):
length = np.random.randint(3, 13)
X.append(np.random.rand(length, 4))
y.append(np.random.randint(0, 3, size=length))
#Since I have varying lengths in my input, I use padding and then masking. I use -100 as a padding value for now
X_padded = pad_sequences(X, dtype='float32', padding='post', value=-100)
#I use one hot encoding for the output vector
y_categorical = [to_categorical(y_i, num_classes=3) for y_i in y]
y_padded = pad_sequences(y_categorical, dtype='float32', padding='post', value=-100)
x_mask = (X_padded != -100).astype(np.int32)
y_mask = (y_padded != -100).astype(np.int32)
#Both x_mask and y_mask will be used during training to get rid of the padding value
#Then I use sckit learn to split my data into training and testing
X_train, X_test, y_train, y_test, X_mask_train, X_mask_test, y_mask_train, y_mask_test = train_test_split(X_padded, y_padded, x_mask, y_mask, test_size=0.2)
# I generate a very simple model.
model = Sequential()
model.add(Masking(mask_value=-100., input_shape=(None, 4)))
model.add(LSTM(units=256, return_sequences=True))
model.add(Dense(units=128, activation='relu'))
model.add(Dense(units=3, activation='softmax'))
# I compile my model
model.compile(loss='categorical_crossentropy', optimizer= tf.keras.optimizers.Adam(learning_rate=0.001), metrics=['accuracy'])
# Lastly, I train using sample_weight
model.fit(X_train, y_train, epochs=10, batch_size=2, sample_weight=(X_mask_train, y_mask_train))
我怀疑我使用的方式
sample_weight
可能不正确