日期格式的LSTM错误

Question

这是我深度学习的第一次尝试，这段代码的目的是预测FOREX市场的方向。

这是代码：

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential

# Column layout of the header-less CSV file read below.
column_names = ['Date', 'Time', 'Open', 'High', 'Low','Close', 'Volume']

data = pd.read_csv(r"E:\Tutorial\EURUSD60.csv", header=None, names=column_names)

# Merge the Date and Time strings into a single datetime column and drop the
# originals. DateTime remains an ordinary column of the frame, so it is
# counted in n_features and is part of the matrix handed to the scaler.
data['DateTime'] = pd.to_datetime(data.Date + ' ' + data.Time)
del data['Date']
del data['Time']

sequence_length = 21  # rows per window: 20 input steps + 1 target step
n_features = len(data.columns)
val_ratio = 0.1  # share of the windows held out for validation
n_epochs = 300
batch_size = 512

# Build overlapping windows of sequence_length consecutive rows.
data = data.as_matrix()
data_processed = []
for index in range(len(data) - sequence_length):
    data_processed.append(data[index: index + sequence_length])
data_processed = np.array(data_processed)

# Split by position (no shuffling): the leading part trains, the tail validates.
val_split = round((1 - val_ratio) * data_processed.shape[0])
train = data_processed[: int(val_split), :]
val = data_processed[int(val_split):, :]

print('Training data: {}'.format(train.shape))
print('Validation data: {}'.format(val.shape))

train_samples, train_nx, train_ny = train.shape
val_samples, val_nx, val_ny = val.shape

# Flatten each window to a single row so the scaler receives 2-D input; every
# (step, column) position then gets its own min/max.
train = train.reshape((train_samples, train_nx * train_ny))
val = val.reshape((val_samples, val_nx * val_ny))

# The scaler is fitted on the training windows only and reused for validation.
# This fit() is the call that raises the TypeError quoted after the script:
# the matrix still holds the DateTime column, whose Timestamp values cannot
# be converted to float.
preprocessor = MinMaxScaler().fit(train)
train = preprocessor.transform(train)
val = preprocessor.transform(val)

# Restore the (samples, steps, columns) layout.
train = train.reshape((train_samples, train_nx, train_ny))
val = val.reshape((val_samples, val_nx, val_ny))

# Inputs are all steps but the last; the target is the last column of the
# final step of each window.
X_train = train[:, : -1]
y_train = train[:, -1][:, -1]
X_val = val[:, : -1]
y_val = val[:, -1][:, -1]

X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], n_features))
X_val = np.reshape(X_val, (X_val.shape[0], X_val.shape[1], n_features))

# Two stacked LSTM layers with dropout, followed by one linear output unit.
model = Sequential()
model.add(LSTM(input_shape=(X_train.shape[1:]), units=128, return_sequences=True))
model.add(Dropout(0.5))
model.add(LSTM(128, return_sequences=False))
model.add(Dropout(0.25))
model.add(Dense(units=1))
model.add(Activation("linear"))

model.compile(loss="mse", optimizer="adam")

# Only the training windows are passed to fit(); the validation windows are
# used solely for the prediction further down.
history = model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=n_epochs,
    verbose=2)

preds_val = model.predict(X_val)
# Per-sample error in scaled units; collected but not used afterwards here.
diff = []
for i in range(len(y_val)):
    pred = preds_val[i][0]
    diff.append(y_val[i] - pred)

# Min/max used to map scaled values back to the original units.
# NOTE(review): 104 is hard-coded. With 21 steps x 6 columns the flattened
# width is 126 and the target sliced above sits at the last position (125),
# so 104 does not appear to be the target's column - confirm before relying
# on the rescaled plot.
real_min = preprocessor.data_min_[104]
real_max = preprocessor.data_max_[104]
print(preprocessor.data_min_[104])
print(preprocessor.data_max_[104])

# Undo the min-max scaling for predictions and targets.
preds_real = preds_val * (real_max - real_min) + real_min
y_val_real = y_val * (real_max - real_min) + real_min

plt.plot(preds_real, label='Predictions')
plt.plot(y_val_real, label='Actual values')
plt.xlabel('test')
plt.legend(loc=0)
plt.show()

这是错误：

Using TensorFlow backend.
2017-12-03 13:26:44.494199: W C:\tf_jenkins\home\workspace\rel-win\M\windows\PY\36\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use AVX instructions, but these are available on your machine and could speed up CPU computations.
2017-12-03 13:26:44.494660: W C:\tf_jenkins\home\workspace\rel-win\M\windows\PY\36\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use AVX2 instructions, but these are available on your machine and could speed up CPU computations.
Training data: (1824, 21, 6)
Validation data: (203, 21, 6)
Traceback (most recent call last):
  File "E:/Tutorial/Deep Learning.py", line 42, in <module>
    preprocessor = MinMaxScaler().fit(train)
  File "C:\Users\sydgo\Anaconda3\lib\site-packages\sklearn\preprocessing\data.py", line 308, in fit
    return self.partial_fit(X, y)
  File "C:\Users\sydgo\Anaconda3\lib\site-packages\sklearn\preprocessing\data.py", line 334, in partial_fit
    estimator=self, dtype=FLOAT_DTYPES)
  File "C:\Users\sydgo\Anaconda3\lib\site-packages\sklearn\utils\validation.py", line 433, in check_array
    array = np.array(array, dtype=dtype, order=order, copy=copy)
TypeError: float() argument must be a string or a number, not 'Timestamp'

Answer 1

预期的 dtype 与实际传入的数据类型发生了冲突：

TypeError: float() argument must be a string or a number, not 'Timestamp'

最可能的问题根源是这一行：

data['DateTime'] = pd.to_datetime(data.Date + ' ' + data.Time)

你需要回到建模思路本身，重新考虑：究竟应该把哪些量（如果有的话）作为 FOREX 量化建模的特征集合输入 LSTM 模型。时间戳本身不是数值特征，不能直接交给 MinMaxScaler。

Answer 2

这是修复错误后的代码

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential

# Train a two-layer LSTM on sliding windows of hourly EUR/USD bars and plot
# its one-step-ahead prediction of the target column against actual values.

# Column layout of the header-less CSV file.
column_names = ['Date', 'Time', 'Open', 'High', 'Low', 'Close', 'Volume']

df = pd.read_csv(r"E:\Tutorial\EURUSD60.csv", header=None, names=column_names)

# Use the combined date/time as the index rather than as a feature column:
# Timestamp values cannot be cast to float, which is what MinMaxScaler needs.
df['timestamp'] = pd.to_datetime(df.Date + ' ' + df.Time)
del df['Date']
del df['Time']
df.rename(columns={'Open': 'open', 'High': 'high', 'Low': 'low',
                   'Close': 'close', 'Volume': 'volume'}, inplace=True)
df.set_index('timestamp', inplace=True)
df = df.astype(float)

# Numeric calendar features derived from the index.
df['hour'] = df.index.hour
df['day'] = df.index.weekday
# ISO week number, computed per timestamp because DatetimeIndex.week no
# longer exists in pandas >= 2.0.
df['week'] = [ts.isocalendar()[1] for ts in df.index]

sequence_length = 21  # rows per window: 20 input steps + 1 target step
target_column = 'close'  # the quantity the network is trained to predict
n_features = len(df.columns)
target_idx = df.columns.get_loc(target_column)
val_ratio = 0.1  # share of the windows held out for validation
n_epochs = 300
batch_size = 512

# .values works on every pandas version; as_matrix() was removed in 1.0.
data = df.values
if len(data) <= sequence_length:
    raise ValueError(
        'Need more than {} rows to build a window, got {}'.format(
            sequence_length, len(data)))

# Overlapping windows of sequence_length consecutive rows.
data_processed = np.array(
    [data[start: start + sequence_length]
     for start in range(len(data) - sequence_length)])

# Split by position (no shuffling): the leading part trains, the tail validates.
val_split = int(round((1 - val_ratio) * data_processed.shape[0]))
train = data_processed[:val_split]
val = data_processed[val_split:]

print('Training data: {}'.format(train.shape))
print('Validation data: {}'.format(val.shape))

train_samples, train_nx, train_ny = train.shape
val_samples, val_nx, val_ny = val.shape

# Flatten each window to one row so the scaler receives 2-D input; every
# (step, feature) position gets its own min/max, learned from training data
# only and then applied unchanged to the validation windows.
train = train.reshape((train_samples, train_nx * train_ny))
val = val.reshape((val_samples, val_nx * val_ny))

preprocessor = MinMaxScaler().fit(train)
train = preprocessor.transform(train)
val = preprocessor.transform(val)

train = train.reshape((train_samples, train_nx, train_ny))
val = val.reshape((val_samples, val_nx, val_ny))

# Inputs: every step but the last, all features.
# Target: the target column at the final step. Selecting it by name keeps the
# target correct when feature columns are added (taking "the last column"
# would silently switch the target to the week number).
X_train = train[:, :-1, :]
y_train = train[:, -1, target_idx]
X_val = val[:, :-1, :]
y_val = val[:, -1, target_idx]

# Two stacked LSTM layers with dropout, followed by one linear output unit.
model = Sequential()
model.add(LSTM(input_shape=(X_train.shape[1:]), units=128, return_sequences=True))
model.add(Dropout(0.5))
model.add(LSTM(128, return_sequences=False))
model.add(Dropout(0.25))
model.add(Dense(units=1))
model.add(Activation("linear"))

model.compile(loss="mse", optimizer="adam")

history = model.fit(
    X_train,
    y_train,
    batch_size=batch_size,
    epochs=n_epochs,
    verbose=2)

preds_val = model.predict(X_val)
# Per-sample residuals in scaled units.
diff = y_val - preds_val[:, 0]

# Position of the target inside a flattened window (row-major: step * width
# + column). The inverse scaling must use the min/max of exactly this
# position, not a fixed index that only fits one particular feature count.
target_flat_idx = (sequence_length - 1) * n_features + target_idx
real_min = preprocessor.data_min_[target_flat_idx]
real_max = preprocessor.data_max_[target_flat_idx]
print(real_min)
print(real_max)

# Undo the min-max scaling for predictions and targets.
preds_real = preds_val * (real_max - real_min) + real_min
y_val_real = y_val * (real_max - real_min) + real_min

plt.plot(preds_real, label='Predictions')
plt.plot(y_val_real, label='Actual values')
plt.xlabel('test')
plt.legend(loc=0)
plt.show()

日期格式的LSTM错误

问题描述投票：2回答：2

2个回答

最新问题

日期格式的LSTM错误

问题描述 投票：2回答：2

2个回答

最新问题

问题描述投票：2回答：2