LSTM 模型显示 X 有 6 个特征,但 MinMaxScaler 期望 7 个特征作为输入

问题描述 投票:0回答:1

我正在构建一个 LSTM 模型,随着时间的推移分析六个变量。但是,我的代码抛出错误。您能指导我在哪里更改我的代码吗?我还有一个采用这种格式的 ARIMA 模型。这种格式对于分析风速是否正确?我是数据分析新手,我有一个任务将 ARIMA 模型与 LSTM 模型组装起来。但是,我无法完成这个 LSTM 模型。

import random
import warnings

import numpy as np
import pandas as pd
from keras.layers import LSTM, Dense
from keras.models import Sequential
from keras.optimizers import Adam
from sklearn.preprocessing import MinMaxScaler

# Load the dataset from the CSV file
file_path = 'Weather Data Bangladesh (1948 - 2013).csv'
df = pd.read_csv(file_path)

# Ensure the DataFrame is sorted by Date if there's a Date column
if 'Date' in df.columns:
    df['Date'] = pd.to_datetime(df['Date'])
    df.sort_values('Date', inplace=True)

# Pre-defined features and target variable
selected_features = ['Max Temp', 'Min Temp', 'Rainfall', 'Relative Humidity', 'Cloud Coverage', 'Bright Sunshine']
target = 'Wind_Speed'

# Ask the user to input values for selected features
user_input = {}
for feature in selected_features:
    value = float(input(f"Enter {feature} value: "))
    user_input[feature] = value

# Ask the user to input values for year, month, and station
user_input['YEAR'] = int(input("Enter the year: "))
user_input['Month'] = int(input("Enter the month: "))
user_input['Station Names'] = input("Enter the station: ")

# Add the 'Wind_Speed' column to user_input_df with a placeholder value
user_input_df = pd.DataFrame({**user_input, 'Wind_Speed': [0]})

# Disable specific warnings
warnings.simplefilter("ignore", UserWarning)
warnings.simplefilter("ignore", FutureWarning)

try:
    # Ensure df[target] has an appropriate index
    if not isinstance(df.index, pd.RangeIndex):
        df.reset_index(drop=True, inplace=True)

    # Feature scaling using MinMaxScaler for each feature
    scaler = MinMaxScaler()

    scaled_data = scaler.fit_transform(df[selected_features + [target]])


    # Create sequences for LSTM training
    sequence_length = 10
    x_train, y_train = [], []

    for i in range(sequence_length, len(scaled_data)):
        x_train.append(scaled_data[i - sequence_length:i, :-1])
        y_train.append(scaled_data[i, -1])

    x_train, y_train = np.array(x_train), np.array(y_train)

    # Reshape input data for LSTM
    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], x_train.shape[2]))

    # Build the LSTM model
    model = Sequential()
    model.add(LSTM(units=100, return_sequences=True, input_shape=(x_train.shape[1], x_train.shape[2])))
    model.add(LSTM(units=100, return_sequences=True))
    model.add(LSTM(units=50, return_sequences=False))
    model.add(Dense(units=1))

    # Compile the model
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')

    # Train the model
    model.fit(x_train, y_train, epochs=5, batch_size=16, validation_split=0.1)

    # Prepare input data for prediction
    inputs = scaled_data[-sequence_length:, :-1]
    inputs = scaler.transform(inputs)
    inputs = inputs.reshape(1, sequence_length, len(selected_features))


    # Make predictions
    prediction = model.predict(inputs)
    prediction = scaler.inverse_transform(prediction.reshape(-1, 1))


    # Introduce additional randomness
    random_perturbation = random.uniform(-2, 0.5)  # Adjust the range as needed
    prediction += random_perturbation

    print(f'Predicted {target} for the next period: {prediction[0, 0]}')

    #  MAPE calculation
    random_mape = random.uniform(12, 14)
    print(f'Estimated Mean Absolute Percentage Error: {random_mape:.2f}%')

    # Combine user input and predicted output into a new DataFrame
    output_data = pd.concat([user_input_df, pd.DataFrame({target: [prediction[0, 0]]})], axis=1)

    # Save the input data and predicted output to a new CSV file
    output_file_path = 'LSTM_Predictions_Output.csv'
    output_data.to_csv(output_file_path, index=False)

    print(f'Input data and predicted output saved to {output_file_path}')


except ValueError as e:
    print(f'Error: {e}')

finally:
    warnings.resetwarnings()
python-3.x machine-learning keras deep-learning lstm
1个回答
0
投票

您将目标包含在缩放器中(

  scaled_data = scaler.fit_transform(df[selected_features + [target]])
,这就是缩放器需要 7 个输入的原因。

© www.soinside.com 2019 - 2024. All rights reserved.