Time series prediction in TensorFlow.js

Question · votes: 0 · answers: 2

I'm new to this field.

I wrote this code, but it doesn't work well: I only see a middling price, never a real prediction.

I built a 3D tensor from the previous open, high, low and close prices, with 5 time steps, and I need to predict the next close.

Input example: open/high/low/close, 5 time steps, 75 samples:

[ /* 75 samples */
  [ /* sample 1 (5 time steps) */
    /* open, high, low, close */
    [1905, 1906, 1903, 1904],
    [1904, 1905, 1904, 1906],
    [1906, 1907, 1904, 1907],
    [1907, 1908, 1902, 1905],
    [1905, 1906, 1904, 1904]
  ],
  [ /* sample 2 */
    [1904, 1905, 1904, 1906],
    [1906, 1907, 1904, 1907],
    [1907, 1908, 1902, 1905],
    [1905, 1906, 1904, 1904],
    [1904, 1906, 1902, **1903**]
  ],
  ...

The output is just the close value of time step 6, i.e. the step right after each window.

Example:

/* input */
[ /* sample 1 (5 time steps) */
  /* open, high, low, close */
  [1905, 1906, 1903, 1904],
  [1904, 1905, 1904, 1906],
  [1906, 1907, 1904, 1907],
  [1907, 1908, 1902, 1905],
  [1905, 1906, 1904, 1904]
]

/* output */
1903 (the last close of sample 2), ...
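To make the windowing explicit, here is a minimal sketch of the idea (buildWindows is just an illustrative helper, not part of my actual code below):

/* sketch: build inputs X and targets y from OHLC rows,
   where `rows` is an array of {open, high, low, close} objects */
function buildWindows(rows, timeSteps) {
    const X = [];
    const y = [];
    for (let i = 0; i + timeSteps < rows.length; i++) {
        // one sample: timeSteps consecutive OHLC rows
        X.push(rows.slice(i, i + timeSteps).map(r => [r.open, r.high, r.low, r.close]));
        // its target: the close of the row right after the window
        y.push(rows[i + timeSteps].close);
    }
    return {X, y}; // X: [samples, timeSteps, 4], y: [samples]
}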

What's wrong?

    /* global tf, tfvis */

async function getData() {

// Import from CSV
    const dataSet = tf.data.csv('http://localhost:8888/ts2/eurusd2.csv');

// Extract x and y values to plot
    const pointsDataSet = dataSet.map(record => ({
            /*date: record["\<DTYYYYMMDD>"]+record["\<TIME>"],*/
            open: record["\<OPEN\>"] * 10000,
            high: record["\<HIGH\>"] * 10000,
            low: record["\<LOW\>"] * 10000,
            close: record["\<CLOSE\>"] * 10000

        }));

    const points = await pointsDataSet.toArray();

    return points;
}



function preparaDatiInput(data, time_steps) {
    // e.g. 5 time steps; the 6th one is the prediction
    if (data.length > time_steps) {

        let arr = new Array();

        for (let i = 0; i < data.length - time_steps; i++) {

            arr.push(data.slice(i, i + time_steps).map(d => {
                return [d.open, d.high, d.low, d.close];
            }));

        }

        return arr;
    } else
    {
        return false;
    }

}

function preparaDatiOutput(data, time_steps) {

    /* the output is always a single value */
    if (data.length > time_steps) {

        let arr = new Array();

        for (let i = time_steps; i < data.length; i++) {

            arr.push(data[i].close);

        }

        return arr;

    } else
    {
        return false;
    }
}

async function train_data(data) {

    const size = 75;
    const time_steps = 5;

    const input = preparaDatiInput(data.slice(0, size), time_steps);
    const output = preparaDatiOutput(data.slice(0, size), time_steps);

    const testing = preparaDatiInput(data.slice(size), time_steps);
    const risultatiTesting = preparaDatiOutput(data.slice(size), time_steps);

    /* shape of the 3D tensor: [samples, time steps, features] */
    const trainingData = tf.tensor3d(input, [input.length, input[0].length, input[0][0].length]);
    const outputData = tf.tensor1d(output);


    const testingData = tf.tensor3d(testing, [testing.length, testing[0].length, testing[0][0].length]);


    const trainingDataMax = trainingData.max();
    const trainingDataMin = trainingData.min();

    const testingDataMax = testingData.min();
    const testingDataMin = testingData.max();

    const  outputDataMax = outputData.min();
    const outputDataMin = outputData.max();

    const normalizedTrainingData = trainingData.sub(trainingDataMin).div(trainingDataMax.sub(trainingDataMin));
    const normalizedTestingData = testingData.sub(testingDataMin).div(testingDataMax.sub(testingDataMin));
    const normalizedOutputData = outputData.sub(outputDataMin).div(outputDataMax.sub(outputDataMin));

    const model = tf.sequential();
    /* time_steps, features */
    model.add(tf.layers.lstm({units: 20, inputShape: [5, 4], returnSequences: false}));
    /* 1 output */
    model.add(tf.layers.dense({units: 1, activation: 'sigmoid'}));

    model.summary();

    const sgdoptimizer = tf.train.adam(0.03);

    model.compile({
        optimizer: sgdoptimizer,
        loss: tf.losses.meanSquaredError,
        metrics: ["mse"]
    });


    console.log('......Loss History.......');
    for (let i = 0; i < 10; i++) {
        let res = await model.fit(normalizedTrainingData, normalizedOutputData, {epochs: 10});
        console.log(`Iteration ${i}: ${res.history.loss[0]}`);
    }

    console.log('....Model Prediction .....');


    const preds = model.predict(normalizedTestingData);

    const unNormPreds = preds
            .mul(outputDataMax.sub(outputDataMin))
            .add(outputDataMin).dataSync();

    console.log(unNormPreds);

    const risultati_veri = risultatiTesting.map((d, i) => {
        return {
            x: i, y: d
        };
    });
    const previsioni = Array.from(unNormPreds).map((d, i) => {
        return {
            x: i, y: d
        };
    });

    tfvis.render.linechart(
            {name: 'Model Predictions vs Original Data'},
            {values: [risultati_veri, previsioni], series: ['original', 'predicted']},
            {
                xLabel: 'index',
                yLabel: 'price',
                height: 300,
                zoomToFit: true
            }
    );

}

async function main() {
    const data = await getData();
    await train_data(data);

}

main();

Sample of eurusd2.csv:

<TICKER>,<DTYYYYMMDD>,<TIME>,<OPEN>,<HIGH>,<LOW>,<CLOSE>,<VOL>
EURUSD,20010102,230100,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,230200,0.9506,0.9506,0.9505,0.9505,4
EURUSD,20010102,230300,0.9505,0.9507,0.9505,0.9506,4
EURUSD,20010102,230400,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010102,230500,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010102,230600,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010102,230700,0.9505,0.9507,0.9505,0.9507,4
EURUSD,20010102,230800,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,230900,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,231000,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,231100,0.9507,0.9507,0.9506,0.9507,4
EURUSD,20010102,231200,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,231300,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,231400,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,231500,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,231600,0.9507,0.9507,0.9506,0.9506,4
EURUSD,20010102,232000,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,232100,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,232200,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,232300,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,232400,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,233000,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,233100,0.9508,0.9508,0.9508,0.9508,4
EURUSD,20010102,233500,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,233600,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,233700,0.9507,0.9508,0.9507,0.9508,4
EURUSD,20010102,233800,0.9509,0.9509,0.9509,0.9509,4
EURUSD,20010102,233900,0.9509,0.9509,0.9509,0.9509,4
EURUSD,20010102,234000,0.9509,0.9509,0.9509,0.9509,4
EURUSD,20010102,234100,0.9508,0.9508,0.9508,0.9508,4
EURUSD,20010102,234400,0.9508,0.9508,0.9508,0.9508,4
EURUSD,20010102,234500,0.9508,0.9508,0.9508,0.9508,4
EURUSD,20010102,234700,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,234900,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,235000,0.9507,0.9508,0.9506,0.9506,4
EURUSD,20010102,235100,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010102,235200,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010102,235300,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,235400,0.9507,0.9507,0.9506,0.9506,4
EURUSD,20010102,235500,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,235600,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,235700,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,235800,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010102,235900,0.9507,0.9507,0.9506,0.9506,4
EURUSD,20010103,000000,0.9506,0.9507,0.9506,0.9507,4
EURUSD,20010103,000100,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010103,000200,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010103,000300,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010103,000400,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010103,000500,0.9507,0.9507,0.9506,0.9507,4
EURUSD,20010103,000600,0.9507,0.9507,0.9506,0.9506,4
EURUSD,20010103,000700,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010103,000800,0.9507,0.9507,0.9506,0.9506,4
EURUSD,20010103,000900,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010103,001100,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010103,001200,0.9506,0.9506,0.9505,0.9506,4
EURUSD,20010103,001300,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010103,001400,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010103,001500,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010103,001700,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010103,001800,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010103,001900,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010103,002000,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010103,002100,0.9506,0.9506,0.9506,0.9506,4
EURUSD,20010103,002200,0.9506,0.9507,0.9506,0.9507,4
EURUSD,20010103,002300,0.9507,0.9507,0.9507,0.9507,4
EURUSD,20010103,002400,0.9508,0.9508,0.9507,0.9507,4
EURUSD,20010103,002500,0.9508,0.9510,0.9508,0.9510,4
EURUSD,20010103,002600,0.9510,0.9510,0.9509,0.9509,4
EURUSD,20010103,002700,0.9509,0.9509,0.9509,0.9509,4
EURUSD,20010103,002800,0.9509,0.9509,0.9509,0.9509,4
EURUSD,20010103,002900,0.9508,0.9508,0.9507,0.9507,4
EURUSD,20010103,003000,0.9508,0.9508,0.9507,0.9507,4
EURUSD,20010103,003100,0.9507,0.9507,0.9507,0.9507,4
machine-learning tensor tensorflow.js
2 Answers

1 vote

I'm improving my code:


/* global tf, tfvis */

async function getData() {

//QOUA4VUTZJXS3M01

    return new Promise((resolve, reject) => {

        //const url='https://www.alphavantage.co/query?function=FX_INTRADAY&from_symbol=EUR&to_symbol=USD&interval=1min&outputsize=full&apikey=QOUA4VUTZJXS3M01';

        const url = 'https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol=MSFT&outputsize=full&apikey=QOUA4VUTZJXS3M01';

        $.getJSON(url, function (data) {

            let rawData = Object.values(data["Time Series (Daily)"]).map(d => ({open: parseFloat(d["1. open"]), high: parseFloat(d["2. high"]), low: parseFloat(d["3. low"]), close: parseFloat(d["4. close"])}));
            resolve(rawData.reverse());

        });

    });
}





function prepareInputDatas(data, time_steps) {

    /* if there is more data than time steps */
    if (data.length > time_steps) {

        /* indicator examples */

        /*
         let rsi = RSI.calculate({period: time_steps, values: data.map(d => d.close)});
         let sma = SMA.calculate({period: time_steps, values: data.map(d => d.close)});


         for (let i = 0; i < data.length; i++) {
         data[i].sma = 0;
         }

         let d = 0;
         for (let i = time_steps - 1; i < data.length; i++) {
         data[i].sma = sma[d];
         d++;
         }


         for (let i = 1; i < data.length; i++) {
         if (data[i].close > data[i - 1].close) {
         data[i].ind = 1;
         } else if (data[i].close < data[i - 1].close) {
         data[i].ind = 0;
         } else {
         data[i].ind = 0.5;
         }
         }
         */


        let arr = new Array();

        for (let i = 0; i < data.length - time_steps; i++) {

            /*let sma = SMA.calculate({period: time_steps, values: data.slice(i, i + time_steps).map(d => d.close)})[0];*/

            /* build the training (or testing) array with the x values (features); the batch size is the array's first dimension, i.e. the samples */
            arr.push(data.slice(i, i + time_steps).map(d => {

                return [d.open, d.high, d.low, d.close /*,d.sma*/];


            }));

        }

        return arr;
    } else
    {
        return false;
    }

}

function prepareOutputDatas(data, time_steps) {

    if (data.length > time_steps) {

        let arr = new Array();

        /* create output training set (or testing values) (y values) */
        for (let i = time_steps; i < data.length; i++) {

            arr.push(data[i].close);


        }

        return arr;

    } else
    {
        return false;
    }
}

function prepareInputTestingDatas(data, time_steps) {

    /* if there is more data than time steps */
    if (data.length > time_steps) {

        /* indicator examples */

        /*
         let rsi = RSI.calculate({period: time_steps, values: data.map(d => d.close)});
         let sma = SMA.calculate({period: time_steps, values: data.map(d => d.close)});


         for (let i = 0; i < data.length; i++) {
         data[i].sma = 0;
         }

         let d = 0;
         for (let i = time_steps - 1; i < data.length; i++) {
         data[i].sma = sma[d];
         d++;
         }


         for (let i = 1; i < data.length; i++) {
         if (data[i].close > data[i - 1].close) {
         data[i].ind = 1;
         } else if (data[i].close < data[i - 1].close) {
         data[i].ind = 0;
         } else {
         data[i].ind = 0.5;
         }
         }
         */


        let arr = new Array();

        for (let i = 0; i <= data.length - time_steps; i++) {

            /*let sma = SMA.calculate({period: time_steps, values: data.slice(i, i + time_steps).map(d => d.close)})[0];*/

            /* build the training (or testing) array with the x values (features); the batch size is the array's first dimension, i.e. the samples */
            arr.push(data.slice(i, i + time_steps).map(d => {

                return [d.open, d.high, d.low, d.close /*,d.sma*/];


            }));

        }

        return arr;
    } else
    {
        return false;
    }

}

function prepareOutputTestingDatas(data, time_steps) {

    if (data.length > time_steps) {

        let arr = new Array();

        /* create output training set (or testing values) (y values) */
        for (let i = time_steps; i <= data.length; i++) {
            if (data[i]) {
                arr.push(data[i].close);
            } 


        }

        return arr;

    } else
    {
        return false;
    }
}



async function train_data(data) {

    /* sometimes Chrome crashes and you need to open a new window */

    const size = Math.floor(data.length / 100 * 98);
    const time_steps = 30;

    const predict_size = data.length - size;

    const start = data.length - size - predict_size;

    const input = prepareInputDatas(data.slice(start, start + size), time_steps);
    const output = prepareOutputDatas(data.slice(start, start + size), time_steps);


    const testing = prepareInputTestingDatas(data.slice(start + size, start + size + predict_size), time_steps);
    const testingResults = prepareOutputTestingDatas(data.slice(start + size, start + size + predict_size), time_steps);

    /* Creating tensors (input 3d tensor, and output 1d tensor) */

    const input_size_2 = input[0].length;
    const input_size = input[0][0].length;

    const trainingData = tf.tensor3d(input, [input.length, input_size_2, input_size]);
    const outputData = tf.tensor1d(output);

    const testing_size_2 = testing[0].length;
    const testing_size = testing[0][0].length;

    const testingData = tf.tensor3d(testing, [testing.length, testing_size_2, testing_size]);
    const outputTestingData = tf.tensor1d(testingResults);


    /* normalizing datas */
    const trainingDataMax = trainingData.max();
    const trainingDataMin = trainingData.min();

    const testingDataMax = testingData.max();
    const testingDataMin = testingData.min();

    const outputDataMax = outputData.max();
    const outputDataMin = outputData.min();

    const outputTestingDataMax = outputTestingData.max();
    const outputTestingDataMin = outputTestingData.min();

    const normalizedTrainingData = trainingData.sub(trainingDataMin).div(trainingDataMax.sub(trainingDataMin));
    const normalizedTestingData = testingData.sub(testingDataMin).div(testingDataMax.sub(testingDataMin));

    const normalizedOutputData = outputData.sub(outputDataMin).div(outputDataMax.sub(outputDataMin));
    const normalizedTestingOutputData = outputTestingData.sub(outputTestingDataMin).div(outputTestingDataMax.sub(outputTestingDataMin));


    /* creating model */
    const model = tf.sequential();

    model.add(tf.layers.lstm({inputShape: [input_size_2, input_size], units: input_size_2, returnSequences: false}));

    /* optional hidden LSTM layer (not needed here, since the regression output is a linear operation) */
    //model.add(tf.layers.lstm({units: Math.floor(input_size_2/2), returnSequences: false}));

    model.add(tf.layers.dense({units: 1, activation: "sigmoid"}));

    model.summary();


    /* setting training */
    const learningRate = 0.01;

    /* selecting the best training optimizer */
    const optimizer = tf.train.rmsprop(learningRate, 0.95);


    /* compiling model with optimizer, loss and metrics */
    model.compile({

        optimizer: optimizer,
        loss: tf.losses.meanSquaredError,
        metrics: tf.metrics.meanAbsoluteError

    });


    /* training ... */
    console.log('Loss Log');

    for (let i = 0; i < 25; i++) {
        let res = await model.fit(normalizedTrainingData, normalizedOutputData, {epochs: 1});
        console.log(`Iteration ${i + 1}: ${res.history.loss[0] }`);

    }

    /* training prediction (validation) */

    const validation = model.predict(normalizedTrainingData);

    const unNormValidation = validation
            .mul(outputDataMax.sub(outputDataMin))
            .add(outputDataMin).dataSync();

    const trainingResults = output.map((d, i) => {
        if (d) {
            return {
                x: i, y: d
            };
        }
    });
    const trainingValidation = Array.from(unNormValidation).map((d, i) => {
        if (d) {
            return {
                x: i, y: d
            };
        }
    });

    /* creating training chart */

    tfvis.render.linechart(
            {name: 'Validation Results'},
            {values: [trainingResults, trainingValidation], series: ['original', 'predicted']},
            {
                xLabel: 'index',
                yLabel: 'price',
                height: 300,
                zoomToFit: true
            }
    );

    /* predicting */

    console.log('Real prediction');

    const preds = model.predict(normalizedTestingData);

    const unNormPredictions = preds
            .mul(outputTestingDataMax.sub(outputTestingDataMin))
            .add(outputTestingDataMin).dataSync();

    const realResults = testingResults.map((d, i) => {
        if (d) {
            return {
                x: i, y: d.toFixed(4)
            };
        }
    });
    const predictions = Array.from(unNormPredictions).map((d, i) => {
        if (d) {
            return {
                x: i, y: d.toFixed(4)
            };
        }
    });

    console.log("INPUT",testing);
    console.log("OUTPUT",realResults);
    console.log("PREDICTIONS",predictions);

    /* creating prediction chart */
    tfvis.render.linechart(
            {name: 'Real Predictions'},
            {values: [realResults, predictions], series: ['original', 'predicted']},
            {
                xLabel: 'index',
                yLabel: 'price',
                height: 300,
                zoomToFit: true
            }
    );





}

async function main() {
    const data = await getData();
    await train_data(data);

}

main();

I'm looking for good results.

I can use some technical indicators now, but I'm still not sure how the LSTM takes extra "features" in the array dimensions.
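From what I understand, each extra indicator simply becomes one more element of the innermost array, so the feature dimension grows from 4 to 5. A minimal sketch, assuming each data point already carries a precomputed sma field (as in the commented-out indicator code above):

/* in prepareInputDatas: one more feature per time step */
arr.push(data.slice(i, i + time_steps).map(d => {
    return [d.open, d.high, d.low, d.close, d.sma];
}));

/* the LSTM then sees 5 features instead of 4; with the code above this is
   automatic, because input_size is read from input[0][0].length */
model.add(tf.layers.lstm({inputShape: [time_steps, 5], units: time_steps, returnSequences: false}));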


1 vote

You can do all sorts of things in this space (TensorFlow and time series analysis). Here is some sample code to help you get started:

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from yahoo_fin import stock_info as si
from collections import deque

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
import os
import random


# set seed, so we can get the same results after rerunning several times
np.random.seed(314)
tf.random.set_seed(314)
random.seed(314)



def load_data(ticker, n_steps=50, scale=True, shuffle=True, lookup_step=1, 
                test_size=0.2, feature_columns=['adjclose', 'volume', 'open', 'high', 'low']):
    # see if ticker is already a loaded stock from yahoo finance
    if isinstance(ticker, str):
        # load it from yahoo_fin library
        df = si.get_data(ticker)
    elif isinstance(ticker, pd.DataFrame):
        # already loaded, use it directly
        df = ticker
    # this will contain all the elements we want to return from this function
    result = {}
    # we will also return the original dataframe itself
    result['df'] = df.copy()
    # make sure that the passed feature_columns exist in the dataframe
    for col in feature_columns:
        assert col in df.columns, f"'{col}' does not exist in the dataframe."
    if scale:
        column_scaler = {}
        # scale the data (prices) from 0 to 1
        for column in feature_columns:
            scaler = preprocessing.MinMaxScaler()
            df[column] = scaler.fit_transform(np.expand_dims(df[column].values, axis=1))
            column_scaler[column] = scaler

        # add the MinMaxScaler instances to the result returned
        result["column_scaler"] = column_scaler
    # add the target column (label) by shifting by `lookup_step`
    df['future'] = df['adjclose'].shift(-lookup_step)
    # the last `lookup_step` rows contain NaN in the `future` column
    # grab them before dropping the NaNs
    last_sequence = np.array(df[feature_columns].tail(lookup_step))
    # drop NaNs
    df.dropna(inplace=True)
    sequence_data = []
    sequences = deque(maxlen=n_steps)
    for entry, target in zip(df[feature_columns].values, df['future'].values):
        sequences.append(entry)
        if len(sequences) == n_steps:
            sequence_data.append([np.array(sequences), target])
    # get the last sequence by appending the last `n_step` sequence with `lookup_step` sequence
    # for instance, if n_steps=50 and lookup_step=10, last_sequence should be of 59 (that is 50+10-1) length
    # this last_sequence will be used to predict in future dates that are not available in the dataset
    last_sequence = list(sequences) + list(last_sequence)
    # shift the last sequence by -1
    last_sequence = np.array(pd.DataFrame(last_sequence).shift(-1).dropna())
    # add to result
    result['last_sequence'] = last_sequence
    # construct the X's and y's
    X, y = [], []
    for seq, target in sequence_data:
        X.append(seq)
        y.append(target)
    # convert to numpy arrays
    X = np.array(X)
    y = np.array(y)
    # reshape X to fit the neural network
    X = X.reshape((X.shape[0], X.shape[2], X.shape[1]))
    # split the dataset
    result["X_train"], result["X_test"], result["y_train"], result["y_test"] = train_test_split(X, y, test_size=test_size, shuffle=shuffle)
    # return the result
    return result

def create_model(sequence_length, units=256, cell=LSTM, n_layers=2, dropout=0.3,
                loss="mean_absolute_error", optimizer="rmsprop", bidirectional=False):
    model = Sequential()
    for i in range(n_layers):
        if i == 0:
            # first layer
            if bidirectional:
                model.add(Bidirectional(cell(units, return_sequences=True), input_shape=(None, sequence_length)))
            else:
                model.add(cell(units, return_sequences=True, input_shape=(None, sequence_length)))
        elif i == n_layers - 1:
            # last layer
            if bidirectional:
                model.add(Bidirectional(cell(units, return_sequences=False)))
            else:
                model.add(cell(units, return_sequences=False))
        else:
            # hidden layers
            if bidirectional:
                model.add(Bidirectional(cell(units, return_sequences=True)))
            else:
                model.add(cell(units, return_sequences=True))
        # add dropout after each layer
        model.add(Dropout(dropout))
    model.add(Dense(1, activation="linear"))
    model.compile(loss=loss, metrics=["mean_absolute_error"], optimizer=optimizer)
    return model



# Window size or the sequence length
N_STEPS = 100
# Lookup step, 1 is the next day
LOOKUP_STEP = 1
# test ratio size, 0.2 is 20%
TEST_SIZE = 0.2
# features to use
FEATURE_COLUMNS = ["adjclose", "volume", "open", "high", "low"]
# date now
date_now = time.strftime("%Y-%m-%d")
### model parameters
N_LAYERS = 3
# LSTM cell
CELL = LSTM
# 256 LSTM neurons
UNITS = 256
# 40% dropout
DROPOUT = 0.4
# whether to use bidirectional RNNs
BIDIRECTIONAL = False
### training parameters
# mean absolute error loss
# LOSS = "mae"
# huber loss
LOSS = "huber_loss"
OPTIMIZER = "adam"
BATCH_SIZE = 64
EPOCHS = 100
# Apple stock market
ticker = "AAPL"
ticker_data_filename = os.path.join("data", f"{ticker}_{date_now}.csv")
# model name to save, making it as unique as possible based on parameters
model_name = f"{date_now}_{ticker}-{LOSS}-{OPTIMIZER}-{CELL.__name__}-seq-{N_STEPS}-step-{LOOKUP_STEP}-layers-{N_LAYERS}-units-{UNITS}"
if BIDIRECTIONAL:
    model_name += "-b"


# create these folders if they do not exist
if not os.path.isdir("results"):
    os.mkdir("results")
if not os.path.isdir("logs"):
    os.mkdir("logs")
if not os.path.isdir("data"):
    os.mkdir("data")
    

# load the data
data = load_data(ticker, N_STEPS, lookup_step=LOOKUP_STEP, test_size=TEST_SIZE, feature_columns=FEATURE_COLUMNS)

# save the dataframe
data["df"].to_csv(ticker_data_filename)

# construct the model
model = create_model(N_STEPS, loss=LOSS, units=UNITS, cell=CELL, n_layers=N_LAYERS,
                    dropout=DROPOUT, optimizer=OPTIMIZER, bidirectional=BIDIRECTIONAL)

# some tensorflow callbacks
checkpointer = ModelCheckpoint(os.path.join("results", model_name + ".h5"), save_weights_only=True, save_best_only=True, verbose=1)
tensorboard = TensorBoard(log_dir=os.path.join("logs", model_name))

history = model.fit(data["X_train"], data["y_train"],
                    batch_size=BATCH_SIZE,
                    epochs=EPOCHS,
                    validation_data=(data["X_test"], data["y_test"]),
                    callbacks=[checkpointer, tensorboard],
                    verbose=1)

model.save(os.path.join("results", model_name) + ".h5")


# after the model ends running...or during training, run this
# tensorboard --logdir="logs"
# http://localhost:6006/


data = load_data(ticker, N_STEPS, lookup_step=LOOKUP_STEP, test_size=TEST_SIZE,
                feature_columns=FEATURE_COLUMNS, shuffle=False)

# construct the model
model = create_model(N_STEPS, loss=LOSS, units=UNITS, cell=CELL, n_layers=N_LAYERS,
                    dropout=DROPOUT, optimizer=OPTIMIZER, bidirectional=BIDIRECTIONAL)

model_path = os.path.join("results", model_name) + ".h5"
model.load_weights(model_path)


# evaluate the model
loss, mae = model.evaluate(data["X_test"], data["y_test"], verbose=0)
# calculate the mean absolute error (inverse scaling)
mean_absolute_error = data["column_scaler"]["adjclose"].inverse_transform([[mae]])[0][0]
print("Mean Absolute Error:", mean_absolute_error)


def predict(model, data, classification=False):
    # retrieve the last sequence from data
    last_sequence = data["last_sequence"][:N_STEPS]
    # retrieve the column scalers
    column_scaler = data["column_scaler"]
    # reshape the last sequence
    last_sequence = last_sequence.reshape((last_sequence.shape[1], last_sequence.shape[0]))
    # expand dimension
    last_sequence = np.expand_dims(last_sequence, axis=0)
    # get the prediction (scaled from 0 to 1)
    prediction = model.predict(last_sequence)
    # get the price (by inverting the scaling)
    predicted_price = column_scaler["adjclose"].inverse_transform(prediction)[0][0]
    return predicted_price


# predict the future price
future_price = predict(model, data)
print(f"Future price after {LOOKUP_STEP} days is {future_price:.2f}$")


# Result:
# Mean Absolute Error: 3.4357253022539096
# Future price after 1 days is 311.41$

def plot_graph(model, data):
    y_test = data["y_test"]
    X_test = data["X_test"]
    y_pred = model.predict(X_test)
    y_test = np.squeeze(data["column_scaler"]["adjclose"].inverse_transform(np.expand_dims(y_test, axis=0)))
    y_pred = np.squeeze(data["column_scaler"]["adjclose"].inverse_transform(y_pred))
    # last 200 days, feel free to edit that
    plt.plot(y_test[-200:], c='b')
    plt.plot(y_pred[-200:], c='r')
    plt.xlabel("Days")
    plt.ylabel("Price")
    plt.legend(["Actual Price", "Predicted Price"])
    plt.show()
    
    
plot_graph(model, data)

After running 100 epochs...

Epoch 99/100
7872/7885 [============================>.] - ETA: 0s - loss: 1.0276e-04 - mean_absolute_error: 0.0086
Epoch 00099: val_loss did not improve from 0.00002
7885/7885 [==============================] - 11s 1ms/sample - loss: 1.0276e-04 - mean_absolute_error: 0.0086 - val_loss: 3.8095e-05 - val_mean_absolute_error: 0.0057
Epoch 100/100
7872/7885 [============================>.] - ETA: 0s - loss: 1.1034e-04 - mean_absolute_error: 0.0086 
Epoch 00100: val_loss did not improve from 0.00002
7885/7885 [==============================] - 11s 1ms/sample - loss: 1.1040e-04 - mean_absolute_error: 0.0086 - val_loss: 2.9450e-05 - val_mean_absolute_error: 0.0035

Finally, you get the resulting chart (actual vs. predicted price):

Once again, you can take this in many different directions!
