ValueError:matmul: 输入操作数1在其核心维度0中存在不匹配,gufunc签名为(n?,k),(k,m?)->(n?,m?)

问题描述 投票:0回答:1

试图用我的决策树模型得到一个预测,在最后一行代码上给出了挂名的错误。

     X=BTC_cleanData[-1:]
---> print(regressor.predict(X))
ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),
(k,m?)->(n?,m?) (size 145 is different from 146)

据我所知,我已经成功地训练和测试了该模型,但当我试图输出预测时,我做错了什么。我认为我定义要预测的目标的方式有些问题,就是在矩阵的某个地方增加了一列,因此出现了matmul错误。我该如何写一个能用的预测函数呢?

这是完整的代码,我没有写特征选择,因为它非常长。

import pandas as pd
import numpy as np
import talib
import matplotlib.pyplot as plt
%matplotlib inline
import investpy
from investpy import data 
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split

#Import open, high, low, close, volume and Return data from csv using investpy
BTC = data = investpy.get_crypto_historical_data(crypto='bitcoin', from_date='01/01/2014', to_date='06/08/2020')

#Convert Data from Int to Float
BTC.Volume = BTC.Volume.astype(float)
BTC.High = BTC.High.astype(float)
BTC.Low = BTC.Low.astype(float)
BTC.Close = BTC.Close.astype(float)

#Drop Unnecessary Columns
del BTC['Currency']

#Select Indicators as Features
BTC['AD'] = talib.AD(BTC['High'].values, BTC['Low'].values, BTC['Close'].values, BTC['Volume'].values)
...(there is a long list here)

#Create forward looking columns using shift
BTC['NextDayPrice'] = BTC['Close'].shift(-1)

#Copy dataframe and clean data 
BTC_cleanData = BTC.copy()
BTC_cleanData.dropna(inplace=True)
BTC_cleanData.to_csv('C:/Users/Admin/Desktop/BTCdata.csv')

#Split Data into Training and Testing Set
#separate the features and targets into separate datasets.
#split the data into training and testing sets using a 70/30 split 
#Using splicing, separate the features from the target into individual data sets.  
X_all = BTC_cleanData.iloc[:, BTC_cleanData.columns != 'NextDayPrice']  # feature values for all days
y_all = BTC_cleanData['NextDayPrice']  # corresponding targets/labels
print (X_all.head())  # print the first 5 rows

#Split the data into training and testing sets using the given feature as the target
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_all, y_all, test_size=0.30, random_state=42)

from sklearn.linear_model import LinearRegression


#Create a decision tree regressor and fit it to the training set
regressor = LinearRegression()
regressor.fit(X_train,y_train)

print ("Training set: {} samples".format(X_train.shape[0]))
print ("Test set: {} samples".format(X_test.shape[0]))

#Evaluate Model (out of sample Accuracy and Mean Squared Error)
from sklearn.model_selection import cross_validate
from sklearn.model_selection import cross_val_score

scores = cross_val_score(regressor, X_test, y_test, cv=10)
print ("accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() / 2))    

from sklearn.metrics import mean_squared_error

mse = mean_squared_error(y_test, regressor.predict(X_test))
print("MSE: %.4f" % mse)

#Evaluate Model (In sample Accuracy and Mean Squared Error)
trainscores = cross_val_score(regressor, X_train, y_train, cv=10)
print ("accuracy: %0.2f (+/- %0.2f)" % (trainscores.mean(), trainscores.std() / 2))    

mse = mean_squared_error(y_train, regressor.predict(X_train))
print("MSE: %.4f" % mse)
print(regressor.predict(X_train))

#Predict Next Day Price
X=BTC_cleanData[-1:]
print(regressor.predict(X))
python machine-learning scikit-learn decision-tree
1个回答
0
投票

您已经使用 X_train 的数据。要预测看不见的数据,你只需要做到 print(regressor.predict(X_test)).

之前你有。

X=BTC_cleanData[-1:] # this has one more column compared to X_train and X_test
print(regressor.predict(X))

但是... BTC_cleanData[-1:] 与X_train和X_test相比,多了一列。然而,该模型是用 X_train 没有这个额外的列,这就导致了错误。


清理工作代码。

import pandas as pd
import numpy as np
import talib
import matplotlib.pyplot as plt
%matplotlib inline
import investpy
from investpy.crypto import get_crypto_historical_data
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_validate
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

#Import open, high, low, close, volume and Return data from csv using investpy
BTC = get_crypto_historical_data(crypto='bitcoin', from_date='01/01/2014', to_date='06/08/2020')

#Convert Data from Int to Float
BTC.Volume = BTC.Volume.astype(float)
BTC.High = BTC.High.astype(float)
BTC.Low = BTC.Low.astype(float)
BTC.Close = BTC.Close.astype(float)

#Drop Unnecessary Columns
del BTC['Currency']

#Select Indicators as Features
BTC['AD'] = talib.AD(BTC['High'].values, BTC['Low'].values, BTC['Close'].values, BTC['Volume'].values)


#Create forward looking columns using shift
BTC['NextDayPrice'] = BTC['Close'].shift(-1)

#Copy dataframe and clean data 
BTC_cleanData = BTC.copy()
BTC_cleanData.dropna(inplace=True)
#BTC_cleanData.to_csv('C:/Users/Admin/Desktop/BTCdata.csv')

#Split Data into Training and Testing Set
#separate the features and targets into separate datasets.
#split the data into training and testing sets using a 70/30 split 
#Using splicing, separate the features from the target into individual data sets.  
X_all = BTC_cleanData.iloc[:, BTC_cleanData.columns != 'NextDayPrice']  # feature values for all days
y_all = BTC_cleanData['NextDayPrice']  # corresponding targets/labels
print (X_all.head())  # print the first 5 rows

#Split the data into training and testing sets using the given feature as the target
X_train, X_test, y_train, y_test = train_test_split(X_all, y_all, test_size=0.30, random_state=42)

#Create a decision tree regressor and fit it to the training set
regressor = LinearRegression()
regressor.fit(X_train,y_train)

print ("Training set: {} samples".format(X_train.shape[0]))
print ("Test set: {} samples".format(X_test.shape[0]))

#Evaluate Model (out of sample Accuracy and Mean Squared Error)
scores = cross_val_score(regressor, X_test, y_test, cv=10)
print ("accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() / 2))    

mse = mean_squared_error(y_test, regressor.predict(X_test))
print("MSE: %.4f" % mse)

#Evaluate Model (In sample Accuracy and Mean Squared Error)
trainscores = cross_val_score(regressor, X_train, y_train, cv=10)
print ("accuracy: %0.2f (+/- %0.2f)" % (trainscores.mean(), trainscores.std() / 2))    

mse = mean_squared_error(y_train, regressor.predict(X_train))
print("MSE: %.4f" % mse)
print(regressor.predict(X_train))

#Predict Next Day Price
print(regressor.predict(X_test))
© www.soinside.com 2019 - 2024. All rights reserved.