我使用 SVR 来预测我的数据
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import mean_absolute_error
import csv
from math import sqrt
from scipy import stats
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score
filepath = '/content/drive/MyDrive/TESIS/DATA/'
pilihankolom = 35
X = pd.read_csv(filepath+'Data_Radiomic.csv').to_numpy()
Y = pd.read_csv(filepath+'Data_Dosiomic.csv', usecols=[pilihankolom])
print(X.shape)
Y_label = str(Y.columns)
Y = Y.to_numpy()
(187, 98)
model=SVR(kernel='rbf', C=10, epsilon=0.01)
kf = KFold(n_splits=4)
X_training, X_testing, Y_training, Y_testing = train_test_split(X, Y, test_size=0.2, random_state=0)
print(X_training.shape, X_testing.shape)
(149, 98) (38, 98)
prediction = []
mse_set = []
for train_index, test_index in kf.split(X_training):
print(train_index, test_index)
X_train, X_test = X_training[train_index], X_training[test_index]
Y_train, Y_test = Y_training[train_index], Y_training[test_index]
model.fit(X_train, Y_train)
Y_pred = model.predict(X_test)
mse_set.append(mean_squared_error(Y_test, Y_pred))
prediction.extend(Y_pred)
这就是结果
ValueError Traceback (most recent call last)
<ipython-input-62-98eca0acb6ac> in <cell line: 3>()
5 X_train, X_test = X_training[train_index], X_training[test_index]
6 Y_train, Y_test = Y_training[train_index], Y_training[test_index]
----> 7 model.fit(X_train, Y_train)
8 Y_pred = model.predict(X_test)
9 mse_set.append(mean_squared_error(Y_test, Y_pred))
4 frames
/usr/local/lib/python3.10/dist-packages/sklearn/svm/_base.py in fit(self, X, y, sample_weight)
190 check_consistent_length(X, y)
191 else:
--> 192 X, y = self._validate_data(
193 X,
194 y,
/usr/local/lib/python3.10/dist-packages/sklearn/base.py in _validate_data(self, X, y, reset, validate_separately, **check_params)
582 y = check_array(y, input_name="y", **check_y_params)
583 else:
--> 584 X, y = check_X_y(X, y, **check_params)
585 out = X, y
586
/usr/local/lib/python3.10/dist-packages/sklearn/utils/validation.py in check_X_y(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, estimator)
1104 )
1105
-> 1106 X = check_array(
1107 X,
1108 accept_sparse=accept_sparse,
/usr/local/lib/python3.10/dist-packages/sklearn/utils/validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator, input_name)
919
920 if force_all_finite:
--> 921 _assert_all_finite(
922 array,
923 input_name=input_name,
/usr/local/lib/python3.10/dist-packages/sklearn/utils/validation.py in _assert_all_finite(X, allow_nan, msg_dtype, estimator_name, input_name)
159 "#estimators-that-handle-nan-values"
160 )
--> 161 raise ValueError(msg_err)
162
163
ValueError:输入 X 包含 NaN。 SVR 不接受原生编码为 NaN 的缺失值。对于监督学习,您可能需要考虑 sklearn.ensemble.HistGradientBoostingClassifier 和 Regressor,它们接受本机编码为 NaN 的缺失值。或者,可以对数据进行预处理,例如通过在管道中使用输入转换器或删除具有缺失值的样本。请参阅 https://scikit-learn.org/stable/modules/impute.html 您可以在以下页面找到处理 NaN 值的所有估计器的列表:https://scikit-learn.org/stable/模块/impute.html#estimators-that-handle-nan-values
请提供解决此错误的任何建议
您的输入数据包含无效/缺失值(NaN 或“不是数字”)。三个主要选项:
HistGradientBoostingRegressor
。您使用的 (SVM
) 会出现 NaN 错误,就像大多数 sklearn
估计器一样。有关处理 NaN 值的估计器列表,请参阅此处。SVR
,您可以简单地从数据集中删除 NaN,然后再将其提供给 SVR
。您应该考虑是否可以简单地丢弃任何具有一个或多个缺失值的样本/行。SimpleImputer
替换它们。这会将 NaN 替换为数字,然后您可以将清理后的数据集提供给 SVR
。 SimpleImputer
是一个很好的起点,here 更多的是关于插补。