ValueError:输入 X 包含 NaN。 SVR 不接受原生编码为 NaN 的缺失值

问题描述 投票:0回答:1

我使用 SVR 来预测我的数据

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import mean_absolute_error
import csv
from math import sqrt
from scipy import stats
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score

# Base directory that holds both input CSV files.
filepath = '/content/drive/MyDrive/TESIS/DATA/'

# Integer position of the single column read from Data_Dosiomic.csv as the target.
pilihankolom = 35

# Feature matrix: all columns of Data_Radiomic.csv as a NumPy array.
# NOTE(review): no missing-value check or imputation happens here; the
# ValueError reported for model.fit() says this array contains NaN.
X = pd.read_csv(filepath+'Data_Radiomic.csv').to_numpy()
# Target: only the column at position `pilihankolom` is loaded (still a DataFrame here).
Y = pd.read_csv(filepath+'Data_Dosiomic.csv', usecols=[pilihankolom])
print(X.shape)
# String representation of the DataFrame's column index, kept before the
# column labels are lost in the NumPy conversion below.
Y_label = str(Y.columns)
Y = Y.to_numpy()

(187, 98)

# RBF-kernel support vector regressor with fixed hyperparameters.
model=SVR(kernel='rbf', C=10, epsilon=0.01)
# 4-fold splitter for the cross-validation loop (no shuffle argument is passed).
kf = KFold(n_splits=4)

# Hold out 20% of the samples for final testing; random_state=0 fixes the split.
X_training, X_testing, Y_training, Y_testing = train_test_split(X, Y, test_size=0.2, random_state=0)
print(X_training.shape, X_testing.shape)

(149, 98) (38, 98)

# Accumulators: validation-fold predictions and one MSE value per fold.
prediction = []
mse_set = []
# Cross-validate on the training portion only; the hold-out test set is not touched here.
for train_index, test_index in kf.split(X_training):
  print(train_index, test_index)
  # Index the training arrays with this fold's fit / validation row indices.
  X_train, X_test = X_training[train_index], X_training[test_index]
  Y_train, Y_test = Y_training[train_index], Y_training[test_index]
  # The same SVR instance is refit on every fold.
  # NOTE(review): this is the call that raises the reported ValueError when
  # X_train contains NaN.
  model.fit(X_train, Y_train)
  Y_pred = model.predict(X_test)
  mse_set.append(mean_squared_error(Y_test, Y_pred))
  prediction.extend(Y_pred)

这就是结果

[ 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148] [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37]

ValueError                                Traceback (most recent call last)
<ipython-input-62-98eca0acb6ac> in <cell line: 3>()
      5   X_train, X_test = X_training[train_index], X_training[test_index]
      6   Y_train, Y_test = Y_training[train_index], Y_training[test_index]
----> 7   model.fit(X_train, Y_train)
      8   Y_pred = model.predict(X_test)
      9   mse_set.append(mean_squared_error(Y_test, Y_pred))

4 frames
/usr/local/lib/python3.10/dist-packages/sklearn/svm/_base.py in fit(self, X, y, sample_weight)
    190             check_consistent_length(X, y)
    191         else:
--> 192             X, y = self._validate_data(
    193                 X,
    194                 y,

/usr/local/lib/python3.10/dist-packages/sklearn/base.py in _validate_data(self, X, y, reset, validate_separately, **check_params)
    582                 y = check_array(y, input_name="y", **check_y_params)
    583             else:
--> 584                 X, y = check_X_y(X, y, **check_params)
    585             out = X, y
    586 

/usr/local/lib/python3.10/dist-packages/sklearn/utils/validation.py in check_X_y(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, estimator)
   1104         )
   1105 
-> 1106     X = check_array(
   1107         X,
   1108         accept_sparse=accept_sparse,

/usr/local/lib/python3.10/dist-packages/sklearn/utils/validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator, input_name)
    919 
    920         if force_all_finite:
--> 921             _assert_all_finite(
    922                 array,
    923                 input_name=input_name,

/usr/local/lib/python3.10/dist-packages/sklearn/utils/validation.py in _assert_all_finite(X, allow_nan, msg_dtype, estimator_name, input_name)
    159                 "#estimators-that-handle-nan-values"
    160             )
--> 161         raise ValueError(msg_err)
    162 
    163 

ValueError:输入 X 包含 NaN。 SVR 不接受原生编码为 NaN 的缺失值。对于监督学习,您可能需要考虑 sklearn.ensemble.HistGradientBoostingClassifier 和 Regressor,它们接受原生编码为 NaN 的缺失值。或者,可以对数据进行预处理,例如在管道中使用插补转换器(imputer transformer),或删除具有缺失值的样本。请参阅 https://scikit-learn.org/stable/modules/impute.html 。您可以在以下页面找到处理 NaN 值的所有估计器的列表:https://scikit-learn.org/stable/modules/impute.html#estimators-that-handle-nan-values

请提供解决此错误的任何建议

python machine-learning artificial-intelligence prediction
1个回答
0
投票

您的输入数据包含无效/缺失值(NaN,即 “Not a Number”,非数字)。主要有三种处理方式:

  • 使用可以处理缺失值的估计器,例如 HistGradientBoostingRegressor。您使用的估计器(SVR)遇到 NaN 会报错,大多数 sklearn 估计器也是如此。有关可处理 NaN 值的估计器列表,请参阅 https://scikit-learn.org/stable/modules/impute.html#estimators-that-handle-nan-values
  • 如果您想继续使用 SVR,可以先从数据集中删除含 NaN 的样本,然后再将其提供给 SVR。您应该先考虑:直接丢弃任何含有一个或多个缺失值的样本/行是否可以接受。
  • 作为删除带有 NaN 的行的替代方法,您可以使用 SimpleImputer 对缺失值进行插补。它会将 NaN 替换为数值,然后您就可以将清理后的数据集提供给 SVR。SimpleImputer 是一个很好的起点;更多关于插补的内容请参阅 https://scikit-learn.org/stable/modules/impute.html
© www.soinside.com 2019 - 2024. All rights reserved.