问题
使用 sklearn.impute.IterativeImputer 中的 interativeImputer,获得适合 BayesianRidge() 的回归模型,用于估算变量“Frontage”中的缺失数据。
在 interative_imputer_fit = interative_imputer.fit(data) 运行后,interative_imputer_fit.transform(X) 运行但调用函数 imputer_bay_ridge(data),来自 interative_imputer 的 transform() 函数,例如 interative_imputer_fit.transform(X) 值错误错误。传入两个变量,Frontage 和 Area。但是只有 Frontage 在 numpy.array 里面。
使用 sklearn 的 Python 代码
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.linear_model import BayesianRidge
def imputer_bay_ridge(data):
data_array = data.to_numpy()
data_array.reshape(1, -1)
interative_imputer = IterativeImputer(BayesianRidge())
interative_imputer_fit = interative_imputer.fit(data_array)
X = data['LotFrontage']
data_imputed = interative_imputer_fit.transform(X)
train_data[['Frontage', 'Area']]
调用函数
fit_tranformed_imputed = imputer_bay_ridge(train_data[['Frontage', 'Area']])
数据示例
train_data[['Frontage', 'Area']]
Frontage Area
0 65.0 8450
1 80.0 9600
2 68.0 11250
3 60.0 9550
4 84.0 14260
... ... ...
1455 62.0 7917
1456 85.0 13175
1457 66.0 9042
1458 68.0 9717
1459 75.0 9937
1460 rows × 2 columns
错误
ValueError Traceback (most recent call last)
Cell In[243], line 1
----> 1 fit_tranformed_imputed = imputer_bay_ridge(train_data[['LotFrontage', 'LotArea']])
Cell In[242], line 12, in imputer_bay_ridge(data)
10 interative_imputer_fit = interative_imputer.fit(data_array)
11 X = data['LotFrontage']
---> 12 data_imputed = interative_imputer_fit.transform(X)
File ~/opt/anaconda3/lib/python3.9/site-packages/sklearn/impute/_iterative.py:724, in IterativeImputer.transform(self, X)
707 """Impute all missing values in `X`.
708
709 Note that this is stochastic, and that if `random_state` is not fixed,
(...)
720 The imputed input data.
721 """
722 check_is_fitted(self)
--> 724 X, Xt, mask_missing_values, complete_mask = self._initial_imputation(X)
726 X_indicator = super()._transform_indicator(complete_mask)
728 if self.n_iter_ == 0 or np.all(mask_missing_values):
File ~/opt/anaconda3/lib/python3.9/site-packages/sklearn/impute/_iterative.py:514, in IterativeImputer._initial_imputation(self, X, in_fit)
511 else:
512 force_all_finite = True
--> 514 X = self._validate_data(
515 X,
516 dtype=FLOAT_DTYPES,
517 order="F",
518 reset=in_fit,
519 force_all_finite=force_all_finite,
520 )
521 _check_inputs_dtype(X, self.missing_values)
523 X_missing_mask = _get_mask(X, self.missing_values)
File ~/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py:566, in BaseEstimator._validate_data(self, X, y, reset, validate_separately, **check_params)
564 raise ValueError("Validation should be done on X, y or both.")
565 elif not no_val_X and no_val_y:
--> 566 X = check_array(X, **check_params)
567 out = X
568 elif no_val_X and not no_val_y:
File ~/opt/anaconda3/lib/python3.9/site-packages/sklearn/utils/validation.py:769, in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator)
767 # If input is 1D raise error
768 if array.ndim == 1:
--> 769 raise ValueError(
770 "Expected 2D array, got 1D array instead:\narray={}.\n"
771 "Reshape your data either using array.reshape(-1, 1) if "
772 "your data has a single feature or array.reshape(1, -1) "
773 "if it contains a single sample.".format(array)
774 )
776 # make sure we actually converted to numeric:
777 if dtype_numeric and array.dtype.kind in "OUSV":
ValueError: Expected 2D array, got 1D array instead:
array=[65. 80. 68. ... 66. 68. 75.].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.