我正在尝试从头开始构建自定义的K折RandomSearchCV。我了解RandomSearchCV的工作原理,并尝试从头开始在随机生成的数据集上实现它。当我尝试运行代码时,出现以下错误。我认为这与我在x_train
列表中创建群组的方式有关。这是什么错误及其解决方法? :
ValueError Traceback (most recent call last)
<ipython-input-12-229cc493eeb9> in <module>
41
42 classifier = KNeighborsClassifier()
---> 43 RandomSearchCV(X_train,y_train, classifier, folds = 3)
44
45
<ipython-input-12-229cc493eeb9> in RandomSearchCV(x_train, y_train, classifier, folds)
26 #classifier (K-NN)
27 classifier.n_neighbors = parameter
---> 28 classifier.fit(x_train_group, y_train_group)
29
30 #Predicton
~\anaconda3\lib\site-packages\sklearn\neighbors\_base.py in fit(self, X, y)
1128 """
1129 if not isinstance(X, (KDTree, BallTree)):
-> 1130 X, y = check_X_y(X, y, "csr", multi_output=True)
1131
1132 if y.ndim == 1 or y.ndim == 2 and y.shape[1] == 1:
~\anaconda3\lib\site-packages\sklearn\utils\validation.py in check_X_y(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, warn_on_dtype, estimator)
753 ensure_min_features=ensure_min_features,
754 warn_on_dtype=warn_on_dtype,
--> 755 estimator=estimator)
756 if multi_output:
757 y = check_array(y, 'csr', force_all_finite=True, ensure_2d=False,
~\anaconda3\lib\site-packages\sklearn\utils\validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)
572 if not allow_nd and array.ndim >= 3:
573 raise ValueError("Found array with dim %d. %s expected <= 2."
--> 574 % (array.ndim, estimator_name))
575
576 if force_all_finite:
ValueError: Found array with dim 3. Estimator expected <= 2.
这是我的实现:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy
from tqdm import tqdm
import numpy as np
from sklearn.metrics.pairwise import euclidean_distances
import random
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
x,y = make_classification(n_samples=10000, n_features=2, n_informative=2, n_redundant= 0, n_clusters_per_class=1, random_state=60)
X_train, X_test, y_train, y_test = train_test_split(x,y,stratify=y,random_state=42)
def RandomSearchCV(x_train,y_train, classifier, folds):
train_scores = []
test_scores = []
#1. Generating 10 unique values from given range
params = random.sample(range(0, 50), 10)
x_train_split = []
y_train_split = []
#dividing x_train into groups
for i in range(0, len(x_train), int(len(x_train)/folds)):
x_train_split.append(x_train[i:i+int(len(x_train)/folds)])
y_train_split.append(y_train[i:i+int(len(y_train)/folds)])
#3.for each hyperparameter that we generated in step 1 and dividing dataset into training and CV datasets:
for parameter in params:
trainscores_folds = []
testscores_folds = []
for group in range(len(x_train_split)):
x_train_group = x_train_split[0:group] + x_train_split[group+1:]
x_cv_group = [x_train_split[group]]
y_train_group = y_train_split[0:group] + y_train_split[group+1:]
y_cv_group = [y_train_split[group]]
#classifier (K-NN)
classifier.n_neighbors = parameter
classifier.fit(x_train_group, y_train_group)
#Predicton
y_pred = classifier.predict(x_cv_group)
testscores_folds.append(accuracy_score(y_cv_group, Y_pred))
y_pred = classifier.predict(x_train_group)
trainscores_folds.append(accuracy_score(y_train_group, Y_pred))
trainscores.append(np.mean(np.array(trainscores_folds)))
testscores.append(np.mean(np.array(testscores_folds)))
return trainscores, testscores
classifier = KNeighborsClassifier()
RandomSearchCV(X_train,y_train, classifier, folds = 3)
谢谢您的帮助。
x_train_group
是数组的列表,这使其成为3维的(如错误中所述)。这不适用于分类器,因为它需要二维输入。尝试调用np.concatenate(x_train_group)
连接折叠并将其设为二维输入。
作为错误状态,当classifier.fit()
方法需要二维矩阵时,您正在使用三维数组。解决此问题所需要做的就是将您的训练/简历/测试组更改为以下内容:
for group in range(len(x_train_split)):
x_train_group = np.concatenate(x_train_split[0:group] + x_train_split[group+1:])
x_cv_group = x_train_split[group]
y_train_group = np.concatenate(y_train_split[0:group] + y_train_split[group+1:])
y_cv_group = y_train_split[group]
....