我编写了以下算法来实现 Ridge 回归,并通过交叉验证估计其参数。具体来说,我想实现以下目标:
我在实现第 2 点的过程中似乎出了错,但我不知道错在哪里。能否请大家对下面的实现给出意见?
# Build the train/test index lists (point 1) as an expanding window:
# split k trains on folds[0..k] and is evaluated on folds[k+1].
n_splits = 10
kf = KFold(n_splits=n_splits, shuffle=False)
# Only the held-out half of each KFold split is kept; these chunks are the
# building blocks of the window (shuffle=False, so no reordering is requested).
folds = [held_out for _, held_out in kf.split(df_train)]
# Training set number k is the concatenation of the first k folds ...
indexes_train = [np.concatenate(folds[:k]) for k in range(1, n_splits)]
# ... and its test set is the fold that comes right after them.
indexes_test = folds[1:]
# Tune the Ridge penalty (point 2) over the custom splits built above.
# The scaler is a pipeline step, so it is fitted together with the model
# on whatever rows the search hands to the pipeline.
pipe = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('model', Ridge(fit_intercept=True)),
    ]
)
# Candidate penalties; the 'model__' prefix routes them to the Ridge step.
alpha_tune = {'model__alpha': self.alpha_values}
# Pair every training index array with its matching test index array.
cross_validation = list(zip(indexes_train, indexes_test))
model = GridSearchCV(
    estimator=pipe,
    param_grid=alpha_tune,
    cv=cross_validation,
    scoring='neg_mean_squared_error',
    n_jobs=-1,
)
model = model.fit(features_train, labels_train)
# The alpha with the best mean negative-MSE score across the splits.
best_alpha = model.best_params_['model__alpha']