我正在用 Python 编写一个需要使用正则化的多元回归模型。我使用 sklearn 的 Ridge、Lasso、ElasticNet、HuberRegressor 以及 GridSearchCV 来寻找最佳拟合参数,然后提取最佳参数(代码的最后 3 行)。但是现在,对于拟合后的模型,我有两个主要疑问:
Python 脚本:
# Tune four regularised linear regressors with a cross-validated grid search,
# then print the refitted best estimator together with its coefficients and
# intercept. Relies on Ridge, Lasso, ElasticNet, HuberRegressor, GridSearchCV,
# np, time_split, X_train and y_train being defined earlier in the file.
for Reg_model in ['Ridge', 'Lasso', 'ElasticNet', 'HuberRegressor']:  # ,'SCAD'
    # NOTE(review): the `normalize` argument used below was deprecated in
    # scikit-learn 1.0 and removed in 1.2 -- confirm the installed version.
    # On newer releases the scaling has to be done in a Pipeline instead.
    if Reg_model == 'Ridge':
        # Ridge needs comparable feature scales, hence normalize=True.
        base_estimator = Ridge(normalize=True)
        param_grid = {'alpha': np.logspace(-5, 3, 10)}
    elif Reg_model == 'Lasso':
        # Lasso needs comparable feature scales, hence normalize=True.
        base_estimator = Lasso(normalize=True)
        param_grid = {'alpha': np.logspace(-5, 3, 10)}
    elif Reg_model == 'ElasticNet':
        # ElasticNet needs comparable feature scales, hence normalize=True.
        base_estimator = ElasticNet(normalize=True)
        param_grid = {
            'alpha': np.logspace(-5, 3, 10),
            # l1_ratio is the L1/L2 mixing weight and must lie in [0, 1];
            # the previous logspace(0, 3, 10) grid ran from 1 to 1000.
            # Starting at 0.1 avoids the pure-L2 case, which Ridge covers.
            'l1_ratio': np.linspace(0.1, 1.0, 10),
        }
    elif Reg_model == 'HuberRegressor':
        # NOTE(review): no normalize argument is passed here, so this branch
        # does not scale the features itself -- confirm X_train is scaled.
        base_estimator = HuberRegressor()
        param_grid = {
            'alpha': np.logspace(-5, 3, 10),
            'epsilon': np.logspace(0, 3, 10),
        }

    # The search settings are identical for every estimator, so the wrapper
    # is built once here instead of once per branch.
    # Optional extras for GridSearchCV:
    #   n_jobs=multiprocessing.cpu_count() - 1, return_train_score=True
    model = GridSearchCV(
        estimator=base_estimator,
        param_grid=param_grid,
        scoring='neg_root_mean_squared_error',
        cv=time_split,
        verbose=0,
        refit=True,
    )

    # `outliers_` is an attribute of HuberRegressor, not of GridSearchCV, so
    # it is only reachable through model.best_estimator_ after fitting.
    model.fit(X=X_train, y=y_train)

    # refit=True means best_estimator_ has been retrained on all of X_train
    # with the best parameter combination found by the search.
    print(model.best_estimator_)
    print(model.best_estimator_.coef_)
    print(model.best_estimator_.intercept_)