我在5个scikit学习分类器和Keras分类器上使用了StackingClassifier
。然而,似乎并没有将Keras作为分类器。
相关代码:
from tensorflow.keras import layers
from tensorflow import keras
from keras.constraints import maxnorm
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Input
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras import metrics
import joblib
from joblib import parallel_backend
np.random.seed(42)
from sklearn.model_selection import GridSearchCV
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
import sklearn
from sklearn.ensemble import StackingClassifier
def create_model ():
# create model
model = Sequential()
model.add(Dense(best_neurons, input_shape=(X_train.shape[1],), kernel_initializer=best_init_mode, activation='relu',
kernel_constraint=maxnorm(best_weight_constraint)))
model.add(Dropout(best_dropout_rate))
model.add(Flatten())
optimizer= tf.keras.optimizers.RMSprop(lr=best_learn_rate)
model.add(Dense(units = 1, kernel_initializer=best_init_mode, activation = 'sigmoid')) # Compile model
model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=[keras.metrics.AUC(), 'accuracy'])
return model
NN_clf=KerasClassifier(build_fn=create_model, epochs=best_epochs, batch_size= best_batch_size)
RF_clf =RandomForestClassifier(max_depth=best_max_depth_rf, n_estimators=best_n_estimators_rf,
min_samples_leaf=best_min_samples_leaf_rf, max_features=best_max_features_rf,
class_weight=best_class_weight_rf, max_samples=best_max_samples_rf,
random_state=42, oob_score=True)
KN_clf =KNeighborsClassifier(n_neighbors=best_n_neighbors, p=best_p, leaf_size=best_leaf_size )
#DT_clf = DecisionTreeClassifier(max_depth=best_max_depth_dt, min_samples_leaf=best_min_samples_leaf_dt)
SV_clf = SVC(gamma=best_gamma_sv, C=best_c_sv, kernel=best_kernel_sv, random_state=42, probability=True)
GBC_clf = xgb.XGBClassifier(learning_rate=best_learning_rate_gbc, random_state=42, colsample_bytree=best_colsample_bytree_gbc,
max_depth=best_max_depth_gbc, n_estimators=best_n_estimators_gbc,
gamma=best_gamma_gbc, subsample=best_subsample_gbc)
EX_clf= ExtraTreesClassifier(max_depth=best_max_depth_ex, n_estimators=best_n_estimators_ex,
min_samples_leaf=best_min_samples_leaf_ex, max_features=best_max_features_ex,
warm_start=False, oob_score=True, bootstrap=True, random_state=42)
LR_clf=LogisticRegression(random_state=42, solver=best_solver, penalty=best_penalty, class_weight=best_class_weight, C=best_log_C)
estimators= [('RF', RF_clf), ('GBC', GBC_clf), ('EX', EX_clf), ('LR',LR_clf), ('KN', KN_clf),
('SV', SV_clf), ('NN', NN_clf) ]
clf = StackingClassifier(estimators=estimators, final_estimator=LogisticRegression(), n_jobs=-1)
clf.fit(X_train, y_train.values.ravel())
print("Stacking model score: %.3f" % clf.score(X_test, y_test.values.ravel()))
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-41-272df6aa838e> in <module>
2 ('SV', SV_clf), ('NN', NN_clf) ]
3 clf = StackingClassifier(estimators=estimators, final_estimator=LogisticRegression(), n_jobs=-1)
----> 4 clf.fit(X_train, y_train.values.ravel())
5 print("Stacking model score: %.3f" % clf.score(X_test, y_test.values.ravel()))
~\Anaconda3\lib\site-packages\sklearn\ensemble\_stacking.py in fit(self, X, y, sample_weight)
411 self._le = LabelEncoder().fit(y)
412 self.classes_ = self._le.classes_
--> 413 return super().fit(X, self._le.transform(y), sample_weight)
414
415 @if_delegate_has_method(delegate='final_estimator_')
~\Anaconda3\lib\site-packages\sklearn\ensemble\_stacking.py in fit(self, X, y, sample_weight)
129 # all_estimators contains all estimators, the one to be fitted and the
130 # 'drop' string.
--> 131 names, all_estimators = self._validate_estimators()
132 self._validate_final_estimator()
133
~\Anaconda3\lib\site-packages\sklearn\ensemble\_base.py in _validate_estimators(self)
247 raise ValueError(
248 "The estimator {} should be a {}.".format(
--> 249 est.__class__.__name__, is_estimator_type.__name__[3:]
250 )
251 )
ValueError: The estimator KerasClassifier should be a classifier.
我正在使用Sci-kit学习版本2.2,TF版本2.x。我见过类似的错误here,但不想重写我的代码并使用MLextend库。
此问题是由于here报告了类似的问题VotingClassifier
。
解决方案只是将此_estimator_type='classifier'
添加到KerasClassifier
。
注意:请仅提供最少的代码来重现您的问题。
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.linear_model import LogisticRegression
import numpy as np
from tensorflow.keras import layers
from tensorflow import keras
from keras.constraints import maxnorm
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Input
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras import metrics
import joblib
from joblib import parallel_backend
np.random.seed(42)
from sklearn.model_selection import GridSearchCV
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
import sklearn
from sklearn.ensemble import StackingClassifier
from sklearn.neighbors import KNeighborsClassifier
def create_model ():
# create model
model = Sequential()
model.add(Dense(20, input_dim=20, activation='relu'))
model.add(Dropout(0.2))
model.add(Flatten())
optimizer= keras.optimizers.RMSprop(lr=0.001)
model.add(Dense(units = 1, activation = 'sigmoid')) # Compile model
model.compile(loss='binary_crossentropy',
optimizer=optimizer, metrics=[keras.metrics.AUC(), 'accuracy'])
return model
NN_clf=KerasClassifier(build_fn=create_model, epochs=15, batch_size= 32)
NN_clf._estimator_type = "classifier"
RF_clf =RandomForestClassifier(random_state=42, oob_score=True)
KN_clf =KNeighborsClassifier()
SV_clf = SVC(random_state=42, probability=True)
EX_clf= ExtraTreesClassifier(random_state=42)
LR_clf=LogisticRegression(random_state=42,)
estimators= [('RF', RF_clf), ('EX', EX_clf), ('LR',LR_clf), ('KN', KN_clf),
('SV', SV_clf), ('NN', NN_clf) ]
clf = StackingClassifier(estimators=estimators, final_estimator=LogisticRegression())
X, y = make_classification()
from sklearn.model_selection import train_test_split
X_train, X_test, y_train , y_test = train_test_split(X, y, test_size=0.3)
clf.fit(X_train, y_train)
print("Stacking model score: %.3f" % clf.score(X_test, y_test))
# Stacking model score: 0.967