StackingClassifier（堆叠分类器）无法识别 KerasClassifier

问题描述 投票:1回答:1

我把 StackingClassifier 用在了 5 个 scikit-learn 分类器和一个 Keras 分类器上。然而，它似乎没有把 KerasClassifier 识别为分类器。

相关代码:

from tensorflow.keras import layers
from tensorflow import keras
from keras.constraints import maxnorm

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation,  Flatten, Input
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras import metrics
import joblib
from joblib import parallel_backend
np.random.seed(42)
from sklearn.model_selection import GridSearchCV
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
import sklearn
from sklearn.ensemble import StackingClassifier
def create_model():
    """Build and compile the binary-classification network used by KerasClassifier.

    The function takes no arguments because it is passed as ``build_fn``; all
    hyperparameters (``best_neurons``, ``best_init_mode``,
    ``best_weight_constraint``, ``best_dropout_rate``, ``best_learn_rate``)
    and ``X_train`` are read from module scope and must exist before the
    model is built.

    Returns:
        A compiled ``Sequential`` model with a single sigmoid output unit,
        trained with binary cross-entropy and reporting AUC and accuracy.
    """
    model = Sequential()
    # Hidden layer: the input width is taken from the training matrix.
    model.add(
        Dense(
            best_neurons,
            input_shape=(X_train.shape[1],),
            kernel_initializer=best_init_mode,
            activation='relu',
            kernel_constraint=maxnorm(best_weight_constraint),
        )
    )
    model.add(Dropout(best_dropout_rate))
    # Kept from the original layer stack; the Dense output is already
    # (batch, units), so this should not change the shape.
    model.add(Flatten())
    model.add(Dense(units=1, kernel_initializer=best_init_mode, activation='sigmoid'))

    # Use the `keras` name this snippet actually imports (`tf` is never
    # imported here) and the `learning_rate` keyword instead of the
    # deprecated `lr` alias.
    optimizer = keras.optimizers.RMSprop(learning_rate=best_learn_rate)
    model.compile(
        loss='binary_crossentropy',
        optimizer=optimizer,
        metrics=[keras.metrics.AUC(), 'accuracy'],
    )
    return model


# Base learners for the stacked ensemble. Every best_* hyperparameter is
# defined elsewhere (presumably by earlier tuning runs -- not shown here).

# Keras network wrapped in the scikit-learn adapter so it can be listed
# next to the other estimators.
NN_clf=KerasClassifier(build_fn=create_model, epochs=best_epochs, batch_size= best_batch_size)
RF_clf =RandomForestClassifier(max_depth=best_max_depth_rf, n_estimators=best_n_estimators_rf, 
                               min_samples_leaf=best_min_samples_leaf_rf, max_features=best_max_features_rf,
                               class_weight=best_class_weight_rf, max_samples=best_max_samples_rf,
                               random_state=42, oob_score=True)
KN_clf =KNeighborsClassifier(n_neighbors=best_n_neighbors,  p=best_p, leaf_size=best_leaf_size )
#DT_clf = DecisionTreeClassifier(max_depth=best_max_depth_dt, min_samples_leaf=best_min_samples_leaf_dt)
# probability=True so the SVC can produce class probabilities.
SV_clf =  SVC(gamma=best_gamma_sv, C=best_c_sv, kernel=best_kernel_sv, random_state=42, probability=True)
GBC_clf =  xgb.XGBClassifier(learning_rate=best_learning_rate_gbc, random_state=42, colsample_bytree=best_colsample_bytree_gbc,
                             max_depth=best_max_depth_gbc, n_estimators=best_n_estimators_gbc,
                            gamma=best_gamma_gbc, subsample=best_subsample_gbc)
EX_clf= ExtraTreesClassifier(max_depth=best_max_depth_ex, n_estimators=best_n_estimators_ex, 
                             min_samples_leaf=best_min_samples_leaf_ex, max_features=best_max_features_ex,
                             warm_start=False, oob_score=True, bootstrap=True, random_state=42)
LR_clf=LogisticRegression(random_state=42, solver=best_solver, penalty=best_penalty, class_weight=best_class_weight, C=best_log_C)

# (name, estimator) pairs handed to StackingClassifier; the Keras wrapper is
# the last entry.
estimators= [('RF', RF_clf), ('GBC', GBC_clf),  ('EX', EX_clf), ('LR',LR_clf), ('KN', KN_clf),
            ('SV', SV_clf), ('NN', NN_clf) ]
# A fresh LogisticRegression combines the base learners' outputs.
clf = StackingClassifier(estimators=estimators, final_estimator=LogisticRegression(), n_jobs=-1)
# NOTE: this is the call that fails with
# "ValueError: The estimator KerasClassifier should be a classifier."
# (see the traceback that follows this snippet).
# assumes y_train / y_test are pandas objects, flattened here to 1-D arrays -- TODO confirm
clf.fit(X_train, y_train.values.ravel())
print("Stacking model score: %.3f" % clf.score(X_test, y_test.values.ravel()))

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-41-272df6aa838e> in <module>
      2             ('SV', SV_clf), ('NN', NN_clf) ]
      3 clf = StackingClassifier(estimators=estimators, final_estimator=LogisticRegression(), n_jobs=-1)
----> 4 clf.fit(X_train, y_train.values.ravel())
      5 print("Stacking model score: %.3f" % clf.score(X_test, y_test.values.ravel()))

~\Anaconda3\lib\site-packages\sklearn\ensemble\_stacking.py in fit(self, X, y, sample_weight)
    411         self._le = LabelEncoder().fit(y)
    412         self.classes_ = self._le.classes_
--> 413         return super().fit(X, self._le.transform(y), sample_weight)
    414 
    415     @if_delegate_has_method(delegate='final_estimator_')

~\Anaconda3\lib\site-packages\sklearn\ensemble\_stacking.py in fit(self, X, y, sample_weight)
    129         # all_estimators contains all estimators, the one to be fitted and the
    130         # 'drop' string.
--> 131         names, all_estimators = self._validate_estimators()
    132         self._validate_final_estimator()
    133 

~\Anaconda3\lib\site-packages\sklearn\ensemble\_base.py in _validate_estimators(self)
    247                 raise ValueError(
    248                     "The estimator {} should be a {}.".format(
--> 249                         est.__class__.__name__, is_estimator_type.__name__[3:]
    250                     )
    251                 )

ValueError: The estimator KerasClassifier should be a classifier.

我正在使用 scikit-learn 0.22 版本和 TensorFlow 2.x 版本。我在别处见过类似的错误，但我不想为此重写代码并改用 mlxtend 库。

tensorflow keras scikit-learn ensemble-learning
1个回答
0
投票

这个问题的成因与此前有人针对 VotingClassifier 报告的类似问题相同。

解决方法很简单：给 KerasClassifier 实例加上属性 `_estimator_type = "classifier"` 即可（见下方代码）。

注意:请仅提供最少的代码来重现您的问题。

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.linear_model import LogisticRegression
import numpy as np
from tensorflow.keras import layers
from tensorflow import keras
from keras.constraints import maxnorm

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation,  Flatten, Input
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras import metrics
import joblib
from joblib import parallel_backend
np.random.seed(42)
from sklearn.model_selection import GridSearchCV
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
import sklearn
from sklearn.ensemble import StackingClassifier
from sklearn.neighbors import KNeighborsClassifier

def create_model():
    """Build and compile a small binary classifier for the stacking demo.

    Takes no arguments because it is passed to ``KerasClassifier`` as
    ``build_fn``. The network expects 20 input features.

    Returns:
        A compiled ``Sequential`` model with one sigmoid output unit,
        trained with binary cross-entropy and reporting AUC and accuracy.
    """
    model = Sequential()
    model.add(Dense(20, input_dim=20, activation='relu'))
    model.add(Dropout(0.2))
    # Kept from the original layer stack; the Dense output is already
    # (batch, units), so this should not change the shape.
    model.add(Flatten())
    model.add(Dense(units=1, activation='sigmoid'))

    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    optimizer = keras.optimizers.RMSprop(learning_rate=0.001)
    model.compile(
        loss='binary_crossentropy',
        optimizer=optimizer,
        metrics=[keras.metrics.AUC(), 'accuracy'],
    )
    return model


# Imports needed by this part of the script. SVC is used below but was never
# imported in the original listing, which made the example fail with a
# NameError before reaching the stacking step.
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

# Wrap the Keras network in the scikit-learn adapter.
NN_clf = KerasClassifier(build_fn=create_model, epochs=15, batch_size=32)
# The actual fix: StackingClassifier's estimator validation rejects the
# wrapper with "The estimator KerasClassifier should be a classifier."
# unless it is tagged as a classifier, so set the tag by hand.
NN_clf._estimator_type = "classifier"

# Remaining base learners, with default hyperparameters for the demo.
RF_clf = RandomForestClassifier(random_state=42, oob_score=True)
KN_clf = KNeighborsClassifier()
# probability=True so the SVC can produce class probabilities.
SV_clf = SVC(random_state=42, probability=True)
EX_clf = ExtraTreesClassifier(random_state=42)
LR_clf = LogisticRegression(random_state=42)

# (name, estimator) pairs for the stack; a fresh LogisticRegression combines
# the base learners' outputs.
estimators = [
    ('RF', RF_clf),
    ('EX', EX_clf),
    ('LR', LR_clf),
    ('KN', KN_clf),
    ('SV', SV_clf),
    ('NN', NN_clf),
]
clf = StackingClassifier(estimators=estimators, final_estimator=LogisticRegression())

# Synthetic data set, split 70/30 into train and test.
X, y = make_classification()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

clf.fit(X_train, y_train)
print("Stacking model score: %.3f" % clf.score(X_test, y_test))

# Example output reported by the author:
# Stacking model score: 0.967

# Stacking model score: 0.967
© www.soinside.com 2019 - 2024. All rights reserved.