无法使用 scikit-learn 1.4.1.post1 堆叠已训练好的模型

问题描述 投票:0回答:1

我有四个已经训练好的模型:2 个 VotingClassifier 模型和 2 个 StackingClassifier 模型。它们已经训练完毕并以 joblib 格式保存。我可以正常加载它们并用它们进行预测(它们已在生产环境中使用,调用的是各自的 predict 方法)。但是当我尝试把它们全部堆叠起来创建一个新的 StackingClassifier 时,却遇到了“未拟合”(NotFittedError)错误。

代码:

class JoblibModelWrapper(BaseEstimator, ClassifierMixin):
    """Expose an already-trained model through the scikit-learn estimator API.

    The wrapped object is stored as the ``model`` constructor parameter and
    every prediction call is forwarded to it unchanged.
    """

    def __init__(self, model):
        self.model = model

    def fit(self, X, y=None):
        """Return ``self`` without training: ``self.model`` is already fitted."""
        return self

    def predict(self, X):
        """Return the wrapped model's predictions for ``X``."""
        wrapped = self.model
        return wrapped.predict(X)

    def predict_proba(self, X):
        """Return the wrapped model's class probabilities for ``X``.

        Raises:
            RuntimeError: If the wrapped model has no ``predict_proba``.
        """
        # Guard clause first so the happy path reads straight through.
        if not hasattr(self.model, "predict_proba"):
            raise RuntimeError("Le modèle sous-jacent ne supporte pas predict_proba")
        return self.model.predict_proba(X)


import os
import joblib
import pandas as pd

# Directory holding the already-trained models, one ``.joblib`` file per model.
models_directory = '/content/drive/MyDrive/X/preprod_models'
joblib_models = {}
files = os.listdir(models_directory)
# Only ``.joblib`` entries are models. Filtering once keeps the progress counter
# and the set of files actually loaded in agreement, and avoids handing
# unrelated files to joblib.load.
model_files = [filename for filename in files if filename.endswith('.joblib')]
total_models_count = len(model_files)
current_model_index = 0  # stays 0 when the directory contains no model file

for current_model_index, filename in enumerate(model_files, start=1):
    model_path = os.path.join(models_directory, filename)
    model_name = os.path.splitext(filename)[0]  # strip the ".joblib" extension
    # These models are already trained and saved in joblib format (they are used
    # in production today and work when .predict is called on them).
    # NOTE: joblib.load unpickles the file, so only load model files you trust.
    model = joblib.load(model_path)
    joblib_models[model_name] = model
    print(f'Modèle {current_model_index} / {total_models_count} processed: {model_name}')

# The f-prefix is required here; without it the placeholder is printed literally.
print(f'Total processed: {total_models_count}')

# (name, estimator) pairs for the stack. Spaces in the model name become
# underscores and a doubled underscore is collapsed to a single one.
wrapped_joblib_models = [
    (raw_name.replace(' ', '_').replace('__', '_'), JoblibModelWrapper(loaded_model))
    for raw_name, loaded_model in joblib_models.items()
]

# Shuffled, seeded 5-fold stratified splitter passed to the stacking ensemble.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Stack the wrapped models under a logistic-regression final estimator.
# n_jobs is left at its default (parallel execution was tried and disabled).
stacking_models = StackingClassifier(
    estimators=wrapped_joblib_models,
    final_estimator=LogisticRegression(solver='saga', max_iter=10000),
    stack_method='predict',
    passthrough=True,
    cv=cv,
    verbose=3,
)


def evaluate_model(model_name, X_train, y_train, X_test, y_test, X_train_no_encoded, model=None):
    """Fit ``model``, save it under ``model_name`` and plot its diagnostics.

    The estimator is fitted on the training data and dumped to
    ``/content/drive/MyDrive/X/prod_models/{model_name}.joblib``. It is then
    passed to ``plot_confusion_matrix_normalized`` for the test and the
    training set, and, when it exposes ``predict_proba``, to
    ``plot_multiclass_roc_curve`` for both sets as well.

    Args:
        model_name: Label printed in the banner and used as the saved file name.
        X_train: Training features given to ``model.fit`` and the train plots.
        y_train: Training labels; also used to count the distinct classes.
        X_test: Test features given to the test plots.
        y_test: Test labels given to the test plots.
        X_train_no_encoded: Unused; kept so existing callers keep working.
        model: Estimator to fit. Required in practice despite the ``None``
            default.

    Raises:
        ValueError: If ``model`` is ``None``.
    """
    # Fail fast with a clear message instead of an AttributeError on model.fit.
    if model is None:
        raise ValueError("evaluate_model requires a 'model' to fit and evaluate")

    print(f'======================================{model_name}=============================')
    print(mapping)

    # Build the destination once and reuse it for the dump below.
    model_path = f'/content/drive/MyDrive/X/prod_models/{model_name}.joblib'

    model.fit(X_train, y_train)
    dump(model, model_path)

    print('modèle sauvegardé')

    plot_confusion_matrix_normalized(model, X_test, y_test)
    plot_confusion_matrix_normalized(model, X_train, y_train)

    # ROC curves
    n_classes = len(np.unique(y_train))  # number of unique classes in y_train
    if hasattr(model, "predict_proba"):
        plot_multiclass_roc_curve(model, X_test, y_test, n_classes)
        plot_multiclass_roc_curve(model, X_train, y_train, n_classes)

# Fit the stacked ensemble, save it, and plot its diagnostics via evaluate_model.
evaluate_model('PROD_ALL_TRAINED_MODELS_STACKED', X_train_balanced, y_train_balanced, X_test_scaled, y_test, X_train, model=stacking_models)

运行后我收到以下错误:

---------------------------------------------------------------------------
======================================PROD_ALL_TRAINED_MODELS_STACKED=============================
{'X': 0, 'Y': 1, 'Z': 2}
modèle sauvegardé
---------------------------------------------------------------------------
NotFittedError                            Traceback (most recent call last)
<ipython-input-19-89e6497ba9b5> in <cell line: 30>()
     28 )
     29 
---> 30 evaluate_model('PROD_ALL_TRAINED_MODELS_STACKED', X_train_balanced, y_train_balanced, X_test_scaled, y_test, X_train, model=stacking_models)
     31 
     32 # all_models[0]

8 frames
/usr/local/lib/python3.10/dist-packages/sklearn/utils/validation.py in check_is_fitted(estimator, attributes, msg, all_or_any)
   1620 
   1621     if not _is_fitted(estimator, attributes, all_or_any):
-> 1622         raise NotFittedError(msg % {"name": type(estimator).__name__})
   1623 
   1624 

NotFittedError: This StackingClassifier instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.

python tensorflow machine-learning keras scikit-learn
1个回答
0
投票

像下面这样重构包装器之后,问题就解决了:

from sklearn.exceptions import NotFittedError
from sklearn.utils.validation import check_is_fitted
 
class JoblibModelWrapper(BaseEstimator, ClassifierMixin):
    """Wrap an already-trained model and mark the wrapper itself as fitted.

    Parameters:
        model: The trained estimator every prediction call is forwarded to.
        is_fitted: When true, ``fitted_`` is set in the constructor so that
            ``check_is_fitted(self, 'fitted_')`` passes without calling ``fit``.
    """

    def __init__(self, model, is_fitted=True):
        self.model = model
        self.is_fitted = is_fitted
        # Only create the marker attribute when the caller says the model is
        # trained; otherwise it appears on the first call to fit().
        if is_fitted:
            self.fitted_ = True

    def fit(self, X, y=None):
        """Flag the wrapper as fitted and return ``self``; nothing is trained."""
        self.fitted_ = True
        return self

    def predict(self, X):
        """Return the wrapped model's predictions for ``X``."""
        check_is_fitted(self, 'fitted_')
        wrapped = self.model
        return wrapped.predict(X)

    def predict_proba(self, X):
        """Return the wrapped model's class probabilities for ``X``.

        Raises:
            RuntimeError: If the wrapped model has no ``predict_proba``.
        """
        check_is_fitted(self, 'fitted_')
        # Guard clause first so the happy path reads straight through.
        if not hasattr(self.model, "predict_proba"):
            raise RuntimeError("Le modèle sous-jacent ne supporte pas predict_proba")
        return self.model.predict_proba(X)
© www.soinside.com 2019 - 2024. All rights reserved.