我有四个已经训练好的模型:2 个 VotingClassifier 模型和 2 个 StackingClassifier 模型。它们都已训练完毕并以 joblib 格式保存。我可以正常加载它们并用来预测(在生产环境中,就是调用它们的 predict 方法)。但是当我尝试把它们全部堆叠起来创建一个新的 StackingClassifier 时,却遇到了“未拟合”(NotFittedError)错误。
代码:
class JoblibModelWrapper(BaseEstimator, ClassifierMixin):
    """Adapter that exposes an already-trained model as an estimator.

    The wrapped object is stored as the ``model`` parameter and every
    prediction call is forwarded to it.

    NOTE(review): ``fit`` records nothing on the instance, so scikit-learn
    utilities that detect fitted state through trailing-underscore
    attributes may treat this wrapper as unfitted -- confirm against the
    installed scikit-learn version.
    """

    def __init__(self, model):
        self.model = model

    def fit(self, X, y=None):
        """Do nothing and return ``self``; ``X`` and ``y`` are ignored."""
        # Deliberately a no-op: self.model is expected to be trained already.
        return self

    def predict(self, X):
        """Return ``self.model.predict(X)``."""
        return self.model.predict(X)

    def predict_proba(self, X):
        """Return ``self.model.predict_proba(X)``.

        Raises:
            RuntimeError: if the wrapped model has no ``predict_proba``.
        """
        if not hasattr(self.model, "predict_proba"):
            raise RuntimeError("Le modèle sous-jacent ne supporte pas predict_proba")
        return self.model.predict_proba(X)
import os
import re

import joblib
import pandas as pd
# Folder that holds the already-trained estimators, one ``.joblib`` file each.
models_directory = '/content/drive/MyDrive/X/preprod_models'
joblib_models = {}
files = os.listdir(models_directory)
# Only ``.joblib`` files are models. Skipping everything else keeps the
# progress counter in line with the total and keeps the suffix stripping
# below from mangling unrelated file names.
model_files = [filename for filename in files if filename.endswith('.joblib')]
total_models_count = len(model_files)
current_model_index = 0  # stays 0 when the folder contains no model file
for current_model_index, filename in enumerate(model_files, start=1):
    model_path = os.path.join(models_directory, filename)
    model_name = filename[:-len('.joblib')]  # drop the ".joblib" suffix
    # Load a trained model that was previously saved in joblib format.
    # NOTE: joblib.load unpickles arbitrary objects -- only load trusted files.
    model = joblib.load(model_path)
    joblib_models[model_name] = model
    print(f'Modèle {current_model_index} / {total_models_count} processed: {model_name}')
# The f-prefix is required, otherwise the placeholder is printed literally.
print(f'Total processed: {total_models_count}')
# Build the (name, estimator) pairs expected by StackingClassifier.
# Spaces become underscores and every run of underscores is collapsed to a
# single one: scikit-learn rejects estimator names containing "__", and a
# single ``.replace('__', '_')`` pass would leave "__" behind for runs of
# three or more underscores/spaces.
wrapped_joblib_models = [
    (
        re.sub(r'_+', '_', name.replace(' ', '_')),
        JoblibModelWrapper(model),
    )
    for name, model in joblib_models.items()
]

# Shuffled, seeded 5-fold stratified splitter, passed as ``cv`` below.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Meta-model: logistic regression fed with the base models' ``predict``
# output plus the original features (``passthrough=True``).
stacking_models = StackingClassifier(
    estimators=wrapped_joblib_models,
    final_estimator=LogisticRegression(solver='saga', max_iter=10000),
    cv=cv,
    verbose=3,
    passthrough=True,
    stack_method='predict',
    # n_jobs=-1
)
def evaluate_model(model_name, X_train, y_train, X_test, y_test, X_train_no_encoded, model=None):
    """Fit ``model``, save it with joblib and plot its evaluation figures.

    Args:
        model_name: Label printed in the banner and used as the file name
            (``<model_name>.joblib``) of the saved model.
        X_train: Training features passed to ``model.fit``.
        y_train: Training labels passed to ``model.fit``.
        X_test: Features used for the test-set plots.
        y_test: Labels used for the test-set plots.
        X_train_no_encoded: Unused; kept so existing callers keep working.
        model: Estimator to fit. Required despite the ``None`` default.

    Raises:
        ValueError: if ``model`` is ``None``.
    """
    # Fail early with a clear message instead of an AttributeError on
    # ``None.fit`` after output has already been printed.
    if model is None:
        raise ValueError("evaluate_model requires a model to fit; got model=None")

    print(f'======================================{model_name}=============================')
    # ``mapping`` is a module-level name defined outside this function
    # (presumably the label -> class-index mapping; verify against caller).
    print(mapping)

    model_path = f'/content/drive/MyDrive/X/prod_models/{model_name}.joblib'
    model.fit(X_train, y_train)
    # Persist the fitted model before plotting.
    dump(model, model_path)
    print('modèle sauvegardé')

    # Normalized confusion matrices, test set first, then training set.
    plot_confusion_matrix_normalized(model, X_test, y_test)
    plot_confusion_matrix_normalized(model, X_train, y_train)

    # ROC curves
    n_classes = len(np.unique(y_train))  # number of distinct classes in y_train
    if hasattr(model, "predict_proba"):
        plot_multiclass_roc_curve(model, X_test, y_test, n_classes)
        plot_multiclass_roc_curve(model, X_train, y_train, n_classes)
# Fit the stacked ensemble on X_train_balanced / y_train_balanced, save it as
# PROD_ALL_TRAINED_MODELS_STACKED.joblib and plot its evaluation figures.
evaluate_model('PROD_ALL_TRAINED_MODELS_STACKED', X_train_balanced, y_train_balanced, X_test_scaled, y_test, X_train, model=stacking_models)
我收到此错误
---------------------------------------------------------------------------
======================================PROD_ALL_TRAINED_MODELS_STACKED=============================
{'X': 0, 'Y': 1, 'Z': 2}
modèle sauvegardé
---------------------------------------------------------------------------
NotFittedError Traceback (most recent call last)
<ipython-input-19-89e6497ba9b5> in <cell line: 30>()
28 )
29
---> 30 evaluate_model('PROD_ALL_TRAINED_MODELS_STACKED', X_train_balanced, y_train_balanced, X_test_scaled, y_test, X_train, model=stacking_models)
31
32 # all_models[0]
8 frames
/usr/local/lib/python3.10/dist-packages/sklearn/utils/validation.py in check_is_fitted(estimator, attributes, msg, all_or_any)
1620
1621 if not _is_fitted(estimator, attributes, all_or_any):
-> 1622 raise NotFittedError(msg % {"name": type(estimator).__name__})
1623
1624
NotFittedError: This StackingClassifier instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.
像下面这样重构包装器之后,问题就解决了:
from sklearn.exceptions import NotFittedError
from sklearn.utils.validation import check_is_fitted
class JoblibModelWrapper(BaseEstimator, ClassifierMixin):
    """Adapter around a pre-trained model that carries a fitted marker.

    The ``fitted_`` attribute is what the prediction methods check for
    through ``check_is_fitted``. It is set at construction time when
    ``is_fitted`` is true, and otherwise by ``fit``.

    Parameters:
        model: The wrapped object; prediction calls are forwarded to it.
        is_fitted: When true (the default), mark the wrapper as fitted
            immediately.
    """

    def __init__(self, model, is_fitted=True):
        self.model = model
        self.is_fitted = is_fitted
        if is_fitted:
            # Mark as fitted right away so predict works without a fit call.
            self.fitted_ = True

    def fit(self, X, y=None):
        """Mark the wrapper as fitted and return ``self``.

        ``X`` and ``y`` are ignored; the wrapped model is not retrained.
        """
        self.fitted_ = True
        return self

    def predict(self, X):
        """Return ``self.model.predict(X)`` after the fitted check."""
        check_is_fitted(self, 'fitted_')
        return self.model.predict(X)

    def predict_proba(self, X):
        """Return ``self.model.predict_proba(X)`` after the fitted check.

        Raises:
            RuntimeError: if the wrapped model has no ``predict_proba``.
        """
        check_is_fitted(self, 'fitted_')
        if not hasattr(self.model, "predict_proba"):
            raise RuntimeError("Le modèle sous-jacent ne supporte pas predict_proba")
        return self.model.predict_proba(X)