AttributeError: can't set attribute: how can I fix this class so it works?

Problem description

The following SMOTEBoost class implementation is given in smoteboost.py:

import numbers
import numpy as np
from collections import Counter
from sklearn.base import (clone,
                          is_regressor)
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble._forest import BaseForest
from sklearn.preprocessing import normalize
from sklearn.tree import BaseDecisionTree
from sklearn.utils import (check_random_state,
                           check_X_y,
                           check_array,
                           _safe_indexing)
from imblearn.utils import check_neighbors_object
from imblearn.over_sampling import SMOTE

__all__ = ['SMOTEBoost']

MAX_INT = np.iinfo(np.int32).max


class SMOTEBoost(AdaBoostClassifier):

    def __init__(self,
                 k_neighbors=5,
                 base_estimator=None,
                 n_estimators=50,
                 learning_rate=1.,
                 sampling_strategy="auto",
                 algorithm='SAMME.R',
                 random_state=None,
                 n_jobs=1):

        super(AdaBoostClassifier, self).__init__( 
            base_estimator=base_estimator,
            n_estimators=n_estimators,
            learning_rate=learning_rate,
            random_state=random_state)

        self.algorithm = algorithm
        self.k_neighbors = k_neighbors
        self.sampling_strategy = sampling_strategy
        self.n_jobs = n_jobs

    def _validate_estimator(self, default=AdaBoostClassifier()):

        if not isinstance(self.n_estimators, (numbers.Integral, np.integer)):
            raise ValueError("n_estimators must be an integer, "
                             "got {0}.".format(type(self.n_estimators)))

        if self.n_estimators <= 0:
            raise ValueError("n_estimators must be greater than zero, "
                             "got {0}.".format(self.n_estimators))

        if self.base_estimator is not None:
            base_estimator = clone(self.base_estimator)
        else:
            base_estimator = clone(default)

        if isinstance(self.sampling_strategy, dict) and self.sampling_strategy != {}:
            raise ValueError("'dict' type cannot be accepted for ratio in this class; "
                             "use alternative options")

        self.nn_k_ = check_neighbors_object('k_neighbors',
                                            self.k_neighbors,
                                            additional_neighbor=1)
        self.nn_k_.set_params(**{'n_jobs': self.n_jobs})

        self.smote = SMOTE(sampling_strategy=self.sampling_strategy, k_neighbors=self.k_neighbors,
                           random_state=self.random_state)

        self.base_estimator_ = base_estimator

    def fit(self, X, y, sample_weight=None):
        if self.algorithm not in ('SAMME', 'SAMME.R'):
            raise ValueError("algorithm %s is not supported" % self.algorithm)
        # Check parameters
        if self.learning_rate <= 0:
            raise ValueError("learning_rate must be greater than zero")

        if (self.base_estimator is None or
                isinstance(self.base_estimator, (BaseDecisionTree,
                                                 BaseForest))):
            DTYPE = np.float64
            dtype = DTYPE
            accept_sparse = 'csc'
        else:
            dtype = None
            accept_sparse = ['csr', 'csc']

        X, y = check_X_y(X, y, accept_sparse=accept_sparse, dtype=dtype,
                         y_numeric=is_regressor(self))

        if sample_weight is None:
            # Initialize weights to 1 / n_samples
            sample_weight = np.empty(X.shape[0], dtype=np.float64)
            sample_weight[:] = 1. / X.shape[0]
        else:
            sample_weight = check_array(sample_weight, ensure_2d=False)
            # Normalize existing weights
            sample_weight = sample_weight / sample_weight.sum(dtype=np.float64)

            # Check that the sample weights sum is positive
            if sample_weight.sum() <= 0:
                raise ValueError(
                    "Attempting to fit with a non-positive "
                    "weighted number of samples.")
        # Check parameters
        self._validate_estimator()
        # Clear any previous fit results
        self.estimators_ = []
        self.estimator_weights_ = np.zeros(self.n_estimators, dtype=np.float64)
        self.estimator_errors_ = np.ones(self.n_estimators, dtype=np.float64)

        random_state = check_random_state(self.random_state)

        for iboost in range(self.n_estimators):
            # SMOTE step
            target_stats = Counter(y)
            min_class = min(target_stats, key=target_stats.get)
            n_sample_majority = max(target_stats.values())
            n_samples = n_sample_majority - target_stats[min_class]
            target_class_indices = np.flatnonzero(y == min_class)
            X_class = _safe_indexing(X, target_class_indices)
            self.nn_k_.fit(X_class)
            nns = self.nn_k_.kneighbors(X_class, return_distance=False)[:, 1:]
            X_new, y_new = self.smote._make_samples(
                X_class, y.dtype, min_class, X_class, nns, n_samples, 1.0)
            # Normalize synthetic sample weights based on current training set.
            sample_weight_syn = np.empty(X_new.shape[0], dtype=np.float64)
            sample_weight_syn[:] = 1. / X.shape[0]
            # Combine the original and synthetic samples.
            X = np.vstack((X, X_new))
            y = np.append(y, y_new)
            # Combine the weights.
            sample_weight = \
                np.append(sample_weight, sample_weight_syn).reshape(-1, 1)
            sample_weight = \
                np.squeeze(normalize(sample_weight, axis=0, norm='l1'))
            # Boosting step
            sample_weight, estimator_weight, estimator_error = self._boost(
                iboost,
                X, y,
                sample_weight,
                random_state)
            # Early termination
            if sample_weight is None:
                break
            self.estimator_weights_[iboost] = estimator_weight
            self.estimator_errors_[iboost] = estimator_error
            # Stop if error is zero
            if estimator_error == 0:
                break
            sample_weight_sum = np.sum(sample_weight)
            # Stop if the sum of sample weights has become non-positive
            if sample_weight_sum <= 0:
                break
            if iboost < self.n_estimators - 1:
                # Normalize
                sample_weight /= sample_weight_sum

        return self

I am trying to get it to work, but I don't know how to fix it. To reproduce:

from sklearn.datasets import make_classification
from smoteboost import SMOTEBoost
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=1000, n_features=10, n_classes=5,
   n_informative=4, weights=[0.22,0.03,0.16,0.51,0.05])

X_train, X_test, y_train, y_test = train_test_split(X, y)
smt = SMOTEBoost()
smt.fit(X_train, y_train)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "~/smoteboost.py", line 176, in fit
    self._validate_estimator()
  File "~/smoteboost.py", line 129, in _validate_estimator
    self.base_estimator_ = base_estimator
AttributeError: can't set attribute

I understand the error message to indicate that the base_estimator_ attribute of the SMOTEBoost object cannot be set, so I tried setting it like this:

self.set_params(base_estimator_=self.base_estimator)

Error:
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "~/smoteboost.py", line 177, in fit
    self._validate_estimator()
  File "~/smoteboost.py", line 130, in _validate_estimator
    self.set_params(base_estimator_=self.base_estimator) #self.set_params(base_estimator=self.base_estimator_)
  File "~/venv/lib/python3.9/site-packages/sklearn/base.py", line 205, in set_params
    raise ValueError(
ValueError: Invalid parameter 'base_estimator_' for estimator SMOTEBoost(). Valid parameters are: ['algorithm', 'base_estimator', 'k_neighbors', 'learning_rate', 'n_estimators', 'n_jobs', 'random_state', 'sampling_strategy'].

Edit

scikit-learn version:

import sklearn
sklearn.__version__
'1.2.2'
1 Answer

I would not assign to or change attributes of scikit-learn classes whose names end with a trailing underscore. These attributes are generated (computed once the .fit method has been called) and are, roughly speaking, read-only (the so-called "estimated attributes" described in the scikit-learn documentation).
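
As far as I can tell, in scikit-learn 1.2 base_estimator_ is exposed as exactly such a read-only, deprecated @property on the ensemble base class (it forwards to estimator_), which is why the assignment in _validate_estimator fails. Here is a minimal, self-contained Python sketch (not the scikit-learn source) of why assigning to a property without a setter raises this error:

class ReadOnlyDemo:
    def __init__(self, estimator=None):
        self.estimator_ = estimator       # plain attribute: assignment works

    @property
    def base_estimator_(self):            # property without a setter: read-only
        return self.estimator_

demo = ReadOnlyDemo()
demo.estimator_ = "fine"                  # OK
demo.base_estimator_ = "boom"             # AttributeError: can't set attribute
                                          # (Python 3.11+ mentions the missing setter instead)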

Also, as of version 1.2, base_estimator has been renamed to estimator, so use the latter.
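
For example, on scikit-learn 1.2 the constructor keyword is already estimator, while base_estimator still works but only with a deprecation warning (the classes below are just the stock scikit-learn estimators, not part of the question's code):

from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

# New spelling (scikit-learn >= 1.2):
clf = AdaBoostClassifier(estimator=DecisionTreeClassifier(max_depth=1), n_estimators=50)
# Old spelling, deprecated: AdaBoostClassifier(base_estimator=DecisionTreeClassifier())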

This suggests you should use self.set_params(estimator=estimator), replace all occurrences of base_estimator with plain estimator, and not assign to estimator_.
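
Putting that together, a minimal sketch of how the class from the question could be adapted, assuming scikit-learn 1.2 and keeping the rest of the methods as they are (this is a sketch of the suggested rename, not a tested drop-in replacement):

from sklearn.base import clone
from sklearn.ensemble import AdaBoostClassifier


class SMOTEBoost(AdaBoostClassifier):

    def __init__(self,
                 k_neighbors=5,
                 estimator=None,                  # renamed from base_estimator
                 n_estimators=50,
                 learning_rate=1.,
                 sampling_strategy="auto",
                 algorithm='SAMME.R',
                 random_state=None,
                 n_jobs=1):
        super(AdaBoostClassifier, self).__init__(
            estimator=estimator,                  # pass the new keyword through
            n_estimators=n_estimators,
            learning_rate=learning_rate,
            random_state=random_state)
        self.algorithm = algorithm
        self.k_neighbors = k_neighbors
        self.sampling_strategy = sampling_strategy
        self.n_jobs = n_jobs

    def _validate_estimator(self, default=AdaBoostClassifier()):
        # ... keep the parameter checks and the SMOTE / nn_k_ setup from the
        # question unchanged, then finish with:
        if self.estimator is not None:
            estimator = clone(self.estimator)
        else:
            estimator = clone(default)
        self.estimator_ = estimator               # writable, unlike the read-only base_estimator_ property

The same rename applies inside fit(), where self.base_estimator is inspected to choose the dtype and sparse format: test self.estimator there instead.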
