Logistic regression implementation - loss does not converge and the model performs poorly

Problem description

I am trying to implement a logistic regression model as a binary classifier. I have to use stochastic gradient descent with the closed-form gradient of the binary cross-entropy loss. After training the model on the data, it does not seem to work properly: the loss does not decrease over the iterations and does not converge, except when eta=0.002, and then only right after we have set a much larger eta (i.e. we set eta >> 0.002, then reset eta=0.002, and only then might it converge). Evaluating the model afterwards gives very poor results, similar to a naive model that predicts the same class for every test example:

Confusion Matrix:
[[344. 240.]
 [294. 322.]]
True Negatives (TN): 322.0
False Positives (FP): 294.0
False Negatives (FN): 240.0
True Positives (TP): 344.0
Sensitivity (Se): 0.589041095890411
Specificity (Sp): 0.5227272727272727
Positive Predictive Value (PPV): 0.5391849529780565
Negative Predictive Value (NPV): 0.5729537366548043
Accuracy (Acc): 0.555
F1 Score: 0.563011456628478
Area Under the ROC Curve (AUC): 0.555

What is wrong with this implementation?

import numpy as np
import matplotlib.pyplot as plt


def sigmoid(z):
    sig = 1 / (1+np.exp(-z)) 
    return sig

class ManualLogisticRegression:
    def __init__(self, random_state=1):
        np.random.seed(random_state)
        self.w = np.random.randn(5)

    def fit(self, X, Y, eta=0.005, plot=False):

        if plot:
            loss_vec = np.zeros(len(X))
        for idx, (x, y) in enumerate(zip(X, Y)):

            z = np.dot(x, self.w)
            a = sigmoid(z)
            grad = np.dot(x.T, (a - y))
            self.w -= eta * grad
            if plot:
                loss_vec[idx] = self.log_loss(X, Y)
        if plot:
            plt.plot(loss_vec)
            plt.xlabel('# of iterations')
            plt.ylabel('Loss')

    def log_loss(self, x, y):

        z = np.dot(x, self.w)
        p = sigmoid(z)
        epsilon = 1e-5
        p = np.clip(p, epsilon, 1 - epsilon)
        log_loss = (-1 / len(x)) * np.sum(y * np.log(p) + (1 - y) * np.log(1 - p))
        
        return log_loss

    def predict_proba(self, x):
        """
        This function computes the probability of every example in x to belong to the class "1" using the trained model.
        :param x: Feature matrix (could be also a single vector).
        :return: vector at the length of examples in x where every element is the probability to belong to class "1" per example.
        """

        z = np.dot(x, self.w)
        y_pred_proba = sigmoid(z)

        return y_pred_proba

    def predict(self, x, thresh=0.5):
        """
        This function labels every example according to the calculated probability with the use of a threshold.
        :param x: Feature matrix (could be also a single vector).
        :param thresh: decision threshold.
        :return: vector at the length of examples in x where every element is the estimated label (0 or 1) per example.
        """

        z = np.dot(x, self.w)
        probabilities = sigmoid(z)
        y_pred = np.where(probabilities >= thresh, 1, 0)

        return y_pred

    def score(self, x, y):
        """
        This function computes the accuracy of the trained model's estimations.
        :param x: Feature matrix (could be also a single vector).
        :param y: Adequate true labels (either 1 or 0).
        :return: Estimator's accuracy.
        """
        return np.sum(self.predict(x) == y)/len(y)

    def conf_matrix(self, x, y):
        """
        This function computes the confusion matrix for the prediction of the trained model. First value of the matrix
        was given as a hint.
        :param x: Feature matrix (could be also a single vector).
        :param y: Adequate true labels (either 1 or 0).
        :return: Confusion matrix.
        """
        conf_mat = np.zeros((2, 2))
        y_pred = self.predict(x)
        
        conf = (y_pred == y)  # element-wise: True where the prediction matches the true label
        # Hint line: among examples predicted as 0, count the correct ones -> TN
        conf_mat[0, 0] += np.sum(1 * (conf[y_pred == 0] == 1))
        conf_mat[1, 0] += np.sum(1 * (conf[y_pred == 0] == 0))  # predicted 0 but wrong -> FN
        conf_mat[0, 1] += np.sum(1 * (conf[y_pred == 1] == 0))  # predicted 1 but wrong -> FP
        conf_mat[1, 1] += np.sum(1 * (conf[y_pred == 1] == 1))  # predicted 1 and correct -> TP


        # --------------------------------------------------------------------------------------
        return conf_mat

Here is the notebook itself:

%load_ext autoreload
%autoreload 2
from manual_log_reg import ManualLogisticRegression
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score
X = pd.read_csv('X_data.csv')
X.drop(columns=X.columns[0], axis=1, inplace=True)
X.head()
y = pd.read_csv('y_data.csv')  # read and convert to numpy
y.drop(columns=y.columns[0], axis=1, inplace=True)
y.head()
X = X.values  # convert to numpy
y = y.values.astype(int).flatten()  # convert to numpy integers and flatten
X = np.concatenate((np.ones((len(y), 1)), X), axis=1) # add bias term
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
log_reg = ManualLogisticRegression()
log_reg.fit(X_train, y_train, eta=0.003, plot=True)

sorted_weights = np.sort(np.abs(log_reg.w[:-1]))  # Exclude bias term and sort by absolute value
most_important_feature_index = np.argmax(np.abs(log_reg.w[:-1]))  # Find the index of the most important feature
most_important_feature_weight = log_reg.w[most_important_feature_index]  # Get the weight of the most important feature
print(f"The most important feature is feature {most_important_feature_index + 1} with weight {most_important_feature_weight}.")
conf_matrix = log_reg.conf_matrix(X_test, y_test)
print("Confusion Matrix:")
print(conf_matrix)

# Calculate additional performance metrics
TN = conf_matrix[1, 1]
FP = conf_matrix[1, 0]
FN = conf_matrix[0, 1]
TP = conf_matrix[0, 0]
Se = TP / (TP + FN)
Sp = TN / (TN + FP)
PPV = TP / (TP + FP)
NPV = TN / (TN + FN)
Acc = (TP + TN) / (TP + TN + FP + FN)
F1 = 2 * (PPV * Se) / (PPV + Se)

# Calculate AUC using the score method of ManualLogisticRegression
AUC = log_reg.score(X_test, y_test)

# Report the performance metrics
print(f"True Negatives (TN): {TN}")
print(f"False Positives (FP): {FP}")
print(f"False Negatives (FN): {FN}")
print(f"True Positives (TP): {TP}")
print(f"Sensitivity (Se): {Se}")
print(f"Specificity (Sp): {Sp}")
print(f"Positive Predictive Value (PPV): {PPV}")
print(f"Negative Predictive Value (NPV): {NPV}")

print(f"Accuracy (Acc): {Acc}")
print(f"F1 Score: {F1}")
print(f"Area Under the ROC Curve (AUC): {AUC}")
conf_mat = log_reg.conf_matrix(X_test, y_test)
import seaborn as sns
import matplotlib.pyplot as plt
# Plot confusion matrix
sns.heatmap(conf_mat, annot=True, cmap='Blues', fmt='g')
plt.xlabel('Predicted labels')
plt.ylabel('True labels')
plt.title('Confusion Matrix')
plt.show()
Tags: python, machine-learning, classification, logistic-regression
1 Answer

The biggest problem in your implementation is probably the derivative: the derivative with respect to the vector w should itself be a vector, not a scalar. The correct way to compute the gradient is:

grad = (a - y) * x
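As a quick sanity check, this per-example binary cross-entropy gradient can be compared against a finite-difference estimate. This is a minimal sketch with a made-up example; the helpers bce and num_grad are introduced here for illustration and are not part of the original code.

import numpy as np

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

def bce(w, x, y):
    # per-example binary cross-entropy loss
    a = sigmoid(np.dot(x, w))
    return -(y * np.log(a) + (1 - y) * np.log(1 - a))

rng = np.random.default_rng(0)
w = rng.standard_normal(5)
x = rng.standard_normal(5)
y = 1.0

a = sigmoid(np.dot(x, w))
analytic = (a - y) * x  # the gradient suggested above

eps = 1e-6
num_grad = np.array([
    (bce(w + eps * np.eye(5)[i], x, y) - bce(w - eps * np.eye(5)[i], x, y)) / (2 * eps)
    for i in range(5)
])
print(np.allclose(analytic, num_grad, atol=1e-5))  # expected: True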

Secondly, training the model for only a single epoch does not seem like a reasonable choice. Try running multiple epochs by changing fit as follows:

def fit(self, X, Y, epochs=10, eta=0.005, plot=False):

    if plot:
        loss_vec = np.zeros(epochs)
    for epoch in range(epochs):
        for (x, y) in zip(X, Y):
            z = np.dot(x, self.w)
            a = sigmoid(z)
            grad = (a - y) * x  # per-example gradient of the BCE loss
            self.w -= eta * grad
        if plot:
            loss_vec[epoch] = self.log_loss(X, Y)  # full-data loss, recorded once per epoch
    if plot:
        plt.plot(loss_vec)
        plt.xlabel('# of epochs')
        plt.ylabel('Loss')
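A minimal usage sketch, assuming the revised fit above is pasted into ManualLogisticRegression (the epoch count below is arbitrary, not a tuned value):

log_reg = ManualLogisticRegression()
log_reg.fit(X_train, y_train, epochs=50, eta=0.003, plot=True)  # loss is recorded once per epoch
print(log_reg.score(X_test, y_test))  # accuracy on the held-out split

Recording the loss once per epoch (instead of after every single SGD update, as in the original fit) also keeps the plot cheap to compute, since log_loss runs over the full training set.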