使用梯度下降时，线性回归模型的训练误差和测试误差非常相似

Question

我必须实现梯度下降来学习 B0 和 B1 来预测线性多项式方程，程序按预期运行，但绘图显示模型的训练误差和测试误差非常相似。

以下是程序：

import random

# Importing the numerical Python library
import numpy as np

# Importing matplotlib for plotting the graph
import matplotlib.pyplot as plt


# Function to generate a dataset based on the input x
def datasetGenerator(x: int) -> int:
    return ((2 * x) - 3) + np.random.normal(0, 5)


# mean squared error calculation for comparing the actual y with the prediction
def meanSquaredError(y: float, y_pred: float) -> float:
    # summation of squares of all the y with y predictions divided by the length of y using the np.mean method
    # return np.mean((y - y_pred) ** 2)
    return np.mean((y - y_pred) ** 2)


def gradientDescent(
    X: list[float],
    Y: list[float],
    Y_pred: list[float],
    B0: float,
    B1: float,
    learningRate: float,
):
    # doe E by doe B0
    B0 -= learningRate * np.mean(-2 * (Y - Y_pred))
    # doe E by doe B1
    B1 -= learningRate * np.mean(-2 * (Y - Y_pred) * (X))
    return B0, B1


def betaCalculation(X: list[float], Y: list[float], n: int) -> int:
    # numpy array of X power 1 to 5
    Xtrans = [np.power(X, i) for i in range(n + 1)]

    # actual X values in form of X tranpose's transpose
    Xnew = np.transpose(Xtrans)

    # dot product of X transpose with X
    XTX = np.matmul(Xtrans, Xnew)

    # inverse of dot product of X Transpose with X
    XTXm1 = np.linalg.inv(XTX)

    # dot product of inverse of dot product of X transpose and X with X tranpose
    XTXinvintoXT = np.matmul(XTXm1, Xtrans)

    # Dot product of dot product of inverse of dot product of X transpose and X with X tranpose with Y
    Beta = np.matmul(XTXinvintoXT, Y)

    # array of beta elements
    return Beta


def optimalFit(
    B0: float,
    B1: float,
    X_train: list[float],
    Y_train: list[float],
    X_test: list[float],
    Y_test: list[float],
    learningRate,
) -> tuple[list[list[float]], list[float], list[int]]:
    flag = True
    Ynew = Y_train
    epsTrainArr = []
    epsTestArr = []
    epochsArr = []
    B = []
    epochs = 0

    while flag:
        # prediction of the model for previous B0 and B1
        Y_pred_train = B0 + B1 * X_train
        Y_pred_test = B0 + B1 * X_test

        # checking if the code converged or not
        flag = False if meanSquaredError(Ynew, Y_pred_train) <= 1e-6 else True

        # finding the error of the mean squared error
        eps_train = meanSquaredError(Y_train, Y_pred_train)
        eps_test = meanSquaredError(Y_test, Y_pred_test)

        # calculating new B0 and B1 using gradient descent
        B0, B1 = gradientDescent(X_train, Y_train, Y_pred_train, B0, B1, learningRate)
        B.append([B0, B1])
        Ynew = Y_pred_train

        # adding the epoch value to see how it converges
        epochs += 1

        # appending the arror onto the eps array it has eps until the model converges
        epsTrainArr.append(eps_train)
        epsTestArr.append(eps_test)

        # appending the apochs onto the epochs array it has epoch count until the model converges
        epochsArr.append(epochs)

    return B, epsTrainArr, epsTestArr, epochsArr


# gets executed when the file is executed
def main():
    # generating y values with the
    # Generating 100 evenly spaced numbers between -5 and 5
    # contains the whole population of data
    X_init = np.linspace(-5, 5, 1000)

    # Calling the datasetGenerator function to get y for every x in X
    Y_init = datasetGenerator(X_init)

    XYtup = []
    for i in range(len(X_init)):
        XYtup.append(tuple([X_init[i], Y_init[i]]))

    XYtup = np.array(XYtup)
    rng = np.random.default_rng()
    rng.shuffle(XYtup)
    XYtup = XYtup.tolist()

    # getting the training dataset
    # The X array is sliced to get the training array, this is also the X^1 array
    X = np.array([XYtup[i][0] for i in range(len(X_init))])
    Y = np.array([XYtup[i][1] for i in range(len(X_init))])

    # Splitting the dataset into training, validation, and testing sets
    split_idx_train = int(X.shape[0] * 0.8)

    # creating the test and training part of the dataset
    train_data = random.sample(XYtup, split_idx_train)
    test_data = [i for i in XYtup if i not in train_data]

    X_train, Y_train = zip(*train_data)
    X_test, Y_test = zip(*test_data)

    X_train = np.array(X_train)
    Y_train = np.array(Y_train)

    X_test = np.array(X_test)
    Y_test = np.array(Y_test)

    B0 = np.random.normal(0, 1)
    B1 = np.random.normal(0, 1)
    # putting the B0 and B1 in a list
    B = [[B0, B1]]
    learningRate = 0.1

    B, epsTrainArr, epsTestArr, epochsarr = optimalFit(
        B0, B1, X_train, Y_train, X_test, Y_test, learningRate
    )
    plt.plot(
        epochsarr,
        epsTrainArr,
        label=f"Epoch vs Training Error for learning rate: {learningRate}",
        c="r",
    )
    plt.plot(
        epochsarr,
        epsTestArr,
        label=f"Epoch vs Testing Error for learning rate: {learningRate}",
        c="b",
    )
    plt.title(
        "The epoch count vs the epsilon of the model for different learning rates"
    )
    # plotting with the x label as epochs
    plt.xlabel("Epochs")
    # plotting with the y label as error
    plt.ylabel("Error")
    # having a description for the graph to explain what it does
    plt.figtext(
        0.5,
        0.01,
        f"this plot represents how the mean squared error of the model decreases as the number of epoch increases for different learning rates",
        wrap=True,
        horizontalalignment="center",
        fontsize=10,
        bbox={"facecolor": "grey", "alpha": 0.3, "pad": 5},
    )
    # legends to explain which coloured line represents which learning rate
    plt.legend()
    plt.show()

    learningRate = 0.001
    while learningRate <= 0.1:
        B, epsTrainArr, epsTestArr, epochsarr = optimalFit(
            B0, B1, X_train, Y_train, X_test, Y_test, learningRate
        )
        epochs = epochsarr[-1]

        # printing the Beta computed using gradient descent and closedform solution
        print(
            f"B0 and B1 after the model converges: {B[-1]} with error {epsTrainArr[-1]}"
        )
        print(f"B0 and B1 for closed form solution: {betaCalculation(X, Y, 1)}")

        # printing the epoch value to see how it converges
        print(
            f"number of epochs needed for convergence: {epochs} for learning rate: {learningRate}\n"
        )
        plt.plot(
            epochsarr[5:],
            epsTrainArr[5:],
            label=f"Epoch vs Error for learning rate: {learningRate}",
        )

        learningRate *= 10
    print(len(B))

    # setting the title of the graph to specify which axis has which variable and other things
    plt.title(
        "The epoch count vs the epsilon of the model for different learning rates"
    )
    # plotting with the x label as epochs
    plt.xlabel("Epochs")
    # plotting with the y label as error
    plt.ylabel("Error")
    # having a description for the graph to explain what it does
    plt.figtext(
        0.5,
        0.01,
        f"this plot represents how the mean squared error of the model decreases as the number of epoch increases for different learning rates",
        wrap=True,
        horizontalalignment="center",
        fontsize=10,
        bbox={"facecolor": "grey", "alpha": 0.3, "pad": 5},
    )
    # legends to explain which coloured line represents which learning rate
    plt.legend()
    plt.show()


if __name__ == "__main__":
    main()

这是绘图输出

纪元 vs 训练误差和纪元 vs 测试误差

Answer 1

让我们绘制 X 与 Y 的对比：

plt.scatter(X_init, Y_init)
plt.show()

这只是一条直线。如果这条线上有两个点，则可以完美预测这条线上的任何点。

让我们看看如何生成 Y。

# Function to generate a dataset based on the input x
def datasetGenerator(x: int) -> int:
    return ((2 * x) - 3) + np.random.normal(0, 5)

这对 X 进行线性变换，然后添加一些随机噪声。但请注意，

np.random.normal(0, 5)

抽取单个随机样本。然后将该随机样本“广播”到数组。这意味着相同的随机值将添加到所有值中。您可以为每个 X 值生成一个随机值：

# Function to generate a dataset based on the input x def datasetGenerator(x: int) -> int: return ((2 * x) - 3) + np.random.normal(0, 5, size=len(x))

这为您提供了更真实的数据集。

这也使得你的测试和训练错误有所不同。

使用梯度下降时，线性回归模型的训练误差和测试误差非常相似

问题描述投票：0回答：1

1个回答

最新问题

使用梯度下降时，线性回归模型的训练误差和测试误差非常相似

问题描述 投票：0回答：1

1个回答

最新问题

问题描述投票：0回答：1