运行时警告:double_scalars diff += X_values * (y_values - ((X_values * m) + b)) 中遇到溢出

问题描述 投票:0回答:0
import math, copy 

import numpy as np

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

df = pd.read_csv("D:/Code/Vscode/test1/honeyproduction (3).csv")
prod_per_year = df.groupby('year').totalprod.mean().reset_index()

X = prod_per_year["year"].values.reshape(-1,)
y = prod_per_year["totalprod"].values.reshape(-1,)
prod_per_year.fillna(method='ffill', inplace=True)

scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X.reshape(-1,1))
y_scaled = scaler.fit_transform(y.reshape(-1,1))


def get_gradient_at_b(X, y, b, m):
    N = len(X)
    diff = 0
    for i in range(N):
        X_values = X[i]
        y_values = y[i]
        diff += y_values - ((X_values * m) + b)
    b_gradient = -(2/N) * diff
    return b_gradient    

def get_gradient_at_m(X, y, b, m):
    N = len(X)
    diff = 0
    for i in range(N):
        X_values = X[i]
        y_values = y[i]
        diff += X_values * (y_values - ((X_values * m) + b))
    m_gradient = -(2/N) * diff
    return m_gradient

def step_gradient(X, y, learning_rate, b_current, m_current):
    b_gradient = get_gradient_at_b(X, y, b_current, m_current)
    m_gradient = get_gradient_at_m(X, y, b_current, m_current)
    b = b_current - (b_gradient * learning_rate)
    m = m_current - (m_gradient * learning_rate)
    return [b, m]

def gradient_descent(X, y, learning_rate, num_iterations):
    b = 0
    m = 0
    for i in range(num_iterations):
        b, m = step_gradient(X, y, learning_rate, b, m)
    return b, m

b, m = gradient_descent(X, y, 0.004, 1000)

y_pred_scaled = [(x * m + b) for x in X_scaled]
y_pred_scaled = np.array(y_pred_scaled) 
y_pred = scaler.inverse_transform(y_pred_scaled.reshape(-1,1))
X = X.tolist()
y = y.tolist()
print(X)
print(np.shape(X))
print(y)
print(y_pred)
print(np.shape(y_pred))

plt.scatter(X, y)
plt.plot(X, y_pred, color='red')
plt.show()

我尝试缩放 X,y,尝试修复任何缺失值,打印出 y_pred 后仍然是 NaN,

我还遇到了运行时错误 invalid value encountered in double_scalars m = m_current - (m_gradient * learning_rate)
所以 rn 我尝试降低学习率或增加迭代次数以查看是否可以提高模型的性能,

任何人都可以检查我的代码,谢谢大家!

pandas numpy machine-learning scikit-learn linear-regression
© www.soinside.com 2019 - 2024. All rights reserved.