import math, copy
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
df = pd.read_csv("D:/Code/Vscode/test1/honeyproduction (3).csv")
prod_per_year = df.groupby('year').totalprod.mean().reset_index()
X = prod_per_year["year"].values.reshape(-1,)
y = prod_per_year["totalprod"].values.reshape(-1,)
prod_per_year.fillna(method='ffill', inplace=True)
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X.reshape(-1,1))
y_scaled = scaler.fit_transform(y.reshape(-1,1))
def get_gradient_at_b(X, y, b, m):
N = len(X)
diff = 0
for i in range(N):
X_values = X[i]
y_values = y[i]
diff += y_values - ((X_values * m) + b)
b_gradient = -(2/N) * diff
return b_gradient
def get_gradient_at_m(X, y, b, m):
N = len(X)
diff = 0
for i in range(N):
X_values = X[i]
y_values = y[i]
diff += X_values * (y_values - ((X_values * m) + b))
m_gradient = -(2/N) * diff
return m_gradient
def step_gradient(X, y, learning_rate, b_current, m_current):
b_gradient = get_gradient_at_b(X, y, b_current, m_current)
m_gradient = get_gradient_at_m(X, y, b_current, m_current)
b = b_current - (b_gradient * learning_rate)
m = m_current - (m_gradient * learning_rate)
return [b, m]
def gradient_descent(X, y, learning_rate, num_iterations):
b = 0
m = 0
for i in range(num_iterations):
b, m = step_gradient(X, y, learning_rate, b, m)
return b, m
b, m = gradient_descent(X, y, 0.004, 1000)
y_pred_scaled = [(x * m + b) for x in X_scaled]
y_pred_scaled = np.array(y_pred_scaled)
y_pred = scaler.inverse_transform(y_pred_scaled.reshape(-1,1))
X = X.tolist()
y = y.tolist()
print(X)
print(np.shape(X))
print(y)
print(y_pred)
print(np.shape(y_pred))
plt.scatter(X, y)
plt.plot(X, y_pred, color='red')
plt.show()
我尝试缩放 X,y,尝试修复任何缺失值,打印出 y_pred 后仍然是 NaN,
我还遇到了运行时错误 invalid value encountered in double_scalars
m = m_current - (m_gradient * learning_rate)
所以 rn 我尝试降低学习率或增加迭代次数以查看是否可以提高模型的性能,
任何人都可以检查我的代码,谢谢大家!