我有一个数据集，想用二维回归拟合缺失值周围数据的变化趋势（斜率），从而估算并填充缺失值。我不确定这种方法是否正确，也欢迎其他思路。下面是我的示例：
# Example 5x3 window used to exercise the NaN-filling routine.
# Each inner list is one row (one index label); the entries follow the order
# of `columns`. Exactly one cell is missing: row 0.021917, column 107.5.
local_window = pd.DataFrame(
    data=[
        [0.0007808776581961896, 0.0008955896980595855, np.nan],
        [0.0009108521507099643, 0.001003244315807649, 0.0012354997955153118],
        [0.001109650616093018, 0.0011852612740301449, 0.00140044893559622],
        [0.0013238862647034224, 0.0013952857530607904, 0.0015902024099268574],
        [0.0018552410055933753, 0.0018525880756980716, 0.001973254493672934],
    ],
    index=[0.021917, 0.030136, 0.035616, 0.041095, 0.060273],
    columns=[102.5, 105.0, 107.5],
)
def predict_nan_local(local_window):
    """Fill the NaN cells of a DataFrame with predictions from a fitted plane.

    The column labels and the index labels are used as the two numeric
    coordinates of every cell. A ``LinearRegression`` model of the form
    ``value ~ column_label + index_label`` is fitted on the non-NaN cells and
    evaluated at the coordinates of each NaN cell.

    NOTE(review): ``LinearRegression`` must already be in scope; presumably
    it is ``sklearn.linear_model.LinearRegression`` -- confirm the import.

    Args:
        local_window: DataFrame with numeric column labels, numeric index
            labels and numeric values.

    Returns:
        A new DataFrame with the same labels in which every NaN cell has been
        replaced by its predicted value. The input frame is left untouched.

    Raises:
        ValueError: If every cell is NaN, so there is nothing to fit on.
    """
    # Work on a copy so the caller's frame keeps its original NaN cells.
    filled = local_window.copy()
    if not filled.isnull().values.any():
        return filled

    values = filled.to_numpy(dtype=float)
    missing = np.isnan(values)
    if missing.all():
        raise ValueError(
            "local_window contains only NaN values; nothing to fit on"
        )

    column_coords = filled.columns.to_numpy()
    index_coords = filled.index.to_numpy()

    # Positional (row, column) pairs of the known and the missing cells,
    # both in row-major order.
    known_rows, known_cols = np.nonzero(~missing)
    nan_rows, nan_cols = np.nonzero(missing)

    # Feature order is (column label, index label) for fitting and predicting.
    regressor = LinearRegression()
    regressor.fit(
        np.column_stack((column_coords[known_cols], index_coords[known_rows])),
        values[known_rows, known_cols],
    )
    predicted_values = regressor.predict(
        np.column_stack((column_coords[nan_cols], index_coords[nan_rows]))
    )

    # Write cell by cell: `.iloc[rows, cols] = ...` with two lists addresses
    # the cross product of rows and columns, so with more than one NaN it
    # would also overwrite cells that were never missing.
    for row, col, value in zip(nan_rows, nan_cols, predicted_values):
        filled.iat[row, col] = value
    return filled
如您所见，输出结果看起来不太合理。我是不是遗漏了什么？
可能缺少一些值。
# `predict_nan_local` is the function defined earlier in this file and is
# reused here as-is. It is deliberately not re-declared: a `def` whose body is
# only a comment is a SyntaxError, and a placeholder body would shadow the
# real implementation.

# Use the function to predict and fill missing values. A copy is passed in so
# that `local_window` still holds the original data (NaN included) when it is
# printed below, even if the function fills its argument in place.
predicted_local_window = predict_nan_local(local_window.copy())

# Display the original and predicted DataFrames
print("Original DataFrame:")
print(local_window)
print("\nPredicted DataFrame:")
print(predicted_local_window)