import polars as pl
import numpy as np
import statsmodels.api as sm
from statsmodels.regression.rolling import RollingOLS
# y = ax^2 + bx + c
def rolling_ols(x, y, window_size):
    """Fit y = a*x**2 + b*x + c by OLS over a rolling window.

    Args:
        x: 1-D sequence of regressor values (anything ``np.asarray`` can
            convert to a float array).
        y: 1-D sequence of responses, same length as ``x``.
        window_size: Number of observations in each rolling window.

    Returns:
        Tuple ``(c, b, a)`` of 1-D arrays with one entry per observation:
        the intercept, linear and quadratic coefficients estimated for that
        observation's window. Per the statsmodels RollingOLS documentation,
        rows without a full window are expected to be NaN.
    """
    x_arr = np.asarray(x, dtype=float)
    y_arr = np.asarray(y, dtype=float)
    # Design matrix columns are [1, x, x**2], so the fitted parameters come
    # back in the order (c, b, a).
    design = sm.add_constant(np.column_stack((x_arr, x_arr ** 2)))
    results = RollingOLS(y_arr, design, window=window_size).fit()
    # params holds one row per observation and one column per regressor.
    # Indexing params[0] would return the first *row*, not the intercept
    # series, so select columns explicitly.
    params = np.asarray(results.params)
    return params[:, 0], params[:, 1], params[:, 2]
# Rolling window length, in observations.
size = 12

# Synthetic test data: x counts 0..99, y is standard-normal noise.
test_df = pl.DataFrame({'x': np.arange(100), 'y': np.random.randn(100)})

# Output frame; for now it only carries the two input columns.
# TODO: add the fitted column, along the lines of
#   c, b, a = rolling_ols(x, y, window_size=size)
#   y_hat = c + b * 12 + a * 12^2
df = test_df.select(pl.col('x'), pl.col('y'))
上面代码中的自定义函数 `rolling_ols` 是否正确计算了形如 y = ax^2 + bx + c 的方程的系数?
如何在 polars 数据框(DataFrame)中使用 `params[0]`、`params[1]`、`params[2]` 的结果(即 c、b、a),在滚动窗口为 12 的情况下得到结果列 `y_hat`?
polars 版本 = 0.17.12,statsmodels 版本 = 0.14,numpy 版本 = 1.24.2
为了验证这一点,可以不直接使用 statsmodels 的 `RollingOLS`,而是通过 polars 的 `rolling_apply` 设置滚动窗口大小(`window_size`),并用 `to_numpy` 方法把 polars 的 `Series` 转换为 `numpy.ndarray`,这样 statsmodels 就可以直接使用这些数据了。
import polars as pl
import numpy as np
import statsmodels.api as sm
# Rolling window length, in observations.
size = 12

# Reproducible test data: y is seeded random noise, x counts 1..len(y).
np.random.seed(0)
y = np.random.random(100)
x = np.arange(1, y.size + 1)
test_df = pl.DataFrame(dict(x=x, y=y))
# y = ax^2 + bx + c
def sm_ols(data_x, data_y):
    """Fit y = a*x**2 + b*x + c by OLS and predict y at the last x.

    Args:
        data_x: 1-D sequence of x values for one window.
        data_y: Matching y values; either an object exposing ``to_numpy``
            (such as a polars ``Series``) or any array-like.

    Returns:
        The fitted value ``c + b*x + a*x**2`` evaluated at ``data_x[-1]``.
        When ``data_x`` is ``1..n`` this equals the prediction at ``x = n``.
    """
    x_arr = np.asarray(data_x, dtype=float)
    if hasattr(data_y, 'to_numpy'):
        y_arr = data_y.to_numpy()
    else:
        y_arr = np.asarray(data_y)
    # Design matrix columns are [1, x, x**2], so params come back as (c, b, a).
    design = np.vstack([np.ones(len(x_arr)), x_arr, x_arr ** 2]).T
    c, b, a = sm.OLS(y_arr, design).fit().params
    # Evaluate at the window's last x rather than at len(data_x); the two
    # only coincide when data_x is exactly 1..n.
    x_last = x_arr[-1]
    return c + x_last * b + x_last ** 2 * a
# Output frame: the raw columns plus 't', the quadratic fit of each rolling
# window of y evaluated via sm_ols. Each window is regressed against the
# local positions 1..size rather than the actual x values.
df = test_df.select(
    pl.col('x'),
    pl.col('y'),
    pl.col('y')
    .rolling_apply(
        lambda window: sm_ols(np.arange(1, size + 1), window),
        window_size=size,
    )
    .alias('t'),
)
print(df)