如何在 Polars 中使用 Rolling OLS?

问题描述 投票:0回答:1
import polars as pl
import numpy as np
import statsmodels.api as sm
from statsmodels.regression.rolling import RollingOLS


# y = ax^2 + bx + c
def rolling_ols(x, y, window_size):
    """Fit ``y = a*x**2 + b*x + c`` by OLS over every rolling window.

    Args:
        x: 1-D array-like of regressor values.
        y: 1-D array-like of responses, same length as ``x``.
        window_size: Number of consecutive observations in each window.

    Returns:
        A tuple ``(c, b, a)`` of 1-D arrays, one entry per observation:
        the intercept, the linear coefficient and the quadratic coefficient
        of the window ending at that observation.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    # Design matrix columns: [1, x, x**2]. has_constant="add" forces the
    # intercept column even if one of the regressors happens to be constant,
    # so the column layout below is always the same.
    design = sm.add_constant(np.column_stack((x, x ** 2)), has_constant="add")

    results = RollingOLS(y, design, window=window_size).fit()

    # params is 2-D: one row per observation, one column per coefficient.
    # Indexing with params[0] would pick the first *row* (a single window),
    # so select whole columns instead.
    params = np.asarray(results.params)
    return params[:, 0], params[:, 1], params[:, 2]


# Rolling window length (number of rows used for each regression).
size = 12

# Test data: x is 0..99 and y is standard-normal noise (no seed is set here).
test_df = pl.DataFrame(
    {
        'x': np.arange(100),
        'y': np.random.randn(100)
    }
)

# Desired output: the original columns plus a rolling prediction column.
df = test_df.select(
    'x',
    'y',

    # TODO: add the y_hat column, i.e.
    # c, b, a = rolling_ols(x, y, window_size=size)
    # y_hat = c + b * 12 + a * 12**2
)

上面代码中的自定义函数

rolling_ols
是否正确计算形式为
y = ax^2 + bx + c
的方程的系数?

如何在 Polars 数据框中使用

params[0]
params[1]
params[2]
的结果,即
c
b
a
,假设滚动窗口为
12
,以获得结果列
y_hat
?

polars 版本 = 0.17.12,statsmodels 版本 = 0.14,numpy 版本 = 1.24.2

numpy statsmodels python-polars
1个回答
0
投票

可以改用 Polars 的 rolling_apply 来设置滚动窗口的 window_size,而不是直接使用 statsmodels 的 RollingOLS;在回调函数中用 to_numpy 方法把 Polars 的 Series 转换为 numpy.ndarray,这样 statsmodels 就可以直接使用了。

import polars as pl
import numpy as np
import statsmodels.api as sm

# Number of rows in each rolling regression window.
size = 12

# Reproducible test data: y is uniform noise, x simply counts 1..len(y).
np.random.seed(0)
y = np.random.random(100)
x = np.arange(1, y.size + 1)
test_df = pl.DataFrame({'x': x, 'y': y})


# y = ax^2 + bx + c
def sm_ols(data_x, data_y):
    """Fit a quadratic to one window and predict at the window's last x.

    Args:
        data_x: 1-D array-like of x-values for the window.
        data_y: The window's y-values; either an object exposing
            ``to_numpy()`` (such as the Series handed over by
            ``rolling_apply``) or a plain array-like.

    Returns:
        The fitted value ``c + b*x + a*x**2`` evaluated at ``data_x[-1]``.
    """
    xs = np.asarray(data_x, dtype=float)
    # Accept both Series-like objects and plain arrays.
    ys = data_y.to_numpy() if hasattr(data_y, "to_numpy") else np.asarray(data_y)

    # Design matrix columns: [1, x, x**2] -> params come back as (c, b, a).
    design = np.column_stack((np.ones(len(xs)), xs, xs ** 2))
    c, b, a = sm.OLS(ys, design).fit().params

    # Evaluate at the last x of the window rather than at len(data_x);
    # the two agree only when data_x is exactly 1..n.
    x_last = xs[-1]
    return c + b * x_last + a * x_last ** 2


# output: the original columns plus the rolling quadratic prediction

# The x-values are identical for every window, so build them once instead
# of on every callback invocation.
window_x = np.arange(1, size + 1)

df = test_df.select(
    'x',
    'y',

    # Fit the quadratic on each window of y and keep the prediction at the
    # window's last point; named y_hat as requested in the question.
    pl.col('y')
    .rolling_apply(
        lambda s: sm_ols(window_x, s),
        window_size=size
    )
    .alias('y_hat')
)
print(df)
© www.soinside.com 2019 - 2024. All rights reserved.