我有一个像这样的数据集:
这是季节性数据,但在某个时间点之后出现了水平变化(level shift)。
我希望 Prophet 能够更快地适应水平变化(level shift)之后的数据。我该怎么做?
我已经阅读了文档,有一些选项:
但是,有什么方法可以迫使 Prophet “更快”地适应水平变化(level shift)之后的数据?
下面是一个可复现的示例:
import pandas as pd
from prophet import Prophet
from random import randint
from datetime import datetime
import matplotlib.pyplot as plt
def get_dataset(total=100, level_shift_point=10):
    """Build a synthetic hourly series containing a single level shift.

    The values cycle with period 3 (``i % 3``) on top of a baseline of 20
    for indices before ``level_shift_point`` and a baseline of 100 from that
    index onwards.

    Args:
        total: Number of hourly observations to generate.
        level_shift_point: Index of the first observation on the higher
            baseline. A value of ``total`` or more yields no shift.

    Returns:
        A ``pandas.DataFrame`` with columns ``y`` (the integer values) and
        ``ds`` (``'%Y-%m-%d %H:%M:%S'`` strings, one per hour, counted in UTC
        from the Unix epoch).
    """
    # Local import: the file-level import only brings in ``datetime``.
    from datetime import timezone

    values = [
        (20 if i < level_shift_point else 100) + i % 3 for i in range(total)
    ]
    # datetime.utcfromtimestamp() is deprecated since Python 3.12; an aware
    # UTC datetime renders to the same string because the format has no %z.
    stamps = [
        datetime.fromtimestamp(3600 * i, tz=timezone.utc).strftime(
            '%Y-%m-%d %H:%M:%S'
        )
        for i in range(total)
    ]
    return pd.DataFrame({"y": values, "ds": stamps})
# Build the synthetic hourly series that contains the level shift.
df = get_dataset()
# NOTE(review): per the Prophet docs, changepoint_prior_scale controls how
# flexible the trend is (library default 0.05); a value this small keeps the
# trend nearly rigid -- confirm this is the setting the repro intends.
m = Prophet(changepoint_prior_scale=0.0001)
m.fit(df)
# Request 100 further hourly steps; include_history=False leaves the training
# timestamps out of the frame that is predicted on.
future = m.make_future_dataframe(periods=100, freq="h", include_history=False)
forecast = m.predict(future)
# Draw the forecast with Prophet's plot helper and show the figure.
m.plot(forecast)
plt.show()
如你所见,预测根本没有任何意义。
我希望预测结果与水平变化(level shift)之后的数据保持一致。我该怎么做?
使用 holidays(节假日)参数来消除水平变化之前数据的影响:
import pandas as pd
from prophet import Prophet
from random import randint
from datetime import datetime
import matplotlib.pyplot as plt
def to_str_date(i):
    """Format the ``i``-th hour after the Unix epoch (UTC) as a string.

    Args:
        i: Hour offset from 1970-01-01 00:00:00 UTC.

    Returns:
        The timestamp formatted as ``'%Y-%m-%d %H:%M:%S'``.
    """
    # Local import: the file-level import only brings in ``datetime``.
    from datetime import timezone

    # datetime.utcfromtimestamp() is deprecated since Python 3.12; an aware
    # UTC datetime renders to the same string because the format has no %z.
    moment = datetime.fromtimestamp(3600 * i, tz=timezone.utc)
    return moment.strftime('%Y-%m-%d %H:%M:%S')
def get_holiday_df():
    """Build the one-row holidays frame handed to ``Prophet(holidays=...)``.

    Returns:
        A ``pandas.DataFrame`` with a single holiday named ``"one"`` dated
        ``to_str_date(1)``, with ``upper_window`` 1 and ``lower_window`` 0.
    """
    return pd.DataFrame(
        {
            "holiday": ["one"],
            "ds": [to_str_date(1)],
            "upper_window": [1],
            "lower_window": [0],
        }
    )
def get_dataset(total=100, level_shift_point=24):
    """Build a synthetic hourly series containing a single level shift.

    The values cycle with period 3 (``i % 3``) on top of a baseline of 20
    for indices before ``level_shift_point`` and a baseline of 100 from that
    index onwards.

    Args:
        total: Number of hourly observations to generate.
        level_shift_point: Index of the first observation on the higher
            baseline. A value of ``total`` or more yields no shift.

    Returns:
        A ``pandas.DataFrame`` with columns ``y`` (the integer values) and
        ``ds`` (``'%Y-%m-%d %H:%M:%S'`` strings, one per hour, counted in UTC
        from the Unix epoch).
    """
    # Local import: the file-level import only brings in ``datetime``.
    from datetime import timezone

    values = [
        (20 if i < level_shift_point else 100) + i % 3 for i in range(total)
    ]
    # datetime.utcfromtimestamp() is deprecated since Python 3.12; an aware
    # UTC datetime renders to the same string because the format has no %z.
    stamps = [
        datetime.fromtimestamp(3600 * i, tz=timezone.utc).strftime(
            '%Y-%m-%d %H:%M:%S'
        )
        for i in range(total)
    ]
    return pd.DataFrame({"y": values, "ds": stamps})
# Build the synthetic hourly series and the one-row holidays frame.
df = get_dataset()
hol = get_holiday_df()
# NOTE(review): presumably the custom holiday is meant to let Prophet absorb
# the pre-shift level as a holiday effect -- confirm the window covers the
# intended rows.
m = Prophet(holidays=hol)
m.fit(df)
# Request 100 further hourly steps; include_history=False leaves the training
# timestamps out of the frame that is predicted on.
future = m.make_future_dataframe(periods=100, freq="h", include_history=False)
forecast = m.predict(future)
# Draw the forecast with Prophet's plot helper and show the figure.
m.plot(forecast)
plt.show()