如果这里的人们可以看一下这段代码并尝试帮助评估这些预测间隔是否正确计算或需要更改什么,将不胜感激。我尝试使用袋装ETS模型(使用R中的Forecast.baggedETS()函数)生成提前48步的预测。数据是来自纽约市的每小时交通流量(计数)。我的问题是,我的点预测似乎很好,而模拟的预测间隔似乎有点宽,并且PI手段也与点预测有所不同(但我想它们可能是不对称的,而且也应该更宽)而不是常规的置信区间)。为了评估预测间隔,我计算出PI覆盖率(PICP)仅为71%,这似乎很低。我还附上了点预测和效绩指标的图表。databagged model + PIs
这些有意义吗,或者我应该改变什么?预先非常感谢。
#original series
ts_ori <- ts(df_ori$V2, start = c(1,1), frequency = 24) #
# train test split
train <- window(ts_ori, start = c(1,1), end = c(21,24))
test <- window(ts_ori, start = c(22,1), end = c(24,1))
L <- BoxCox.lambda(ts_ori)
L # the optimal lambda is 0.9176
# Box-Cox tranformed original series
ts_ori_bc <- BoxCox(x= ts_ori, lambda = L)
train_bc <- window(ts_ori_bc, start = c(1,1), end = c(21,24))
test_bc <- window(ts_ori_bc, start = c(22,1), end = c(24,1))
ets_ZZZ <- ets(y = train, model = "ZZZ")
summary(ets_ZZZ)
forecast(ets_ZZZ) %>% autoplot() + autolayer(test)
# Bergmeirs Bagging ETS function
# the function bld.mbb.bootstrap is used to calculate the bootstrapped series with the
# Box-Cox and Loess-based decomposition (BLD) bootstrap
#num is the number of bootstrap versions to generate
#blocksize is the size of the MBB
# ensembling
# the default for the ETS function is the "ZZZ" automated model selection
# so the bootstrapped ETS models can theoretically choose a different ETS model everytime
# creating 48 step ahead forecasts from a regular automatically chosen ETS model
etsfc <- forecast(ets(train), h=48)
# the bagged ETS() function implements the bagged model forecasting method from the
# Bergmeir 2016 paper
# the default for num in the baggedETS() function is 100
baggedfc <- forecast(baggedETS(train), h=48)
# plotting the bagged forecasts against the normal ETS forecasts
# the last two lines before dev.off() remove grid and grey background color
autoplot(train, xlab = "Time", ylab = "Traffic flows") +
autolayer(baggedfc, series="Bagged.BLD.MBB.ETS", PI=FALSE) +
autolayer(etsfc, series="ETS", PI=FALSE) +
guides(colour=guide_legend(title="Point Forecasts")) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black"))
# the bagged ETS model is the average of 100 ETS models of the bootstrapped training data
# gives the point forecasts and the min/max of the 100 ETS models
summary(baggedfc)
# prediction intervals for the bootstrapped series
# step 1: simulating nsim = 1000 series using the MBB procedure
numsim <- 1000
sim <- bld.mbb.bootstrap(train, numsim)
# step 2: For each of these series, we fit an ETS model and simulate one sample path from that model
# A different ETS model may be selected in each case, although it will most likely select the same
# model because the series are similar. However, the estimated parameters will be different.
# the simulate() function simulates responses from the distribution of the fitted ets(sim) object
h <- 48
future <- matrix(0, nrow=numsim, ncol=h)
for(i in seq(numsim))
future[i,] <- simulate(ets(sim[[i]]), nsim=h, seed = 123)
# step 3: we take the means and quantiles of these simulated sample paths to form point
# forecasts and prediction intervals.
# tsp() gives the start time in time units the end time and the frequency of a time series
start <- tsp(train)[2]+1/24
simfc <- structure(list(
mean = ts(colMeans(future), start=start, frequency=24),
lower = ts(apply(future, 2, quantile, prob=0.025),
start=start, frequency=24),
upper = ts(apply(future, 2, quantile, prob=0.975),
start=start, frequency=24),
level=95),
class="forecast")
# step 4: plotting the bagged ETS model and the prediction intervals
etsfc <- forecast(ets(train), h=48, level=95)
autoplot(train, ylab = "Traffic flows", xlab = "Time", series = "Training data", color = "black") +
autolayer(simfc, series="Simulated PIs") +
autolayer(baggedfc, PI=FALSE, series = "Bagged.BLD.MBB.ETS") +
guides(colour=guide_legend(title="Series")) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line = element_line(colour = "black"))
# PICP
for (i in 1:length(baggedfc$mean)) {
# creating an indicator variable for every element of the mean column and store the
# elements in picp_dummy
picp_dummy <- ifelse(baggedfc$mean[i] >= simfc$lower & baggedfc$mean[i] <= simfc$upper,1,0)
}
print(picp_dummy)
picp = 1/length(baggedfc$mean) *sum(picp_dummy)
print(picp)
# the PICP for the simulated 95% PI for the bagged ETS model is 0.7083333
# MPIW
length = length(simfc$upper)
length
mpiw = (1/length)*sum(simfc$upper - simfc$lower)
print(mpiw)
# the MPIW of the simulated 95% PI is 15437.7
这里有几个问题。
这里有一些清理后的代码可以执行您想要的操作。
library(forecast)
#> Registered S3 method overwritten by 'quantmod':
#> method from
#> as.zoo.data.frame zoo
library(ggplot2)
set.seed(666)
# original series (random data for the example)
ts_ori <- ts(rnorm(24*24), frequency=24)
# train test split
train <- window(ts_ori, start = c(1, 1), end = c(21, 24))
test <- window(ts_ori, start = c(22, 1), end = c(24, 1))
# creating 48 step ahead forecasts from ETS model
etsfc <- forecast(ets(train), h = 48)
# And from a bagged model
baggedfc <- forecast(baggedETS(train), h = 48)
# plotting the bagged forecasts against the normal ETS forecasts
autoplot(train, xlab = "Time", ylab = "Traffic flows") +
autolayer(baggedfc, series = "Bagged ETS", PI = FALSE) +
autolayer(etsfc, series = "ETS", PI = FALSE) +
guides(colour = guide_legend(title = "Point Forecasts"))
# prediction intervals for the bootstrapped series
# step 1: simulate 1000 series using the MBB procedure
numsim <- 200
sim <- bld.mbb.bootstrap(train, numsim)
# step 2: Fit an ETS model to each bootstrapped series, then refit to
# training data and simulate from refit.
h <- 48
future <- matrix(0, nrow = numsim, ncol = h)
for (i in seq(numsim)) {
model <- ets(sim[[i]])
refit <- ets(train, model = model, use.initial.values = TRUE)
future[i, ] <- simulate(refit, nsim = h)
}
# step 3: we take the means and quantiles of these simulated sample paths to
# form point forecasts and prediction intervals.
start <- tsp(train)[2] + 1 / 24
simfc <- structure(list(
mean = ts(colMeans(future),
start = start, frequency = 24
),
lower = ts(apply(future, 2, quantile, prob = 0.025),
start = start, frequency = 24
),
upper = ts(apply(future, 2, quantile, prob = 0.975),
start = start, frequency = 24
),
level = 95
), class = "forecast")
# step 4: plotting the bagged ETS model and the prediction intervals
autoplot(train, ylab = "Traffic flows") +
autolayer(simfc, series = "Simulated PIs") +
autolayer(test, series = "Test") +
guides(colour = guide_legend(title = ""))
# PICP
mean(test >= simfc$lower & test <= simfc$upper)
#> [1] 0.9166667
# MPIW
mean(simfc$upper - simfc$lower)
#> [1] 4.079027
由reprex package(v0.3.0)在2020-05-04创建