我有巨大的数据集,需要计算滞后数字。生成的数据集需要每天都有所请求的“回顾”数量的列。在我的示例中,我回顾了之前 10 个周期的 5 天片段。我正在尝试找出如何概括这一点以涵盖任意数量的请求时间片长度和回溯次数(例如,365 天,前 5 年)。
我已经包含了 60 天的单个站点和 3 个代码示例。第一个示例展示了通过硬编码来完成此操作。如果我有一百个回顾期,这会变得很乏味。
第二个示例展示了我如何开始使用参数(仅供参考)。
第三个(不起作用)展示了我希望写出的那种语句:它遍历要创建的度量列名向量,并使用第二个向量给出对应的滞后量。也许最好使用 {col} 而不是在向量中预先指定名称?我只是不知道如何引用“固定”变量(第一个汇总列),同时通过连续更长的周期创建动态数量的新变量。
谢谢!!!
# Packages used by the examples below: tidyverse supplies dplyr/purrr/tibble,
# and RcppRoll supplies roll_meanr().
library(tidyverse)
library(RcppRoll)

# Example data: 60 consecutive daily rows (2000-01-01 onwards) of `measure`
# for a single station. The examples below refer to this object as `Daily`.
Daily <- tibble(
  stnID = rep(165638621686, 60),
  date = as.Date("2000-01-01") + 0:59,
  measure = c(
    15.3333333333333, 15.9791666666667, 16.65, 12.675, 9.32916666666667,
    9.71041666666667, 11.8916666666667, 11.9958333333333, 11.025, 10.94375,
    11.3791666666667, 9.04166666666667, 10.5604166666667, 10.8583333333333,
    11.4083333333333, 10.1979166666667, 10.19375, 13.1645833333333,
    13.7604166666667, 13.21875, 11.16875, 10.43125, 11.0604166666667,
    13.4041666666667, 14.0979166666667, 10.8521739130435, 8.54375, 5.44375,
    8.06666666666667, 9.77291666666667, 10.2676470588235, 11.5979166666667,
    12.1375, 11.7958333333333, 12.3916666666667, 12.7875, 13.4604166666667,
    10.7541666666667, 10.1979166666667, 10.9145833333333, 11.76875,
    13.6291666666667, 12.5, 10.9416666666667, 12.16875, 12.2229166666667,
    12.0541666666667, 11.69375, 11.05, 12.3229166666667, 12.1208333333333,
    11.5020833333333, 13.1770833333333, 11.3833333333333, 9.88,
    10.9520833333333, 11.275, 11.4208333333333, 11.3270833333333,
    11.0104166666667
  )
)
# Hard-coded variant: within each station, measure_01 is roll_meanr() of
# `measure` over a window of 5, and measure_02 .. measure_10 are measure_01
# lagged by 10, 15, ..., 50 rows. The result is left grouped by stnID.
rolled1 <- mutate(
  group_by(Daily, stnID),
  measure_01 = roll_meanr(measure, 5, na.rm = TRUE),
  measure_02 = lag(measure_01, 10),
  measure_03 = lag(measure_01, 15),
  measure_04 = lag(measure_01, 20),
  measure_05 = lag(measure_01, 25),
  measure_06 = lag(measure_01, 30),
  measure_07 = lag(measure_01, 35),
  measure_08 = lag(measure_01, 40),
  measure_09 = lag(measure_01, 45),
  measure_10 = lag(measure_01, 50)
)
# Parameterised variant of the hard-coded example: the column names and lag
# offsets are derived from two settings, but each column is still written out
# by hand.
periodN <- 10    # number of measure_XX columns to create
periodSize <- 5  # number of days (rows) back to look; also the window width
varList <- paste0("measure_", sprintf("%02d", seq_len(periodN)))

rolled2 <- Daily %>%
  group_by(stnID) %>%
  # varList[1] holds the rolling mean that every later column lags.
  mutate(!!varList[1] := roll_meanr(measure, periodSize, na.rm = TRUE)) %>%
  # .data[[...]] looks the base column up by its generated name, so the
  # right-hand sides follow varList instead of hard-coding `measure_01`.
  mutate(!!varList[2] := lag(.data[[varList[1]]], periodSize * 2)) %>%
  mutate(!!varList[3] := lag(.data[[varList[1]]], periodSize * 3)) %>%
  mutate(!!varList[4] := lag(.data[[varList[1]]], periodSize * 4)) %>%
  mutate(!!varList[5] := lag(.data[[varList[1]]], periodSize * 5)) %>%
  mutate(!!varList[6] := lag(.data[[varList[1]]], periodSize * 6)) %>%
  mutate(!!varList[7] := lag(.data[[varList[1]]], periodSize * 7)) %>%
  mutate(!!varList[8] := lag(.data[[varList[1]]], periodSize * 8)) %>%
  mutate(!!varList[9] := lag(.data[[varList[1]]], periodSize * 9)) %>%
  mutate(!!varList[10] := lag(.data[[varList[1]]], periodSize * 10))
# Generalised variant: a working form of the loop this example was aiming
# for. The lagged columns are generated from two parallel vectors (output
# names and lag offsets) instead of one mutate() per column.
periodN <- 10    # restated here so this example runs on its own
periodSize <- 5
lagSteps <- seq(2, periodN, 1)
varList <- paste0("measure_", sprintf("%02d", lagSteps))  # measure_02 .. measure_10
numList <- periodSize * lagSteps                           # 10, 15, ..., 50

# One unevaluated lag() call per offset, named after the column it creates.
# `!!k` inlines the offset so each call carries its own constant.
lagCalls <- set_names(
  map(numList, function(k) rlang::expr(dplyr::lag(measure_01, !!k))),
  varList
)

rolled3 <- Daily %>%
  group_by(stnID) %>%
  # The "fixed" column that every lagged column is derived from.
  mutate(measure_01 = roll_meanr(measure, periodSize, na.rm = TRUE)) %>%
  # `!!!` splices the named calls in as if each had been typed as
  # `measure_02 = lag(measure_01, 10)`, and so on.
  mutate(!!!lagCalls)
前两种方法我都成功了。我查找了示例,但似乎找不到执行此类操作的示例。
我是否可以作为前一步“创建”结果列,将滞后量放入每个列中,然后进行变异,使每个列成为滞后[目标日期,.x]???
这让我头疼。
取自这个答案。是你想要的吗?
library(data.table)
library(rlang)
library(tidyverse)
n_lags <- 50  # largest lag to generate
step <- 5     # spacing between successive lags

#' Build named lag expressions for splicing into `mutate()`
#'
#' @param var Unquoted column to lag.
#' @param n Largest lag to generate (a whole number, at least `step`).
#' @param step Spacing between successive lags (a positive whole number).
#' @param prefix Text placed before the zero-padded lag in each output name.
#' @return A named list of quosures, one `lag(var, k)` for each
#'   `k` in `step, 2 * step, ..., n`, named `<prefix><k>`.
lags <- function(var, n = 50, step = 5, prefix = "measure_") {
  stopifnot(
    is.numeric(n), length(n) == 1L,
    is.numeric(step), length(step) == 1L,
    step > 0, n >= step
  )
  var <- enquo(var)
  # Honour `step` rather than a fixed spacing of 5.
  indices <- seq(step, n, by = step)
  # create a list of quosures by looping over `indices`
  # then give them names for `mutate` to use later
  map(indices, ~ quo(lag(!!var, !!.x))) %>%
    set_names(sprintf("%s%02d", prefix, indices))
}
# Compute the rolling mean per station, then splice the named lag quosures
# straight into mutate(); `!!!` unquotes the list so each element becomes one
# new column. Grouping keeps the window and the lags from crossing station
# boundaries, and ungroup() returns a plain tibble.
Daily %>%
  group_by(stnID) %>%
  mutate(measure_01 = frollmean(measure, n = step, na.rm = TRUE)) %>%
  mutate(!!!lags(measure_01, n_lags, step)) %>%
  ungroup()
#>
#> # A tibble: 60 × 14
#> stnID date measure measure_01 measure_05 measure_10 measure_15
#> <dbl> <date> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 165638621686 2000-01-01 15.3 NA NA NA NA
#> 2 165638621686 2000-01-02 16.0 NA NA NA NA
#> 3 165638621686 2000-01-03 16.6 NA NA NA NA
#> 4 165638621686 2000-01-04 12.7 NA NA NA NA
#> 5 165638621686 2000-01-05 9.33 14.0 NA NA NA
#> 6 165638621686 2000-01-06 9.71 12.9 NA NA NA
#> 7 165638621686 2000-01-07 11.9 12.1 NA NA NA
#> 8 165638621686 2000-01-08 12.0 11.1 NA NA NA
#> 9 165638621686 2000-01-09 11.0 10.8 NA NA NA
#> 10 165638621686 2000-01-10 10.9 11.1 14.0 NA NA
#> # ℹ 50 more rows
#> # ℹ 7 more variables: measure_20 <dbl>, measure_25 <dbl>, measure_30 <dbl>,
#> # measure_35 <dbl>, measure_40 <dbl>, measure_45 <dbl>, measure_50 <dbl>
创建于 2023-10-28,使用 reprex v2.0.2