使用 dplyr 按组对多列进行滚动计算(rollapply)

问题描述 投票:0回答:1

我有一份时间序列数据,其中包含多个数值列和一个分类分组变量。我想按组计算每个数值列的滚动平均值。数据在某些时间步上存在 NA,我希望在计算滚动平均值时忽略这些 NA。我在 Stack Overflow 上找到了几个与此相关的问题和答案,但到目前为止没有一个解决方案有效,我想知道自己遗漏了什么。我知道可以先把数据转换为长格式再计算,但我还有另外几个分类变量,希望保持原样不变。

df[1:10,]
             Timestamp     val1     val2 group
1  2018-03-28 10:56:14       NA       NA     2
2  2018-03-28 10:56:18 152.4206 150.2132     2
3  2018-03-28 10:56:30 140.1681 140.4355     2
4  2018-03-28 10:56:38 141.4961 140.6526     2
5  2018-03-28 10:56:40 140.0825 139.0457     2
6  2018-03-28 10:56:42 140.0453 140.0724     2
7  2018-03-28 10:56:44 140.7152 140.6107     2
8  2018-03-28 10:56:46 137.1136 137.7038     2
9  2018-03-28 10:56:52 138.1676 138.1061     2
10 2018-03-28 10:57:00 139.7503 138.8120     2

# Per-group trailing (right-aligned) 5-point rolling mean of val1 and val2,
# overwriting the original columns and returning a plain data.frame.
# The rollapplyr() arguments go through an explicit lambda because passing
# extra arguments to across() via `...` is deprecated since dplyr 1.1.0.
# Columns are selected by name rather than by position so the call does not
# silently pick other columns if the layout of `df` changes.
# na.rm = TRUE drops NAs inside each window; partial = TRUE also evaluates the
# shorter windows at the start of each group. A window that holds only NAs
# yields NaN (mean of zero values), which is why the first row prints as NaN.
df.sma <- df %>%
  group_by(group) %>%
  mutate(across(
    c(val1, val2),
    \(x) zoo::rollapplyr(
      x, 5, mean,
      na.rm = TRUE, by = 1, partial = TRUE, fill = NA
    )
  )) %>%
  as.data.frame()

> head(df.sma)
            Timestamp     val1     val2 group
1 2018-03-28 10:56:14      NaN      NaN     2
2 2018-03-28 10:56:18 152.4206 150.2132     2
3 2018-03-28 10:56:30 146.2944 145.3243     2
4 2018-03-28 10:56:38 144.6950 143.7671     2
5 2018-03-28 10:56:40 143.5419 142.5867     2
6 2018-03-28 10:56:42 142.8425 142.0839     2

# Same per-group trailing 5-point rolling mean as a second attempt, with the
# grouping dropped explicitly before converting to a plain data.frame.
# mutate_at()/vars() is superseded; across() with an explicit lambda is the
# current dplyr idiom and applies the identical rollapplyr() call per column.
# na.rm = TRUE ignores NAs inside a window; partial = TRUE keeps the shorter
# windows at the start of each group; an all-NA window yields NaN.
df.sma2 <- df %>%
  group_by(group) %>%
  mutate(across(
    c(val1, val2),
    \(x) zoo::rollapplyr(
      x, 5, mean,
      na.rm = TRUE, by = 1, partial = TRUE, fill = NA
    )
  )) %>%
  ungroup() %>%
  as.data.frame()

head(df.sma2)
            Timestamp     val1     val2 group
1 2018-03-28 10:56:14      NaN      NaN     2
2 2018-03-28 10:56:18 152.4206 150.2132     2
3 2018-03-28 10:56:30 146.2944 145.3243     2
4 2018-03-28 10:56:38 144.6950 143.7671     2
5 2018-03-28 10:56:40 143.5419 142.5867     2
6 2018-03-28 10:56:42 142.8425 142.0839     2

一些示例数据:

# Reproducible sample data (dput() output): a 100-row data.frame with columns
#   Timestamp - POSIXct built from epoch seconds, tzone "HST"
#   val1, val2 - numeric measurements containing scattered NAs
#   group      - integer grouping variable (2L, 3L)
# The trailing rows are NA in every column, including Timestamp and group.
df<-structure(list(Timestamp = structure(c(1522270574, 1522270578, 
1522270590, 1522270598, 1522270600, 1522270602, 1522270604, 1522270606, 
1522270612, 1522270620, 1522270624, 1522270626, 1522270630, 1522270638, 
1522270646, 1522270650, 1522270652, 1522270656, 1522270658, 1522270666, 
1522270672, 1522270674, 1522270678, 1522270682, 1522270684, 1522270700, 
1522270704, 1522270706, 1522270710, 1522270712, 1522270714, 1522270720, 
1522270728, 1522270732, 1522270736, 1522270742, 1522270760, 1522270764, 
1522270766, 1522270770, 1522270788, 1522270792, 1522270796, 1522270800, 
1522270808, 1522270814, 1522270820, 1522270828, 1522270832, 1522270858, 
1522272018, 1522272022, 1522272038, 1522272042, 1522272044, 1522272045, 
1522272047, 1522272048, 1522272049, 1522272051, 1522272052, 1522272053, 
1522272055, 1522272056, 1522272068, 1522272070, 1522272071, 1522272072, 
1522272074, 1522272075, 1522272077, 1522272079, 1522272080, 1522272081, 
1522272083, 1522272084, 1522272086, 1522272094, 1522272096, 1522272097, 
1522272099, 1522272100, 1522272102, 1522272104, 1522272105, 1522272108, 
1522272116, 1522272142, 1522272150, 1522272154, 1522272162, NA, 
NA, NA, NA, NA, NA, NA, NA, NA), class = c("POSIXct", "POSIXt"
), tzone = "HST"), val1 = c(NA, 152.420640676898, 140.168146874079, 
141.496126708293, 140.082529977673, 140.045275612881, 140.715183202103, 
137.113586611079, 138.16758361454, 139.75031798328, 140.215984869446, 
139.764008780732, 140.037498184457, 139.200923807515, 138.761322782691, 
139.228081249134, 140.132374508114, 140.603522297505, 140.303591344302, 
138.79444324265, 138.592897758994, 138.933406279942, 136.231013692759, 
139.381687594324, 140.308379184618, 137.223670050664, 139.508300292213, 
139.405762972775, 140.498207629702, 140.557352578793, 141.007893031604, 
138.469342717392, 138.988541937918, 140.073618166769, 140.465068264112, 
140.943824496289, 139.503847830698, 139.828469267282, 139.866901113749, 
140.231110786363, 138.700458457692, 140.256192399129, 141.361416033739, 
140.713907096823, 138.980835812608, 138.303403317905, 139.408023758492, 
139.030991942014, 137.83274536635, 140.004326485218, 138.181652816362, 
138.080081349046, 140.177572453938, 144.798191178199, 145.348070693365, 
NA, 142.87268547048, NA, 143.973806261838, 145.697177880129, 
NA, 147.039813464252, 143.441477045751, NA, NA, 148.579113171457, 
NA, 147.138583941394, 146.320431451225, NA, 146.226167224113, 
142.448219150013, NA, 145.885281881276, 144.985690058093, NA, 
144.912584374715, 145.582507664862, 148.121988771797, NA, 146.248506959323, 
NA, 145.151760593063, 149.537625507927, NA, 144.812868524753, 
146.137437723411, 149.767161659034, 150.696017313469, 152.223027191881, 
153.784657674474, NA, NA, NA, NA, NA, NA, NA, NA, NA), val2 = c(NA, 
150.213158225837, 140.435520231684, 140.652568715905, 139.045674169061, 
140.072442579546, 140.610737632044, 137.703775588197, 138.106142335419, 
138.812041508755, 140.683604067168, 140.038379585955, 140.880060790739, 
138.942708204401, 138.041374001909, 139.30346343814, 139.652246580653, 
140.711970095059, 139.504007815648, 138.203914835007, 137.584657523716, 
138.547354791723, 138.663066399806, 138.546464167489, 139.327159278758, 
138.505236822648, 138.446301065168, 139.119069068746, 139.88861955423, 
140.430073431184, 139.008313625876, 137.484299942125, 138.41216391793, 
139.133242967442, 140.59965630854, 141.946089778931, 139.224562211565, 
140.076879364045, 139.765358074175, 139.632715461796, 135.802066432194, 
140.129853910036, 140.270979286976, 139.992006786585, 138.808982052871, 
137.756255920595, 139.103795821709, 137.881639260096, 136.807200185064, 
139.397877607737, 137.239641462637, 138.30344591474, 139.844021908301, 
144.416297156919, 143.146988896577, NA, 142.678276011255, NA, 
142.89580356053, 144.437403738214, NA, 144.108154956793, 143.871264835533, 
NA, NA, 145.343486626838, NA, 146.610006556818, 146.45235647302, 
NA, 145.459509738373, 145.022626197219, NA, 144.175461347449, 
142.859062038079, NA, 142.76288245402, 144.170746259675, 146.629646285058, 
NA, 146.095040263688, NA, 144.142762608373, 146.492350679929, 
NA, 143.929271519015, 143.03006571855, 148.272680355239, 154.998098793559, 
150.788837950036, 152.361521480621, NA, NA, NA, NA, NA, NA, NA, 
NA, NA), group = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
NA, NA, NA, NA, NA, NA, NA, NA, NA)), row.names = c(NA, -100L
), class = "data.frame")
r dplyr zoo rollapply
1个回答
0
投票

你是在找这个吗?

library(dplyr)

# Trailing 5-point rolling mean of every `val*` column, written to new
# `<col>_rm` columns next to the originals.
# `.by = group` restarts the window in each group, so a window never mixes
# rows from two different groups (per-operation grouping needs dplyr >= 1.1.0).
# fill = NA pads the leading rows of each group that have no complete window.
# NOTE: zoo::rollmeanr() does not skip NAs, so a window containing an NA
# yields NA; use zoo::rollapplyr(x, 5, mean, na.rm = TRUE, fill = NA) instead
# if NAs inside a window must be ignored.
# NOTE(review): rollmeanr() may reject groups with fewer than k rows; confirm
# the group sizes before relying on this.
df |>
  mutate(
    across(
      starts_with("val"),
      \(x) zoo::rollmeanr(x = x, k = 5L, fill = NA),
      .names = "{col}_rm"
    ),
    .by = group
  )

结果如下:

              Timestamp     val1     val2 group  val1_rm  val2_rm
1   2018-03-28 10:56:14       NA       NA     2       NA       NA
2   2018-03-28 10:56:18 152.4206 150.2132     2       NA       NA
3   2018-03-28 10:56:30 140.1681 140.4355     2       NA       NA
4   2018-03-28 10:56:38 141.4961 140.6526     2       NA       NA
5   2018-03-28 10:56:40 140.0825 139.0457     2       NA       NA
6   2018-03-28 10:56:42 140.0453 140.0724     2 142.8425 142.0839
7   2018-03-28 10:56:44 140.7152 140.6107     2 140.5015 140.1634
8   2018-03-28 10:56:46 137.1136 137.7038     2 139.8905 139.6170
9   2018-03-28 10:56:52 138.1676 138.1061     2 139.2248 139.1078
10  2018-03-28 10:57:00 139.7503 138.8120     2 139.1584 139.0610
...
© www.soinside.com 2019 - 2024. All rights reserved.