请考虑以下示例
library(dplyr)
library(lubridate)

# Build a small example data set: one row per day, each with a randomly
# assigned type and value. No seed is set, so the sampled columns differ
# from run to run.
time <- seq(from = ymd("2014-01-01"), to = ymd("2014-02-20"), by = "days")
values <- sample(
  seq(from = 20, to = 50, by = 5),
  size = length(time),
  replace = TRUE
)
tipe <- sample(
  c("Tipe_A", "Tipe_B", "Tipe_C"),
  size = length(time),
  replace = TRUE
)

# tibble() replaces the deprecated data_frame() constructor.
df2 <- tibble(time, tipe, values)
# A tibble: 51 x 3
time tipe values
<date> <chr> <dbl>
1 2014-01-01 Tipe_B 40
2 2014-01-02 Tipe_B 30
3 2014-01-03 Tipe_A 35
4 2014-01-04 Tipe_A 50
5 2014-01-05 Tipe_B 35
6 2014-01-06 Tipe_B 50
7 2014-01-07 Tipe_A 50
8 2014-01-08 Tipe_B 40
9 2014-01-09 Tipe_A 30
10 2014-01-10 Tipe_B 25
# ... with 41 more rows
我想计算相邻值之间的差值,并按周和类型(tipe)汇总此数据框。
我只能按类型分开做
# Average day-to-day change in `values` per week, for a single type only.
df2 %>%
  filter(tipe == "Tipe_A") %>%
  # Difference from the previous observation, ordered by date; the first
  # row has no predecessor and therefore yields NA.
  mutate(diff = values - lag(values, order_by = time)) %>%
  group_by(week = week(time)) %>%
  # na.rm = TRUE drops the NA differences before averaging.
  summarise(avr = mean(diff, na.rm = TRUE))
# A tibble: 7 x 2
week avr
<dbl> <dbl>
1 1 7.5
2 2 -20
3 3 3.33
4 5 0
5 6 -3.33
6 7 -10
7 8 25
但是我有很多类型,所以这是一个乏味的过程。
有没有一种方法可以更高效地一次对所有类型完成这一计算?
在这里,我们需要先按 'tipe' 分组,然后计算 'diff',并在用 summarise 求分组均值(mean)之前,再把 'week' 添加为分组列:
library(dplyr)

# Average day-to-day change in `values` per type and week.
# week() comes from lubridate, which is loaded earlier in this example.
df2 %>%
  group_by(tipe) %>%
  # Computed within each type: difference from that type's previous
  # observation, ordered by date.
  mutate(diff = values - lag(values, order_by = time)) %>%
  # .add = TRUE keeps the existing `tipe` grouping and adds `week` to it.
  group_by(week = week(time), .add = TRUE) %>%
  summarise(avr = mean(diff, na.rm = TRUE))
或者先用 arrange 排序:
# Alternative: sort the rows by type and date up front, so that lag() can
# rely on row order instead of an explicit `order_by`.
arrange(df2, tipe, time) %>%
  group_by(tipe) %>%
  mutate(diff = values - dplyr::lag(values)) %>%
  # Add `week` as a second grouping level on top of `tipe`.
  group_by(week = lubridate::week(time), .add = TRUE) %>%
  summarise(avr = mean(diff, na.rm = TRUE))