将cumsum重置为变量中的值

问题描述 投票:0回答:3

我有一个简单的数据集,包含 date、sample 和 applied 三列。sample 的值每天应减少 1,并在有施用(applied)的当天加上施用量。当我们取得新的 sample 时,则从这个新值重新开始计算。期望的结果在变量 estimate 中给出。我正在尝试在 R 中计算变量 estimate。

这是我的数据集:

# Example data from the question (looks like dput() output): a data frame
# with 24 rows, one per consecutive day, and four columns.
structure(
    list(
        # Day of the observation: numeric values carrying class "Date".
        date = structure(c(17591, 17592, 17593, 17594, 
          17595, 17596, 17597, 17598, 17599, 17600, 17601, 17602, 17603, 
          17604, 17605, 17606, 17607, 17608, 17609, 17610, 17611, 17612, 
          17613, 17614), class = "Date"),
        # Sampled value; NA on days where no new sample was taken.
        sample = c(30, NA, NA, NA, NA, 20, NA, NA, 40, NA, NA, 
          NA, NA, NA, 35, NA, NA, 15, NA, NA, NA, 
          NA, 5, NA), 
        # Amount applied on that day; NA when nothing was applied.
        # Note row 15 has both a sample (35) and an application (10).
        applied = c(NA, NA, 10, NA, NA, NA, NA, 10, NA, NA, 
          15, NA, NA, NA, 10, NA, NA, NA, 15, NA, NA, 10, NA, NA), 
        # Expected result the question wants to reproduce. On row 15 it is 35,
        # i.e. the application on a sampling day is not added to the new sample.
        estimate = c(30, 29, 38, 37, 36, 20, 19, 28, 40, 39, 53, 52, 
          51, 50, 35, 34, 33, 15, 29, 28, 27, 36, 5, 4)
        ), 
    class = "data.frame", row.names = c(NA, -24L)
)
r cumsum
3个回答
2
投票

这段代码也许还可以再精简一些,但它能得到正确的结果:

library(dplyr)

# Every non-NA `sample` opens a new run; within a run the estimate is the
# run's sample value plus the running total of the daily net changes.
df %>%
  group_by(group = cumsum(!is.na(sample))) %>%
  mutate(
    # Daily net change:
    #   sampling day            ->  0 (the run restarts at the sample value)
    #   ordinary day            -> -1
    #   day with an application -> applied - 1
    to_add = case_when(
      !is.na(sample) ~ 0,
      is.na(applied) ~ -1,
      TRUE ~ applied - 1
    ),
    result = first(sample) + cumsum(to_add)
  )
# # A tibble: 24 x 7
# # Groups:   group [6]
#    date       sample applied estimate group to_add result
#    <date>      <dbl>   <dbl>    <dbl> <int>  <dbl>  <dbl>
#  1 2018-03-01     30      NA       30     1      0     30
#  2 2018-03-02     NA      NA       29     1     -1     29
#  3 2018-03-03     NA      10       38     1      9     38
#  4 2018-03-04     NA      NA       37     1     -1     37
#  5 2018-03-05     NA      NA       36     1     -1     36
#  6 2018-03-06     20      NA       20     2      0     20
#  7 2018-03-07     NA      NA       19     2     -1     19
#  8 2018-03-08     NA      10       28     2      9     28
#  9 2018-03-09     40      NA       40     3      0     40
# 10 2018-03-10     NA      NA       39     3     -1     39
# # ... with 14 more rows

1
投票
# Base-R solution: split the rows into runs that each start at a non-NA
# `sample`, then combine two per-run series:
#   * the sample value decreasing by 1 for every day since the sample, and
#   * the running total of the amounts applied since the sample.
# An application recorded on the sampling day itself is ignored, because the
# expected `estimate` restarts exactly at the new sample value (row 15: 35,
# not 45).
local({
  has_sample <- !is.na(df1$sample)
  run_id <- cumsum(has_sample)

  # The first element of every run is its sample. Rows before the very first
  # sample form a run whose first element is NA, so they yield NA rather than
  # the -Inf (plus warning) that max(x, na.rm = TRUE) would produce.
  decayed <- ave(
    df1$sample,
    run_id,
    FUN = function(x) x[1L] - (seq_along(x) - 1)
  )

  # Treat "nothing applied" and "applied on a sampling day" as 0.
  applied_since <- ifelse(has_sample | is.na(df1$applied), 0, df1$applied)

  decayed + ave(applied_since, run_id, FUN = cumsum)
})
# [1] 30 29 38 37 36 20 19 28 40 39 53 52 51 50 35 34 33 15 29 28 27 36  5  4

0
投票
# Reproducible copy of the question's data: 24 consecutive days starting on
# 2018-03-01. `sample` and `applied` are NA on days without a sample or an
# application; `estimate` is the expected result.
df <- data.frame(
  date = as.Date("2018-03-01") + 0:23,
  sample = c(
    30, NA, NA, NA, NA, 20, NA, NA,
    40, NA, NA, NA, NA, NA, 35, NA,
    NA, 15, NA, NA, NA, NA, 5, NA
  ),
  applied = c(
    NA, NA, 10, NA, NA, NA, NA, 10,
    NA, NA, 15, NA, NA, NA, 10, NA,
    NA, NA, 15, NA, NA, 10, NA, NA
  ),
  estimate = c(
    30, 29, 38, 37, 36, 20, 19, 28,
    40, 39, 53, 52, 51, 50, 35, 34,
    33, 15, 29, 28, 27, 36, 5, 4
  )
)




# library() fails loudly if dplyr is missing; require() would only return
# FALSE and let the pipeline below fail later with a less helpful error.
library(dplyr)

# Work on a copy without the reference `estimate` column, selected by name so
# the code does not depend on column positions.
df2 <- df[, c("date", "sample", "applied")]

df2 %>%
  arrange(date) %>%                      # cumsum() below relies on date order
  mutate(
    # Every non-NA sample starts a new run.
    sample_id = cumsum(!is.na(sample)),
    # Daily net change: on a sampling day it is 0, even if something was
    # applied that day, because the expected estimate restarts exactly at the
    # new sample value (row 15: 35, not 45). On other days it is the amount
    # applied (0 if none) minus the daily decrease of 1.
    applied = if_else(is.na(sample), coalesce(applied, 0) - 1, 0),
    # Per-row contribution: the sample value on sampling days, otherwise the
    # daily net change.
    estimate0 = coalesce(sample, 0) + applied
  ) %>%
  group_by(sample_id) %>%                # restrict the cumsum to each run
  mutate(estimate = cumsum(estimate0)) %>%
  ungroup() %>%
  select(date, sample, applied, estimate) %>%
  print(n = 24)

# # A tibble: 24 x 4
#    date       sample applied estimate
#    <date>      <dbl>   <dbl>    <dbl>
#  1 2018-03-01     30       0       30
#  2 2018-03-02     NA      -1       29
#  3 2018-03-03     NA       9       38
#  4 2018-03-04     NA      -1       37
#  5 2018-03-05     NA      -1       36
#  6 2018-03-06     20       0       20
#  7 2018-03-07     NA      -1       19
#  8 2018-03-08     NA       9       28
#  9 2018-03-09     40       0       40
# 10 2018-03-10     NA      -1       39
# 11 2018-03-11     NA      14       53
# 12 2018-03-12     NA      -1       52
# 13 2018-03-13     NA      -1       51
# 14 2018-03-14     NA      -1       50
# 15 2018-03-15     35       0       35
# 16 2018-03-16     NA      -1       34
# 17 2018-03-17     NA      -1       33
# 18 2018-03-18     15       0       15
# 19 2018-03-19     NA      14       29
# 20 2018-03-20     NA      -1       28
# 21 2018-03-21     NA      -1       27
# 22 2018-03-22     NA       9       36
# 23 2018-03-23      5       0        5
# 24 2018-03-24     NA      -1        4
© www.soinside.com 2019 - 2024. All rights reserved.