计算有基准年和相对百分比变化的指数。

问题描述 投票:0回答:1

我正在寻找一种方法,在id和组中,使用滞后(或者说是领先)的 value 和新的索引号 idx_value 来计算下一个索引号。

# install.packages(c("tidyverse"), dependencies = TRUE)
library(tibble)
library(magrittr)

比如,我有这个数据框。

start_tbl <- structure(list(id = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 
2L, 2L, 2L, 2L), grp = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 
1L, 1L, 2L, 2L, 2L), year = c(7L, 8L, 9L, 10L, 7L, 8L, 9L, 10L, 
7L, 8L, 9L, 7L, 8L, 9L), value = c(2, -7, -2.3, 1.1, -1, -12, 
-4, 2, 1, -3, 2, -1, -4, -2)), row.names = c(NA, -14L), class = c("tbl_df", 
"tbl", "data.frame"))
start_tbl
# A tibble: 14 x 4
      id   grp  year value
   <int> <int> <int> <dbl>
 1     1     1     7   2  
 2     1     1     8  -7  
 3     1     1     9  -2.3
 4     1     1    10   1.1
 5     1     2     7  -1  
 6     1     2     8 -12  
 7     1     2     9  -4  
 8     1     2    10   2  
 9     2     1     7   1  
10     2     1     8  -3  
11     2     1     9   2  
12     2     2     7  -1  
13     2     2     8  -4  
14     2     2     9  -2  

现在我想把 id 1、grp 1 的第7年作为基准指数 100,然后计算 id 1、grp 1 第8年的指数:100*(1+(-7/100))=93.0;接下来用 93 这个结果来计算下一年的指数:93*(1+(-2.3/100))=90.861,以此类推。每遇到新的 id 或新的 grp,指数都要重新开始,基年为第7年(指数为100)。

我用下面的代码已经很接近了:

tbl %>% group_by(id) %>% mutate(idx_value = value-lag(value), idx_value = 100*(1+value/100) )
# A tibble: 14 x 5
# Groups:   id [2]
      id   grp  year value idx_value
   <int> <int> <int> <dbl>     <dbl>
 1     1     1     7   2       102  
 2     1     1     8  -7        93  
 3     1     1     9  -2.3      97.7
 4     1     1    10   1.1     101. 
 5     1     2     7  -1        99  
 6     1     2     8 -12        88  
 7     1     2     9  -4        96  
 8     1     2    10   2       102  
 9     2     1     7   1       101  
10     2     1     8  -3        97  
11     2     1     9   2       102  
12     2     2     7  -1        99  
13     2     2     8  -4        96  
14     2     2     9  -2        98  

但我想得到的是:

end_tbl <- structure(list(id = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 
2L, 2L, 2L, 2L), grp = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 
1L, 1L, 2L, 2L, 2L), year = c(7L, 8L, 9L, 10L, 7L, 8L, 9L, 10L, 
7L, 8L, 9L, 7L, 8L, 9L), value = c(2, -7, -2.3, 1.1, -1, -12, 
-4, 2, 1, -3, 2, -1, -4, -2), idx_value = c(100L, 93L, 91L, 92L, 
100L, 88L, 84L, 86L, 100L, 97L, 99L, 100L, 96L, 94L)), row.names = c(NA, 
-14L), class = c("tbl_df", "tbl", "data.frame"))
end_tbl
# A tibble: 14 x 5
      id   grp  year value idx_value
   <int> <int> <int> <dbl>     <int>
 1     1     1     7   2         100
 2     1     1     8  -7          93
 3     1     1     9  -2.3        91
 4     1     1    10   1.1        92
 5     1     2     7  -1         100
 6     1     2     8 -12          88
 7     1     2     9  -4          84
 8     1     2    10   2          86
 9     2     1     7   1         100
10     2     1     8  -3          97
11     2     1     9   2          99
12     2     2     7  -1         100
13     2     2     8  -4          96
14     2     2     9  -2          94

任何帮助都将被感激。也许 答案就在这里.

下面用一个额外的小数据示例 start_tbl2 来说明这个问题。如果我使用如下的起始 tibble start_tbl2,并用 cumsum() 来计算:

    start_tbl2 <- structure(list(id = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), 
grp = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L),
year = c(7L, 8L, 9L, 10L, 7L, 8L, 9L, 10L), 
value = c(2, -12, -18.3, 100, 15, 30, 40, -50)), 
row.names = c(NA, -8L), class = c("tbl_df", "tbl", "data.frame"))

library(dplyr)
start_tbl2 %>%
   group_by(id, grp) %>% 
   mutate(idx_value = c(100, round(100 * (1 + cumsum(value[-1])/100))))
# A tibble: 8 x 5
# Groups:   id, grp [2]
     id   grp  year value idx_value
  <int> <int> <int> <dbl>     <dbl>
1     1     1     7   2         100
2     1     1     8 -12          88
3     1     1     9 -18.3        70
4     1     1    10 100         170
5     1     2     7  15         100
6     1     2     8  30         130
7     1     2     9  40         170
8     1     2    10 -50         120

而我在手工计算时却得到了这个结果。

Percentage_change   cal_by_hand cumsum  diff
2                   100         100     0
-12                 88          88      0
-18.3               71.896      70      1.896
100                 143.792     170     -26.208
15                  100         100     0
30                  130         130     0
40                  182         170     12
-50                 91          120     -29
r dplyr percentage mutate tibble
1个回答
3
投票

基于新的数据集

library(purrr)
library(dplyr)
# Build a chained index within each (id, grp) group.
# accumulate() starts from .init = 100 (the base-year index) and, for every
# value after the first row of the group, multiplies the running index (.x)
# by (1 + .y / 100), where .y is that row's percentage change.
# value[-1] drops the base-year row's own value; with .init supplied the
# result has one element per row of the group.
# NOTE(review): the new column is spelled `idx_vlue` (the question uses
# `idx_value`); kept as-is so it matches the printed output below.
start_tbl2 %>%
      group_by(id, grp) %>%
      mutate(idx_vlue = accumulate(value[-1], ~ .x * (1 + .y/100), .init = 100 ))
# A tibble: 8 x 5
# Groups:   id, grp [2]
#     id   grp  year value idx_vlue
#  <int> <int> <int> <dbl>    <dbl>
#1     1     1     7   2      100  
#2     1     1     8 -12       88  
#3     1     1     9 -18.3     71.9
#4     1     1    10 100      144. 
#5     1     2     7  15      100  
#6     1     2     8  30      130  
#7     1     2     9  40      182  
#8     1     2    10 -50       91  

对原始的 `start_tbl` 使用同样的方法:

# Same chained-index computation applied to start_tbl: within each
# (id, grp) group the index starts at 100 and each later row multiplies the
# previous index (.x) by (1 + .y / 100), .y being that row's percentage change.
start_tbl %>%
     group_by(id, grp) %>%
     mutate(idx_vlue = accumulate(value[-1], ~ .x * (1 + .y/100), .init = 100 ))
# A tibble: 14 x 5
# Groups:   id, grp [4]
#      id   grp  year value idx_vlue
#   <int> <int> <int> <dbl>    <dbl>
# 1     1     1     7   2      100  
# 2     1     1     8  -7       93  
# 3     1     1     9  -2.3     90.9
# 4     1     1    10   1.1     91.9
# 5     1     2     7  -1      100  
# 6     1     2     8 -12       88  
# 7     1     2     9  -4       84.5
# 8     1     2    10   2       86.2
# 9     2     1     7   1      100  
#10     2     1     8  -3       97  
#11     2     1     9   2       98.9
#12     2     2     7  -1      100  
#13     2     2     8  -4       96  
#14     2     2     9  -2       94.1

4
投票

另一种方法是先把百分比数值转换为增长因子(即 (100 + value) / 100),然后使用 cumprod() 做累积乘积。

library(dplyr)

# Chained index via a cumulative product, computed per (id, grp) group.
# The vector passed to cumprod() is the base index 100 followed by the growth
# factor (100 + value) / 100 of every row after the first, so element k is
# 100 multiplied by all growth factors up to row k.
start_tbl %>%
  group_by(id, grp) %>%
  mutate(idx_value = cumprod(c(100, (100 + value[-1]) / 100))) 

# A tibble: 14 x 5
# Groups:   id, grp [4]
      id   grp  year value idx_value
   <int> <int> <int> <dbl>     <dbl>
 1     1     1     7   2       100  
 2     1     1     8  -7        93  
 3     1     1     9  -2.3      90.9
 4     1     1    10   1.1      91.9
 5     1     2     7  -1       100  
 6     1     2     8 -12        88  
 7     1     2     9  -4        84.5
 8     1     2    10   2        86.2
 9     2     1     7   1       100  
10     2     1     8  -3        97  
11     2     1     9   2        98.9
12     2     2     7  -1       100  
13     2     2     8  -4        96  
14     2     2     9  -2        94.1

1
投票

只用 base R,我写了下面这个函数;至少对这两个例子,它都能返回所需的数据框。

#' Add a chained index column (base = 100) to a data frame
#'
#' Within every (id, grp) group the first row gets the index 100 and each
#' following row gets the previous index multiplied by `1 + value / 100`,
#' where `value` is that row's percentage change. Rows are assumed to be in
#' chronological order within each group; the groups themselves may appear in
#' any order and may be interleaved.
#'
#' @param X A data.frame or tibble.
#' @param value_col Position or name of the percentage-change column
#'   (default 4, the layout used in the examples).
#' @param grp_col Position or name of the group column (default 2).
#' @param id_col Position or name of the id column (default 1).
#' @return `X` with an additional numeric column `idx_values`.
addIdxValue <- function(X, value_col = 4, grp_col = 2, id_col = 1) {
  # Turn one group's percentage changes into an index that starts at 100.
  idx <- function(y) {
    # ave() may call FUN on empty id/grp combinations.
    if (length(y) == 0L) {
      return(numeric(0))
    }
    # The first element is the base year, so its own value is not applied.
    100 * cumprod(c(1, (100 + y[-1]) / 100))
  }
  # `[[` extracts plain vectors from both data.frames and tibbles, and ave()
  # writes each group's result back to the rows it came from, so the input
  # does not have to be sorted by id and grp.
  X[["idx_values"]] <- ave(
    as.numeric(X[[value_col]]),
    X[[id_col]],
    X[[grp_col]],
    FUN = idx
  )
  X
}

> addIdxValue(start_tbl)
   id grp year value idx_values
1   1   1    7   2.0  100.00000
2   1   1    8  -7.0   93.00000
3   1   1    9  -2.3   90.86100
4   1   1   10   1.1   91.86047
5   1   2    7  -1.0  100.00000
6   1   2    8 -12.0   88.00000
7   1   2    9  -4.0   84.48000
8   1   2   10   2.0   86.16960
9   2   1    7   1.0  100.00000
10  2   1    8  -3.0   97.00000
11  2   1    9   2.0   98.94000
12  2   2    7  -1.0  100.00000
13  2   2    8  -4.0   96.00000
14  2   2    9  -2.0   94.08000

> addIdxValue(start_tbl2)
  id grp year value idx_values
1  1   1    7   2.0    100.000
2  1   1    8 -12.0     88.000
3  1   1    9 -18.3     71.896
4  1   1   10 100.0    143.792
5  1   2    7  15.0    100.000
6  1   2    8  30.0    130.000
7  1   2    9  40.0    182.000
8  1   2   10 -50.0     91.000
© www.soinside.com 2019 - 2024. All rights reserved.