有条件地插入新行,并在R中向新行添加值。

问题描述 投票:0回答:1

我有一个数据框,以及一个只包含两个数字(201 和 200)的向量。

# Example data: three parallel event columns of equal length (12 rows).
type <- c(222, 222, 199, 251, 106, 88, 88, 88, 88, 61, 199, 251)
latency <- c(4167, 4433, 5109, 5635, 6618, 6980, 7246, 7512, 7778, 8045, 8311, 8577)
urevent <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)

# Values to insert, in order, as the `type` of a new row placed directly
# below each row where `type == 199` (one value per such row).
acc <- c(201, 200)

# Use `<-` for assignment, consistent with the rest of the script.
df1 <- data.frame(type, latency, urevent)

我需要把 acc 中的值依次插入到 type 列中,插入位置是每个满足 df1$type == 199 的行的下方。199 出现的次数与 acc 中数字的个数相等。

新行的 latency 应复制其上一行的值,然后再加上 50;新行的 urevent 应为 0。

下面是所需的输出结果。

----------------------------
type | latency | urevent |
----------------------------
222  | 4167    | 1       |
222  | 4433    | 2       |
199  | 5109    | 3       |
201  | 5159    | 0       |
251  | 5635    | 4       |
106  | 6618    | 5       |
88   | 6980    | 6       |
88   | 7246    | 7       |
88   | 7512    | 8       |
88   | 7778    | 9       |
61   | 8045    | 10      |
199  | 8311    | 11      |
200  | 8361    | 0       |
251  | 8577    | 12      |
---------------------------



r dataframe rstudio data-munging data-wrangling
1个回答
2
投票

我们可以使用 tibble 包中的 add_row

library(tibble)
library(dplyr)
library(tidyr)

# Insert one new row directly below every `type == 199` row, taking the new
# `type` values from `acc` in order. Positions are derived from the data
# instead of being hard-coded, so this works for any number of markers.
insert_after <- which(df1$type == 199)
stopifnot(length(insert_after) == length(acc))
# Each earlier insertion pushes the later marker rows down by one.
insert_after <- insert_after + seq_along(acc) - 1

Reduce(
  function(d, i) add_row(d, type = acc[i], .after = insert_after[i]),
  seq_along(acc),
  df1
) %>%
  # New rows have NA latency/urevent; copy latency down from the row above.
  fill(latency) %>%
  mutate(
    # NOTE(review): assumes no pre-existing row has a `type` value that also
    # appears in `acc`; such a row would get +50 as well.
    latency = case_when(
      type %in% acc ~ latency + 50,
      TRUE ~ latency
    ),
    urevent = replace_na(urevent, 0)
  )
#   type latency urevent
#1   222    4167       1
#2   222    4433       2
#3   199    5109       3
#4   201    5159       0
#5   251    5635       4
#6   106    6618       5
#7    88    6980       6
#8    88    7246       7
#9    88    7512       8
#10   88    7778       9
#11   61    8045      10
#12  199    8311      11
#13  200    8361       0
#14  251    8577      12

另一种选择是使用 group_split:根据 type 列中 199 出现的位置创建一个分组列,并按该列把数据框拆分成列表。

library(purrr)
# Split df1 into consecutive chunks; a new chunk starts at every row where
# `type == 199`. `.keep = FALSE` drops the helper `grp` column from the
# chunks (`keep` is the deprecated spelling of this argument).
lst1 <- df1 %>%
  group_split(grp = cumsum(type == 199), .keep = FALSE)
# TRUE for chunks whose first row is a 199 marker (the leading chunk may not be).
i1 <- map_lgl(lst1, ~ .x$type[1] == 199)

# For each marker chunk, insert the matching `acc` value as a new second row,
# copy latency down from the marker row and add 50 to the inserted row.
lst1[i1] <- map2(
  lst1[i1], acc,
  ~ .x %>%
    add_row(type = .y, urevent = 0, .after = 1) %>%
    fill(latency) %>%
    mutate(latency = case_when(
      type %in% acc ~ latency + 50,
      TRUE ~ latency
    ))
)

df2 <- bind_rows(lst1)
df2
# A tibble: 14 x 3
#    type latency urevent
# * <dbl>   <dbl>   <dbl>
# 1   222    4167       1
# 2   222    4433       2
# 3   199    5109       3
# 4   201    5159       0
# 5   251    5635       4
# 6   106    6618       5
# 7    88    6980       6
# 8    88    7246       7
# 9    88    7512       8
#10    88    7778       9
#11    61    8045      10
#12   199    8311      11
#13   200    8361       0
#14   251    8577      12

还有一种选择是先用 uncount 复制 type 为 199 的行,然后用 replace 根据重复的行号替换相应列中的值。

df1 %>%
  # Tag every original row so duplicated copies can be recognised later.
  mutate(rn = row_number()) %>%
  # Rows with type == 199 get weight 2 (emitted twice), all others weight 1.
  uncount(1 + (type == 199)) %>%
  # The second copy of each marker row becomes the inserted row.
  mutate(
    type = replace(type, duplicated(rn), acc),
    urevent = replace(urevent, duplicated(rn), 0)
  ) %>%
  group_by(rn) %>%
  # Within a duplicated pair, the second row gets the first row's latency + 50.
  mutate(latency = case_when(
    row_number() == 2 ~ first(latency) + 50,
    TRUE ~ latency
  )) %>%
  ungroup() %>%
  select(-rn)
# A tibble: 14 x 3
#    type latency urevent
#   <dbl>   <dbl>   <dbl>
# 1   222    4167       1
# 2   222    4433       2
# 3   199    5109       3
# 4   201    5159       0
# 5   251    5635       4
# 6   106    6618       5
# 7    88    6980       6
# 8    88    7246       7
# 9    88    7512       8
#10    88    7778       9
#11    61    8045      10
#12   199    8311      11
#13   200    8361       0
#14   251    8577      12
© www.soinside.com 2019 - 2024. All rights reserved.