我有一个数据框,以及一个只包含两个数字(201 和 200)的向量:
# Example input: three numeric columns of equal length (12 values each).
type <- c(222, 222, 199, 251, 106, 88, 88, 88, 88, 61, 199, 251)
latency <- c(
  4167, 4433, 5109, 5635, 6618, 6980,
  7246, 7512, 7778, 8045, 8311, 8577
)
# Stored as double (not integer), matching the other columns.
urevent <- as.numeric(1:12)
# Values to be inserted, in order, below the rows where type == 199.
acc <- c(201, 200)
df1 <- data.frame(type = type, latency = latency, urevent = urevent)
我需要把 `acc` 中的值依次添加到 `type` 列中,插入到每一个满足 `df1$type == 199` 的行的下方。`type` 中 199 出现的次数与 `acc` 中数字的个数相同。
新行的 `latency` 应取自上一行的值再加上 50,新行的 `urevent` 应为 0。
下面是所需的输出结果。
----------------------------
type | latency | urevent |
----------------------------
222 | 4167 | 1 |
222 | 4433 | 2 |
199 | 5109 | 3 |
201 | 5159 | 0 |
251 | 5635 | 4 |
106 | 6618 | 5 |
88 | 6980 | 6 |
88 | 7246 | 7 |
88 | 7512 | 8 |
88 | 7778 | 9 |
61 | 8045 | 10 |
199 | 8311 | 11 |
200 | 8361 | 0 |
251 | 8577 | 12 |
---------------------------
我们可以使用 `tibble` 包中的 `add_row`:
library(tibble)
library(dplyr)
library(tidyr)
# Insert one new row below every row of df1 whose type is 199, taking the new
# type values from acc in order. Positions are derived from the data instead
# of being hard-coded, so the snippet keeps working if df1 changes.
hit_rows <- which(df1$type == 199)
# The i-th 199 row receives acc[i], so the counts must agree.
stopifnot(length(hit_rows) == length(acc))
# Every earlier insertion pushes the remaining targets down by one row.
insert_after <- hit_rows + seq_along(hit_rows) - 1

df_added <- df1
for (i in seq_along(insert_after)) {
  # Only type is supplied; latency and urevent of the new row start as NA.
  df_added <- add_row(df_added, type = acc[i], .after = insert_after[i])
}

df_added %>%
  # Copy latency down from the row above into the new rows.
  fill(latency) %>%
  # NOTE: rows are recognised by `type %in% acc`, so this assumes no original
  # row already carries one of the values in acc.
  mutate(latency = case_when(type %in% acc ~ latency + 50,
                             TRUE ~ latency),
         urevent = replace_na(urevent, 0))
# type latency urevent
#1 222 4167 1
#2 222 4433 2
#3 199 5109 3
#4 201 5159 0
#5 251 5635 4
#6 106 6618 5
#7 88 6980 6
#8 88 7246 7
#9 88 7512 8
#10 88 7778 9
#11 61 8045 10
#12 199 8311 11
#13 200 8361 0
#14 251 8577 12
另一种选择是使用 `group_split`:根据 `type` 列中 199 出现的位置创建一个分组列,再按该列拆分数据框。
library(purrr)
# Split df1 into consecutive chunks, starting a new chunk at every row whose
# type is 199. `.keep = FALSE` drops the helper column `grp` from the chunks
# (the older spelling `keep = FALSE` is deprecated since dplyr 1.0.0).
lst1 <- df1 %>%
  group_split(grp = cumsum(type == 199), .keep = FALSE)

# TRUE for chunks that begin with a 199 row; rows that precede the first 199
# form a chunk for which this is FALSE.
i1 <- map_lgl(lst1, ~ .x$type[1] == 199)
# Each 199 chunk is paired with one value of acc, so the counts must agree.
stopifnot(sum(i1) == length(acc))

lst1[i1] <- map2(lst1[i1], acc, function(chunk, new_type) {
  chunk %>%
    # New row goes directly below the leading 199 row; latency starts as NA.
    add_row(type = new_type, urevent = 0, .after = 1) %>%
    # Copy latency down from the 199 row, then shift the new row by 50.
    fill(latency) %>%
    mutate(latency = case_when(type %in% acc ~ latency + 50,
                               TRUE ~ latency))
})

# Stitch the chunks back together in their original order.
df2 <- bind_rows(lst1)
df2
# A tibble: 14 x 3
# type latency urevent
# * <dbl> <dbl> <dbl>
# 1 222 4167 1
# 2 222 4433 2
# 3 199 5109 3
# 4 201 5159 0
# 5 251 5635 4
# 6 106 6618 5
# 7 88 6980 6
# 8 88 7246 7
# 9 88 7512 8
#10 88 7778 9
#11 61 8045 10
#12 199 8311 11
#13 200 8361 0
#14 251 8577 12
还有一种选择是先用 `uncount` 复制 `type` 为 199 的行,再根据重复的行号用 `replace` 相应地替换各列中的值。
# Duplicate every 199 row once, then turn each duplicate into the new row.
df1 %>%
  # Remember the original row number so duplicates can be recognised later.
  mutate(rn = row_number()) %>%
  # Weight 2 for rows whose type is 199, weight 1 for all others.
  uncount(ifelse(type == 199, 2, 1)) %>%
  mutate(
    # The second occurrence of a row number is the freshly created copy.
    is_copy = duplicated(rn),
    type = replace(type, is_copy, acc),
    urevent = replace(urevent, is_copy, 0)
  ) %>%
  group_by(rn) %>%
  # Within a duplicated pair only the second row is shifted; it still holds
  # the latency copied from the first row, so this equals first latency + 50.
  mutate(latency = latency + 50 * (row_number() > 1)) %>%
  ungroup() %>%
  select(-rn, -is_copy)
# A tibble: 14 x 3
# type latency urevent
# <dbl> <dbl> <dbl>
# 1 222 4167 1
# 2 222 4433 2
# 3 199 5109 3
# 4 201 5159 0
# 5 251 5635 4
# 6 106 6618 5
# 7 88 6980 6
# 8 88 7246 7
# 9 88 7512 8
#10 88 7778 9
#11 61 8045 10
#12 199 8311 11
#13 200 8361 0
#14 251 8577 12