我在 R 代码中尽量遵循 DRY(Don't Repeat Yourself)原则,但下面这段代码我已经不知道该如何继续精简了:它非常重复,希望能得到你的帮助。
这是一个可重现的例子:
library(tidyverse)
set.seed(2023)
# First, generate the data: ten 0/1 indicator columns (var1_01..var1_10)
# followed by ten value columns (var2_01..var2_10) drawn from 30:100,
# 7 rows each. Columns are drawn in the same order as writing every
# sample() call out by hand, so the random stream is consumed identically.
data <- local({
  suffixes <- sprintf("%02d", 1:10)
  # Draw one 7-row column per suffix and name it "<prefix>_<suffix>".
  draw_columns <- function(prefix, values) {
    columns <- lapply(suffixes, function(s) sample(values, 7, replace = TRUE))
    setNames(columns, paste0(prefix, "_", suffixes))
  }
  # Evaluate var1 before var2 explicitly so the draw order is unambiguous.
  var1 <- draw_columns("var1", 0:1)
  var2 <- draw_columns("var2", 30:100)
  data.frame(var1, var2)
})
这是我想要精简的代码:
# Expanded (repetitive) version: for each suffix 01..10, var3_xx takes the
# value of var2_xx on rows where var1_xx == 1 and 0 everywhere else.
# `+ 0` promotes the integer var2_xx to double so both case_when() branches
# have the same type as the literal 0.
# Plain assignment is used instead of `%<>%`, because that operator lives in
# magrittr, which library(tidyverse) does not attach.
data <- data %>%
  mutate(
    var3_01 = case_when(var1_01 == 1 ~ var2_01 + 0, TRUE ~ 0),
    var3_02 = case_when(var1_02 == 1 ~ var2_02 + 0, TRUE ~ 0),
    var3_03 = case_when(var1_03 == 1 ~ var2_03 + 0, TRUE ~ 0),
    var3_04 = case_when(var1_04 == 1 ~ var2_04 + 0, TRUE ~ 0),
    var3_05 = case_when(var1_05 == 1 ~ var2_05 + 0, TRUE ~ 0),
    var3_06 = case_when(var1_06 == 1 ~ var2_06 + 0, TRUE ~ 0),
    var3_07 = case_when(var1_07 == 1 ~ var2_07 + 0, TRUE ~ 0),
    var3_08 = case_when(var1_08 == 1 ~ var2_08 + 0, TRUE ~ 0),
    var3_09 = case_when(var1_09 == 1 ~ var2_09 + 0, TRUE ~ 0),
    var3_10 = case_when(var1_10 == 1 ~ var2_10 + 0, TRUE ~ 0)
  )
目标是:对每一行,如果 var1_* == 1,则 var3_* 取对应 var2_* 的值,否则取 0。但我无法用更短的写法重现这段代码(tidyverse 或 base R 均可)。我试过这样写:
# Attempted short version; it does NOT reproduce the expanded mutate() result.
# Only five suffixes ("01".."05") are built here, although
# starts_with("var1_") below selects every var1_* column.
# isTRUE() returns a single TRUE/FALSE for its whole argument rather than one
# value per row, so it cannot act as an element-wise test on `.x == 1`.
# NOTE(review): `.x := data[, 6:10]` is not a usable "yes" value for ifelse(),
# and `%<>%` comes from magrittr, which library(tidyverse) does not attach --
# confirm magrittr is loaded before running this.
numbers <- c(paste0("0", 1:5))
data %<>%
mutate(across(starts_with("var1_"), ~ifelse(isTRUE(.x==1), .x:=data[, 6:10], 0), .names="var3_{numbers}"))
……但这段代码得到的结果与上面的展开版本并不一致。
非常感谢任何建议!
如果你想保持宽格式的数据,我会使用矩阵来处理,例如:
set.seed(2023)
# Build the same 7 x 20 data frame without spelling out each column:
# ten 0/1 columns named var1_01..var1_10, then ten columns drawn from 30:100
# named var2_01..var2_10. replicate(..., simplify = FALSE) returns a list of
# independently drawn vectors, one per column.
DF <- do.call(
  data.frame,
  c(
    setNames(
      replicate(10, sample(0:1, 7, replace = TRUE), simplify = FALSE),
      sprintf("var1_%02d", 1:10)
    ),
    setNames(
      replicate(10, sample(30:100, 7, replace = TRUE), simplify = FALSE),
      sprintf("var2_%02d", 1:10)
    )
  )
)
#' Zero out values whose indicator is not 1
#'
#' @param a Data frame (or matrix) of indicators with the same dimensions
#'   as `b`.
#' @param b Data frame (or matrix) of values; "var2" in its column names is
#'   replaced by "var3" in the result.
#' @return A data frame shaped like `b`. A cell keeps its value from `b` only
#'   where the matching cell of `a` equals 1; every other cell (indicator 0,
#'   any other value, or `NA`) becomes 0.
foo <- function(a, b) {
  a <- as.matrix(a)
  b <- as.matrix(b)
  # Cell-wise masking only makes sense when both inputs line up exactly.
  stopifnot(identical(dim(a), dim(b)))
  # Mirror `case_when(a == 1 ~ b, TRUE ~ 0)`: testing `a == 0` alone would let
  # NA and any non-0/1 indicator leak the original value through.
  keep <- !is.na(a) & a == 1
  b[!keep] <- 0
  if (!is.null(colnames(b))) {
    colnames(b) <- gsub("var2", "var3", colnames(b), fixed = TRUE)
  }
  as.data.frame(b)
}
# Append the var3_* columns computed by foo(). Columns are picked by name
# prefix, and list-style `[` indexing always returns a data frame, even when
# only one column matches (`DF[, mask]` would drop to a bare vector).
DF <- cbind(
  DF,
  foo(
    DF[startsWith(names(DF), "var1")],
    DF[startsWith(names(DF), "var2")]
  )
)
all.equal(data, DF)
#[1] TRUE
如果想留在 tidyverse 中,可以使用 `across`,并在 `case_when` 内用 `get` 按列名取出对应的 `var2_*` 列,从而免去重复:
# Indicator columns, selected by name rather than by position.
cols <- grep("^var1_", names(data), value = TRUE)
data |>
  mutate(across(
    all_of(cols),
    \(x) {
      # cur_column() is the current var1_* name (e.g. "var1_03"); swapping the
      # prefix gives the name of its var2_* partner, which get() looks up
      # among the data frame's columns.
      partner <- sub("^var1", "var2", cur_column())
      case_when(x == 1 ~ get(partner), TRUE ~ 0)
    },
    # Derive each output name from the input column, so this works for any
    # number of columns instead of hard-coding the 01..10 suffixes.
    .names = "{sub('^var1', 'var3', .col)}"
  ))
var1_01 var1_02 var1_03 var1_04 var1_05 var1_06 var1_07 var1_08 var1_09 var1_10 var2_01 var2_02 var2_03 var2_04
1 0 0 1 1 1 0 0 1 1 1 31 74 42 60
2 0 1 0 0 1 0 1 0 1 1 92 63 57 98
3 1 1 0 1 0 0 0 1 1 0 53 89 64 42
4 0 1 0 0 0 1 0 1 1 1 55 37 41 97
5 0 0 0 0 1 1 0 0 0 1 47 87 56 60
6 0 0 1 0 1 0 0 0 0 1 99 73 79 31
7 1 0 0 1 0 0 0 1 1 0 61 44 52 90
var2_05 var2_06 var2_07 var2_08 var2_09 var2_10 var3_01 var3_02 var3_03 var3_04 var3_05 var3_06 var3_07 var3_08
1 60 55 57 67 97 40 0 0 42 60 60 0 0 67
2 97 78 74 30 90 49 0 63 0 0 97 0 74 0
3 77 43 52 84 43 78 53 89 0 42 0 0 0 84
4 95 94 65 86 32 82 0 37 0 0 0 94 0 86
5 47 65 100 70 91 40 0 0 0 0 47 65 0 0
6 93 77 92 57 76 93 0 0 79 0 93 0 0 0
7 46 100 74 35 38 56 61 0 0 90 0 0 0 35
var3_09 var3_10
1 97 40
2 90 49
3 43 0
4 32 82
5 0 40
6 0 93
7 38 0
您可以使用 `across(...)` 选择变量并重命名,然后将结果传递给 `ifelse()`。
# Build all var3_* columns in one step: the var1_* columns (renamed to var3_*
# through `.names`) are compared with 1 as a matrix; ifelse() then takes the
# matching var2_* cell where the comparison is TRUE and 0 where it is FALSE.
# The result keeps the dimnames of the test matrix, so as_tibble() yields
# columns named var3_*.
data %>%
  mutate(
    as_tibble(
      ifelse(
        test = as.matrix(
          across(contains("var1"), .names = "{sub('var1', 'var3', .col)}")
        ) == 1,
        yes = as.matrix(across(contains("var2"))),
        no = 0
      )
    )
  )