我有一个类似于下面的数据表:
foo bar a1 a2 a3 b1 b2 b3 b4 c1 c2 A_1 A_2 A_3 C_1 C_2
m 19 0 1 2 2 1 3 0 0 2 25 33 61 50 50
f 30 1 2 1 0 4 2 1 2 2 10 43 30 45 73
n 22 0 2 2 1 3 1 0 1 2 7 84 33 12 40
我想将其转换为长格式,使每个小写字母变量对应一行。同时,我还想带上与该小写变量相匹配的大写变量的值;如果没有匹配的大写变量,则填入 NA。以下是最终结果的片段:
foo bar lower lower_value upper upper_value
m 19 a1 0 A_1 25
m 19 a2 1 A_2 33
...
f 30 b4 1 B_4 NA
...
n 22 c2 2 C_2 40
实现这一目标的最佳方法是什么?
编辑: 这里的数据表供参考:
library(data.table)
# Sample wide table: two id columns (foo, bar), nine lowercase measure
# columns (a1..c2) and five uppercase companion columns (A_1..A_3, C_1, C_2).
# There are no B_* columns.
dt <- data.table(
  # id columns
  foo = c("m", "f", "n"),
  bar = c(19, 30, 22),
  # lowercase measure columns
  a1 = c(0, 1, 0), a2 = c(1, 2, 2), a3 = c(2, 1, 2),
  b1 = c(2, 0, 1), b2 = c(1, 4, 3), b3 = c(3, 2, 1), b4 = c(0, 1, 0),
  c1 = c(0, 2, 1), c2 = c(2, 2, 2),
  # uppercase companion columns
  A_1 = c(25, 10, 7), A_2 = c(33, 43, 84), A_3 = c(61, 30, 33),
  C_1 = c(50, 45, 12), C_2 = c(50, 73, 40)
)
下面是一个可行的解决方案,但我相信应该有更高效的方法来处理这个问题:
# Reshape `dt` to long format: one row per lowercase measure column, carrying
# the value of the matching uppercase column (e.g. a1 -> A_1), or NA when no
# such column exists.

# Melt the lowercase columns. No id.vars are given, so every other column
# (including the uppercase ones) is kept alongside each melted row.
# NOTE(review): the pattern is left unnamed so that `value.name` decides the
# value column's name; newer data.table releases may take a name given inside
# patterns() as the value column name -- confirm against the installed version.
melted_dt <- melt(
  dt,
  measure.vars = patterns("^[a-z]\\d+$"),
  variable.name = "lower",
  value.name = "lower_value"
)

# Name of the uppercase counterpart of each melted variable: "a1" -> "A_1".
melted_dt[, upper := toupper(sub("^([a-z])(\\d+)$", "\\1_\\2", lower))]

# Fill upper_value with one assignment pass per uppercase column instead of
# one get() call per row. Rows whose `upper` has no matching column keep the
# NA they start with.
upper_cols <- grep("^[A-Z]_\\d+$", names(melted_dt), value = TRUE)
melted_dt[, upper_value := NA_real_]
for (upper_col in upper_cols) {
  rows <- which(melted_dt[["upper"]] == upper_col)
  set(
    melted_dt,
    i = rows,
    j = "upper_value",
    value = melted_dt[[upper_col]][rows]
  )
}

# Drop the wide uppercase columns now that their values have been copied.
if (length(upper_cols) > 0L) {
  melted_dt[, (upper_cols) := NULL]
}
合并两次 melt 的结果?
library(data.table)
# Melt the lowercase and the uppercase columns separately, then full-join the
# two long tables on the id columns plus the lowercase variable name.
id_cols <- c("foo", "bar")

# Pick the measure columns by name pattern rather than by position, so the
# code does not depend on the column order of `dt`.
lower_cols <- grep("^[a-z]\\d+$", names(dt), value = TRUE)
upper_cols <- grep("^[A-Z]_?\\d+$", names(dt), value = TRUE)

lower_long <- melt(
  dt,
  id.vars = id_cols,
  measure.vars = lower_cols,
  variable.name = "lower",
  value.name = "lower_value"
)
upper_long <- melt(
  dt,
  id.vars = id_cols,
  measure.vars = upper_cols,
  variable.name = "upper",
  value.name = "upper_value"
)

# Join key: the lowercase name matching each uppercase column
# ("A1" or "A_1" -> "a1").
upper_long[, lower := tolower(gsub("_", "", upper, fixed = TRUE))]

# all = TRUE keeps lowercase variables without an uppercase partner; their
# upper_value stays NA and `upper` is filled in from the lowercase name.
merge(lower_long, upper_long, by = c(id_cols, "lower"), all = TRUE)[
  is.na(upper), upper := toupper(lower)
][]
#> foo bar lower lower_value upper upper_value
#> 1: f 30 a1 1 A1 10
#> 2: f 30 a2 2 A2 43
#> 3: f 30 a3 1 A3 30
#> 4: f 30 b1 0 B1 NA
#> 5: f 30 b2 4 B2 NA
#> 6: f 30 b3 2 B3 NA
#> 7: f 30 b4 1 B4 NA
#> 8: f 30 c1 2 C1 45
#> 9: f 30 c2 2 C2 73
#> 10: m 19 a1 0 A1 25
#> 11: m 19 a2 1 A2 33
#> 12: m 19 a3 2 A3 61
#> 13: m 19 b1 2 B1 NA
#> 14: m 19 b2 1 B2 NA
#> 15: m 19 b3 3 B3 NA
#> 16: m 19 b4 0 B4 NA
#> 17: m 19 c1 0 C1 50
#> 18: m 19 c2 2 C2 50
#> 19: n 22 a1 0 A1 7
#> 20: n 22 a2 2 A2 84
#> 21: n 22 a3 2 A3 33
#> 22: n 22 b1 1 B1 NA
#> 23: n 22 b2 3 B2 NA
#> 24: n 22 b3 1 B3 NA
#> 25: n 22 b4 0 B4 NA
#> 26: n 22 c1 1 C1 12
#> 27: n 22 c2 2 C2 40
#> foo bar lower lower_value upper upper_value
数据
# Sample wide table used by the merge-based answer: two id columns (foo, bar),
# nine lowercase measure columns (a1..c2) and five uppercase companion columns.
# Note: the uppercase columns here are named A1..C2 (no underscore), unlike
# the A_1..C_2 names in the question's table.
dt <- data.table(
  # id columns
  foo = c("m", "f", "n"),
  bar = c(19, 30, 22),
  # lowercase measure columns
  a1 = c(0, 1, 0), a2 = c(1, 2, 2), a3 = c(2, 1, 2),
  b1 = c(2, 0, 1), b2 = c(1, 4, 3), b3 = c(3, 2, 1), b4 = c(0, 1, 0),
  c1 = c(0, 2, 1), c2 = c(2, 2, 2),
  # uppercase companion columns
  A1 = c(25, 10, 7), A2 = c(33, 43, 84), A3 = c(61, 30, 33),
  C1 = c(50, 45, 12), C2 = c(50, 73, 40)
)