我想在 data.table 中创建 28 个新列:新列的值由其他列计算得到(这些列的名称保存在变量中),并且要根据若干条件取不同的值,但我遇到了问题。
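我试图实现的是区分四种情况:
1. ic 列为 0 且 nc 列为 0(既没有 incumbents 也没有 newcomers):比率为 NA;
2. ic 列大于 0 且 nc 列为 0(只有 incumbents,没有 newcomers):比率取 ic 列的值;
3. ic 列为 0 且 nc 列大于 0(只有 newcomers,没有 incumbents):比率为 0;
4. ic 列和 nc 列都不小于 1(两者都有):比率为 ic 列除以 nc 列。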
如您所见,我尝试了许多不同的写法(第 2 个 if 子句尝试用 data.table 的方式处理),第 4 个 if 子句则试图事先取出值(这当然行不通,因为 my_ic_value 包含整列的数千个值,而不是单独一行的值)。我尝试过但同样失败的其他方法是:
all <- all[, ratio_col:= get(ic_col) / get(nc_col)), by=.(m_island_id_1, m_owner_id_1)]
仅使用保存列名的变量也不起作用,因为它们是字符串,不能相除:
all <- all[, ratio_col:= (ic_col / nc_col), by=.(m_island_id_1, m_owner_id_1)]
我希望问题已经描述清楚了。期待各位 data.table 专家告诉我,我的代码有多么混乱不堪,以及它可以写得多简短、多快速! ;-)
谢谢您的帮助!
这里有一些数据(一小部分):
structure(list(m_island_id_1 = c("020d49b9580f071075cfb6f9da7a426669a56d2d",
"020d49b9580f071075cfb6f9da7a426669a56d2d", "020d49b9580f071075cfb6f9da7a426669a56d2d"
), m_owner_id_1 = c("01d96c16a2caf720617b266ae5d053243cef0920",
"4e25ef4cf04f023d96134856098b8104fa6a6add", "7818c5081257a837e7cfad2c1aba4d90dcd18a69"
), Sum_nc = c(2L, 2L, 2L), Sum_ic = c(4L, 4L, 4L), Sum_ic_2 = c(0L,
0L, 0L), Sum_ic_3 = c(0L, 0L, 0L), Sum_ic_4 = c(0L, 0L, 0L),
Sum_ic_5 = c(0L, 0L, 0L), Sum_ic_6 = c(0L, 0L, 0L), Sum_ic_7 = c(0L,
0L, 0L), Sum_ic_8 = c(0L, 0L, 0L), Sum_ic_9 = c(0L, 0L, 0L
), Sum_ic_10 = c(0L, 0L, 0L), Sum_ic_11 = c(0L, 0L, 0L),
Sum_ic_12 = c(0L, 0L, 0L), Sum_ic_13 = c(0L, 0L, 0L), Sum_ic_14 = c(0L,
0L, 0L), Sum_ic_15 = c(0L, 0L, 0L), Sum_ic_16 = c(0L, 0L,
0L), Sum_ic_17 = c(1L, 1L, 1L), Sum_ic_18 = c(1L, 1L, 1L),
Sum_ic_19 = c(2L, 2L, 2L), Sum_ic_20 = c(0L, 0L, 0L), Sum_ic_21 = c(0L,
0L, 0L), Sum_ic_22 = c(0L, 0L, 0L), Sum_ic_23 = c(0L, 0L,
0L), Sum_ic_24 = c(0L, 0L, 0L), Sum_ic_25 = c(0L, 0L, 0L),
Sum_ic_26 = c(0L, 0L, 0L), Sum_ic_27 = c(0L, 0L, 0L), Sum_ic_28 = c(0L,
0L, 0L), Sum_nc_2 = c(0L, 0L, 0L), Sum_nc_3 = c(0L, 0L, 0L
), Sum_nc_4 = c(0L, 0L, 0L), Sum_nc_5 = c(0L, 0L, 0L), Sum_nc_6 = c(0L,
0L, 0L), Sum_nc_7 = c(0L, 0L, 0L), Sum_nc_8 = c(0L, 0L, 0L
), Sum_nc_9 = c(0L, 0L, 0L), Sum_nc_10 = c(0L, 0L, 0L), Sum_nc_11 = c(0L,
0L, 0L), Sum_nc_12 = c(0L, 0L, 0L), Sum_nc_13 = c(0L, 0L,
0L), Sum_nc_14 = c(0L, 0L, 0L), Sum_nc_15 = c(0L, 0L, 0L),
Sum_nc_16 = c(1L, 1L, 1L), Sum_nc_17 = c(0L, 0L, 0L), Sum_nc_18 = c(1L,
1L, 1L), Sum_nc_19 = c(0L, 0L, 0L), Sum_nc_20 = c(0L, 0L,
0L), Sum_nc_21 = c(0L, 0L, 0L), Sum_nc_22 = c(0L, 0L, 0L),
Sum_nc_23 = c(0L, 0L, 0L), Sum_nc_24 = c(0L, 0L, 0L), Sum_nc_25 = c(0L,
0L, 0L), Sum_nc_26 = c(0L, 0L, 0L), Sum_nc_27 = c(0L, 0L,
0L), Sum_nc_28 = c(0L, 0L, 0L), inhabited = c(TRUE, TRUE,
FALSE), Sum_time_ic = c(3L, 1L, 0L), groupsize = c(2L, 2L,
2L), Ratio_nc_to_ic_16 = c(0, 0, 0), Ratio_nc_to_ic_17 = c(TRUE,
TRUE, TRUE), Ratio_nc_to_ic_18 = c(1, 1, 1), Ratio_nc_to_ic_19 = c(TRUE,
TRUE, TRUE), ratio_col = c(NaN, NaN, NaN), Ratio_nc_to_ic = c(NA,
NA, NA), Ratio_nc_to_ic_ = c(2, 2, 2), Ratio_nc_to_ic_2 = c(NaN,
NaN, NaN), Ratio_nc_to_ic_3 = c(NaN, NaN, NaN), Ratio_nc_to_ic_4 = c(NaN,
NaN, NaN), Ratio_nc_to_ic_5 = c(NaN, NaN, NaN), Ratio_nc_to_ic_6 = c(NaN,
NaN, NaN), Ratio_nc_to_ic_7 = c(NaN, NaN, NaN), Ratio_nc_to_ic_8 = c(NaN,
NaN, NaN), Ratio_nc_to_ic_9 = c(NaN, NaN, NaN), Ratio_nc_to_ic_10 = c(NaN,
NaN, NaN), Ratio_nc_to_ic_11 = c(NaN, NaN, NaN), Ratio_nc_to_ic_12 = c(NaN,
NaN, NaN), Ratio_nc_to_ic_13 = c(NaN, NaN, NaN), Ratio_nc_to_ic_14 = c(NaN,
NaN, NaN), Ratio_nc_to_ic_15 = c(NaN, NaN, NaN), Ratio_nc_to_ic_20 = c(NaN,
NaN, NaN), Ratio_nc_to_ic_21 = c(NaN, NaN, NaN), Ratio_nc_to_ic_22 = c(NaN,
NaN, NaN), Ratio_nc_to_ic_23 = c(NaN, NaN, NaN), Ratio_nc_to_ic_24 = c(NaN,
NaN, NaN), Ratio_nc_to_ic_25 = c(NaN, NaN, NaN), Ratio_nc_to_ic_26 = c(NaN,
NaN, NaN), Ratio_nc_to_ic_27 = c(NaN, NaN, NaN), Ratio_nc_to_ic_28 = c(NaN,
NaN, NaN)), row.names = c(NA, -3L), class = c("data.table",
"data.frame"), .internal.selfref = <pointer: 0x000001f487d51ef0>)
这是无效代码:
# Question's non-working attempt: for each j in 2..28, try to fill
# Ratio_nc_to_ic_<j> from Sum_ic_<j> and Sum_nc_<j>, distinguishing four cases.
for (j in 2:28){
# Build the three column names for this j as character strings.
ic_col <- paste0("Sum_ic_", j)
nc_col <- paste0("Sum_nc_", j)
ratio_col <- paste0("Ratio_nc_to_ic_", j)
# These extract the entire column (one value per row of `all`), not a scalar.
my_ic_value <- all[,get(ic_col)]
my_nc_value <- all[,get(nc_col)]
# NOTE(review): every `if` below compares the name strings ic_col / nc_col
# with a number, not the column values, so which branch runs does not depend
# on the data at all.
if ((ic_col == 0) & (nc_col == 0)) {
# NOTE(review): a bare `ratio_col :=` assigns to a column literally named
# "ratio_col"; targeting the name stored in the variable needs `(ratio_col) :=`.
all <- all[, ratio_col:= NA, by=.(m_island_id_1, m_owner_id_1)] # no incumbents, no newcomers
}
if ((ic_col > 0) & (nc_col == 0)) {
# NOTE(review): no `:=` here, so this returns a new per-group table (the two
# `by` columns plus "ratio_col" holding the name string) and assigning it back
# to `all` replaces the original table.
all <- all[, .(ratio_col= ic_col), by=.(m_island_id_1, m_owner_id_1)] # Incumbents without newcomers
}
if ((ic_col == 0) & (nc_col > 0)) {
all <- all[, ratio_col:= 0, by=.(m_island_id_1, m_owner_id_1)] # Newcomers without incumbents
}
if ((ic_col >= 1) & (nc_col >= 1)) {
# NOTE(review): the right-hand side is the full-length vector computed above,
# while `by` evaluates per group — presumably a length mismatch; confirm.
all <- all[, ratio_col:= (my_ic_value / my_nc_value), by=.(m_island_id_1, m_owner_id_1)] # Newcomers with incumbents
}
}
计算似乎是按行进行的,因此可以去掉 by(如果我错了,请纠正我)。
用矩阵运算即可覆盖后两种情况,然后用索引来处理情况 1 和 2,如下所示:
# Fill every ratio column in one step: element-wise ic / nc on matrices,
# then patch the two special cases by logical indexing.
DT[, (ratio_cols) := {
  # One matrix column per j, taken from the Sum_ic_* and Sum_nc_* columns.
  ic <- as.matrix(.SD[, ic_cols, with = FALSE])
  nc <- as.matrix(.SD[, nc_cols, with = FALSE])

  ratio <- ic / nc

  # Division by nc == 0 with ic != 0 yields Inf: use the ic value instead.
  no_newcomers <- is.infinite(ratio)
  ratio[no_newcomers] <- ic[no_newcomers]

  # ic == 0 and nc == 0 (0 / 0 is NaN): report a missing value.
  ratio[ic == 0 & nc == 0] <- NA_real_

  # Columns are matched to ratio_cols by position.
  as.data.table(ratio)
}]
数据:
# `dat` is the sample data from the question. Keep only the columns from
# m_island_id_1 through groupsize, which leaves out the ratio columns.
DT <- dat[, .SD, .SDcols = m_island_id_1:groupsize]

# Append "_1" to the two unsuffixed totals so all 28 column pairs follow the
# same Sum_ic_<j> / Sum_nc_<j> naming pattern.
rename <- c("Sum_nc", "Sum_ic")
setnames(DT, old = rename, new = paste0(rename, "_1"))

# Column-name vectors for j = 1..28.
jcols <- seq_len(28L)
ic_cols <- sprintf("Sum_ic_%d", jcols)
nc_cols <- sprintf("Sum_nc_%d", jcols)
ratio_cols <- sprintf("Ratio_nc_to_ic_%d", jcols)