我提前为问这个棘手的问题表示歉意。
我有一个看起来像这样的数据集(总共大约有 1000 行,这只是头部)。每列都是 z 评分的血液分析物。
structure(list(alt_zscore = c(1.15628571428571, 0.899333333333333,
NA, -0.730708333333333, 0.0963571428571428, -1.06795833333333
), alb_zscore = c(1.888599484682, 0.134900515317999, NA, 0.6745,
-0.809400515317999, 0.6745), alp_zscore = c(2.99309375, -1.39021528321409,
NA, -0.64982016264779, -0.274015625, -0.304302439716851), calcium_zscore = c(1.09606450959036,
0.449665953945447, NA, -0.674500004496664, -0.33725, -0.674500004496664
), uc_ratio_zscore = c(0.691189771122184, 0.00395552487310546,
NA, -0.955924044178282, -0.545585328858177, -0.54077986726889
), sodium_zscore = c(0.932489252756058, -0.6745, NA, -1.180375,
0.310829750918686, -1.01175), phos_zscore = c(-0.103769544771059,
1.21409991456333, NA, 1.39396640945733, -1.93270290026917, -1.30403359054267
), pot_zscore = c(1.07919974530892, 0.134899228372, NA, -0.404700259007998,
1.21409890946892, -0.269801030635998), pcv_zscore = c(NA, NA,
-0.243018626530217, 1.2141, -0.959399470734058, -0.20235), glob_zscore = c(-1.079198972062,
0.385428307960541, NA, -0.963571690112316, -1.21409948738, -0.963571690112316
), baso_per_ul_zscore = c(NA, NA, NA, NA, NA, -1.2646875), esino_per_ul_zscore = c(NA,
NA, 2.0877380952381, -0.108792935519412, -0.21912328042328, 1.68625
), lympho_per_ul_zscore = c(NA, NA, -0.173182432432432, -1.11988412698413,
-0.525016216216216, 0.565295238095238), mono_per_ul_zscore = c(NA,
NA, 1.01941477272727, -0.332159846897352, 3.01532159090909, 0.844644122370989
), neutro_per_ul_zscore = c(NA, NA, -0.82978274474743, -0.560261015649008,
1.3692018328118, -0.69385232985532), cortisol_zscore = c(9.53660294508432,
8.6351129251796, NA, NA, NA, NA)), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"))
我的目标是创建名为metab_index、imm_index、neuro_index 和dysreg_index 的列。这些是“失调”的指数,而根据某些规则(参见下面的代码)发现失调的任何分析物都会得到“1”。然后,例如,metab_index 是失调的代谢分析物的数量(参见下面的失调规则代码)除以可用代谢分析物的总数(可用意味着该个体没有 NA)。 imm_index 和neuro_index 也是如此。 Dysreg_index 包括所有分析物,未按生理系统划分。我对所有“_index”变量的预期输出是 0(表示没有分析物处于失调状态)和 1(所有分析物都处于失调状态)之间的数字。
进入每个索引的分析物是: metab_index:包括 alt_zscore、alb_zscore、alp_zscore、calcium_zscore、uc_ratio_zscore、sodium_zscore、phos_zscore、pot_zscore、pcv_zscore 和 glob_zscore(如果太低)。
imm_index:包括 glob_zscore(如果太高)、baso_per_ul_zscore、esino_per_ul_zscore、lympho_per_ul_zscore、mono_per_ul_zscore 和 neutro_per_ul_zscore。
neuro_index:仅包括 cortisol_zscore。
dysreg_index:包括以上所有内容。
出现的问题是,我有一种分析物 glob_zscore,该值过高意味着需要将其计入 imm_index,而该值过低则意味着需要将其计入 metab_index。我可以将 glob_zscore 的值分配给 metab_index 或 imm_index 就好,但我的问题出现在尝试将 glob_zscore 分配给总 Dysreg_index 时。如果 glob_zscore 太高或太低,则应将其计入 Dysreg_index 的分子。 Dysreg_index 应该是 0-1 之间的数字,但正如我现在的代码所示,我得到的数字 >1。
这是我的代码,希望能表达我的意思:
funcs <- list(
alt_zscore = function(z) !is.na(z) & !between(z, quantile(z, 0.20, na.rm = TRUE), quantile(z, 0.80, na.rm = TRUE)), #lower (20%) or upper (80%) quintile is dysfunction
alb_zscore = function(z) !is.na(z) & !between(z, quantile(z, 0.20, na.rm = TRUE), quantile(z, 0.80, na.rm = TRUE)), #lower (20%) or upper (80%) quintile is dysfunction
alp_zscore = function(z) !is.na(z) & !between(z, quantile(z, 0.20, na.rm = TRUE), quantile(z, 0.80, na.rm = TRUE)), #lower (20%) or upper (80%) quintile is dysfunction
calcium_zscore = function(z) !is.na(z) & !between(z, quantile(z, 0.20, na.rm = TRUE), quantile(z, 0.80, na.rm = TRUE)), #lower (20%) or upper (80%) quintile is dysfunction
uc_ratio_zscore = function(z) !is.na(z) & z < quantile(z, 0.25, na.rm = TRUE), # lower quartile (25%) is dysregulation
sodium_zscore = function(z) !is.na(z) & !between(z, quantile(z, 0.20, na.rm = TRUE), quantile(z, 0.80, na.rm = TRUE)), #lower (20%) or upper (80%) quintile is dysfunction
phos_zscore = function(z) !is.na(z) & !between(z, quantile(z, 0.20, na.rm = TRUE), quantile(z, 0.80, na.rm = TRUE)), #lower (20%) or upper (80%) quintile is dysfunction
pot_zscore = function(z) !is.na(z) & !between(z, quantile(z, 0.20, na.rm = TRUE), quantile(z, 0.80, na.rm = TRUE)), #lower (20%) or upper (80%) quintile is dysfunction
pcv_zscore = function(z) !is.na(z) & z > quantile(z, 0.75, na.rm = TRUE), #upper quartile (75%) is dysregulation
glob_zscore = function(z) !is.na(z) & z < quantile(z, 0.25, na.rm = TRUE), # GLOB: lower quartile (25%) is dysregulation for METABOLIC
glob_zscore = function(z) !is.na(z) & z > quantile(z, 0.75, na.rm = TRUE), # GLOB: upper quartile (75%) is dysregulation for IMMUNE
baso_per_ul_zscore = function(z) !is.na(z) & z > quantile(z, 0.75, na.rm = TRUE), #upper quartile (75%) is dysregulation
esino_per_ul_zscore = function(z) !is.na(z) & z > quantile(z, 0.75, na.rm = TRUE), #upper quartile (75%) is dysregulation
lympho_per_ul_zscore = function(z) !is.na(z) & z > quantile(z, 0.75, na.rm = TRUE), #upper quartile (75%) is dysregulation
mono_per_ul_zscore = function(z) !is.na(z) & z > quantile(z, 0.75, na.rm = TRUE), #upper quartile (75%) is dysregulation
neutro_per_ul_zscore = function(z) !is.na(z) & z > quantile(z, 0.75, na.rm = TRUE), #upper quartile (75%) is dysregulation
cortisol_zscore = function(z) !is.na(z) & !between(z, quantile(z, 0.20, na.rm = TRUE), quantile(z, 0.80, na.rm = TRUE)) #lower (20%) or upper (80%) quintile is dysfunction
)
mapply(function(fn, x) fn(x), funcs, df[names(funcs)])
df <- df %>%
mutate(
metab_index = { ## METABOLIC INDEX
numerator <- mapply(function(fn, x) fn(x), funcs[1:10], pick(all_of(names(funcs[1:10])))) # order of funcs matters !
denominator <- (!is.na(pick(all_of(names(funcs[1:10]))))) # denominator is the number of non-NA elements in a row
rowSums(numerator) / rowSums(denominator)
}
) %>%
mutate(
imm_index = { ## IMMUNE INDEX
numerator <- mapply(function(fn, x) fn(x), funcs[11:16], pick(all_of(names(funcs[11:16])))) # order of funcs matters !
denominator <- (!is.na(pick(all_of(names(funcs[11:16]))))) # denominator is the number of non-NA elements in a row
rowSums(numerator) / rowSums(denominator)
}
) %>%
mutate(
neuro_index = { ## NEUROENDOCRINE INDEX
numerator <- mapply(function(fn, x) fn(x), funcs[17], pick(all_of(names(funcs[17])))) # order of funcs matters !
denominator <- (!is.na(pick(all_of(names(funcs[17]))))) # denominator is the number of non-NA elements in a row
rowSums(numerator) / rowSums(denominator)
}
) %>%
mutate(
dysreg_index = { ## TOTAL DYSREGULATION INDEX (includes metabolic, immune, and neuroendocrine)
numerator <- mapply(function(fn, x) fn(x), funcs, pick(all_of(names(funcs))))
denominator <- (!is.na(pick(all_of(names(funcs))))) # denominator is the number of non-NA elements in a row
rowSums(numerator) / rowSums(denominator)
}
)
我的代码中的错误位于 Dysreg_index 参数中的某个位置。我的预期输出是 0-1 之间的数字,但有时我得到的数字 >1。我知道这是因为从技术上讲我有两个用于 glob_zscore 的函数,这可能会增加“分子”中可供选择的函数数量,但我这样做是因为我想将一个函数分配给metab_index,另一个函数分配给metab_index到imm_index。
非常感谢任何帮助。非常感谢!
我不确定这是否算作答案,但这希望有帮助......
运行您的代码时收到此警告:
Warning message:
There was 1 warning in `mutate()`.
ℹ In argument: `dysreg_index = { ... }`.
Caused by warning in `mapply()`:
! longer argument not a multiple of length of shorter
为了在dysreg_index中分别查看分子和分母,我在最后
mutate()
中修改了你的代码:
df2 <- df %>%
mutate(
metab_index = { ## METABOLIC INDEX
numerator <- mapply(function(fn, x) fn(x), funcs[1:10], pick(all_of(names(funcs[1:10])))) # order of funcs matters !
denominator <- (!is.na(pick(all_of(names(funcs[1:10]))))) # denominator is the number of non-NA elements in a row
rowSums(numerator) / rowSums(denominator)
}
) %>%
mutate(
imm_index = { ## IMMUNE INDEX
numerator <- mapply(function(fn, x) fn(x), funcs[11:16], pick(all_of(names(funcs[11:16])))) # order of funcs matters !
denominator <- (!is.na(pick(all_of(names(funcs[11:16]))))) # denominator is the number of non-NA elements in a row
rowSums(numerator) / rowSums(denominator)
}
) %>%
mutate(
neuro_index = { ## NEUROENDOCRINE INDEX
numerator <- mapply(function(fn, x) fn(x), funcs[17], pick(all_of(names(funcs[17])))) # order of funcs matters !
denominator <- (!is.na(pick(all_of(names(funcs[17]))))) # denominator is the number of non-NA elements in a row
rowSums(numerator) / rowSums(denominator)
}
) %>%
mutate(
# dysreg_index = { ## TOTAL DYSREGULATION INDEX (includes metabolic, immune, and neuroendocrine)
numerator = mapply(function(fn, x) fn(x), funcs, pick(all_of(names(funcs)))),
denominator = (!is.na(pick(all_of(names(funcs))))) # denominator is the number of non-NA elements in a row
# rowSums(numerator) / rowSums(denominator)
# }
)
这显示分子和分母的长度不相等:
> glimpse(df2)
Rows: 6
Columns: 21
$ alt_zscore <dbl> 1.15628571, 0.89933333, NA, -0.73070833, 0.09635714, -1.06795833
$ alb_zscore <dbl> 1.8885995, 0.1349005, NA, 0.6745000, -0.8094005, 0.6745000
$ alp_zscore <dbl> 2.9930937, -1.3902153, NA, -0.6498202, -0.2740156, -0.3043024
$ calcium_zscore <dbl> 1.096065, 0.449666, NA, -0.674500, -0.337250, -0.674500
$ uc_ratio_zscore <dbl> 0.691189771, 0.003955525, NA, -0.955924044, -0.545585329, -0.540779867
$ sodium_zscore <dbl> 0.9324893, -0.6745000, NA, -1.1803750, 0.3108298, -1.0117500
$ phos_zscore <dbl> -0.1037695, 1.2140999, NA, 1.3939664, -1.9327029, -1.3040336
$ pot_zscore <dbl> 1.0791997, 0.1348992, NA, -0.4047003, 1.2140989, -0.2698010
$ pcv_zscore <dbl> NA, NA, -0.2430186, 1.2141000, -0.9593995, -0.2023500
$ glob_zscore <dbl> -1.0791990, 0.3854283, NA, -0.9635717, -1.2140995, -0.9635717
$ baso_per_ul_zscore <dbl> NA, NA, NA, NA, NA, -1.264687
$ esino_per_ul_zscore <dbl> NA, NA, 2.0877381, -0.1087929, -0.2191233, 1.6862500
$ lympho_per_ul_zscore <dbl> NA, NA, -0.1731824, -1.1198841, -0.5250162, 0.5652952
$ mono_per_ul_zscore <dbl> NA, NA, 1.0194148, -0.3321598, 3.0153216, 0.8446441
$ neutro_per_ul_zscore <dbl> NA, NA, -0.8297827, -0.5602610, 1.3692018, -0.6938523
$ cortisol_zscore <dbl> 9.536603, 8.635113, NA, NA, NA, NA
$ metab_index <dbl> 0.5555556, 0.1111111, 0.0000000, 0.5000000, 0.4000000, 0.1000000
$ imm_index <dbl> 0.0000000, 1.0000000, 0.2500000, 0.0000000, 0.4000000, 0.1666667
$ neuro_index <dbl> 1, 1, NaN, NaN, NaN, NaN
$ numerator <lgl[,17]> <matrix[6 x 17]>
$ denominator <lgl[,16]> <matrix[6 x 16]>
所以我研究了这两个矩阵中的内容:
> df2$numerator
alt_zscore alb_zscore alp_zscore calcium_zscore uc_ratio_zscore sodium_zscore phos_zscore pot_zscore pcv_zscore glob_zscore glob_zscore
[1,] TRUE TRUE TRUE TRUE FALSE TRUE FALSE FALSE FALSE FALSE FALSE
[2,] FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[3,] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
[4,] FALSE FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE FALSE FALSE
[5,] FALSE TRUE FALSE FALSE FALSE FALSE TRUE TRUE FALSE TRUE FALSE
[6,] TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
baso_per_ul_zscore esino_per_ul_zscore lympho_per_ul_zscore mono_per_ul_zscore neutro_per_ul_zscore cortisol_zscore
[1,] FALSE FALSE FALSE FALSE TRUE TRUE
[2,] FALSE FALSE FALSE FALSE FALSE FALSE
[3,] TRUE FALSE FALSE FALSE FALSE FALSE
[4,] FALSE FALSE FALSE FALSE FALSE FALSE
[5,] FALSE FALSE TRUE TRUE FALSE FALSE
[6,] FALSE TRUE FALSE FALSE FALSE TRUE
上面的矩阵有两列……是的……
glob_zscore
:
当然,分母中不存在重复项:
> colnames(df2$denominator)
[1] "alt_zscore" "alb_zscore" "alp_zscore" "calcium_zscore" "uc_ratio_zscore"
[6] "sodium_zscore" "phos_zscore" "pot_zscore" "pcv_zscore" "glob_zscore"
[11] "baso_per_ul_zscore" "esino_per_ul_zscore" "lympho_per_ul_zscore" "mono_per_ul_zscore" "neutro_per_ul_zscore"
[16] "cortisol_zscore"
因此,当你的分母达到最大值 16 时,你的分子的可能值为 17。(我知道你承认这是一个问题,但我不确定你是否深入到了这个细节,如果这就是你所知道的所有内容,那么很抱歉已经。)
对我来说最干净、最快的解决方案是添加另一个函数,然后重新对齐你的dysreg_index,你已经说过你不想这样做,但我还是这么做了,警告消息消失了,现在的逻辑应该是阻止你得到>1的结果。
glob_zscore()
函数:funcs <- list(
alt_zscore = function(z) !is.na(z) & !between(z, quantile(z, 0.20, na.rm = TRUE), quantile(z, 0.80, na.rm = TRUE)), #lower (20%) or upper (80%) quintile is dysfunction
alb_zscore = function(z) !is.na(z) & !between(z, quantile(z, 0.20, na.rm = TRUE), quantile(z, 0.80, na.rm = TRUE)), #lower (20%) or upper (80%) quintile is dysfunction
alp_zscore = function(z) !is.na(z) & !between(z, quantile(z, 0.20, na.rm = TRUE), quantile(z, 0.80, na.rm = TRUE)), #lower (20%) or upper (80%) quintile is dysfunction
calcium_zscore = function(z) !is.na(z) & !between(z, quantile(z, 0.20, na.rm = TRUE), quantile(z, 0.80, na.rm = TRUE)), #lower (20%) or upper (80%) quintile is dysfunction
uc_ratio_zscore = function(z) !is.na(z) & z < quantile(z, 0.25, na.rm = TRUE), # lower quartile (25%) is dysregulation
sodium_zscore = function(z) !is.na(z) & !between(z, quantile(z, 0.20, na.rm = TRUE), quantile(z, 0.80, na.rm = TRUE)), #lower (20%) or upper (80%) quintile is dysfunction
phos_zscore = function(z) !is.na(z) & !between(z, quantile(z, 0.20, na.rm = TRUE), quantile(z, 0.80, na.rm = TRUE)), #lower (20%) or upper (80%) quintile is dysfunction
pot_zscore = function(z) !is.na(z) & !between(z, quantile(z, 0.20, na.rm = TRUE), quantile(z, 0.80, na.rm = TRUE)), #lower (20%) or upper (80%) quintile is dysfunction
pcv_zscore = function(z) !is.na(z) & z > quantile(z, 0.75, na.rm = TRUE), #upper quartile (75%) is dysregulation
glob_zscore = function(z) !is.na(z) & z < quantile(z, 0.25, na.rm = TRUE), # GLOB: lower quartile (25%) is dysregulation for METABOLIC
glob_zscore = function(z) !is.na(z) & z > quantile(z, 0.75, na.rm = TRUE), # GLOB: upper quartile (75%) is dysregulation for IMMUNE
baso_per_ul_zscore = function(z) !is.na(z) & z > quantile(z, 0.75, na.rm = TRUE), #upper quartile (75%) is dysregulation
esino_per_ul_zscore = function(z) !is.na(z) & z > quantile(z, 0.75, na.rm = TRUE), #upper quartile (75%) is dysregulation
lympho_per_ul_zscore = function(z) !is.na(z) & z > quantile(z, 0.75, na.rm = TRUE), #upper quartile (75%) is dysregulation
mono_per_ul_zscore = function(z) !is.na(z) & z > quantile(z, 0.75, na.rm = TRUE), #upper quartile (75%) is dysregulation
neutro_per_ul_zscore = function(z) !is.na(z) & z > quantile(z, 0.75, na.rm = TRUE), #upper quartile (75%) is dysregulation
cortisol_zscore = function(z) !is.na(z) & !between(z, quantile(z, 0.20, na.rm = TRUE), quantile(z, 0.80, na.rm = TRUE)), #lower (20%) or upper (80%) quintile is dysfunction
glob_zscore = function(z) !is.na(z) & !between(z, quantile(z, 0.25, na.rm = TRUE), quantile(z, 0.75, na.rm = TRUE)) # GLOB: lower (25%) or upper (75%) quartile is dysfunction
)
dysreg_index
df2 <- df %>%
mutate(
metab_index = { ## METABOLIC INDEX
numerator <- mapply(function(fn, x) fn(x), funcs[1:10], pick(all_of(names(funcs[1:10])))) # order of funcs matters !
denominator <- (!is.na(pick(all_of(names(funcs[1:10]))))) # denominator is the number of non-NA elements in a row
rowSums(numerator) / rowSums(denominator)
}
) %>%
mutate(
imm_index = { ## IMMUNE INDEX
numerator <- mapply(function(fn, x) fn(x), funcs[11:16], pick(all_of(names(funcs[11:16])))) # order of funcs matters !
denominator <- (!is.na(pick(all_of(names(funcs[11:16]))))) # denominator is the number of non-NA elements in a row
rowSums(numerator) / rowSums(denominator)
}
) %>%
mutate(
neuro_index = { ## NEUROENDOCRINE INDEX
numerator <- mapply(function(fn, x) fn(x), funcs[17], pick(all_of(names(funcs[17])))) # order of funcs matters !
denominator <- (!is.na(pick(all_of(names(funcs[17]))))) # denominator is the number of non-NA elements in a row
rowSums(numerator) / rowSums(denominator)
}
) %>%
mutate(
dysreg_index = { ## TOTAL DYSREGULATION INDEX (includes metabolic, immune, and neuroendocrine)
numerator <- mapply(function(fn, x) fn(x), funcs[c(1:9,12:18)], pick(all_of(names(funcs[c(1:9,12:18)]))))
denominator <- (!is.na(pick(all_of(names(funcs[c(1:9,12:18)]))))) # denominator is the number of non-NA elements in a row
rowSums(numerator) / rowSums(denominator)
}
)
> glimpse(df2)
Rows: 6
Columns: 20
$ alt_zscore <dbl> 1.15628571, 0.89933333, NA, -0.73070833, 0.09635714, -1.06795833
$ alb_zscore <dbl> 1.8885995, 0.1349005, NA, 0.6745000, -0.8094005, 0.6745000
$ alp_zscore <dbl> 2.9930937, -1.3902153, NA, -0.6498202, -0.2740156, -0.3043024
$ calcium_zscore <dbl> 1.096065, 0.449666, NA, -0.674500, -0.337250, -0.674500
$ uc_ratio_zscore <dbl> 0.691189771, 0.003955525, NA, -0.955924044, -0.545585329, -0.540779867
$ sodium_zscore <dbl> 0.9324893, -0.6745000, NA, -1.1803750, 0.3108298, -1.0117500
$ phos_zscore <dbl> -0.1037695, 1.2140999, NA, 1.3939664, -1.9327029, -1.3040336
$ pot_zscore <dbl> 1.0791997, 0.1348992, NA, -0.4047003, 1.2140989, -0.2698010
$ pcv_zscore <dbl> NA, NA, -0.2430186, 1.2141000, -0.9593995, -0.2023500
$ glob_zscore <dbl> -1.0791990, 0.3854283, NA, -0.9635717, -1.2140995, -0.9635717
$ baso_per_ul_zscore <dbl> NA, NA, NA, NA, NA, -1.264687
$ esino_per_ul_zscore <dbl> NA, NA, 2.0877381, -0.1087929, -0.2191233, 1.6862500
$ lympho_per_ul_zscore <dbl> NA, NA, -0.1731824, -1.1198841, -0.5250162, 0.5652952
$ mono_per_ul_zscore <dbl> NA, NA, 1.0194148, -0.3321598, 3.0153216, 0.8446441
$ neutro_per_ul_zscore <dbl> NA, NA, -0.8297827, -0.5602610, 1.3692018, -0.6938523
$ cortisol_zscore <dbl> 9.536603, 8.635113, NA, NA, NA, NA
$ metab_index <dbl> 0.5555556, 0.1111111, 0.0000000, 0.5000000, 0.4000000, 0.1000000
$ imm_index <dbl> 0.0000000, 1.0000000, 0.2500000, 0.0000000, 0.4000000, 0.1666667
$ neuro_index <dbl> 1, 1, NaN, NaN, NaN, NaN
$ dysreg_index <dbl> 0.6000000, 0.3000000, 0.2000000, 0.3571429, 0.4285714, 0.1333333
现在是星期五晚上,我在这件事上花费的时间比我愿意承认的还要多。我要喝啤酒......希望这有帮助。