我想找到两个大型数据帧之间的显著相关性(FDR < 0.05)。如何修改代码使其运行得更快?其中的
cor.mtest(cor)
花了几个小时。
与在原始数据集上运行时不同,在小型数据集(例如下面的示例)上运行并不需要很长时间。
# Correlate every row (feature) of `bval` with every row of `expr` across
# their shared columns (samples), then keep only the correlations whose
# FDR-adjusted p-value is below 0.05 (all others are set to 0).
#
# The p-values are derived directly from the coefficients: for a Pearson r
# computed from n observations, t = r * sqrt((n - 2) / (1 - r^2)) follows a
# t distribution with n - 2 degrees of freedom under the null hypothesis.
# This is the test cor.test() performs, evaluated for the whole matrix in
# one vectorised step instead of one pair at a time. The earlier
# `cor.mtest(cor)` call was slow and also tested the wrong thing: it was
# handed the correlation matrix as if it were the data.
t.bval <- t(bval)
t.expr <- t(expr)

# Rows follow the features of `bval`, columns the features of `expr`.
cor <- cor(t.bval, t.expr)
# NOTE(review): the columns correspond to rows of `expr`; labelling them with
# rownames(bval) is only right if both inputs list the same features in the
# same order -- confirm.
colnames(cor) <- rownames(bval)

# Assumes there are no missing values, so every pair uses all samples --
# TODO confirm. With NAs the per-pair n would differ.
n.samples <- nrow(t.bval)
t.stat <- cor * sqrt((n.samples - 2) / (1 - cor^2))
# Two-sided p-value; kept under `mtest$p` so later code can still read it.
mtest <- list(p = 2 * pt(-abs(t.stat), df = n.samples - 2))

# p.adjust() returns a plain vector, so restore the matrix shape and labels.
adjusted.pval <- p.adjust(mtest$p, method = "fdr")
dim(adjusted.pval) <- dim(cor)
dimnames(adjusted.pval) <- dimnames(cor)

sig.cor <- cor * (adjusted.pval < 0.05)
输入:
> dim(bval)
[1] 9844 174
> dim(expr)
[1] 9844 174
> dput(t(bval)[1:5,1:5])
structure(c(0.651202886519379, 0.546932468275249, 0.77990670335116,
0.570367878904668, 0.490859703406345, 0.819127800066518, 0.896778785060273,
0.910419995766152, 0.862881750710962, 0.895738407255165, 0.409754021776346,
0.437562413240227, 0.404271790048482, 0.407834330819103, 0.411767445153759,
0.111173024032316, 0.0732396067281633, 0.0892561968208864, 0.0924103940922992,
0.0745261480840073, 0.727502771503466, 0.745126721066948, 0.72446467629277,
0.740169892735761, 0.730258358871385), dim = c(5L, 5L), dimnames = list(
c("TCGA.2K.A9WE.01", "TCGA.2Z.A9J1.01", "TCGA.2Z.A9J3.01",
"TCGA.2Z.A9J6.01", "TCGA.2Z.A9J7.01"), c("A1BG", "A2M", "A4GALT",
"AAAS", "AACS")))
> dput(t(expr)[1:5,1:5])
structure(c(3.11327062985085, 2.60736439071998, 2.51138760850467,
2.61757871821215, 2.99116809570192, 3.94027812075928, 3.7456302068644,
3.48141956715802, 3.70450896566585, 3.58083202637968, 3.50763791284198,
3.60784204549275, 3.45528021497798, 3.56318932299471, 3.53594747333992,
3.47014111307111, 3.4842944131813, 3.44268580438446, 3.5016009268885,
3.51798634683037, 3.38519953102602, 3.45448967582629, 3.42698278688777,
3.44786587002075, 3.38755257891847), dim = c(5L, 5L), dimnames = list(
c("TCGA.2K.A9WE.01", "TCGA.2Z.A9J1.01", "TCGA.2Z.A9J3.01",
"TCGA.2Z.A9J6.01", "TCGA.2Z.A9J7.01"), c("A1BG", "A2M", "A4GALT",
"AAAS", "AACS")))
改用基于 cor.test 的方法并配合 outer 可能会更快。
> # Return c(estimate, p.value) from cor.test() for column x of t_bval
> # against column y of t_expr.
> f <- \(x, y) unname(unlist(cor.test(t_bval[, x], t_expr[, y])[c('estimate', 'p.value')]))
> # outer() needs a vectorised function; SIMPLIFY=FALSE keeps one length-2
> # result per (x, y) pair.
> o <- outer(colnames(t_bval), colnames(t_expr), Vectorize(f, SIMPLIFY=FALSE))
> # Reshape to (t_bval column) x (t_expr column) x statistic. The third
> # extent is ncol(t_expr), so inputs with different column counts also work.
> array(unlist(o), c(2, ncol(t_bval), ncol(t_expr)),
+ dimnames=list(c('estimate', 'p.value'), colnames(t_bval), colnames(t_expr))) |>
+ aperm(c(2, 3, 1))
, , estimate
A1BG A2M A4GALT AAAS AACS
A1BG -0.3340301 -0.2073497 -0.795970221 -0.95086519 0.02340409
A2M -0.6496883 -0.8823652 -0.002401227 -0.04327964 0.40087554
A4GALT -0.1639098 0.2617615 0.794526178 0.18380488 0.45462281
AAAS 0.3873550 0.5799259 -0.477426368 -0.42213266 -0.36907517
AACS -0.3583838 0.2423275 0.950708293 0.45917286 0.73001439
, , p.value
A1BG A2M A4GALT AAAS AACS
A1BG 0.5827465 0.73789833 0.10718028 0.01297749 0.9702037
A2M 0.2353765 0.04756878 0.99694267 0.94491186 0.5036101
A4GALT 0.7922419 0.67056113 0.10829500 0.76729686 0.4417647
AAAS 0.5194314 0.30539340 0.41607679 0.47894313 0.5409767
AACS 0.5536569 0.69450596 0.01303939 0.43661144 0.1614075
数据:
> dput(t_bval)
structure(c(0.651202886519379, 0.546932468275249, 0.77990670335116,
0.570367878904668, 0.490859703406345, 0.819127800066518, 0.896778785060273,
0.910419995766152, 0.862881750710962, 0.895738407255165, 0.409754021776346,
0.437562413240227, 0.404271790048482, 0.407834330819103, 0.411767445153759,
0.111173024032316, 0.0732396067281633, 0.0892561968208864, 0.0924103940922992,
0.0745261480840073, 0.727502771503466, 0.745126721066948, 0.72446467629277,
0.740169892735761, 0.730258358871385), dim = c(5L, 5L), dimnames = list(
c("TCGA.2K.A9WE.01", "TCGA.2Z.A9J1.01", "TCGA.2Z.A9J3.01",
"TCGA.2Z.A9J6.01", "TCGA.2Z.A9J7.01"), c("A1BG", "A2M", "A4GALT",
"AAAS", "AACS")))
> dput(t_expr)
structure(c(3.11327062985085, 2.60736439071998, 2.51138760850467,
2.61757871821215, 2.99116809570192, 3.94027812075928, 3.7456302068644,
3.48141956715802, 3.70450896566585, 3.58083202637968, 3.50763791284198,
3.60784204549275, 3.45528021497798, 3.56318932299471, 3.53594747333992,
3.47014111307111, 3.4842944131813, 3.44268580438446, 3.5016009268885,
3.51798634683037, 3.38519953102602, 3.45448967582629, 3.42698278688777,
3.44786587002075, 3.38755257891847), dim = c(5L, 5L), dimnames = list(
c("TCGA.2K.A9WE.01", "TCGA.2Z.A9J1.01", "TCGA.2Z.A9J3.01",
"TCGA.2Z.A9J6.01", "TCGA.2Z.A9J7.01"), c("A1BG", "A2M", "A4GALT",
"AAAS", "AACS")))