两个数据框之间的显著相关性

问题描述 投票:0回答:1

我想找到两个大型数据框之间的显著相关性(FDR < 0.05)。如何修改代码才能运行得更快?其中

cor.mtest(cor)
这一步花了几个小时。与原始数据集不同,在小型数据集(例如下面的示例)上运行并不需要很长时间。

# Correlate every row of `bval` with every row of `expr` across samples and
# keep only the correlations that are significant at FDR < 0.05.
#
# Both inputs are transposed so that samples become rows; cor() then pairs
# each column of `t.bval` with each column of `t.expr`.
t.bval <- t(bval)
t.expr <- t(expr)
stopifnot(nrow(t.bval) == nrow(t.expr))

# NOTE: the result keeps the name `cor` for compatibility with later code;
# R still resolves `cor(...)` calls to the function.
cor <- cor(t.bval, t.expr)
# Rows of `cor` come from `bval`, columns from `expr`; label each axis from
# its own source instead of reusing the `bval` names for the columns.
dimnames(cor) <- list(rownames(bval), rownames(expr))

# P-values for Pearson correlations, computed in closed form for the whole
# matrix at once: t = r * sqrt(df / (1 - r^2)) follows a t distribution with
# df = n - 2 under the null. This replaces cor.mtest(cor), which treated the
# correlation matrix itself as a samples-by-features data matrix (so it tested
# the wrong thing) and ran one cor.test() per column pair (hence the hours).
# Assumes no missing values, so every pair uses all n samples -- TODO confirm.
n.samples <- nrow(t.bval)
deg.free <- n.samples - 2L
t.stat <- cor * sqrt(deg.free / (1 - cor^2))
# Kept as a list with a `p` element so code reading `mtest$p` still works;
# the confidence-interval matrices cor.mtest() returned are not reproduced.
mtest <- list(p = 2 * pt(-abs(t.stat), df = deg.free))

# p.adjust() drops matrix attributes, so restore them after adjusting.
adjusted.pval <- p.adjust(mtest$p, method = "fdr")
dim(adjusted.pval) <- dim(cor)
dimnames(adjusted.pval) <- dimnames(cor)

# Zero out correlations that do not pass the FDR threshold.
sig.cor <- cor * (adjusted.pval < 0.05)

输入:

> dim(bval)
[1] 9844  174
> dim(expr)
[1] 9844  174

> dput(t(bval)[1:5,1:5])
structure(c(0.651202886519379, 0.546932468275249, 0.77990670335116, 
0.570367878904668, 0.490859703406345, 0.819127800066518, 0.896778785060273, 
0.910419995766152, 0.862881750710962, 0.895738407255165, 0.409754021776346, 
0.437562413240227, 0.404271790048482, 0.407834330819103, 0.411767445153759, 
0.111173024032316, 0.0732396067281633, 0.0892561968208864, 0.0924103940922992, 
0.0745261480840073, 0.727502771503466, 0.745126721066948, 0.72446467629277, 
0.740169892735761, 0.730258358871385), dim = c(5L, 5L), dimnames = list(
    c("TCGA.2K.A9WE.01", "TCGA.2Z.A9J1.01", "TCGA.2Z.A9J3.01", 
    "TCGA.2Z.A9J6.01", "TCGA.2Z.A9J7.01"), c("A1BG", "A2M", "A4GALT", 
    "AAAS", "AACS")))

> dput(t(expr)[1:5,1:5])
structure(c(3.11327062985085, 2.60736439071998, 2.51138760850467, 
2.61757871821215, 2.99116809570192, 3.94027812075928, 3.7456302068644, 
3.48141956715802, 3.70450896566585, 3.58083202637968, 3.50763791284198, 
3.60784204549275, 3.45528021497798, 3.56318932299471, 3.53594747333992, 
3.47014111307111, 3.4842944131813, 3.44268580438446, 3.5016009268885, 
3.51798634683037, 3.38519953102602, 3.45448967582629, 3.42698278688777, 
3.44786587002075, 3.38755257891847), dim = c(5L, 5L), dimnames = list(
    c("TCGA.2K.A9WE.01", "TCGA.2Z.A9J1.01", "TCGA.2Z.A9J3.01", 
    "TCGA.2Z.A9J6.01", "TCGA.2Z.A9J7.01"), c("A1BG", "A2M", "A4GALT", 
    "AAAS", "AACS")))
r correlation
1个回答
0
投票

在基于

cor.test
的方法中使用
outer
可能会更快。

> # Return c(estimate, p.value) of the correlation test between column x of
> # t_bval and column y of t_expr.
> f <- \(x, y) unname(unlist(cor.test(t_bval[, x], t_expr[, y])[c('estimate', 'p.value')]))
> # One test per (t_bval column, t_expr column) pair, kept as a list matrix.
> o <- outer(colnames(t_bval), colnames(t_expr), Vectorize(f, SIMPLIFY=FALSE))
> # The third extent is the number of t_expr columns, so the reshape is also
> # correct when the two matrices have different numbers of columns.
> array(unlist(o), c(2, ncol(t_bval), ncol(t_expr)), 
+       dimnames=list(c('estimate', 'p.value'), colnames(t_bval), colnames(t_expr))) |> 
+   aperm(c(2, 3, 1))
, , estimate

             A1BG        A2M       A4GALT        AAAS        AACS
A1BG   -0.3340301 -0.2073497 -0.795970221 -0.95086519  0.02340409
A2M    -0.6496883 -0.8823652 -0.002401227 -0.04327964  0.40087554
A4GALT -0.1639098  0.2617615  0.794526178  0.18380488  0.45462281
AAAS    0.3873550  0.5799259 -0.477426368 -0.42213266 -0.36907517
AACS   -0.3583838  0.2423275  0.950708293  0.45917286  0.73001439

, , p.value

            A1BG        A2M     A4GALT       AAAS      AACS
A1BG   0.5827465 0.73789833 0.10718028 0.01297749 0.9702037
A2M    0.2353765 0.04756878 0.99694267 0.94491186 0.5036101
A4GALT 0.7922419 0.67056113 0.10829500 0.76729686 0.4417647
AAAS   0.5194314 0.30539340 0.41607679 0.47894313 0.5409767
AACS   0.5536569 0.69450596 0.01303939 0.43661144 0.1614075

数据:

> dput(t_bval)
structure(c(0.651202886519379, 0.546932468275249, 0.77990670335116, 
0.570367878904668, 0.490859703406345, 0.819127800066518, 0.896778785060273, 
0.910419995766152, 0.862881750710962, 0.895738407255165, 0.409754021776346, 
0.437562413240227, 0.404271790048482, 0.407834330819103, 0.411767445153759, 
0.111173024032316, 0.0732396067281633, 0.0892561968208864, 0.0924103940922992, 
0.0745261480840073, 0.727502771503466, 0.745126721066948, 0.72446467629277, 
0.740169892735761, 0.730258358871385), dim = c(5L, 5L), dimnames = list(
    c("TCGA.2K.A9WE.01", "TCGA.2Z.A9J1.01", "TCGA.2Z.A9J3.01", 
    "TCGA.2Z.A9J6.01", "TCGA.2Z.A9J7.01"), c("A1BG", "A2M", "A4GALT", 
    "AAAS", "AACS")))
> dput(t_expr)
structure(c(3.11327062985085, 2.60736439071998, 2.51138760850467, 
2.61757871821215, 2.99116809570192, 3.94027812075928, 3.7456302068644, 
3.48141956715802, 3.70450896566585, 3.58083202637968, 3.50763791284198, 
3.60784204549275, 3.45528021497798, 3.56318932299471, 3.53594747333992, 
3.47014111307111, 3.4842944131813, 3.44268580438446, 3.5016009268885, 
3.51798634683037, 3.38519953102602, 3.45448967582629, 3.42698278688777, 
3.44786587002075, 3.38755257891847), dim = c(5L, 5L), dimnames = list(
    c("TCGA.2K.A9WE.01", "TCGA.2Z.A9J1.01", "TCGA.2Z.A9J3.01", 
    "TCGA.2Z.A9J6.01", "TCGA.2Z.A9J7.01"), c("A1BG", "A2M", "A4GALT", 
    "AAAS", "AACS")))
© www.soinside.com 2019 - 2024. All rights reserved.