我正在使用lmer模型(来自lmerTest)来了解大小是否与基因表达显着相关,如果是这样,哪些特定基因与大小相关(也将'女性'和'笼'视为随机效应):
lmer(Expression ~ size*genes + (1|female) + (1|cage), data = df)
在摘要输出中,我的一个基因被用作截距(因为它在字母表中最高,'ctsk')。在阅读之后,建议我选择最高(或最低)表达的基因作为我的拦截来比较其他一切。在这种情况下,基因'star'表达最高。在重新调平我的数据并重新运行模型并以'star'作为截距之后,所有其他斜率现在在summary()输出中都很重要,尽管anova()输出是相同的。
我的问题是:
然后,我将使用lsmeans来确定斜率是否彼此显着不同。
这是一些可重现的代码:
df <- structure(list(size = c(13.458, 13.916, 13.356, 13.84, 14.15,
16.4, 15.528, 13.916, 13.458, 13.285, 15.415, 14.181, 13.367,
13.356, 13.947, 14.615, 15.804, 15.528, 16.811, 14.677, 13.2,
17.57, 13.947, 14.15, 16.833, 13.2, 17.254, 16.4, 14.181, 13.367,
14.294, 13.84, 16.833, 17.083, 15.847, 13.399, 14.15, 15.47,
13.356, 14.615, 15.415, 15.596, 15.847, 16.833, 13.285, 15.47,
15.596, 14.181, 13.356, 14.294, 15.415, 15.363, 15.4, 12.851,
17.254, 13.285, 17.57, 14.7, 17.57, 13.947, 16.811, 15.4, 13.399,
14.22, 13.285, 14.344, 17.083, 15.363, 14.677, 15.945), female = structure(c(7L,
12L, 7L, 11L, 12L, 9L, 6L, 12L, 7L, 7L, 6L, 12L, 8L, 7L, 7L,
11L, 9L, 6L, 10L, 11L, 8L, 10L, 7L, 12L, 10L, 8L, 10L, 9L, 12L,
8L, 12L, 11L, 10L, 10L, 9L, 8L, 12L, 6L, 7L, 11L, 6L, 9L, 9L,
10L, 7L, 6L, 9L, 12L, 7L, 12L, 6L, 6L, 6L, 8L, 10L, 7L, 10L,
11L, 10L, 7L, 10L, 6L, 8L, 11L, 7L, 6L, 10L, 6L, 11L, 9L), .Label = c("2",
"3", "6", "10", "11", "16", "18", "24", "25", "28", "30", "31",
"116", "119", "128", "135", "150", "180", "182", "184", "191",
"194", "308", "311", "313", "315", "320", "321", "322", "324",
"325", "329", "339", "342"), class = "factor"), Expression = c(1.10620339407889,
1.06152707257767, 2.03000185674761, 1.92971750056866, 1.30833983462599,
1.02760836165184, 0.960969703469363, 1.54706275342441, 0.314774666283256,
2.63330873720495, 0.895123048920455, 0.917716470037954, 1.3178821021651,
1.57879156856332, 0.633429011784367, 1.12641940390116, 1.0117475796626,
0.687813581350802, 0.923485880847423, 2.98926377892241, 0.547685277701021,
0.967691178046748, 2.04562285257417, 1.09072264997544, 1.57682235413366,
0.967061529758701, 0.941995966023426, 0.299517719292817, 1.8654758451133,
0.651369936708288, 1, 1.04407979584122, 0.799275069735012, 1.007255409328,
0.428129727802404, 0.93927930755046, 0.987394257033815, 0.965050972503591,
2.06719308587322, 1.63846508102874, 0.997380526962644, 0.60270197593643,
2.78682867333149, 0.552922632281237, 3.06702198884562, 0.890708510580522,
1.15168812515828, 0.929205084743164, 2.27254101826041, 1, 0.958147442333527,
1.05924173014089, 0.984356852670054, 0.623630720815415, 0.796864961771971,
2.4679841984147, 1.07248904053777, 1.79630829771291, 0.929642913565982,
0.296954006040077, 2.25741254504115, 1.17188536743493, 0.849778293699644,
2.32679163466857, 0.598119006609413, 0.975660099975423, 1.01494421228949,
1.14007557533352, 2.03638316428189, 0.777347547080068), cage = structure(c(64L,
49L, 56L, 66L, 68L, 48L, 53L, 49L, 64L, 56L, 55L, 68L, 80L, 56L,
64L, 75L, 69L, 53L, 59L, 66L, 63L, 59L, 64L, 68L, 59L, 63L, 50L,
48L, 68L, 80L, 49L, 66L, 59L, 50L, 48L, 63L, 68L, 62L, 56L, 75L,
55L, 81L, 48L, 59L, 56L, 62L, 81L, 68L, 56L, 49L, 55L, 62L, 55L,
63L, 50L, 56L, 59L, 75L, 59L, 64L, 59L, 55L, 63L, 66L, 56L, 53L,
50L, 62L, 66L, 81L), .Label = c("023", "024", "041", "042", "043",
"044", "044 bis", "045", "046", "047", "049", "051", "053", "058",
"060", "061", "068", "070", "071", "111", "112", "113", "123",
"126", "128", "14", "15", "23 bis", "24", "39", "41", "42", "44",
"46 bis", "47", "49", "51", "53", "58", "60", "61", "67", "68",
"70", "75", "76", "9", "D520", "D521", "D522", "D526", "D526bis",
"D533", "D535", "D539", "D544", "D545", "D545bis", "D546", "D561",
"D561bis", "D564", "D570", "D581", "D584", "D586", "L611", "L616",
"L633", "L634", "L635", "L635bis", "L637", "L659", "L673", "L676",
"L686", "L717", "L718", "L720", "L725", "L727", "L727bis"), class = "factor"),
genes = c("igf1", "gr", "ctsk", "ets2", "ctsk", "mtor", "igf1",
"sgk1", "sgk1", "ghr1", "ghr1", "gr", "ctsk", "ets2", "timp2",
"timp2", "ets2", "rictor", "sparc", "mmp9", "gr", "sparc",
"mmp2", "ghr1", "mmp9", "sparc", "mmp2", "timp2", "star",
"sgk1", "mmp2", "gr", "mmp2", "rictor", "timp2", "mmp2",
"mmp2", "mmp2", "mmp2", "rictor", "mtor", "ghr1", "star",
"igf1", "mmp9", "igf1", "igf2", "rictor", "rictor", "mmp9",
"ets2", "ctsk", "mtor", "ghr1", "mtor", "ets2", "ets2", "igf2",
"igf1", "sgk1", "sgk1", "ghr1", "sgk1", "igf2", "star", "mtor",
"igf2", "ghr1", "mmp2", "rictor")), .Names = c("size", "female",
"Expression", "cage", "genes"), row.names = c(1684L, 2674L, 10350L,
11338L, 10379L, 4586L, 1679L, 3637L, 3610L, 5537L, 5530L, 2676L,
10355L, 11313L, 8422L, 8450L, 11322L, 6494L, 9406L, 13262L, 2653L,
9407L, 12274L, 5564L, 13256L, 9394L, 12294L, 8438L, 750L, 3614L,
12303L, 2671L, 12293L, 6513L, 8437L, 12284L, 12305L, 12267L,
12276L, 6524L, 4567L, 5545L, 733L, 1700L, 13241L, 1674L, 7471L,
6528L, 6498L, 13266L, 11308L, 10347L, 4566L, 5541L, 4590L, 11315L,
11333L, 7482L, 1703L, 3607L, 3628L, 5529L, 3617L, 7483L, 722L,
4565L, 7476L, 5532L, 12299L, 6510L), class = "data.frame")
genes <- as.factor(df$genes)
library(lmerTest)
fit1 <- lmer(Expression ~ size * genes +(1|female) + (1|cage), data = df)
anova(fit1)
summary(fit1) # uses the gene 'ctsk' for intercept, so re-level to see what happens if I re-order based on highest value (sgk1):
df$genes <- relevel(genes, "star")
# re-fit the model with 'star' as the intercept:
fit1 <- lmer(Expression ~ size * genes +(1|female) + (1|cage), data = df)
anova(fit1) # no difference here
summary(fit1) # lots of difference
我的样本数据很长,因为模型不会运行 - 希望这没关系!
虽然可以解释拟合模型中的系数,但这并不是最富有成效或最富有成效的方法。相反,只需使用默认使用的任何对比方法来拟合模型,并使用适当的事后分析进行跟进。
为此,我建议使用emmeans(估计的边际均值)包,这是lsmeans的延续,所有未来的发展都将在那里进行。该软件包有几个小插曲,最适合你情况的是vignette("interactions")
,你可以查看here - 特别是关于与协变量交互的部分。
简而言之,比较截距可能会产生误导,因为这些是size = 0
的预测,这是一个推断;而且,正如你在一个问题中所建议的,这里的真正意义可能是比斜率更多地比较斜率。为此目的,有一个emtrends()
函数(或者,如果你愿意,它的别名lstrends()
)。
我还强烈建议您显示模型预测图,以便可以看到正在发生的事情。这可以通过以下方式完成
library(emmeans)
emmip(fit1, gene ~ size, at = list(size = range(df$size)))