我正在尝试对下面的数据进行混合模型拟合。
df.urbana <- structure(list(Location = structure(c(2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Monmouth",
"Urbana"), class = "factor"), treatment = structure(c(1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L), .Label = c("CC",
"CCS", "CS", "SCS"), class = "factor"), block = structure(c(1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L), .Label = c("1",
"2", "3", "4"), class = "factor"), B.glucosidase = c(0.845077,
1.011463, 0.857032, 0.989803, 0.859022, 0.919467, 1.01717, 0.861689,
0.972332, 0.952922, 0.804431, 0.742634, 1.195837, 1.267285, 1.08571,
1.20097), Protein = c(7933.333333, 7000, 6352.982456, 8153.684211,
6077.894737, 4939.649123, 5002.807018, 6489.122807, 4694.035088,
5901.052632, 4303.859649, 6768.421053, 6159.298246, 6090.526316,
4939.649123, 5262.45614), POX.C = c(683.3528, 595.9173, 635.4315,
672.4234, 847.2944, 745.5665, 778.3548, 735.8141, 395.2647, 570.4148,
458.0383, 535.3851, 678.0293, 670.7419, 335.2923, 562.5674),
yield = c(5583L, 5442L, 5693L, 5739L, 5045L, 4902L, 5006L,
5086L, 4639L, 4781L, 4934L, 4857L, 4537L, 4890L, 4842L, 4608L
)), row.names = 17:32, class = "data.frame")
然后,我在我的数据中针对所有四种处理(CC,CCS,CS和SCS)计算了成对的p值,以获得p值表(p-table
),如下所示:
mod.yield.U <- lmerTest::lmer(yield ~ treatment + (1|block),data=df.urbana)
summary(mod.yield.U)
p.value.yield.U <- emmeans::emmeans(mod.yield.U, pairwise ~ treatment)
p.value.yield.U <- (summary(p.value.yield.U, adjust = "none") %>% # default adjust is tukey
purrr::pluck("contrasts") %>%
as.data.frame())[c(1,6)]
colnames(p.value.yield.U) <- c("contrasts.contrast", "Yield_Urbana")
mod.B.glucosidase.U <- lmerTest::lmer(B.glucosidase ~ treatment + (1|block),data=df.urbana)
summary(mod.B.glucosidase.U)
p.value.B.glucosidase.U <- emmeans::emmeans(mod.B.glucosidase.U, pairwise ~ treatment)
p.value.B.glucosidase.U <- (summary(p.value.B.glucosidase.U, adjust = "none") %>% # default adjust is tukey
purrr::pluck("contrasts") %>%
as.data.frame())[c(1,6)]
colnames(p.value.B.glucosidase.U) <- c("contrasts.contrast", "B.glucosidase_Urbana")
mod.Protein.U <- lmerTest::lmer(Protein ~ treatment + (1|block), data=df.urbana)
summary(mod.Protein.U)
p.value.Protein.U <- emmeans::emmeans(mod.Protein.U, pairwise ~ treatment)
p.value.Protein.U <- (summary(p.value.Protein.U, adjust = "none") %>% # default adjust is tukey
purrr::pluck("contrasts") %>%
as.data.frame())[c(1,6)]
colnames(p.value.Protein.U) <- c("contrasts.contrast", "Protein_Urbana")
mod.POX.C.U <- lmerTest::lmer(POX.C ~ treatment + (1|block),data=df.urbana)
summary(mod.POX.C.U)
p.value.POX.C.U <- emmeans::emmeans(mod.POX.C.U, pairwise ~ treatment)
p.value.POX.C.U <- (summary(p.value.POX.C.U, adjust = "none") %>% # default adjust is tukey
purrr::pluck("contrasts") %>%
as.data.frame())[c(1,6)]
colnames(p.value.POX.C.U) <- c("contrasts.contrast", "POX.C_Urbana")
# merge all
p_table <- Reduce(
function(x, y, ...) merge(x, y, by = "contrasts.contrast", ...),
list(p.value.yield.U, p.value.Protein.U, p.value.POX.C.U, p.value.B.glucosidase.U)
)
我也已经按以下方法计算了最小二乘均值(LS_MEAN
表:
mod.yield.U <- lmerTest::lmer(yield ~ treatment + (1|block),data=df.urbana)
summary(mod.yield.U)
LS_MEAN.yield.U <- emmeans::emmeans(mod.yield.U, pairwise ~ treatment)
LS_MEAN.yield.U <- as.data.frame(LS_MEAN.yield.U[1])
LS_MEAN.yield.U <- as.data.frame(LS_MEAN.yield.U[c("emmeans.treatment", "emmeans.emmean")])
colnames(LS_MEAN.yield.U) <- c("emmeans.treatment", "Yield_Urbana")
mod.B.glucosidase.U <- lmerTest::lmer(B.glucosidase ~ treatment + (1|block),data=df.urbana)
summary(mod.B.glucosidase.U)
LS_MEAN.B.glucosidase.U <- emmeans::emmeans(mod.B.glucosidase.U, pairwise ~ treatment)
LS_MEAN.B.glucosidase.U <- as.data.frame(LS_MEAN.B.glucosidase.U[1])
LS_MEAN.B.glucosidase.U <- as.data.frame(LS_MEAN.B.glucosidase.U[c("emmeans.treatment", "emmeans.emmean")])
colnames(LS_MEAN.B.glucosidase.U) <- c("emmeans.treatment", "B.glucosidase_Urbana")
mod.Protein.U <- lmerTest::lmer(Protein ~ treatment + (1|block), data=df.urbana)
summary(mod.Protein.U)
LS_MEAN.Protein.U <- emmeans::emmeans(mod.Protein.U, pairwise ~ treatment)
LS_MEAN.Protein.U <- as.data.frame(LS_MEAN.Protein.U[1])
LS_MEAN.Protein.U <- as.data.frame(LS_MEAN.Protein.U[c("emmeans.treatment", "emmeans.emmean")])
colnames(LS_MEAN.Protein.U) <- c("emmeans.treatment", "Protein_Urbana")
mod.POX.C.U <- lmerTest::lmer(POX.C ~ treatment + (1|block),data=df.urbana)
summary(mod.POX.C.U)
LS_MEAN.POX.C.U <- emmeans::emmeans(mod.POX.C.U, pairwise ~ treatment)
LS_MEAN.POX.C.U <- as.data.frame(LS_MEAN.POX.C.U[1])
LS_MEAN.POlX.C.U <- as.data.frame(LS_MEAN.POX.C.U[c("emmeans.treatment", "emmeans.emmean")])
colnames(LS_MEAN.POX.C.U) <- c("emmeans.treatment", "POX.C_Urbana")
# merge all
LS_MEAN <- Reduce(
function(x, y, ...) merge(x, y, by = "emmeans.treatment", ...),
list(LS_MEAN.yield.U, LS_MEAN.Protein.U, LS_MEAN.POX.C.U, LS_MEAN.B.glucosidase.U)
)
我希望得到的最终结果是一个最小二乘均值的表,该表带有字母符号来表示重要的处理组,如下所示(注意:下面的预期表中没有Yield的数据)。用这种格式我能得到最小二乘均值是什么?感谢您的提前帮助!
如果您使过程自动化,这可能会更有趣。您可以使用lapply()
遍历不同的因变量。
将因变量名称放入向量Y
中,并创建基本公式fo
。在每次迭代中,使用update()
更改公式中的因变量。 lapply()
将循环遍历四个y
,结果是列表Mod.U
。
Y <- c("yield", "Protein", "POX.C", "B.glucosidase")
fo <- y ~ treatment + (1|block)
Mod.U <- lapply(Y, function(y) lmerTest::lmer(update(fo, paste(y, "~ .")), data=df.urbana))
((请注意,消息boundary (singular) fit: see ?isSingular
可能在某处有singular fit。)
p值和均值计算可以相同的方式进行。 lapply()
现在将遍历列表Mod.U
的元素。 setNames()
创建您的列名。 (名称有些隐藏,但是str(Mod.U)
显示可以在names(mod.U@frame)[1]
中找到它们。)>
P.value.U <- lapply(Mod.U, function(mod.U) (emmeans::emmeans(mod.U, pairwise ~ treatment) %>% summary(adjust = "none") %>% purrr::pluck("contrasts") %>% as.data.frame)[c(1, 6)] %>% setNames(c("contrasts.contrast", paste0(names(mod.U@frame)[1], "_urbana"))))
由于我们已经有了列表,现在也很容易
Reduce()
。
p_table <- Reduce(function(x, y, ...) merge(x, y, by = "contrasts.contrast", ...), P.value.U) # contrasts.contrast yield_urbana Protein_urbana POX.C_urbana B.glucosidase_urbana # 1 CC - CCS 7.977935e-05 0.005002692 0.084083738 0.8581554114 # 2 CC - CS 7.676418e-06 0.002530699 0.043628964 0.3799550686 # 3 CC - SCS 3.416131e-06 0.004771013 0.235380612 0.0023712583 # 4 CCS - CS 4.505872e-02 0.664550854 0.002028628 0.4785341647 # 5 CCS - SCS 9.777084e-03 0.976231386 0.010602656 0.0018094630 # 6 CS - SCS 3.726498e-01 0.685994660 0.310848786 0.0006411248
我们以类似的方式进行操作。
LS_MEAN.U <- lapply(Mod.U, function(mod.U) (emmeans::emmeans(mod.U, pairwise ~ treatment)[1] %>% as.data.frame)[c("emmeans.treatment", "emmeans.emmean")] %>% setNames(c("emmeans.treatment", paste0(names(mod.U@frame)[1], "_urbana")))) LS_MEAN <- Reduce(function(x, y, ...) merge(x, y, by = "emmeans.treatment", ...), LS_MEAN.U) # emmeans.treatment yield_urbana Protein_urbana POX.C_urbana B.glucosidase_urbana # 1 CC 5614.25 7360.000 646.7812 0.9258438 # 2 CCS 5009.75 5627.368 776.7575 0.9143370 # 3 CS 4802.75 5416.842 489.7757 0.8680797 # 4 SCS 4719.25 5612.982 561.6577 1.1874505
[现在,我仍然不确定预期结果表(将其称为
out
)如何引用p_table
表,但是您可以随意使用paste0()
创建后缀。formatC()
调整为所需的位数。
out <- LS_MEAN[-2]
out$Protein_urbana <- paste0(formatC(LS_MEAN$Protein_urbana, format="f", digits=1),
c("a", "ab", "b", "ab"))
out$POX.C_urbana <- paste0(formatC(LS_MEAN$POX.C_urbana, format="f", digits=2),
c("ab", "a", "b", "b"))
out$B.glucosidase_urbana <- paste0(formatC(LS_MEAN$B.glucosidase_urbana, format="f", digits=3),
c("b", "b", "b", "a"))
names(out) <- c("Treatment", "Protein_U", "POX.C_U", "B.glucosidase_U")
out
# Treatment Protein_U POX.C_U B.glucosidase_U
# 1 CC 7360.0a 646.78ab 0.926b
# 2 CCS 5627.4ab 776.76a 0.914b
# 3 CS 5416.8b 489.78b 0.868b
# 4 SCS 5613.0ab 561.66b 1.187a