ggplot2 条形图多重重叠置信区间

Question

我正在尝试制作条形图，在 R 中使用 geom_errorbar 时遇到了多个重叠误差条的问题。

## Use ggplot for bargraph
dat3$EDU_B <- as.factor(dplyr::recode(dat3$EDU_B,
                                      "-0.5" = "No BA",
                                      "0.5" = "BA or Higher"))

dat3$DISC_LB <- as.factor(dplyr::recode(dat3$DISC_LB,
                                      "-0.5" = "None",
                                      "0.5" = "Once or More"))

m_graph <- glm(Vac~AGE+SEX+R_Ethgp+hea+SUPP+EDU_B*DISC_LB, data=dat3, family="binomial"(link="logit"))

d_graph<- expand.grid(EDU_B=c("No BA", "BA or Higher"), 
                      DISC_LB = c("None", "Once or More"),
                      AGE = mean(dat3$AGE, na.rm=TRUE), SEX = as.factor(c("1", "2")), R_Ethgp = as.factor(c("1", "2", "3")),
                      hea=mean(dat3$hea, na.rm=TRUE),
                      SUPP=mean(dat3$SUPP, na.rm=TRUE))


d_graph <- ggplotPredict(m_graph, d_graph)

colours <- setNames(c("gray", "black"), 
                    c("None", "Once or More"))

ggplot(d_graph, aes(x = EDU_B, y = Predicted, fill = DISC_LB)) +
geom_bar(stat = 'identity', position = position_dodge(width = .5), width = .5) +
  geom_errorbar(aes(ymin = CILo, ymax = CIHi), position = position_dodge(width = .5), width = .25) +
  labs(x = 'EDU_B',
       y = 'Vac',
       fill = "DISC_LB") +
  theme_bw(base_size = 12) +
  theme(axis.line = element_line(color="black"),
        axis.ticks = element_line(color="black"), panel.border = element_blank(), 
        legend.background = element_blank(),
        panel.grid.major = element_blank(), panel.grid.minor = element_blank())+
  scale_fill_manual(values=colours)

此代码显示以下条形图：

barplot outcome

dput(d_graph) 结果如下：

structure(list(Predicted = c(0.741733907298891, 0.893756128438394, 
0.753034460658107, 0.844328967902538, 0.690516055803563, 0.867292483919938, 
0.703160805745206, 0.808195916427015, 0.72652210305617, 0.886123699935912, 
0.738253376025301, 0.83380602202825, 0.673614979456173, 0.858060859719625, 
0.686636893380215, 0.795821126990505, 0.756261079495159, 0.900874986909593, 
0.767125555910493, 0.8542194901226, 0.706785307496624, 0.875938400246907, 
0.719035804605874, 0.819892436521564), CILo = c(0.715042531267134, 
0.879798887274131, 0.723169362160149, 0.824203005032667, 0.662243435508124, 
0.850948845952362, 0.671328969582244, 0.78632684692288, 0.683373949583467, 
0.861484206821811, 0.696028073246912, 0.801850170598328, 0.629603044714273, 
0.830165337690498, 0.64352199113624, 0.761701452826876, 0.717667779348929, 
0.881149420462295, 0.730439261597285, 0.827893111224002, 0.666379378466323, 
0.853326521810591, 0.680743413133101, 0.791428074348007), CIHi = c(0.766740741785983, 
0.906265380398855, 0.780654887628222, 0.862535118114526, 0.717431853735625, 
0.882092336078731, 0.733135236691968, 0.828315421381777, 0.765805349072594, 
0.906854159078416, 0.776496600435928, 0.861498477136563, 0.714766483640827, 
0.882025639872601, 0.726751381800773, 0.826170411398003, 0.791114377817812, 
0.917632740850175, 0.800184797380489, 0.877116574262168, 0.744176233516029, 
0.895491256713494, 0.754392395912261, 0.84523184833694), SE = c(0.0266913760317568, 
0.0139572411642636, 0.0298650984979582, 0.0201259628698712, 0.0282726202954383, 
0.0163436379675761, 0.0318318361629623, 0.021869069504135, 0.0431481534727024, 
0.0246394931141003, 0.0422253027783889, 0.0319558514299214, 0.0440119347418992, 
0.0278955220291265, 0.0431149022439746, 0.0341196741636288, 0.0385933001462304, 
0.0197255664472985, 0.0366862943132076, 0.0263263788985977, 0.0404059290303008, 
0.0226118784363158, 0.0382923914727727, 0.0284643621735564), 
    EDU_B = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 
    1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), levels = c("No BA", 
    "BA or Higher"), class = "factor"), DISC_LB = structure(c(1L, 
    1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 
    1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L), levels = c("None", "Once or More"
    ), class = "factor"), AGE = c(1.53750183316577e-14, 1.53750183316577e-14, 
    1.53750183316577e-14, 1.53750183316577e-14, 1.53750183316577e-14, 
    1.53750183316577e-14, 1.53750183316577e-14, 1.53750183316577e-14, 
    1.53750183316577e-14, 1.53750183316577e-14, 1.53750183316577e-14, 
    1.53750183316577e-14, 1.53750183316577e-14, 1.53750183316577e-14, 
    1.53750183316577e-14, 1.53750183316577e-14, 1.53750183316577e-14, 
    1.53750183316577e-14, 1.53750183316577e-14, 1.53750183316577e-14, 
    1.53750183316577e-14, 1.53750183316577e-14, 1.53750183316577e-14, 
    1.53750183316577e-14), SEX = structure(c(1L, 1L, 1L, 1L, 
    2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 
    1L, 2L, 2L, 2L, 2L), levels = c("1", "2"), class = "factor"), 
    R_Ethgp = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L
    ), levels = c("1", "2", "3"), class = "factor"), hea = c(-2.95655099318715e-15, 
    -2.95655099318715e-15, -2.95655099318715e-15, -2.95655099318715e-15, 
    -2.95655099318715e-15, -2.95655099318715e-15, -2.95655099318715e-15, 
    -2.95655099318715e-15, -2.95655099318715e-15, -2.95655099318715e-15, 
    -2.95655099318715e-15, -2.95655099318715e-15, -2.95655099318715e-15, 
    -2.95655099318715e-15, -2.95655099318715e-15, -2.95655099318715e-15, 
    -2.95655099318715e-15, -2.95655099318715e-15, -2.95655099318715e-15, 
    -2.95655099318715e-15, -2.95655099318715e-15, -2.95655099318715e-15, 
    -2.95655099318715e-15, -2.95655099318715e-15), SUPP = c(3.04891640608375e-16, 
    3.04891640608375e-16, 3.04891640608375e-16, 3.04891640608375e-16, 
    3.04891640608375e-16, 3.04891640608375e-16, 3.04891640608375e-16, 
    3.04891640608375e-16, 3.04891640608375e-16, 3.04891640608375e-16, 
    3.04891640608375e-16, 3.04891640608375e-16, 3.04891640608375e-16, 
    3.04891640608375e-16, 3.04891640608375e-16, 3.04891640608375e-16, 
    3.04891640608375e-16, 3.04891640608375e-16, 3.04891640608375e-16, 
    3.04891640608375e-16, 3.04891640608375e-16, 3.04891640608375e-16, 
    3.04891640608375e-16, 3.04891640608375e-16)), class = "data.frame", row.names = c("1", 
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", 
"14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24"
))

我该如何解决这个问题？

谢谢！

我希望每个条只有一个错误条，但我运行的代码显示多个重叠条。

Answer 1

正如我在评论中已经猜测的那样，问题在于数据集中每个

EDU_B

和

DISC_LB

的组合有多行，即总共 6 行反映

SEX

和

R_Ethgp

的组合。

解决此问题的一个选项是使用分面，如下所示或过滤数据集或......

library(ggplot2)

ggplot(d_graph, aes(x = EDU_B, y = Predicted, fill = DISC_LB)) +
  geom_bar(stat = "identity", position = position_dodge(width = .5), width = .5) +
  geom_errorbar(aes(ymin = CILo, ymax = CIHi),
    position = position_dodge(width = .5), width = .25
  ) +
  labs(
    x = "EDU_B",
    y = "Vac",
    fill = "DISC_LB"
  ) +
  theme_bw(base_size = 12) +
  theme(
    axis.line = element_line(color = "black"),
    axis.ticks = element_line(color = "black"),
    panel.border = element_blank(),
    legend.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  ) +
  scale_fill_manual(values = colours) +
  facet_grid(SEX ~ R_Ethgp, labeller = label_both)

编辑如果您只想显示两个变量将所有其他协变量固定在某个平均值的效果，那么您必须相应地设置数据。

谷歌搜索时找不到功能

ggplotPredict

。相反，我在下面的代码中使用

ggeffects::ggpredict

和一些随机的假示例数据：

library(ggeffects)
library(ggplot2)

### Example data
set.seed(1)

dat3 <- expand.grid(
  EDU_B = c("No BA", "BA or Higher"),
  DISC_LB = c("None", "Once or More"),
  AGE = c(1:10),
  SEX = factor(c("1", "2")),
  R_Ethgp = factor(c("1", "2", "3")),
  hea = c(1:10),
  SUPP = c(1:10),
  id = 1:10
)
dat3$Vac <- sample(0:1, nrow(dat3), replace = TRUE)
###

m_graph <- glm(Vac ~ AGE + SEX + R_Ethgp + hea + SUPP + EDU_B * DISC_LB,
  data = dat3, family = "binomial"(link = "logit")
)

d_graph <- ggeffects::ggpredict(m_graph, terms = c("EDU_B", "DISC_LB"))

colours <- setNames(
  c("gray", "black"),
  c("None", "Once or More")
)

ggplot(d_graph, aes(x = x, y = predicted, fill = group)) +
  geom_bar(stat = "identity", position = position_dodge(width = .5), width = .5) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high),
    position = position_dodge(width = .5), width = .25
  ) +
  labs(
    x = "EDU_B",
    y = "Vac",
    fill = "DISC_LB"
  ) +
  theme_bw(base_size = 12) +
  theme(
    axis.line = element_line(color = "black"),
    axis.ticks = element_line(color = "black"), panel.border = element_blank(),
    legend.background = element_blank(),
    panel.grid.major = element_blank(), panel.grid.minor = element_blank()
  ) +
  scale_fill_manual(values = colours)

ggplot2 条形图多重重叠置信区间

问题描述投票：0回答：1

1个回答

最新问题

ggplot2 条形图多重重叠置信区间

问题描述 投票：0回答：1

1个回答

最新问题

问题描述投票：0回答：1