我正在尝试创建一个像这样的条形图,其中各个条形的高度互不相同(图片中条形的具体长度仅作示意)。
但是我的代码创建了这个
我认为
geom_col
是正确的选择,但我不知道代码的其余部分有什么问题。柱子的高度不应该相同。
我的代码
# Tag every row with the age group its source data frame represents.
young[["age_group"]] <- "Young"
old[["age_group"]] <- "Old"

# Stack the two groups into a single data frame for plotting.
combined_df <- bind_rows(young, old)

# Dodged columns of implvdm by age group, filled by gender.
# NOTE: geom_col() draws one bar per row of the data as-is; no aggregation
# (mean, count, ...) is applied to implvdm here.
ggplot(
  data = combined_df,
  mapping = aes(x = age_group, y = implvdm, fill = gndr)
) +
  geom_col(position = "dodge") +
  labs(x = "Age Group", y = "implvdm", fill = "Gender") +
  theme_minimal()
样本数据
# Sample of 10 respondents in the "young" group (one row per respondent).
# Built with data.frame() instead of raw dput() output; columns, types and
# row names are the same as in the structure() form.
young <- data.frame(
  gndr = c(
    "woman", "man", "man", "man", "woman",
    "man", "man", "woman", "woman", "man"
  ),
  yrbrn = c(
    2003L, 1999L, 1998L, 1999L, 2002L,
    1999L, 2001L, 2004L, 2004L, 1999L
  ),
  agea = c(18L, 22L, 23L, 22L, 19L, 22L, 21L, 17L, 18L, 22L),
  impdema = rep(6L, 10L),
  implvdm = c(10L, 7L, 9L, 7L, 10L, 10L, 9L, 10L, 8L, 10L),
  accalaw = c(0L, 2L, 1L, 0L, 5L, 3L, 3L, 0L, 2L, 0L),
  imwbcnt = c(9L, 2L, 7L, 4L, 0L, 10L, 2L, 3L, 7L, 6L),
  euftf = c(4L, 3L, 7L, 4L, 6L, 5L, 8L, 5L, 5L, 9L),
  freehms = c(1L, 3L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L),
  gincdif = c(2L, 3L, 2L, 3L, 2L, 3L, 2L, 3L, 2L, 3L),
  # Entirely missing in this sample, but kept as an integer column.
  prtclffi = rep(NA_integer_, 10L),
  stringsAsFactors = FALSE
)
# Sample of 10 respondents in the "old" group (one row per respondent).
# Built with data.frame() instead of raw dput() output; columns, types and
# row names are the same as in the structure() form.
old <- data.frame(
  gndr = c(
    "man", "woman", "woman", "man", "woman",
    "man", "woman", "woman", "woman", "woman"
  ),
  yrbrn = c(
    1943L, 1975L, 1974L, 1977L, 1959L,
    1993L, 1990L, 1993L, 1958L, 1971L
  ),
  agea = c(78L, 46L, 47L, 44L, 63L, 27L, 31L, 29L, 63L, 50L),
  impdema = c(6L, 6L, 6L, 5L, 6L, 6L, 6L, 3L, 6L, 1L),
  implvdm = c(10L, 9L, 9L, 6L, 8L, 10L, 4L, 8L, 9L, 10L),
  accalaw = c(0L, 8L, 2L, 2L, 2L, 0L, 3L, 3L, 9L, 0L),
  imwbcnt = c(9L, 1L, 3L, 3L, 5L, 5L, 7L, 4L, 3L, 7L),
  euftf = c(8L, 2L, 4L, 3L, 4L, 3L, 7L, 5L, 4L, 5L),
  freehms = c(2L, 2L, 4L, 2L, 2L, 1L, 4L, 2L, 4L, 1L),
  gincdif = c(2L, 2L, 2L, 3L, 2L, 4L, 1L, 2L, 2L, 3L),
  # Entirely missing in this sample, but kept as an integer column.
  prtclffi = rep(NA_integer_, 10L),
  stringsAsFactors = FALSE
)
如果要计算平均值和标准误差,可以使用
stat_summary
代替。例如
# Grouped bar chart of the mean implvdm per age group and gender, with
# error bars spanning mean -/+ one standard error.
ggplot(combined_df, aes(x = age_group, y = implvdm, fill = gndr)) +
  # Bar height is the group mean. `fun` is passed explicitly so that
  # stat_summary() does not fall back to its default summary and print
  # "No summary function supplied, defaulting to `mean_se()`".
  stat_summary(fun = mean, geom = "bar", position = position_dodge(0.9)) +
  # mean_se() supplies y, ymin and ymax for the error bars; the dodge width
  # matches the bar layer so each error bar sits centred on its bar.
  stat_summary(
    fun.data = mean_se,
    geom = "errorbar",
    position = position_dodge(0.9),
    width = 0.2
  ) +
  labs(x = "Age Group", y = "implvdm", fill = "Gender") +
  theme_minimal()
产生
您还可以在绘图之前使用 dplyr
自行预先计算值
# Pre-aggregate with dplyr, then plot the summary table: one row per
# age group x gender holding the mean of implvdm and its standard error.
combined_df %>%
  summarise(
    avg = mean(implvdm),
    std_err = sd(implvdm) / sqrt(n()),
    .by = c(age_group, gndr)
  ) %>%
  ggplot(aes(x = age_group, fill = gndr)) +
  # Bars show the precomputed group means.
  geom_col(aes(y = avg), position = "dodge") +
  # Error bars span mean -/+ one standard error, dodged to line up with bars.
  geom_errorbar(
    aes(ymin = avg - std_err, ymax = avg + std_err),
    position = position_dodge(0.9),
    width = 0.3
  ) +
  labs(x = "Age Group", y = "implvdm", fill = "Gender") +
  theme_minimal()