棒棒糖图的点大小和线宽参数未按预期生效,且坐标轴标签未显示

问题描述 投票:0回答:1

我正在使用

ggplot2
创建一个棒棒糖图表,比较美国大学学费和家庭收入中位数(针对所有种族,特别是黑人家庭)。为了使图表更易于阅读,我想将家庭收入的两个条形的线宽和点大小设置为 1.3 和 5,同时将其他条形(学费和就学成本)的线宽和点大小设置为 0.7 和 2。然而,由于某种原因,R 将我的参数应用于黑人家庭和佛罗里达大学的条形(见图),而不是应用于两个收入条形,我不知道如何修复它。

此外,我设置的 x 轴和 y 轴标签没有应用到图表上。相反,左上角悬着一个小小的

namerank
,右下角则悬着一个
y
。我也不知道如何让它们消失。

当前图表

这是我的代码:

# Lollipop chart of the asker's attempt: one segment + point per row for
# cost of attendance / income (costatt), out-of-state tuition (out_state)
# and in-state tuition (in_state), drawn horizontally via coord_flip().
# This is the code that exhibits the two problems described in the question;
# the NOTE comments below mark where they originate.
clg_fee|>
  # Sort rows by costatt, then fix that order as the factor levels of namerank
  # so the bars are drawn in sorted order.
  arrange(costatt)|>
  mutate(namerank = factor(namerank, namerank))|>
  ggplot() +
  geom_segment(
    aes(x=namerank, 
        xend=namerank, 
        y=0, 
        yend=costatt, 
       color = ifelse(namerank %in% c("Real Median Household Income (2022)", 
                                   "Real Median Household Income (Black, 2022)"), 
                   "Median Household Income","Cost of Attendance (out-of-state)")),
    # NOTE: this vector is built outside aes() from `clg_fee$namerank`, i.e. the
    # original data frame in its original row order, while the layer data has
    # been re-sorted by arrange(costatt) above. The widths are therefore
    # matched to rows by position, not by name.
    linewidth = ifelse(clg_fee$namerank %in% c("Real Median Household Income (2022)", 
                                       "Real Median Household Income (Black, 2022)"), 
                       1.3,0.7) # cost of attendance and income
  )+
  geom_segment(
    aes(x=namerank, 
        xend=namerank, 
        y=0, 
        yend=out_state, 
        color = "Tuition (out-of-state)"),
    linewidth = 0.7 # out-of-state tuition
    )+
  geom_point(aes(x = namerank, 
                 y=out_state, 
                 color="Tuition (out-of-state)"),
             size = 2)+ # out-of-state tuition
  geom_point(aes(x = namerank, y = costatt,
                 color = ifelse(namerank %in% c("Real Median Household Income (2022)", 
                                            "Real Median Household Income (Black, 2022)"), 
                            "Median Household Income","Cost of Attendance (out-of-state)")),
             # NOTE: same positional mismatch as the linewidth vector above --
             # computed from the unsorted `clg_fee`, applied to the sorted rows.
             size = ifelse(clg_fee$namerank %in% c("Real Median Household Income (2022)", 
                                            "Real Median Household Income (Black, 2022)"), 
                            5, 2))+ # cost of attendance and income
  geom_segment(
    aes(x=namerank, 
        xend=namerank, 
        y=0, 
        yend=in_state, 
        color = "Tuition (in-state)"),
    linewidth = 0.7 # in-state tuition
  )+ 
  geom_point(aes(x = namerank, y=in_state, color = "Tuition (in-state)"), size = 2)+ # in-state tuition
  coord_flip() +
  # Abbreviate large values with short-scale suffixes and append "$".
  scale_y_continuous(labels = scales::label_number(scale_cut = scales::cut_short_scale(), suffix = "$"))+
  # One colour per legend entry; the names match the strings mapped to `color`.
  scale_color_manual(
    values = c(
      "Tuition (out-of-state)" = "#779ECB",
      "Tuition (in-state)" = "#77DD77",
      "Median Household Income" = "orange",
      "Cost of Attendance (out-of-state)" = "#757575"
    )
  )+
  theme_ipsum()+
  theme(legend.position = "top")+
  # NOTE(review): labs() takes labels keyed by aesthetic name (x, y, color, ...).
  # `xlab` and `ylab` are not aesthetics used by this plot, so these two entries
  # presumably do not set the axis titles -- consistent with the default
  # "namerank" / "y" titles the question reports.
  labs(
    xlab = "",
    ylab = "Undergraduate costs and tuition",
    color = "",
    title = "University costs are far from affordable",
    caption = "Tuition fees source: Visual Capitalist
    Note that in-state tuition data is unavailable for most universities \n
    Cost of attendance source: University websites
    Note that official estimations of cost of attendance are unavailable for Boston College and Northeastern"
  )

这是我的数据: 由于我想按降序排列大学的入学成本并将家庭收入放在同一排名中,因此我在数据框中插入了美国收入中位数作为两行,并将收入值放在

costatt
下(costatt 代表就学成本,即 cost of attendance)。

# Example data in dput()-style form: a 33-row data.frame with the columns
# namerank, rank, school_name, state, out_state, in_state and costatt.
# The first two rows are the median household income entries: their value is
# stored in costatt and every other column except namerank is NA for them.
# The expression is not assigned here; presumably it is what the plotting
# code refers to as `clg_fee` -- confirm before running.
structure(list(namerank = c("Real Median Household Income (2022)", 
"Real Median Household Income (Black, 2022)", "University of Southern California(Rank28)", 
"Brown University(Rank9)", "Duke University(Rank7)", "University of Pennsylvania(Rank6)", 
"Cornell University(Rank12)", "Northwestern University(Rank9)", 
"University of Chicago(Rank12)", "Columbia University(Rank12)", 
"Dartmouth College(Rank18)", "Georgetown University(Rank22)", 
"Yale University(Rank5)", "Vanderbilt University(Rank18)", "Carnegie Mellon University(Rank24)", 
"Johns Hopkins University(Rank9)", "California Institute of Technology(Rank7)", 
"Washington University, St. Louis(Rank24)", "University of Notre Dame(Rank20)", 
"Stanford University(Rank3)", "Emory University(Rank24)", "Massachusetts Institute of Technology(Rank2)", 
"Princeton University(Rank1)", "Harvard University(Rank3)", "University of Virginia(Rank24)", 
"Rice University(Rank17)", "University of Michigan, Ann Arbor(Rank21)", 
"University of California, San Diego(Rank28)", "University of California, Berkeley(Rank15)", 
"University of California, LA(Rank15)", "University of California, Davis(Rank28)", 
"University of North Carolina at Chapel Hill(Rank22)", "University of Florida(Rank28)"
), rank = c(NA, NA, 28, 9, 7, 6, 12, 9, 12, 12, 18, 22, 5, 18, 
24, 9, 7, 24, 20, 3, 24, 2, 1, 3, 24, 17, 21, 28, 15, 15, 28, 
22, 28), school_name = c(NA, NA, "University of\r\r\r\nSouthern California", 
"Brown University", "Duke University", "University of\r\r\r\nPennsylvania", 
"Cornell University", "Northwestern University", "University of Chicago", 
"Columbia University", "Dartmouth College", "Georgetown University", 
"Yale University", "Vanderbilt University", "Carnegie Mellon University", 
"Johns Hopkins\r\r\r\nUniversity", "California Institute\r\r\r\nof Technology", 
"Washington\r\r\r\nUniversity, St. Louis", "University of Notre Dame", 
"Stanford University", "Emory University", "Massachusetts\r\r\r\nInstitute of\r\r\r\nTechnology", 
"Princeton University", "Harvard University", "University of Virginia", 
"Rice University", "University of\r\r\r\nMichigan, Ann Arbor", 
"University of\r\r\r\nCalifornia, San Diego", "University of\r\r\r\nCalifornia, Berkeley", 
"University of\r\r\r\nCalifornia, LA", "University of\r\r\r\nCalifornia, Davis", 
"University of North\r\r\r\nCarolina at Chapel Hill", "University of Florida"
), state = c(NA, NA, "California", "Rhode Island", "North Carolina", 
"Pennsylvania", "New York", "Illinois", "Illinois", "New York", 
"New Hampshire", "Washington, DC", "Connecticut", "Tennessee", 
"Pennsylvania", "Maryland", "California", "Missouri", "Indiana", 
"California", "Georgia", "Massachusetts", "New Jersey", "Massachusetts", 
"Virginia", "Texas", "Michigan", "California", "California", 
"California", "California", "North Carolina", "Florida"), out_state = c(NA, 
NA, 68237, 68230, 66172, 66104, 66014, 65997, 65619, 65524, 65511, 
65082, 64700, 63946, 63829, 63340, 63255, 62982, 62693, 62484, 
60774, 60156, 59710, 59076, 58950, 58128, 57273, 48630, 48465, 
46326, 46043, 39338, 28658), in_state = c(NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, 22323, NA, 17786, 16056, 15891, 13752, 15266, 8998, 
6381), costatt = c(74580, 52860, 95225, 91676, 88938, 92228, 
83296, 91290, 89040, 88942, 91312, 88782, 90975, 89590, 73000, 
86065, 80028, 87644, 86125, 92892, 88414, 82720, 86700, 91166, 
91440, 86279, 76294, 77886, 78582, 67959, 78996, 66372, 45808
)), class = "data.frame", row.names = c(NA, -33L))

起初我没有设置

linewidth
和
size
参数,图表可以正常生成。添加这些参数后,尽管我仔细检查了管道,R 仍开始提示找不到
namerank
。我在 size 参数中添加了
clg_fee$
——即写成
size = ifelse(clg_fee$namerank etc. etc.)
——这消除了该提示,但现在被突出显示的是黑人家庭收入中位数和佛罗里达大学,而不是两个收入中位数的条形。

对于标签,我尝试在标签参数中设置

xlab = NULL
,但这不起作用。

r ggplot2 geom-bar geom-point
1个回答
0
投票

问题是您在将数据传递到

ggplot()
之前对数据进行了重新排序,但
linewidth
和
size
却是用基于原始“未排序”数据集的
ifelse
设置的。相反,我建议把它们映射为美学属性(写在 aes() 中),并使用
scale_xxx_identity
或
scale_xxx_manual
为
linewidth
和
size
设置您所需的值,就像我在下面的代码中所做的那样。这两种方法都需要稍微多一点的工作,但不太容易出错:

注意:由于类别标签很长,我将标题和图例与

"plot"
(而不是
"panel"
)对齐。其中至少
legend.location
需要
ggplot2 >= 3.5.0
。

library(ggplot2)
# Argument spelled out in full rather than relying on partial matching
# of `warn` to `warn.conflicts`.
library(dplyr, warn.conflicts = FALSE)
library(hrbrthemes)

# Lollipop chart of university costs vs. median household income.
#
# Line width and point size are mapped inside aes() to a category string and
# translated to numbers by manual scales. Because the mapping is evaluated on
# the (already sorted) layer data, the highlighted rows stay correct no matter
# how the data is ordered -- unlike a vector computed from `clg_fee$...`
# outside aes(), which is matched to rows by position.
clg_fee |>
  # Sort by cost and freeze that order in the factor levels of namerank.
  arrange(costatt) |>
  mutate(namerank = factor(namerank, namerank)) |>
  # Category used for colour, line width and point size of the costatt layer:
  # the two "Real Median ..." rows are income, everything else is cost.
  mutate(
    costatt_or_income = ifelse(
      grepl("^Real Median", namerank),
      "Median Household Income", "Cost of Attendance (out-of-state)"
    )
  ) |>
  ggplot(aes(x = namerank, xend = namerank)) +
  # Cost of attendance / median income
  geom_segment(
    aes(
      y = 0,
      yend = costatt,
      color = costatt_or_income,
      linewidth = costatt_or_income
    )
  ) +
  geom_point(
    aes(
      y = costatt,
      color = costatt_or_income,
      size = costatt_or_income
    )
  ) +
  # Out-of-state tuition
  geom_segment(
    aes(
      y = 0,
      yend = out_state,
      color = "Tuition (out-of-state)",
      linewidth = "Tuition (out-of-state)"
    )
  ) +
  geom_point(
    aes(
      y = out_state,
      color = "Tuition (out-of-state)",
      size = "Tuition (out-of-state)"
    )
  ) +
  # In-state tuition
  geom_segment(
    aes(
      y = 0,
      yend = in_state,
      color = "Tuition (in-state)",
      linewidth = "Tuition (in-state)"
    )
  ) +
  geom_point(aes(
    y = in_state, color = "Tuition (in-state)", size = "Tuition (in-state)"
  )) +
  coord_flip() +
  # Abbreviate large values with short-scale suffixes and append "$".
  scale_y_continuous(labels = scales::label_number(
    scale_cut = scales::cut_short_scale(), suffix = "$"
  )) +
  scale_color_manual(
    values = c(
      "Tuition (out-of-state)" = "#779ECB",
      "Tuition (in-state)" = "#77DD77",
      "Median Household Income" = "orange",
      "Cost of Attendance (out-of-state)" = "#757575"
    )
  ) +
  # Same category names as the colour scale; legends suppressed so only the
  # colour legend is shown.
  scale_linewidth_manual(
    values = c(
      "Median Household Income" = 1.3,
      "Cost of Attendance (out-of-state)" = .7,
      "Tuition (out-of-state)" = .7,
      "Tuition (in-state)" = .7
    ),
    guide = "none"
  ) +
  scale_size_manual(
    values = c(
      "Median Household Income" = 5,
      "Cost of Attendance (out-of-state)" = 2,
      "Tuition (out-of-state)" = 2,
      "Tuition (in-state)" = 2
    ),
    guide = "none"
  ) +
  theme_ipsum() +
  theme(
    legend.position = "top",
    # Align title and legend with the whole plot rather than the panel,
    # because the category labels are long.
    plot.title.position = "plot",
    legend.location = "plot"
  ) +
  labs(
    # labs() is keyed by aesthetic name, so axis titles are set with `x` and
    # `y` (not `xlab`/`ylab`, which name no aesthetic of this plot and leave
    # the default "namerank" / "y" titles in place). NULL drops the title for
    # the university axis; because of coord_flip() the `y` title is drawn on
    # the horizontal axis.
    x = NULL,
    y = "Undergraduate costs and tuition",
    color = NULL,
    title = "University costs are far from affordable",
    caption = "Tuition fees source: Visual Capitalist
    Note that in-state tuition data is unavailable for most universities \n
    Cost of attendance source: University websites
    Note that official estimations of cost of attendance are unavailable for Boston College and Northeastern"
  )

enter image description here

© www.soinside.com 2019 - 2024. All rights reserved.