分类变量的分布图

问题描述 投票:0回答:1

我有以下数据表

dt
并希望为每个独特的
delivYear
构建/创建
nrOrders
的密度图:

# Sample data from the question, in dput() form: an object of class
# c("data.table", "data.frame") with 168 rows and four columns:
#   delivYear - character, delivery year ("2018" to "2020")
#   acquiYear - character, year values "2014" to "2020"
#   month     - factor with the twelve levels "Jan" ... "Dec"
#   nrOrders  - numeric, number of orders
# NOTE(review): the expression is not assigned to anything here; the
# surrounding text refers to the table as `dt`, so presumably it is meant to
# be run as `dt <- structure(...)` -- confirm before reusing.
structure(list(delivYear = c("2018", "2018", "2018", "2018", 
"2018", "2018", "2018", "2018", "2018", "2018", "2018", "2018", 
"2018", "2018", "2018", "2018", "2018", "2018", "2018", "2018", 
"2018", "2018", "2018", "2018", "2018", "2018", "2018", "2018", 
"2018", "2018", "2018", "2018", "2018", "2018", "2018", "2018", 
"2018", "2018", "2018", "2018", "2018", "2018", "2018", "2018", 
"2018", "2018", "2018", "2018", "2018", "2018", "2018", "2018", 
"2018", "2018", "2018", "2018", "2018", "2018", "2018", "2018", 
"2019", "2019", "2019", "2019", "2019", "2019", "2019", "2019", 
"2019", "2019", "2019", "2019", "2019", "2019", "2019", "2019", 
"2019", "2019", "2019", "2019", "2019", "2019", "2019", "2019", 
"2019", "2019", "2019", "2019", "2019", "2019", "2019", "2019", 
"2019", "2019", "2019", "2019", "2019", "2019", "2019", "2019", 
"2019", "2019", "2019", "2019", "2019", "2019", "2019", "2019", 
"2020", "2020", "2020", "2020", "2020", "2020", "2020", "2020", 
"2020", "2020", "2020", "2020", "2020", "2020", "2020", "2020", 
"2020", "2020", "2020", "2020", "2020", "2020", "2020", "2020", 
"2020", "2020", "2020", "2020", "2020", "2020", "2020", "2020", 
"2020", "2020", "2020", "2020", "2020", "2020", "2020", "2020", 
"2020", "2020", "2020", "2020", "2020", "2020", "2020", "2020", 
"2020", "2020", "2020", "2020", "2020", "2020", "2020", "2020", 
"2020", "2020", "2020", "2020"), acquiYear = c("2014", "2014", 
"2014", "2014", "2014", "2014", "2014", "2014", "2014", "2014", 
"2014", "2014", "2015", "2015", "2015", "2015", "2015", "2015", 
"2015", "2015", "2015", "2015", "2015", "2015", "2016", "2016", 
"2016", "2016", "2016", "2016", "2016", "2016", "2016", "2016", 
"2016", "2016", "2017", "2017", "2017", "2017", "2017", "2017", 
"2017", "2017", "2017", "2017", "2017", "2017", "2018", "2018", 
"2018", "2018", "2018", "2018", "2018", "2018", "2018", "2018", 
"2018", "2018", "2016", "2016", "2016", "2016", "2016", "2016", 
"2016", "2016", "2016", "2016", "2016", "2016", "2017", "2017", 
"2017", "2017", "2017", "2017", "2017", "2017", "2017", "2017", 
"2017", "2017", "2018", "2018", "2018", "2018", "2018", "2018", 
"2018", "2018", "2018", "2018", "2018", "2018", "2019", "2019", 
"2019", "2019", "2019", "2019", "2019", "2019", "2019", "2019", 
"2019", "2019", "2016", "2016", "2016", "2016", "2016", "2016", 
"2016", "2016", "2016", "2016", "2016", "2016", "2017", "2017", 
"2017", "2017", "2017", "2017", "2017", "2017", "2017", "2017", 
"2017", "2017", "2018", "2018", "2018", "2018", "2018", "2018", 
"2018", "2018", "2018", "2018", "2018", "2018", "2019", "2019", 
"2019", "2019", "2019", "2019", "2019", "2019", "2019", "2019", 
"2019", "2019", "2020", "2020", "2020", "2020", "2020", "2020", 
"2020", "2020", "2020", "2020", "2020", "2020"), month = structure(c(1L, 
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 
8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 
11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 
7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 
10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 
12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 
6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 
9L, 10L, 11L, 12L), .Label = c("Jan", "Feb", "Mar", "Apr", "May", 
"Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"), class = "factor"), 
    nrOrders = c(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 0, 2, 4, 5, 
    3, 7, 3, 5, 4, 3, 7, 8, 7, 2, 24, 16, 33, 9, 27, 16, 10, 
    27, 9, 31, 35, 11, 11, 25, 15, 18, 19, 19, 8, 27, 34, 43, 
    51, 0, 11, 2, 0, 0, 0, 0, 0, 4, 5, 1, 0, 8, 1, 18, 19, 10, 
    31, 7, 5, 19, 3, 18, 12, 2, 9, 24, 11, 12, 13, 10, 14, 17, 
    24, 20, 14, 13, 4, 0, 27, 6, 5, 13, 14, 13, 20, 17, 64, 3, 
    6, 4, 8, 1, 5, 3, 2, 2, 3, 0, 0, 0, 0, 0, 0, 2, 1, 0, 0, 
    1, 0, 1, 0, 7, 1, 15, 8, 1, 16, 7, 3, 5, 14, 9, 5, 12, 16, 
    0, 13, 5, 0, 11, 7, 12, 12, 5, 35, 4, 6, 11, 11, 6, 19, 6, 
    22, 19, 52, 61, 44, 4, 6, 9, 1, 6, 2, 2, 1, 1, 0, 0, 0)), row.names = c(NA, 
-168L), class = c("data.table", "data.frame"))

分布/密度图要回答的问题如下: 每个交货年份

delivYear
的订单数量
nrOrders
如何分布在各个月份
month
上? 我不知道该怎么做,因为没有连续变量。

如何绘制这个问题的分布/密度图?

r ggplot2 plotly distribution density-plot
1个回答
0
投票

由于您的月份变量是分类变量,因此您可能需要考虑使用

geom_area()
的堆积面积图。另外,如果我理解正确,您应该在继续绘图之前先汇总每个月的订单数量:

# Sum nrOrders per (month, delivYear) and draw one semi-transparent area per
# delivery year, overlaid on the same axes (position = "identity").
# Uses %>%, group_by(), summarise(), ungroup() from dplyr and the ggplot2
# plotting functions, so both packages must be attached; `dt` is the input
# table with columns month, delivYear and nrOrders.
dt %>%
  group_by(month, delivYear) %>%
  summarise(sumOrders = sum(nrOrders)) %>%
  # summarise() keeps the remaining grouping; drop it so the plot receives
  # a plain, ungrouped table.
  ungroup() %>%
  ggplot() +
  geom_area(
    aes(
      x = month,
      y = sumOrders,
      # month is a factor, so an explicit group is needed to connect the
      # twelve points of each year into one area.
      group = delivYear,
      fill = delivYear
    ),
    # A constant opacity belongs outside aes(): inside aes() the value 0.7
    # is treated as a mapped variable, which adds a spurious "0.7" legend
    # entry and does not actually set the opacity to 0.7.
    alpha = 0.7,
    position = "identity"
  ) +
  theme_classic()

输出:

© www.soinside.com 2019 - 2024. All rights reserved.