背景我有这个瀑布图,有两个不同的类别:超普通和普通。我想将它们堆叠在一起并创建这个“楼梯/瀑布”运动。但是,我的函数出现了问题,输出如下:
因此,第一件事是错误的,它使第一个堆叠的条形完全错误。接下来,沿着 x 值没有“阶梯运动”。
可重现的代码在这里:
waterfall_stacked <- function(data, x_title = "Coupon Rate Group", y_title = "Value", graph_title = "Coupon Redemption Data") {
# Ensure the data is ordered correctly
data <- data %>%
arrange(KuponRenteGrouped, Category)
# Calculate cumulative sums and start/end positions
data <- data %>%
group_by(KuponRenteGrouped) %>%
mutate(
CumulativeValue = cumsum(Value)
) %>%
ungroup() %>%
mutate(
Start = lag(CumulativeValue, default = 0),
End = CumulativeValue
)
# Set factor levels to control x-axis order
levels_order <- c("[-0.5, 0 and 0.5%]", "[2.0 and 2.5%]", "[3.0 and 3.5%]", "1.00%", "1.50%", "4.00%",
"5.00%", "6.00%", "Other", "Total")
data$KuponRenteGrouped <- factor(data$KuponRenteGrouped, levels = levels_order)
# Add IDs for plotting
data$id <- as.numeric(factor(data$KuponRenteGrouped, levels = levels_order))
# Adjust colors for the bars
colors <- c("Ordinary" = "#c8f464", "Extra ordinary" = "#ff6969", "Ordinary Total" = "#55646e", "Extra ordinary Total" = "#6e5564")
# Adjust the labels for total rows
data <- data %>%
mutate(Category = ifelse(KuponRenteGrouped == "Total" & Category == "Ordinary", "Ordinary Total",
ifelse(KuponRenteGrouped == "Total" & Category == "Extra ordinary", "Extra ordinary Total", Category)))
# Generate breaks and labels for the x-axis
breaks <- as.numeric(factor(levels_order, levels = levels_order))
labels <- levels_order
# Plot
p <- ggplot(data, aes(x = id, fill = Category)) +
geom_rect(aes(xmin = id - 0.4, xmax = id + 0.4, ymin = Start, ymax = End), color = "black", alpha = 0.95) +
geom_segment(aes(x = id + 0.4, xend = id + 0.6, y = End, yend = End), color = "black") +
scale_x_continuous(breaks = breaks, labels = labels) +
scale_fill_manual(values = colors) +
scale_y_continuous(labels = scales::comma) +
labs(title = graph_title, x = x_title, y = y_title) +
theme_minimal() +
theme(legend.position = "top",
axis.text.x = element_text(angle = 45, hjust = 1, size = 12, face = "bold"),
axis.text.y = element_text(size = 12, face = "bold"),
axis.title.x = element_text(size = 14, face = "bold"),
axis.title.y = element_text(size = 14, face = "bold"),
plot.title = element_text(hjust = 0.5, size = 16, face = "bold"))
return(p)
}
CouponRedemptionData_long <- structure(list(KuponRenteGrouped = c("[-0.5, 0 and 0.5%]", "[-0.5, 0 and 0.5%]",
"[2.0 and 2.5%]", "[2.0 and 2.5%]", "[3.0 and 3.5%]", "[3.0 and 3.5%]",
"1.00%", "1.00%", "1.50%", "1.50%", "4.00%", "4.00%", "5.00%",
"5.00%", "6.00%", "6.00%", "Other", "Other", "Total", "Total"
), Category = c("Ordinary", "Extra ordinary", "Ordinary", "Extra ordinary",
"Ordinary", "Extra ordinary", "Ordinary", "Extra ordinary", "Ordinary",
"Extra ordinary", "Ordinary", "Extra ordinary", "Ordinary", "Extra ordinary",
"Ordinary", "Extra ordinary", "Ordinary", "Extra ordinary", "Ordinary",
"Extra ordinary"), Value = c(-2486.07213523463, -1.63034770324489,
-909.558490247313, -5.05899091123594, -441.215118338973, -44.6833745314423,
-1864.32516031896, -6.66980344773318, -989.904080278175, -2.22086649188098,
-489.756623652007, -72.4061903560187, -774.611623995622, -2311.28579738599,
-56.5953120488995, -1960.36281564223, -10.7295231283663, -2.47831507741966,
-8022.76806724294, -4406.7965015472)), row.names = c(NA, -20L
), class = c("tbl_df", "tbl", "data.frame"))
waterfall_stacked(CouponRedemptionData_long)
我之前已使此功能仅适用于单个类别:
waterfall <- function(data, x_title = "X-axis Label", y_title = "Y-axis Label", graph_title = "Title", date_format = "%b %y") {
desc_col <- names(data)[1]
amount_col <- names(data)[2]
data[[desc_col]] <- factor(data[[desc_col]], levels = data[[desc_col]])
data$id <- seq_along(data[[amount_col]])
data$type <- ifelse(data[[amount_col]] > 0, "increase", "decrease")
data[nrow(data), "type"] <- "net" # Set last as net
data$end <- cumsum(data[[amount_col]])
data$end <- c(head(data$end, -1), 0)
data$start <- c(0, head(data$end, -1))
data$type <- factor(data$type, levels = c("decrease", "increase", "net"))
colors <- get_default_colors() # Get colors from function
p <- ggplot(data, aes(x = as.numeric(id), fill = type)) +
geom_rect(aes(xmin = id - 0.45, xmax = id + 0.45, ymin = end, ymax = start)) +
scale_x_continuous(breaks = data$id, labels = data[[desc_col]]) +
scale_y_continuous(labels = scales::comma) + # Use comma for thousands separator
scale_fill_manual(values = c("decrease" = colors[7], # Red-pink for decrease
"increase" = colors[3], # Lime green for increase
"net" = colors[1])) + # Dark blue for net
geom_text(aes(x = id, y = ifelse(type == "increase", end, start), label = scales::comma(data[[amount_col]]), vjust = ifelse(data[[amount_col]] > 0, -0.3, 1.3)), size = 3, fontface = "bold") +
labs(title = graph_title, x = x_title, y = y_title) +
theme_minimal() +
theme(legend.position = "none",
axis.text.x = element_text(angle = 45, hjust = 1, size = 12, face = "bold"),
axis.text.y = element_text(size = 12, face = "bold"),
axis.title.x = element_text(size = 14, face = "bold"),
axis.title.y = element_text(size = 14, face = "bold"),
plot.title = element_text(hjust = 0.5, size = 16, face = "bold"))
return(p)
}
CouponResultTotalRedemptions<- structure(list(KuponRenteGrouped = c("0.0", "1.00%", "1.50%",
"2.0", "3.0", "4.00%", "5.00%", "6.00%", "Other", "Total redemptions"
), TotalRedemptions = c(-2487.702491, -1870.994976, -992.124955,
-914.617489, -485.89851, -562.16284, -3085.897447, -2017.09004,
-13.337623, -12429.826371)), row.names = c(NA, -10L), class = "data.frame")
RedemptionsWaterfall <- waterfall(CouponResultTotalRedemptions, "Coupon rates", "", "Redemptions over the latest notice period")
它具有我需要的楼梯结构 - 但此函数无法处理我尝试合并到分类瀑布图中的类别。有什么想法可以解决这个问题吗?
确保在计算总和后,不要(重新)安排映射到 x 轴的任何内容。将中间数据帧打印到控制台(在最后一个 mutate 之后添加 magrittr 的 %T>% print() )并检查 Start 和 End 是否按 id 正确排序。