我正在尝试通过桑基图可视化我的数据。
我有以下数据框:
# Example data (dput output): 50 rows with four numeric columns -
# pat_id, contactnummer, Combo2 and treatment. A pat_id may repeat across
# rows. The object carries the classes "data.table" and "data.frame".
sankey1 <- structure(list(pat_id = c(10037, 10264, 10302, 10302, 10302,
10344, 10482, 10482, 10482, 10613, 10613, 10613, 10628, 10851,
11052, 11203, 11214, 11214, 11566, 11684, 11821, 11945, 11945,
11952, 11952, 12122, 12183, 12774, 13391, 13573, 13643, 14298,
14556, 14556, 14648, 14862, 14935, 14935, 14999, 15514, 15811,
16045, 16045, 16190, 16190, 16190, 16220, 16220, 16220, 16220
), contactnummer = c(1, 1, 1, 2, 3, 1, 1, 2, 3, 1, 2, 3, 1, 1,
1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1,
1, 1, 2, 1, 1, 1, 1, 2, 1, 2, 3, 1, 2, 3, 99), Combo2 = c(1,
1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1,
2, 4, 4, 1, 5, 1, 1, 1, 1, 3, 3, 1, 5, 1, 1, 3, 1, 1, 1, 1, 1,
3, 6, 3, 1, 1, 1, 1), treatment = c(99, 0, 0, 1, 1, 0, 99, 99,
99, 99, 99, 1, 1, 0, 1, 99, 99, 99, 0, 99, 99, 0, 0, 0, 1, 99,
99, 0, 0, 0, 0, 0, 1, 1, 1, 99, 99, 1, 0, 0, 1, 0, 0, 0, 1, 1,
99, 99, 99, 99)), row.names = c(NA, 50L), class = c("data.table",
"data.frame"))
一个 ID 号("pat_id")可以有多行,每行代表一次接触("contactnummer")。我的目标是可视化哪些组合("Combo2")导致了哪些治疗("treatment"),并按接触次序展示。
我希望通过桑基图将其可视化(https://r-graph-gallery.com/321-introduction-to-interactive-sankey-diagram-2.html)。
理想情况下,所需的输出类似于下图:左侧的流向表示各个组合,每个组合可以通向 3 种不同的治疗;再向右显示其中哪些有第二次接触,并再次用流向表示各组合通向 3 种治疗的情况。
我尝试使用以下脚本,但没有成功。
library(networkD3)
library(d3Network)
# Attempted pipeline: count rows per patient/combo/treatment/contact, pair
# each row with a following treatment, then map the values to node indices
# and draw the diagram with networkD3::sankeyNetwork().
#
# Create a data frame for the Sankey diagram
# NOTE(review): `sankey` is not defined in the visible snippet; the data
# frame created above is named `sankey1` - presumably that is what is meant.
# NOTE(review): group_by()/summarise()/mutate()/lead() are dplyr functions,
# but no library(dplyr) call is visible before this point - confirm it is
# loaded.
sankey_data <- sankey %>%
group_by(pat_id, Combo2, treatment, contactnummer) %>%
summarise(Count = n()) %>%
# NOTE(review): summarise() drops only the last grouping column, so lead()
# runs within each pat_id/Combo2/treatment group, where `treatment` is
# constant. Presumably the intent was the treatment of the patient's next
# contact - verify.
mutate(Target = lead(treatment), Value = Count) %>%
# Keep only rows that have a following row inside their group.
filter(!is.na(Target))
# Create a list of unique nodes with color attributes
combo2_nodes <- unique(sankey_data$Combo2)
treatment_nodes <- unique(sankey_data$treatment)
# NOTE(review): Combo2 and treatment values are stored under the same `name`
# column without a prefix, so a code that occurs in both columns appears
# twice in `nodes$name`.
nodes <- data.frame(
name = c(combo2_nodes, treatment_nodes),
color = c(rep("Combo2", length(combo2_nodes)), rep("Treatment", length(treatment_nodes)))
)
# Create a list of links
# match() returns the position of the first match and "- 1" converts it to a
# zero-based index. NOTE(review): for a value present in both node sets the
# target therefore resolves to the Combo2 node, not the treatment node.
links <- data.frame(
source = match(sankey_data$Combo2, nodes$name) - 1,
target = match(sankey_data$Target, nodes$name) - 1,
value = sankey_data$Value
)
# Create the Sankey diagram with color attributes
sankey_plot <- sankeyNetwork(
Links = links,
Nodes = nodes,
Source = "source",
Target = "target",
Value = "value",
NodeID = "name",
units = "Count",
NodeGroup = "color" # Specify the color attribute
)
# Display the plot
sankey_plot
但这并没有创造出我想要的效果。我对桑基图非常缺乏经验。有什么建议吗?
# load necessary libraries
library(networkD3)
library(d3Network)
library(dplyr)
# Build a two-level Sankey diagram (Combo2 -> treatment) from `sankey1`.

# One link per row of `sankey1`. The prefixes keep combo and treatment labels
# distinct even though both columns contain overlapping numeric codes.
# stringsAsFactors = FALSE keeps the labels as character on every R version;
# with factor columns the unique()/match() steps below would not line up.
links <- data.frame(
  source = paste0("combo_", sankey1$Combo2),
  target = paste0("treatment_", sankey1$treatment),
  stringsAsFactors = FALSE
)

# Every distinct label (source or target) becomes one node.
nodes <- data.frame(
  name = unique(c(links$source, links$target)),
  stringsAsFactors = FALSE
)

# Replace the labels by their zero-based row position in `nodes`.
links$source <- match(links$source, nodes$name) - 1
links$target <- match(links$target, nodes$name) - 1

# Collapse duplicate links; `n` is the number of rows per source/target pair.
# count() returns an ungrouped result and as.data.frame() hands networkD3 a
# plain data frame rather than a grouped tibble.
links <- links %>%
  count(source, target) %>%
  as.data.frame()

# Draw the diagram.
sankeyNetwork(
  Links = links,
  Nodes = nodes,
  Source = "source",
  Target = "target",
  Value = "n",
  NodeID = "name",
  fontSize = 15
)
这样就可以得到如下的图:
希望有帮助!