网络 D3 Sankey 图链接数据帧创建:绕过具有 NA 的节点,并自动修正值?

问题描述 投票:0回答:1

我正在尝试使用 NetworkD3 创建桑基图来绘制通过 A&E 部门的患者流程,并带有示例数据框:

`First_Contact <- c("UTC", "UTC", "111", "111")Second_Contact <- c(NA, "ED - ED RV", "UTC", "UTC")Third_Contact <- c(NA, NA, "ED - ED RV", "ED - ED RV")Final_Pathway_Outcome <- c("Discharged", "Discharged", "Discharged", "Discharged")

df <- data.frame(First_Contact, Second_Contact, Third_Contact, Final_Pathway_Outcome)

其中 NA 表示患者在出院前不需要进行进一步的患者接触,即第 1 行患者去了 UTC,然后出院)

我正在遵循 https://rpubs.com/droach/CPP526-codethrough 上的代码通过,使用下面的代码效果良好,但遇到两个问题:

  1. 当患者无需经过第二次第三次接触即可达到最终途径结果时,我的原始数据框的旅程为 UTC -> NA -> NA -> 出院。

    filter(!is.na(target))
    正在过滤掉以 NA 为目标的行,但在我的数据透视表中,我目前正在手动将源列中的 NA 替换为上一行中的适当目标。有没有办法更优雅地做到这一点?

  2. 我的数据集将比上面的示例大得多,并且此代码当前单独处理每一行,甚至是重复的行。有没有办法自动聚合这些并相应地调整值?

##Adding row numbers and pivoting data
links.df <- df %>%
    mutate(row = row_number()) %>%
    pivot_longer(cols= -row, names_to="column", values_to="source")
##Creating target column and specifying link order
links.df <- links.df %>%
    mutate(column= match(column, names(trial))) %>%
    group_by(row) %>%
    mutate(target= lead(source, order_by= column)) %>%
    filter(!is.na(target)) %>%
    ungroup()
##Differentiating between areas in each contact
links.df <- links.df %>%
    mutate(source = paste0(source, "", column)) %>%
    mutate(target= paste0(target, "", column+1)) %>%
    select(row, column, source, target)
##Extra modification to swap the value of "NAs" with the target from the previous row. Currently doing this manually
links.df[2,3] <- "UTC_2"links.df[5,3] <- "ED - ED RV_3"
##Creating data frame for nodes
nodes.df <- data.frame(name=unique(c(links.df$source, links.df$target)))nodes.df$label <- sub('_[0-9]*$', '', nodes.df$name)
##Providing instructions for Sankey Diagram (source and target ids)
links.df$source_id <- match(links.df$source, nodes.df$name) -1
links.df$target_id <- match(links.df$target, nodes.df$name) -1
links.df$value <- 1
##Plotting Sankey
sankeyNetwork(Links= links.df,Nodes = nodes.df,Source= 'source_id',Target= 'target_id',Value= 'value',NodeID= 'label',fontSize= 16,iterations=0)`

可重现代码:

library(dplyr)
library(tidyr)
library(networkD3)

First_Contact <- c("UTC", "UTC", "111", "111")
Second_Contact <- c(NA, "ED - ED RV", "UTC", "UTC")
Third_Contact <- c(NA, NA, "ED - ED RV", "ED - ED RV")
Final_Pathway_Outcome <- c("Discharged", "Discharged", "Discharged", "Discharged")

df <- data.frame(First_Contact, Second_Contact, Third_Contact, Final_Pathway_Outcome)

##Adding row numbers and pivoting data
links.df <- df %>%
  mutate(row = row_number()) %>%
  pivot_longer(cols= -row, names_to="column", values_to="source")

##Creating target column and specifying link order
links.df <- links.df %>%
  mutate(column= match(column, names(df))) %>%
  group_by(row) %>%
  mutate(target= lead(source, order_by= column)) %>%
  filter(!is.na(target)) %>%
  ungroup()

##Differentiating between areas in each contact
links.df <- links.df %>%
  mutate(source = paste0(source, "", column)) %>%
  mutate(target= paste0(target, "", column+1)) %>%
  select(row, column, source, target)

##Extra modification to swap the value of "NAs" with the target from the previous row. Currently doing this manually
links.df[2,3] <- "UTC_2"
links.df[5,3] <- "ED - ED RV_3"

nodes.df <- 
  data.frame(
    name = unique(c(links.df$source, links.df$target)),
    label = unique(c(links.df$source, links.df$target))
  )

##Providing instructions for Sankey Diagram (source and target ids)
links.df$source_id <- match(links.df$source, nodes.df$name) -1
links.df$target_id <- match(links.df$target, nodes.df$name) -1
links.df$value <- 1

##Plotting Sankey
sankeyNetwork(Links= links.df,Nodes = nodes.df,Source= 'source_id',Target= 'target_id',Value= 'value',NodeID= 'label',fontSize= 16,iterations=0)

r sankey-diagram htmlwidgets networkd3
1个回答
0
投票

tidy::fill()
是用先前值(向上或向下)填充 data.frame 列中
NA
值的便捷方法。

dplyr::summarise()
可用于聚合重复链接并使用
dplyr::n()

设置值

在填写

source
target
列之前,我还在
source
名称中添加了列号,以保持节点的顺序/位置。

library(dplyr)
library(tidyr)
library(networkD3)

First_Contact <- c("UTC", "UTC", "111", "111")
Second_Contact <- c(NA, "ED - ED RV", "UTC", "UTC")
Third_Contact <- c(NA, NA, "ED - ED RV", "ED - ED RV")
Final_Pathway_Outcome <- c("Discharged", "Discharged", "Discharged", "Discharged")

df <- data.frame(First_Contact, Second_Contact, Third_Contact, Final_Pathway_Outcome)

links.df <-
  df %>%
  mutate(row = row_number()) %>%
  pivot_longer(cols = -row, names_to = "column", values_to = "source") %>% 
  mutate(column = match(column, names(df))) %>%
  mutate(source = ifelse(!is.na(source), paste0(source, "-", column), NA)) %>%
  group_by(row) %>%
  mutate(target = lead(source, order_by = column)) %>% 
  fill(source, .direction = "down") %>% 
  filter(!is.na(target)) %>%
  ungroup() %>% 
  summarise(value = n(), .by = c("source", "target"))

nodes.df <- data.frame(name = unique(c(links.df$source, links.df$target)))
nodes.df <- mutate(nodes.df, label = sub("-[0-9]*$", "", name))

links.df$source_id <- match(links.df$source, nodes.df$name) -1
links.df$target_id <- match(links.df$target, nodes.df$name) -1

df
#>   First_Contact Second_Contact Third_Contact Final_Pathway_Outcome
#> 1           UTC           <NA>          <NA>            Discharged
#> 2           UTC     ED - ED RV          <NA>            Discharged
#> 3           111            UTC    ED - ED RV            Discharged
#> 4           111            UTC    ED - ED RV            Discharged
links.df
#> # A tibble: 6 × 5
#>   source       target       value source_id target_id
#>   <chr>        <chr>        <int>     <dbl>     <dbl>
#> 1 UTC-1        Discharged-4     1         0         5
#> 2 UTC-1        ED - ED RV-2     1         0         1
#> 3 ED - ED RV-2 Discharged-4     1         1         5
#> 4 111-1        UTC-2            2         2         3
#> 5 UTC-2        ED - ED RV-3     2         3         4
#> 6 ED - ED RV-3 Discharged-4     2         4         5
nodes.df
#>           name      label
#> 1        UTC-1        UTC
#> 2 ED - ED RV-2 ED - ED RV
#> 3        111-1        111
#> 4        UTC-2        UTC
#> 5 ED - ED RV-3 ED - ED RV
#> 6 Discharged-4 Discharged

sankeyNetwork(
  Links = links.df,
  Nodes = nodes.df,
  Source = 'source_id',
  Target = 'target_id',
  Value = 'value',
  NodeID = 'label',
  fontSize = 16,
  iterations = 0,
  sinksRight = FALSE
)

© www.soinside.com 2019 - 2024. All rights reserved.