将标签移至桑基图的外部,并在两侧按从大到小的顺序排列节点

问题描述 投票:0回答:1

这是我第一次使用 R,如果犯了一些低级错误,还请见谅。我需要制作桑基图来显示大都市区各城市之间的流量,但我对图表的外观不满意。我希望标签位于桑基图之外,并且两侧的城市都按流量从大到小排列。

这是我的代码(抱歉,很乱,我害怕删除步骤)

library(lodown)
library(magrittr)
library(dplyr)
library(stringr)
library(fs)
library(SAScii)
library(readr)
library(purrr)
library(survey)
library(tidyr)
library(ggplot2)
library(scales)
library(ggtext)
library(questionr)
library(networkD3)

# Search data and download.
# Build the "censo" catalog, keep the 2010 rows whose `state` matches "al",
# then pass the filtered catalog to lodown() to download those entries.
catalog <- lodown::get_catalog(data_name = "censo", output_dir = "data")
catalog <- dplyr::filter(
  catalog,
  year == 2010,
  stringr::str_detect(state, "al")
)
catalog <- lodown::lodown(catalog, data_name = "censo")

# Downloaded data: print the directory tree under "data".
fs::dir_tree(path = "data")

# Variables imported: the census columns that are kept when the fixed-width
# file is read; every other column in the layout is skipped.
vars_censo <- c(
  "v0001", "v0002", "v1004", "v1006",
  "v6529", "v6530", "v6531", "v6532",
  "v0010", "v0606", "v0636",
  "v6362", "v6364",
  "v0660", "v6602", "v6604",
  "v0661", "v0662", "v0601"
)

# Convert SAS to R: parse the SAS input layout referenced by the catalog and
# lower-case the variable names so they compare equal to vars_censo.
sas_input <- SAScii::parse.SAScii(catalog$pes_sas)
sas_input <- dplyr::mutate(
  sas_input,
  varname = stringr::str_to_lower(varname)
)

# Import TXT: read the fixed-width person file described by the SAS layout.
# Column widths and names come from sas_input. The col_types string holds one
# character per layout row: "_" skips a column that is not in vars_censo,
# "c" reads it as character and "d" as double.
raw_censo <- readr::read_fwf(
  file = catalog$pes_file,
  col_positions = readr::fwf_widths(
    widths = abs(sas_input$width),
    col_names = sas_input$varname
  ),
  col_types = paste0(
    ifelse(
      sas_input$varname %in% vars_censo,
      ifelse(sas_input$char, "c", "d"),
      "_"
    ),
    collapse = ""
  )
)

# Filter cities: keep only the rows whose municipality code (v0002) is one of
# the 13 codes listed below, and store the result as a plain data.frame.
df.rmm <- data.frame(
  filter(
    raw_censo,
    v0002 %in% c(
      "00409", "00508", "00607", "02207", "04302", "04708", "05200",
      "05507", "06448", "06901", "07701", "07909", "08907"
    )
  )
)

# Rename columns: give the imported census variables readable names, written
# as new_name = old_name.
# v6362 and v6364 are both imported (see vars_censo). UFesc now maps to v6362
# and mun.esc to v6364; previously both new names pointed at v6364, which left
# v6362 without a readable name.
# NOTE(review): v6362 is assumed to be the state (UF) of the school,
# mirroring the uf.trab = v6602 / mun.trab = v6604 pair. Confirm against the
# census data dictionary.
df.rmm <- rename(
  df.rmm,
  UF = v0001,
  municipio = v0002,
  RM = v1004,
  urbrur = v1006,
  rend.dom = v6529,
  rend.dom.sm = v6530,
  rend.pc = v6531,
  rend.pc.sm = v6532,
  peso = v0010,
  cor = v0606,
  est.no.mun = v0636,
  UFesc = v6362,
  mun.esc = v6364,
  trab.no.mun = v0660,
  uf.trab = v6602,
  mun.trab = v6604,
  commute = v0661,
  tempo.desl = v0662,
  sexo = v0601
)

# Rename cities (origin side): replace each municipality code with its display
# label. Codes that are not in the lookup are left untouched.
# All labels use a decimal comma; "Barra de São Miguel" previously read "0.8%"
# with a decimal point, unlike every other label in the script.
df.rmm$municipio <- local({
  labels <- c(
    "00409" = "Atalaia, 5,6%",
    "00508" = "Barra de Santo Antônio, 2,9%",
    "00607" = "Barra de São Miguel, 0,8%",
    "02207" = "Coqueiro Seco, 3,1%",
    "04302" = "Maceió, 22,3%",
    "04708" = "Marechal Deodoro, 10,5%",
    "05200" = "Messias, 5,4%",
    "05507" = "Murici, 3,2%",
    "06448" = "Paripueira, 3,6%",
    "06901" = "Pilar, 5,3%",
    "07701" = "Rio Largo, 26,3%",
    "07909" = "Santa Luzia do Norte, 3%",
    "08907" = "Satuba, 8,2%"
  )
  codes <- df.rmm$municipio
  known <- codes %in% names(labels)
  # Index the lookup by name (never by position) and drop the names again so
  # the column stays a plain character vector.
  codes[known] <- unname(labels[as.character(codes[known])])
  codes
})

# Create a new data frame without the rows whose `commute` value is missing.
df.rmm2 <- df.rmm[!is.na(df.rmm[["commute"]]), ]

# Filter municipalities: keep only the rows whose workplace municipality code
# (mun.trab) is one of the 13 codes listed below.
df.rmm3 <- data.frame(
  filter(
    df.rmm2,
    mun.trab %in% c(
      "2700409", "2700508", "2700607", "2702207", "2704302", "2704708",
      "2705200", "2705507", "2706448", "2706901", "2707701", "2707909",
      "2708907"
    )
  )
)

# Rename destinations: replace each workplace municipality code with its
# display label. Codes that are not in the lookup are left untouched.
df.rmm3$mun.trab <- local({
  labels <- c(
    "2700409" = "Atalaia, 3%",
    "2700508" = "Barra de Santo Antônio, 0,6%",
    "2700607" = "Barra de São Miguel, 1,5%",
    "2702207" = "Coqueiro Seco, 0,2%",
    "2704302" = "Maceió, 63,8%",
    "2704708" = "Marechal Deodoro, 7%",
    "2705200" = "Messias, 1,9%",
    "2705507" = "Murici, 1%",
    "2706448" = "Paripueira, 1,3%",
    "2706901" = "Pilar, 5%",
    "2707701" = "Rio Largo, 12,3%",
    "2707909" = "Santa Luzia do Norte, 0,7%",
    "2708907" = "Satuba, 1,8%"
  )
  codes <- df.rmm3$mun.trab
  known <- codes %in% names(labels)
  # Index the lookup by name (never by position) and drop the names again so
  # the column stays a plain character vector.
  codes[known] <- unname(labels[as.character(codes[known])])
  codes
})

# criates OD table
#od_table<-table(df.rmm3$municipio,df.rmm3$mun.trab)
#od_table

# Create the adjusted weight column: pesoajust = peso / 1e13
# (1e13 is the same value as 10000000000000).
df.rmm3$pesoajust <- df.rmm3$peso / 1e13

# Weighted table: cross-tabulate origin (municipio) against destination
# (mun.trab) using the adjusted weights, rounded to whole numbers.
tabelaod <- round(
  wtd.table(
    x = df.rmm3$municipio,
    y = df.rmm3$mun.trab,
    weights = df.rmm3$pesoajust
  )
)

# Show the table as a data frame (origins in rows, destinations in columns).
as.data.frame.matrix(tabelaod)

# Save the table to the file "tabelaod" in the working directory.
write.table(
  tabelaod,
  file = "tabelaod",
  sep = " ",
  eol = "\r\n",
  na = "",
  quote = TRUE,
  row.names = TRUE
)



# First Sankey diagram
#dataForSankey <- df.rmm3%>%dplyr::select(mun.trab, municipio)
#hchart(data_to_sankey(dataForSankey), "sankey", name = "OD")

# Long-format origin/destination table: one row per (origin, destination) pair
# with columns Var1 (origin), Var2 (destination) and Freq (rounded weighted
# count).
od <- data.frame(
  round(
    wtd.table(
      x = df.rmm3$municipio,
      y = df.rmm3$mun.trab,
      weights = df.rmm3$pesoajust
    )
  )
)

# A connection data frame is a list of flows with an intensity for each flow.
# Var1/Var2 are factors, so convert them with as.character(): c() on a factor
# returns its integer codes on R < 4.1, which would make origin i and
# destination i share the same node name ("1", "2", ...).
# The former `od$Var <- NULL` line was dropped: `od` has no column named
# "Var", so it had no effect.
links <- data.frame(
  source = as.character(od$Var1),
  target = as.character(od$Var2),
  value = od$Freq
)

# Add a 'group' column to each connection: the city name of the flow's origin,
# i.e. the label text before the first comma. Deriving it from the data
# replaces a hard-coded 13-element vector that was silently recycled over all
# rows and contained labels that did not match the data ("Rio largo").
links$group <- as.factor(sub(",.*$", "", as.character(links$source)))

# From these flows we need to create a node data frame: it lists every entity
# involved in the flows (origins first, then destinations).
nodes <- data.frame(
  name = unique(c(as.character(links$source), as.character(links$target)))
)

# Add a 'group' column to each node: the city name, so that the origin and
# the destination node of the same city fall into the same group.
nodes$group <- as.factor(sub(",.*$", "", as.character(nodes$name)))


# networkD3 expects connections to reference nodes by zero-based index rather
# than by name, so look each endpoint up in nodes$name and subtract 1.
links[["IDsource"]] <- match(links[["source"]], nodes[["name"]]) - 1
links[["IDtarget"]] <- match(links[["target"]], nodes[["name"]]) - 1

# Make the network: links and nodes are both coloured by their 'group' column.
p <- sankeyNetwork(
  Links = links,
  Nodes = nodes,
  Source = "IDsource",
  Target = "IDtarget",
  Value = "value",
  NodeID = "name",
  NodeGroup = "group",
  LinkGroup = "group",
  fontSize = 10,
  dragY = TRUE,
  showNodeValues = FALSE
)
p

这是我目前得到的结果(图)

这是我希望得到的排列顺序(图)

我想将标签移到外面,这样更容易阅读。

r d3.js sankey-diagram htmlwidgets networkd3
1个回答
0
投票

从您的 `od` 对象开始,您可以按节点的总值对节点 data.frame 进行排序,并设置 `iterations = 0` 以禁用 `sankeyNetwork()` 自动执行的节点布局算法,例如:

# Flows: one row per (source, target) pair with its value.
links <- rename(od, source = Var1, target = Var2, value = Freq)

# Nodes: stack the source and target labels into a single `name` column, sum
# the flow value per name, and sort from the largest total to the smallest.
nodes <- pivot_longer(
  links,
  c(source, target),
  names_to = NULL,
  values_to = "name"
)
nodes <- summarise(nodes, value = sum(value), .by = name)
nodes <- arrange(nodes, desc(value))

nodes
#> # A tibble: 26 × 2
#>    name                    value
#>    <fct>                   <dbl>
#>  1 Maceió, 63,8%           15642
#>  2 Rio Largo, 26,3%         6443
#>  3 Maceió, 22,3%            5459
#>  4 Rio Largo, 12,3%         3013
#>  5 Marechal Deodoro, 10,5%  2575
#>  6 Satuba, 8,2%             2005
#>  7 Marechal Deodoro, 7%     1711
#>  8 Atalaia, 5,6%            1384
#>  9 Messias, 5,4%            1323
#> 10 Pilar, 5,3%              1294
#> # ℹ 16 more rows

# Zero-based node indices for each flow endpoint, looked up by node name.
links[["source_id"]] <- match(links[["source"]], nodes[["name"]]) - 1
links[["target_id"]] <- match(links[["target"]], nodes[["name"]]) - 1

# Draw the diagram. iterations = 0 turns off sankeyNetwork()'s automatic node
# placement, so the nodes keep the order of the `nodes` data frame.
sankeyNetwork(
  Links = links,
  Nodes = nodes,
  Source = "source_id",
  Target = "target_id",
  Value = "value",
  NodeID = "name",
  NodeGroup = "name",
  LinkGroup = "source",
  fontSize = 10,
  iterations = 0
)

© www.soinside.com 2019 - 2024. All rights reserved.