具有层次聚类的堆积条形图(树状图)

问题描述 投票:0回答:4

我正在尝试获得类似的东西,但不幸的是,我找不到任何可以使我使用树状图绘制堆积条形图的软件包,如下所示:

有谁知道怎么做吗?

r bar-chart dendrogram dendextend
4个回答
3
投票

第一次尝试找到答案 - 但需要更多的工作才能使其真正发挥作用。具体来说,需要更仔细地考虑元素位置的对齐(以及它们的顺序)。

# library
library(ggplot2)

# create a dataset
specie=c(rep("sorgho" , 3) , rep("poacee" , 3) , rep("banana" , 3) , rep("triticum" , 3) )
condition=rep(c("normal" , "stress" , "Nitrogen") , 4)
value=abs(rnorm(12 , 0 , 15))
data=data.frame(specie,condition,value)


dend <- as.dendrogram(hclust(dist(with(data, tapply(value, specie, mean)))))

data$specie <- factor(data$specie, levels = labels(dend))

# Stacked Percent
library(dendextend)
p1 <- ggplot(dend, horiz = T) 
p2 <- ggplot(data, aes(fill=condition, y=value, x=specie)) + 
    geom_bar( stat="identity", position="fill") + coord_flip()

library(cowplot)
plot_grid(p1, p2, align = "h")


2
投票

这是我的 Roman_G 脚本版本。它在条形内显示百分比,并使用

vegan::reorder.hclust
对树状图的分支重新排序,以便第一列具有最高值的行倾向于放置在顶部,而最后一列具有最高值的行倾向于放置在顶部往往被放置在底部。我还删除了额外的边距、刻度线和轴。

library(tidyverse)
library(ggdendro)
library(vegan)
library(colorspace)
library(cowplot)

t=read.table(text="Spain_EN 0.028152 0.971828 0.000010 0.000010
Norway_Mesolithic 0.784705 0.083387 0.000010 0.131898
Russia_Sunghir4 0.000010 0.000010 0.999970 0.000010
Iran_Wezmeh_N 0.000010 0.492331 0.383227 0.124433
Russia_DevilsCave_N 0.000010 0.000010 0.000010 0.999970
Italy_North_Villabruna_HG 0.999970 0.000010 0.000010 0.000010
Russia_HG_Karelia 0.527887 0.133179 0.072342 0.266593
Russia_Yana_UP 0.000010 0.000014 0.999966 0.000010
Georgia_Kotias 0.000010 0.537322 0.381313 0.081355
China_SEastAsia_Island_EN 0.000010 0.000010 0.148652 0.851328
Turkey_N 0.000010 0.999970 0.000010 0.000010
USA_Ancient_Beringian 0.008591 0.000010 0.095008 0.896391
Russia_Sidelkino_HG 0.624076 0.045350 0.105615 0.224958
Russia_Kolyma_M 0.020197 0.000010 0.000010 0.979783
China_Tianyuan 0.000010 0.000010 0.423731 0.576249",row.names=1)

hc=hclust(dist(t),method="ward.D2")
hc=reorder(hc,wts=-as.matrix(t)%*%seq(ncol(t))^2) # vegan::reorder.hclust
tree=ggdendro::dendro_data(as.dendrogram(hc),type="rectangle")

p1=ggplot(ggdendro::segment(tree))+
geom_segment(aes(x=y,y=x,xend=yend,yend=xend),lineend="round",size=.4)+
scale_x_continuous(expand=expansion(add=c(0,.01)))+ # don't crop half of line between top-level nodes
scale_y_continuous(limits=.5+c(0,nrow(t)),expand=c(0,0))+
theme(
  axis.text=element_blank(),
  axis.ticks=element_blank(),
  axis.ticks.length=unit(0,"pt"), # remove extra space occupied by ticks
  axis.title=element_blank(),
  panel.background=element_rect(fill="white"),
  panel.grid=element_blank(),
  plot.margin=margin(5,5,5,0)
)

t=t[hc$labels[hc$order],]
t2=data.frame(V1=rownames(t)[row(t)],V2=colnames(t)[col(t)],V3=unname(do.call(c,t)))
lab=round(100*t2$V3)
lab[lab==0]=""

p2=ggplot(t2,aes(x=factor(V1,level=rownames(t)),y=V3,fill=V2))+
geom_bar(stat="identity",width=1,position=position_fill(reverse=T))+
geom_text(aes(label=lab),position=position_stack(vjust=.5,reverse=T),size=3.5)+
coord_flip()+
scale_x_discrete(expand=c(0,0))+
scale_y_discrete(expand=c(0,0))+
scale_fill_manual(values=colorspace::hex(HSV(head(seq(0,360,length.out=ncol(t)+1),-1),.5,1)))+
theme(
  axis.text=element_text(color="black",size=11),
  axis.text.x=element_blank(),
  axis.ticks=element_blank(),
  axis.title=element_blank(),
  legend.position="none",
  plot.margin=margin(5,0,5,5)
)

cowplot::plot_grid(p2,p1,rel_widths=c(1,.4))
ggsave("a.png",height=.25*nrow(t),width=7)

还有来自

scale_x_dendrogram
scale_y_dendrogram
ggh4x
,它们使用
ggdendro::dendro_data
https://teunbrand.github.io/ggh4x/articles/PositionGuides.html#dendrograms。然而,我无法让它们使用
coord_flip
来处理水平堆叠条。

library(ggh4x)

t=head(USArrests,20)
t2=data.frame(V1=rownames(t)[row(t)],V2=colnames(t)[col(t)],V3=unname(do.call(c,t)))
hc=hclust(dist(t))

ggplot(t2,aes(x=factor(V1,level=rownames(t)),y=V3,fill=V2))+
geom_bar(stat="identity",width=1,position=position_stack(reverse=F))+
geom_text(aes(label=round(V3)),position=position_stack(vjust=.5,reverse=F),size=3)+
scale_x_dendrogram(hclust=hc)+
scale_y_discrete(expand=c(0,0))+
# scale_fill_manual(values=colorspace::hex(HSV(head(seq(0,360,length.out=ncol(t)+1),-1),.5,1)))+
theme(
  axis.text=element_text(color="black",size=11),
  axis.text.x=element_text(angle=90,hjust=1,vjust=.5),
  axis.text.y=element_blank(),
  axis.ticks=element_blank(),
  axis.ticks.length=unit(14,"pt"), # height of dendrogram
  axis.title=element_blank(),
  legend.justification=c(0,1),
  legend.key=element_rect(fill=NA), # remove gray border around color squares
  legend.margin=margin(-6,0,0,0),
  legend.position=c(0,1),
  legend.title=element_blank(),
  panel.background=element_rect(fill="white"),
  plot.margin=margin(5,0,5,5)
)

ggsave("a.png",height=6,width=6)

编辑:第三个选项是使用

circlize
https://jokergoo.github.io/circlize/reference/circos.barplot.html.

library(circlize)
library(vegan) # for reorder.hclust (may be masked by `seriation`)
library(dendextend) # for color_branches

t=read.table(text="Kalmyk 0.119357 0.725057 0.000010 0.037803 0.117774
Kyrgyz_China 0.039367 0.512079 0.230150 0.095038 0.123366
Altaian_Chelkan 0.034095 0.000010 0.919478 0.000010 0.046407
Azeri 0.051638 0.004671 0.010727 0.902646 0.030318
Uzbek 0.102725 0.273261 0.001854 0.452126 0.170033
Salar 0.000010 0.539636 0.460334 0.000010 0.000010
Tatar_Kazan 0.113456 0.057026 0.000010 0.099336 0.730171
Tatar_Siberian 0.251376 0.221389 0.000010 0.077505 0.449721
Finnish 0.007214 0.000010 0.000010 0.015174 0.977592
Yakut 0.505434 0.473202 0.000010 0.002914 0.018440
Mansi 0.572791 0.000010 0.000010 0.000010 0.427179
Altaian 0.222424 0.335614 0.358801 0.032694 0.050468
Shor_Mountain 0.233984 0.000010 0.724596 0.000010 0.041400
Chuvash 0.180171 0.011056 0.000010 0.006462 0.802301
Enets 0.920409 0.000010 0.000010 0.000010 0.079561
Yukagir_Tundra 0.710359 0.289611 0.000010 0.000011 0.000010
Kyrgyz_Tajikistan 0.104000 0.563708 0.000010 0.125799 0.206483
Khakass_Kachin 0.254253 0.416760 0.174200 0.005262 0.149525
Tuvinian 0.448940 0.448899 0.000010 0.031803 0.070348
Besermyan 0.209841 0.001487 0.000010 0.000460 0.788202
Nogai_Astrakhan 0.062497 0.463590 0.000010 0.183203 0.290701
Todzin 0.725173 0.257670 0.000010 0.005836 0.011312
Kazakh 0.067027 0.518213 0.087979 0.114550 0.212231
Tofalar 0.815599 0.110299 0.000010 0.009693 0.064398
Karakalpak 0.009983 0.316964 0.389103 0.158275 0.125676
Estonian 0.000010 0.000010 0.000010 0.004409 0.995561
Dolgan 0.694025 0.255361 0.000010 0.049624 0.000979
Tatar_Siberian_Zabolotniye 0.521637 0.020132 0.000010 0.000010 0.458212
Uyghur 0.043578 0.486742 0.000010 0.318983 0.150687
Udmurt 0.256391 0.000010 0.001010 0.000010 0.742579
Evenk_FarEast 0.241328 0.606202 0.000010 0.000010 0.152451
Selkup 0.804662 0.000010 0.000010 0.000010 0.195308
Kumyk 0.060751 0.000112 0.000010 0.823905 0.115222
Hungarian 0.000010 0.000010 0.000010 0.244311 0.755659
Tubalar 0.159517 0.009457 0.802778 0.000010 0.028238
Turkmen 0.123631 0.226543 0.000010 0.529793 0.120023
Karelian 0.012854 0.000010 0.000010 0.000010 0.987116
Kazakh_China 0.074285 0.573009 0.152931 0.069362 0.130412
Mongol 0.033174 0.847004 0.025135 0.005178 0.089509
Daur 0.000010 0.995215 0.000010 0.000010 0.004755
Evenk_Transbaikal 0.611414 0.388556 0.000010 0.000010 0.000010
Nogai_Karachay_Cherkessia 0.119988 0.120774 0.000010 0.617261 0.141967
Veps 0.026887 0.000010 0.000010 0.000010 0.973083
Even 0.441349 0.278457 0.000010 0.015239 0.264946
Nganasan 0.999960 0.000010 0.000010 0.000010 0.000010
Bashkir 0.114088 0.056493 0.251488 0.030390 0.547542
Xibo 0.000010 0.985541 0.000010 0.000362 0.014077
Khakass 0.202707 0.171413 0.530905 0.007675 0.087300
Shor_Khakassia 0.258218 0.000010 0.694889 0.000010 0.046873
Nanai 0.105903 0.894067 0.000010 0.000010 0.000010
Buryat 0.064420 0.848458 0.017066 0.001696 0.068360
Yukagir_Forest 0.379416 0.096266 0.000010 0.003580 0.520728
Karachai 0.067138 0.004534 0.000010 0.798982 0.129336
Mordovian 0.022303 0.001193 0.000010 0.025251 0.951243
Turkish_Balikesir 0.092314 0.038550 0.000010 0.804964 0.064163
Turkish 0.040918 0.012255 0.000010 0.873179 0.073639
Kyrgyz_Kyrgyzstan 0.090129 0.607265 0.000010 0.122885 0.179711
Balkar 0.075115 0.000010 0.000010 0.829730 0.095136
Gagauz 0.000010 0.027887 0.015891 0.601619 0.354593
Nogai_Stavropol 0.070584 0.403817 0.000010 0.244701 0.280888
Negidal 0.248518 0.751452 0.000010 0.000010 0.000010
Tatar_Mishar 0.066112 0.037441 0.010377 0.138008 0.748062",row.names=1)

hc=hclust(dist(t))
hc=reorder(hc,-(t[,1]+t[,2]-t[,4]-2*t[,5]))

labelcolor=hcl(c(260,90,120,60,0,210,180,310)+15,60,70)
barcolor=hcl(c(310,260,120,60,210)+15,60,70)

labels=hc$labels[hc$order]
cut=cutree(hc,8)
dend=color_branches(as.dendrogram(hc),k=length(unique(cut)),col=labelcolor[unique(cut[labels])])
t=t[hc$labels[hc$order],]

circos.clear()
png("a.png",w=2500,h=2500,res=300)
circos.par(cell.padding=c(0,0,0,0),gap.degree=5,points.overflow.warning=F)
circos.initialize("a",xlim=c(0,nrow(t)))

circos.track(ylim=c(0,1),bg.border=NA,track.height=.28,track.margin=c(.01,0),
  panel.fun=function(x,y)for(i in 1:nrow(t))circos.text(i-.5,0,labels[i],adj=c(0,.5),facing="clockwise",niceFacing=T,cex=.65,col=labelcolor[cut[labels[i]]]))

circos.track(ylim=c(0,1),track.margin=c(0,0),track.height=.35,bg.lty=0,panel.fun=function(x,y){
  mat=as.matrix(t)
  pos=1:nrow(mat)-.5
  barwidth=1
  for(i in 1:ncol(mat)){
    seq1=rowSums(mat[,seq(i-1),drop=F])
    seq2=rowSums(mat[,seq(i),drop=F])
    circos.rect(pos-barwidth/2,if(i==1){0}else{seq1},pos+barwidth/2,seq2,col=barcolor[i],border="gray20",lwd=.1)
  }
  for(i in 1:ncol(mat)){
    seq1=rowSums(mat[,seq(i-1),drop=F])
    seq2=rowSums(mat[,seq(i),drop=F])
    lab=round(100*t[,i])
    lab[lab<=1]=""
    circos.text(pos,if(i==1){seq1/2}else{seq1+(seq2-seq1)/2},labels=lab,col="gray10",cex=.4,facing="downward")
  }
})

circos.track(ylim=c(0,attr(dend,"height")),bg.border=NA,track.margin=c(0,.0015),track.height=.35,panel.fun=function(x,y)circos.dendrogram(dend))

circos.clear()
dev.off()


1
投票

几乎三年后,仍然没有能够将堆积条形图与 ggplot 中的层次聚类相结合的包(至少我知道)。这是我的解决方案基于该帖子加入树状图和热图

library(tidyverse)
library(phangorn)
library(vegan)
library(ggdendro)
library(dendextend)
library(ggsci)
library(cowplot)

## generate example data ####
set.seed(500)
combined_matrix <- data.frame(a=runif(14, 0, 33), b=runif(14, 0, 33), c=runif(14, 0, 33))
combined_matrix$d <- 100 - combined_matrix$a - combined_matrix$b - combined_matrix$c
row.names(combined_matrix) <- paste0("s", seq(1,14))

# vegan::vegdist() to calculate Bray-Curtis distance matrix
dm <- vegdist(combined_matrix, method = "bray")
# calculate UPGMA tree with phangorn::upgma() and convert to dendrogram
dendUPGMA <- as.dendrogram(upgma(dm))
plot_dendro_bars_v <- function(df, dend, taxonomy) {
  #convert dendrogram to segment data
  dend_data <- dendro_data(dend, type="rectangle")
  segment_data <- dend_data[["segments"]]
  #sample positions df
  sample_pos_table <- with(dend_data$labels, 
                           data.frame(x_center = x, sample = as.character(label), width = 0.9))
  #prepare input data
  ptdf <- rownames_to_column(df, var = "sample") %>%
    pivot_longer(-sample, names_to = taxonomy, values_to = "Frequency") %>%
    group_by(sample) %>%
    mutate(Frequency = Frequency/100,
           ymax = cumsum(Frequency/sum(Frequency)),
           ymin = ymax - Frequency/sum(Frequency),
           y_center = ymax-(Frequency/2)) %>%
    left_join(sample_pos_table) %>%
    mutate(xmin = x_center-width/2,
           xmax = x_center+width/2)
  #plot stacked bars
  axis_limits <- with(sample_pos_table, 
                      c(min(x_center - 0.5 * width), max(x_center + 0.5 * width))) + 
    0.1 * c(-1, 1) # extra spacing: 0.1
  plt_hbars <- ggplot(ptdf, 
                      aes_string(x = "x_center", y = "y_center", fill = taxonomy, xmin = "xmin", xmax = "xmax",
                                 height = "Frequency", width = "width")) + 
    geom_tile() +
    geom_rect(ymin = 0, ymax = 1, color = "black", fill = "transparent") +
    scale_fill_rickandmorty() +
    scale_y_continuous(expand = c(0, 0)) + 
    # For the y axis, alternatively set the labels as: gene_position_table$gene
    scale_x_continuous(breaks = sample_pos_table[, "x_center"], 
                       labels = sample_pos_table$sample,
                       limits = axis_limits, 
                       expand = c(0, 0)) + 
    labs(x = "", y = "Frequency") +
    theme_bw() +
    theme(# margin: top, right, bottom, and left
      plot.margin = unit(c(-0.9, 0.2, 1, 0.2), "cm"), 
      panel.grid.minor = element_blank())
  #plot dendrogram
  plt_dendr <- ggplot(segment_data) + 
    geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) + 
    scale_y_continuous(expand = c(0, 0.05)) + 
    scale_x_continuous(breaks = sample_pos_table$x_center, 
                       labels = rep("", nrow(sample_pos_table)), 
                       limits = axis_limits, 
                       expand = c(0, 0)) + 
    labs(x = "", y = "Distance", colour = "", size = "") +
    theme_bw() + 
    theme(panel.grid.minor = element_blank(),
          panel.grid.major = element_blank())
  #combine plots
  comb <- plot_grid(plt_dendr, plt_hbars, align = 'v', ncol = 1, axis = "lr", rel_heights = c(0.3, 1))
  comb
}
plot_dendro_bars_v(df = combined_matrix, dend = dendUPGMA, taxonomy = "example")

vertical

或水平

  plot_dendro_bars_h <- function(df, dend, taxonomy) {
  #convert dendrogram to segemnt data
  dend_data <- dendro_data(dend, type="rectangle")
  segment_data <- with(segment(dend_data), 
                       data.frame(x = y, y = x, xend = yend, yend = xend))
  #sample positions df
  sample_pos_table <- with(dend_data$labels, 
                           data.frame(y_center = x, sample = as.character(label), height = 0.9))
  #prepare input data
  ptdf <- rownames_to_column(df, var = "sample") %>%
    pivot_longer(-sample, names_to = taxonomy, values_to = "Frequency") %>%
    group_by(sample) %>%
    mutate(Frequency = Frequency/100,
           xmax = cumsum(Frequency/sum(Frequency)),
           xmin = xmax - Frequency/sum(Frequency),
           x_center = xmax-(Frequency/2)) %>%
    left_join(sample_pos_table) %>%
    mutate(ymin = y_center-height/2,
           ymax = y_center+height/2)
  #plot stacked bars
  axis_limits <- with(sample_pos_table, 
                      c(min(y_center - 0.5 * height), max(y_center + 0.5 * height))) + 
    0.1 * c(-1, 1) # extra spacing: 0.1
  plt_hbars <- ggplot(ptdf, 
                      aes_string(x = "x_center", y = "y_center", fill = taxonomy, ymin = "ymin", ymax = "ymax",
                                 height = "height", width = "Frequency")) + 
    geom_tile() +
    geom_rect(xmin = 0, xmax = 1, color = "black", fill = "transparent") +
    scale_fill_rickandmorty() +
    scale_x_continuous(expand = c(0, 0)) + 
    # For the y axis, alternatively set the labels as: gene_position_table$gene
    scale_y_continuous(breaks = sample_pos_table[, "y_center"], 
                       labels = rep("", nrow(sample_pos_table)),
                       limits = axis_limits, 
                       expand = c(0, 0)) + 
    labs(x = "Frequency", y = "") +
    theme_bw() +
    theme(# margin: top, right, bottom, and left
      plot.margin = unit(c(1, 0.2, 0.2, -0.9), "cm"), 
      panel.grid.minor = element_blank())
  #plot dendrogram
  plt_dendr <- ggplot(segment_data) + 
    geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) + 
    scale_x_reverse(expand = c(0, 0.05)) + 
    scale_y_continuous(breaks = sample_pos_table$y_center, 
                       labels = sample_pos_table$sample, 
                       limits = axis_limits, 
                       expand = c(0, 0)) + 
    labs(x = "Distance", y = "", colour = "", size = "") +
    theme_bw() + 
    theme(panel.grid.minor = element_blank(),
          panel.grid.major = element_blank())
  #combine plots
  comb <- plot_grid(plt_dendr, plt_hbars, align = 'h', rel_widths = c(0.3, 1))
  return(comb)
}
plot_dendro_bars_h(df = combined_matrix, dend = dendUPGMA, taxonomy = "example")

horizontal

数据可以与任何可以强制形成树状图的树(例如 UniFrac 树)组合。玩得开心,罗曼。


0
投票

我需要紧急帮助。我想构建一个像你一样的树状图,但是使用 Bray Curtis ..我应该如何为此选项编写代码?

© www.soinside.com 2019 - 2024. All rights reserved.