我尝试使用 `iris` 数据集来展示切割高度(cut height)对聚类数量的影响,并使用 `rect.dendrogram` 将得到的聚类可视化。
# Load dendextend, installing it first when it is missing. library() runs in
# both cases, so the package is attached even right after a fresh install.
if (!requireNamespace("dendextend", quietly = TRUE)) {
  install.packages("dendextend")
}
library("dendextend")

data("iris", package = "datasets")

# Cluster the first two principal components of the iris measurements
# (every column except the fifth, which holds the labels).
Data <- list()
Data$Lab <- as.character(iris[, 5])
Data$dat <- prcomp(iris[, -5])$x[, 1:2]
Data$dist <- dist(Data$dat, method = "euclidean")
Data$hist <- hclust(Data$dist, method = "complete")

# plot dendrogram
hcd <- as.dendrogram(Data$hist)
# Sort once so the plot and the rectangles are drawn from the same object.
hcd_sorted <- sort(hcd)
cluster.height <- 6

par(
  pty = "m",
  mar = c(1, 2, 1.5, 1),
  mgp = c(1, 0, 0),
  tck = 0.01,
  cex.axis = 0.75,
  font.main = 1
)
plot(hcd_sorted, ylab = "Height", leaflab = "none")

# Outline the clusters obtained by cutting the tree at cluster.height.
rect.dendrogram(
  hcd_sorted,
  h = cluster.height,
  border = "black",
  xpd = NA,
  lower_rect = -0.1,
  upper_rect = 0
)

# Dotted horizontal line marking the cut height.
abline(h = cluster.height, lty = 3)

# Close the active graphics device.
dev.off()
当切割高度取值较大时,会出现两个矩形。
该函数会查找由切割产生的聚类。问题是:有没有办法只得到较大的那个矩形?是我忽略了某个参数/选项,还是
`rect.dendrogram`
函数本身存在错误?
在我看来,这确实像是某种错误。您可以只使用该函数中相关的那部分代码。
#' Draw a rectangle around clusters of an already plotted hierarchical tree
#'
#' Cuts `tree` at height `h` with `cutree()`, works out where each resulting
#' cluster sits along the x axis (using `tree$order`), and outlines the
#' requested cluster(s) with `rect()`. Because the clusters come straight from
#' `cutree(tree, h = h)`, a cut above the highest merge yields one rectangle
#' spanning all leaves.
#'
#' @param tree A clustering result providing `$height` and `$order` and
#'   accepted by `cutree()` (e.g. the output of `hclust()`).
#' @param which Integer vector giving the positions, counted from the left of
#'   the plot, of the clusters to outline.
#' @param h Cut height; a single number greater than the smallest merge
#'   height of `tree`.
#' @param ybadj Offset added to the bottom edge (the lower plot limit).
#' @param ytadj Offset added to the top edge of the rectangle.
#' @param ... Further arguments passed on to `rect()`, e.g. `border`.
#' @return Invisibly, a numeric matrix with one row per rectangle and the
#'   columns `xleft`, `ybottom`, `xright`, `ytop`.
rect_dnd <- function(tree, which, h, ybadj = 0, ytadj = 0, ...) {
  stopifnot(is.numeric(h), length(h) == 1L, !is.na(h))

  # Merge heights in reversed order, so the last merge comes first.
  heights <- rev(tree$height)
  # `which` is also a parameter here, so call the base function explicitly.
  below <- base::which(heights < h)
  if (length(below) == 0L) {
    stop("'h' must be greater than the smallest merge height of 'tree'",
         call. = FALSE)
  }
  k <- below[1L]

  membership <- cutree(tree, h = h)
  # Cluster sizes in left-to-right plotting order.
  sizes <- table(membership)[unique(membership[tree$order])]
  edges <- c(0, cumsum(sizes))

  # The top edge sits midway between the merges just above and just below the
  # cut; when the cut is above every merge (k == 1) only that merge is used.
  ytop <- mean(heights[max(k - 1L, 1L):k]) + ytadj
  ybottom <- par("usr")[3L] + ybadj
  xleft <- unname(edges[which]) + 0.66
  xright <- unname(edges[which + 1]) + 0.33

  rect(xleft = xleft, ybottom = ybottom, xright = xright, ytop = ytop, ...)
  invisible(cbind(xleft = xleft, ybottom = ybottom, xright = xright, ytop = ytop))
}
>
# Demo: outline only the first (leftmost) cluster produced by cutting the tree
# at cluster.height. Requires `hcd` and `Data$hist` (built in the data section
# at the end) and `rect_dnd()`.
cluster.height <- 6

par(
  pty = "m", mar = c(1, 2, 1.5, 1), mgp = c(1, 0, 0), tck = 0.01,
  cex.axis = 0.75, font.main = 1
)
plot(hcd, ylab = "Height", leaflab = "none")
rect_dnd(
  Data$hist,
  which = 1, h = cluster.height, ytadj = -0.1, border = "red"
)
# Dotted horizontal line marking the cut height.
abline(h = cluster.height, lty = 3)
数据:
# Example data: the first two principal components of the iris measurements,
# clustered by complete linkage on Euclidean distances.
Data <- list(
  Lab = as.character(iris[, 5]),
  dat = prcomp(iris[, -5])$x[, 1:2]
)
Data$dist <- dist(Data$dat, method = "euclidean")
Data$hist <- hclust(Data$dist, method = "complete")
# Dendrogram form of the same tree, used for plotting.
hcd <- as.dendrogram(Data$hist)