我正在撰写结果分析,想修改运行随机森林后得到的变量重要性图中的 MeanDecreaseAccuracy 图。我只想把 MeanDecreaseAccuracy 图改成条形图,使其比目前的显示效果更美观。
最好的方法是什么?
我目前的代码如下(在此之前还有很多代码,但就这个例子而言,这些应该足够了):
# Load the wine data and derive a binary quality label:
# 1 when quality >= 7, 0 when quality < 7 (a missing quality stays NA).
wine <- read.csv("wine_dataset.csv")
wine$quality01 <- as.factor(as.integer(wine$quality >= 7))
summary(wine)

# Histograms of the numeric columns only. vapply() guarantees a logical mask
# and drop = FALSE keeps a data frame even if a single column is selected.
# NOTE(review): hist.data.frame is not base R; presumably it comes from a
# package (e.g. Hmisc) loaded earlier in the script -- confirm.
num_data <- wine[, vapply(wine, is.numeric, logical(1)), drop = FALSE]
hist.data.frame(num_data)

# Set seed to make sure results are repeatable
set.seed(8, sample.kind = "Rounding")

# Use random forest with an mtry value of 3 to fit the model.
# importance = TRUE (spelled out: `T` can be reassigned) is needed so that
# MeanDecreaseAccuracy is computed.
wine.bag <- randomForest(
  quality01 ~ alcohol + volatile_acidity + sulphates + residual_sugar +
    chlorides + free_sulfur_dioxide + fixed_acidity + pH + density +
    citric_acid,
  data = wine,
  mtry = 3,
  importance = TRUE
)
wine.bag       # Review the random forest results
plot(wine.bag) # Plot the random forest results

# Analyze the importance of each variable in the model: turn the importance
# matrix into a data frame and expose the row names as a `vars` column.
imp <- as.data.frame(importance(wine.bag))
imp <- cbind(vars = rownames(imp), imp)
barplot(imp$MeanDecreaseAccuracy, names.arg = imp$vars)
我目前的输出是:
我目前使用的代码可以在此处找到:
这里有几个选择。
library(randomForest)
library(tidyverse)

# Fit a random forest on iris; importance = TRUE requests the
# permutation-based importance measures as well.
iris.rf <- randomForest(Species ~ ., data = iris, importance = TRUE)

# Convert the importance matrix to a data frame and copy the row names
# (the predictor names) into an explicit `vars` column.
imp <- as.data.frame(importance(iris.rf))
imp <- cbind(vars = rownames(imp), imp)

# Sort rows by MeanDecreaseAccuracy, then make `vars` a factor whose levels
# follow that sorted order.
imp <- imp[order(imp[["MeanDecreaseAccuracy"]]), ]
imp[["vars"]] <- factor(imp[["vars"]], levels = unique(imp[["vars"]]))

# Base-graphics bar chart of MeanDecreaseAccuracy, one bar per predictor
with(imp, barplot(MeanDecreaseAccuracy, names.arg = vars))
# Faceted horizontal bar chart: one panel per "Mean*" importance column,
# each with its own x scale.
imp %>%
  pivot_longer(cols = matches("Mean")) %>%
  ggplot(mapping = aes(x = value, y = vars)) +
  geom_col() +
  # Rounded value printed in white at the midpoint of each bar
  geom_text(
    mapping = aes(label = round(value), x = value / 2),
    colour = "white",
    size = 3
  ) +
  facet_grid(. ~ name, scales = "free_x") +
  # No padding on the left of the x axis, 4% on the right
  scale_x_continuous(expand = expansion(mult = c(0, 0.04))) +
  theme_bw() +
  theme(
    axis.title = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )
我也不建议放弃点图(dot chart),(依我拙见)它是一种更简洁的可视化方式。下面是一些比你问题中的内置输出更可自定义的选项。
# Base-graphics dot chart of MeanDecreaseAccuracy, labelled by predictor,
# with the x axis running from 0 to the largest value and filled points.
with(
  imp,
  dotchart(
    x = MeanDecreaseAccuracy,
    labels = vars,
    xlim = c(0, max(MeanDecreaseAccuracy)),
    pch = 16
  )
)
# ggplot2 dot plot: one panel per "Mean*" importance column, sharing a
# common x scale that starts at 0.
imp %>%
  pivot_longer(cols = matches("Mean")) %>%
  ggplot(mapping = aes(x = value, y = vars)) +
  geom_point() +
  facet_grid(. ~ name) +
  # Lower limit fixed at 0, upper limit taken from the data
  scale_x_continuous(
    limits = c(0, NA),
    expand = expansion(mult = c(0, 0.04))
  ) +
  theme_bw() +
  # Keep only the horizontal (major y) grid lines
  theme(
    axis.title = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.major.y = element_line(),
    panel.grid.minor = element_blank()
  )
你也可以直接用数值本身代替点标记来绘图。例如:
# Same layout as a dot plot, but each point is replaced by its value
# (rounded to one decimal); every panel gets its own x scale.
imp %>%
  pivot_longer(cols = matches("Mean")) %>%
  ggplot(mapping = aes(x = value, y = vars)) +
  geom_text(mapping = aes(label = round(value, digits = 1)), size = 3) +
  facet_grid(. ~ name, scales = "free_x") +
  # Lower limit fixed at 0; 6% padding on the right
  scale_x_continuous(
    limits = c(0, NA),
    expand = expansion(mult = c(0, 0.06))
  ) +
  theme_bw() +
  # Keep only the horizontal (major y) grid lines
  theme(
    axis.title = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.major.y = element_line(),
    panel.grid.minor = element_blank()
  )
你也可以考虑使用棒棒糖图(例如使用 ggalt 包),或者参照这里的做法:https://uc-r.github.io/lollipop。示例:
suppressPackageStartupMessages({
  library(ggalt)
  library(randomForest)
  library(data.table)
})

# Random forest model (from @eipi10)
iris.rf <- randomForest(Species ~ ., data = iris, importance = TRUE)

# Importance matrix as a data.table keyed on MeanDecreaseAccuracy;
# keep.rownames = TRUE stores the predictor names in column `rn`.
imp <- data.table(
  importance(iris.rf),
  keep.rownames = TRUE,
  key = "MeanDecreaseAccuracy"
)

# Make `rn` a factor whose levels follow the current row order
imp[, rn := factor(rn, levels = unique(rn))]

# Long format, keeping only the "Mean*" measures; one lollipop panel each
ggplot(
  data = melt(imp, id.vars = "rn")[grepl("Mean", variable)],
  mapping = aes(x = rn, y = value, label = round(value, digits = 1))
) +
  geom_lollipop(point.colour = "cadetblue", point.size = 3) +
  # Shift the value labels past the lollipop heads
  geom_text(nudge_y = 5) +
  coord_flip() +
  facet_wrap(~ variable) +
  theme_minimal() +
  labs(x = NULL, y = "Percent")
由 reprex 包 (v0.3.0) 创建于 2020-04-28