使用箱形图绘制数据

问题描述 投票:0回答:1

我使用caret包中的多个模型进行了一些预测。最终结果如下所示。

structure(list(call = resamples.default(x = models), values = structure(list(
    Resample = c("Fold01.Rep1", "Fold01.Rep2", "Fold01.Rep3", 
    "Fold02.Rep1", "Fold02.Rep2", "Fold02.Rep3", "Fold03.Rep1", 
    "Fold03.Rep2", "Fold03.Rep3", "Fold04.Rep1", "Fold04.Rep2", 
    "Fold04.Rep3", "Fold05.Rep1", "Fold05.Rep2", "Fold05.Rep3", 
    "Fold06.Rep1", "Fold06.Rep2", "Fold06.Rep3", "Fold07.Rep1", 
    "Fold07.Rep2", "Fold07.Rep3", "Fold08.Rep1", "Fold08.Rep2", 
    "Fold08.Rep3", "Fold09.Rep1", "Fold09.Rep2", "Fold09.Rep3", 
    "Fold10.Rep1", "Fold10.Rep2", "Fold10.Rep3"), `ctree2~MAE` = c(2.38439486125806, 
    2.27432004515967, 2.24531372500338, 2.13114715377047, 2.75873877994736, 
    2.59691292314698, 2.63973349366821, 2.49410815045536, 3.22605936882376, 
    2.35113525514517, 1.99931520012693, 2.39659071384816, 2.99442021315024, 
    2.4904171736803, 2.20636537561672, 2.74196155319669, 2.87538881455827, 
    2.49826001419981, 2.52776844099101, 2.74110075792333, 2.4983909453917, 
    2.16844994593387, 2.45223841735022, 2.65973735363861, 2.25709962236915, 
    2.7769533874114, 1.9015650724235, 2.54556257721565, 2.43080778656189, 
    2.98952128492246), `ctree2~RMSE` = c(3.97958647210848, 3.53219461718882, 
    3.87871897373443, 3.10614529989274, 4.48888365057417, 4.53101637839183, 
    4.23709140320488, 3.53075694502852, 5.50005253812243, 3.87611540980909, 
    3.45490327789253, 3.87153153107898, 4.80397530729257, 3.89968195014742, 
    3.47544178136115, 4.87745944789539, 4.76937443928079, 3.65372309613576, 
    4.40220396293752, 4.95032851866008, 4.01529912263913, 3.713376860382, 
    4.20912261135497, 4.62031799557859, 3.83516439587673, 4.68214439549661, 
    2.88473996002199, 4.14826798154047, 4.46970494802803, 5.47300487567706
    ), `ctree2~Rsquared` = c(0.851911795682736, 0.870874182368903, 
    0.848052183858939, 0.912529172036035, 0.777874951997896, 
    0.823305383775216, 0.819077284156882, 0.871793492462385, 
    0.697111313518596, 0.850984075213742, 0.896505285711686, 
    0.853428114392334, 0.765309544806919, 0.845080924784473, 
    0.880211745688345, 0.771909873621962, 0.774334314204752, 
    0.850817824343973, 0.801097249026228, 0.786712537619774, 
    0.834727287416316, 0.869453847374309, 0.829076196066477, 
    0.814068744135405, 0.861820166143668, 0.787313444071908, 
    0.926298273316805, 0.838469926982801, 0.822182522603447, 
    0.725016430229977), `rpart~MAE` = c(2.42772633856966, 2.75967125664173, 
    2.4011984437408, 2.37651073179742, 3.07409210692062, 2.60243114263678, 
    2.89629595240576, 2.65612379243153, 3.48528944317321, 2.53114588508449, 
    2.23757771163705, 2.30644765637192, 3.26206500361755, 2.40106698408538, 
    2.29389524726154, 2.61606586291498, 2.91807469684416, 2.45252948033534, 
    2.70460117539714, 3.12508691026793, 2.25354453791025, 2.61099428078885, 
    2.78945120752749, 3.09088101036171, 2.27199561282309, 2.82322903175722, 
    1.81546746020426, 2.59030227619502, 2.49762093098879, 3.06307813632993
    ), `rpart~RMSE` = c(4.35088931475673, 4.10581596094961, 3.79401667863361, 
    3.66615556845631, 4.84825036050455, 4.44769718266361, 4.61648113379835, 
    3.76615580235136, 5.82747994046531, 4.31467751784573, 3.69963389904799, 
    3.39880787190816, 5.15181883306303, 3.93235671852579, 3.72942650150353, 
    4.66356807959634, 4.85881225047341, 3.65059516206548, 4.43465033885041, 
    5.78446631183566, 3.73094741898499, 4.24208582953254, 4.77680013047066, 
    5.38537936938824, 3.64547223182059, 4.77339981327228, 2.72866773051964, 
    4.13203622058148, 4.34411717816102, 5.6778759569548), `rpart~Rsquared` = c(0.825179417304765, 
    0.825375936910723, 0.854182801297257, 0.87916447141506, 0.740282693842249, 
    0.82606670408744, 0.784356715855597, 0.854094289093554, 0.663341340648588, 
    0.815344523194236, 0.880331757037654, 0.888789882726173, 
    0.729996563304898, 0.842057276542057, 0.861866341275138, 
    0.79155644790618, 0.766460356022522, 0.852569697419309, 0.797935573637131, 
    0.712372084898676, 0.857302394561457, 0.829004111438106, 
    0.782874879662123, 0.750116996491388, 0.87545354330518, 0.779486856659856, 
    0.933475769242811, 0.839658754948971, 0.831550991184637, 
    0.700479123006897), `lm~MAE` = c(2.48323414277954, 2.14640234322504, 
    2.34226730368296, 2.41807009824284, 3.10713050944275, 2.55604332232512, 
    2.63876249376368, 2.52553435501615, 3.34748965543572, 2.2870366967431, 
    2.03841786934404, 2.35928570957358, 2.87556220494742, 2.22961211415556, 
    2.1119467179291, 2.38366791198404, 2.90661924228036, 2.88308106130681, 
    2.75229356353403, 2.66853583477661, 2.24942050623553, 2.33307764796272, 
    2.55875224364777, 2.61719769278681, 2.55515364219301, 3.17709475900592, 
    2.32203350154043, 2.60428945820414, 2.18362732704861, 2.62147702397679
    ), `lm~RMSE` = c(3.83610227812262, 3.01535011236772, 3.22656393210266, 
    3.44215510607111, 4.55176086016678, 4.36804575176767, 3.91881082981543, 
    3.44972903209715, 5.3000719954214, 3.28024534089527, 3.40184439407703, 
    3.42517142424813, 4.33823631775932, 3.2549018827437, 2.79782683256458, 
    3.88176686743563, 3.99400309140178, 4.02024745820144, 4.27638700207352, 
    4.4036674319233, 3.47069713828166, 3.56321643484729, 3.78516055733044, 
    4.15758564870197, 3.95999973052122, 4.99712930405529, 3.15399322445381, 
    4.00714835731731, 3.61235699942935, 4.19012335583272), `lm~Rsquared` = c(0.865104037151009, 
    0.908210189706348, 0.894603035957269, 0.889951066923592, 
    0.772594379016119, 0.845239319966219, 0.846753940061429, 
    0.877485366659223, 0.717465074875061, 0.894027005241483, 
    0.898498993660265, 0.887397670809066, 0.807861403807885, 
    0.893097909241234, 0.922591240079223, 0.855254817516483, 
    0.84182402510816, 0.824042226498216, 0.812234858951197, 0.829950011488813, 
    0.87683674103531, 0.879649255943108, 0.860975714802887, 0.849268376580637, 
    0.852539478408397, 0.759787306259861, 0.910931098648678, 
    0.853815007987683, 0.884531701065134, 0.833019897213165), 
    `xgbTree~MAE` = c(2.4777022846682, 2.25169672461796, 2.03420905969184, 
    2.23259347312069, 2.73607394199619, 2.47993684162069, 2.52179454982533, 
    2.46859665413294, 3.16551990194333, 2.29629414853837, 1.86867016725456, 
    2.15023811154858, 2.81022942816054, 2.0668597563676, 2.02731693679798, 
    2.20906731857351, 2.6126301521699, 2.63438414655486, 2.49849181724452, 
    2.58315032853461, 2.1174106360667, 2.17002235802629, 2.38266828568012, 
    2.4731889389573, 2.22092956132052, 2.97509737485014, 2.02821808698635, 
    2.37741529040335, 2.17489244537738, 2.67769390664484), `xgbTree~RMSE` = c(3.88744216173628, 
    3.26904716385066, 3.15123764455891, 3.30593066661747, 4.30443045867936, 
    4.30359767733354, 4.17281668882947, 3.43702552958272, 5.19381024695531, 
    3.51132191122107, 3.2960896437404, 3.30553355064881, 4.52189776629905, 
    3.12396971961677, 3.01745763494595, 3.70758929813404, 3.85752988766883, 
    3.86644883568003, 3.95580323480958, 4.62811939772593, 3.34514611632492, 
    3.70748788558247, 3.84489010011259, 4.27478414203653, 3.64829475662718, 
    4.75601777810273, 3.06811494018071, 3.79532008262982, 3.76570245768938, 
    4.53387230666857), `xgbTree~Rsquared` = c(0.86040976060626, 
    0.892076639815855, 0.899303339196287, 0.900336168781685, 
    0.796216085794855, 0.851426367493197, 0.828086267934242, 
    0.877685916302618, 0.732322968899953, 0.877744206236396, 
    0.90460259489307, 0.898249020835918, 0.793643694987023, 0.90032651597425, 
    0.909562311133301, 0.867695205000381, 0.852290394040215, 
    0.836404679550136, 0.839521933423382, 0.813296705968721, 
    0.886532091626716, 0.869200062775477, 0.857803151612519, 
    0.841387342338447, 0.874993079626347, 0.781855945765315, 
    0.91579705542753, 0.866237752744825, 0.873595094407446, 0.804632506902698
    )), row.names = c(NA, -30L), class = "data.frame"), models = c("ctree2", 
"rpart", "lm", "xgbTree"), metrics = c("MAE", "RMSE", "Rsquared"
), timings = structure(list(Everything = c(2.96000000000095, 
1.03000000000065, 0.729999999999563, 142.74), FinalModel = c(0, 
0, 0, 0.0300000000006548), Prediction = c(NA_real_, NA_real_, 
NA_real_, NA_real_)), class = "data.frame", row.names = c("ctree2", 
"rpart", "lm", "xgbTree")), methods = c(ctree2 = "ctree2", rpart = "rpart", 
lm = "lm", xgbTree = "xgbTree")), class = "resamples")

因此,下一步应该是绘制结果。caret包可以自动绘制所有这些结果(Rsquared、MAE和RMSE),但我对这样的图形不满意。

enter image description here

所以我的问题是:有人能帮我仅针对RMSE用箱形图绘制这些数据吗?就像下面使用plotly或ggplot的示例那样。

enter image description here

r ggplot2 plotly r-caret
1个回答
0
投票

在以dput格式发布的对象中,只需关注列表成员values,因此首先将其提取出来。假设发布的对象名为resamp,可通过以下代码完成:

# Pull the per-resample metrics data frame out of the resamples object.
df1 <- resamp[["values"]]

现在绘制图表。与许多类似的ggplot问题一样,这是一个将data.frame从宽格式重塑为长格式(reshaping from wide to long format)的问题。

library(caret)
library(tidyverse)


# Box plot of the resampled RMSE, one box per model.
# `df1` is the `values` data frame of the resamples object; its metric columns
# are named "<model>~<metric>" (e.g. "lm~RMSE").
df1 %>%
  # Select the RMSE columns with a tidyselect helper evaluated on the piped
  # data, rather than positions computed from the global `df1` via grep().
  select(ends_with("~RMSE")) %>%
  pivot_longer(
    cols = everything(),
    names_to = "model",
    # Keep only the part before the "~RMSE" suffix as the model name.
    names_pattern = "^(.*)~RMSE$",
    values_to = "RMSE"
  ) %>%
  ggplot(aes(x = model, y = RMSE, fill = model)) +
  geom_boxplot()

enter image description here

© www.soinside.com 2019 - 2024. All rights reserved.