大家好,我想用唯一 ID 来标记 `geom_boxplot` 中的异常值。一般来说,下面这个公式就能做到(其中 `whatever` 是数据列,`id` 是我想显示的标签):
# Add a text layer that labels only the boxplot outliers: a point shows its
# `id` when it lies more than 1.5 * IQR above the third quartile or below the
# first quartile of `whatever`; every other point gets an empty label.
# `plot` is assumed to be an existing ggplot object whose data has the
# columns `whatever` and `id`.
plot +
  geom_text(
    aes(
      label = ifelse(
        # IQR() takes no probability argument (its second parameter is
        # `na.rm`), so it is called with the data column only.
        whatever > quantile(whatever, 0.75) + 1.5 * IQR(whatever) |
          whatever < quantile(whatever, 0.25) - 1.5 * IQR(whatever),
        id,
        ""
      )
    ),
    hjust = -1
  )
如果我只有一个箱线图(plot_all),这效果很好。但是,如果我按某个变量(例如 `sex`)把箱线图分组,异常值就会根据整个数据集而不是各个数据子集来计算。有没有办法只把所选的数据子集传进去,让计算仅针对这些数据进行?
# Reproducible example: load the packages and build the demo data set.
# The workspace is intentionally NOT cleared with rm(list = ls()); a pasted
# example should never delete the reader's own objects.
library(tidyverse)
library(ggplot2)
library(ggbeeswarm)

# Fixed seed so the random values below are reproducible.
set.seed(1)

# 1000 "male" rows: 998 values drawn uniformly from [0.75, 1.25] plus two
# hand-placed extreme values (0 and 2).
# sample() on a single value simply repeats it; it is kept (rather than
# rep()) so the sequence of random draws after set.seed(1) stays the same.
male <- tibble(
  sex = sample("male", 1000, replace = TRUE),
  id = as.character(1:1000),
  whatever = c(runif(998, min = 0.75, max = 1.25), 0, 2)
)

# 1000 "female" rows: 998 values drawn uniformly from [1.25, 1.75] plus two
# hand-placed extreme values (0.5 and 2.5).
female <- tibble(
  sex = sample("female", 1000, replace = TRUE),
  id = as.character(1001:2000),
  whatever = c(runif(998, min = 1.25, max = 1.75), 0.5, 2.5)
)

# Stack both groups into one long table with columns sex, id, whatever.
data <- bind_rows(male, female)
# Single boxplot over all rows of `data`, with the raw points overlaid and the
# outliers (beyond 1.5 * IQR from the quartiles) labelled with their `id`.
plot_all <- ggplot(data, aes("all", whatever)) +
  geom_boxplot() +
  geom_quasirandom(size = 0.3, alpha = 0.3) +
  geom_text(
    aes(
      label = ifelse(
        # IQR() takes no probability argument (its second parameter is
        # `na.rm`), so it is called with the data column only.
        whatever > quantile(whatever, 0.75) + 1.5 * IQR(whatever) |
          whatever < quantile(whatever, 0.25) - 1.5 * IQR(whatever),
        id,
        ""
      )
    ),
    hjust = -1
  )
# One boxplot per `sex`, with the raw points overlaid and labelled outliers.
# NOTE: the quantile()/IQR() calls inside aes() are not grouped by `sex`; as
# described in the question, the thresholds end up being computed from the
# whole `whatever` column rather than per box. This block deliberately keeps
# that behaviour because it demonstrates the problem being asked about.
plot_sex <- ggplot(data, aes(sex, whatever)) +
  geom_boxplot() +
  geom_quasirandom(size = 0.3, alpha = 0.3) +
  geom_text(
    aes(
      label = ifelse(
        # IQR() takes no probability argument (its second parameter is
        # `na.rm`), so it is called with the data column only.
        whatever > quantile(whatever, 0.75) + 1.5 * IQR(whatever) |
          whatever < quantile(whatever, 0.25) - 1.5 * IQR(whatever),
        id,
        ""
      )
    ),
    hjust = -1
  )
可以通过操作图层数据来做到这一点,但为什么要这么麻烦呢?只需在把数据传入 ggplot 之前先修改数据即可:
# Flag the values of `x` that fall outside the boxplot whiskers.
# boxplot.stats(x)$stats holds the five numbers drawn by the boxplot; its
# smallest and largest entries are the whisker ends, so anything beyond them
# is an outlier. Returns a logical vector the same length as `x`.
is_boxplot_outlier <- function(x) {
  # Compute the statistics once and reuse both whisker ends.
  whiskers <- range(boxplot.stats(x)$stats)
  x > whiskers[2] | x < whiskers[1]
}

# Build the label column per `sex` before plotting, so each box's outliers are
# judged against that group's own whiskers, then draw the labelled plot.
data %>%
  group_by(sex) %>%
  mutate(label = ifelse(is_boxplot_outlier(whatever), id, "")) %>%
  # Drop the grouping so it does not leak into the plot data.
  ungroup() %>%
  ggplot(aes(sex, whatever)) +
  geom_boxplot() +
  geom_quasirandom(size = 0.3, alpha = 0.3) +
  geom_text(aes(label = label), hjust = -1)
# Alternative: keep the plot data untouched and give only the text layer its
# own data. `. %>% ...` builds a function that the layer applies to the plot
# data; it groups by `sex` and keeps just the rows lying more than 1.5 * IQR
# outside that group's quartiles, so only outliers are labelled.
ggplot(data, aes(sex, whatever)) +
  geom_boxplot() +
  geom_quasirandom(size = 0.3, alpha = 0.3) +
  geom_text(
    data = . %>%
      group_by(sex) %>%
      filter(
        # IQR() takes no probability argument (its second parameter is
        # `na.rm`), so it is called with the data column only.
        whatever > quantile(whatever, 0.75) + 1.5 * IQR(whatever) |
          whatever < quantile(whatever, 0.25) - 1.5 * IQR(whatever)
      ),
    aes(label = id),
    hjust = -1
  )
使用 `ave` 即时进行每组计算:
library(ggplot2)
library(ggbeeswarm)
# Compute the outlier flag per `sex` directly inside aes() with ave(), which
# applies the function to each group's values and returns the results in the
# original row order.
ggplot(data, aes(sex, whatever)) +
  geom_boxplot() +
  geom_quasirandom(size = 0.3, alpha = 0.3) +
  geom_text(
    aes(
      # Base ifelse() is used so this snippet needs nothing beyond the
      # packages it loads itself.
      label = ifelse(
        # ave() stores the function's result back into a copy of the numeric
        # input, so the TRUE/FALSE flags come back as 1/0; as.logical()
        # converts them back.
        as.logical(
          ave(whatever, sex, FUN = \(x) {
            # IQR() takes no probability argument (its second parameter is
            # `na.rm`), so it is called with the values only.
            x > quantile(x, 0.75) + 1.5 * IQR(x) |
              x < quantile(x, 0.25) - 1.5 * IQR(x)
          })
        ),
        id,
        ""
      )
    ),
    hjust = -1
  )