根据aes仅使用数据子集在ggplot中进行计算

问题描述 投票:0回答:2

大家好,我想用我的

geom_boxplot
中的唯一 ID 来标记异常值。通常我使用下面这个公式(其中 whatever 是数据列,id 是我想显示的 ID):

# Add a text layer that labels every point outside the 1.5 * IQR fences with
# its `id`; all other points get an empty label.
# IQR() takes no probability argument (its second parameter is `na.rm`), so it
# is called with the data vector only.
plot +
  geom_text(
    aes(
      label = ifelse(
        (whatever > (quantile(whatever, 0.75) + 1.5 * IQR(whatever))) |
          (whatever < (quantile(whatever, 0.25) - 1.5 * IQR(whatever))),
        id,
        ""
      )
    ),
    hjust = -1
  )

如果我只有一个箱线图(plot_all),这效果很好。但是,如果我用一个值(例如

sex
)分隔箱线图,则异常值是根据整个数据集而不是各个数据子集计算的。有没有办法只把所选的数据子集传入,从而仅根据这些数据进行计算?

library(tidyverse)
library(ggplot2)
library(ggbeeswarm)

set.seed(1)

# Two groups of 1000 rows each: 998 uniform draws plus two hand-picked extreme
# values. sample() from a single value yields that value for every row.
male <- tibble(
  sex = sample("male", 1000, replace = TRUE),
  id = as.character(1:1000),
  whatever = c(runif(998, min = .75, max = 1.25), 0, 2)
)

female <- tibble(
  sex = sample("female", 1000, replace = TRUE),
  id = as.character(1001:2000),
  whatever = c(runif(998, min = 1.25, max = 1.75), .5, 2.5)
)

data <- bind_rows(male, female)

# Return `id` for values outside the 1.5 * IQR fences of `value`, "" otherwise.
# The fences are computed from the whole vector that is passed in.
# IQR() has no probability argument (its second parameter is `na.rm`), so it
# is called with the data vector only.
label_outliers <- function(value, id) {
  upper <- quantile(value, 0.75) + 1.5 * IQR(value)
  lower <- quantile(value, 0.25) - 1.5 * IQR(value)
  ifelse((value > upper) | (value < lower), id, "")
}

# Single boxplot over all rows: the fences match the box that is drawn.
plot_all <- ggplot(data, aes("all", whatever)) +
  geom_boxplot() +
  geom_quasirandom(size = .3, alpha = .3) +
  geom_text(aes(label = label_outliers(whatever, id)), hjust = -1)

# One boxplot per sex. NOTE: the label aesthetic still sees the complete
# `whatever` column, so the fences are global rather than per sex -- this is
# the behaviour the question is about.
plot_sex <- ggplot(data, aes(sex, whatever)) +
  geom_boxplot() +
  geom_quasirandom(size = .3, alpha = .3) +
  geom_text(aes(label = label_outliers(whatever, id)), hjust = -1)
r ggplot2 tidyverse
2个回答
2
投票

可以通过操作图层数据来做到这一点,但为什么要这么做呢?只需在进入 ggplot 的过程中修改数据即可:

# Compute the label per sex before plotting: a point is labelled with its id
# when it lies beyond the whisker ends reported by boxplot.stats().
data %>%
  group_by(sex) %>%
  mutate(
    label = ifelse(
      whatever < min(boxplot.stats(whatever)$stats) |
        whatever > max(boxplot.stats(whatever)$stats),
      id,
      ""
    )
  ) %>%
  ggplot(aes(sex, whatever)) +
  geom_boxplot() +
  geom_quasirandom(size = .3, alpha = .3) +
  geom_text(aes(label = label), hjust = -1)

这比下面更改图层数据的做法(结果相同)更整洁:

# Layer-data variant: the text layer receives only the outlier rows.
# `. %>% ...` builds a function; ggplot2 calls a function given as `data`
# with the plot data, so the filter runs per sex on the same data frame.
# IQR() has no probability argument (its second parameter is `na.rm`), so it
# is called with the data vector only.
ggplot(data, aes(sex, whatever)) +
  geom_boxplot() +
  geom_quasirandom(size = .3, alpha = .3) +
  geom_text(
    data = . %>%
      group_by(sex) %>%
      filter(
        (whatever > (quantile(whatever, 0.75) + 1.5 * IQR(whatever))) |
          (whatever < (quantile(whatever, 0.25) - 1.5 * IQR(whatever)))
      ),
    aes(label = id),
    hjust = -1
  )


                

2
投票
ave

即时进行每组计算:

library(dplyr) # provides if_else()
library(ggplot2)
library(ggbeeswarm)

# Flag outliers per sex directly inside the aesthetic.
ggplot(data, aes(sex, whatever)) +
  geom_boxplot() +
  geom_quasirandom(size = .3, alpha = .3) +
  geom_text(
    aes(
      label = if_else(
        # ave() applies FUN within each `sex` group and returns a vector
        # aligned with the rows. The flags come back as numeric 0/1 here,
        # hence the as.logical() conversion.
        as.logical(
          ave(whatever, sex, FUN = \(x) {
            # IQR() has no probability argument (its second parameter is
            # `na.rm`), so it is called with the data vector only.
            (x > (quantile(x, 0.75) + 1.5 * IQR(x))) |
              (x < (quantile(x, 0.25) - 1.5 * IQR(x)))
          })
        ),
        id,
        ""
      )
    ),
    hjust = -1
  )

© www.soinside.com 2019 - 2024. All rights reserved.