library(dplyr)
# Example data: one row per observation, with a grouping column (`group`)
# and two categorical columns (`category1`, `category2`).
data <- data.frame(
  group = c("A", "A", "A", "B", "B"),
  category1 = c("X", "Y", "Z", "Z", "X"),
  category2 = c("M", "Q", "M", "M", "L")
)
# Frequency and within-group percentage of each category2 value.
data %>%
  group_by(group, category2) %>%
  # .groups = "drop" returns an ungrouped result directly, so no separate
  # ungroup() step is needed and summarise() prints no grouping message.
  summarise(count = n(), .groups = "drop") %>%
  group_by(group) %>%
  # Each count is divided by the total count of its own group.
  mutate(percent = count / sum(count) * 100)
# Frequency and within-group percentage of each category1 value.
data %>%
  group_by(group, category1) %>%
  # .groups = "drop" returns an ungrouped result directly, so no separate
  # ungroup() step is needed and summarise() prints no grouping message.
  summarise(count = n(), .groups = "drop") %>%
  group_by(group) %>%
  # Each count is divided by the total count of its own group.
  mutate(percent = count / sum(count) * 100)
我想分别汇总 category1 和 category2 这两个变量。像上面那样对每个变量单独计算是可行的，但我不知道如何把它们合并到一次操作中并得到相同的结果。我尝试了下面的写法，但结果不正确：
# Attempted combined summary: counts every (group, category1, category2)
# combination, then expresses each count as a percentage of its group's total.
data %>%
  group_by(group, category1, category2) %>%
  summarise(count = n()) %>%
  # group_by() overrides whatever grouping summarise() left in place.
  group_by(group) %>%
  mutate(percent = 100 * (count / sum(count)))
期望的输出是一张表格，其中包含每个分组内各个类别变量的频数和百分比，并且每个类别变量的统计相互独立、不受其他类别变量的影响……
# Example input: five observations across two groups, each carrying two
# categorical columns that are summarised separately below.
data <- data.frame(
  group     = c("A", "A", "A", "B", "B"),
  category1 = c("X", "Y", "Z", "Z", "X"),
  category2 = c("M", "Q", "M", "M", "L")
)
library(tidyverse)
# Names of the columns to summarise. Each element is named after itself so
# the names can later label which column a set of results came from.
to_do_list <- setNames(nm = c("category2", "category1"))
# For each column named in to_do_list, tabulate its values within each group
# (count and within-group percentage), then stack the per-column tables into
# one data frame. The outer parentheses print the result as well as assign it.
(result_df <- map(to_do_list, \(x) {
  data |>
    # .data[[x]] looks up the column whose name is stored in the string x.
    group_by(group, category_value = .data[[x]]) |>
    # .groups = "drop" yields an ungrouped table without a separate ungroup()
    # and without summarise() printing its grouping message.
    summarise(count = n(), .groups = "drop") |>
    group_by(group) |>
    mutate(percent = count / sum(count) * 100) |>
    # Hand back each piece ungrouped so the pieces combine as plain tibbles.
    ungroup()
}) |>
  # names_to stores each list element's name in a "category_source" column.
  list_rbind(names_to = "category_source"))
# A tibble: 9 × 5
category_source group category_value count percent
<chr> <chr> <chr> <int> <dbl>
1 category2 A M 2 66.7
2 category2 A Q 1 33.3
3 category2 B L 1 50
4 category2 B M 1 50
5 category1 A X 1 33.3
6 category1 A Y 1 33.3
7 category1 A Z 1 33.3
8 category1 B X 1 50
9 category1 B Z 1 50