我有一个问题,如何根据一个变量是否满足某个条件来重新分类。也就是说,如果这个类别不符合条件,它就会被分配到另一个符合条件的类别。
我的数据有如下形式。
data = data.frame(firm_size = c("Micro", "Small", "Medium","Big"),
employees = c(5,10,100,1000))
> data
firm_size employees
1 Micro 5
2 Small 10
3 Medium 100
4 Big 1000
所以,如果我的条件是,我必须把雇员人数少于10人的公司归为一组 然后把他们和符合标准的另一个类别结合起来。
> new_data
firm_size employees
1 Micro-Small 15
3 Medium 100
4 Big 1000
我想做的是写一个函数来概括这个过程,例如,如果我的数据是
> data
firm_size employees
1 Micro 5
2 Small 8
3 Medium 9
4 Big 1000
> new_data
firm_size employees
1 Micro-Small-Medium 22
4 Big 1000
我想,可以用整蛊的工具来完成。
先谢谢你
这里有一个方法 tally
:
library(dplyr)
size <- 10
data %>%
arrange(firm_size,desc(employees)) %>%
group_by(firm_size = c(as.character(firm_size[employees > size]),
rep(paste(firm_size[employees <= size], collapse = "-"),
sum(employees <= size)))) %>%
tally(employees, name = "employees")
## A tibble: 3 x 2
# firm_size employees
# <chr> <dbl>
#1 Big 1000
#2 Medium 100
#3 Small-Micro 15
而对于你的第二组数据。
data2 %>%
arrange(firm_size,desc(employees)) %>%
group_by(firm_size = c(as.character(firm_size[employees > size]),
rep(paste(firm_size[employees <= size], collapse = "-"),
sum(employees <= size)))) %>%
tally(employees, name = "employees")
## A tibble: 2 x 2
# firm_size employees
# <chr> <int>
#1 Big 1000
#2 Medium-Small-Micro 22
数据
data <- structure(list(firm_size = structure(c(3L, 4L, 2L, 1L), .Label = c("Big",
"Medium", "Micro", "Small"), class = "factor"), employees = c(5,
10, 100, 1000)), class = "data.frame", row.names = c(NA, -4L))
data2 <- structure(list(firm_size = structure(c(3L, 4L, 2L, 1L), .Label = c("Big",
"Medium", "Micro", "Small"), class = "factor"), employees = c(5L,
8L, 9L, 1000L)), class = "data.frame", row.names = c("1", "2",
"3", "4"))
您可以使用great forcats软件包
library(tidyverse)
data <- data.frame(
firm_size = c("Micro", "Small", "Medium", "Big", "Small"),
employees = c(5, 10, 100, 1000, 10)
)
# If you need n groups
data %>%
mutate(firm_size2 = firm_size %>% as_factor() %>% fct_lump(n = 2, w = employees)) %>%
group_by(firm_size2) %>%
summarise(sum_emp = sum(employees),.groups = "drop")
#> # A tibble: 3 x 2
#> firm_size2 sum_emp
#> <fct> <dbl>
#> 1 Medium 100
#> 2 Big 1000
#> 3 Other 25
# If you need at least x on the sum of a vector
data %>%
mutate(firm_size2 = firm_size %>% as_factor() %>% fct_lump_min(min = 10, w = employees)) %>%
group_by(firm_size2) %>%
summarise(sum_emp = sum(employees),.groups = "drop")
#> # A tibble: 4 x 2
#> firm_size2 sum_emp
#> <fct> <dbl>
#> 1 Small 20
#> 2 Medium 100
#> 3 Big 1000
#> 4 Other 5
创建于2020-06-11 重读包 (v0.3.0)
然而,另一种解决方案,设置成一个自定义函数。
library(tidyverse)
mymerge <- function(dat, min) {
merged_dat <- dat %>%
filter(if_else(employees <= min, TRUE, FALSE)) %>%
summarize(firm_size = str_flatten(firm_size, collapse = " - "),
employees = sum(employees))
dat %>%
filter(if_else(employees <= min, FALSE, TRUE)) %>%
bind_rows(merged_dat)
}
mymerge(data, 30)
firm_size employees
1 Medium 100
2 Big 1000
3 Micro - Small 15
mymerge(data, 300)
firm_size employees
1 Big 1000
2 Micro - Small - Medium 115