使用 mutate 添加一个变量,统计另一变量中所有唯一值的数量

问题描述 投票:0回答:2

我有以下数据框

# Example data: `ID` identifies the group, `obj` is the record type, and
# `gens` holds a code that is NA when nothing is recorded for that row.
table <- data.frame(
  ID = rep(c("A", "B", "C", "D"), times = c(7L, 2L, 3L, 2L)),
  obj = c(
    "fu", "sp", "sp", "sp", "sp", "sp", "sp",
    "fu", "sp",
    "fu", "sp", "sp",
    "fu", "sp"
  ),
  gens = c(
    NA, NA, "AZJE", NA, "JAUE", NA, "AZJE",
    NA, NA,
    NA, "AUEJ", NA,
    NA, "EUF"
  )
)

> table
   ID obj gens
1   A  fu <NA>
2   A  sp <NA>
3   A  sp AZJE
4   A  sp <NA>
5   A  sp JAUE
6   A  sp <NA>
7   A  sp AZJE
8   B  fu <NA>
9   B  sp <NA>
10  C  fu <NA>
11  C  sp AUEJ
12  C  sp <NA>
13  D  fu <NA>
14  D  sp  EUF

我想添加一个变量,用来统计每个 ID 在 gens 中唯一值(不含 NA)的数量。

期望的输出如下

> output
   ID obj gens res
1   A  fu <NA>   2
2   A  sp <NA>   2
3   A  sp AZJE   2
4   A  sp <NA>   2
5   A  sp JAUE   2
6   A  sp <NA>   2
7   A  sp AZJE   2
8   B  fu <NA>   0
9   B  sp <NA>   0
10  C  fu <NA>   1
11  C  sp AUEJ   1
12  C  sp <NA>   1
13  D  fu <NA>   1
14  D  sp  EUF   1

我可以统计每个 ID 的非缺失值的数量,但不知道如何只统计唯一值。以下是我尝试过的代码:

# Attempt: for each ID, count the rows where obj is "sp" and gens is not NA.
# This counts rows rather than distinct values, so a gens value that repeats
# within an ID (e.g. "AZJE" for ID "A") is counted once per occurrence.
table_test <- table %>%
  group_by(ID) %>%
  mutate(res = sum(!is.na(gens) & obj == "sp"))
 
> table_test
# A tibble: 14 × 4
# Groups:   ID [4]
   ID    obj   gens    res
   <chr> <chr> <chr> <int>
 1 A     fu    NA        3
 2 A     sp    NA        3
 3 A     sp    AZJE      3
 4 A     sp    NA        3
 5 A     sp    JAUE      3
 6 A     sp    NA        3
 7 A     sp    AZJE      3
 8 B     fu    NA        0
 9 B     sp    NA        0
10 C     fu    NA        1
11 C     sp    AUEJ      1
12 C     sp    NA        1
13 D     fu    NA        1
14 D     sp    EUF       1

r sum tidyverse mutate
2个回答
0
投票
# For each ID, count the distinct non-NA values of gens and attach that count
# to every row of the ID. `.by` applies the grouping to this call only.
mutate(
  table,
  res = n_distinct(gens, na.rm = TRUE),
  .by = ID
)
#    ID obj gens res
# 1   A  fu <NA>   2
# 2   A  sp <NA>   2
# 3   A  sp AZJE   2
# 4   A  sp <NA>   2
# 5   A  sp JAUE   2
# 6   A  sp <NA>   2
# 7   A  sp AZJE   2
# 8   B  fu <NA>   0
# 9   B  sp <NA>   0
# 10  C  fu <NA>   1
# 11  C  sp AUEJ   1
# 12  C  sp <NA>   1
# 13  D  fu <NA>   1
# 14  D  sp  EUF   1

0
投票
# For each ID, count the distinct non-NA values of gens and attach that count
# to every row of the ID.
# setdiff() already returns de-duplicated values, so wrapping it in unique()
# is unnecessary; ungroup() drops the grouping afterwards so later steps are
# not silently computed per ID.
table |>
  group_by(ID) |>
  mutate(res = length(setdiff(gens, NA))) |>
  ungroup()
© www.soinside.com 2019 - 2024. All rights reserved.