保留至少有一个预定义列子集的行总和大于阈值的行

问题描述 投票:0回答:2

我有一个这样的数据框:

# Example data: six observations (rows) measured on seven samples (columns).
# The values are laid out row by row so the literal reads like the printed
# table.
df <- as.data.frame(
  matrix(
    c(
      0, 0.3, 0.2, 0.4, 0.1, 0.2, 0.2,
      1, 3,   1,   2,   2,   3,   1,
      2, 2,   3,   4,   4,   1,   1,
      0, 0.4, 0.1, 0.3, 0.2, 0.1, 0.4,
      2, 2,   3,   1,   5,   6,   1,
      1, 3,   3,   1,   3,   3,   1
    ),
    nrow = 6,
    byrow = TRUE,
    dimnames = list(NULL, paste0("sample", 1:7))
  )
)

以及在此 df 中定义的组:

# Lookup table assigning each sample column of `df` to a group:
# sample1-3 -> group1, sample4-5 -> group2, sample6-7 -> group3.
groups <- data.frame(
  samples = paste0("sample", 1:7),
  groups = rep(c("group1", "group2", "group3"), times = c(3, 2, 2))
)

使用 R,我只想保留至少有一个组的行内总和大于 0.5 的行,这样得到的 df 将是:

  sample1 sample2 sample3 sample4 sample5 sample6 sample7
2       1       3       1       2       2       3       1
3       2       2       3       4       4       1       1
5       2       2       3       1       5       6       1
6       1       3       3       1       3       3       1
r dataframe dplyr subset rowsum
2个回答
0
投票
library(dplyr)
library(purrr)
library(tibble)

# Keep the rows of `df` for which at least one group of columns (as defined
# in `groups`) has a row sum above the threshold.
threshold <- 0.5

keep <- groups %>%
  group_split(groups, .keep = FALSE) %>%
  # One logical vector per group: is that group's row sum above the threshold?
  map(function(grp) rowSums(df[grp$samples]) > threshold) %>%
  # OR the vectors together so a row passes as soon as any group passes.
  # `.init` keeps the result well-defined (all FALSE) if `groups` has no rows.
  reduce(`|`, .init = rep(FALSE, nrow(df)))

# Index the original data frame directly: the original row names and row
# order are preserved, and distinct rows that happen to hold identical values
# are not collapsed. `which()` drops rows whose test result is NA, mirroring
# what `dplyr::filter()` does.
df[which(keep), , drop = FALSE]

#>   sample1 sample2 sample3 sample4 sample5 sample6 sample7
#> 2       1       3       1       2       2       3       1
#> 3       2       2       3       4       4       1       1
#> 5       2       2       3       1       5       6       1
#> 6       1       3       3       1       3       3       1

0
投票

这里是一个 `tidyverse` 版本。注意:只保留满足条件的行,不满足条件的行(例如第 4 行)会被去掉:

library(dplyr)
library(tidyr)

# Tidyverse approach: reshape to long format, sum the values per original row
# and sample group, keep the rows where any group sum exceeds 0.5, then
# reshape back to wide format.
df %>%
  # Explicit id for each original row; it survives the reshaping and
  # identifies the kept rows in the result.
  mutate(row = row_number()) %>%
  pivot_longer(-row) %>%
  left_join(groups, by = c("name" = "samples")) %>%
  mutate(sum_group = sum(value), .by = c(row, groups)) %>%
  # Per-call grouping with `.by` returns an ungrouped result, so no
  # `ungroup()` is needed afterwards.
  filter(any(sum_group > 0.5), .by = row) %>%
  # Keep `row` explicitly instead of relying on it being re-added implicitly
  # as a grouping variable.
  select(row, name, value) %>%
  pivot_wider(names_from = name, values_from = value)


    row sample1 sample2 sample3 sample4 sample5 sample6 sample7
  <int>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
1     2       1       3       1       2       2       3       1
2     3       2       2       3       4       4       1       1
3     5       2       2       3       1       5       6       1
4     6       1       3       3       1       3       3       1
© www.soinside.com 2019 - 2024. All rights reserved.