基于单独的列缩放一列中的值

问题描述 投票:0回答:1

我希望按样本将数值归一化为 100%。我的数据框(df)包含 `Project` 列,我想按该列对 `n` 列进行分组,使每个 Project 内 `n` 的值归一化后合计为 100%。

library(dplyr)

# Asker's attempt: group by Project, then pipe the grouped data into scale().
# NOTE(review): this is the code the question is about; it does not produce
# the per-Project proportions shown as the expected output.
perc <- ex %>%
  group_by(Project) %>%
  scale(n)

对于我提供的数据,我期望得到的输出 `perc` 如下:

Project   reaction           norm
Ga0598239 arsenate-reduction 0.0312
Ga0598239 carbon-fixation    0.0033

等等。

> dput(ex)
structure(list(Project = c("Ga0598239", "Ga0598239", "Ga0598239", 
"Ga0598239", "Ga0598239", "Ga0598239", "Ga0598239", "Ga0598239", 
"Ga0598239", "Ga0598239", "Ga0598239", "Ga0598239", "Ga0598239", 
"Ga0598239", "Ga0598240", "Ga0598240", "Ga0598240", "Ga0598240", 
"Ga0598240", "Ga0598240"), reaction = c("arsenate-reduction", 
"carbon-fixation", "formaldehyde-oxidation", "halogenated-compounds-breakdown", 
"hydrogen-oxidation", "iron-oxidation", "iron-reduction", "manganese-oxidation", 
"methanol-oxidation", "selenate-reduction", "sulfide-oxidation", 
"sulfite-reduction", "sulfur-oxidation", "thiosulfate-disproportionation", 
"arsenate-reduction", "carbon-fixation", "formaldehyde-oxidation", 
"halogenated-compounds-breakdown", "hydrogen-oxidation", "iron-oxidation"
), n = c(103L, 11L, 157L, 90L, 2296L, 85L, 33L, 156L, 17L, 38L, 
8L, 9L, 259L, 13L, 90L, 21L, 202L, 81L, 2090L, 73L)), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -20L), groups = structure(list(
    Project = c("Ga0598239", "Ga0598239", "Ga0598239", "Ga0598239", 
    "Ga0598239", "Ga0598239", "Ga0598239", "Ga0598239", "Ga0598239", 
    "Ga0598239", "Ga0598239", "Ga0598239", "Ga0598239", "Ga0598239", 
    "Ga0598240", "Ga0598240", "Ga0598240", "Ga0598240", "Ga0598240", 
    "Ga0598240"), reaction = c("arsenate-reduction", "carbon-fixation", 
    "formaldehyde-oxidation", "halogenated-compounds-breakdown", 
    "hydrogen-oxidation", "iron-oxidation", "iron-reduction", 
    "manganese-oxidation", "methanol-oxidation", "selenate-reduction", 
    "sulfide-oxidation", "sulfite-reduction", "sulfur-oxidation", 
    "thiosulfate-disproportionation", "arsenate-reduction", "carbon-fixation", 
    "formaldehyde-oxidation", "halogenated-compounds-breakdown", 
    "hydrogen-oxidation", "iron-oxidation"), .rows = structure(list(
        1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 
        14L, 15L, 16L, 17L, 18L, 19L, 20L), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -20L), .drop = TRUE))
r dataframe normalization
1个回答
0
投票

您需要的是下面这段代码。请注意,您提供的数据已经分组,因此需要先调用 `ungroup()`:

library(dplyr) # the `.by` argument requires dplyr >= 1.1.0

# `ex` is the data frame from the question's dput(); it is already grouped,
# and `.by` cannot be combined with a grouped data frame, so drop the existing
# grouping first. `norm` is each row's share of its Project's total `n`.
ex %>%
  ungroup() %>%
  mutate(norm = n / sum(n), .by = Project)
   Project   reaction                            n    norm
   <chr>     <chr>                           <int>   <dbl>
 1 Ga0598239 arsenate-reduction                103 0.0315 
 2 Ga0598239 carbon-fixation                    11 0.00336
 3 Ga0598239 formaldehyde-oxidation            157 0.0479 
 4 Ga0598239 halogenated-compounds-breakdown    90 0.0275 
 5 Ga0598239 hydrogen-oxidation               2296 0.701  
 6 Ga0598239 iron-oxidation                     85 0.0260 
 7 Ga0598239 iron-reduction                     33 0.0101 
 8 Ga0598239 manganese-oxidation               156 0.0476 
 9 Ga0598239 methanol-oxidation                 17 0.00519
10 Ga0598239 selenate-reduction                 38 0.0116 
11 Ga0598239 sulfide-oxidation                   8 0.00244
12 Ga0598239 sulfite-reduction                   9 0.00275
13 Ga0598239 sulfur-oxidation                  259 0.0791 
14 Ga0598239 thiosulfate-disproportionation     13 0.00397
15 Ga0598240 arsenate-reduction                 90 0.0352 
16 Ga0598240 carbon-fixation                    21 0.00821
17 Ga0598240 formaldehyde-oxidation            202 0.0790 
18 Ga0598240 halogenated-compounds-breakdown    81 0.0317 
19 Ga0598240 hydrogen-oxidation               2090 0.817  
20 Ga0598240 iron-oxidation                     73 0.0285 
© www.soinside.com 2019 - 2024. All rights reserved.