使用 mutate 跨多个变量创建一个新变量

问题描述 投票:0回答:1

我有以下数据框。CP 变量(CP_A 至 CP_D)中的每个值都采用相同的格式 -> hueX:Y-Z(个别单元格为空)。

hueX 在一行中始终相同。

我想创建另一个变量,其值等于

  • 如果 TCK_A!="Yes",则为 CP_A 的值,
  • 否则,等于根据 CP_B、CP_C 和 CP_D 计算得到的“hueX:mean(Y)-mean(Z)”(对 Y 和 Z 分别取这三列的均值)
# Example data: a numeric `hue` id per row, four "hueX:Y-Z" strings
# (CP_A to CP_D, some cells empty) and the TCK_A flag ("Yes" or "").
data_trial <- data.frame(
  hue = c(2, 8, 3, 2, 5),
  CP_A = c(
    "hue2:6789-99987", "hue8:7854-98743", "hue3:60987-123423",
    "hue2:7658-873457", "hue5:45658-676549"
  ),
  CP_B = c(
    "hue2:6782-99987", "hue8:7859-98734", "hue3:60989-123407",
    "", "hue5:45697-676598"
  ),
  CP_C = c(
    "hue2:6785-99989", "hue8:6797-99980", "hue3:60995-123434",
    "hue2:7657-8734509", "hue5:45667-676500"
  ),
  CP_D = c(
    "", "hue8:6756-99987", "hue3:60942-123412",
    "hue2:7650-87345065", "hue5:45699-676565"
  ),
  TCK_A = c("Yes", "", "Yes", "Yes", "Yes")
)

> data_trial
  hue              CP_A              CP_B              CP_C               CP_D TCK_A
1   2   hue2:6789-99987   hue2:6782-99987   hue2:6785-99989                      Yes
2   8   hue8:7854-98743   hue8:7859-98734   hue8:6797-99980    hue8:6756-99987      
3   3 hue3:60987-123423 hue3:60989-123407 hue3:60995-123434  hue3:60942-123412   Yes
4   2  hue2:7658-873457                   hue2:7657-8734509 hue2:7650-87345065   Yes
5   5 hue5:45658-676549 hue5:45697-676598 hue5:45667-676500  hue5:45699-676565   Yes


output

  hue              CP_A              CP_B              CP_C               CP_D TCK_A             output
1   2   hue2:6789-99987   hue2:6782-99987   hue2:6785-99989                      Yes    hue2:6784-99988
2   8   hue8:7854-98743   hue8:7859-98734   hue8:6797-99980    hue8:6756-99987          hue8:7854-98743
3   3 hue3:60987-123423 hue3:60989-123407 hue3:60995-123434  hue3:60942-123412   Yes  hue3:60975-123418
4   2  hue2:7658-873457                   hue2:7657-8734509 hue2:7650-87345065   Yes hue2:7654-48039787
5   5 hue5:45658-676549 hue5:45697-676598 hue5:45667-676500  hue5:45699-676565   Yes  hue5:45688-676554

我尝试过的:


  # First attempt (kept as posted): split CP_B/CP_C/CP_D into hue, Y and Z,
  # convert Y/Z to numeric, then build CP_output row by row.
  data_trial %>% 
  # Split each CP_* string at ":" into the hue label and the "Y-Z" remainder.
  separate(CP_B, into=c("hueX_B","Y_BxZ_B"), sep=":") %>%
  separate(CP_C, into=c("hueX_C","Y_CxZ_C"), sep=":") %>%
  separate(CP_D, into=c("hueX_D","Y_DxZ_D"), sep=":") %>%
  # Split each "Y-Z" remainder at "-" into separate Y and Z columns.
  separate(Y_BxZ_B, into=c("Y_B", "Z_B"), sep="-") %>%
  separate(Y_CxZ_C, into=c("Y_C", "Z_C"), sep="-") %>%
  separate(Y_DxZ_D, into=c("Y_D", "Z_D"), sep="-") %>%
  # Convert the Y/Z pieces to numeric; empty cells show up as NA in the
  # printed result.
  mutate(Y_B=as.numeric(Y_B)) %>%
  mutate(Y_C=as.numeric(Y_C)) %>%
  mutate(Y_D=as.numeric(Y_D)) %>%
  mutate(Z_B=as.numeric(Z_B)) %>%
  mutate(Z_C=as.numeric(Z_C)) %>%
  mutate(Z_D=as.numeric(Z_D)) %>%
  rowwise %>%
  # TCK_A == "Yes": "hue<hue>:<mean Y>-<mean Z>" over the B/C/D columns;
  # otherwise: the original CP_A string.
  # NOTE(review): the posted result shows "NA-NA" for every "Yes" row.
  # Presumably mean() receives the data frame produced by across() rather
  # than a numeric vector -- confirm against the dplyr documentation.
  mutate(CP_output=ifelse(TCK_A=="Yes", paste0("hue", hue, ":", mean(across(c(Y_B, Y_C, Y_D)), na.rm=TRUE), "-", mean(across(c(Z_B,Z_C,Z_D)), na.rm=TRUE)), CP_A))

# A tibble: 5 × 13
# Rowwise: 
    hue CP_A              hueX_B   Y_B    Z_B hueX_C   Y_C     Z_C hueX_D   Y_D      Z_D TCK_A CP_output      
  <dbl> <chr>             <chr>  <dbl>  <dbl> <chr>  <dbl>   <dbl> <chr>  <dbl>    <dbl> <chr> <chr>          
1     2 hue2:6789-99987   "hue2"  6782  99987 hue2    6785   99989 ""        NA       NA "Yes" hue2:NA-NA     
2     8 hue8:7854-98743   "hue8"  7859  98734 hue8    6797   99980 "hue8"  6756    99987 ""    hue8:7854-98743
3     3 hue3:60987-123423 "hue3" 60989 123407 hue3   60995  123434 "hue3" 60942   123412 "Yes" hue3:NA-NA     
4     2 hue2:7658-873457  ""        NA     NA hue2    7657 8734509 "hue2"  7650 87345065 "Yes" hue2:NA-NA     
5     5 hue5:45658-676549 "hue5" 45697 676598 hue5   45667  676500 "hue5" 45699   676565 "Yes" hue5:NA-NA 
  

前面的拆分和类型转换步骤是有效的,但也许可以简化。最后一步不起作用:只有在条件为 FALSE(即 TCK_A 不等于 "Yes")的行上才得到正确的结果。

我仍然很难跨多个列工作并在多个列上重复一些相同的操作。有什么想法可以帮助我吗?

编辑

我也尝试过下面的代码:这次得到了正确的均值,但 hue 值不正确(与我的第一次尝试正好相反)


# Second attempt (kept as posted): same splitting and numeric conversion as
# the first attempt, stored in data_trials_2, then CP_output is built with
# rowMeans() outside the pipeline.
data_trials_2 = data_trial %>% 
  # Split each CP_* string at ":" into the hue label and the "Y-Z" remainder.
  separate(CP_B, into=c("hueX_B","Y_BxZ_B"), sep=":") %>%
  separate(CP_C, into=c("hueX_C","Y_CxZ_C"), sep=":") %>%
  separate(CP_D, into=c("hueX_D","Y_DxZ_D"), sep=":") %>%
  # Split each "Y-Z" remainder at "-" into separate Y and Z columns.
  separate(Y_BxZ_B, into=c("Y_B", "Z_B"), sep="-") %>%
  separate(Y_CxZ_C, into=c("Y_C", "Z_C"), sep="-") %>%
  separate(Y_DxZ_D, into=c("Y_D", "Z_D"), sep="-") %>%
  # Convert the Y/Z pieces to numeric.
  mutate(Y_B=as.numeric(Y_B)) %>%
  mutate(Y_C=as.numeric(Y_C)) %>%
  mutate(Y_D=as.numeric(Y_D)) %>%
  mutate(Z_B=as.numeric(Z_B)) %>%
  mutate(Z_C=as.numeric(Z_C)) %>%
  mutate(Z_D=as.numeric(Z_D))

  # Columns 4/7/10 are Y_B/Y_C/Y_D and columns 5/8/11 are Z_B/Z_C/Z_D in the
  # layout produced above. No TCK_A condition is applied here, so every row
  # gets the averaged form.
  # NOTE(review): in the posted result the whole hue column is pasted into
  # every string ("huec(2, 8, 3, 2, 5)"); rowwise(data_trials_2[,1]) passes a
  # column object instead of one hue per row -- presumably plain
  # data_trials_2$hue was intended.
  # NOTE(review): the posted result shows unrounded means although round() is
  # used here, so it appears to come from a slightly different version.
  data_trials_2$CP_output= paste0("hue", rowwise(data_trials_2[,1]), ":", round(rowMeans(data_trials_2[,c(4,7,10)], na.rm=TRUE)) , "-",    round(rowMeans(data_trials_2[,c(5,8,11)], na.rm=TRUE)))

data_trials_2$CP_output
[1] "huec(2, 8, 3, 2, 5):6783.5-99988"                     
[2] "huec(2, 8, 3, 2, 5):7137.33333333333-99567"           
[3] "huec(2, 8, 3, 2, 5):60975.3333333333-123417.666666667"
[4] "huec(2, 8, 3, 2, 5):7653.5-48039787"                  
[5] "huec(2, 8, 3, 2, 5):45687.6666666667-676554.333333333"
r dataframe mutate across
1个回答
0
投票

tidyr 中的 separate 已被取代(superseded),所以我将改用 separate_wider_delim。

将列拆分之后,得到的列名很有规律,便于配合 tidy-select 的 contains 函数使用,从而可以用一次 mutate 加 across 来代替多个 mutate 调用。

此示例不保留原始列数据,但您可以通过在 separate_wider_delim 中设置 cols_remove = FALSE 来保留它们。

library(dplyr)
library(tidyr)

# Build `output` for every row of data_trial:
#   * TCK_A != 'Yes' -> the CP_A value, rebuilt from its parsed parts;
#   * otherwise      -> 'hue<hue>:<mean Y>-<mean Z>', where the means are taken
#                       over CP_B, CP_C and CP_D (missing cells ignored) and
#                       rounded to whole numbers, as in the requested output.
data_trial %>%
  # 'hueX:Y-Z' -> CP_*_hue and CP_*_values; empty cells become NA on the left.
  separate_wider_delim(starts_with('CP'), delim = ':', names = c('hue', 'values'),
                       names_sep = '_', too_few = 'align_end') %>%
  # 'Y-Z' -> CP_*_values_Y and CP_*_values_Z.
  separate_wider_delim(ends_with('_values'), delim = '-', names = c('Y', 'Z'),
                       names_sep = '_', too_few = 'align_end') %>%
  mutate(across(contains('values'), as.numeric)) %>%
  rowwise() %>%
  mutate(
    # mean() averages only its first argument, so the three values must be
    # combined with c(); passing them as separate arguments would silently
    # use the 2nd as `trim` and return just the CP_B value.
    mean_Y = round(mean(c(CP_B_values_Y, CP_C_values_Y, CP_D_values_Y),
                        na.rm = TRUE)),
    mean_Z = round(mean(c(CP_B_values_Z, CP_C_values_Z, CP_D_values_Z),
                        na.rm = TRUE))) %>%
  ungroup() %>%
  mutate(output = if_else(TCK_A != 'Yes',
                          paste0('hue',hue,':',CP_A_values_Y,'-',CP_A_values_Z),
                          paste0('hue',hue,':', mean_Y, '-', mean_Z)
  )
  ) %>% glimpse()
#> Rows: 5
#> Columns: 17
#> $ hue           <dbl> 2, 8, 3, 2, 5
#> $ CP_A_hue      <chr> "hue2", "hue8", "hue3", "hue2", "hue5"
#> $ CP_A_values_Y <dbl> 6789, 7854, 60987, 7658, 45658
#> $ CP_A_values_Z <dbl> 99987, 98743, 123423, 873457, 676549
#> $ CP_B_hue      <chr> "hue2", "hue8", "hue3", NA, "hue5"
#> $ CP_B_values_Y <dbl> 6782, 7859, 60989, NA, 45697
#> $ CP_B_values_Z <dbl> 99987, 98734, 123407, NA, 676598
#> $ CP_C_hue      <chr> "hue2", "hue8", "hue3", "hue2", "hue5"
#> $ CP_C_values_Y <dbl> 6785, 6797, 60995, 7657, 45667
#> $ CP_C_values_Z <dbl> 99989, 99980, 123434, 8734509, 676500
#> $ CP_D_hue      <chr> NA, "hue8", "hue3", "hue2", "hue5"
#> $ CP_D_values_Y <dbl> NA, 6756, 60942, 7650, 45699
#> $ CP_D_values_Z <dbl> NA, 99987, 123412, 87345065, 676565
#> $ TCK_A         <chr> "Yes", "", "Yes", "Yes", "Yes"
#> $ mean_Y        <dbl> 6784, 7137, 60975, 7654, 45688
#> $ mean_Z        <dbl> 99988, 99567, 123418, 48039787, 676554
#> $ output        <chr> "hue2:6784-99988", "hue8:7854-98743", "hue3:60975-123418…

创建于 2023-12-06,使用 reprex v2.0.2

© www.soinside.com 2019 - 2024. All rights reserved.