减少一些繁琐的代码以求简单

问题描述 投票:0回答:2

我的数据样本如下(真实数据是近 50 万育龄女性):在这个数据集中,我的女性平等包括女性拥有的所有孩子)。此行一直持续到 30 个子级(ageownchild_pernum1:ageownchild_pernum30)。例如,一名妇女有 2 个孩子,该妇女有 30 行(填充孩子的年龄),但只有第一行和第二行填充该妇女有孩子的年龄,其他行填充 NA。这里我只带了两行,为了简单起见省略了其他行。

library("tidyverse")
DataSet1<-
tibble(
id = c(1,2,3,4,5,6,7,8,9,10),
ageownchild_pernum1 = c(18,24,13,16,9,NA,17,13,32,7 ),
ageownchild_pernum2=  c(16,NA,9 ,10,7,NA,13,11,20,5 ),
AGE=  c(38,52 ,41 ,43 ,38 ,36 ,40 ,36 ,56,31),
F_curve_notch_15= c(25.14,30.28,43.33,43.33,25.14,25.14,25.14,25.14,67   ,33.77),
F_curve_notch_15.25= c(34.01,40.33,51.74,51.74,34.01,34.01,34.01,34.01,73.85,41.91),
f_curve_tilde_15= c(25.14,30.28,43.33,43.33,25.14,25.14,25.14,25.14,67   ,33.77),
f_curve_tilde_15.25= c(25.14,30.28,43.33,43.33,25.14,25.14,25.14,25.14,67   ,33.77),
)

F_curve_notch 和 f_curve_tilde 适用于年龄(15 至 49,.25)。

现在,我想对我的数据执行这个庞大的过程,可能会达到一千多行代码。

DataSet1$low_notch      <-ifelse((DataSet1$ageownchild_pernum1>=0),DataSet1$AGE - 
DataSet1$ageownchild_pernum1 - 0.75,0)
DataSet1$high_notch     <-ifelse((DataSet1$ageownchild_pernum1>=0),DataSet1$AGE - 
DataSet1$ageownchild_pernum1 + 0.75,0)
DataSet1$low_low_notch  <-ifelse((DataSet1$ageownchild_pernum1>=0),DataSet1$AGE - 
DataSet1$ageownchild_pernum1 - 1.25,0)
DataSet1$high_high_notch<-ifelse((DataSet1$ageownchild_pernum1>=0),DataSet1$AGE - 
DataSet1$ageownchild_pernum1 + 1.25,0)
DataSet1$low_low_notch  <-ifelse ((DataSet1$low_low_notch>=20)   & (DataSet1$low_low_notch<35)  
,DataSet1$low_low_notch+0.25,DataSet1$low_low_notch)
DataSet1$high_high_notch<-ifelse ((DataSet1$high_high_notch>=20) & 
(DataSet1$high_high_notch<35), DataSet1$high_high_notch+0.25, DataSet1$high_high_notch)

notch <- function(a, b,c,d){
ifelse((a<= 15)&(b>=15)&(c!= 0),0.01*d,c)
}
DataSet1$f_curve_notched_15<-mapply('notch',DataSet1$low_low_notch, DataSet1$high_high_notch, 
DataSet1$f_curve_notched_15,DataSet1$f_curve_tilde_15, DataSet1$f_curve_notched_15)

对于所有ageownchild_pernum(1:30) 和f_curve_notched(15 到49, .25),应继续执行此过程。我非常感谢您提供的任何帮助。

r function loops dplyr mapply
2个回答
1
投票

这是你所期待的吗?

left_join(
  DataSet1 %>% 
    select(id, AGE, starts_with("ageownchild_pernum")) %>% 
    pivot_longer(-c(id,AGE)) %>% 
    mutate(
      low_notch = ifelse((value>=0),AGE - 
                           value - 0.75,0),
      high_notch = ifelse((value>=0),AGE - 
                            value + 0.75,0),
      low_low_notch = ifelse((value>=0),AGE - 
                               value - 1.25,0),
      high_high_notch = ifelse((value>=0),AGE - 
                                 value + 1.25,0),
      low_low_notch = ifelse ((low_low_notch>=20)   & (low_low_notch<35)  
                              ,low_low_notch+0.25,low_low_notch),
      high_high_notch = ifelse ((high_high_notch>=20) & 
                                  (high_high_notch<35), high_high_notch+0.25, high_high_notch)
    ),
  
  full_join(
    DataSet1 %>% 
      select(id, starts_with("F_curve_notch_")) %>% 
      pivot_longer(-id, values_to = "F_curve_notch_") %>% 
      mutate(name = str_remove(name, "F_curve_notch_")),
    
    DataSet1 %>% 
      select(id, starts_with("f_curve_tilde_")) %>% 
      pivot_longer(-id, values_to = "f_curve_tilde_")%>% 
      mutate(name = str_remove(name, "f_curve_tilde_")),
    by = c("id", "name")
  ) %>% 
    rename(curve_id = name),
  by = "id",
  relationship = "many-to-many" # consider other by specifications if necessary
) %>% 
  mutate(notch_result = notch(low_low_notch, high_high_notch, F_curve_notch_, f_curve_tilde_))

最后一列包含

notch()
结果:

# A tibble: 40 × 12
      id   AGE name                value low_notch high_notch low_low_notch high_high_notch curve_id F_curve_notch_ f_curve_tilde_ notch_result
   <dbl> <dbl> <chr>               <dbl>     <dbl>      <dbl>         <dbl>           <dbl> <chr>             <dbl>          <dbl>        <dbl>
 1     1    38 ageownchild_pernum1    18      19.2       20.8          18.8            21.5 15                 25.1           25.1         25.1
 2     1    38 ageownchild_pernum1    18      19.2       20.8          18.8            21.5 15.25              34.0           25.1         34.0
 3     1    38 ageownchild_pernum2    16      21.2       22.8          21              23.5 15                 25.1           25.1         25.1
 4     1    38 ageownchild_pernum2    16      21.2       22.8          21              23.5 15.25              34.0           25.1         34.0
 5     2    52 ageownchild_pernum1    24      27.2       28.8          27              29.5 15                 30.3           30.3         30.3
 6     2    52 ageownchild_pernum1    24      27.2       28.8          27              29.5 15.25              40.3           30.3         40.3
 7     2    52 ageownchild_pernum2    NA      NA         NA            NA              NA   15                 30.3           30.3         NA  
 8     2    52 ageownchild_pernum2    NA      NA         NA            NA              NA   15.25              40.3           30.3         NA  
 9     3    41 ageownchild_pernum1    13      27.2       28.8          27              29.5 15                 43.3           43.3         43.3
10     3    41 ageownchild_pernum1    13      27.2       28.8          27              29.5 15.25              51.7           43.3         51.7
# ℹ 30 more rows
# ℹ Use `print(n = ...)` to see more rows

0
投票

可以使用多个主元在一次运行中将所有 f_curve 年龄和所有子列进行相互比较:

library(tidyverse)

DataSet1<-
  tibble(
    id = c(1,2,3,4,5,6,7,8,9,10),
    ageownchild_pernum1 = c(18,24,13,16,9,NA,17,13,32,7 ),
    ageownchild_pernum2=  c(16,NA,9 ,10,7,NA,13,11,20,5 ),
    AGE=  c(38,52 ,41 ,43 ,38 ,36 ,40 ,36 ,56,31),
    f_curve_notch_15= c(25.14,30.28,43.33,43.33,25.14,25.14,25.14,25.14,67   ,33.77),
    f_curve_notch_15.25= c(34.01,40.33,51.74,51.74,34.01,34.01,34.01,34.01,73.85,41.91),
    f_curve_tilde_15= c(25.14,30.28,43.33,43.33,25.14,25.14,25.14,25.14,67   ,33.77),
    f_curve_tilde_15.25= c(25.14,30.28,43.33,43.33,25.14,25.14,25.14,25.14,67   ,33.77),
  )

notch <- function(a, b,c,d){
  ifelse((a<= 15)&(b>=15)&(c!= 0),0.01*d,c)
}



dat_out <- DataSet1 |>
  pivot_longer(
    starts_with("f_curve"),
    names_to = c("marker", "age_cat"),
    names_pattern = c("f_curve_(.*)_(.*)")
  ) |> 
  pivot_wider(names_from = marker, values_from = value) |> 
  pivot_longer(
    starts_with("ageownchild"),
    names_to = "child_n",
    values_to = "child_age",
    names_prefix = "ageownchild_pernum"
  ) |> 
  filter(!is.na(child_age)) |> 
  mutate(
    low_notch = child_age - 0.75,
    high_notch = child_age + 0.75,
    low_low_notch = child_age - 1.25,
    high_high_notch = child_age + 1.25,
    low_low_notch = if_else(low_low_notch>=20 & low_low_notch<35, low_low_notch+0.25, low_low_notch),
    high_high_notch = if_else(high_high_notch>=20 & high_high_notch<35, high_high_notch+0.25, high_high_notch),
    f_curve_notch = notch(low_low_notch, high_high_notch, notch, tilde)
  )

dat_out
#> # A tibble: 34 × 12
#>       id   AGE age_cat notch tilde child_n child_age low_notch high_notch
#>    <dbl> <dbl> <chr>   <dbl> <dbl> <chr>       <dbl>     <dbl>      <dbl>
#>  1     1    38 15       25.1  25.1 1              18     17.2       18.8 
#>  2     1    38 15       25.1  25.1 2              16     15.2       16.8 
#>  3     1    38 15.25    34.0  25.1 1              18     17.2       18.8 
#>  4     1    38 15.25    34.0  25.1 2              16     15.2       16.8 
#>  5     2    52 15       30.3  30.3 1              24     23.2       24.8 
#>  6     2    52 15.25    40.3  30.3 1              24     23.2       24.8 
#>  7     3    41 15       43.3  43.3 1              13     12.2       13.8 
#>  8     3    41 15       43.3  43.3 2               9      8.25       9.75
#>  9     3    41 15.25    51.7  43.3 1              13     12.2       13.8 
#> 10     3    41 15.25    51.7  43.3 2               9      8.25       9.75
#> # ℹ 24 more rows
#> # ℹ 3 more variables: low_low_notch <dbl>, high_high_notch <dbl>,
#> #   f_curve_notch <dbl>

这意味着每位女性对于儿童和 f_curve 年龄段的每个组合都会有一行。如果需要,可以将这些数据转回更广泛的数据集,以便为每个女性提供一列:

dat_out |> 
  pivot_wider(
    names_from = c(child_n, age_cat),
    values_from = f_curve_notch,
    names_prefix = "f_curve_notch_"
  )
#> # A tibble: 34 × 13
#>       id   AGE notch tilde child_age low_notch high_notch low_low_notch
#>    <dbl> <dbl> <dbl> <dbl>     <dbl>     <dbl>      <dbl>         <dbl>
#>  1     1    38  25.1  25.1        18     17.2       18.8          16.8 
#>  2     1    38  25.1  25.1        16     15.2       16.8          14.8 
#>  3     1    38  34.0  25.1        18     17.2       18.8          16.8 
#>  4     1    38  34.0  25.1        16     15.2       16.8          14.8 
#>  5     2    52  30.3  30.3        24     23.2       24.8          23   
#>  6     2    52  40.3  30.3        24     23.2       24.8          23   
#>  7     3    41  43.3  43.3        13     12.2       13.8          11.8 
#>  8     3    41  43.3  43.3         9      8.25       9.75          7.75
#>  9     3    41  51.7  43.3        13     12.2       13.8          11.8 
#> 10     3    41  51.7  43.3         9      8.25       9.75          7.75
#> # ℹ 24 more rows
#> # ℹ 5 more variables: high_high_notch <dbl>, f_curve_notch_1_15 <dbl>,
#> #   f_curve_notch_2_15 <dbl>, f_curve_notch_1_15.25 <dbl>,
#> #   f_curve_notch_2_15.25 <dbl>
© www.soinside.com 2019 - 2024. All rights reserved.