我的数据样本如下(真实数据是近 50 万育龄女性):在这个数据集中,我的女性平等包括女性拥有的所有孩子)。此行一直持续到 30 个子级(ageownchild_pernum1:ageownchild_pernum30)。例如,一名妇女有 2 个孩子,该妇女有 30 行(填充孩子的年龄),但只有第一行和第二行填充该妇女有孩子的年龄,其他行填充 NA。这里我只带了两行,为了简单起见省略了其他行。
library("tidyverse")
DataSet1<-
tibble(
id = c(1,2,3,4,5,6,7,8,9,10),
ageownchild_pernum1 = c(18,24,13,16,9,NA,17,13,32,7 ),
ageownchild_pernum2= c(16,NA,9 ,10,7,NA,13,11,20,5 ),
AGE= c(38,52 ,41 ,43 ,38 ,36 ,40 ,36 ,56,31),
F_curve_notch_15= c(25.14,30.28,43.33,43.33,25.14,25.14,25.14,25.14,67 ,33.77),
F_curve_notch_15.25= c(34.01,40.33,51.74,51.74,34.01,34.01,34.01,34.01,73.85,41.91),
f_curve_tilde_15= c(25.14,30.28,43.33,43.33,25.14,25.14,25.14,25.14,67 ,33.77),
f_curve_tilde_15.25= c(25.14,30.28,43.33,43.33,25.14,25.14,25.14,25.14,67 ,33.77),
)
F_curve_notch 和 f_curve_tilde 适用于年龄(15 至 49,.25)。
现在,我想对我的数据执行这个庞大的过程,可能会达到一千多行代码。
DataSet1$low_notch <-ifelse((DataSet1$ageownchild_pernum1>=0),DataSet1$AGE -
DataSet1$ageownchild_pernum1 - 0.75,0)
DataSet1$high_notch <-ifelse((DataSet1$ageownchild_pernum1>=0),DataSet1$AGE -
DataSet1$ageownchild_pernum1 + 0.75,0)
DataSet1$low_low_notch <-ifelse((DataSet1$ageownchild_pernum1>=0),DataSet1$AGE -
DataSet1$ageownchild_pernum1 - 1.25,0)
DataSet1$high_high_notch<-ifelse((DataSet1$ageownchild_pernum1>=0),DataSet1$AGE -
DataSet1$ageownchild_pernum1 + 1.25,0)
DataSet1$low_low_notch <-ifelse ((DataSet1$low_low_notch>=20) & (DataSet1$low_low_notch<35)
,DataSet1$low_low_notch+0.25,DataSet1$low_low_notch)
DataSet1$high_high_notch<-ifelse ((DataSet1$high_high_notch>=20) &
(DataSet1$high_high_notch<35), DataSet1$high_high_notch+0.25, DataSet1$high_high_notch)
notch <- function(a, b,c,d){
ifelse((a<= 15)&(b>=15)&(c!= 0),0.01*d,c)
}
DataSet1$f_curve_notched_15<-mapply('notch',DataSet1$low_low_notch, DataSet1$high_high_notch,
DataSet1$f_curve_notched_15,DataSet1$f_curve_tilde_15, DataSet1$f_curve_notched_15)
对于所有ageownchild_pernum(1:30) 和f_curve_notched(15 到49, .25),应继续执行此过程。我非常感谢您提供的任何帮助。
这是你所期待的吗?
left_join(
DataSet1 %>%
select(id, AGE, starts_with("ageownchild_pernum")) %>%
pivot_longer(-c(id,AGE)) %>%
mutate(
low_notch = ifelse((value>=0),AGE -
value - 0.75,0),
high_notch = ifelse((value>=0),AGE -
value + 0.75,0),
low_low_notch = ifelse((value>=0),AGE -
value - 1.25,0),
high_high_notch = ifelse((value>=0),AGE -
value + 1.25,0),
low_low_notch = ifelse ((low_low_notch>=20) & (low_low_notch<35)
,low_low_notch+0.25,low_low_notch),
high_high_notch = ifelse ((high_high_notch>=20) &
(high_high_notch<35), high_high_notch+0.25, high_high_notch)
),
full_join(
DataSet1 %>%
select(id, starts_with("F_curve_notch_")) %>%
pivot_longer(-id, values_to = "F_curve_notch_") %>%
mutate(name = str_remove(name, "F_curve_notch_")),
DataSet1 %>%
select(id, starts_with("f_curve_tilde_")) %>%
pivot_longer(-id, values_to = "f_curve_tilde_")%>%
mutate(name = str_remove(name, "f_curve_tilde_")),
by = c("id", "name")
) %>%
rename(curve_id = name),
by = "id",
relationship = "many-to-many" # consider other by specifications if necessary
) %>%
mutate(notch_result = notch(low_low_notch, high_high_notch, F_curve_notch_, f_curve_tilde_))
最后一列包含
notch()
结果:
# A tibble: 40 × 12
id AGE name value low_notch high_notch low_low_notch high_high_notch curve_id F_curve_notch_ f_curve_tilde_ notch_result
<dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <dbl> <dbl>
1 1 38 ageownchild_pernum1 18 19.2 20.8 18.8 21.5 15 25.1 25.1 25.1
2 1 38 ageownchild_pernum1 18 19.2 20.8 18.8 21.5 15.25 34.0 25.1 34.0
3 1 38 ageownchild_pernum2 16 21.2 22.8 21 23.5 15 25.1 25.1 25.1
4 1 38 ageownchild_pernum2 16 21.2 22.8 21 23.5 15.25 34.0 25.1 34.0
5 2 52 ageownchild_pernum1 24 27.2 28.8 27 29.5 15 30.3 30.3 30.3
6 2 52 ageownchild_pernum1 24 27.2 28.8 27 29.5 15.25 40.3 30.3 40.3
7 2 52 ageownchild_pernum2 NA NA NA NA NA 15 30.3 30.3 NA
8 2 52 ageownchild_pernum2 NA NA NA NA NA 15.25 40.3 30.3 NA
9 3 41 ageownchild_pernum1 13 27.2 28.8 27 29.5 15 43.3 43.3 43.3
10 3 41 ageownchild_pernum1 13 27.2 28.8 27 29.5 15.25 51.7 43.3 51.7
# ℹ 30 more rows
# ℹ Use `print(n = ...)` to see more rows
可以使用多个主元在一次运行中将所有 f_curve 年龄和所有子列进行相互比较:
library(tidyverse)
DataSet1<-
tibble(
id = c(1,2,3,4,5,6,7,8,9,10),
ageownchild_pernum1 = c(18,24,13,16,9,NA,17,13,32,7 ),
ageownchild_pernum2= c(16,NA,9 ,10,7,NA,13,11,20,5 ),
AGE= c(38,52 ,41 ,43 ,38 ,36 ,40 ,36 ,56,31),
f_curve_notch_15= c(25.14,30.28,43.33,43.33,25.14,25.14,25.14,25.14,67 ,33.77),
f_curve_notch_15.25= c(34.01,40.33,51.74,51.74,34.01,34.01,34.01,34.01,73.85,41.91),
f_curve_tilde_15= c(25.14,30.28,43.33,43.33,25.14,25.14,25.14,25.14,67 ,33.77),
f_curve_tilde_15.25= c(25.14,30.28,43.33,43.33,25.14,25.14,25.14,25.14,67 ,33.77),
)
notch <- function(a, b,c,d){
ifelse((a<= 15)&(b>=15)&(c!= 0),0.01*d,c)
}
dat_out <- DataSet1 |>
pivot_longer(
starts_with("f_curve"),
names_to = c("marker", "age_cat"),
names_pattern = c("f_curve_(.*)_(.*)")
) |>
pivot_wider(names_from = marker, values_from = value) |>
pivot_longer(
starts_with("ageownchild"),
names_to = "child_n",
values_to = "child_age",
names_prefix = "ageownchild_pernum"
) |>
filter(!is.na(child_age)) |>
mutate(
low_notch = child_age - 0.75,
high_notch = child_age + 0.75,
low_low_notch = child_age - 1.25,
high_high_notch = child_age + 1.25,
low_low_notch = if_else(low_low_notch>=20 & low_low_notch<35, low_low_notch+0.25, low_low_notch),
high_high_notch = if_else(high_high_notch>=20 & high_high_notch<35, high_high_notch+0.25, high_high_notch),
f_curve_notch = notch(low_low_notch, high_high_notch, notch, tilde)
)
dat_out
#> # A tibble: 34 × 12
#> id AGE age_cat notch tilde child_n child_age low_notch high_notch
#> <dbl> <dbl> <chr> <dbl> <dbl> <chr> <dbl> <dbl> <dbl>
#> 1 1 38 15 25.1 25.1 1 18 17.2 18.8
#> 2 1 38 15 25.1 25.1 2 16 15.2 16.8
#> 3 1 38 15.25 34.0 25.1 1 18 17.2 18.8
#> 4 1 38 15.25 34.0 25.1 2 16 15.2 16.8
#> 5 2 52 15 30.3 30.3 1 24 23.2 24.8
#> 6 2 52 15.25 40.3 30.3 1 24 23.2 24.8
#> 7 3 41 15 43.3 43.3 1 13 12.2 13.8
#> 8 3 41 15 43.3 43.3 2 9 8.25 9.75
#> 9 3 41 15.25 51.7 43.3 1 13 12.2 13.8
#> 10 3 41 15.25 51.7 43.3 2 9 8.25 9.75
#> # ℹ 24 more rows
#> # ℹ 3 more variables: low_low_notch <dbl>, high_high_notch <dbl>,
#> # f_curve_notch <dbl>
这意味着每位女性对于儿童和 f_curve 年龄段的每个组合都会有一行。如果需要,可以将这些数据转回更广泛的数据集,以便为每个女性提供一列:
dat_out |>
pivot_wider(
names_from = c(child_n, age_cat),
values_from = f_curve_notch,
names_prefix = "f_curve_notch_"
)
#> # A tibble: 34 × 13
#> id AGE notch tilde child_age low_notch high_notch low_low_notch
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1 38 25.1 25.1 18 17.2 18.8 16.8
#> 2 1 38 25.1 25.1 16 15.2 16.8 14.8
#> 3 1 38 34.0 25.1 18 17.2 18.8 16.8
#> 4 1 38 34.0 25.1 16 15.2 16.8 14.8
#> 5 2 52 30.3 30.3 24 23.2 24.8 23
#> 6 2 52 40.3 30.3 24 23.2 24.8 23
#> 7 3 41 43.3 43.3 13 12.2 13.8 11.8
#> 8 3 41 43.3 43.3 9 8.25 9.75 7.75
#> 9 3 41 51.7 43.3 13 12.2 13.8 11.8
#> 10 3 41 51.7 43.3 9 8.25 9.75 7.75
#> # ℹ 24 more rows
#> # ℹ 5 more variables: high_high_notch <dbl>, f_curve_notch_1_15 <dbl>,
#> # f_curve_notch_2_15 <dbl>, f_curve_notch_1_15.25 <dbl>,
#> # f_curve_notch_2_15.25 <dbl>