我正在尝试通过应用可归类为以下几类的函数对嵌套模型进行分析:
需要帮助的地方:
如何让 make_data_input 函数正常工作?
library(tidyverse)
# This works: the column names are hard-coded, so no tidy evaluation is needed.
# Adds `price_cumsum` (running total of `price`) and `max_price_cumsum`
# (the maximum of that running total) to `df`.
make_data_fixed <- function(df) {
  with_cumsum <- mutate(df, price_cumsum = cumsum(price))
  mutate(with_cumsum, max_price_cumsum = max(price_cumsum))
}
# Does NOT work as intended: `max_<x>_cumsum` is not computed from the newly
# created `<x>_cumsum` column. On the right-hand side, "{{x}}_cumsum" is an
# ordinary string literal, so max() receives that literal text instead of the
# column values.
# df: data frame to extend; x: column passed unquoted and embraced with {{ }}.
make_data_input <- function(df,x) {
df %>%
mutate("{{x}}_cumsum":=cumsum({{x}}),
"max_{{x}}_cumsum":=max("{{x}}_cumsum")
)
}
# Columns whose distinct values are counted below.
selected_cols <- c("clarity", "depth")

# Summarises `df` so that every column selected by `x` is replaced by the
# number of distinct values it contains.
# `x` is wrapped in braces so it is evaluated as an ordinary R value before
# being handed to across().
calculate_stuff <- function(df, x) {
  count_distinct <- function(values) length(unique(values))
  summarize(df, across({x}, count_distinct))
}

calculate_stuff(diamonds, selected_cols)
# Nest the data per (cut, color) group, then apply each helper to every nested
# tibble with purrr's map()/map2().
diamonds %>%
group_by(cut,color) %>%
nest() %>%
# test: list column holding selected_cols; carat: the column name as a string
mutate(test=list(selected_cols),
carat="carat") %>%
mutate(simple=map(data,make_data_fixed),# this works
calculate_direct=map2(.x=data,.y=test,~calculate_stuff(df=.x,x=.y)), # this works
calculate_indirect=map2(.x=data,.y=carat,~calculate_stuff(df=.x,x=.y)), # this works
# NOTE(review): the call below fails. It passes `.y=price` although
# make_data_input() takes `df` and `x`, and `price` appears to exist only
# inside the nested `data` tibbles here, not as a top-level column -- confirm.
complex=map2(.x=data,.y=price,~make_data_input(df=.x,.y=price)) # this doesn't work
)
我会做两件事。首先,使用 dplyr::rowwise() 而不是 purrr::map()。这样代码更容易阅读和推理,也更容易看清哪些对象名被传入了函数。
其次,我们需要修正 make_data_input。您使用了 max("{{x}}_cumsum"),但 "{{x}}_cumsum" 在这里只不过是一个普通字符串。我们需要先根据 "{{x}}_cumsum" 生成真正的列名字符串(用 rlang::englue() 完成),然后才能在 .data[[]] 或者 !! sym() 中使用它。
library(tidyverse)
# Hard-coded variant: adds the running total of `price` and, in a second
# step, the maximum of that running total.
make_data_fixed <- function(df) {
  df %>%
    mutate(price_cumsum = cumsum(price)) %>%
    mutate(max_price_cumsum = max(price_cumsum))
}
# Appends two columns derived from column `x` (passed unquoted):
#   <x>_cumsum      running total of `x`
#   max_<x>_cumsum  maximum of that running total
# englue() turns the embraced argument into the name of the freshly created
# column, so it can be looked up through the `.data` pronoun.
make_data_input <- function(df, x) {
  df %>%
    mutate("{{x}}_cumsum" := cumsum({{ x }})) %>%
    mutate("max_{{x}}_cumsum" := max(.data[[rlang::englue("{{x}}_cumsum")]]))
}
# Columns whose distinct values are counted by calculate_stuff().
selected_cols <- c("clarity", "depth")

# For every column of `df` selected by `x`, report how many distinct values
# it holds. `{x}` evaluates `x` as a plain R value before across() uses it.
calculate_stuff <- function(df, x) {
  df %>%
    summarise(across({x}, function(col) length(unique(col))))
}
# Nest per (cut, color), then work row by row so that each helper receives the
# nested tibble of a single group; results are wrapped in list() so they can be
# stored in list columns. Finally show the `complex` result of the first row.
diamonds %>%
  group_by(cut, color) %>%
  nest() %>%
  mutate(
    test = list(selected_cols),
    carat = "carat"
  ) %>%
  rowwise() %>%
  mutate(
    simple = list(make_data_fixed(data)),
    calculate_direct = list(calculate_stuff(data, test)),
    calculate_indirect = list(calculate_stuff(data, carat)),
    complex = list(make_data_input(data, price))
  ) %>%
  pull(complex) %>%
  pluck(1)
#> # A tibble: 3,903 x 10
#> carat clarity depth table price x y z price_cumsum
#> <dbl> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl> <int>
#> 1 0.23 SI2 61.5 55 326 3.95 3.98 2.43 326
#> 2 0.26 VVS2 62.9 58 554 4.02 4.06 2.54 880
#> 3 0.7 SI1 62.5 57 2757 5.7 5.72 3.57 3637
#> 4 0.59 VVS2 62 55 2761 5.38 5.43 3.35 6398
#> 5 0.74 SI2 62.2 56 2761 5.8 5.84 3.62 9159
#> 6 0.7 VS2 60.7 58 2762 5.73 5.76 3.49 11921
#> 7 0.74 SI1 62.3 54 2762 5.8 5.83 3.62 14683
#> 8 0.7 SI1 60.9 57 2768 5.73 5.76 3.5 17451
#> 9 0.6 VS1 61.7 55 2774 5.41 5.44 3.35 20225
#> 10 0.7 SI1 62.7 55 2774 5.68 5.74 3.58 22999
#> # ... with 3,893 more rows, and 1 more variable: max_price_cumsum <int>
由 reprex 包 (v2.0.1) 于 2023-02-22 创建