我在下面提出了一个代表。我创建了一个嵌套的
df
。 cols
列表示嵌套 df
内的列。我正在使用 cols
列通过 map
传递到组成的函数。
在本例中,即为
lifeExp
列。
如何传递查看
select
列和 cols
该行中存在的字符向量列名称的 selects
,然后传递 select
嵌套 df 中的 sd_col
该行?此外,我想传递同一个 country
中的 continent
、sd
和 select
列。因此,在这种情况下,我们将在 country
中的嵌套 continent
中选择 lifeExp
、sd
、df
和 sd_col
。
我猜这可以使用
purrr
来实现。
library(gapminder)
library(tidyverse)
# Working copy of the unfiltered gapminder dataset used by the example below.
data <- gapminder::gapminder_unfiltered
#' Replace selected columns with their column-wise mean.
#'
#' @param data A data frame (in this example, one nested tibble).
#' @param cols Character vector naming the columns of `data` to overwrite.
#' @return `data` with every column named in `cols` replaced by the result of
#'   `mean()` applied to that column.
sample_fn1 <- function(data, cols) {
  # all_of() marks `cols` as an external character vector of column names,
  # which is the documented way to select from a vector held in a variable.
  data %>%
    mutate(across(all_of(cols), mean))
}
#' Add the standard deviation of `pop` as a new `sd` column.
#'
#' @param data A data frame containing a `pop` column.
#' @param cols Unused by this function; kept (now optional) so that existing
#'   calls with or without it keep working.
#' @return `data` with an additional `sd` column holding `sd(pop)`.
sample_fn2 <- function(data, cols = NULL) {
  data %>%
    mutate(sd = sd(pop))
}
# Keep seven countries, label each with a coarse region, and fold all
# remaining columns into one nested tibble per region.
nested_data <- data %>%
  filter(
    country %in% c(
      "United States", "Mexico", "Canada",
      "Argentina", "Brazil",
      "Italy", "Japan"
    )
  ) %>%
  mutate(
    # The three conditions are mutually exclusive, so their order is free.
    country_group = case_when(
      country %in% c("Italy", "Japan") ~ "Eurasia",
      country %in% c("Argentina", "Brazil") ~ "South America",
      country %in% c("United States", "Mexico", "Canada") ~ "North America"
    )
  ) %>%
  # Nest everything except the grouping key into a list column named `data`.
  nest(data = -country_group)
> nested_data
# A tibble: 3 x 2
country_group data
<chr> <list>
1 South America <tibble[,6] [24 x 6]>
2 North America <tibble[,6] [127 x 6]>
3 Eurasia <tibble[,6] [114 x 6]>
下一个
# Lookup table: one row per country group, with a list column holding the
# character vector of column names associated with that group.
col_tbl <- tibble::tibble(
  country_group = c("North America", "South America", "Eurasia"),
  cols = list(
    c("lifeExp", "pop"),
    c("lifeExp", "gdpPercap"),
    c("lifeExp", "pop")
  )
)
> col_tbl
# A tibble: 3 x 2
country_group cols
<chr> <list>
1 North America <chr [2]>
2 South America <chr [2]>
3 Eurasia <chr [2]>
# Attach each group's column list. The join key is spelled out so the join
# does not depend on which column names the two tables happen to share, and
# no "Joining, by = ..." message is emitted.
nested_data <- nested_data %>%
  left_join(col_tbl, by = "country_group")
> nested_data
# A tibble: 3 x 3
country_group data cols
<chr> <list> <list>
1 South America <tibble[,6] [24 x 6]> <chr [2]>
2 North America <tibble[,6] [127 x 6]> <chr [2]>
3 Eurasia <tibble[,6] [114 x 6]> <chr [2]>
# Build two derived list columns from each nested tibble:
#   nest1 - sample_fn1() applied with that row's `cols`, then reduced to the
#           hard-coded columns country, year and continent
#   nest2 - sample_fn2() applied to the nested tibble
nested_data <- nested_data %>%
  mutate(
    nest1 = map2(
      data, cols,
      function(df, col_names) {
        df %>%
          sample_fn1(col_names) %>%
          select(country, year, continent)
      }
    ),
    nest2 = map(data, function(df) sample_fn2(df))
  )
> nested_data
# A tibble: 3 x 5
country_group data cols nest1 nest2
<chr> <list> <list> <list> <list>
1 South America <tibble[,6] [24 x 6]> <chr [2]> <tibble[,3] [24 x 3]> <tibble[,7] [24 x 7]>
2 North America <tibble[,6] [127 x 6]> <chr [2]> <tibble[,3] [127 x 3]> <tibble[,7] [127 x 7]>
3 Eurasia <tibble[,6] [114 x 6]> <chr [2]> <tibble[,3] [114 x 3]> <tibble[,7] [114 x 7]>
# For each row, keep only the columns of the nested `data` tibble that are
# named in that row's `cols` entry. map2() walks `data` and `cols` in
# parallel, so no positional lookup into `cols` is needed.
mutate(
  nested_data,
  answer = map2(
    data, cols,
    function(df, col_names) select(df, all_of(col_names))
  )
)