我现在的数据结构是这样的。
age_group age_division total_estimate zipcode single_age
<chr> <dbl> <dbl> <dbl> <list>
1 0-4 2 10 51557 list(C(0, 0, 1, 1, 2, 2, 3, 3, 4, 4))
2 0-4 2 10 51557 list(C(0, 0, 1, 1, 2, 2, 3, 3, 4, 4))
3 0-4 2 10 51557 list(C(0, 0, 1, 1, 2, 2, 3, 3, 4, 4))
4 0-4 2 10 51557 list(C(0, 0, 1, 1, 2, 2, 3, 3, 4, 4))
5 0-4 2 10 51557 list(C(0, 0, 1, 1, 2, 2, 3, 3, 4, 4))
6 0-4 2 10 51557 list(C(0, 0, 1, 1, 2, 2, 3, 3, 4, 4))
7 0-4 2 10 51557 list(C(0, 0, 1, 1, 2, 2, 3, 3, 4, 4))
8 0-4 2 10 51557 list(C(0, 0, 1, 1, 2, 2, 3, 3, 4, 4))
9 0-4 2 10 51557 list(C(0, 0, 1, 1, 2, 2, 3, 3, 4, 4))
10 0-4 2 10 51557 list(C(0, 0, 1, 1, 2, 2, 3, 3, 4, 4))
11 0-4 3 15 51558 list(C(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4))
12 0-4 3 15 51558 list(C(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4))
13 0-4 3 15 51558 list(C(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4))
14 0-4 3 15 51558 list(C(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4))
15 0-4 3 15 51558 list(C(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4))
16 0-4 3 15 51558 list(C(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4))
17 0-4 3 15 51558 list(C(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4))
18 0-4 3 15 51558 list(C(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4))
19 0-4 3 15 51558 list(C(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4))
20 0-4 3 15 51558 list(C(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4))
21 0-4 3 15 51558 list(C(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4))
22 0-4 3 15 51558 list(C(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4))
23 0-4 3 15 51558 list(C(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4))
24 0-4 3 15 51558 list(C(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4))
25 0-4 3 15 51558 list(C(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4))
26 85-90 1 8 51559 list(C(0, 1, 2, 3, 4))
27 85-90 1 8 51559 list(C(0, 1, 2, 3, 4))
28 85-90 1 8 51559 list(C(0, 1, 2, 3, 4))
29 85-90 1 8 51559 list(C(0, 1, 2, 3, 4))
30 85-90 1 8 51559 list(C(0, 1, 2, 3, 4))
31 85-90 1 8 51559 list(C(0, 1, 2, 3, 4))
对于每个唯一的邮政编码和年龄组组合,我想把对应的 single_age 列表纵向展开(每个年龄占一行)。例如,假设第一个唯一邮政编码为 1,其 age_group 为 0-4,age_division 为 2,那么该列表应纵向展开为 0, 0, 1, 1, 2, 2, 3, 3, 4, 4。下面是我的代码,它得到的 single_age 是一个列表。
# Expand an age-range label such as "0-4" into single years of age.
#
# age_group:    string of the form "<low>-<high>"; only the first element is used.
# age_division: number of times each single age is repeated (passed to rep(each = )).
#
# Returns a one-element list whose only item is the vector
# low, ..., high with every age repeated `age_division` times.
expand_age <- function(age_group, age_division) {
  # Split the label once and keep the lower / upper bound as numbers.
  bounds <- as.numeric(strsplit(age_group, "-")[[1]][1:2])
  single_years <- seq(bounds[1], bounds[2])
  list(rep(single_years, each = age_division))
}
# For every zipcode group, call expand_age() on the group's first age_group /
# age_division and store the result, wrapped in list(), in the single_age column.
# expand_age() itself returns a list, so single_age ends up as a list-column.
df <- mutate(
  group_by(df_1, zipcode),
  single_age = list(expand_age(first(age_group), first(age_division)))
)
预期的输出应如下所示。
age_group age_division total_estimate zipcode single_age
<chr> <dbl> <dbl> <dbl> <dbl>
1 0-4 2 10 51557 0
2 0-4 2 10 51557 0
3 0-4 2 10 51557 1
4 0-4 2 10 51557 1
5 0-4 2 10 51557 2
6 0-4 2 10 51557 2
7 0-4 2 10 51557 3
8 0-4 2 10 51557 3
9 0-4 2 10 51557 4
10 0-4 2 10 51557 4
11 0-4 3 15 51558 0
12 0-4 3 15 51558 0
13 0-4 3 15 51558 0
14 0-4 3 15 51558 1
15 0-4 3 15 51558 1
16 0-4 3 15 51558 1
17 0-4 3 15 51558 2
18 0-4 3 15 51558 2
19 0-4 3 15 51558 2
20 0-4 3 15 51558 3
21 0-4 3 15 51558 3
22 0-4 3 15 51558 3
23 0-4 3 15 51558 4
24 0-4 3 15 51558 4
25 0-4 3 15 51558 4
26 85-90 1 8 51559 85
27 85-90 1 8 51559 86
28 85-90 1 8 51559 87
29 85-90 1 8 51559 88
30 85-90 1 8 51559 89
31 85-90 1 8 51559 90
正如 @Onyambu 所要求的,最好始终用 dput() 的输出来共享您的数据。
按照给定的格式,unnest_longer(df_1, single_age) 将不起作用,原因有二:single_age 内部的列表只有一个对象——一个整数向量;另外,C()(大写“C”)与 c() 不同。你可以试试这个:
library(tidyverse)
# Data with c() instead of C()
# Row-wise reconstruction of the 31 rows printed in the question:
# 10 rows for zipcode 51557, 15 rows for 51558 and 6 rows for 51559.
# Every single_age cell is written as list(c(...)), i.e. a one-element list
# holding the age vector, reproducing the nesting the question describes.
aux <- tribble(
~age_group, ~age_division, ~total_estimate, ~zipcode, ~single_age,
# zipcode 51557: ages 0-4, each repeated twice
"0-4", 2, 10, 51557, list(c(0, 0, 1, 1, 2, 2, 3, 3, 4, 4)),
"0-4", 2, 10, 51557, list(c(0, 0, 1, 1, 2, 2, 3, 3, 4, 4)),
"0-4", 2, 10, 51557, list(c(0, 0, 1, 1, 2, 2, 3, 3, 4, 4)),
"0-4", 2, 10, 51557, list(c(0, 0, 1, 1, 2, 2, 3, 3, 4, 4)),
"0-4", 2, 10, 51557, list(c(0, 0, 1, 1, 2, 2, 3, 3, 4, 4)),
"0-4", 2, 10, 51557, list(c(0, 0, 1, 1, 2, 2, 3, 3, 4, 4)),
"0-4", 2, 10, 51557, list(c(0, 0, 1, 1, 2, 2, 3, 3, 4, 4)),
"0-4", 2, 10, 51557, list(c(0, 0, 1, 1, 2, 2, 3, 3, 4, 4)),
"0-4", 2, 10, 51557, list(c(0, 0, 1, 1, 2, 2, 3, 3, 4, 4)),
"0-4", 2, 10, 51557, list(c(0, 0, 1, 1, 2, 2, 3, 3, 4, 4)),
# zipcode 51558: ages 0-4, each repeated three times
"0-4", 3, 15, 51558, list(c(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4)),
"0-4", 3, 15, 51558, list(c(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4)),
"0-4", 3, 15, 51558, list(c(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4)),
"0-4", 3, 15, 51558, list(c(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4)),
"0-4", 3, 15, 51558, list(c(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4)),
"0-4", 3, 15, 51558, list(c(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4)),
"0-4", 3, 15, 51558, list(c(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4)),
"0-4", 3, 15, 51558, list(c(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4)),
"0-4", 3, 15, 51558, list(c(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4)),
"0-4", 3, 15, 51558, list(c(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4)),
"0-4", 3, 15, 51558, list(c(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4)),
"0-4", 3, 15, 51558, list(c(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4)),
"0-4", 3, 15, 51558, list(c(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4)),
"0-4", 3, 15, 51558, list(c(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4)),
"0-4", 3, 15, 51558, list(c(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4)),
# zipcode 51559: labelled "85-90" but carrying ages 0..4, exactly as in the question's printout
"85-90", 1, 8 , 51559, list(c(0, 1, 2, 3, 4)),
"85-90", 1, 8 , 51559, list(c(0, 1, 2, 3, 4)),
"85-90", 1, 8 , 51559, list(c(0, 1, 2, 3, 4)),
"85-90", 1, 8 , 51559, list(c(0, 1, 2, 3, 4)),
"85-90", 1, 8 , 51559, list(c(0, 1, 2, 3, 4)),
"85-90", 1, 8 , 51559, list(c(0, 1, 2, 3, 4))
)
解除嵌套之前,先简化一下:
# First collapse the extra list layer around each age vector with
# list_simplify(), then give every age its own row with unnest_longer().
aux <- unnest_longer(
  mutate(aux, single_age = list_simplify(single_age)),
  single_age
)
输出:
> aux
# A tibble: 355 × 5
age_group age_division total_estimate zipcode single_age
<chr> <dbl> <dbl> <dbl> <dbl>
1 0-4 2 10 51557 0
2 0-4 2 10 51557 0
3 0-4 2 10 51557 1
4 0-4 2 10 51557 1
5 0-4 2 10 51557 2
6 0-4 2 10 51557 2
7 0-4 2 10 51557 3
8 0-4 2 10 51557 3
9 0-4 2 10 51557 4
10 0-4 2 10 51557 4
# ℹ 345 more rows
# ℹ Use `print(n = ...)` to see more rows