R 有没有办法选择列的第一个实例并垂直排列列表?

问题描述 投票:0回答:1

我现在的数据结构是这样的。

 age_group age_division    total_estimate zipcode  single_age
   <chr>            <dbl>       <dbl>      <dbl>      <list>    
1   0-4              2         10        51557   list(C(0, 0, 1, 1, 2, 2, 3, 3, 4, 4))
2   0-4              2         10        51557   list(C(0, 0, 1, 1, 2, 2, 3, 3, 4, 4))
3   0-4              2         10        51557   list(C(0, 0, 1, 1, 2, 2, 3, 3, 4, 4))
4   0-4              2         10        51557   list(C(0, 0, 1, 1, 2, 2, 3, 3, 4, 4))
5   0-4              2         10        51557   list(C(0, 0, 1, 1, 2, 2, 3, 3, 4, 4))
6   0-4              2         10        51557   list(C(0, 0, 1, 1, 2, 2, 3, 3, 4, 4))
7   0-4              2         10        51557   list(C(0, 0, 1, 1, 2, 2, 3, 3, 4, 4))
8   0-4              2         10        51557   list(C(0, 0, 1, 1, 2, 2, 3, 3, 4, 4))
9   0-4              2         10        51557   list(C(0, 0, 1, 1, 2, 2, 3, 3, 4, 4))
10  0-4              2         10        51557   list(C(0, 0, 1, 1, 2, 2, 3, 3, 4, 4))
11  0-4              3         15        51558   list(C(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4))
12  0-4              3         15        51558   list(C(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4))
13  0-4              3         15        51558   list(C(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4))
14  0-4              3         15        51558   list(C(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4))
15  0-4              3         15        51558   list(C(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4))
16  0-4              3         15        51558   list(C(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4))
17  0-4              3         15        51558   list(C(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4))
18  0-4              3         15        51558   list(C(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4))
19  0-4              3         15        51558   list(C(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4))
20  0-4              3         15        51558   list(C(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4))
21  0-4              3         15        51558   list(C(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4))
22  0-4              3         15        51558   list(C(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4))
23  0-4              3         15        51558   list(C(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4))
24  0-4              3         15        51558   list(C(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4))
25  0-4              3         15        51558   list(C(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4))
26  85-90            1         8         51559   list(C(0, 1, 2, 3, 4))
27  85-90            1         8         51559   list(C(0, 1, 2, 3, 4))
28  85-90            1         8         51559   list(C(0, 1, 2, 3, 4))
29  85-90            1         8         51559   list(C(0, 1, 2, 3, 4))
30  85-90            1         8         51559   list(C(0, 1, 2, 3, 4))
31  85-90            1         8         51559   list(C(0, 1, 2, 3, 4))

对于每个唯一的邮政编码和年龄组组合,我想取其第一条记录中的 single_age 列表,并将它纵向展开(每个值占一行)。例如,如果第一个唯一邮政编码为 1、age_group 为 0-4、age_division 为 2,那么该列表应纵向展开为 0, 0, 1, 1, 2, 2, 3, 3, 4, 4。下面是我的代码,它得到的 single_age 是一个列表。

# Expand an age-group label such as "0-4" into single years of age.
#
# age_group:    string of the form "<low>-<high>"; only the first two
#               "-"-separated pieces of the first element are used.
# age_division: passed to rep() as `each`, i.e. how many times every single
#               age is repeated.
#
# Returns a one-element list wrapping the repeated age vector.
expand_age <- function(age_group, age_division) {
  # Split the label once and reuse the pieces for both bounds.
  pieces <- strsplit(age_group, "-")[[1]]
  lower <- as.numeric(pieces[1])
  upper <- as.numeric(pieces[2])

  ages <- seq(lower, upper)
  list(rep(ages, each = age_division))
}

# Per zipcode group, call expand_age() on the group's first age_group and
# first age_division, wrap the result in list(), and store it as single_age.
df <- mutate(
  group_by(df_1, zipcode),
  single_age = list(expand_age(first(age_group), first(age_division)))
)

这就是预期的输出应该是什么样的。

age_group age_division    total_estimate zipcode  single_age
   <chr>            <dbl>       <dbl>      <dbl>      <dbl>    
1   0-4              2         10        51557          0
2   0-4              2         10        51557          0
3   0-4              2         10        51557          1
4   0-4              2         10        51557          1
5   0-4              2         10        51557          2
6   0-4              2         10        51557          2
7   0-4              2         10        51557          3
8   0-4              2         10        51557          3
9   0-4              2         10        51557          4
10  0-4              2         10        51557          4
11  0-4              3         15        51558          0
12  0-4              3         15        51558          0
13  0-4              3         15        51558          0
14  0-4              3         15        51558          1
15  0-4              3         15        51558          1
16  0-4              3         15        51558          1
17  0-4              3         15        51558          2
18  0-4              3         15        51558          2
19  0-4              3         15        51558          2
20  0-4              3         15        51558          3
21  0-4              3         15        51558          3
22  0-4              3         15        51558          3
23  0-4              3         15        51558          4
24  0-4              3         15        51558          4
25  0-4              3         15        51558          4
26  85-90            1         8         51559         85
27  85-90            1         8         51559         86
28  85-90            1         8         51559         87
29  85-90            1         8         51559         88
30  85-90            1         8         51559         89
31  85-90            1         8         51559         90
r data-manipulation
1个回答
0
投票

正如 @Onyambu 所说,使用 `dput` 来共享您的数据总是更好。

按照给定的格式,`unnest_longer(df_1, single_age)` 将不起作用,因为:

  1. `single_age` 内部的列表只有一个对象——一个整数向量;
  2. `C()`(大写“C”)与 `c()` 不同。

你可以试试这个:

library(tidyverse)

# Data with c() instead of C()
# Rebuilds the question's sample table row for row with tribble().
# Every single_age cell is written as list(c(...)), i.e. a one-element list
# wrapping a numeric vector, matching the structure shown in the question.
aux <- tribble(
~age_group, ~age_division,    ~total_estimate, ~zipcode,  ~single_age,
"0-4",           2,         10,        51557,   list(c(0, 0, 1, 1, 2, 2, 3, 3, 4, 4)),
"0-4",           2,         10,        51557,   list(c(0, 0, 1, 1, 2, 2, 3, 3, 4, 4)),
"0-4",           2,         10,        51557,   list(c(0, 0, 1, 1, 2, 2, 3, 3, 4, 4)),
"0-4",           2,         10,        51557,   list(c(0, 0, 1, 1, 2, 2, 3, 3, 4, 4)),
"0-4",           2,         10,        51557,   list(c(0, 0, 1, 1, 2, 2, 3, 3, 4, 4)),
"0-4",           2,         10,        51557,   list(c(0, 0, 1, 1, 2, 2, 3, 3, 4, 4)),
"0-4",           2,         10,        51557,   list(c(0, 0, 1, 1, 2, 2, 3, 3, 4, 4)),
"0-4",           2,         10,        51557,   list(c(0, 0, 1, 1, 2, 2, 3, 3, 4, 4)),
"0-4",           2,         10,        51557,   list(c(0, 0, 1, 1, 2, 2, 3, 3, 4, 4)),
"0-4",           2,         10,        51557,   list(c(0, 0, 1, 1, 2, 2, 3, 3, 4, 4)),
"0-4",           3,         15,        51558,   list(c(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4)),
"0-4",           3,         15,        51558,   list(c(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4)),
"0-4",           3,         15,        51558,   list(c(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4)),
"0-4",           3,         15,        51558,   list(c(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4)),
"0-4",           3,         15,        51558,   list(c(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4)),
"0-4",           3,         15,        51558,   list(c(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4)),
"0-4",           3,         15,        51558,   list(c(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4)),
"0-4",           3,         15,        51558,   list(c(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4)),
"0-4",           3,         15,        51558,   list(c(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4)),
"0-4",           3,         15,        51558,   list(c(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4)),
"0-4",           3,         15,        51558,   list(c(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4)),
"0-4",           3,         15,        51558,   list(c(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4)),
"0-4",           3,         15,        51558,   list(c(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4)),
"0-4",           3,         15,        51558,   list(c(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4)),
"0-4",           3,         15,        51558,   list(c(0, 0 ,0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4)),
"85-90",            1,         8  ,       51559,   list(c(0, 1, 2, 3, 4)),
"85-90",            1,         8  ,       51559,   list(c(0, 1, 2, 3, 4)),
"85-90",            1,         8  ,       51559,   list(c(0, 1, 2, 3, 4)),
"85-90",            1,         8  ,       51559,   list(c(0, 1, 2, 3, 4)),
"85-90",            1,         8  ,       51559,   list(c(0, 1, 2, 3, 4)),
"85-90",            1,         8  ,       51559,   list(c(0, 1, 2, 3, 4))
)

解除嵌套之前,先简化一下:

# First simplify the single_age column with list_simplify(), then unnest it
# so that every value of single_age gets its own row.
aux <- unnest_longer(
  mutate(aux, single_age = list_simplify(single_age)),
  single_age
)

输出:

> aux
# A tibble: 355 × 5
   age_group age_division total_estimate zipcode single_age
   <chr>            <dbl>          <dbl>   <dbl>      <dbl>
 1 0-4                  2             10   51557          0
 2 0-4                  2             10   51557          0
 3 0-4                  2             10   51557          1
 4 0-4                  2             10   51557          1
 5 0-4                  2             10   51557          2
 6 0-4                  2             10   51557          2
 7 0-4                  2             10   51557          3
 8 0-4                  2             10   51557          3
 9 0-4                  2             10   51557          4
10 0-4                  2             10   51557          4
# ℹ 345 more rows
# ℹ Use `print(n = ...)` to see more rows
© www.soinside.com 2019 - 2024. All rights reserved.