如何在嵌套在 3 级列表中的多个 tibbles 中创建新变量并将所有内容合并到 R 中的一个数据帧中?

问题描述 投票:0回答:1

R 数据操作初学者,我在处理多级嵌套列表时遇到困难。

问题:有没有办法将这个

dat0
3级列表转换为下面的全局
dat1
数据框?

  • 新的
    fulltext
    列连接每个 tibble 中的
    text
    变量。
  • 新的
    nbsum
    列汇总(求和)每个 tibble 中的
    nb
    变量。

注意:欢迎使用基于

purrr
dplyr
函数(
mutate
...)的方法,以便在特定上下文中更好地理解这些工具。也欢迎其他方法!

感谢您的帮助

初始数据:

# Reproducible input: a 3-level nested list (pdf -> page -> tibble).
# Every tibble has four columns:
#   x, nb : running index 1..n of the text fragments (stored as doubles)
#   y     : a constant per tibble (1 to 18 across the whole structure)
#   text  : the fragments themselves
# `tibble::tibble()` is namespace-qualified so the example runs in a fresh
# session without a prior `library(tibble)`.
dat0 <- local({
  # Build one tibble from its y value and its text fragments.
  make_tbl <- function(y, pieces) {
    idx <- as.numeric(seq_along(pieces))
    tibble::tibble(
      x = idx,
      y = rep(y, length(pieces)),
      text = pieces,
      nb = idx
    )
  }

  # Build one page: two tibbles whose y values are consecutive (y, y + 1).
  make_page <- function(y, first, second) {
    list(
      tibble1 = make_tbl(y, first),
      tibble2 = make_tbl(y + 1, second)
    )
  }

  list(
    pdf1 = list(
      page1 = make_page(1, c("ha", "r", "r", "y"), c("p", "ot", "t", "er")),
      page2 = make_page(3, c("her", "m", "ione"), c("gra", "ng", "er"))
    ),
    pdf2 = list(
      page1 = make_page(5, c("vol", "de"), c("m", "ort")),
      page2 = make_page(
        7,
        c("a", "l", "b", "u", "s"),
        c("du", "m", "ble", "do", "re")
      ),
      page3 = make_page(9, c("dr", "a", "g", "o"), c("ma", "lf", "o", "y"))
    ),
    pdf3 = list(
      page1 = make_page(
        11,
        c("s", "ev", "e", "ru", "s"),
        c("r", "o", "g", "u", "e")
      ),
      page2 = make_page(13, c("r", "o", "n"), c("we", "as", "ley")),
      page3 = make_page(
        15,
        c("be", "l", "la", "t", "ri", "x"),
        c("l", "est", "r", "a", "ng", "e")
      ),
      page4 = make_page(17, c("sir", "ius"), c("bl", "ack"))
    )
  )
})

所需的输出(费力构建;下面的dput脚本):

# Expected result: one row per text fragment (68 rows), with the pdf/page
# identifiers, the original x / y / text / nb columns, and two per-tibble
# aggregates: `fulltext` (fragments pasted together) and `nbsum` (sum of nb).
dat1 <- local({
  # Text fragments of the 18 tibbles, in pdf -> page -> tibble order.
  pieces <- list(
    c("ha", "r", "r", "y"), c("p", "ot", "t", "er"),
    c("her", "m", "ione"), c("gra", "ng", "er"),
    c("vol", "de"), c("m", "ort"),
    c("a", "l", "b", "u", "s"), c("du", "m", "ble", "do", "re"),
    c("dr", "a", "g", "o"), c("ma", "lf", "o", "y"),
    c("s", "ev", "e", "ru", "s"), c("r", "o", "g", "u", "e"),
    c("r", "o", "n"), c("we", "as", "ley"),
    c("be", "l", "la", "t", "ri", "x"), c("l", "est", "r", "a", "ng", "e"),
    c("sir", "ius"), c("bl", "ack")
  )
  sizes <- lengths(pieces)

  # x and nb are both the 1..n running index inside each tibble (doubles).
  idx <- as.numeric(sequence(sizes))
  words <- vapply(pieces, paste, character(1), collapse = "")

  data.frame(
    pdf = rep(c("pdf1", "pdf2", "pdf3"), times = c(14, 22, 32)),
    page = rep(
      c("page1", "page2",
        "page1", "page2", "page3",
        "page1", "page2", "page3", "page4"),
      times = c(8, 6, 4, 10, 8, 10, 6, 12, 4)
    ),
    x = idx,
    # y numbers the tibbles 1 to 18.
    y = as.numeric(rep(seq_along(pieces), times = sizes)),
    text = unlist(pieces),
    nb = idx,
    fulltext = rep(words, times = sizes),
    # Sum of 1..n is n * (n + 1) / 2; the division keeps the column double.
    nbsum = rep(sizes * (sizes + 1) / 2, times = sizes),
    stringsAsFactors = FALSE
  )
})
r dataframe purrr nested-lists mutate
1个回答
0
投票

在我看来(IMO),完成此任务最灵活的函数是

collapse::unlist2d
:

# Flatten the 3-level list into one data frame, then add the two per-tibble
# columns requested in the question. `unlist2d()` alone only row-binds the
# tibbles and records the list names in the id columns; it does not create
# `fulltext` or `nbsum`.
local({
  flat <- collapse::unlist2d(dat0, idcols = c("pdf", "page", "tibble"))

  # One group per original tibble; drop = TRUE discards pdf/page/tibble
  # combinations that do not occur (e.g. pdf1 has no page3).
  grp <- interaction(flat$pdf, flat$page, flat$tibble, drop = TRUE)

  # ave() broadcasts each group's aggregate back onto every row of the group.
  flat$fulltext <- ave(flat$text, grp, FUN = function(s) paste(s, collapse = ""))
  flat$nbsum <- ave(flat$nb, grp, FUN = sum)

  # The tibble id was only needed for grouping; the target `dat1` has no
  # such column.
  flat$tibble <- NULL
  flat
})
© www.soinside.com 2019 - 2024. All rights reserved.