unnest_longer() 报错:can't recycle input(无法循环补齐输入)

问题描述 投票:0回答:1

我单独运行每一行,发现 unnest_longer() 是问题所在。我该如何解决这个问题?

# Reproduces the failing pipeline: reshape df wide by sn3, then unnest the
# resulting list-columns all at once.
df %>%
# Keep only the sn2 groups that have more than two rows.
filter(n() > 2L, .by = sn2) %>%
  # values_fn = list stores the values of each output cell as one list
  # element, so the widened value columns are list-columns.
  pivot_wider(names_from = sn3, values_from = leisure:hyear,
              values_fn = list) %>% 
  # This is the call that raises the "can't recycle input" error shown below;
  # it unnests every column from leisure_1 through hyear_2 in a single step.
  unnest_longer(leisure_1:hyear_2)



Error in `unnest_longer()`:
! In row 1, can't recycle input of size 14 to size 12.
Backtrace:
 1. ... %>% unnest_longer(leisure_1:hyear_2)
 2. tidyr::unnest_longer(., leisure_1:hyear_2)

数据已转换为宽格式。我注意到数据集中有 NULL。我怀疑是这些 NULL 导致了问题,于是尝试用 NA 替换它们,但仍然无法用 drop_na() 将其删除。

  # Second attempt: try to turn the NULL entries into NA and drop those rows
  # before unnesting.
  df %>%
  filter(n() > 2L, .by = sn2) %>%
  pivot_wider(names_from = sn3, values_from = leisure:hyear,
              values_fn = list) %>% 
  # Compares every column against the string "NULL" and substitutes NA where
  # the comparison is TRUE.
  # NOTE(review): in the dput() output the missing cells are NULL list
  # elements, not the string "NULL" -- confirm this comparison ever matches.
  mutate(across(everything(), ~ifelse(. == "NULL", NA, .))) %>%
  drop_na() %>% 
  unnest_longer(leisure_1:hyear_2)

数据:

library(tidyverse)
dput(df)
structure(list(sn2 = structure(c(171, 182, 230), label = "household number", format.stata = "%8.0g"), 
    leisure_1 = list(c(180, 370, 430, 470, 560, 320, 370, 470, 
    260, 310, 260, 150, 310, 390, 350, 110, 100), c(540, 270, 
    210, 430, 270, 320, 80, 60, 180, 140, 170, 350), c(170, 290, 
    480, 560)), leisure_2 = list(c(350, 410, 410, 500, 490, 180, 
    380, 290, 270, 350, 370, 440), c(550, 380, 440, 330, 60, 
    460), NULL), hmonth_1 = list(structure(c(8, 8, 8, 8, 1, 1, 
    1, 1, 4, 4, 9, 9, 6, 1, 1, 5, 5), label = "month of household interview", format.stata = "%8.0g", labels = c(january = 1, 
    february = 2, march = 3, april = 4, may = 5, june = 6, july = 7, 
    august = 8, september = 9, october = 10, november = 11, december = 12
    ), class = c("haven_labelled", "vctrs_vctr", "double")), 
        structure(c(8, 8, 1, 1, 9, 9, 2, 2, 9, 9, 5, 5), label = "month of household interview", format.stata = "%8.0g", labels = c(january = 1, 
        february = 2, march = 3, april = 4, may = 5, june = 6, 
        july = 7, august = 8, september = 9, october = 10, november = 11, 
        december = 12), class = c("haven_labelled", "vctrs_vctr", 
        "double")), structure(c(1, 1, 6, 6), label = "month of household interview", format.stata = "%8.0g", labels = c(january = 1, 
        february = 2, march = 3, april = 4, may = 5, june = 6, 
        july = 7, august = 8, september = 9, october = 10, november = 11, 
        december = 12), class = c("haven_labelled", "vctrs_vctr", 
        "double"))), hmonth_2 = list(structure(c(8, 8, 1, 1, 
    4, 4, 9, 9, 6, 6, 5, 5), label = "month of household interview", format.stata = "%8.0g", labels = c(january = 1, 
    february = 2, march = 3, april = 4, may = 5, june = 6, july = 7, 
    august = 8, september = 9, october = 10, november = 11, december = 12
    ), class = c("haven_labelled", "vctrs_vctr", "double")), 
        structure(c(8, 8, 9, 9, 2, 2), label = "month of household interview", format.stata = "%8.0g", labels = c(january = 1, 
        february = 2, march = 3, april = 4, may = 5, june = 6, 
        july = 7, august = 8, september = 9, october = 10, november = 11, 
        december = 12), class = c("haven_labelled", "vctrs_vctr", 
        "double")), NULL), hyear_1 = list(structure(c(2001, 2001, 
    2001, 2001, 2001, 2001, 2001, 2001, 2001, 2001, 2001, 2001, 
    2001, 2001, 2001, 2001, 2001), label = "year of household interview", format.stata = "%8.0g"), 
        structure(c(2001, 2001, 2001, 2001, 2001, 2001, 2001, 
        2001, 2000, 2000, 2001, 2001), label = "year of household interview", format.stata = "%8.0g"), 
        structure(c(2001, 2001, 2000, 2000), label = "year of household interview", format.stata = "%8.0g")), 
    hyear_2 = list(structure(c(2001, 2001, 2001, 2001, 2001, 
    2001, 2001, 2001, 2001, 2001, 2001, 2001), label = "year of household interview", format.stata = "%8.0g"), 
        structure(c(2001, 2001, 2001, 2001, 2001, 2001), label = "year of household interview", format.stata = "%8.0g"), 
        NULL)), row.names = c(NA, -3L), class = c("tbl_df", "tbl", 
"data.frame"))

期望的输出(复制自之前的问题):

sn2 leisure.1 hmonth.1 hyear.1 leisure.2 hmonth.2 hyear.2
227 230 6 2000 540 6 2000
227 130 6 2000 170 6 2000
250 370 6 2000 380 6 2000
250 380 6 2000 190 6 2000
294 120 6 2000 210 6 2000
294 200 6 2000 310 6 2000
307 130 7 2000 220 7 2000
307 480 7 2000 270 7 2000
r dplyr tidyverse
1个回答
0
投票

您的嵌套数据内部长度不一致。例如,对于 sn2 为 171 的那一行,leisure_1 有 17 个值,而 leisure_2 只有 12 个值。我假设第一个 leisure_1 应该与第一个 leisure_2 列在同一行,依此类推;这样当 leisure_1 处于第 13-17 个值时,leisure_2 对应的位置将是 NA。

我敢打赌这不是您想要的,在这种情况下,请在您的问题中进一步解释。

library(tidyverse) # worked using tidyverse 1.3.2 and 2.0.0

df |>
  # Stack every list-column into one `value` column; a name such as
  # "leisure_1" is split at "_" into type = "leisure" and obs = "1".
  pivot_longer(
    cols = -sn2,
    names_to = c("type", "obs"),
    names_sep = "_"
  ) |>
  # One row per element of each list entry.
  unnest_longer(col = value) |>
  # Position of each value within its sn2/type/obs group; this is the key
  # that lines values up side by side when widening again.
  mutate(num = row_number(), .by = c(sn2, type, obs)) |>
  pivot_wider(
    names_from = c(type, obs),
    values_from = value,
    names_vary = "slowest"
  )

结果

# A tibble: 33 × 8
     sn2   num leisure_1 leisure_2    hmonth_1      hmonth_2   hyear_1   hyear_2
   <dbl> <int> <dbl+lbl> <dbl+lbl>   <dbl+lbl>     <dbl+lbl> <dbl+lbl> <dbl+lbl>
 1   171     1       180       350 8 [august]  8 [august]         2001      2001
 2   171     2       370       410 8 [august]  8 [august]         2001      2001
 3   171     3       430       410 8 [august]  1 [january]        2001      2001
 4   171     4       470       500 8 [august]  1 [january]        2001      2001
 5   171     5       560       490 1 [january] 4 [april]          2001      2001
 6   171     6       320       180 1 [january] 4 [april]          2001      2001
 7   171     7       370       380 1 [january] 9 [september]      2001      2001
 8   171     8       470       290 1 [january] 9 [september]      2001      2001
 9   171     9       260       270 4 [april]   6 [june]           2001      2001
10   171    10       310       350 4 [april]   6 [june]           2001      2001
# ℹ 23 more rows
© www.soinside.com 2019 - 2024. All rights reserved.