我单独运行每一行,发现 unnest_longer() 是问题所在。我该如何解决这个问题?
# Keep only the sn2 groups that have more than two rows, then pivot to wide
# format. `values_fn = list` collects the values of each cell into a list
# element, so the pivoted columns are list-columns.
df %>%
filter(n() > 2L, .by = sn2) %>%
pivot_wider(names_from = sn3, values_from = leisure:hyear,
values_fn = list) %>%
# This is the step that raises the recycling error shown below.
unnest_longer(leisure_1:hyear_2)
Error in `unnest_longer()`:
! In row 1, can't recycle input of size 14 to size 12.
Backtrace:
1. ... %>% unnest_longer(leisure_1:hyear_2)
2. tidyr::unnest_longer(., leisure_1:hyear_2)
数据已转换为宽格式(wide format)。我注意到数据集中有 NULL,怀疑是这些 NULL 导致了问题,于是尝试先把它们替换为 NA,再用 drop_na() 删除,但没有成功。
# Second attempt: the same pipeline, with an extra step intended to turn NULL
# entries into NA and drop them before unnesting.
df %>%
filter(n() > 2L, .by = sn2) %>%
pivot_wider(names_from = sn3, values_from = leisure:hyear,
values_fn = list) %>%
# NOTE(review): this compares each column against the string "NULL" rather
# than testing list elements with is.null() -- confirm whether it matches the
# NULL entries as intended.
mutate(across(everything(), ~ifelse(. == "NULL", NA, .))) %>%
drop_na() %>%
unnest_longer(leisure_1:hyear_2)
数据:
library(tidyverse)
dput(df)
structure(list(sn2 = structure(c(171, 182, 230), label = "household number", format.stata = "%8.0g"),
leisure_1 = list(c(180, 370, 430, 470, 560, 320, 370, 470,
260, 310, 260, 150, 310, 390, 350, 110, 100), c(540, 270,
210, 430, 270, 320, 80, 60, 180, 140, 170, 350), c(170, 290,
480, 560)), leisure_2 = list(c(350, 410, 410, 500, 490, 180,
380, 290, 270, 350, 370, 440), c(550, 380, 440, 330, 60,
460), NULL), hmonth_1 = list(structure(c(8, 8, 8, 8, 1, 1,
1, 1, 4, 4, 9, 9, 6, 1, 1, 5, 5), label = "month of household interview", format.stata = "%8.0g", labels = c(january = 1,
february = 2, march = 3, april = 4, may = 5, june = 6, july = 7,
august = 8, september = 9, october = 10, november = 11, december = 12
), class = c("haven_labelled", "vctrs_vctr", "double")),
structure(c(8, 8, 1, 1, 9, 9, 2, 2, 9, 9, 5, 5), label = "month of household interview", format.stata = "%8.0g", labels = c(january = 1,
february = 2, march = 3, april = 4, may = 5, june = 6,
july = 7, august = 8, september = 9, october = 10, november = 11,
december = 12), class = c("haven_labelled", "vctrs_vctr",
"double")), structure(c(1, 1, 6, 6), label = "month of household interview", format.stata = "%8.0g", labels = c(january = 1,
february = 2, march = 3, april = 4, may = 5, june = 6,
july = 7, august = 8, september = 9, october = 10, november = 11,
december = 12), class = c("haven_labelled", "vctrs_vctr",
"double"))), hmonth_2 = list(structure(c(8, 8, 1, 1,
4, 4, 9, 9, 6, 6, 5, 5), label = "month of household interview", format.stata = "%8.0g", labels = c(january = 1,
february = 2, march = 3, april = 4, may = 5, june = 6, july = 7,
august = 8, september = 9, october = 10, november = 11, december = 12
), class = c("haven_labelled", "vctrs_vctr", "double")),
structure(c(8, 8, 9, 9, 2, 2), label = "month of household interview", format.stata = "%8.0g", labels = c(january = 1,
february = 2, march = 3, april = 4, may = 5, june = 6,
july = 7, august = 8, september = 9, october = 10, november = 11,
december = 12), class = c("haven_labelled", "vctrs_vctr",
"double")), NULL), hyear_1 = list(structure(c(2001, 2001,
2001, 2001, 2001, 2001, 2001, 2001, 2001, 2001, 2001, 2001,
2001, 2001, 2001, 2001, 2001), label = "year of household interview", format.stata = "%8.0g"),
structure(c(2001, 2001, 2001, 2001, 2001, 2001, 2001,
2001, 2000, 2000, 2001, 2001), label = "year of household interview", format.stata = "%8.0g"),
structure(c(2001, 2001, 2000, 2000), label = "year of household interview", format.stata = "%8.0g")),
hyear_2 = list(structure(c(2001, 2001, 2001, 2001, 2001,
2001, 2001, 2001, 2001, 2001, 2001, 2001), label = "year of household interview", format.stata = "%8.0g"),
structure(c(2001, 2001, 2001, 2001, 2001, 2001), label = "year of household interview", format.stata = "%8.0g"),
NULL)), row.names = c(NA, -3L), class = c("tbl_df", "tbl",
"data.frame"))
从之前的问题复制所需的输出:
sn2 | leisure.1 | hmonth.1 | hyear.1 | leisure.2 | hmonth.2 | hyear.2 |
---|---|---|---|---|---|---|
227 | 230 | 6 | 2000 | 540 | 6 | 2000 |
227 | 130 | 6 | 2000 | 170 | 6 | 2000 |
250 | 370 | 6 | 2000 | 380 | 6 | 2000 |
250 | 380 | 6 | 2000 | 190 | 6 | 2000 |
294 | 120 | 6 | 2000 | 210 | 6 | 2000 |
294 | 200 | 6 | 2000 | 310 | 6 | 2000 |
307 | 130 | 7 | 2000 | 220 | 7 | 2000 |
307 | 480 | 7 | 2000 | 270 | 7 | 2000 |
您的嵌套数据内部没有对齐:例如,对于 sn2 为 171 的那一行,leisure_1 有 17 个值,而 leisure_2 只有 12 个值。我假设第一个 leisure_1 应与第一个 leisure_2 列在同一行,依此类推;这样一来,当 leisure_1 处于第 13–17 个值时,leisure_2 的对应位置将是 NA。
我猜这可能并不是您想要的结果;如果是这样,请在您的问题中进一步说明。
library(tidyverse) # worked using tidyverse 1.3.2 and 2.0.0
# Align the nested values by position within each sn2 so that the k-th value
# of every column lands on the same output row.
df |>
# Go long first: split names such as "leisure_1" into `type` and `obs`,
# leaving the list-columns stacked in a single `value` column.
pivot_longer(-sn2, names_to = c("type", "obs"), names_sep = "_") |>
# One row per individual value instead of one row per list element.
unnest_longer(value) |>
# Position of each value within its sn2/type/obs series; used as the
# alignment key when pivoting back.
mutate(num = row_number(), .by = c(sn2, type, obs)) |>
# Back to wide form, one row per sn2/num; positions that a shorter series
# does not reach are filled with NA.
pivot_wider(names_from = c(type, obs), values_from = value, names_vary = "slowest")
结果
# A tibble: 33 × 8
sn2 num leisure_1 leisure_2 hmonth_1 hmonth_2 hyear_1 hyear_2
<dbl> <int> <dbl+lbl> <dbl+lbl> <dbl+lbl> <dbl+lbl> <dbl+lbl> <dbl+lbl>
1 171 1 180 350 8 [august] 8 [august] 2001 2001
2 171 2 370 410 8 [august] 8 [august] 2001 2001
3 171 3 430 410 8 [august] 1 [january] 2001 2001
4 171 4 470 500 8 [august] 1 [january] 2001 2001
5 171 5 560 490 1 [january] 4 [april] 2001 2001
6 171 6 320 180 1 [january] 4 [april] 2001 2001
7 171 7 370 380 1 [january] 9 [september] 2001 2001
8 171 8 470 290 1 [january] 9 [september] 2001 2001
9 171 9 260 270 4 [april] 6 [june] 2001 2001
10 171 10 310 350 4 [april] 6 [june] 2001 2001
# ℹ 23 more rows