我正在尝试将数据从长格式重塑为宽格式。我想保留与 sn3=1 和 sn3=2 匹配的所有行,而不是仅采用第一个值。我怎样才能实现这个目标?
所需输出:
sn2 | leisure.1 | hmonth.1 | hyear.1 | leisure.2 | hmonth.2 | hyear.2 |
---|---|---|---|---|---|---|
227 | 230 | 6 | 2000 | 540 | 6 | 2000 |
227 | 130 | 6 | 2000 | 170 | 6 | 2000 |
250 | 370 | 6 | 2000 | 380 | 6 | 2000 |
250 | 380 | 6 | 2000 | 190 | 6 | 2000 |
294 | 120 | 6 | 2000 | 210 | 6 | 2000 |
294 | 200 | 6 | 2000 | 310 | 6 | 2000 |
307 | 130 | 7 | 2000 | 220 | 7 | 2000 |
307 | 480 | 7 | 2000 | 270 | 7 | 2000 |
> dput(df)
# Example data as printed by dput(df): an 18-row tibble in long format with
# one row per observation and the columns
#   sn2     - household number
#   sn3     - person number (1 or 2)
#   leisure - unlabelled numeric value
#   hmonth  - month of household interview (haven_labelled, january = 1, ...)
#   hyear   - year of household interview
# The "label" / "format.stata" attributes are variable metadata carried on the
# columns. The trailing "na.action" attribute (class "omit") lists the
# original row indices recorded as omitted.
structure(list(sn2 = structure(c(227, 227, 227, 227, 249, 249,
250, 250, 250, 250, 294, 294, 294, 294, 307, 307, 307, 307), label = "household number", format.stata = "%8.0g"),
sn3 = structure(c(1, 1, 2, 2, 1, 1, 1, 1, 2, 2, 1, 1, 2,
2, 1, 1, 2, 2), label = "person number", format.stata = "%8.0g"),
leisure = c(230, 130, 540, 170, 430, 480, 370, 380, 380,
190, 120, 200, 210, 310, 130, 480, 220, 270), hmonth = structure(c(6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7), label = "month of household interview", format.stata = "%8.0g", labels = c(january = 1,
february = 2, march = 3, april = 4, may = 5, june = 6, july = 7,
august = 8, september = 9, october = 10, november = 11, december = 12
), class = c("haven_labelled", "vctrs_vctr", "double")),
hyear = structure(c(2000, 2000, 2000, 2000, 2000, 2000, 2000,
2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000,
2000), label = "year of household interview", format.stata = "%8.0g")), row.names = c(NA,
-18L), class = c("tbl_df", "tbl", "data.frame"), na.action = structure(c(`3492` = 3492L,
`3493` = 3493L, `3494` = 3494L, `3495` = 3495L, `3496` = 3496L,
`3497` = 3497L, `3498` = 3498L, `3499` = 3499L, `3500` = 3500L,
`3501` = 3501L, `3508` = 3508L, `3509` = 3509L, `3510` = 3510L,
`3511` = 3511L, `3512` = 3512L, `3513` = 3513L, `3518` = 3518L,
`3519` = 3519L, `3520` = 3520L, `3521` = 3521L, `3522` = 3522L,
`3523` = 3523L, `3524` = 3524L, `3525` = 3525L), class = "omit"))
您可以使用 tidyr 中的 pivot_wider。此外,您还必须确保每一行都有一个唯一的 ID,因为仅凭现有的列无法唯一标识各行:
library(dplyr)
library(tidyr)

# Reshape df from long to wide: one output row per household (sn2) and
# within-person row number, with the values of person 1 and person 2
# (sn3) placed side by side in ".1" / ".2" columns.
df %>%
  # Number the rows within each household/person pair. Numbering within
  # sn3 alone lets the ids run across households, so as soon as one
  # household has no person 2 (here sn2 = 249) the ids of person 1 and
  # person 2 in every later household stop matching and their rows end up
  # on separate, half-empty lines.
  group_by(sn2, sn3) %>%
  mutate(id = row_number()) %>%
  # The grouping was only needed for the numbering above.
  ungroup() %>%
  pivot_wider(
    names_from = sn3,
    values_from = leisure:hyear,
    names_glue = "{.value}.{sn3}"
  ) %>%
  # id only served to keep repeated rows apart during the pivot.
  select(-id)
# Households without a person 2 (sn2 = 249) keep NA in the ".2" columns;
# filter them out beforehand, or call tidyr::drop_na() afterwards, if only
# households with both persons are wanted.
#> # A tibble: 10 × 7
#> sn2 leisure.1 leisure.2 hmonth.1 hmonth.2 hyear.1 hyear.2
#> <dbl> <dbl> <dbl> <hvn_lbll> <hvn_lbll> <dbl> <dbl>
#> 1 227 230 540 6 6 2000 2000
#> 2 227 130 170 6 6 2000 2000
#> 3 249 430 NA 6 NA 2000 NA
#> 4 249 480 NA 6 NA 2000 NA
#> 5 250 370 380 6 6 2000 2000
#> 6 250 380 190 6 6 2000 2000
#> 7 294 120 210 6 6 2000 2000
#> 8 294 200 310 6 6 2000 2000
#> 9 307 130 220 7 7 2000 2000
#> 10 307 480 270 7 7 2000 2000
创建于 2024-03-18,使用 reprex v2.0.2