按组跨多列创建虚拟变量

问题描述 投票:0回答:4

我的家庭花名册中有数据,如下面的数据框所示

# Person-level household roster: one row per individual (INDID) within a
# household (HHID), with two "yes"/"no" responses recorded per person.
hhroster <- data.frame(
  HHID = c(1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5, 6),
  INDID = c(1, 2, 3, 1, 2, 1, 2, 3, 4, 1, 2, 3, 1, 2, 1),
  response_1 = c(
    "yes", "no", "yes", "yes", "no", "no", "no", "no",
    "no", "yes", "yes", "no", "yes", "yes", "no"
  ),
  response_2 = c(
    "no", "no", "yes", "no", "no", "no", "yes", "no",
    "no", "no", "no", "no", "yes", "yes", "no"
  )
)

我希望在家庭层面创建虚拟变量:只要该家庭中至少有一名成员的回答为“yes”,其值就为 1,否则为 0。所需的输出是

# Desired household-level result: one row per HHID, where each HH_response_*
# column is 1 if any member of that household answered "yes" and 0 otherwise.
hh <- data.frame(
  HHID = c(1, 2, 3, 4, 5, 6),
  HH_response_1 = c(1, 1, 0, 1, 1, 0),
  HH_response_2 = c(1, 0, 1, 0, 1, 0)
)
r dataframe dplyr
4个回答
2
投票

这是一个解决方案。
使用

across
获取所有感兴趣的列,并通过检查逻辑值
.x == "yes"
的总和是否大于零来检查是否有任何 yes 值。
您可以让结果保持为逻辑型(logical);如有需要,R 会自动将
FALSE/TRUE
强制转换为
0/1

# Example roster: one row per person (INDID) nested in a household (HHID).
# The response vectors are laid out one household per line for readability.
hhroster <- data.frame(
  HHID = c(1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5, 6),
  INDID = c(1, 2, 3, 1, 2, 1, 2, 3, 4, 1, 2, 3, 1, 2, 1),
  response_1 = c(
    "yes", "no", "yes",      # HHID 1
    "yes", "no",             # HHID 2
    "no", "no", "no", "no",  # HHID 3
    "yes", "yes", "no",      # HHID 4
    "yes", "yes",            # HHID 5
    "no"                     # HHID 6
  ),
  response_2 = c(
    "no", "no", "yes",       # HHID 1
    "no", "no",              # HHID 2
    "no", "yes", "no", "no", # HHID 3
    "no", "no", "no",        # HHID 4
    "yes", "yes",            # HHID 5
    "no"                     # HHID 6
  )
)

suppressPackageStartupMessages(
  library(dplyr)
)

# Collapse the roster to one row per household (HHID). For every column whose
# name starts with "response", count the "yes" answers in the household and
# report TRUE when that count is greater than zero.
#
# na.rm = TRUE keeps a missing answer from turning the whole household into
# NA: a household with one "yes" and one NA still reports TRUE. Note that a
# household whose answers are all missing therefore reports FALSE, not NA.
hhroster %>%
  summarise(
    across(
      starts_with("response"),
      ~ sum(.x == "yes", na.rm = TRUE) > 0L
    ),
    .by = HHID
  )
#>   HHID response_1 response_2
#> 1    1       TRUE       TRUE
#> 2    2       TRUE      FALSE
#> 3    3      FALSE       TRUE
#> 4    4       TRUE      FALSE
#> 5    5       TRUE       TRUE
#> 6    6      FALSE      FALSE

创建于 2024-02-10,使用 reprex v2.0.2


1
投票

您可以使用

aggregate

# hhroster[-2] drops the second column (INDID), so the `.` in the formula
# expands to the remaining response columns, grouped by HHID.
# Unary `+` converts the logical result of any() into 0/1.
> aggregate(. ~ HHID, hhroster[-2], \(x) +any(x == "yes"))
  HHID response_1 response_2
1    1          1          1
2    2          1          0
3    3          0          1
4    4          1          0
5    5          1          1
6    6          0          0

1
投票

使用

dplyr
包,你可以做这样的事情

library(dplyr)

# Build the household-level dummies: one row per HHID, with HH_<column> equal
# to 1 when at least one member answered "yes" in that column and 0 otherwise.
#
# `x == "yes"` is already a logical vector, so no ifelse(..., TRUE, FALSE)
# wrapper or intermediate *_logic columns are needed. across() applies the
# same rule to every column whose name starts with "response", and `.names`
# adds the HH_ prefix to each output column.
hh <- hhroster |>
  summarise(
    across(
      starts_with("response"),
      \(x) as.numeric(any(x == "yes")),
      .names = "HH_{.col}"
    ),
    .by = HHID
  )

输出

> hh
  HHID HH_response_1 HH_response_2
1    1             1             1
2    2             1             0
3    3             0             1
4    4             1             0
5    5             1             1
6    6             0             0

0
投票
library(dplyr)

# Per household (HHID), flag each response column with 1L when any member
# answered "yes" and 0L otherwise.
hhroster |>
  reframe(
    across(response_1:response_2, \(col) as.integer(any(col == "yes"))),
    .by = HHID
  )

#>   HHID response_1 response_2
#> 1    1          1          1
#> 2    2          1          0
#> 3    3          0          1
#> 4    4          1          0
#> 5    5          1          1
#> 6    6          0          0

创建于 2024-02-10,使用 reprex v2.0.2

© www.soinside.com 2019 - 2024. All rights reserved.