这个问题有点复杂:我有一个数据框,记录了患者的门诊就诊情况,每次就诊对应一行,所用药物每种占一列。有些患者会在同一天多次就诊,而且各次使用的药物不尽相同——每次就诊都会单独记录,记录的药物数量可能更多、更少或相同。出现这种情况时,我想把这些行合并为一行,并且只保留不重复的药物。下面第一个数据框是与我的数据类似的示例,第二个数据框是期望的输出。
如能提供任何解决方案或指导,将不胜感激。
# Example input: one row per outpatient visit, one column per recorded drug.
# Patients 1, 2 and 4 each have two visits on the same day; patient 3 has
# visits on two different days; patient 4 has no drugs recorded at all.
patient_id <- rep(c(1, 2, 3, 4), each = 2)
dates <- rep(
  c("2023-01-01", "2022-01-01", "2019-01-01", "2019-01-02"),
  times = c(2, 2, 1, 3)
)

# Drug slots are sparse: NA marks a slot with no drug recorded for that visit.
drug_1 <- c("paracetamol", NA, "aspirin", rep(NA, 5))
drug_2 <- c("ibuprofen", "codine", NA, "paracetamol", NA, "asprin", NA, NA)
drug_3 <- c("aspirin", "cough syrup", NA, NA, "ibuprofen", rep(NA, 3))
drug_4 <- c(NA, "cocodamol", "ibuprofen", rep(NA, 5))
drug_5 <- c(NA, "aspirin", rep(NA, 6))

patients_data <- data.frame(
  patient_id, dates,
  drug_1, drug_2, drug_3, drug_4, drug_5
)
patients_data
patient_id dates drug_1 drug_2 drug_3 drug_4 drug_5
1 1 2023-01-01 paracetamol ibuprofen aspirin <NA> <NA>
2 1 2023-01-01 <NA> codine cough syrup cocodamol aspirin
3 2 2022-01-01 aspirin <NA> <NA> ibuprofen <NA>
4 2 2022-01-01 <NA> paracetamol <NA> <NA> <NA>
5 3 2019-01-01 <NA> <NA> ibuprofen <NA> <NA>
6 3 2019-01-02 <NA> asprin <NA> <NA> <NA>
7 4 2019-01-02 <NA> <NA> <NA> <NA> <NA>
8 4 2019-01-02 <NA> <NA> <NA> <NA> <NA>
# Desired result: one row per patient and date, with the unique drugs from
# all same-day visits spread across the drug_* columns.
patient_id <- c(1, 2, 3, 3, 4)
dates <- c(
  "2023-01-01", "2022-01-01",
  "2019-01-01", "2019-01-02", "2019-01-02"
)

# NA marks an unused drug slot.
drug_1 <- c("paracetamol", "aspirin", rep(NA, 3))
drug_2 <- c("ibuprofen", "ibuprofen", NA, "aspirin", NA)
drug_3 <- c("aspirin", "paracetamol", "ibuprofen", NA, NA)
drug_4 <- c("codine", rep(NA, 4))
drug_5 <- c("cough syrup", rep(NA, 4))
drug_6 <- c("cocodamol", rep(NA, 4))

patients_data <- data.frame(
  patient_id, dates,
  drug_1, drug_2, drug_3, drug_4, drug_5, drug_6
)
patients_data
patient_id dates drug_1 drug_2 drug_3 drug_4 drug_5 drug_6
1 1 2023-01-01 paracetamol ibuprofen aspirin codine cough syrup cocodamol
2 2 2022-01-01 aspirin ibuprofen paracetamol <NA> <NA> <NA>
3 3 2019-01-01 <NA> <NA> ibuprofen <NA> <NA> <NA>
4 3 2019-01-02 <NA> aspirin <NA> <NA> <NA> <NA>
5 4 2019-01-02 <NA> <NA> <NA> <NA> <NA> <NA>
你可以试试类似下面的做法。
library(tidyverse)

# Collapse same-day visits into a single row per patient and date that holds
# only the unique drugs, renumbered drug_1, drug_2, ... in order of first
# appearance. Drug names are compared as exact strings, so spelling variants
# (e.g. "asprin" vs "aspirin") are treated as different drugs.
patients_data %>%
  # Long format: one row per drug slot of every visit.
  pivot_longer(
    starts_with("drug"),
    values_to = "drug"
  ) %>%
  # The original slot name (drug_1, drug_2, ...) carries no meaning once the
  # visits are merged; dropping it lets distinct() see repeated drugs as
  # duplicate rows.
  select(-name) %>%
  distinct() %>%
  # Remove empty slots, but keep the single NA row of a patient/date that has
  # no drugs at all, so that such visits still appear in the result.
  filter(
    !is.na(drug) | all(is.na(drug)),
    .by = c(patient_id, dates)
  ) %>%
  # Renumber the remaining drugs within each patient/date.
  mutate(
    name = paste0("drug_", row_number()),
    .by = c(patient_id, dates)
  ) %>%
  pivot_wider(
    names_from = name,
    values_from = drug
  )
# Expected result for the example input (values shown in full; the console
# may abbreviate long values depending on its width):
#> # A tibble: 5 x 8
#>   patient_id dates      drug_1      drug_2    drug_3      drug_4 drug_5      drug_6
#>        <dbl> <chr>      <chr>       <chr>     <chr>       <chr>  <chr>       <chr>
#> 1          1 2023-01-01 paracetamol ibuprofen aspirin     codine cough syrup cocodamol
#> 2          2 2022-01-01 aspirin     ibuprofen paracetamol <NA>   <NA>        <NA>
#> 3          3 2019-01-01 ibuprofen   <NA>      <NA>        <NA>   <NA>        <NA>
#> 4          3 2019-01-02 asprin      <NA>      <NA>        <NA>   <NA>        <NA>
#> 5          4 2019-01-02 <NA>        <NA>      <NA>        <NA>   <NA>        <NA>
创建于 2023-10-23,使用 reprex v2.0.2