R将多列转换为长格式

问题描述 投票:0回答:1

我有一个宽格式的个人级别(person-level)数据框,想把它整理成一个长格式的汇总表。示例代码和期望的输出如下:

set.seed(100)

# Original wide-format, person-level data.
# d1-d3 are responses drawn from {1, 0, NA}; the three sample() calls stay in
# column order (d1, d2, d3) so the seeded draws are consumed in the same order.
# Each *_br column maps the matching response to a fixed value and stays NA
# wherever the response is NA.
dat_wide <- tibble(
  group = rep(c("a", "b", "c"), each = 5),
  d1 = sample(c(1, 0, NA_real_), size = 15, replace = TRUE),
  d2 = sample(c(1, 0, NA_real_), size = 15, replace = TRUE),
  d3 = sample(c(1, 0, NA_real_), size = 15, replace = TRUE)
) %>%
  mutate(
    d1_br = case_when(d1 == 1 ~ 0.8, d1 == 0 ~ 0.2, TRUE ~ NA_real_),
    d2_br = case_when(d2 == 1 ~ 0.6, d2 == 0 ~ 0.4, TRUE ~ NA_real_),
    d3_br = case_when(d3 == 1 ~ 0.95, d3 == 0 ~ 0.05, TRUE ~ NA_real_)
  )

dat_wide

# A tibble: 15 x 7
   group    d1    d2    d3 d1_br d2_br d3_br
   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
 1 a         0    NA     0   0.2  NA    0.05
 2 a         1     0     0   0.8   0.4  0.05
 3 a         1    NA     1   0.8  NA    0.95
 4 a         1    NA    NA   0.8  NA   NA   
 5 a         0    NA     1   0.2  NA    0.95
 6 b        NA     0     1  NA     0.4  0.95
 7 b        NA    NA     1  NA    NA    0.95
 8 b        NA     0     1  NA     0.4  0.95
 9 b         1     0     0   0.8   0.4  0.05
10 b         1     1    NA   0.8   0.6 NA   
11 c         0    NA     1   0.2  NA    0.95
12 c         0    NA     0   0.2  NA    0.05
13 c        NA    NA     1  NA    NA    0.95
14 c         1    NA     0   0.8  NA    0.05
15 c         0     1     0   0.2   0.6  0.05

# Long format summary table (the hand-built target shape): one row per
# group x dv x response combination, with the matching `br` value.
# Groups must be a, b, c to mirror dat_wide (the original repeated "b" and
# never produced group "c").
dat_long <- tibble(group = rep(c('a','b','c'), each = 9),
                   dv = rep(c('d1','d2','d3'), each = 3, times = 3),
                   response = rep(c(1, 0, NA_real_), times = 9)) %>% 
  # A comparison against an NA response yields NA, so those rows fall through
  # to the final TRUE branch and get br = NA.
  mutate(br = case_when(
    dv == 'd1' & response == 1 ~ .8,
    dv == 'd1' & response == 0 ~ .2,
    dv == 'd2' & response == 1 ~ .6,
    dv == 'd2' & response == 0 ~ .4,
    dv == 'd3' & response == 1 ~ .95,
    dv == 'd3' & response == 0 ~ .05,
    TRUE ~ NA_real_
  ))

# A tibble: 27 x 4
   group dv    response    br
   <chr> <chr>    <dbl> <dbl>
 1 a     d1           1  0.8 
 2 a     d1           0  0.2 
 3 a     d1          NA NA   
 4 a     d2           1  0.6 
 5 a     d2           0  0.4 
 6 a     d2          NA NA   
 7 a     d3           1  0.95
 8 a     d3           0  0.05
 9 a     d3          NA NA   
10 b     d1           1  0.8 
# ... with 17 more rows

我相当确定这可以用 tidyr::pivot_longer 来完成,不过我对它还不熟悉,还没有弄清它的全部功能。类似下面这样的写法应该可行,但有人能帮我补全 pivot_longer 的语法吗?我是否需要 names_sep 或 names_prefix 参数?我还没完全弄明白这些参数。

# Possible solution (incomplete sketch): the pivot_longer() step below is still
# a commented-out placeholder. Until it is filled in, dat_wide has no `dv` or
# `response` columns, so the distinct() call cannot run as written.
dat_long <- dat_wide %>% 
  # pivot_longer(...) %>%   # TODO: reshape d1..d3 and d*_br into dv/response/br
  distinct(group, dv, response, .keep_all = TRUE)
r dplyr tidyverse reshape tidyr
1个回答
0
投票
library(tidyverse)

# Reshape dat_wide into one row per distinct group x dv x response, carrying
# the matching `br` value along. dat_wide itself is left unmodified.
dat_wide %>%
  # Give the bare response columns (d1, d2, ...) an explicit "_response"
  # suffix so every measure column has the form "<dv>_<measure>".
  rename_with(~ paste0(.x, "_response"), .cols = matches("^d\\d+$")) %>%
  # Split each name at "_": the first piece becomes the `dv` value and the
  # second piece (".value") names the output column (`response` or `br`).
  pivot_longer(cols = -group,
               names_to = c("dv", ".value"),
               names_sep = "_") %>%
  # Keep one row per observed combination; .keep_all retains `br`.
  distinct(group, dv, response, .keep_all = TRUE) %>%
  arrange(group, dv)
© www.soinside.com 2019 - 2024. All rights reserved.