通过从现有列中提取字符串并将行值转换为新列，将数据从宽格式重新调整为长格式

Question

我有这个数据：

    long1<-structure(list(id = "R_88j7lG37gLfxk22", t1_choice = "2", t2_choice = "1", 
    t3_choice = "1", t4_choice = "2", t1_p1_env = "high_env", 
    t1_p1_eth = "low_eth", t1_p1_pri = "$125", t1_p2_env = "mid_env", 
    t1_p2_eth = "high_eth", t1_p2_pri = "$25", t1_p3_env = "low_env", 
    t1_p3_eth = "mid_eth", t1_p3_pri = "$75", t2_p1_env = "high_env", 
    t2_p1_eth = "low_eth", t2_p1_pri = "$75", t2_p2_env = "mid_env", 
    t2_p2_eth = "mid_eth", t2_p2_pri = "$125", t2_p3_env = "mid_env", 
    t2_p3_eth = "mid_eth", t2_p3_pri = "$75", t3_p1_env = "high_env", 
    t3_p1_eth = "high_eth", t3_p1_pri = "$125", t3_p2_env = "mid_env", 
    t3_p2_eth = "low_eth", t3_p2_pri = "$25", t3_p3_env = "low_env", 
    t3_p3_eth = "high_eth", t3_p3_pri = "$25", t4_p1_env = "low_env", 
    t4_p1_eth = "low_eth", t4_p1_pri = "$75", t4_p2_env = "high_env", 
    t4_p2_eth = "mid_eth", t4_p2_pri = "$125", t4_p3_env = "low_env", 
    t4_p3_eth = "high_eth", t4_p3_pri = "$25"), row.names = c(NA, 
-1L), class = c("tbl_df", "tbl", "data.frame"))

我想将其转换为更宽的格式，如下所示：

逻辑是我将

和

加宽为基于

t1_p1 t1_p2

的2个新列等等..

然后选择列有二进制数据

和

。选择列为从

t1_choice

到

t4_choice

的 4 列，可用值为

1,2,3

，因此每个

id

将有 12 行。例如，如果

t1_choice =2

3 行中的第二行应该是 1，其他 2 行应该是 0。

然后我得到 9 列

low_env

mid_env

high_env

,

low_eth

mid_eth

high_eth

和 p

ri25  pri75 pri125

如果值存在则为 1，如果值不存在则为 0。

图像中的颜色可以帮助更好地理解新数据是如何创建的。这仅适用于我的第一行数据。我已经尝试了下面的一些方法，但无法使整个过程同时工作，从那以后我通过合并它们来弄乱我的结果。与我的表格相比，结果也不正确

#my trial

# reshape longer
df_long <- long1 |> 
  pivot_longer(
    !id,
    names_to = "task",
    names_pattern = "t(\\d+).*",
    values_to = "choice"
  )

# expand dummies
expand_dummy <- function(x) {
  out <- rep(0, times = 3)
  out[as.numeric(x)] <- 1
  out
}

df_long<-df_long |> 
  group_by(id, task) |> 
  reframe(choice = expand_dummy(choice))





names(long1)[-1] <- 
  strsplit(names(long1)[-1], '_') |>
  sapply(\(x) paste(paste(c(rev(x[-1])), collapse='_'), x[1], sep='.'))
res <- reshape(as.data.frame(long1), varying=-1, direction='l') |>
  reshape(direction='l', varying=-(1:3), new.row.names=1:1e9, sep='_') |>
  type.convert(as.is=FALSE)
all<-cbind(res[1:3], model.matrix(~ 0 + env + eth + pri, res, 
                                  contrasts.arg = list(eth=contrasts(res$eth, contrasts=FALSE),
                                                       pri=contrasts(res$pri, contrasts=FALSE))))



all<-all[order(with(all, factor(id, unique(id)))),]

# Reordering the rows in the second dataframe based on the order of 'id' column in the first dataframe
all$id <- factor(all$id, levels = unique(df_long$id))
all2 <- all[order(all$id), ]


all2<-all2[,-c(1,3)]
all3<-cbind(df_long,all2)

Answer 1

这是一项极其复杂的重塑任务：

library(tidyverse)
library(fastDummies)

long1 %>%
  select(!contains("choice")) %>%
  pivot_longer(-1, names_sep = "_", names_to = c("t", "p", ".value")) %>%
  mutate(across(t:p, ~as.numeric(substr(.x, 2, 4)))) %>%
  bind_cols(long1 %>% select(id, contains("choice")) %>%
    pivot_longer(-1, names_sep = "_", names_to = c("t", "choice")) %>%
    reframe(value = value, choice = c(0, 0, 0), .by = c("id", "t")) %>%
    mutate(choice = as.numeric(row_number() == value), .by = c("id", "t")) %>%
    select(choice)) %>%
  select(id, env, eth, pri, choice, t, p) %>%
  mutate(env = factor(env, c("low_env", "mid_env", "high_env")),
         eth = factor(eth, c("low_eth", "mid_eth", "high_eth"))) %>%
  dummy_cols(c("env", "eth", "pri"), remove_selected_columns = TRUE) %>%
  rename_with(~substr(.x, 5, 20), env_low_env:eth_high_eth) %>%
  select(1, 5:13, 2:4) %>%
  as.data.frame()
#>                   id low_env mid_env high_env low_eth mid_eth high_eth pri_$25 pri_$75 pri_$125 choice t p
#> 1  R_88j7lG37gLfxk22       0       0        1       1       0        0       0       0        1      0 1 1
#> 2  R_88j7lG37gLfxk22       0       1        0       0       0        1       1       0        0      1 1 2
#> 3  R_88j7lG37gLfxk22       1       0        0       0       1        0       0       1        0      0 1 3
#> 4  R_88j7lG37gLfxk22       0       0        1       1       0        0       0       1        0      1 2 1
#> 5  R_88j7lG37gLfxk22       0       1        0       0       1        0       0       0        1      0 2 2
#> 6  R_88j7lG37gLfxk22       0       1        0       0       1        0       0       1        0      0 2 3
#> 7  R_88j7lG37gLfxk22       0       0        1       0       0        1       0       0        1      1 3 1
#> 8  R_88j7lG37gLfxk22       0       1        0       1       0        0       1       0        0      0 3 2
#> 9  R_88j7lG37gLfxk22       1       0        0       0       0        1       1       0        0      0 3 3
#> 10 R_88j7lG37gLfxk22       1       0        0       1       0        0       0       1        0      0 4 1
#> 11 R_88j7lG37gLfxk22       0       0        1       0       1        0       0       0        1      1 4 2
#> 12 R_88j7lG37gLfxk22       1       0        0       0       0        1       1       0        0      0 4 3

通过从现有列中提取字符串并将行值转换为新列，将数据从宽格式重新调整为长格式

问题描述投票：0回答：1

1个回答

最新问题

通过从现有列中提取字符串并将行值转换为新列，将数据从宽格式重新调整为长格式

问题描述 投票：0回答：1

1个回答

最新问题

问题描述投票：0回答：1