我有这个数据:
long1<-structure(list(id = "R_88j7lG37gLfxk22", t1_choice = "2", t2_choice = "1",
t3_choice = "1", t4_choice = "2", t1_p1_env = "high_env",
t1_p1_eth = "low_eth", t1_p1_pri = "$125", t1_p2_env = "mid_env",
t1_p2_eth = "high_eth", t1_p2_pri = "$25", t1_p3_env = "low_env",
t1_p3_eth = "mid_eth", t1_p3_pri = "$75", t2_p1_env = "high_env",
t2_p1_eth = "low_eth", t2_p1_pri = "$75", t2_p2_env = "mid_env",
t2_p2_eth = "mid_eth", t2_p2_pri = "$125", t2_p3_env = "mid_env",
t2_p3_eth = "mid_eth", t2_p3_pri = "$75", t3_p1_env = "high_env",
t3_p1_eth = "high_eth", t3_p1_pri = "$125", t3_p2_env = "mid_env",
t3_p2_eth = "low_eth", t3_p2_pri = "$25", t3_p3_env = "low_env",
t3_p3_eth = "high_eth", t3_p3_pri = "$25", t4_p1_env = "low_env",
t4_p1_eth = "low_eth", t4_p1_pri = "$75", t4_p2_env = "high_env",
t4_p2_eth = "mid_eth", t4_p2_pri = "$125", t4_p3_env = "low_env",
t4_p3_eth = "high_eth", t4_p3_pri = "$25"), row.names = c(NA,
-1L), class = c("tbl_df", "tbl", "data.frame"))
我想将其转换为更宽的格式,如下所示:
逻辑是我将
t
和p
加宽为基于t1_p1 t1_p2
的2个新列等等..
然后选择列有二进制数据
0
和1
。选择列为从 t1_choice
到 t4_choice
的 4 列,可用值为 1,2,3
,因此每个 id
将有 12 行。例如,如果 t1_choice =2
3 行中的第二行应该是 1,其他 2 行应该是 0。
然后我得到 9 列
low_env
mid_env
high_env
,low_eth
mid_eth
high_eth
和 pri25 pri75 pri125
如果值存在则为 1,如果值不存在则为 0。
图像中的颜色可以帮助更好地理解新数据是如何创建的。这仅适用于我的第一行数据。我已经尝试了下面的一些方法,但无法使整个过程同时工作,从那以后我通过合并它们来弄乱我的结果。与我的表格相比,结果也不正确
#my trial
# reshape longer
df_long <- long1 |>
pivot_longer(
!id,
names_to = "task",
names_pattern = "t(\\d+).*",
values_to = "choice"
)
# expand dummies
expand_dummy <- function(x) {
out <- rep(0, times = 3)
out[as.numeric(x)] <- 1
out
}
df_long<-df_long |>
group_by(id, task) |>
reframe(choice = expand_dummy(choice))
names(long1)[-1] <-
strsplit(names(long1)[-1], '_') |>
sapply(\(x) paste(paste(c(rev(x[-1])), collapse='_'), x[1], sep='.'))
res <- reshape(as.data.frame(long1), varying=-1, direction='l') |>
reshape(direction='l', varying=-(1:3), new.row.names=1:1e9, sep='_') |>
type.convert(as.is=FALSE)
all<-cbind(res[1:3], model.matrix(~ 0 + env + eth + pri, res,
contrasts.arg = list(eth=contrasts(res$eth, contrasts=FALSE),
pri=contrasts(res$pri, contrasts=FALSE))))
all<-all[order(with(all, factor(id, unique(id)))),]
# Reordering the rows in the second dataframe based on the order of 'id' column in the first dataframe
all$id <- factor(all$id, levels = unique(df_long$id))
all2 <- all[order(all$id), ]
all2<-all2[,-c(1,3)]
all3<-cbind(df_long,all2)
这是一项极其复杂的重塑任务:
library(tidyverse)
library(fastDummies)
long1 %>%
select(!contains("choice")) %>%
pivot_longer(-1, names_sep = "_", names_to = c("t", "p", ".value")) %>%
mutate(across(t:p, ~as.numeric(substr(.x, 2, 4)))) %>%
bind_cols(long1 %>% select(id, contains("choice")) %>%
pivot_longer(-1, names_sep = "_", names_to = c("t", "choice")) %>%
reframe(value = value, choice = c(0, 0, 0), .by = c("id", "t")) %>%
mutate(choice = as.numeric(row_number() == value), .by = c("id", "t")) %>%
select(choice)) %>%
select(id, env, eth, pri, choice, t, p) %>%
mutate(env = factor(env, c("low_env", "mid_env", "high_env")),
eth = factor(eth, c("low_eth", "mid_eth", "high_eth"))) %>%
dummy_cols(c("env", "eth", "pri"), remove_selected_columns = TRUE) %>%
rename_with(~substr(.x, 5, 20), env_low_env:eth_high_eth) %>%
select(1, 5:13, 2:4) %>%
as.data.frame()
#> id low_env mid_env high_env low_eth mid_eth high_eth pri_$25 pri_$75 pri_$125 choice t p
#> 1 R_88j7lG37gLfxk22 0 0 1 1 0 0 0 0 1 0 1 1
#> 2 R_88j7lG37gLfxk22 0 1 0 0 0 1 1 0 0 1 1 2
#> 3 R_88j7lG37gLfxk22 1 0 0 0 1 0 0 1 0 0 1 3
#> 4 R_88j7lG37gLfxk22 0 0 1 1 0 0 0 1 0 1 2 1
#> 5 R_88j7lG37gLfxk22 0 1 0 0 1 0 0 0 1 0 2 2
#> 6 R_88j7lG37gLfxk22 0 1 0 0 1 0 0 1 0 0 2 3
#> 7 R_88j7lG37gLfxk22 0 0 1 0 0 1 0 0 1 1 3 1
#> 8 R_88j7lG37gLfxk22 0 1 0 1 0 0 1 0 0 0 3 2
#> 9 R_88j7lG37gLfxk22 1 0 0 0 0 1 1 0 0 0 3 3
#> 10 R_88j7lG37gLfxk22 1 0 0 1 0 0 0 1 0 0 4 1
#> 11 R_88j7lG37gLfxk22 0 0 1 0 1 0 0 0 1 1 4 2
#> 12 R_88j7lG37gLfxk22 1 0 0 0 0 1 1 0 0 0 4 3