我有一个 Excel 文档,里面有不同的工作表(每个工作表对应一个样本)。在每个工作表中,我有不同类别的值。
所需的输出是一张表:以样本为列,以变量及其各级水平(Level)为行,每个样本的值填在对应的样本列中,如下所示。
下面是一个 R 中易于复现的示例:
有两个样本数据框(对应 Excel 工作表),我想把变量作为行,把样本作为列。
# Example data for the first sample (one Excel sheet), written row by row:
# each row is one variable with its measured value and its three levels.
sample1 <- tibble::tribble(
  ~value, ~Variable, ~Level1, ~Level2, ~Level3,
  0.38,   "A",       "China", "East",  "Asia",
  0.22,   "B",       "India", "South", "Asia",
  0.18,   "C",       "UK",    "West",  "Europe",
  0.12,   "D",       "Italy", "South", "Europe",
  0.1,    "E",       "Egypt", "North", "Africa"
)
# Example data for the second sample (another Excel sheet), written row by
# row. It has variable "F" where the first sample has "C".
sample2 <- tibble::tribble(
  ~value, ~Variable, ~Level1,  ~Level2,  ~Level3,
  0.23,   "A",       "China",  "East",   "Asia",
  0.20,   "B",       "India",  "South",  "Asia",
  0.15,   "F",       "Mexico", "Centre", "America",
  0.12,   "D",       "Italy",  "South",  "Europe",
  0.11,   "E",       "Egypt",  "North",  "Africa"
)
所需的输出是这样的。
# Hand-built target table: one row per variable (with its levels) and one
# value column per sample; a variable absent from a sample gets 0.
output <- tibble::tribble(
  ~Variable, ~Level1,  ~Level2,  ~Level3,   ~sample1, ~sample2,
  "A",       "China",  "East",   "Asia",    0.38,     0.23,
  "B",       "India",  "South",  "Asia",    0.22,     0.2,
  "C",       "UK",     "West",   "Europe",  0.18,     0,
  "D",       "Italy",  "South",  "Europe",  0.12,     0.12,
  "E",       "Egypt",  "North",  "Africa",  0.1,      0.11,
  "F",       "Mexico", "Centre", "America", 0,        0.15
)
output
Variable Level1 Level2 Level3 sample1 sample2
A China East Asia 0.38 0.23
B India South Asia 0.22 0.2
C UK West Europe 0.18 0
D Italy South Europe 0.12 0.12
E Egypt North Africa 0.1 0.11
F Mexico Centre America 0 0.15
使用 data.table 方法,您可以按以下步骤进行:
library(data.table)
# Label every row with the sample it belongs to, so the label can become
# the column header after reshaping.
sample1$sample <- "sample1"
sample2$sample <- "sample2"

# Stack both samples in long form; setDT() converts sample1 to a data.table
# by reference, so rbind() dispatches to the data.table method.
stacked_samples <- rbind(setDT(sample1), sample2)

# Long -> wide: all remaining columns (`...`) identify a row, `sample`
# spreads into columns, and missing combinations are filled with 0.
dcast(stacked_samples, ... ~ sample, value.var = "value", fill = 0)
# Variable Level1 Level2 Level3 sample1 sample2
# 1: A China East Asia 0.38 0.23
# 2: B India South Asia 0.22 0.20
# 3: C UK West Europe 0.18 0.00
# 4: D Italy South Europe 0.12 0.12
# 5: E Egypt North Africa 0.10 0.11
# 6: F Mexico Centre America 0.00 0.15
# Alternative answer: stack the samples and cast to wide format.
# NOTE(review): `dcast` must be provided by an attached package (reshape2 or
# data.table); no library() call is visible for this snippet, so confirm.

# Tag each sample's rows with a short label that becomes the column header.
sample1$name <- rep("S1", nrow(sample1))
sample2$name <- rep("S2", nrow(sample2))

# Long format: one row per (variable, sample) pair.
outputs <- rbind(sample1, sample2)

# Wide format: one row per variable/levels combination and one column per
# sample. `value.var` is given explicitly rather than left for dcast to
# guess; combinations missing from a sample are filled with 0.
outputs_reshape <- dcast(
  outputs,
  Variable + Level1 + Level2 + Level3 ~ name,
  value.var = "value",
  fill = 0
)
outputs_reshape
使用 data.table 的 merge():
library(data.table)
# Convert both inputs to data.tables by reference (no copy is made).
setDT(sample1)
setDT(sample2)

# Full outer join on the identifying columns: variables present in only one
# sample are kept and get NA in the other sample's value column.
# Both suffixes start with "." so the value columns are named consistently
# (value.sample1 / value.sample2).
merge(
  sample1, sample2,
  by = c("Variable", paste0("Level", 1:3)),
  all = TRUE,
  suffixes = c(".sample1", ".sample2")
)
# Variable Level1 Level2 Level3 value.sample1 value.sample2
# 1: A China East Asia 0.38 0.23
# 2: B India South Asia 0.22 0.20
# 3: C UK West Europe 0.18 NA
# 4: D Italy South Europe 0.12 0.12
# 5: E Egypt North Africa 0.10 0.11
# 6: F Mexico Centre America NA 0.15
使用 dplyr 的 full_join():
# Full join on the identifying columns: variables present in only one sample
# are kept, with NA in the other sample's value column.
# full_join() is exported by dplyr (not tidyr); it is namespaced here so the
# snippet runs without a prior library(dplyr) call.
dplyr::full_join(
  sample1, sample2,
  by = c("Variable", paste0("Level", 1:3)),
  suffix = c("_sample1", "_sample2")
)
# value_sample1 Variable Level1 Level2 Level3 value_sample2
# <dbl> <chr> <chr> <chr> <chr> <dbl>
# 1 0.38 A China East Asia 0.23
# 2 0.22 B India South Asia 0.2
# 3 0.18 C UK West Europe NA
# 4 0.12 D Italy South Europe 0.12
# 5 0.1 E Egypt North Africa 0.11
# 6 NA F Mexico Centre America 0.15