如何在 R 中不使用循环,从样本数据生成所需的数据?
样本数据:
# Sample input: five groups (L1-L5) of six rows each. Every class label is
# listed on two consecutive rows, NA marks rows without a class label, and
# the `data` column is 1 on every row.
# `stringsAsFactors = FALSE` is spelled out (not `F`, which can be reassigned)
# so the character columns stay character on R versions before 4.0 as well.
sample_data1 <- data.frame(
  group = rep("L1", 6),
  class = rep(c("A", "B", "C"), each = 2),
  data = rep(1, 6),
  stringsAsFactors = FALSE
)
sample_data2 <- data.frame(
  group = rep("L2", 6),
  class = rep(c("A", NA, "D"), each = 2),
  data = rep(1, 6),
  stringsAsFactors = FALSE
)
sample_data3 <- data.frame(
  group = rep("L3", 6),
  class = rep(c(NA, NA, "C"), each = 2),
  data = rep(1, 6),
  stringsAsFactors = FALSE
)
sample_data4 <- data.frame(
  group = rep("L4", 6),
  class = rep(c(NA, "B", "C"), each = 2),
  data = rep(1, 6),
  stringsAsFactors = FALSE
)
sample_data5 <- data.frame(
  group = rep("L5", 6),
  class = rep(c(NA, "E", NA), each = 2),
  data = rep(1, 6),
  stringsAsFactors = FALSE
)

# Stack the per-group frames into one 30-row data frame.
sample_data <- rbind(
  sample_data1,
  sample_data2,
  sample_data3,
  sample_data4,
  sample_data5
)
期望得到的结果(desired_data):
# Expected output: for each group L1-L5, all five classes A-E are listed
# (two rows per class, ten rows per group). `data` is 1 where the class is
# present for that group in the sample input and 0 otherwise.
# `stringsAsFactors = FALSE` is spelled out (not `F`, which can be reassigned)
# so the character columns stay character on R versions before 4.0 as well.
desired_data1 <- data.frame(
  group = rep("L1", 10),
  class = rep(c("A", "B", "C", "D", "E"), each = 2),
  data = rep(c(1, 1, 1, 0, 0), each = 2),
  stringsAsFactors = FALSE
)
desired_data2 <- data.frame(
  group = rep("L2", 10),
  class = rep(c("A", "B", "C", "D", "E"), each = 2),
  data = rep(c(1, 0, 0, 1, 0), each = 2),
  stringsAsFactors = FALSE
)
desired_data3 <- data.frame(
  group = rep("L3", 10),
  class = rep(c("A", "B", "C", "D", "E"), each = 2),
  data = rep(c(0, 0, 1, 0, 0), each = 2),
  stringsAsFactors = FALSE
)
desired_data4 <- data.frame(
  group = rep("L4", 10),
  class = rep(c("A", "B", "C", "D", "E"), each = 2),
  data = rep(c(0, 1, 1, 0, 0), each = 2),
  stringsAsFactors = FALSE
)
desired_data5 <- data.frame(
  group = rep("L5", 10),
  class = rep(c("A", "B", "C", "D", "E"), each = 2),
  data = rep(c(0, 0, 0, 0, 1), each = 2),
  stringsAsFactors = FALSE
)

# Stack the per-group frames into one 50-row data frame.
desired_data <- rbind(
  desired_data1,
  desired_data2,
  desired_data3,
  desired_data4,
  desired_data5
)
样本数据和所需数据中的每一行都重复两次 - 这是故意的吗?
library(tidyverse)
# Build the full group x class grid (5 groups x 5 classes = 25 rows, group
# varying slowest), attach the observed `data` values from `sample_data`, and
# fill combinations that were never observed with 0.
expand_grid(
  group = paste0("L", 1:5),
  class = c("A", "B", "C", "D", "E")
) |>
  # distinct() drops exact duplicate input rows before joining. The join keys
  # are named explicitly so the join does not depend on whichever columns the
  # two tables happen to share (and does not emit the "Joining with" message).
  left_join(distinct(sample_data), by = c("group", "class")) |>
  # Grid rows with no matching observation come back with data = NA.
  replace_na(list(data = 0))