我正在尝试自动计算一些稍后需要进行积分的区间。我目前的做法没有使用 tidyr,但我相信使用 tidyr 可以帮助解决我问题中的最后一个瓶颈。我的初始表格如下所示:
# Initial integration ranges: one row per component (A to H), with the
# lower bound in `From` and the upper bound in `To`.
int_NMR <- data.frame(
  Component = LETTERS[1:8],
  From = c(0, 45, 60, 95, 110, 145, 165, 190),
  To = c(45, 60, 95, 110, 145, 165, 190, 215)
)
对于 “From” 和 “To” 的值,我必须加上或减去一个 X 值(例如 160)。然而,这意味着某些区间会发生重叠,此时需要对它们进行切分,并且切分后的每一段都要保留 “Component” 列的信息。
最终结果看起来像这样:
正如你所看到的,有些区间已经被切分,并且需要保留重叠信息。例如 Component A 最初是从 0 到 45,现在被切分为 0 到 5、5 到 30 和 30 到 45,并分别与 F_low、G_low 和 H_low 重叠。
我遵循的方法是将表与 NA 绑定在一起,但是当我到达最后一个表时,无法对重叠范围进行切片,而不会丢失它属于哪个组件的信息。
下面是我的代码:
# Attempt from the question: stack the original ranges together with a
# down-shifted and an up-shifted copy into one table (`int_NMR_all`), padding
# with NA so that all pieces share the same four columns.
# NOTE(review): `sb_ofset` is not defined in the visible snippet; presumably it
# is the shift "X" mentioned in the text (e.g. 160) -- confirm before running.
na_frame <- NULL
int_NMR_all<- NULL
# Shifted copies: the label "ssb_high_<Component>" / "ssb_low_<Component>" goes
# into `Component_ssb`; From/To are moved up / down by `sb_ofset`.
int_NMR_high <- data.frame(setNames(lapply(int_NMR[1], function(x) paste("ssb_high", x, sep="_")),"Component_ssb"),int_NMR[2:3]+sb_ofset)
int_NMR_low <- data.frame(setNames(lapply(int_NMR[1], function(x) paste("ssb_low", x, sep="_")),"Component_ssb"),int_NMR[2:3]-sb_ofset)
int_NMR_all <- rbind(int_NMR_low,int_NMR_high)
# Append one all-NA row per original range (three columns, same names).
na_frame <- as.data.frame(matrix(NA, nrow = nrow(int_NMR), ncol = 3))
names(na_frame) <- names(int_NMR_all)
int_NMR_all <- rbind(int_NMR_all, na_frame)
# Give the shifted table an all-NA `Component` column.
na_frame <- as.data.frame(matrix(NA, nrow = nrow(int_NMR_all), ncol = 1))
names(na_frame) <- c("Component")
int_NMR_all<- cbind(int_NMR_all, na_frame)
# Give the original table an all-NA `Component_ssb` column.
# Note: from here on `int_NMR` itself is overwritten.
na_frame <- as.data.frame(matrix(NA, nrow = nrow(int_NMR), ncol = 1))
names(na_frame) <- c("Component_ssb")
int_NMR<- cbind(int_NMR, na_frame)
# Pad the original table with twice as many all-NA rows as it currently has.
# The padding frame uses the column names of `int_NMR_all`; rbind() on data
# frames matches columns by name, so the differing column order is fine.
na_frame <- as.data.frame(matrix(NA, nrow = 2*nrow(int_NMR), ncol = 4))
names(na_frame) <- names(int_NMR_all)
int_NMR <- rbind(int_NMR, na_frame)
# Stack original + shifted tables, then drop every row whose `From` is NA
# (which includes all the padding rows added above).
int_NMR_all <- rbind(int_NMR,int_NMR_all)
int_NMR_all <- int_NMR_all[!is.na(int_NMR_all$From),]
这是一种 data.table 方法,同时使用 IntervalSurgeon 包来找出互不重叠的唯一区间。
可能不是最高效的方法,但输出看起来符合预期。
library(data.table)
library(IntervalSurgeon)
# Offset (the "X" value) applied to every range, once downward and once upward.
value_n <- 160

# Convert to data.table format; setDT() converts `int_NMR` by reference.
setDT(int_NMR)

# Down-shifted and up-shifted copies of the ranges. Each copy is tagged in
# `Component_mod` with "<Component>_Low" / "<Component>_High".
int_NMR_low <- copy(int_NMR)
int_NMR_low[, c("From", "To", "Component_mod") := list(
  From - value_n,
  To - value_n,
  paste0(Component, "_Low")
)]
int_NMR_high <- copy(int_NMR)
int_NMR_high[, c("From", "To", "Component_mod") := list(
  From + value_n,
  To + value_n,
  paste0(Component, "_High")
)]

# One data.table holding all intervals. The unshifted rows have no
# `Component_mod` column, so fill = TRUE leaves it NA for them.
all_int <- rbindlist(
  list(int_NMR_low, int_NMR, int_NMR_high),
  fill = TRUE
)

# New data.table with all the non-overlapping pieces, obtained by cutting at
# every interval boundary (columns 2:3 of `all_int` are From and To).
final <- data.table(
  sections(breaks(as.matrix(all_int[, 2:3])))
)
setnames(final, c("From", "To"))

# Overlap (non-equi) update joins: a piece matches a range when
# piece From < range To and piece To > range From.
# Unshifted ranges (Component_mod is NA) supply `Component`...
final[
  all_int[is.na(Component_mod)],
  Component := i.Component,
  on = .(From < To, To > From)
]
# ...and shifted ranges supply `Component_mod`.
# NOTE(review): if `value_n` is small enough that the _Low and _High ranges
# overlap each other, a piece can match both and only one label is kept.
final[
  all_int[!is.na(Component_mod)],
  Component_mod := i.Component_mod,
  on = .(From < To, To > From)
]
final
# From To Component Component_mod
# 1: -160 -115 <NA> A_Low
# 2: -115 -100 <NA> B_Low
# 3: -100 -65 <NA> C_Low
# 4: -65 -50 <NA> D_Low
# 5: -50 -15 <NA> E_Low
# 6: -15 0 <NA> F_Low
# 7: 0 5 A F_Low
# 8: 5 30 A G_Low
# 9: 30 45 A H_Low
# 10: 45 55 B H_Low
# 11: 55 60 B <NA>
# 12: 60 95 C <NA>
# 13: 95 110 D <NA>
# 14: 110 145 E <NA>
# 15: 145 160 F <NA>
# 16: 160 165 F A_High
# 17: 165 190 G A_High
# 18: 190 205 H A_High
# 19: 205 215 H B_High
# 20: 215 220 <NA> B_High
# 21: 220 255 <NA> C_High
# 22: 255 270 <NA> D_High
# 23: 270 305 <NA> E_High
# 24: 305 325 <NA> F_High
# 25: 325 350 <NA> G_High
# 26: 350 375 <NA> H_High
# From To Component Component_mod