我试图把这个问题中的代码推广到有多个重复列的情况:《重命名数据框列表中的列,以模仿连接(join)时添加的后缀》。
我有一个由数据框组成的列表,其中不同数据框的列名彼此相同。我想沿用与 reduce(left_join, suffix = c("_x", "_y"), by="inAll") 相同的命名模式,
为除我指定的列(此处为 "inAll")之外的所有列创建新的名称。
# Attach the packages the code below relies on.
library(dplyr)
library(purrr)
library(stringr)
# Example fixture with two parallel lists of six data frames each:
#   dd$data  - the input; the same column names recur across data frames.
#   dd$data2 - the expected result; every column except `inAll` that occurs in
#              a complete pair carries an "_x"/"_y" style suffix.
dd <- list(
  data = list(
    ONE = data.frame(
      inAll = c(1.1, 1.2, 1.3),
      inAll_2 = c(1.4, 1.5, 1.6),
      inSome = c(1.7, 1.8, 1.9),
      only_one = c(1.10, 1.11, 1.12)
    ),
    TWO = data.frame(
      inAll = c(2.1, 2.2, 2.3),
      inAll_2 = c(2.4, 2.5, 2.6),
      inOthers = c(2.7, 2.8, 2.9)
    ),
    THREE = data.frame(
      inAll = c(3.1, 3.2, 3.3),
      inAll_2 = c(3.4, 3.5, 3.6)
    ),
    FOUR = data.frame(
      inAll = c(4.1, 4.2, 4.3),
      inAll_2 = c(4.4, 4.5, 4.6),
      inOthers = c(4.10, 4.11, 4.12),
      inSome = c(4.7, 4.8, 4.9)
    ),
    FIVE = data.frame(
      inAll = c(5.1, 5.2, 5.3),
      inAll_2 = c(5.4, 5.5, 5.6)
    ),
    SIX = data.frame(
      inAll = c(6.1, 6.2, 6.3),
      inAll_2 = c(6.4, 6.5, 6.6),
      inOthers = c(6.7, 6.8, 6.8)
    )
  ),
  data2 = list(
    ONE = data.frame(
      inAll = c(1.1, 1.2, 1.3),
      inAll_2_x = c(1.4, 1.5, 1.6),
      inSome_x = c(1.7, 1.8, 1.9),
      only_one = c(1.10, 1.11, 1.12)
    ),
    TWO = data.frame(
      inAll = c(2.1, 2.2, 2.3),
      inAll_2_y = c(2.4, 2.5, 2.6),
      inOthers_x = c(2.7, 2.8, 2.9)
    ),
    THREE = data.frame(
      inAll = c(3.1, 3.2, 3.3),
      inAll_2_x_x = c(3.4, 3.5, 3.6)
    ),
    FOUR = data.frame(
      inAll = c(4.1, 4.2, 4.3),
      inAll_2_y_y = c(4.4, 4.5, 4.6),
      inOthers_y = c(4.10, 4.11, 4.12),
      inSome_y = c(4.7, 4.8, 4.9)
    ),
    FIVE = data.frame(
      inAll = c(5.1, 5.2, 5.3),
      inAll_2_x_x_x = c(5.4, 5.5, 5.6)
    ),
    SIX = data.frame(
      inAll = c(6.1, 6.2, 6.3),
      inAll_2_y_y_y = c(6.4, 6.5, 6.6),
      inOthers = c(6.7, 6.8, 6.8)
    )
  )
)
完全不同的想法!
# Build the suffixed candidate names for every column name found in `data`.
#
# Args:
#   data:      a list of data frames.
#   toExclude: character vector of column names to leave out of the result
#              (for example the join key).
#
# Returns:
#   An unnamed list with one character vector per remaining distinct column
#   name, in the row order produced by dplyr::count(). A name that occurs n
#   times yields 2 * (n %/% 2) names carrying the suffixes "_x", "_y", "_x_x",
#   "_y_y", ...; a name that occurs exactly once is returned unchanged. An
#   empty `data` gives an empty list.
#
# NOTE(review): for an odd n >= 3 the unpaired last occurrence gets no entry,
# and nothing here applies the names back onto the data frames yet.
new_names <- function(data, toExclude) {
  # as.character() keeps the column typed when `data` has no columns at all.
  all_names <- as.character(unlist(map(data, colnames), use.names = FALSE))

  # One row per distinct column name together with its number of occurrences.
  name_counts <- tibble(names = all_names) %>%
    count(names) %>%
    filter(!names %in% toExclude)

  # Only whole pairs receive suffixes; the integer division is explicit rather
  # than relying on rep()/seq_len() coercing a fractional n / 2.
  suffixes <- map(name_counts$n, function(n) {
    pairs <- n %/% 2L
    strrep(rep(c("_x", "_y"), pairs), rep(seq_len(pairs), each = 2))
  })

  map2(name_counts$names, suffixes, paste0)
}
该函数接收数据框列表以及需要排除的列,应当生成所需的输出。
# Example call: every column except the key "inAll" is considered.
new_names(data = dd[["data"]], toExclude = "inAll")
任何帮助我都将非常感激,哪怕只是指出用什么逻辑才能得到我想要的最终结果,谢谢!
下面是一种做法:先从数据框列表(list)中提取列名,用 split 按列名分组,并根据每个列名重复出现的情况修改名称;
然后用 relist 把修改后的名称还原成与原 list 相同的结构,
最后使用 map2 把这些新名称设置到原 list 中各数据框的列上。
library(purrr)
library(dplyr)
library(stringr)
# Input: the list of data frames whose columns are to be renamed.
inp <- dd$data
# Column names per data frame (lst1) and flattened into one vector (nm1).
lst1 <- map(inp, colnames)
nm1 <- unlist(lst1)
# The key column 'inAll' is never renamed.
i1 <- nm1 != 'inAll'
# Group the remaining names so all occurrences of a name sit together.
lst2 <- split(nm1[i1], nm1[i1])
# Only names that occur more than once can receive suffixes.
i2 <- lengths(lst2) > 1
lst2[i2] <- map(lst2[i2], function(occurrences) {
  # Number of complete (_x, _y) pairs among the occurrences of this name.
  n_pairs <- length(occurrences) %/% 2
  # An unpaired trailing occurrence is left without a suffix.
  in_pair <- seq_along(occurrences) <= 2 * n_pairs
  # Pair k gets its suffix repeated k times: _x, _y, _x_x, _y_y, ...
  suffix <- strrep(rep(c('_x', '_y'), n_pairs),
                   rep(seq_len(n_pairs), each = 2))
  occurrences[in_pair] <- str_c(occurrences[in_pair], suffix)
  occurrences
})
# Put the edited names back into their original positions.
nm1[i1] <- unsplit(lst2, nm1[i1])
# Restore the per-data-frame structure and apply the new names.
out2 <- map2(inp, relist(flesh = nm1, skeleton = lst1), ~ set_names(.x, .y))
——与提问者(OP)给出的期望输出进行核对:
# Expected result as supplied in the question.
out <- dd[["data2"]]
# Exact comparison of the expected list with the renamed list.
identical(x = out, y = out2)
#[1] TRUE