我现在有一个主数据框,包含 230 行和 15132 列(行和列都带有特定的名称),其中每个单元格的值均为 0。我还有多个行、列各不相同的数据框,希望把它们的值填入这个主数据框,并保证落在正确的位置上。这些数据框的行名/列名都是主数据框行名/列名的不同子集。我该怎么做呢?
# Master data frame: 5 x 5, every cell 0, rows v..z and columns a..e.
dempty <- data.frame(matrix(
  0, nrow = 5, ncol = 5,
  dimnames = list(c("v", "w", "x", "y", "z"), c("a", "b", "c", "d", "e"))
))

# Three partial data frames whose row/column names are subsets of the master's.
d1 <- data.frame(matrix(
  1, nrow = 2, ncol = 3,
  dimnames = list(c("x", "y"), c("b", "c", "d"))
))
d2 <- data.frame(matrix(
  2, nrow = 3, ncol = 4,
  dimnames = list(c("v", "w", "x"), c("a", "b", "c", "d"))
))
d3 <- data.frame(matrix(
  3, nrow = 4, ncol = 2,
  dimnames = list(c("w", "x", "y", "z"), c("d", "e"))
))

# Desired result: each cell lists the values of every partial data frame that
# covers it, joined with ";". Values are given column by column.
dfinal <- data.frame(matrix(
  c(
    2, 2, 2, 0, 0,
    2, 2, "1;2", "1;2", 0,
    2, 2, "1;2", "1;2", 0,
    2, "2;3", "1;2;3", "1;3", 3,
    0, 3, 3, 3, 3
  ),
  nrow = 5, ncol = 5,
  dimnames = list(c("v", "w", "x", "y", "z"), c("a", "b", "c", "d", "e"))
))
此循环应使用 df1、df2 和 df3 中的值填充 dfinal。可以根据您的实际问题轻松对其进行编辑。
## Populate the master data frame `dfinal` cell by cell from df1, df2 and df3.
## A cell that still holds 0 takes the source value; a cell that already holds
## something gets the source value appended after "; ".

## Row and column names of the master data frame to be populated.
cols <- names(dfinal)
rows <- row.names(dfinal)

## Visit every cell of the master data frame.
for (cname in cols) {
  for (rname in rows) {
    ## Sources are consulted in this order, so values are appended in the
    ## order df1, df2, df3.
    for (src in list(df1, df2, df3)) {
      ## Only a source that contains this row/column pair contributes.
      ## `&&` is the scalar, short-circuiting operator meant for `if`.
      if (!(cname %in% names(src) && rname %in% row.names(src))) {
        next
      }
      current <- dfinal[rname, cname]
      ## An NA cell cannot be compared with 0; leave it as NA.
      if (is.na(current)) {
        next
      }
      ## Plain if/else instead of ifelse(): the condition is a single value.
      dfinal[rname, cname] <- if (current == 0) {
        src[rname, cname]
      } else {
        paste0(current, "; ", src[rname, cname])
      }
    }
  }
}
我希望这会有所帮助!
使用此示例数据:
# Sample master table: 3 x 5 of zeros, rows a..c and columns A..E.
dat <- data.frame(matrix(0, nrow = 3, ncol = 5))
dimnames(dat) <- list(letters[1:3], LETTERS[1:5])
dat
# A B C D E
# a 0 0 0 0 0
# b 0 0 0 0 0
# c 0 0 0 0 0
# Sample update table covering rows a and c, columns B and C.
df1 <- data.frame(B = 2:3, C = 22:23, row.names = c("a", "c"))
df1
# B C
# a 2 22
# c 3 23
您可以使用以下方法强行替换:
# Overwrite the cells of `dat` that `df1` covers, matching by row and column
# name. For each shared column, replace() writes the df1 values into the
# positions of dat's rows whose names match df1's row names.
# `dat[colnames(df1)]` (list-style indexing) always yields a data frame, so
# this also works when `df1` has a single column; `dat[, colnames(df1)]` would
# drop to a plain vector there and Map() would iterate over its elements.
dat[colnames(df1)] <- Map(function(x, y, rn) replace(x, rn, y),
                          dat[colnames(df1)], df1,
                          list(match(rownames(df1), rownames(dat))))
dat
# A B C D E
# a 0 2 22 0 0
# b 0 0 0 0 0
# c 0 3 23 0 0
但是,如果您的数据把行索引(行名)存放在一个实际的列中,那么使用 merge 或某个 dplyr::*_join 函数、再在合并后做一些清理,可能更容易也更可靠。基础 R 的 merge 确实支持按行名合并,只是用起来不太顺手:
# Full outer join of `dat` and `df1` on their row names; columns present in
# both tables come back suffixed ".x" (from dat) and ".y" (from df1).
newdat <- merge(dat, df1, by = "row.names", all = TRUE)
newdat
# Row.names A B.x C.x D E B.y C.y
# 1 a 0 2 22 0 0 2 22
# 2 b 0 0 0 0 0 NA NA
# 3 c 0 3 23 0 0 3 23
# Base names of the duplicated columns, i.e. the ".x" columns minus the suffix.
cleanupvars <- sub("\\.x$", "", grep("\\.x$", colnames(newdat), value = TRUE))
cleanupvars
# [1] "B" "C"
for (base_name in cleanupvars) {
  from_dat <- paste0(base_name, ".x")
  from_df1 <- paste0(base_name, ".y")
  # Take the df1 value where there is one, otherwise keep the dat value.
  newdat[[base_name]] <- ifelse(
    is.na(newdat[[from_df1]]),
    newdat[[from_dat]],
    newdat[[from_df1]]
  )
  # Drop the two suffixed helper columns now that they are combined.
  newdat[, c(from_dat, from_df1)] <- NULL
}
newdat
# Row.names A D E B C
# 1 a 0 0 0 2 22
# 2 b 0 0 0 0 0
# 3 c 0 0 0 3 23
newdat$Row.names <- NULL
(不保留列顺序。)
毫无疑问,这最后一种方法很笨重。