# Example data: six observations per country; NA marks a missing value.
mexico <- c(1, 2, 5, 1, NA, 1)
argentina <- c(2, 2, 2, 2, NA, 2)
italy <- c(NA, 10, 10, 10, NA, 10)
spain <- c(NA, NA, 11, 11, 11, 11)
england <- c(5, NA, 10, NA, NA, 12)
germany <- c(1, NA, NA, NA, NA, 10)

# R is case-sensitive: the vectors above are lowercase, so they must be
# referenced in lowercase here. data.frame() derives the column names from
# the argument expressions, giving columns mexico, argentina, ...
Data_Risk <- data.frame(mexico, argentina, italy, spain, england, germany)
Data_Risk
输出:
mexico argentina italy spain england germany
1 1 2 NA NA 5 1
2 2 2 10 NA NA NA
3 5 2 10 11 10 NA
4 1 2 10 11 NA NA
5 NA NA NA 11 NA NA
6 1 2 10 11 12 10
在这种情况下,我不需要考虑NA的情况,因此我尝试了如下代码:
# as.data.table() comes from data.table, so the package must be attached
# before this snippet runs.
library(data.table)

# Convert to a data.table so that `Data_Risk[1]` selects the first *row*
# (on a plain data.frame the same syntax would select the first column).
Data_Risk <- as.data.table(Data_Risk)
my_L <- Data_Risk[1]

# Named logical mask: TRUE for the columns that are non-NA in the first row.
# Only the first row is inspected instead of running is.na over every row.
my_c <- !is.na(unlist(my_L))

# Keep only the non-NA columns of the first row.
as.data.frame(my_L)[my_c]
结果:
mexico argentina england germany
1 1 2 5 1
但我需要的不只是处理某一行,而是处理所有的行。此外,每一行中非NA的列名需要依次放入新的列中(不关心具体数值),因此最终的表必须如下所示:
var1 var2 var3 var4 var5 var6
mexico argentina england germany null null
mexico argentina italy null null null
mexico argentina italy spain england null
mexico argentina italy spain null null
spain null null null null null
mexico argentina italy spain england germany
一种选择是先用which(!is.na(Data_Risk), arr.ind = TRUE)取得所有非NA单元格的行、列位置,再把结果从长格式转换为宽格式:把col变量替换为它在该行内的次序(order(col)),并添加colnm列,作为长转宽(dcast)过程中的value.var。
library(data.table)
library(magrittr)

# (row, col) position of every non-NA cell. which(arr.ind = TRUE) lists the
# positions in column-major order, so `col` is ascending within each row.
nms <- as.data.table(which(!is.na(Data_Risk), arr.ind = TRUE))

# Per row: keep the name of each non-NA column and label it var1, var2, ...
# by the rank of its column index. rank() (rather than order()) yields the
# position of each element, which stays correct even if `nms` is not sorted.
# dcast() then reshapes long -> wide, padding short rows with NA.
nms[,
  .(colnm = names(Data_Risk)[col], col = paste0("var", rank(col))),
  by = row
] %>%
  dcast(row ~ col, value.var = "colnm")
# row var1 var2 var3 var4 var5 var6
# 1: 1 mexico argentina england germany <NA> <NA>
# 2: 2 mexico argentina italy <NA> <NA> <NA>
# 3: 3 mexico argentina italy spain england <NA>
# 4: 4 mexico argentina italy spain <NA> <NA>
# 5: 5 spain <NA> <NA> <NA> <NA> <NA>
# 6: 6 mexico argentina italy spain england germany
等效的dplyr代码:
library(dplyr)
library(tidyr) # the reshaping verbs (spread / pivot_wider) live in tidyr

# (row, col) position of every non-NA cell of Data_Risk.
nms <- as.data.frame(which(!is.na(Data_Risk), arr.ind = TRUE))

nms %>%
  # Sort explicitly so the per-row numbering below follows column order and
  # the reshaped result comes out in row order.
  arrange(row, col) %>%
  group_by(row) %>%
  mutate(
    # Look up the column name first, while `col` still holds the index.
    colnm = names(Data_Risk)[col],
    # Then replace the index by its position within the row: var1, var2, ...
    col = paste0("var", row_number())
  ) %>%
  ungroup() %>%
  # Long -> wide: one output row per input row, padded with NA on the right.
  pivot_wider(names_from = col, values_from = colnm)
题目的具体需求不太明确,但如果你想要的是在每一行中分别用其后的下一个非NA值替换每个NA,下面的代码给出了该形式的矩阵:
library(zoo)

# Row by row, replace each NA with the next non-NA value to its right
# (fromLast = TRUE); trailing NAs with nothing after them are left as NA.
# apply() returns one column per input row, hence the transpose.
t(
  apply(as.matrix(Data_Risk), MARGIN = 1, FUN = na.locf0, fromLast = TRUE)
)
得到:
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 1 2 5 5 5 1
[2,] 2 2 10 NA NA NA
[3,] 5 2 10 11 10 NA
[4,] 1 2 10 11 NA NA
[5,] 11 11 11 11 NA NA
[6,] 1 2 10 11 12 10
或者如果你想要的是将每行中的NA移动到最后:
# Row by row, move the non-NA values to the front (keeping their order) and
# pad the tail with NA so every row keeps its original length.
t(apply(Data_Risk, 1, function(x) {
  kept <- x[!is.na(x)]
  c(kept, rep(NA, length(x) - length(kept)))
}))
得到:
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 1 2 5 1 NA NA
[2,] 2 2 10 NA NA NA
[3,] 5 2 10 11 10 NA
[4,] 1 2 10 11 NA NA
[5,] 11 NA NA NA NA NA
[6,] 1 2 10 11 12 10
或等效地:
# Same result via length<-: drop the NAs, then extend the vector back to
# the original length, which pads the tail with NA.
t(apply(Data_Risk, 1, function(x) {
  shifted <- na.omit(x)
  length(shifted) <- length(x)
  shifted
}))
我们可以逐行使用apply,找出非NA值的索引,把它们替换为对应的列名,并用NA补齐剩余的位置。
# For every row, list the names of the columns holding a non-NA value,
# left-aligned, and pad with NA up to the number of columns.
t(apply(Data_Risk, 1, function(row_vals) {
  present <- names(Data_Risk)[!is.na(row_vals)]
  # Extending a character vector with length<- fills the new slots with NA.
  length(present) <- ncol(Data_Risk)
  present
}))
# [,1] [,2] [,3] [,4] [,5] [,6]
#[1,] "mexico" "argentina" "england" "germany" NA NA
#[2,] "mexico" "argentina" "italy" NA NA NA
#[3,] "mexico" "argentina" "italy" "spain" "england" NA
#[4,] "mexico" "argentina" "italy" "spain" NA NA
#[5,] "spain" NA NA NA NA NA
#[6,] "mexico" "argentina" "italy" "spain" "england" "germany"
如果您希望最终输出为数据框,请将apply的结果包装在data.frame()中。