例如,有两组列数不同的时间序列数据,需要按列名合并并纵向堆叠,缺少的列用新列补齐。我无法完成合并和堆叠,因为我的代码没能正确匹配列名。
# First data: 12 rows x 3 named columns of random normal values.
name1 <- c("Andi", "Ani", "Juli")
dd <- as.data.frame(matrix(rnorm(36), ncol = 3))
colnames(dd) <- name1
dd1 <- dd
nd1 <- nrow(dd1)
n1 <- ncol(dd1)

# Second data: 20 rows x 4 named columns (one extra column, "Juni").
name2 <- c("Andi", "Ani", "Juli", "Juni")
dd <- as.data.frame(matrix(rnorm(80), ncol = 4))
colnames(dd) <- name2
dd2 <- dd
n2 <- ncol(dd2)
nd2 <- nrow(dd2)

# Merge & stacking: for every column of dd2, stack the same-named column of
# dd1 on top of it. A column that dd1 lacks is padded with "XXX" for the
# first nd1 rows, so every stacked column has nd1 + nd2 entries.
DaRR <- NULL
for (i in seq_len(n2)) {
  nna2 <- name2[i]
  # Position of this name among the first data set's columns (NA if absent).
  j <- match(nna2, name1)
  if (is.na(j)) {
    # No counterpart in dd1: fill the dd1 part with the placeholder.
    # Mixing "XXX" with numbers coerces the whole result to character.
    DaRR_ <- c(rep("XXX", nd1), dd2[, i])
  } else {
    DaRR_ <- c(dd1[, j], dd2[, i])
  }
  DaRR <- cbind(DaRR, DaRR_)
}
期望的结果:
DaRR_ DaRR_ DaRR_ DaRR_
[1,] -1.9470804 -0.15451553 1.69498001 "XXX"
[2,] -0.2311135 1.14891305 0.17395141 "XXX"
[3,] 0.9371293 1.48934820 0.33862342 "XXX"
[4,] -0.3381575 1.76123310 0.93475370 "XXX"
[5,] 2.1347953 0.46455408 -0.03283264 "XXX"
[6,] -1.2142898 -1.00940986 0.12973995 "XXX"
[7,] -0.7788720 0.46866536 0.18310776 "XXX"
[8,] -0.8667355 -1.85909721 -1.05711808 "XXX"
[9,] 0.9289370 -0.25421140 -0.13492044 "XXX"
[10,] -0.3007262 0.30409309 0.54958016 "XXX"
[11,] -0.3805569 -0.09353285 2.27405059 "XXX"
[12,] 0.7587997 0.80732128 0.03422467 "XXX"
[13,] "1.34696898710666" "-0.198851239901805" "0.334057783172022"
[14,] "-0.435942723430498" "0.306828387912936" "-0.938523307710521"
[15,] "-0.265285902944808" "0.721459014968174" "-1.82707007543666"
[16,] "1.58759166754201" "-1.47614456502887" "-1.21306832466975"
[17,] "0.424393481053464" "0.655826393660495" "0.288243529871784"
[18,] "-0.65438898526425" "-0.640742323728014" "0.693551868680085"
[19,] "-1.80833220993313" "-0.897518866526767" "-1.73897841823514"
[20,] "-0.0660275125014107" "0.140663303366014" "-1.21130608877615"
[21,] "1.85286698109007" "-1.30771714034588" "-0.928005801280665"
[22,] "1.30168226335214" "0.881285603015683" "-1.24954972323302"
[23,] "0.478732452440329" "1.58022368211514" "-0.0864943280820771"
[24,] "-1.81765850980761" "0.66571286028555" "-2.55600015433571"
[25,] "-0.667110842664888" "0.952606783138395" "-0.0239496256536243"
[26,] "1.251596581557" "-0.673926090970423" "-0.922582582633877"
[27,] "-1.7415929470951" "-1.60271402324792" "0.0486990326594654"
[28,] "-0.286027857835192" "0.183397655741827" "0.334799213547238"
[29,] "-0.152689370749341" "1.18276041450592" "0.952138672622552"
[30,] "1.34231553537835" "-1.01358652507626" "0.461571536335735"
[31,] "-0.02608063103647" "0.487607550927421" "0.334013197562071"
[32,] "0.792308074641114" "-1.3595426981532" "-1.61478080821833"
如何根据需要进行数据组合?
你不需要编写 `for` 循环,因为 R 为这类操作提供了一些很好用的函数。
您可以按如下方式在 base R 中执行此操作:
# Full outer join on the columns shared by dd1 and dd2; values for columns
# that exist in only one input are NA on the other input's rows.
# Note: merge() sorts the result on the key columns by default, so rows are
# not kept in stacking order.
dd3 <- merge(dd1, dd2, all = TRUE)
或使用 dplyr 包:
# Full join keyed explicitly on the columns both data frames share
# (the same keys dplyr would pick by default, without the "Joining with" message).
dd3 <- dplyr::full_join(dd1, dd2, by = intersect(names(dd1), names(dd2)))
然后您可以重命名 base R 中的列
# Rename the four columns to DaRR1 ... DaRR4.
names(dd3) <- paste0("DaRR", 1:4)
或使用 `dplyr::rename()` 函数。
另一方面,我不建议将 NA 替换为“XXX”,因为这会使相应的列变成字符向量而不是数值向量(字符向量无法用于数值计算)。
有很多方法可以做到这一点:
# Stack the data frames by column name; columns absent from one input are NA.
dd3 <- dplyr::bind_rows(list(dd1, dd2))
甚至:
# Row-bind the data frames, filling columns missing from either input with NA.
dd3 <- plyr::rbind.fill(list(dd1, dd2))