我有两个数据集,想根据纬度将它们合并在一起。两者的纬度并不完全相同,因此需要按最接近的纬度来匹配(例如相距 30 公里以内)。当我尝试全连接(full join)时,输出的数据帧只是把两者依次堆叠在一起(即先输出 df 的行,再输出 df2 的行),而不是真正合并/连接它们。有没有一种方法可以根据这两个数据帧的纬度接近程度将它们合并为一个?
# Two example data frames with identical columns. No Lat value in `df` is
# exactly equal to any Lat value in `df2`; they are only close to each other.
df <- data.frame(
  Var1 = c(1, 4, 6, 2, 8, 9, 2, 4, 5, 4),
  Var2 = c(1.9, 2.4, 3.7, 7.3, 2.4, 4.8, 9, 2.3, 9, 1.7),
  Lat = c(57.33, 58.21, 58.93, 59.23, 59.87, 60.29, 60.99, 61.5, 61.8, 62.5),
  Lon = c(131, 131, 131, 130.9, 130.99, 130.5, 131, 131, 131.2, 131)
)
df2 <- data.frame(
  Var1 = c(4, 3, 6, 8, 1, 9, 3, 6, 5, 8),
  Var2 = c(6.2, 3, 5.6, 5.1, 9.0, 2.4, 4.9, 8.2, 3.1, 6.2),
  Lat = c(57.15, 58.3, 59.0, 59.4, 60.0, 60.1, 61.0, 61.6, 62, 62.7),
  Lon = c(131, 131, 131, 130.9, 130.99, 130.5, 131, 131, 131.2, 131)
)

# Exact-equality join on Lat. Because the two tables share no latitude value,
# no rows match: the result is df's rows followed by df2's rows, padded with
# NA, rather than a row-by-row merge.
# The call is namespace-qualified so the snippet runs without dplyr attached.
Vic <- dplyr::full_join(df, df2, by = "Lat")
我也尝试过 geo_join,但出现了如下错误:
# Attempted geo join. With `by = NULL` the join falls back to every column the
# two data frames share, which is what triggers the error shown below.
merged.dfs <- geo_join(
  df, df2,
  by = NULL,
  max_dist = 1,
  method = "haversine",
  mode = "left"
)
Joining by: c("Var1", "Var2", "Lat", "Lon")
Error in FUN(X[[i]], ...) :
Trying to join on Var1, Var2, Lat, Lon; geo_join needs exactly two columns (latitude and longitude)
fuzzyjoin 包及其 geo_join 函数绝对是最佳选择。
您的尝试已经很接近了,只需要按照错误信息的提示,用 by 参数明确指定纬度和经度这两列,并设置 max_dist 和 unit 即可。
library(fuzzyjoin)

# Two example data frames with identical columns; their coordinates are close
# to each other but the latitudes are never exactly equal.
df <- data.frame(
  Var1 = c(1, 4, 6, 2, 8, 9, 2, 4, 5, 4),
  Var2 = c(1.9, 2.4, 3.7, 7.3, 2.4, 4.8, 9, 2.3, 9, 1.7),
  Lat = c(57.33, 58.21, 58.93, 59.23, 59.87, 60.29, 60.99, 61.5, 61.8, 62.5),
  Lon = c(131, 131, 131, 130.9, 130.99, 130.5, 131, 131, 131.2, 131)
)
df2 <- data.frame(
  Var1 = c(4, 3, 6, 8, 1, 9, 3, 6, 5, 8),
  Var2 = c(6.2, 3, 5.6, 5.1, 9.0, 2.4, 4.9, 8.2, 3.1, 6.2),
  Lat = c(57.15, 58.3, 59.0, 59.4, 60.0, 60.1, 61.0, 61.6, 62, 62.7),
  Lon = c(131, 131, 131, 130.9, 130.99, 130.5, 131, 131, 131.2, 131)
)

# geo_join() needs exactly two join columns (latitude and longitude), so they
# are named explicitly instead of defaulting to every shared column.
# mode = "left" keeps every row of `df`; max_dist = 30 with unit = "km" limits
# matches to `df2` rows within 30 km.
# The function is called directly rather than through `%>%`, because only
# fuzzyjoin is attached here and the pipe may not be in scope.
Vic <- geo_join(
  df, df2,
  by = c("Lat" = "Lat", "Lon" = "Lon"),
  method = "haversine",
  mode = "left",
  max_dist = 30,
  unit = "km"
)
如有需要,请查阅 fuzzyjoin 包中 geo_join 的文档。