如何在 R 中根据 x、y 坐标对二维轨迹进行聚类

问题描述 投票:0回答:1

我有数千条由 x、y 坐标表示的二维轨迹。示例数据集如下所示。每条轨迹都由一个 x 坐标向量和一个 y 坐标向量表示,并且各条轨迹的行数(点数)各不相同。

# Sample data: seven 2-D trajectories. Trajectory i is stored as a pair of
# numeric vectors, xi (x coordinates) and yi (y coordinates), where the k-th
# elements of xi and yi together form the k-th point of that trajectory.
# The trajectories do not all have the same number of points.

# Trajectory 1
x1 = c(18.12,18.63,20.17,20.69,22.28,22.81,24.41,24.94,26.53,27.06,28.63,29.16,30.72,31.24,32.83,33.37,35.01,35.56,37.26,37.84,39.58,40.17,41.94,42.53,44.30,44.89,46.65,47.23,47.82,48.39)
y1 = c(16.19,16.68,18.14,18.61,20.03,20.50,21.88,22.32,23.64,24.07,25.34,25.76,26.98,27.38,28.57,28.96,30.13,30.52,31.67,32.05,33.19,33.56,34.66,35.02,36.07,36.41,37.38,37.69,37.99,38.29)
# Trajectory 2
x2 = c(18.19,18.73,20.36,20.91,22.57,23.12,24.79,25.34,27.00,27.55,29.20,29.75,31.40,31.96,33.63,34.19,35.91,36.49,38.25,38.85,40.67,41.28,43.13,43.75,45.60,46.21,48.04,48.64)
y2= c(16.23,16.75,18.27,18.77,20.24,20.72,22.13,22.59,23.94,24.39,25.68,26.11,27.35,27.76,28.96,29.36,30.53,30.92,32.07,32.45,33.58,33.95,35.04,35.39,36.41,36.74,37.67,37.96)
# Trajectory 3
x3 = c(16.86,17.33,18.77,19.25,20.73,21.23,22.74,23.25,24.78,25.30,26.84,27.36,28.92,29.44,31.01,31.54,33.14,33.68,35.32,35.87,37.54,38.10,39.80,40.37,42.08,42.65,44.37,44.94,46.64,47.20,47.76,48.32)
y3 = c(17.24,17.71,19.09,19.55,20.90,21.34,22.65,23.07,24.33,24.73,25.93,26.31,27.46,27.83,28.93,29.29,30.37,30.72,31.78,32.12,33.16,33.50,34.50,34.83,35.81,36.12,37.05,37.35,38.20,38.48,38.75,39.01)
# Trajectory 4
x4 = c(17.29,17.67,18.84,19.24,20.44,20.85,22.07,22.49,23.73,24.15,25.39,25.81,27.06,27.48,28.73,29.15,30.40,30.83,32.11,32.54,33.87,34.32,35.68,36.15,37.55,38.03,39.45,39.93,41.36,41.84,43.27,43.74,45.18,45.66,47.11,47.59,48.07,48.55)
y4 = c(16.55,16.95,18.17,18.57,19.77,20.16,21.33,21.72,22.85,23.22,24.32,24.68,25.73,26.08,27.10,27.44,28.44,28.77,29.75,30.07,31.02,31.33,32.26,32.56,33.46,33.75,34.61,34.90,35.73,36.00,36.80,37.06,37.84,38.09,38.82,39.06,39.29,39.51)
# Trajectory 5
x5 = c(18.28,18.78,20.27,20.78,22.30,22.81,24.35,24.87,26.42,26.94,28.50,29.01,30.57,31.10,32.67,33.20,34.81,35.36,37.02,37.59,39.31,39.89,41.65,42.25,44.04,44.64,46.43,47.03,47.62,48.22,48.81)
y5 = c(15.78,16.26,17.68,18.14,19.52,19.97,21.30,21.74,23.04,23.47,24.74,25.16,26.39,26.80,28.00,28.40,29.57,29.95,31.10,31.48,32.62,33.00,34.13,34.51,35.61,35.97,37.03,37.37,37.70,38.02,38.34)
# Trajectory 6
x6 = c(18.07,18.61,20.25,20.80,22.48,23.05,24.75,25.32,27.02,27.59,29.30,29.87,31.58,32.16,33.90,34.49,36.30,36.91,38.77,39.41,41.32,41.97,43.92,44.57,46.52,47.17,47.82,48.47)
y6 = c(16.49,17.01,18.54,19.04,20.54,21.03,22.48,22.95,24.36,24.82,26.18,26.62,27.95,28.39,29.69,30.12,31.41,31.84,33.12,33.55,34.81,35.22,36.44,36.83,37.97,38.33,38.69,39.03)
# Trajectory 7
x7 = c(17.41,17.76,18.84,19.20,20.30,20.67,21.79,22.16,23.28,23.66,24.78,25.16,26.29,26.66,27.79,28.17,29.30,29.67,30.81,31.19,32.33,32.72,33.89,34.29,35.50,35.92,37.17,37.60,38.89,39.33,40.64,41.07,42.38,42.82,44.12,44.55,45.84,46.26,47.54,47.96,48.38)
y7 = c(16.73,17.11,18.22,18.59,19.68,20.04,21.10,21.44,22.46,22.79,23.77,24.09,25.03,25.34,26.25,26.55,27.43,27.72,28.57,28.85,29.67,29.94,30.74,31.01,31.80,32.06,32.84,33.10,33.88,34.14,34.91,35.17,35.92,36.17,36.90,37.14,37.83,38.06,38.70,38.91,39.11)

# Length of the longest trajectory (measured on the x vectors); every column
# of the wide data frame below is padded with NA up to this many rows.
max_ln <- max(lengths(list(x1, x2, x3, x4, x5, x6, x7)))

# Coordinate vectors in column order: x1, y1, x2, y2, ..., x7, y7.
coord_vectors <- list(x1, y1, x2, y2, x3, y3, x4, y4,
                      x5, y5, x6, y6, x7, y7)

# Append trailing NAs so that every vector has exactly max_ln entries.
padded_columns <- lapply(
  coord_vectors,
  function(v) c(v, rep(NA, max_ln - length(v)))
)
names(padded_columns) <- paste0("col", seq_along(padded_columns))

# Wide layout: col1/col2 hold trajectory 1 (x, y), col3/col4 trajectory 2, etc.
df <- do.call(data.frame, padded_columns)

数据的绘图也一并给出;从图中可以直观地看出,这些轨迹大致分为三组。

我想在 R 中对具有相似模式的轨迹进行聚类(分组)。我已经了解了如何对时间序列数据进行聚类,但不知道如何对这类轨迹数据进行聚类,也不知道可以使用哪个包、以及如何在 R 中编写代码。

r rstudio cluster-analysis data-analysis
1个回答
0
投票
# Cluster whole trajectories (not individual points) with k-means.
#
# Calling kmeans() on the pooled (x, y) points groups *locations*: every
# trajectory gets cut into pieces that land in different clusters, and no
# trajectory ever receives a single label. Here each trajectory is first
# converted into one fixed-length feature vector, and those vectors (one row
# per trajectory) are what gets clustered.
#
# Expects the coordinate vectors x1..x7 and y1..y7 to be defined already.

# One two-column (x, y) matrix per trajectory.
trajectories <- list(
  cbind(x = x1, y = y1),
  cbind(x = x2, y = y2),
  cbind(x = x3, y = y3),
  cbind(x = x4, y = y4),
  cbind(x = x5, y = y5),
  cbind(x = x6, y = y6),
  cbind(x = x7, y = y7)
)

# All points pooled into a single matrix; only used to set up the plot axes.
data <- do.call(rbind, trajectories)

# Resample one trajectory to `n_points` points equally spaced along its arc
# length, so that trajectories with different numbers of rows become
# comparable.
#   xy       : numeric matrix with columns "x" and "y", one row per point
#   n_points : number of points in the resampled trajectory
# Returns a numeric vector of length 2 * n_points: the resampled x
# coordinates followed by the resampled y coordinates.
resample_trajectory <- function(xy, n_points) {
  stopifnot(nrow(xy) >= 1)
  if (nrow(xy) < 2) {
    # A single point cannot be interpolated; repeat it instead.
    return(unname(rep(xy[1, ], each = n_points)))
  }
  # Cumulative distance travelled at each recorded point.
  arc <- c(0, cumsum(sqrt(rowSums(diff(xy)^2))))
  total <- arc[length(arc)]
  if (total == 0) {
    # Stationary trajectory: all recorded points coincide.
    return(unname(rep(xy[1, ], each = n_points)))
  }
  target <- seq(0, total, length.out = n_points)
  # ties = mean is given explicitly so repeated points (equal arc length)
  # are collapsed without a warning.
  c(approx(arc, xy[, "x"], xout = target, ties = mean)$y,
    approx(arc, xy[, "y"], xout = target, ties = mean)$y)
}

# Feature matrix: one row per trajectory, 2 * n_points columns.
n_points <- 50
features <- t(vapply(trajectories, resample_trajectory,
                     numeric(2 * n_points), n_points = n_points))
rownames(features) <- paste0("trajectory", seq_along(trajectories))

# Perform K-means clustering on the trajectories
k <- 3  # Number of clusters
set.seed(123)  # Set seed for reproducibility
# nstart > 1 tries several random initialisations and keeps the best one,
# which matters with so few observations.
kmeans_result <- kmeans(features, centers = k, nstart = 25)

# Get cluster assignments (one label per trajectory)
cluster_assignments <- kmeans_result$cluster

# Visualize the clusters: each trajectory is drawn in its cluster's colour
plot(data, type = "n",
     main = "K-means Clustering of Trajectories",
     xlab = "X Coordinate", ylab = "Y Coordinate")
for (i in seq_along(trajectories)) {
  lines(trajectories[[i]], col = cluster_assignments[i], lwd = 2)
}
# Cluster centres are themselves resampled trajectories; draw them dashed.
for (j in seq_len(k)) {
  centre <- kmeans_result$centers[j, ]
  lines(centre[seq_len(n_points)], centre[n_points + seq_len(n_points)],
        col = j, lty = 2)
}
# The trajectories run towards the top-right corner, so the legend is placed
# at the top-left to keep it clear of the data.
legend("topleft", legend = 1:k, col = 1:k, lty = 1, lwd = 2, title = "Cluster")
© www.soinside.com 2019 - 2024. All rights reserved.