我正在使用DBSCAN进行聚类。但是,现在我想从每个簇中选出一个能够代表该簇的点,却发现DBSCAN不像k-means那样具有质心。
不过,我发现DBSCAN有一个叫做核心点(core points)的概念。我在考虑是否可以利用这些核心点,或者采用其他替代方法,从每个簇中获取代表点。
下面是我使用的代码。
import numpy as np
from math import pi
from sklearn.cluster import DBSCAN
# Time values in minutes.
points = [100, 200, 600, 659, 700]


def convert_to_radian(x):
    """Map a time in minutes onto an angle of a 24-hour circle.

    Args:
        x: Time in minutes; 24 * 60 minutes correspond to a full turn.

    Returns:
        The angle in radians, i.e. ``x / (24 * 60) * 2 * pi``.
    """
    return (x / (24 * 60)) * 2 * pi


# Element-wise wrapper so the conversion can be applied to the whole list.
rad_function = np.vectorize(convert_to_radian)
points_rad = rad_function(points)
# Signed pairwise differences: entry [i, j] is points_rad[j] - points_rad[i].
dist = points_rad[np.newaxis, :] - points_rad[:, np.newaxis]

# Wrap differences larger than half a turn so that each entry measures the
# shorter way around the circle.
beyond_half_turn = (dist > pi) & (dist <= 2 * pi)
dist[beyond_half_turn] -= 2 * pi
below_minus_half_turn = (dist > -2 * pi) & (dist <= -pi)
dist[below_minus_half_turn] += 2 * pi

# Only the magnitude of the separation matters for clustering.
dist = np.abs(dist)

# Show the resulting distance matrix.
print(dist)
# Neighbourhood radius: 100 minutes expressed in radians.
eps_rad = (100 / (24*60)) * 2 * pi
# `dist` already holds pairwise distances, hence metric='precomputed'.
db = DBSCAN(eps=eps_rad, min_samples=2, metric='precomputed')
# Show the estimator configuration before fitting.
print(db)
db.fit(dist)

# Cluster label assigned to each point.
labels = db.labels_

# Number of distinct labels, leaving out the noise label -1 when present.
no_clusters = len(set(labels) - {-1})
print('No of clusters:', no_clusters)

# Indices of the points assigned to the first two clusters.
print('Cluster 0 : ', np.flatnonzero(labels == 0))
print('Cluster 1 : ', np.flatnonzero(labels == 1))

# Indices of the core samples found by DBSCAN.
print(db.core_sample_indices_)
如果需要,我很乐意提供更多详细信息。
为什么不直接计算所得到的各个簇的质心呢?
# NOTE(review): the rows selected here come from `dist`, the pairwise
# distance matrix, so each "centroid" is the column-wise mean of those
# distance rows rather than a mean of point coordinates - confirm this is
# the intended notion of a cluster representative.

# Cluster 0: rows of `dist` whose label is 0, averaged over the rows.
in_cluster_0 = labels == 0
points_of_cluster_0 = dist[in_cluster_0]
centroid_of_cluster_0 = points_of_cluster_0.mean(axis=0)
print(centroid_of_cluster_0)

# Cluster 1: same computation for the rows whose label is 1.
in_cluster_1 = labels == 1
points_of_cluster_1 = dist[in_cluster_1]
centroid_of_cluster_1 = points_of_cluster_1.mean(axis=0)
print(centroid_of_cluster_1)