我正在使用 pyclustering 库来执行 K 均值。我正在使用的数据集以 CSV 格式读取,如下面的代码所示。我尝试将 X_scaled 作为 numpy 数组传递,并使用 to_list() 作为列表传递。但是,我不断收到此错误:
TypeError: len() of unsized object
pyclustering版本:0.10.1.2
代码如下:
from pyclustering.cluster.kmeans import kmeans
from pyclustering.utils.metric import distance_metric, type_metric
import matplotlib.pyplot as plt
import numpy as np
# Define a function to convert distance metric names to functions
def get_distance_metric(metric_name):
if metric_name == 'euclidean':
return distance_metric(type_metric.EUCLIDEAN)
elif metric_name == 'squared euclidean':
return distance_metric(type_metric.EUCLIDEAN_SQUARE)
elif metric_name == 'manhattan':
return distance_metric(type_metric.MANHATTAN)
elif metric_name == 'chebyshev':
return distance_metric(type_metric.CHEBYSHEV)
elif metric_name == 'canberra':
return distance_metric(type_metric.CANBERRA)
elif metric_name == 'chi-square':
return distance_metric(type_metric.CHI_SQUARE)
else:
raise ValueError(f"Unsupported distance metric: {metric_name}")
# Define the distance measures dictionary
distance_measures = {'euclidean': 0, 'squared euclidean': 1, 'manhattan': 2, 'chebyshev': 3,
'canberra': 5, 'chi-square': 6}
# Example of running the modified code
datasets = main_datasets
df = datasets['circles0.3.csv']
original_labels = df['label'].values if 'label' in df.columns else None
X = df.drop(columns=['label'], errors='ignore').values
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
# Set the number of clusters
k = 3
# Experiment with various distance metrics
for metric_name, metric_code in distance_measures.items():
# Get the distance metric function
distance_metric_func = get_distance_metric(metric_name)
# Perform K-means clustering with the selected distance metric
# centers, clusters = kmeans(X_scaled.tolist(), k, metric=distance_metric_func)
centers, clusters = kmeans(X_scaled, k, metric=distance_metric_func)
# Plot the clusters
plt.figure()
plt.title(f'K-means Clustering with {metric_name}')
plt.xlabel('X')
plt.ylabel('Y')
plt.scatter([point[0] for point in X_scaled], [point[1] for point in X_scaled], c=clusters, cmap='viridis')
plt.scatter([center[0] for center in centers], [center[1] for center in centers], marker='x', c='red', s=100)
plt.show()
任何人都可以帮我解决这段代码可能存在的问题吗?
在下面的函数调用中,您应该添加“初始中心”参数而不是 k。此外,在传递给 kmeans 函数之前,您必须将 X_scaled 数组转换为列表。
centers, clusters = kmeans(X_scaled, k, metric=distance_metric_func)
请使用以下代码:
X_scaled_list = X_scaled.tolist()
initial_centers = random_center_initializer(X_scaled_list,2).initialize()
result_1 = kmeans(X_scaled_list, initial_centers)