我对编程场景相当陌生,并且不知所措:D 我想应用
Hoshen-Kopelman
算法在矩阵中进行聚类检测,然后确定最大聚类的“大小”。为此,我编写了以下代码(见下文)。但是,根据示例矩阵,某些簇的标记不正确(例如,在我的代码中,簇 3 和簇 6 应该是同一簇)。有人可以帮我调试吗?
import numpy as np
import matplotlib.pyplot as plt
class UnionFind:
def __init__(self, max_labels):
self.labels = [0] * max_labels
self.labels[0] = 0
self.n_labels = max_labels
def find(self, x):
y = x
while self.labels[y] != y:
y = self.labels[y]
while self.labels[x] != x:
z = self.labels[x]
self.labels[x] = y
x = z
return y
def union(self, x, y):
self.labels[self.find(x)] = self.find(y)
return self.find(x)
def make_set(self):
self.labels[0] += 1
assert self.labels[0] < self.n_labels
self.labels[self.labels[0]] = self.labels[0]
return self.labels[0]
def hoshen_kopelman(matrix, m, n):
uf = UnionFind(m * n // 2)
cluster_sizes = {}
for i in range(m):
for j in range(n):
if matrix[i][j]:
up = matrix[i - 1][j] if i > 0 else 0
left = matrix[i][j - 1] if j > 0 else 0
if up == 0 and left == 0:
matrix[i][j] = uf.make_set()
elif up == 0 or left == 0:
matrix[i][j] = max(up, left)
else:
matrix[i][j] = uf.union(up, left)
label = matrix[i][j]
cluster_sizes[label] = cluster_sizes.get(label, 0) + 1
largest_cluster_size = max(cluster_sizes.values())
matrix_size = m * n
largest_cluster_ratio = largest_cluster_size / matrix_size
return matrix, largest_cluster_ratio
matrix1 = np.array([
[1, 0, 0, 1, 0, 1],
[1, 0, 1, 0, 0, 1],
[1, 0, 1, 1, 0, 1],
[0, 1, 0, 0, 1, 1],
[0, 1, 1, 0, 1, 1],
[1, 0, 1, 0, 1, 1]])
hkm, largest_cluster_ratio = hoshen_kopelman(matrix1, 6, 6)
fig, ax = plt.subplots()
min_val = 0
max_val = 6
m = 6
n = 6
ax.matshow(hkm, cmap=plt.cm.Blues)
for i in range(m):
for j in range(n):
c = hkm[i, j]
ax.text(j, i, str(c), va='center', ha='center')
plt.show()
print("size of the largest cluster(ratio to the matrix size):", largest_cluster_ratio)
我的示例中的集群标签