Hoshen Kopelman 聚类检测算法

Question

我对编程场景相当陌生，并且不知所措：D 我想应用

Hoshen-Kopelman

算法在矩阵中进行聚类检测，然后确定最大聚类的“大小”。为此，我编写了以下代码（见下文）。但是，根据示例矩阵，某些簇的标记不正确（例如，在我的代码中，簇 3 和簇 6 应该是同一簇）。有人可以帮我调试吗？

import numpy as np
import matplotlib.pyplot as plt


class UnionFind:
    def __init__(self, max_labels):
        self.labels = [0] * max_labels
        self.labels[0] = 0
        self.n_labels = max_labels

    def find(self, x):
        y = x
        while self.labels[y] != y:
        y = self.labels[y]
        while self.labels[x] != x:
        z = self.labels[x]
        self.labels[x] = y
        x = z
        return y

    def union(self, x, y):
        self.labels[self.find(x)] = self.find(y)
        return self.find(x)

    def make_set(self):
        self.labels[0] += 1
        assert self.labels[0] < self.n_labels
        self.labels[self.labels[0]] = self.labels[0]
        return self.labels[0]


def hoshen_kopelman(matrix, m, n):
    uf = UnionFind(m * n // 2)
    cluster_sizes = {} 

    for i in range(m):
    for j in range(n):
    if matrix[i][j]:
    up = matrix[i - 1][j] if i > 0 else 0
    left = matrix[i][j - 1] if j > 0 else 0

    if up == 0 and left == 0:
    matrix[i][j] = uf.make_set()
    elif up == 0 or left == 0:
    matrix[i][j] = max(up, left)
    else:
    matrix[i][j] = uf.union(up, left)

    label = matrix[i][j]
    cluster_sizes[label] = cluster_sizes.get(label, 0) + 1

    largest_cluster_size = max(cluster_sizes.values())
    matrix_size = m * n
    largest_cluster_ratio = largest_cluster_size / matrix_size

    return matrix, largest_cluster_ratio


matrix1 = np.array([
[1, 0, 0, 1, 0, 1],
[1, 0, 1, 0, 0, 1],
[1, 0, 1, 1, 0, 1],
[0, 1, 0, 0, 1, 1],
[0, 1, 1, 0, 1, 1],
[1, 0, 1, 0, 1, 1]])

hkm, largest_cluster_ratio = hoshen_kopelman(matrix1, 6, 6)

fig, ax = plt.subplots()
min_val = 0
max_val = 6
m = 6
n = 6
ax.matshow(hkm, cmap=plt.cm.Blues)

for i in range(m):
for j in range(n):
c = hkm[i, j]
ax.text(j, i, str(c), va='center', ha='center')

plt.show()

print("size of the largest cluster(ratio to the matrix size):", largest_cluster_ratio)

我的示例中的集群标签

Hoshen Kopelman 聚类检测算法

问题描述投票：0回答：0

最新问题

Hoshen Kopelman 聚类检测算法

问题描述 投票：0回答：0

最新问题

问题描述投票：0回答：0