生成给定大小的所有有向图直至同构

Question

我正在尝试生成具有给定数量节点的所有有向图，最多为图同构，以便我可以将它们输入到另一个Python程序中。这是使用 NetworkX 的简单参考实现，我想加快速度：

from itertools import combinations, product
import networkx as nx

def generate_digraphs(n):
  graphs_so_far = list()
  nodes = list(range(n))
  possible_edges = [(i, j) for i, j in product(nodes, nodes) if i != j]
  for edge_mask in product([True, False], repeat=len(possible_edges)):
    edges = [edge for include, edge in zip(edge_mask, possible_edges) if include]
    g = nx.DiGraph()
    g.add_nodes_from(nodes)
    g.add_edges_from(edges)
    if not any(nx.is_isomorphic(g_before, g) for g_before in graphs_so_far):
      graphs_so_far.append(g)
  return graphs_so_far

assert len(generate_digraphs(1)) == 1
assert len(generate_digraphs(2)) == 3
assert len(generate_digraphs(3)) == 16

此类图的数量似乎增长得很快，并且由这个 OEIS 序列给出。我正在寻找一种解决方案，能够在合理的时间内生成最多 7 个节点的所有图（总共约 10 亿个图）。

将图表示为 NetworkX 对象并不是很重要；例如，用邻接列表表示图形或使用不同的库对我来说很好。

Answer 1

我从 Brendan McKay 的论文中学到了一个有用的想法 “无同构详尽生成”（尽管我相信它早于那张纸）。

这个想法是我们可以将同构类组织成一棵树，其中具有空图的单例类是根，并且每个具有 n > 0 个节点的图形的类有一个带有图形的父类有 n − 1 个节点。枚举图的同构类 n > 0 个节点，枚举 n − 1 的图的同构类节点，并且对于每个这样的类，将其代表扩展到所有 n 个节点并过滤掉实际上不是的节点的可能方法孩子们。

下面的Python代码用一个基本的但是实现了这个想法非平凡的图同构子程序。 n = 需要几分钟 6 且（此处估计）n = 7 时大约需要几天时间。对于额外的速度，将其移植到 C++，也许可以找到更好的算法来处理排列群（也许在 TAoCP 中，尽管大多数图没有对称性，因此尚不清楚好处有多大）。

import cProfile
import collections
import itertools
import random


# Returns labels approximating the orbits of graph. Two nodes in the same orbit
# have the same label, but two nodes in different orbits don't necessarily have
# different labels.
def invariant_labels(graph, n):
    labels = [1] * n
    for r in range(2):
        incoming = [0] * n
        outgoing = [0] * n
        for i, j in graph:
            incoming[j] += labels[i]
            outgoing[i] += labels[j]
        for i in range(n):
            labels[i] = hash((incoming[i], outgoing[i]))
    return labels


# Returns the inverse of perm.
def inverse_permutation(perm):
    n = len(perm)
    inverse = [None] * n
    for i in range(n):
        inverse[perm[i]] = i
    return inverse


# Returns the permutation that sorts by label.
def label_sorting_permutation(labels):
    n = len(labels)
    return inverse_permutation(sorted(range(n), key=lambda i: labels[i]))


# Returns the graph where node i becomes perm[i] .
def permuted_graph(perm, graph):
    perm_graph = [(perm[i], perm[j]) for (i, j) in graph]
    perm_graph.sort()
    return perm_graph


# Yields each permutation generated by swaps of two consecutive nodes with the
# same label.
def label_stabilizer(labels):
    n = len(labels)
    factors = (
        itertools.permutations(block)
        for (_, block) in itertools.groupby(range(n), key=lambda i: labels[i])
    )
    for subperms in itertools.product(*factors):
        yield [i for subperm in subperms for i in subperm]


# Returns the canonical labeled graph isomorphic to graph.
def canonical_graph(graph, n):
    labels = invariant_labels(graph, n)
    sorting_perm = label_sorting_permutation(labels)
    graph = permuted_graph(sorting_perm, graph)
    labels.sort()
    return max(
        (permuted_graph(perm, graph), perm[sorting_perm[n - 1]])
        for perm in label_stabilizer(labels)
    )


# Returns the list of permutations that stabilize graph.
def graph_stabilizer(graph, n):
    return [
        perm
        for perm in label_stabilizer(invariant_labels(graph, n))
        if permuted_graph(perm, graph) == graph
    ]


# Yields the subsets of range(n) .
def power_set(n):
    for r in range(n + 1):
        for s in itertools.combinations(range(n), r):
            yield list(s)


# Returns the set where i becomes perm[i] .
def permuted_set(perm, s):
    perm_s = [perm[i] for i in s]
    perm_s.sort()
    return perm_s


# If s is canonical, returns the list of permutations in group that stabilize s.
# Otherwise, returns None.
def set_stabilizer(s, group):
    stabilizer = []
    for perm in group:
        perm_s = permuted_set(perm, s)
        if perm_s < s:
            return None
        if perm_s == s:
            stabilizer.append(perm)
    return stabilizer


# Yields one representative of each isomorphism class.
def enumerate_graphs(n):
    assert 0 <= n
    if 0 == n:
        yield []
        return
    for subgraph in enumerate_graphs(n - 1):
        sub_stab = graph_stabilizer(subgraph, n - 1)
        for incoming in power_set(n - 1):
            in_stab = set_stabilizer(incoming, sub_stab)
            if not in_stab:
                continue
            for outgoing in power_set(n - 1):
                out_stab = set_stabilizer(outgoing, in_stab)
                if not out_stab:
                    continue
                graph, i_star = canonical_graph(
                    subgraph
                    + [(i, n - 1) for i in incoming]
                    + [(n - 1, j) for j in outgoing],
                    n,
                )
                if i_star == n - 1:
                    yield graph


def test():
    print(sum(1 for graph in enumerate_graphs(5)))


cProfile.run("test()")

Answer 2

除了使用

nx.is_isomorphic

来比较两个图 G1 和 G2 之外，您还可以生成与 G1 同构的所有图并检查 G2 是否在该集合中。乍一看，这听起来比较麻烦，但它不仅可以让您检查 G2 是否与 G1 同构，还可以检查是否有任何图与 G1 同构，而

nx.is_isomorphic

在比较两个图时总是从头开始。

为了让事情变得更容易，每个图都只是存储为边列表。如果所有边的集合相同，则两个图是相同的（非同构）。始终确保边列表是一个排序的元组，以便

==

测试确切的相等性并使边列表可散列。

import itertools


def all_digraphs(n):
    possible_edges = [
        (i, j) for i, j in itertools.product(range(n), repeat=2) if i != j
    ]
    for edge_mask in itertools.product([True, False], repeat=len(possible_edges)):
        # The result is already sorted
        yield tuple(edge for include, edge in zip(edge_mask, possible_edges) if include)


def unique_digraphs(n):
    already_seen = set()
    for graph in all_digraphs(n):
        if graph not in already_seen:
            yield graph
            already_seen |= {
                tuple(sorted((perm[i], perm[j]) for i, j in graph))
                for perm in itertools.permutations(range(n))
            }

与之前解决方案的变体相比，这在我的机器上给出了以下计时：

这一切看起来很有希望，但对于 6 个节点来说，我的 16GiB 内存已经不够了，Python 进程被操作系统终止了。我确信您可以将此代码与为每个

outdegree_sequence

批量生成图表结合起来，如上一个答案中详述。这将允许在每批之后清空

already_seen

并大幅减少内存消耗。

Answer 3

98-99% 的计算时间用于同构测试，因此游戏的名称是减少必要测试的数量。在这里，我批量创建图形，以便仅在批量内测试图形的同构性。

在第一个变体（下面的版本 2）中，批次内的所有图形都具有相同数量的边。这会导致运行时间显着但适度的改进（对于大小为 4 的图，速度提高了 2.5 倍，对于较大的图，速度提升更大）。

在第二个变体（下面的版本 3）中，批次内的所有图都具有相同的出度序列。这导致运行时间的显着改善（对于大小为 4 的图，速度提高了 35 倍，对于较大的图，速度增益更大）。

在第三个变体（下面的版本 4）中，批次内的所有图都具有相同的出度序列。此外，在一个批次内，所有图表均按入度顺序排序。与版本 3 相比，这导致速度略有提高（尺寸 4 的图快 1.3 倍；尺寸 5 的图快 2.1 倍）。

#!/usr/bin/env python
"""
Efficient motif generation.
"""

import numpy as np
import matplotlib.pyplot as plt
import networkx as nx

from timeit import timeit
from itertools import combinations, product, chain, combinations_with_replacement


# for profiling with kernprof/line_profiler
try:
    profile
except NameError:
    profile = lambda x: x


@profile
def version_1(n):
    """Original implementation by @hilberts_drinking_problem"""
    graphs_so_far = list()
    nodes = list(range(n))
    possible_edges = [(i, j) for i, j in product(nodes, nodes) if i != j]
    for edge_mask in product([True, False], repeat=len(possible_edges)):
        edges = [edge for include, edge in zip(edge_mask, possible_edges) if include]
        g = nx.DiGraph()
        g.add_nodes_from(nodes)
        g.add_edges_from(edges)
        if not any(nx.is_isomorphic(g_before, g) for g_before in graphs_so_far):
            graphs_so_far.append(g)
    return graphs_so_far


@profile
def version_2(n):
    """Creates graphs in batches, where each batch contains graphs with
    the same number of edges. Only graphs within a batch have to be tested
    for isomorphisms."""
    graphs_so_far = list()
    nodes = list(range(n))
    possible_edges = [(i, j) for i, j in product(nodes, nodes) if i != j]
    for ii in range(len(possible_edges)+1):
        tmp = []
        for edges in combinations(possible_edges, ii):
            g = nx.from_edgelist(edges, create_using=nx.DiGraph)
            if not any(nx.is_isomorphic(g_before, g) for g_before in tmp):
                tmp.append(g)
        graphs_so_far.extend(tmp)
    return graphs_so_far


@profile
def version_3(n):
    """Creates graphs in batches, where each batch contains graphs with
    the same out-degree sequence. Only graphs within a batch have to be tested
    for isomorphisms."""
    graphs_so_far = list()
    outdegree_sequences_so_far = list()
    for outdegree_sequence in product(*[range(n) for _ in range(n)]):
        # skip degree sequences which we have already seen as the resulting graphs will be isomorphic
        if sorted(outdegree_sequence) not in outdegree_sequences_so_far:
            tmp = []
            for edges in generate_graphs(outdegree_sequence):
                g = nx.from_edgelist(edges, create_using=nx.DiGraph)
                if not any(nx.is_isomorphic(g_before, g) for g_before in tmp):
                    tmp.append(g)
            graphs_so_far.extend(tmp)
            outdegree_sequences_so_far.append(sorted(outdegree_sequence))
    return graphs_so_far


def generate_graphs(outdegree_sequence):
    """Generates all directed graphs with a given out-degree sequence."""
    for edges in product(*[generate_edges(node, degree, len(outdegree_sequence)) \
                           for node, degree in enumerate(outdegree_sequence)]):
        yield(list(chain(*edges)))


def generate_edges(node, outdegree, total_nodes):
    """Generates all edges for a given node with a given out-degree and a given graph size."""
    for targets in combinations(set(range(total_nodes)) - {node}, outdegree):
        yield([(node, target) for target in targets])


@profile
def version_4(n):
    """Creates graphs in batches, where each batch contains graphs with
    the same out-degree sequence.  Within a batch, graphs are sorted
    by in-degree sequence, such that only graphs with the same
    in-degree sequence have to be tested for isomorphism.
    """
    graphs_so_far = list()
    for outdegree_sequence in combinations_with_replacement(range(n), n):
        tmp = dict()
        for edges in generate_graphs(outdegree_sequence):
            g = nx.from_edgelist(edges, create_using=nx.DiGraph)
            indegree_sequence = tuple(sorted(degree for _, degree in g.in_degree()))
            if indegree_sequence in tmp:
                if not any(nx.is_isomorphic(g_before, g) for g_before in tmp[indegree_sequence]):
                    tmp[indegree_sequence].append(g)
            else:
                tmp[indegree_sequence] = [g]
        for graphs in tmp.values():
            graphs_so_far.extend(graphs)
    return graphs_so_far


if __name__ == '__main__':

    order = range(1, 5)
    t1 = [timeit(lambda : version_1(n), number=3) for n in order]
    t2 = [timeit(lambda : version_2(n), number=3) for n in order]
    t3 = [timeit(lambda : version_3(n), number=3) for n in order]
    t4 = [timeit(lambda : version_4(n), number=3) for n in order]

    fig, ax = plt.subplots()
    for ii, t in enumerate([t1, t2, t3, t4]):
        ax.plot(t, label=f"Version no. {ii+1}")
    ax.set_yscale('log')
    ax.set_ylabel('Execution time [s]')
    ax.set_xlabel('Graph order')
    ax.legend()
    plt.show()

Answer 4

Paul Brodersen 的回答中

有不同的并行化版本4的方法，这似乎是最快的。我们可以：

并行同构测试（不起作用，至于小图图同构很快）
并行化
```
for edges in generate_graphs(outdegree_sequence)
```
，使用托管字典或只是为每个线程/进程复制字典。这效果不佳，因为在共享字典中插入非常慢，并且复制字典也无法正常工作，因为所有线程都需要访问所有
```
tmp
```
字典，而不仅仅是副本。

下面这个，好像是最快的。

def version_para4(n):
    graphs_so_far = list()
    with multiprocessing.Pool(multiprocessing.cpu_count()) as p:
          grafetti = p.starmap(the_function_para4, [(outdegree_sequence,) for outdegree_sequence in combinations_with_replacement(range(n), n)])
        p.close()
        p.join()

    for graphs in grafetti:
        graphs_so_far.extend(graphs)

    return graphs_so_far



def the_function_para4(outdegree_sequence):
    small_graphs_so_far = []
    tmp = dict()
    for edges in generate_graphs(outdegree_sequence):
        g = nx.from_edgelist(edges, create_using=nx.DiGraph)
        indegree_sequence = tuple(sorted(degree for _, degree in g.in_degree()))
        if indegree_sequence in tmp:
            if not any(nx.is_isomorphic(g_before, g) for g_before in tmp[indegree_sequence]):
                tmp[indegree_sequence].append(g)
        else:
            tmp[indegree_sequence] = [g]
    for graph in tmp.values():
        small_graphs_so_far.extend(graph)
    return small_graphs_so_far

生成给定大小的所有有向图直至同构

问题描述投票：0回答：4

4个回答

最新问题

生成给定大小的所有有向图直至同构

问题描述 投票：0回答：4

4个回答

最新问题

问题描述投票：0回答：4