将 BioPython Phylo 树转换为带节点标签的 NetworkX 图

问题描述 投票:0回答:1

我有一棵 Newick 树:newick_tree_string = "(p1,(((((((p10,p5),p11),((p14,p6),p8)),p16),(((p12,p13),p15),p2)),p9),(p3,p7)),p4)"

我需要将其转换为 networkx 图,其中节点的标签是 "p_i" 这样的名称。我尝试使用 BioPython 读取它:

# Parse the Newick text through an in-memory text buffer.
newick_handle = io.StringIO(newick_tree_string)
tree = Phylo.read(newick_handle, 'newick')
# Name nameless clades.
def tabulate_names(tree):
    """Give every unnamed clade a name and index all clades by name.

    Clades are visited in the order produced by ``tree.find_clades()``.
    A clade whose ``name`` is falsy is renamed, in place, to the string
    form of its position in that traversal; named clades are left alone.

    Returns a dict mapping each clade's (possibly new) name to the clade.
    If two clades end up with the same name, the later one replaces the
    earlier one in the dict.
    """
    names = {}
    for position, clade in enumerate(tree.find_clades()):
        # Only clades without a usable name need one assigned.
        if not clade.name:
            clade.name = str(position)
        names[clade.name] = clade
    return names
# Name the unnamed clades in place; the returned name -> clade dict is not used here.
tabulate_names(tree)
# Convert the Bio.Phylo tree into a networkx graph.
G = Phylo.to_networkx(tree)

我有另一个networkx图,我需要将其与节点进行比较:

# Second graph, built from an explicit edge list; node labels are strings.
g = nx.Graph()
reference_edges = [
    ('p17', 'p9'), ('p17', 'p18'), ('p17', 'p19'), ('p19', 'p20'),
    ('p9', 'p21'), ('p21', 'p4'), ('p21', 'p1'), ('p9', 'p7'),
    ('p7', 'p3'), ('p20', 'p10'), ('p20', 'p5'), ('p19', 'p11'),
    ('p19', 'p8'), ('p8', 'p6'), ('p8', 'p14'), ('p18', 'p2'),
    ('p18', 'p13'), ('p18', 'p15'), ('p18', 'p16'), ('p13', 'p12'),
]
g.add_edges_from(reference_edges)
# NOTE(review): the nodes above are strings such as 'p17', but root_node is
# the int 17 -- confirm which form the code that consumes root_node expects.
root_node = 17

问题在于,第一个图的节点是 Bio.Phylo 的 Clade(进化枝)对象,而不是普通的字符串标签;由于数据结构不同,我无法把它们用在处理普通 networkx 图的函数中。我需要把这些 Clade 对象转换成普通节点,同时保持相同的结构和名称。

有简单的方法吗?

data-structures networkx biopython
1个回答
0
投票
import networkx as nx
import io
from Bio import Phylo
import matplotlib.pyplot as plt

# Newick description of the tree to convert
newick_tree_string = "(p1,(((((((p10,p5),p11),((p14,p6),p8)),p16),(((p12,p13),p15),p2)),p9),(p3,p7)),p4)"

# Parse the string with Bio.Phylo through an in-memory text buffer
newick_handle = io.StringIO(newick_tree_string)
tree = Phylo.read(newick_handle, 'newick')

# Empty undirected graph that will receive the converted tree
G = nx.Graph()

# Function to add nodes and edges based on the tree structure
def add_nodes_and_edges(clade):
    """Copy the subtree rooted at ``clade`` into the module-level graph ``G``.

    Each clade becomes one node of ``G`` and each parent/child link becomes
    one edge. A named clade is labelled with its ``name``. An unnamed clade
    (falsy ``name``) gets its own generated label of the form
    ``"Unnamed_Node_<k>"``.

    Giving every unnamed clade a distinct label is what keeps the tree
    shape intact: if they all shared one label, the root and every internal
    node would merge into a single graph node.

    Args:
        clade: Root of the subtree to copy. It must expose ``name`` and an
            iterable ``clades`` holding its children.
    """
    next_id = 0

    def label_for(node):
        # Named clades keep their name; unnamed ones get the next label
        # that is not already a node of G.
        nonlocal next_id
        if node.name:
            return node.name
        while True:
            candidate = f"Unnamed_Node_{next_id}"
            next_id += 1
            if candidate not in G:
                return candidate

    def visit(node, node_name):
        G.add_node(node_name)
        for child in node.clades:
            # Pick the child's label once and pass it down, so the edge
            # added here and the child's own node use the same label.
            child_name = label_for(child)
            G.add_edge(node_name, child_name)
            visit(child, child_name)

    visit(clade, label_for(clade))

# Populate G by walking the tree, starting from its root clade
add_nodes_and_edges(tree.clade)

# G now holds the tree as a NetworkX graph whose nodes are plain labels
# Draw the first tree on its own 8x6 figure
plt.figure(figsize=(8, 6))
pos = nx.spring_layout(G)  # Node positions used by nx.draw below
nx.draw(G, pos, with_labels=True, node_size=500, node_color='skyblue', font_size=10)
plt.title("First Tree Structure")
plt.show()

# Draw the second tree 'g' on a separate figure
# NOTE: 'g' is not created in this snippet; it must already exist as a
# NetworkX graph (the question builds it with nx.Graph() and add_edges_from)
plt.figure(figsize=(8, 6))
pos_g = nx.spring_layout(g)  # Node positions used by nx.draw below
nx.draw(g, pos_g, with_labels=True, node_size=500, node_color='lightgreen', font_size=10)
plt.title("Second Tree Structure")
plt.show()

# Compute the degree centrality of each node in the first tree 'G' and print one line per node
degree_centrality_G = nx.degree_centrality(G)
print("Degree centrality for the first tree:")
for node, centrality in degree_centrality_G.items():
    print(f"Node {node}: {centrality}")

# Compute the shortest path length between nodes 'p1' and 'p10' in the first tree 'G'
shortest_path_length_G = nx.shortest_path_length(G, 'p1', 'p10')
print("Shortest path length in the first tree:", shortest_path_length_G)

© www.soinside.com 2019 - 2024. All rights reserved.