标题:如何在Python中使用嵌套饼图可视化分层数据?

问题描述 投票:0回答:1

我有分层数据,我想使用 Python 中的嵌套饼图将其可视化。数据由门、属和物种级别组成,我想创建一个嵌套饼图,其中每个级别代表图表中的一个环。

我已经尝试使用 Matplotlib 来实现这一点,但在根据某些类别的丰度进行过滤、并且只显示嵌套饼图的特定部分时遇到了困难。具体来说,我想:

1. 首先显示所有的门。
2. 过滤并仅显示属于特定门(例如厚壁菌门 Firmicutes)的属。
3. 过滤并仅显示属于特定属(例如芽孢杆菌属 Bacillus)的物种。

我尝试根据网上找到的建议修改代码,但没有得到想要的输出。

有人可以提供有关如何使用 Python 和 Matplotlib 实现此可视化的指导或代码示例吗?

任何帮助将不胜感激。谢谢!

import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.patches import Patch

# Load the "Species" sheet of the analysis report workbook
TissueS35_Analysis_Report = pd.read_excel(
    "TissueS35_Analysis_Report.xlsx",
    sheet_name="Species",
)

# Keep the three taxonomy columns together with the count column
taxonomy_columns = ['Phylum', 'Genus', 'Species']
selected_columns = TissueS35_Analysis_Report[taxonomy_columns + ['Absolute Count']]

# Add up the counts of rows sharing the same (Phylum, Genus, Species) and
# turn the group keys back into ordinary columns
grouped_data = selected_columns.groupby(taxonomy_columns).sum().reset_index()

# Function to generate nested pie chart data
def nested_pie(df):
    """Aggregate 'Absolute Count' per taxonomic level for a nested pie chart.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Phylum', 'Genus', 'Species' and
        'Absolute Count'. The frame itself is not modified.

    Returns
    -------
    dict
        Maps the ring index (0 = Phylum, 1 = Genus, 2 = Species) to a dict
        with two entries: 'names' (list of labels for that level) and
        'values' (NumPy array with the summed 'Absolute Count' per label).
        At every level the entries are ordered so that all children of a
        parent are contiguous and parents keep the order in which they first
        appear in ``df``; this keeps the wedges of the three rings aligned.
    """
    hierarchy = ['Phylum', 'Genus', 'Species']

    # Work on a copy restricted to the needed columns so the caller's frame
    # is untouched and unrelated columns never take part in the aggregation.
    table = df[hierarchy + ['Absolute Count']].copy()

    # Rank every row by the first appearance of its group at each depth and
    # order the rows by those ranks. Rows that are already hierarchically
    # ordered stay exactly where they are; interleaved rows are pulled under
    # their parent so the rings line up.
    rank_cols = []
    for depth in range(len(hierarchy)):
        rank_col = '_rank' + str(depth)
        table[rank_col] = table.groupby(hierarchy[:depth + 1], sort=False).ngroup().values
        rank_cols.append(rank_col)
    table = table.sort_values(rank_cols)

    outd = {}
    for level in range(len(hierarchy)):
        # Sum only the count column; sort=False preserves the row order
        # established above.
        counts = table.groupby(hierarchy[:level + 1], sort=False)['Absolute Count'].sum()
        outd[level] = {
            'names': counts.index.get_level_values(level).tolist(),
            'values': counts.values,
        }
    return outd

# Build the per-ring label lists and value arrays
outd = nested_pie(grouped_data)

# A single axes holds all three concentric rings
fig, ax = plt.subplots()

# Every ring is 0.3 wide and its wedges are separated by white edges
ring_style = {'width': 0.3, 'edgecolor': 'w'}

# Outermost ring: Species (the only ring drawn with text labels)
species_labels = outd[2]['names']
species_colors = plt.cm.tab20c.colors
sizes = outd[2]['values']
ax.pie(sizes, labels=species_labels, colors=species_colors, radius=1,
       wedgeprops=dict(ring_style))

# Middle ring: Genus
genus_labels = outd[1]['names']
genus_colors = plt.cm.tab20b.colors
sizes = outd[1]['values']
ax.pie(sizes, colors=genus_colors, radius=0.7, wedgeprops=dict(ring_style))

# Innermost ring: Phylum
phylum_labels = outd[0]['names']
phylum_colors = plt.cm.tab20.colors
sizes = outd[0]['values']
ax.pie(sizes, colors=phylum_colors, radius=0.4, wedgeprops=dict(ring_style))

# Legend for the Phylum ring: one colour swatch per phylum, placed to the
# right of the axes
legend_handles = []
for swatch, phylum in zip(phylum_colors, phylum_labels):
    legend_handles.append(Patch(color=swatch, label=phylum))
ax.legend(handles=legend_handles, title='Phylum', loc='center left', bbox_to_anchor=(1, 0.5))

# Equal aspect ratio keeps the rings circular
ax.set_aspect("equal")
plt.show()

A small sample of the data is as follows:
             Phylum             Genus         Species  Absolute Count
168  Proteobacteria       Pseudomonas    Unclassified           73745
152  Proteobacteria        Klebsiella    Unclassified           10777
190  Proteobacteria      Unclassified    Unclassified            4932
132  Proteobacteria   Chromobacterium    Unclassified            1840
84       Firmicutes    Lysinibacillus  boronitolerans            1780
104      Firmicutes         Weissella       ghanensis            1101
10   Actinobacteria   Corynebacterium    Unclassified             703
138  Proteobacteria       Cupriavidus        gilardii             586
93       Firmicutes    Staphylococcus    Unclassified             568
183  Proteobacteria  Stenotrophomonas      geniculata             542
Selection deleted

如果可能的话,我该如何解决下图中标签相互重叠的问题?非常感谢您的帮助,谨致问候。

python pandas matplotlib
1个回答
0
投票

实现此目的的一种方法是定义一个创建嵌套饼图的函数:

import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.inset_locator import inset_axes

# Sample records, one tuple per row: (Phylum, Genus, Species, Absolute Count)
rows = [
    ('Proteobacteria', 'Pseudomonas', 'Unclassified', 73745),
    ('Proteobacteria', 'Klebsiella', 'Unclassified', 10777),
    ('Proteobacteria', 'Unclassified', 'Unclassified', 4932),
    ('Proteobacteria', 'Chromobacterium', 'Unclassified', 1840),
    ('Firmicutes', 'Lysinibacillus', 'boronitolerans', 1780),
    ('Firmicutes', 'Weissella', 'ghanensis', 1101),
    ('Actinobacteria', 'Corynebacterium', 'Unclassified', 703),
    ('Proteobacteria', 'Cupriavidus', 'gilardii', 586),
    ('Firmicutes', 'Staphylococcus', 'Unclassified', 568),
    ('Proteobacteria', 'Stenotrophomonas', 'geniculata', 542),
]
columns = ['Phylum', 'Genus', 'Species', 'Absolute Count']

# Pivot the records into a column-name -> list-of-values mapping
data = {
    column: [row[position] for row in rows]
    for position, column in enumerate(columns)
}

df = pd.DataFrame(data)


def create_nested_pie(df, phylum='Firmicutes', genus='Lysinibacillus'):
    """Draw a three-ring pie chart that drills down into one phylum and one genus.

    The outer ring shows the summed 'Absolute Count' of every phylum, the
    middle ring shows the genera of ``phylum`` only, and the inner ring shows
    the species of ``genus`` (within ``phylum``) only. Each ring is drawn by
    its own ``ax.pie`` call, so each one spans the full circle on its own.
    The figure is displayed with ``plt.show()``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Phylum', 'Genus', 'Species' and
        'Absolute Count'.
    phylum : str, optional
        Phylum whose genera fill the middle ring. Defaults to 'Firmicutes'.
    genus : str, optional
        Genus (inside ``phylum``) whose species fill the inner ring.
        Defaults to 'Lysinibacillus'.

    Returns
    -------
    None
    """
    ring_width = 0.3
    fig, ax = plt.subplots()

    # Outer ring: every phylum
    phylum_counts = df.groupby('Phylum')['Absolute Count'].sum()
    ax.pie(
        phylum_counts,
        labels=phylum_counts.index.tolist(),
        radius=1,
        wedgeprops=dict(width=ring_width, edgecolor='w'),
    )

    # Middle ring: genera of the selected phylum only
    in_phylum = df['Phylum'] == phylum
    genus_counts = df[in_phylum].groupby('Genus')['Absolute Count'].sum()
    ax.pie(
        genus_counts,
        labels=genus_counts.index.tolist(),
        radius=1 - ring_width,
        wedgeprops=dict(width=ring_width, edgecolor='w'),
        labeldistance=0.7,
    )

    # Inner ring: species of the selected genus within the selected phylum
    in_genus = in_phylum & (df['Genus'] == genus)
    species_counts = df[in_genus].groupby('Species')['Absolute Count'].sum()
    ax.pie(
        species_counts,
        labels=species_counts.index.tolist(),
        radius=1 - 2 * ring_width,
        wedgeprops=dict(width=ring_width, edgecolor='w'),
        labeldistance=0.4,
    )

    plt.show()

# Render the nested pie chart for the sample DataFrame built above
create_nested_pie(df)

这给你:

© www.soinside.com 2019 - 2024. All rights reserved.