我有分层数据,我想使用 Python 中的嵌套饼图将其可视化。数据由门、属和物种级别组成,我想创建一个嵌套饼图,其中每个级别代表图表中的一个环。
我已经尝试使用 Matplotlib 来实现这一点,但在根据某些类别的丰度进行过滤、并仅显示嵌套饼图的特定部分时遇到了困难。具体来说,我想:
(1) 首先显示所有门;(2) 过滤并仅显示属于特定门(例如厚壁菌门)的属;(3) 过滤并仅显示属于特定属(例如芽孢杆菌属)的物种。我尝试根据网上找到的建议修改代码,但没有得到所需的输出。
有人可以提供有关如何使用 Python 和 Matplotlib 实现此可视化的指导或代码示例吗?
任何帮助将不胜感激。谢谢!
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
# Load the "Species" sheet of the analysis report workbook
TissueS35_Analysis_Report = pd.read_excel(
    "TissueS35_Analysis_Report.xlsx",
    sheet_name="Species",
)
# Keep the three taxonomy columns together with the count column
selected_columns = TissueS35_Analysis_Report[['Phylum', 'Genus', 'Species', 'Absolute Count']]
# Total the counts for every distinct (Phylum, Genus, Species) combination,
# keeping the grouping keys as ordinary columns
grouped_data = selected_columns.groupby(['Phylum', 'Genus', 'Species'], as_index=False).sum()
# Function to generate nested pie chart data
def nested_pie(df):
    """Build the wedge names and sizes for a three-ring nested pie chart.

    Args:
        df: DataFrame with 'Phylum', 'Genus', 'Species' and
            'Absolute Count' columns.

    Returns:
        A dict keyed by ring level (0 = Phylum, 1 = Genus, 2 = Species).
        Each value is a dict with:
          * 'names'  -- list of the wedge names at that level
          * 'values' -- array of summed 'Absolute Count' per wedge

        Wedges are ordered so that every genus sits inside the angular
        span of its phylum and every species inside the span of its
        genus, even when the rows of ``df`` are not sorted by taxonomy.
        Phyla (and genera within a phylum) keep their order of first
        appearance in ``df``.
    """
    levels = ['Phylum', 'Genus', 'Species']
    count_col = 'Absolute Count'

    # Rank each row by the first appearance of its phylum and of its
    # (phylum, genus) pair. Sorting on these ranks makes the rows of each
    # parent group contiguous; without this, grouping each level
    # independently with sort=False interleaves children of different
    # parents and the rings no longer line up.
    phylum_rank = df.groupby(levels[:1], sort=False).ngroup()
    genus_rank = df.groupby(levels[:2], sort=False).ngroup()
    order = (
        pd.DataFrame({
            'phylum': phylum_rank.to_numpy(),
            'genus': genus_rank.to_numpy(),
            # Original row position as the final key keeps ties in input order.
            'position': range(len(df)),
        })
        .sort_values(['phylum', 'genus', 'position'])
        .index.to_numpy()
    )
    ordered = df.iloc[order]

    outd = {}
    for level in range(len(levels)):
        # Sum only the count column; summing the whole frame would also
        # try to "add up" the string taxonomy columns.
        totals = ordered.groupby(levels[:level + 1], sort=False)[count_col].sum()
        outd[level] = {
            'names': totals.index.get_level_values(level).tolist(),
            'values': totals.values,
        }
    return outd
# Compute the wedge names and totals for all three taxonomic levels
outd = nested_pie(grouped_data)

# A single axes hosts the three concentric rings
fig, ax = plt.subplots()

# Outermost ring: species -- the only ring drawn with wedge labels
species_labels = outd[2]['names']
species_colors = plt.cm.tab20c.colors
sizes = outd[2]['values']
ax.pie(
    sizes,
    labels=species_labels,
    colors=species_colors,
    radius=1,
    wedgeprops={'width': 0.3, 'edgecolor': 'w'},
)

# Middle ring: genus (labels are collected but not drawn)
genus_labels = outd[1]['names']
genus_colors = plt.cm.tab20b.colors
sizes = outd[1]['values']
ax.pie(
    sizes,
    colors=genus_colors,
    radius=0.7,
    wedgeprops={'width': 0.3, 'edgecolor': 'w'},
)

# Innermost ring: phylum (identified through the legend instead of labels)
phylum_labels = outd[0]['names']
phylum_colors = plt.cm.tab20.colors
sizes = outd[0]['values']
ax.pie(
    sizes,
    colors=phylum_colors,
    radius=0.4,
    wedgeprops={'width': 0.3, 'edgecolor': 'w'},
)

# Legend: one colored patch per phylum, placed to the right of the chart
legend_handles = [
    Patch(color=swatch, label=name)
    for swatch, name in zip(phylum_colors, phylum_labels)
]
ax.legend(
    handles=legend_handles,
    title='Phylum',
    loc='center left',
    bbox_to_anchor=(1, 0.5),
)

# Equal aspect ratio keeps the rings circular
ax.set_aspect("equal")
plt.show()
A small sample of the data, for reference, is as follows:
Phylum Genus Species Absolute Count
168 Proteobacteria Pseudomonas Unclassified 73745
152 Proteobacteria Klebsiella Unclassified 10777
190 Proteobacteria Unclassified Unclassified 4932
132 Proteobacteria Chromobacterium Unclassified 1840
84 Firmicutes Lysinibacillus boronitolerans 1780
104 Firmicutes Weissella ghanensis 1101
10 Actinobacteria Corynebacterium Unclassified 703
138 Proteobacteria Cupriavidus gilardii 586
93 Firmicutes Staphylococcus Unclassified 568
183 Proteobacteria Stenotrophomonas geniculata 542
Selection deleted
实现此目的的一种方法是定义一个创建嵌套饼图的函数:
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
# Sample rows in the same layout as the table in the question:
# (Phylum, Genus, Species, Absolute Count)
_columns = ('Phylum', 'Genus', 'Species', 'Absolute Count')
_records = [
    ('Proteobacteria', 'Pseudomonas', 'Unclassified', 73745),
    ('Proteobacteria', 'Klebsiella', 'Unclassified', 10777),
    ('Proteobacteria', 'Unclassified', 'Unclassified', 4932),
    ('Proteobacteria', 'Chromobacterium', 'Unclassified', 1840),
    ('Firmicutes', 'Lysinibacillus', 'boronitolerans', 1780),
    ('Firmicutes', 'Weissella', 'ghanensis', 1101),
    ('Actinobacteria', 'Corynebacterium', 'Unclassified', 703),
    ('Proteobacteria', 'Cupriavidus', 'gilardii', 586),
    ('Firmicutes', 'Staphylococcus', 'Unclassified', 568),
    ('Proteobacteria', 'Stenotrophomonas', 'geniculata', 542),
]
# Transpose the records into a column-name -> list-of-values mapping
data = {name: list(values) for name, values in zip(_columns, zip(*_records))}
df = pd.DataFrame(data)
def create_nested_pie(df, phylum='Firmicutes', genus='Lysinibacillus'):
    """Draw a three-ring drill-down pie chart and show it.

    The outer ring shows every phylum, the middle ring shows only the
    genera of ``phylum``, and the inner ring shows only the species of
    ``genus`` within ``phylum``. Each ring is a separate pie, so its
    wedges are proportions within that ring's own selection, not within
    the parent wedge.

    Args:
        df: DataFrame with 'Phylum', 'Genus', 'Species' and
            'Absolute Count' columns.
        phylum: Phylum whose genera fill the middle ring. Defaults to
            'Firmicutes'.
        genus: Genus (within ``phylum``) whose species fill the inner
            ring. Defaults to 'Lysinibacillus'.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    fig, ax = plt.subplots()
    size = 0.3  # radial thickness of each ring

    # Row filters for the two drill-down levels
    in_phylum = df['Phylum'] == phylum
    in_genus = in_phylum & (df['Genus'] == genus)

    # Outer ring: all phyla
    phylum_counts = df.groupby('Phylum')['Absolute Count'].sum()
    ax.pie(
        phylum_counts,
        labels=phylum_counts.index.tolist(),
        radius=1,
        wedgeprops=dict(width=size, edgecolor='w'),
    )

    # Middle ring: genera of the selected phylum only
    genus_counts = df[in_phylum].groupby('Genus')['Absolute Count'].sum()
    ax.pie(
        genus_counts,
        labels=genus_counts.index.tolist(),
        radius=1 - size,
        wedgeprops=dict(width=size, edgecolor='w'),
        labeldistance=0.7,  # pull labels inside the ring
    )

    # Inner ring: species of the selected genus only
    species_counts = df[in_genus].groupby('Species')['Absolute Count'].sum()
    ax.pie(
        species_counts,
        labels=species_counts.index.tolist(),
        radius=1 - 2 * size,
        wedgeprops=dict(width=size, edgecolor='w'),
        labeldistance=0.4,  # pull labels inside the ring
    )

    plt.show()


create_nested_pie(df)
这给你: