如何同时显示两张图表?

问题描述 投票:0回答:1

我想显示两张图表,一张是收入最高的 10% 的图表,一张是收入最低的 10% 的图表,但我遇到的问题是只显示了一张图表。

最贫困人口的数据样本是: "Germany,""DEU"",""Income share held by lowest 10%"",""SI.DST.FRST.10"","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""",""3.7"",""3.7"",""3.7"",""3.4"",""3.4"",""3.7"",""3.6"",""3.6"",""3.5"",""3.5"",""3.5"",""3.4"",""3.4"",""3.4"",""3.3"",""3.3"",""3.4"",""3.4"",""3.3"",""3.4"",""3.4"",""3.2"",""3.3"",""3.2"",""3.1"",""3.1"",""2.8"",""3.1"",""3.1"","""","""","""","

最富有的人的数据样本是: "Germany,""DEU"",""Income share held by highest 10%"",""SI.DST.10TH.10"","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""",""23.2"",""23.1"",""22.8"",""22.9"",""22.7"",""22.3"",""22.4"",""22.3"",""23.1"",""22.9"",""23.9"",""23.7"",""23.9"",""24"",""25.1"",""24.7"",""25.1"",""24.7"",""24"",""24"",""24.5"",""24.4"",""25"",""24.1"",""24.8"",""24.6"",""24.8"",""25.2"",""25.2"","""","""","""","

我更新的代码是:

import csv
import matplotlib.pyplot as plt

# Number of metadata rows that precede the header row in each data file.
_METADATA_ROWS = 4
# Index of the first year column; in the sample rows the four cells before
# it hold country name, country code, indicator name and indicator code.
_FIRST_YEAR_COLUMN = 4


def _parse_share_value(raw):
    """Convert one raw cell to a number, mapping blank/invalid cells to 0.

    Cells are read with ``csv.QUOTE_NONE``, so they still carry their
    (doubled) quote characters; these are removed before conversion.
    """
    text = raw.replace('""""', '0').replace('"', '').strip()
    if not text or not text.replace('.', '').isdigit():
        return 0
    try:
        return float(text)
    except ValueError:
        # e.g. "1.2.3": treat as missing instead of discarding the whole row.
        return 0


def _load_share_rows(file_name):
    """Parse one data file into ``(years, rows)``.

    ``years`` is the list of integer years found in the header row and
    ``rows`` is a list of ``(country_name, values)`` tuples in file order.
    Problems are reported on stdout and yield empty/partial results rather
    than an exception, so a missing file does not abort the caller.
    """
    years = []
    rows = []
    try:
        with open(file_name, 'r', encoding='utf-8') as file:
            # The files' quotes are unbalanced, so quoting is disabled and the
            # leftover quote characters are stripped from every cell by hand.
            reader = csv.reader(file, quoting=csv.QUOTE_NONE)

            for _ in range(_METADATA_ROWS):
                next(reader, None)

            header = next(reader, None)
            if header is None:
                print(f"Error: File '{file_name}' has no header row.")
                return years, rows

            years = [
                int(cell.strip('"'))
                for cell in header[_FIRST_YEAR_COLUMN:]
                if cell.strip('"').isdigit()
            ]

            for line in reader:
                if not line:
                    continue  # csv yields [] for blank lines; nothing to read
                country_name = line[0].strip('"')
                values = [_parse_share_value(cell) for cell in line[_FIRST_YEAR_COLUMN:]]
                rows.append((country_name, values))
    except FileNotFoundError:
        print(f"Error: File '{file_name}' not found.")
    except (OSError, ValueError, csv.Error) as e:
        print(f"Error: An unexpected error occurred: {e}")

    return years, rows


def read_income_shares(file_name, wealthiest_file_name):
    """Read the poorest-10% and wealthiest-10% income shares per country.

    Args:
        file_name: Path of the file with the income share of the poorest 10%.
        wealthiest_file_name: Path of the file with the income share of the
            wealthiest 10%.

    Returns:
        A tuple ``(income_shares, countries, years)``:

        * ``income_shares`` maps a country name to a dict that holds
          ``'Values_Poorest'`` and/or ``'Values_Wealthiest'``, each a list
          with one number per data column (0 where a value is missing).
          A key is only present if the country occurs in the matching file.
        * ``countries`` lists the country names in first-seen order.
        * ``years`` lists the years from the header row of the first file
          (or of the second file if the first one yielded none).

    Each key is filled from its own file only; values from the wealthiest
    file never replace ``'Values_Poorest'``. The header row of both files is
    consumed, so it never shows up as a country.
    """
    years, poorest_rows = _load_share_rows(file_name)
    wealthiest_years, wealthiest_rows = _load_share_rows(wealthiest_file_name)

    income_shares = {}
    countries = []
    sources = (
        ('Values_Poorest', poorest_rows),
        ('Values_Wealthiest', wealthiest_rows),
    )
    for key, rows in sources:
        for country_name, values in rows:
            if country_name not in income_shares:
                countries.append(country_name)
            income_shares.setdefault(country_name, {})[key] = values

    return income_shares, countries, years or wealthiest_years

def _plot_share_series(country, values, years, group, source_name, **line_style):
    """Print and plot the non-zero points of one income-share series.

    Years and values are paired once, so the two plotted lists always have
    the same length; a value of 0 marks a missing observation and is skipped.
    """
    points = [(year, share) for year, share in zip(years, values) if share > 0]
    plot_years = [year for year, _ in points]
    plot_shares = [share for _, share in points]

    # Debug output of exactly what is handed to matplotlib.
    print("Years for {} in {}: {}".format(country, source_name, plot_years))
    print("{} Percentages for {} in {}: {}".format(group, country, source_name, plot_shares))

    plt.plot(plot_years, plot_shares, label='{} - {} 10%'.format(country, group), **line_style)


def plot_income_distribution(
    countries,
    poorest_file='C:\\Users\\Fabian\\Desktop\\Python Ausarbeitung\\Bravo\\one.txt',
    wealthiest_file='C:\\Users\\Fabian\\Desktop\\Python Ausarbeitung\\Bravo\\two.txt',
):
    """Plot the poorest-10% and wealthiest-10% income shares of countries.

    For every requested country found in the data, a dashed line is drawn
    for the poorest 10% and a solid line for the wealthiest 10%. The figure
    is saved as ``income_distribution_plot.png`` and then shown.

    Args:
        countries: Iterable of country names; surrounding quotes, spaces and
            a byte-order mark are stripped before the lookup.
        poorest_file: Path of the file with the poorest-10% shares.
        wealthiest_file: Path of the file with the wealthiest-10% shares.
    """
    # Both files are parsed exactly once; the reader merges them per country.
    income_data, _, header_years = read_income_shares(poorest_file, wealthiest_file)

    # The header years line up with the value columns; fall back to the
    # 1960-2022 window when no header could be read.
    years_to_plot = header_years or list(range(1960, 2023))

    # Bare file names, used only in the debug output below.
    poorest_label = poorest_file.replace('\\', '/').rsplit('/', 1)[-1]
    wealthiest_label = wealthiest_file.replace('\\', '/').rsplit('/', 1)[-1]

    plotted_any = False
    for country in countries:
        country_formatted = country.strip('" \ufeff')
        series = income_data.get(country_formatted)
        if series is None:
            continue

        print("Keys for {}: {}".format(country_formatted, series.keys()))

        if 'Values_Poorest' in series:
            _plot_share_series(
                country_formatted, series['Values_Poorest'], years_to_plot,
                'Poorest', poorest_label, linestyle='dashed',
            )
            plotted_any = True

        if 'Values_Wealthiest' in series:
            _plot_share_series(
                country_formatted, series['Values_Wealthiest'], years_to_plot,
                'Wealthiest', wealthiest_label,
            )
            plotted_any = True

    plt.title('Income Distribution Over Years')
    plt.xlabel('Year')
    plt.ylabel('Income Share (%)')
    # One call fixes both ranges: x = 1960..2022, y = 0..100 percent.
    plt.axis([1960, 2022, 0, 100])
    plt.grid(True)

    # A legend without any plotted line would only trigger a warning.
    if plotted_any:
        plt.legend(loc='upper left', bbox_to_anchor=(1, 1))  # outside the plot area

    plt.savefig('income_distribution_plot.png', bbox_inches='tight')
    plt.show()

# Example invocation. The literal double quotes around the name are removed
# again inside plot_income_distribution before the country is looked up.
countries_to_plot = [
    '"Germany"',
]
plot_income_distribution(countries=countries_to_plot)
python spyder
1个回答
0
投票

我可以(部分)重现并修复。

首先,您的数据文件已损坏,csv 模块无法按其当前格式处理。csv 模块非常适合处理复杂数据,只要这些数据遵守 csv 规则;但这里的引号没有正确配对。因此,每一行都被当作一个带引号的单一字段,这不是您所期望的。正确的方法是修复数据文件,但作为变通办法,您可以让 csv 模块忽略所有引号,然后再把引号从数据字段中去掉——后一步您的代码已经在做了。只需使用

quoting=csv.QUOTE_NONE

打开阅读器(对于这两个文件...):

...
            reader = csv.reader(file, quoting=csv.QUOTE_NONE)
...

这应该足以正确获取每行的预期字段数。

但是你有第二个问题:文件包含 0-100 范围内的数据,但你将它们乘以 100。结果你的数据进入 0-10000 范围并绘制在图之外......

作为解决方法,您可以使用:

# Convert values to percentage
income_data_country_percent = [val for val in income_data_country]
income_data_wealthiest_percent = [val for val in income_data_wealthiest]

或者直接对原始值进行处理。

不过,应用这两个变通办法之后,我就能得到一张图了。

您应该从中学到什么:您的代码已经有一些调试打印。如果您添加了更多内容,特别是如果您打印了标题行,您会立即明白您只有一个字段 - 这就是我所做的...

© www.soinside.com 2019 - 2024. All rights reserved.