我想显示两张图表,一张是收入最高的 10% 的图表,一张是收入最低的 10% 的图表,但我遇到的问题是只显示了一张图表。
最贫困人口的数据样本是: "Germany,""DEU"",""Income share held by lowest 10%"",""SI.DST.FRST.10"","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""",""3.7"",""3.7"",""3.7"",""3.4"",""3.4"",""3.7"",""3.6"",""3.6"",""3.5"",""3.5"",""3.5"",""3.4"",""3.4"",""3.4"",""3.3"",""3.3"",""3.4"",""3.4"",""3.3"",""3.4"",""3.4"",""3.2"",""3.3"",""3.2"",""3.1"",""3.1"",""2.8"",""3.1"",""3.1"","""","""","""","
最富有的人的数据样本是: "Germany,""DEU"",""Income share held by highest 10%"",""SI.DST.10TH.10"","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""","""",""23.2"",""23.1"",""22.8"",""22.9"",""22.7"",""22.3"",""22.4"",""22.3"",""23.1"",""22.9"",""23.9"",""23.7"",""23.9"",""24"",""25.1"",""24.7"",""25.1"",""24.7"",""24"",""24"",""24.5"",""24.4"",""25"",""24.1"",""24.8"",""24.6"",""24.8"",""25.2"",""25.2"","""","""","""","
import csv
import matplotlib.pyplot as plt
def _parse_share_value(raw):
    """Convert one raw CSV field into a float, or 0 when it holds no number."""
    # The reader runs with QUOTE_NONE, so the quote characters are still part
    # of the field; a field made of four quotes is an empty cell.
    text = raw.replace('""""', '0').replace('"', '').strip()
    # Allow a single decimal point only, so malformed text such as "1.2.3"
    # counts as missing instead of making float() fail.
    if not text.replace('.', '', 1).isdigit():
        return 0
    try:
        return float(text)
    except ValueError:
        # isdigit() accepts a few characters float() rejects (e.g. "²").
        return 0


def _read_share_file(path, value_key, income_shares, countries):
    """Merge the rows of one data file into income_shares under value_key.

    The first four rows are skipped and the fifth is taken as the header.
    Every following non-empty row stores the parsed values of its columns
    from index 4 onwards in income_shares[country][value_key]; country names
    not seen before are appended to countries.

    Returns the list of years found in the header (empty if the file could
    not be read). Read errors are reported with print() and never raised.
    """
    years = []
    try:
        with open(path, 'r', encoding='utf-8', newline='') as file:
            reader = csv.reader(file, quoting=csv.QUOTE_NONE)
            # Skip the first 4 rows
            for _ in range(4):
                next(reader)
            # The header row carries the years from column 4 onwards.
            header = next(reader)
            years = [int(year.strip('"')) for year in header[4:] if year.strip('"').isdigit()]
            for line in reader:
                if not line:
                    # csv yields [] for blank lines; there is nothing to store.
                    continue
                country_name = line[0].strip('"')
                values = [_parse_share_value(val) for val in line[4:]]
                income_shares.setdefault(country_name, {})[value_key] = values
                if country_name not in countries:
                    countries.append(country_name)
    except FileNotFoundError:
        print(f"Error: File '{path}' not found.")
    except StopIteration:
        print(f"Error: File '{path}' ended before the header row.")
    except (OSError, ValueError, csv.Error) as e:
        print(f"Error: An unexpected error occurred: {e}")
    return years


def read_income_shares(file_name, wealthiest_file_name):
    """Read the poorest-10% and wealthiest-10% income share files.

    Args:
        file_name: path of the file whose values are stored under the
            'Values_Poorest' key.
        wealthiest_file_name: path of the file whose values are stored under
            the 'Values_Wealthiest' key.

    Returns:
        A tuple (income_shares, countries, years):
        income_shares maps a country name to a dict with the keys
        'Values_Poorest' and/or 'Values_Wealthiest', each a list with one
        entry per value column (0 where the cell holds no number);
        countries lists the country names in order of first appearance;
        years lists the years of the first file's header, or of the second
        file's header when the first one yielded none.

    A file that is missing or unreadable is reported with print() and simply
    contributes no data.
    """
    income_shares = {}
    countries = []

    # Each file feeds only its own key, so the wealthiest file can no longer
    # overwrite the poorest values read from the first file.
    years = _read_share_file(file_name, 'Values_Poorest', income_shares, countries)
    wealthiest_years = _read_share_file(
        wealthiest_file_name, 'Values_Wealthiest', income_shares, countries
    )
    if not years:
        years = wealthiest_years
    return income_shares, countries, years
def plot_income_distribution(
    countries,
    poorest_file='C:\\Users\\Fabian\\Desktop\\Python Ausarbeitung\\Bravo\\one.txt',
    wealthiest_file='C:\\Users\\Fabian\\Desktop\\Python Ausarbeitung\\Bravo\\two.txt',
):
    """Plot the poorest-10% and wealthiest-10% income shares per country.

    Args:
        countries: country names to plot; surrounding double quotes, spaces
            and a byte-order mark are stripped before the lookup.
        poorest_file: path of the file holding the poorest-10% values.
        wealthiest_file: path of the file holding the wealthiest-10% values.

    A country is plotted only when it is present in both files. Values that
    are not greater than 0 are treated as missing and left out. The figure is
    saved as 'income_distribution_plot.png' and then shown.
    """
    # Each file is loaded in its own call (passed for both arguments) so the
    # values plotted for one series can only come from that series' file.
    income_data_one, _, header_years = read_income_shares(poorest_file, poorest_file)
    income_data_two, _, _ = read_income_shares(wealthiest_file, wealthiest_file)

    # Prefer the years named in the file header; fall back to 1960-2022.
    years_to_plot = header_years or list(range(1960, 2023))

    # (data, key, source file, legend word, line style) for each series.
    series_specs = (
        (income_data_one, 'Values_Poorest', poorest_file, 'Poorest', 'dashed'),
        (income_data_two, 'Values_Wealthiest', wealthiest_file, 'Wealthiest', 'solid'),
    )

    plotted_any = False
    for country in countries:
        # Cleaning up the country name
        country_formatted = country.strip('" \ufeff')

        # Check if data for the country is available in both files
        if country_formatted not in income_data_one or country_formatted not in income_data_two:
            continue

        for data, key, source, kind, linestyle in series_specs:
            country_data = data[country_formatted]
            print("Keys for {} in {}: {}".format(country_formatted, source, country_data.keys()))
            if key not in country_data:
                continue

            # The values already are percentages (0-100). Year and value are
            # filtered as pairs so x and y always have the same length.
            points = [
                (year, val)
                for year, val in zip(years_to_plot, country_data[key])
                if val > 0
            ]
            plot_years = [year for year, _ in points]
            plot_values = [val for _, val in points]

            # Print the data for debugging
            print("Years for {} in {}: {}".format(country_formatted, source, plot_years))
            print("{} Percentages for {} in {}: {}".format(kind, country_formatted, source, plot_values))

            # Plot only if data is available
            if not points:
                continue
            plt.plot(
                plot_years,
                plot_values,
                label='{} - {} 10%'.format(country_formatted, kind),
                linestyle=linestyle,
            )
            plotted_any = True

    plt.title('Income Distribution Over Years')
    plt.xlabel('Year')
    plt.ylabel('Income Share (%)')
    # X axis spans the available years, Y axis 0 to 100 percent.
    plt.axis([min(years_to_plot), max(years_to_plot), 0, 100])
    plt.grid(True)

    # Display the legend only if at least one line was drawn.
    if plotted_any:
        plt.legend(loc='upper left', bbox_to_anchor=(1, 1))  # Move legend outside the plot area

    plt.savefig('income_distribution_plot.png', bbox_inches='tight')  # Save the plot as a PNG file
    plt.show()
# Example call: the country name is given with surrounding double quotes;
# plot_income_distribution strips quotes, spaces and a byte-order mark from
# each name before looking it up.
countries_to_plot = ['"Germany"']
plot_income_distribution(countries_to_plot)
我可以(部分)重现并修复。
首先,您的数据文件格式有问题,csv 模块无法按当前格式正确处理它。只要数据遵守 CSV 规则,csv 模块完全可以处理复杂的数据,但这里的引号没有正确配对。因此,每一行都被当作单个带引号的字段,这不是您所期望的。正确的做法是修复数据文件;但作为变通办法,您可以让 csv 模块忽略所有引号,然后把引号从各数据字段中删除(删除引号这一步您的代码已经在做了)。只需使用
quoting=csv.QUOTE_NONE
来创建 reader(两个文件都要这样做):
...
reader = csv.reader(file, quoting=csv.QUOTE_NONE)
...
这应该足以正确获取每行的预期字段数。
但是您还有第二个问题:文件中的数据本身已经在 0–100 的范围内,而您又把它们乘以了 100。结果数据落到了 0–10000 的范围,被画到了图表的可见区域之外……
作为解决方法,您可以使用:
# Convert values to percentage
income_data_country_percent = [val for val in income_data_country]
income_data_wealthiest_percent = [val for val in income_data_wealthiest]
或者直接对原始值进行处理。
应用这两个变通办法之后,我就能得到一张图了。
您应该从中学到的是:您的代码里已经有一些调试打印。如果再多加一些,尤其是把标题行打印出来,您马上就会发现每行只解析出了一个字段——我就是这样做的……