以下情况需要帮助:
csv源文件中的Excel数据如下:
我想清理并重新排列它,使其看起来像:
根据要求添加凌乱和干净的字典
import pandas as pd
file_path = r"C:\Users\abcd\OneDrive\Documents"
messy_data_file_name = 'messy_data.csv'
cleaned_data_file_name = 'cleaned_Data.csv'
messy_df1 = pd.read_csv(file_path + '\\' + messy_data_file_name, skiprows=0)
messy_df1.to_dict()
{
'Company Name ': {0: 'Report Month', 1: 'Report Year', 2: nan, 3: nan, 4: nan, 5: nan, 6: nan, 7: nan, 8: nan, 9: nan}
'Toyota': {0: 'Jan ', 1: '2023', 2: nan, 3: nan, 4: nan, 5: nan, 6: nan, 7: nan, 8: nan, 9: nan},
'Unnamed: 2': {0: nan, 1: nan, 2: nan, 3: nan, 4: 'Total Inventory Cost', 5: 'Sold Inventory Cost', 6: 'Total Profit Incurred', 7: 'Total Manpower Expense',8: 'Total Infra Expense',9: 'Total Transaction Expense'},
'Unnamed: 3': {0: nan, 1: nan, 2: nan, 3: nan, 4: nan, 5: nan, 6: nan, 7: nan, 8: nan, 9: nan}, 'Unnamed: 4': {0: nan, 1: nan, 2: 'Sales in', 3: 'Ohio Showroom', 4: '344469', 5: '300690', 6: '43779', 7: '15000', 8: '500',9: '110'},
'Unnamed: 5': {0: nan, 1: nan, 2: 'Sales in', 3: 'Wincosin Showroom ', 4: '11261', 5: '9050', 6: '2211', 7: '1000', 8: '200', 9: '55'}, 'Unnamed: 6': {0: nan, 1: nan, 2: 'Service in', 3: 'Ohio Showroom', 4: '659923', 5: '612231', 6: '47692', 7: '12000', 8: '400', 9: '110'},
'Unnamed: 7': {0: nan, 1: nan, 2: 'Service in', 3: 'Wincosin Showroom ', 4: '15656', 5: '12812', 6: '2844', 7: '1200', 8: '250', 9: '45'}
}
###################################################
cleaned_df1 = pd.read_csv(file_path + '\\' + cleaned_data_file_name, skiprows=0)
cleaned_df1.to_dict()
{
'Company Name': {0: 'Toyota ', 1: 'Toyota ', 2: 'Toyota ', 3: 'Toyota ', 4: 'Toyota ', 5: 'Toyota '},
'Report Month': {0: 'January', 1: 'January', 2: 'January', 3: 'January', 4: 'January', 5: 'January'},
'Report Year ': {0: 2023, 1: 2023, 2: 2023, 3: 2023, 4: 2023, 5: 2023},
'Parameter': {0: 'Total Inventory Cost', 1: 'Sold Inventory Cost', 2: 'Total Profit Incurred', 3: 'Total Manpower Expense', 4: 'Total Infra Expense', 5: 'Total Transaction Expense'},
'Sales in Ohio Showroom': {0: 344469, 1: 300690, 2: 43779, 3: 15000, 4: 500, 5: 110},
'Sales in Wincosin Showroom': {0: 11261, 1: 9050, 2: 2211, 3: 1000, 4: 200, 5: 55},
'Service in Ohio Showroom': {0: 659923, 1: 612231, 2: 47692, 3: 12000, 4: 400, 5: 110},
'Service in Wincosin Showroom ': {0: 15656, 1: 12812, 2: 2844, 3: 1200, 4: 250, 5: 45}
}
你可以这样做 从 csv 文件读取数据
df=pd.read_csv(r"C:\Users\Aparna\Downloads\messydata.csv",skiprows=5,header=None)
# change location path of your file
df1=df.dropna(axis=1)
# since your column names are not alligned correctly try to make new column names like this
df1.columns = ["Parameter", "Sales in Ohio Showroom", "Sales in Wincosin Showroom", "Service in Ohio Showroom","Service in Wincosin Showroom"]
df1["Company"]="Toyota"
df1["Report Month"]="January"
df1["Report Year"]=2023
df1 = df1.reindex(columns=["Company","Report Month","Report Year","Parameter", "Sales in Ohio Showroom", "Sales in Wincosin Showroom", "Service in Ohio Showroom","Service in Wincosin Showroom"])
# finally export dataframe in to csv file
df1.to_csv(r"C:\Users\abcd\OneDrive\Documents\file_path.csv")
output:[![enter image description here][1]][1]
[1]: https://i.stack.imgur.com/zf7OQ.png