我正在尝试根据“Category”(类别)列中的值,对 pandas 数据透视表中多个列的值进行求和。但是,由于数据透视表中包含日期列,我收到了错误。如何按类别汇总所有转置后的月份列?
可执行代码:
# test groupbysum
import pandas as pd
import numpy as np
# Sample records used to reproduce the problem. 'Code' and 'Unit Price'
# contain missing values (None / np.nan).
df = pd.DataFrame({
'JDate':["2022-01-31","2022-12-05","2023-11-10","2023-12-03","2024-01-16","2024-01-06","2011-01-04"],
# 'Month':[1,12,11,12,1,1],
'Code':[None,'John Johnson',np.nan,'John Smith','Mary Williams','ted bundy','George Lucas'],
'Unit Price':[np.nan,200,None,56,75,65,60],
'Quantity':[1500, 140000, 1400000, 455, 648, 759,1000],
'Amount':[100, 10000, 100000, 5, 48, 59,449],
'Invoice':['soccer','basketball','baseball','football','baseball','ice hockey','football'],
'energy':[100.,100,100,54,98,3,45],
'Category':['alpha','bravo','kappa','alpha','bravo','bravo','kappa']
})
# Parse the date strings, then derive a monthly Period from each date; the
# Period values become the pivot's column labels below.
df["JDate"] = pd.to_datetime(df["JDate"])
df["JYearMonth"] = df['JDate'].dt.to_period('M')
# Pivot layout: one row per (Category, Code, Invoice, Unit Price, JDate)
# combination, one 'Amount' column per JYearMonth.
index_to_use = ['Category','Code','Invoice','Unit Price','JDate']
values_to_use = ['Amount']
columns_to_use = ['JYearMonth']
df2 = df.pivot_table(index=index_to_use,
values=values_to_use,
columns=columns_to_use)
# Select the 'Amount' sub-frame (dropping that column level) and turn the index
# levels back into ordinary columns, so 'JDate' is a regular column again.
df2 = df2['Amount'].reset_index()
# NOTE(review): this is the statement the question is about. Per the
# accompanying description it raises an error because of the date column that
# is still present in df2 when sum() is applied to every column.
df2_sum = df2.groupby('Category').sum()
# Write the raw data, the pivoted table and the per-category totals to three
# sheets of one workbook. Using the writer as a context manager closes it even
# if one of the to_excel calls raises, which the manual close() did not.
with pd.ExcelWriter("t2test18.xlsx", engine='xlsxwriter') as writer:
    df.to_excel(writer, sheet_name="t2", index=True)
    df2.to_excel(writer, sheet_name="t2test", index=True)
    df2_sum.to_excel(writer, sheet_name="t2testsum", index=True)

    # Engine-level handles for the workbook and the "t2" sheet.
    workbook = writer.book
    worksheet = writer.sheets["t2"]
    # Header style. NOTE(review): it is created but never applied to any
    # cell in this script.
    fmt_header = workbook.add_format({
        'bold': True,
        'text_wrap': True,
        'valign': 'top',
        'fg_color': '#5DADE2',
        'font_color': '#2659D9',
        'border': 1,
    })
如果我理解正确(IIUC),你可以尝试:
# Descriptive columns cannot be summed meaningfully, so each one is collected
# into a per-category list (for string-valued columns such as "Code",
# `",".join` can be used instead of `list`).
descriptive_cols = ["Code", "Invoice", "Unit Price", "JDate"]
agg_spec = {name: list for name in descriptive_cols}

# Every remaining column whose label is not a string is one of the pivoted
# month columns; those are the ones to total.
for label in df2.columns:
    if not isinstance(label, str):
        agg_spec[label] = "sum"

df2_sum = df2.groupby("Category").agg(agg_spec)
print(df2_sum)
打印:
JYearMonth Code Invoice Unit Price JDate 2011-01 2022-12 2023-12 2024-01
Category
alpha [John Smith] [football] [56.0] [2023-12-03 00:00:00] 0.0 0.0 5.0 0.0
bravo [John Johnson, Mary Williams, ted bundy] [basketball, baseball, ice hockey] [200.0, 75.0, 65.0] [2022-12-05 00:00:00, 2024-01-16 00:00:00, 2024-01-06 00:00:00] 0.0 10000.0 0.0 107.0
kappa [George Lucas] [football] [60.0] [2011-01-04 00:00:00] 449.0 0.0 0.0 0.0