Can someone suggest a better, shorter way to write the code given below? The code was implemented for four columns, with different windows for the rolling function. Now I want to do the same for 10 columns and then merge the results later. Please help!
我的数据包含以下列:Sr.No.、Company Name、pe、price_bv、mcap_ns 和 ev_ebidta。如果有人能帮忙,我将不胜感激。
# Long-window statistics. For every group of df1's first index level, each
# column is shifted down one row (so a row's own value is excluded from its
# window) and a rolling mean / standard deviation is taken over 3650 rows.
# min_periods=1 produces a value as soon as the window holds one observation.
# reset_index(name=...) turns the result into a frame with the index as
# columns and the statistic under the given column name.
# NOTE(review): window=3650 is a row count; it only corresponds to "10 years"
# if every group has one row per calendar day -- confirm against the data.
df2 = df1.groupby(level=0)['pe'].apply(lambda x: x.shift().rolling(min_periods=1,window=3650).mean()).reset_index(name='Average_pe_10_yrs')
df222 = df1.groupby(level=0)['pe'].apply(lambda x: x.shift().rolling(min_periods=1,window=3650).std()).reset_index(name='Stdev_pe_10_yrs')
df3 = df1.groupby(level=0)['price_bv'].apply(lambda x: x.shift().rolling(min_periods=1,window=3650).mean()).reset_index(name='Average_price_bv_10_yrs')
df333 = df1.groupby(level=0)['price_bv'].apply(lambda x: x.shift().rolling(min_periods=1,window=3650).std()).reset_index(name='Stdev_price_bv_10_yrs')
#print(df333)
df4 = df1.groupby(level=0)['mcap_ns'].apply(lambda x: x.shift().rolling(min_periods=1,window=3650).mean()).reset_index(name='Average_mcap_ns_10_yrs')
#print(df4)
df444 = df1.groupby(level=0)['mcap_ns'].apply(lambda x: x.shift().rolling(min_periods=1,window=3650).std()).reset_index(name='Stdev_mcap_ns_10_yrs')
#print(df444)
df5 = df1.groupby(level=0)['ev_ebidta'].apply(lambda x: x.shift().rolling(min_periods=1,window=3650).mean()).reset_index(name='Average_ev_ebidta_10_yrs')
#print(df5)
df555 = df1.groupby(level=0)['ev_ebidta'].apply(lambda x: x.shift().rolling(min_periods=1,window=3650).std()).reset_index(name='Stdev_ev_ebidta_10_yrs')
#print(df555)
# Short-window rolling means of the shifted 'pe' column, one frame per window
# size (10, 20, 30, 40 and 50 rows), again computed within each group.
df6 = df1.groupby(level=0)['pe'].apply(lambda x: x.shift().rolling(min_periods=1,window=10).mean()).reset_index(name='Average_pe_10_days')
#print(df6)
df7 = df1.groupby(level=0)['pe'].apply(lambda x: x.shift().rolling(min_periods=1,window=20).mean()).reset_index(name='Average_pe_20_days')
#print(df7)
df8 = df1.groupby(level=0)['pe'].apply(lambda x: x.shift().rolling(min_periods=1,window=30).mean()).reset_index(name='Average_pe_30_days')
#print(df8)
df9 = df1.groupby(level=0)['pe'].apply(lambda x: x.shift().rolling(min_periods=1,window=40).mean()).reset_index(name='Average_pe_40_days')
#print(df9)
df10 = df1.groupby(level=0)['pe'].apply(lambda x: x.shift().rolling(min_periods=1,window=50).mean()).reset_index(name='Average_pe_50_days')
#print(df10)
# Same five short-window means for 'price_bv'.
df14 = df1.groupby(level=0)['price_bv'].apply(lambda x: x.shift().rolling(min_periods=1,window=10).mean()).reset_index(name='Average_price_bv_10_days')
#print(df14)
df15 = df1.groupby(level=0)['price_bv'].apply(lambda x: x.shift().rolling(min_periods=1,window=20).mean()).reset_index(name='Average_price_bv_20_days')
df16 = df1.groupby(level=0)['price_bv'].apply(lambda x: x.shift().rolling(min_periods=1,window=30).mean()).reset_index(name='Average_price_bv_30_days')
df17 = df1.groupby(level=0)['price_bv'].apply(lambda x: x.shift().rolling(min_periods=1,window=40).mean()).reset_index(name='Average_price_bv_40_days')
df18 = df1.groupby(level=0)['price_bv'].apply(lambda x: x.shift().rolling(min_periods=1,window=50).mean()).reset_index(name='Average_price_bv_50_days')
# Same five short-window means for 'mcap_ns'.
df22 = df1.groupby(level=0)['mcap_ns'].apply(lambda x: x.shift().rolling(min_periods=1,window=10).mean()).reset_index(name='Average_mcap_ns_10_days')
df23 = df1.groupby(level=0)['mcap_ns'].apply(lambda x: x.shift().rolling(min_periods=1,window=20).mean()).reset_index(name='Average_mcap_ns_20_days')
df24 = df1.groupby(level=0)['mcap_ns'].apply(lambda x: x.shift().rolling(min_periods=1,window=30).mean()).reset_index(name='Average_mcap_ns_30_days')
df25 = df1.groupby(level=0)['mcap_ns'].apply(lambda x: x.shift().rolling(min_periods=1,window=40).mean()).reset_index(name='Average_mcap_ns_40_days')
df26 = df1.groupby(level=0)['mcap_ns'].apply(lambda x: x.shift().rolling(min_periods=1,window=50).mean()).reset_index(name='Average_mcap_ns_50_days')
# Same five short-window means for 'ev_ebidta'.
df30 = df1.groupby(level=0)['ev_ebidta'].apply(lambda x: x.shift().rolling(min_periods=1,window=10).mean()).reset_index(name='Average_ev_ebidta_10_days')
#print(df30)
df31 = df1.groupby(level=0)['ev_ebidta'].apply(lambda x: x.shift().rolling(min_periods=1,window=20).mean()).reset_index(name='Average_ev_ebidta_20_days')
#print(df31)
df32 = df1.groupby(level=0)['ev_ebidta'].apply(lambda x: x.shift().rolling(min_periods=1,window=30).mean()).reset_index(name='Average_ev_ebidta_30_days')
#print(df32)
df33 = df1.groupby(level=0)['ev_ebidta'].apply(lambda x: x.shift().rolling(min_periods=1,window=40).mean()).reset_index(name='Average_ev_ebidta_40_days')
#print(df33)
df34 = df1.groupby(level=0)['ev_ebidta'].apply(lambda x: x.shift().rolling(min_periods=1,window=50).mean()).reset_index(name='Average_ev_ebidta_50_days')
#print(df34)
编辑:感谢 Parfait 提供的代码。不过这段代码已经运行了 3 个小时,到现在仍然没有任何输出,我该怎么办?请帮帮忙。
可以考虑定义一个函数并将其传给 `groupby().apply()`,以取代针对每一列重复编写的 lambda 调用。在该函数中,用 `itertools.product` 遍历列、窗口和聚合方式的所有组合。以下代码假设你使用的是支持 f-string 的 Python 3.6+;否则请改用 `str.format`:
from itertools import product
...
def calc_rolling(grp):
    """Return ``grp`` with shifted rolling-window statistics appended.

    Intended to be called once per group, e.g. via
    ``df.groupby(level=0).apply(calc_rolling)``.

    For each of the columns ``pe``, ``price_bv``, ``mcap_ns`` and
    ``ev_ebidta`` the values are shifted down one row, so a row's own value
    never enters its own window, and the following columns are added:

    * ``Average_<col>_10_yrs`` / ``Stdev_<col>_10_yrs`` -- rolling mean and
      standard deviation over a 3650-row window.
    * ``Average_<col>_<w>_days`` for ``w`` in 10, 20, 30, 40, 50 -- rolling
      mean over a ``w``-row window.

    All windows use ``min_periods=1``.

    Parameters
    ----------
    grp : pandas.DataFrame
        Rows of a single group; must contain the four columns listed above.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the original columns followed by the 28 computed
        ones. ``grp`` itself is left unmodified, because mutating the object
        handed over by ``groupby().apply()`` is not supported by pandas.
    """
    cols = ['pe', 'price_bv', 'mcap_ns', 'ev_ebidta']
    labels = {'mean': 'Average', 'std': 'Stdev'}

    # Shift each source column once and reuse it for every window/aggregation.
    shifted = {col: grp[col].shift() for col in cols}

    new_cols = {}
    for col, agg in product(cols, ['mean', 'std']):
        # Rolling.agg dispatches on the aggregation name; Rolling.apply would
        # require a callable and cannot take the strings 'mean' / 'std'.
        new_cols[f"{labels[agg]}_{col}_10_yrs"] = (
            shifted[col].rolling(min_periods=1, window=3650).agg(agg)
        )
    for col, w in product(cols, [10, 20, 30, 40, 50]):
        new_cols[f"Average_{col}_{w}_days"] = (
            shifted[col].rolling(min_periods=1, window=w).mean()
        )

    # assign() keeps the dict's insertion order, so the column order matches
    # the order in which the statistics were generated above.
    return grp.assign(**new_cols)
# BUILD DATA FRAME OF 34 COLUMNS
# calc_rolling is invoked once per group of df1's first index level; the
# frames it returns are combined into windows_df.
windows_df = df1.groupby(level=0).apply(calc_rolling)
输出
# Show the column labels of windows_df.
print(windows_df.columns)
# Index(['Sr.No.', 'Company Name', 'pe', 'price_bv', 'mcap_ns', 'ev_ebidta',
# 'Average_pe_10_yrs', 'Stdev_pe_10_yrs', 'Average_price_bv_10_yrs',
# 'Stdev_price_bv_10_yrs', 'Average_mcap_ns_10_yrs',
# 'Stdev_mcap_ns_10_yrs', 'Average_ev_ebidta_10_yrs',
# 'Stdev_ev_ebidta_10_yrs', 'Average_pe_10_days', 'Average_pe_20_days',
# 'Average_pe_30_days', 'Average_pe_40_days', 'Average_pe_50_days',
# 'Average_price_bv_10_days', 'Average_price_bv_20_days',
# 'Average_price_bv_30_days', 'Average_price_bv_40_days',
# 'Average_price_bv_50_days', 'Average_mcap_ns_10_days',
# 'Average_mcap_ns_20_days', 'Average_mcap_ns_30_days',
# 'Average_mcap_ns_40_days', 'Average_mcap_ns_50_days',
# 'Average_ev_ebidta_10_days', 'Average_ev_ebidta_20_days',
# 'Average_ev_ebidta_30_days', 'Average_ev_ebidta_40_days',
# 'Average_ev_ebidta_50_days'],
# dtype='object')
或者,不使用 `groupby().apply()`,直接为各列赋值:
# Assign the rolling statistics straight onto df1, without a helper function.
#
# Both the shift and the rolling window have to run inside each group.
# ``df1[col].groupby(level=0).shift()`` hands back an ordinary Series, so a
# ``.rolling()`` chained onto it would slide across group boundaries and mix
# rows of neighbouring groups.  ``transform`` runs the whole shift + rolling
# chain per group and returns a result aligned to df1's index.
cols = ['pe', 'price_bv', 'mcap_ns', 'ev_ebidta']
t = {'mean': 'Average', 'std': 'Stdev'}

for col, agg in product(cols, ['mean', 'std']):
    # Rolling.agg accepts the aggregation name; Rolling.apply needs a callable.
    df1[f"{t[agg]}_{col}_10_yrs"] = df1.groupby(level=0)[col].transform(
        lambda s: s.shift().rolling(min_periods=1, window=3650).agg(agg)
    )

for col, w in product(cols, [10, 20, 30, 40, 50]):
    df1[f"Average_{col}_{w}_days"] = df1.groupby(level=0)[col].transform(
        lambda s: s.shift().rolling(min_periods=1, window=w).mean()
    )

print(df1.columns)