我有一个数据框,我需要为每个具有更多值的索引选择 0 级列: 我准备了一个例子来澄清。 对于每个元素 T1、T2、T3,我需要选择多索引列 (M1、M2、M3) 的级别 0,它的大小更大。 还介绍了预期的结果,以及我设法获得结果的方法。
import pandas as pd
import numpy as np
data= [[1,1,1,0,0,0,0,0,0],
[1,1,1,0,0,0,0,0,0],
[1,1,1,0,0,0,0,0,0],
[0,0,0,2,2,2,1,1,1],
[0,0,0,0,0,0,1,1,1],
[0,0,0,2,2,2,1,1,1],
[0,0,0,1,1,1,0,0,0],
[0,0,0,1,1,1,0,0,0],
[0,0,0,1,1,1,0,0,0]]
columns =pd.MultiIndex.from_product([['M1','M2','M3'],['x','y','z']])
index = ['T1','T1','T1','T2','T2','T2','T3','T3','T3']
df = pd.DataFrame(data, index=index, columns = columns ).replace(to_replace = 0, value=np.nan)
df
M1 M2 M3
x y z x y z x y z
T1 1.00 1.00 1.00 NaN NaN NaN NaN NaN NaN
T1 1.00 1.00 1.00 NaN NaN NaN NaN NaN NaN
T1 1.00 1.00 1.00 NaN NaN NaN NaN NaN NaN
T2 NaN NaN NaN 2.00 2.00 2.00 1.00 1.00 1.00
T2 NaN NaN NaN NaN NaN NaN 1.00 1.00 1.00
T2 NaN NaN NaN 2.00 2.00 2.00 1.00 1.00 1.00
T3 NaN NaN NaN 1.00 1.00 1.00 NaN NaN NaN
T3 NaN NaN NaN 1.00 1.00 1.00 NaN NaN NaN
T3 NaN NaN NaN 1.00 1.00 1.00 NaN NaN NaN
#Expected RESULT
# x y z
# T1 1.00 1.00 1.00
# T1 1.00 1.00 1.00
# T1 1.00 1.00 1.00
# T2 1.00 1.00 1.00
# T2 1.00 1.00 1.00
# T2 1.00 1.00 1.00
# T3 1.00 1.00 1.00
# T3 1.00 1.00 1.00
# T3 1.00 1.00 1.00
# Approach
select = df.stack(level=0).count(axis=1).reset_index().groupby(['level_0','level_1']).sum().unstack(level=1).idxmax(axis=1)
# From this select ( correctly selected M for each T I would like to go to the final RESULTt.
select
Out[52]:
level_0
T1 (0, M1)
T2 (0, M3)
T3 (0, M2)
ffill
吗?
out = df.ffill(axis=1)[df.columns[-1][0]]
输出:
print(out)
x y z
T1 1.0 1.0 1.0
T1 1.0 1.0 1.0
T1 1.0 1.0 1.0
T2 1.0 1.0 1.0
T2 1.0 1.0 1.0
T2 1.0 1.0 1.0
T3 1.0 1.0 1.0
T3 1.0 1.0 1.0
T3 1.0 1.0 1.0