
问题描述 投票:0回答:2


import numpy as np

# Sample data: 100 random floats drawn by np.random.rand.
a = np.random.rand(100)



from scipy.stats import percentileofscore

# Number of quantile bins to recode the data into (5 gives quintiles).
n_quantiles = 5

def get_quantile(i, a, n_quantiles):
    """Return the zero-based quantile bin of ``a[i]`` within ``a``.

    The percentile rank of ``a[i]`` (a value on a 0-100 scale, obtained from
    ``scipy.stats.percentileofscore``) is divided by the width of one bin,
    ``100 / n_quantiles``, and truncated to an integer.

    Args:
        i: Index of the element of ``a`` to classify.
        a: One-dimensional sequence or array of scores.
        n_quantiles: Number of bins (e.g. 5 for quintiles).

    Returns:
        The bin label as an ``int`` between 0 and ``n_quantiles - 1``.
    """
    score = a[i]
    # The maximum can reach percentile 100, which would map to bin
    # ``n_quantiles``; pin it to the top bin instead. ``np.max`` performs the
    # reduction natively rather than iterating element by element like the
    # builtin ``max``.
    if score >= np.max(a):
        return n_quantiles - 1
    return int(percentileofscore(a, score) / (100 / n_quantiles))

# Label every position in `a` with its quantile bin and gather the labels
# into an array aligned with `a`.
a_recoded = np.array(
    [get_quantile(pos, a, n_quantiles) for pos, _ in enumerate(a)]
)

[0.04708996 0.86267278 0.23873192 0.02967989 0.42828385 0.58003015
 0.8996666  0.15359369 0.83094778 0.44272398 0.60211289 0.90286434
 0.40681163 0.91338397 0.3273745  0.00347029 0.37471307 0.72735901
 0.93974808 0.55937197 0.39297097 0.91470761 0.76796271 0.50404401
 0.1817242  0.78244809 0.9548256  0.78097562 0.90934337 0.89914752
 0.82899983 0.44116683 0.50885813 0.2691431  0.11676798 0.84971927
 0.38505195 0.7411976  0.51377242 0.50243197 0.89677377 0.69741088
 0.47880953 0.71116534 0.01717348 0.77641096 0.88127268 0.17925502
 0.53053573 0.16935597 0.65521692 0.19042794 0.21981197 0.01377195
 0.61553814 0.8544525  0.53521604 0.88391848 0.36010949 0.35964882
 0.29721931 0.71257335 0.26350287 0.22821314 0.8951419  0.38416004
 0.19277649 0.67774468 0.27084229 0.46862229 0.3107887  0.28511048
 0.32682302 0.14682896 0.10794566 0.58668243 0.16394183 0.88296862
 0.55442047 0.25508233 0.86670299 0.90549872 0.04897676 0.33042884
 0.4348465  0.62636481 0.48201213 0.49895892 0.36444648 0.01410316
 0.46770595 0.09498391 0.96793139 0.03931124 0.64286295 0.50934846
 0.59088907 0.56368594 0.7820928  0.77172038]

[0 4 1 0 2 3 4 0 4 2 3 4 2 4 1 0 1 3 4 2 1 4 3 2 0 3 4 3 4 4 4 2 2 1 0 4 1 
3 2 2 4 3 2 3 0 3 4 0 2 0 3 0 1 0 3 4 2 4 1 1 1 3 1 1 4 1 0 3 1 2 1 1 1 0 
0 3 0 4 2 1 4 4 0 1 2 3 2 2 1 0 2 0 4 0 3 2 3 2 3 3]

更新:补充一点,这在 R 中很容易实现,参见:How to get the x which belongs to a quintile?

python numpy scipy percentile


>>> a = np.random.random(20)
>>> N = len(a)
>>> nq = 5
>>> # Partition around the nq - 1 bin-boundary positions (N//nq, 2*N//nq, ...)
>>> # so that each element's position in `o` falls inside its bin's index range.
>>> o = a.argpartition(np.arange(1, nq) * N // nq)
>>> out = np.empty(N, int)
>>> # Position k in the partitioned order belongs to bin k * nq // N; scatter
>>> # those labels back to the original element positions.
>>> out[o] = np.arange(N) * nq // N
>>> a
array([0.61238649, 0.37168998, 0.4624829 , 0.28554766, 0.00098016,
       0.41979328, 0.62275886, 0.4254548 , 0.20380679, 0.762435  ,
       0.54054873, 0.68419986, 0.3424479 , 0.54971072, 0.06929464,
       0.51059431, 0.68448674, 0.97009023, 0.16780152, 0.17887862])
>>> out
array([3, 1, 2, 1, 0, 2, 3, 2, 1, 4, 3, 4, 1, 3, 0, 2, 4, 4, 0, 0])



import pandas as pd
import numpy as np

# Sample data: a single column named 'values' holding 100 random numbers.
df = pd.DataFrame(np.random.rand(100))
df.columns = ['values']
# Bin the values with pd.cut using the edges from np.arange(0, 1.2, 0.2) and
# collect each bin's members into a list (one row per bin).
# NOTE(review): these are fixed-width bins, not data-derived quantiles; they
# only approximate quintiles when the data are uniform on [0, 1] - confirm intent.
gdf = df.groupby(pd.cut(df.loc[:, 'values'], np.arange(0, 1.2, 0.2)))['values'].apply(lambda x: list(x)).to_frame()
# Replace the interval index with the default integer index so that each row's
# position serves as its bin number.
gdf.reset_index(drop=True, inplace=True)
# Re-expand the grouped lists into one row per value. Method credited to @Zero
# (the link is missing from this copy).
# NOTE(review): the result of this expression is not assigned to anything.
gdf['values'].apply(pd.Series).stack().reset_index(level=1, drop=True).to_frame('values').reset_index()
© 2019 - 2024. All rights reserved.