蜂巢中按组划分的基尼系数

问题描述 投票:0回答:1

我有一个包含三列的表格

group
person
score
我想计算每组
score
的基尼系数。这样我就可以根据
score
衡量人的不平等,最有效的方法是什么?

sql hive hql
1个回答
0
投票

这是使用 Oracle SQL 代码完成的基尼计算(使用随机样本数据):

WITH      --  S a m p l e    D a t a :
    balances AS 
        ( Select DATE '2018-01-01' as A_DATE, IN_VAL as BALANCE
          From    ( Select 11 as IN_VAL From Dual Union All
                    Select 12 From Dual Union All
                    Select 13 From Dual Union All
                    Select 14 From Dual Union All
                    Select 15 From Dual )  -- Gini: 0.061538   [11,12,13,14,15]
        Union All
          Select DATE '2018-01-02' as A_DATE, IN_VAL as BALANCE
          From    ( Select 33 as IN_VAL From Dual Union All
                    Select 33 From Dual Union All
                    Select 33 From Dual Union All
                    Select 33 From Dual )  -- Gini: 0.0  [33,33,33,33]
        Union All
          Select DATE '2018-01-03' as A_DATE, IN_VAL as BALANCE
          From    ( Select 142 as IN_VAL From Dual Union All
                    Select 152 From Dual Union All
                    Select 112 From Dual Union All
                    Select 182 From Dual Union All
                    Select 162 From Dual Union All
                    Select 1452 From Dual Union All
                    Select 1672 From Dual Union All
                    Select 112 From Dual Union All
                    Select 142 From Dual Union All
                    Select 1112 From Dual)  -- Gini: 0.542985  [142,152,112,182,162,1452,1672,112,142,112]
        ),

使用 CTE returned_balances 对数据进行排名...

    ranked_balances AS 
        ( Select A_DATE, BALANCE, Row_Number() Over (Partition By A_DATE Order By BALANCE Desc) as RANK
          From balances
        ) 
--    M a i n    S Q L :    ( Gini Calculation )
SELECT      A_DATE, 
        -- (1 − 2B) https://en.wikipedia.org/wiki/Gini_coefficient
            Round( 1 - ( 2 * 
                         Sum( (BALANCE * (RANK - 1) + BALANCE / 2) ) / 
                         Count(*) / 
                         sum(BALANCE)
                       ), 6 
                 ) as GINI
FROM        ranked_balances
GROUP BY    A_DATE
HAVING      Sum(BALANCE) > 0
ORDER BY    A_DATE asc
/*
A_DATE         GINI
-------- ----------
01.01.18   0.061538   
02.01.18          0
03.01.18   0.541985      */
© www.soinside.com 2019 - 2024. All rights reserved.