如何在 Hive 中从一个列表中减去另一个列表的项目

问题描述 投票:0回答:4

每个客户都有两个商品列表:ItemListA 和 ItemListB

Customer_id   ItemListA   ItemListB
   24            2,3         3,4,5
   26            6,7         8,9,10
   25            4,5          5,8

我希望ItemListB中的Item不在ItemListA中,因此输出应为:

Customer_id   ItemListB_A
     24          4,5
     26          8,9,10
     25          8

我不知道如何在 Hive 中对两个以逗号分隔的字符串列表做减法(差集)。我知道 COLLECT_SET,但它只能去除重复项,不能去掉两个列表的交集部分。

sql hive
4个回答
0
投票

这将给出期望的结果。

  -- Per customer, collect the items of ItemListB that do not occur in ItemListA.
  -- Output: Customer_id plus an array<string> of the remaining items; a customer
  -- whose ItemListB is fully covered by ItemListA produces no row.
  --
  -- Only ItemListB is exploded and each item is tested against ItemListA.
  -- Counting occurrences over the concatenation of both lists and keeping
  -- count = 1 would yield the symmetric difference instead (A-only items such
  -- as 2 for A=2,3 / B=3,4,5 leak in) and would drop an item that is repeated
  -- inside ItemListB.
  select Customer_id, collect_list(y) from
    (select Customer_id, y
     from table_name
     lateral view explode(split(ItemListB, ',')) items_b as y
     -- coalesce keeps the membership test well-defined when ItemListA is NULL
     where not array_contains(split(coalesce(ItemListA, ''), ','), y)
     -- one row per (customer, item) so a repeated item is collected once
     group by Customer_id, y
    ) b_only
  group by Customer_id;

0
投票
-- Per customer, the items of ItemListB that are absent from ItemListA,
-- joined into one comma-separated string (no ORDER BY, so the order of
-- items inside the string is whatever collect_list produces).
with b_only_items as (
    -- Explode the concatenation "A,B" together with each item's position;
    -- positions below size(split(ItemListA)) are the ones that came from ItemListA.
    select
        src.customer_id,
        tok.item
    from mytable src
    lateral view posexplode(split(concat_ws(',', ItemListA, ItemListB), ',')) tok as pos, item
    group by
        src.customer_id,
        tok.item
    -- keep an item only when none of its occurrences lies in the ItemListA part
    having count(case when pos < size(split(ItemListA, ',')) then 1 end) = 0
)
select
    customer_id,
    concat_ws(',', collect_list(item)) as ItemListB_A
from b_only_items
group by customer_id

+-------------+-------------+
| customer_id | itemlistb_a |
+-------------+-------------+
|          24 | 4,5         |
|          25 | 8           |
|          26 | 10,8,9      |
+-------------+-------------+

0
投票
-- Per customer, returns ItemListB with every item that also occurs in
-- ItemListA removed, as a comma-separated string (no explode / group by).
--
-- How it works:
--   1. concat(...) builds the string "<ItemListB>,:,<ItemListA>", using ':'
--      as a marker between the two lists.
--      NOTE(review): assumes ':' never appears as an item - confirm.
--   2. regexp_replace deletes every "item," token that
--        - starts at the beginning of the string or right after a comma, and
--        - occurs again later in the string as a whole token (preceded by a
--          comma and followed by a comma or the end of the string).
--      The doubled backslash is presumably string-literal escaping for the
--      regex back-reference \k<item>.
--      Because the look-ahead scans the whole remainder, an item repeated
--      inside ItemListB also loses its earlier occurrence.
--   3. split on ',?:' and taking element [0] keeps only the part before the
--      ':' marker, i.e. what is left of ItemListB.
select      customer_id

           ,split
            (
                regexp_replace
                (
                    concat(ItemListB,',:,',ItemListA)
                   ,'(?<=^|,)(?<item>.*?),(?=.*(?<=,)\\k<item>(?=,|$))'
                   ,''
                )
               ,',?:'
            )[0]        as ItemListB_A

from        mytable

+-------------+-------------+
| customer_id | itemlistb_a |
+-------------+-------------+
|          24 | 4,5         |
|          26 | 8,9,10      |
|          25 | 8           |
+-------------+-------------+

-1
投票
-- Per customer, joins the items of ItemListB that match no item of ItemListA
-- into one comma-separated string.
with unmatched_b as (
    select
        src.Customer_id,
        b_tok.item
    from test_hive src
    -- Two lateral views pair every ItemListB item with every ItemListA item.
    -- The single-argument concat_ws calls are kept as written; presumably they
    -- turn a NULL list into an empty string - confirm before removing them.
    lateral view posexplode(split(concat_ws(',', ItemListB), ',')) b_tok as pos, item
    lateral view posexplode(split(concat_ws(',', ItemListA), ',')) a_tok as pos1, item1
    group by
        src.Customer_id,
        b_tok.item
    -- keep a B item only when it equals none of the A items it was paired with
    having count(case when item = item1 then 1 end) = 0
)
select
    unmatched_b.Customer_id,
    concat_ws(',', collect_list(unmatched_b.item)) as ItemListB_A
from unmatched_b
group by unmatched_b.Customer_id
© www.soinside.com 2019 - 2024. All rights reserved.