SQL - 根据条件对行进行分组

问题描述 投票:0回答:2

我正在尝试对以 1 开头且连续为 0 的行进行分组,并在遇到另一个 1 或 2 时停止分组。

这里是一个例子:(“组”列是我期望看到的)

+-----+------+-------+
| seq | data | group |
+-----+------+-------+
|   1 |    1 |     1 |
|   2 |    0 |     1 |
|   3 |    0 |     1 |
|   4 |    0 |     1 |
|   5 |    2 |       |
|   6 |    2 |       |
|   7 |    1 |     2 |
|   8 |    0 |     2 |
|   9 |    0 |     2 |
|  10 |    1 |     3 |
|  11 |    0 |     3 |
|  12 |    0 |     3 |
|  13 |    2 |       |
|  14 |    0 |       |
|  15 |    0 |       |
|  16 |    0 |       |
|  17 |    0 |       |
|  18 |    0 |       |
|  19 |    2 |       |
|  20 |    2 |       |
|  21 |    1 |     4 |
|  22 |    0 |     4 |
|  23 |    0 |     4 |
|  24 |    0 |     4 |
|  25 |    2 |       |
|  26 |    2 |       |
|  27 |    0 |       |
|  28 |    1 |     5 |
|  29 |    0 |     5 |
|  30 |    0 |     5 |
|  31 |    0 |     5 |
|  32 |    1 |     6 |
|  33 |    0 |     6 |
|  34 |    0 |     6 |
|  35 |    0 |     6 |
|  36 |    0 |     6 |
|  37 |    2 |       |
|  38 |    0 |       |
|  39 |    0 |       |
|  40 |    0 |       |
|  41 |    0 |       |
|  42 |    0 |       |
|  43 |    0 |       |
|  44 |    0 |       |
|  45 |    0 |       |
|  46 |    2 |       |
|  47 |    2 |       |
|  48 |    2 |       |
+-----+------+-------+

我已经尝试使用 coalesce 来实现它,但这就是我所拥有的:

select seq, data, coalesce((select sum(data) +1 
                           from mytable pre 
                           where pre.seq <= mytable.seq and pre.data = 1),0) as s1
from mytable
+-----+------+-------+
| seq | data | group |
+-----+------+-------+
|   1 |    1 |     2 |
|   2 |    0 |     2 |
|   3 |    0 |     2 |
|   4 |    0 |     2 |
|   5 |    2 |     2 |
|   6 |    2 |     2 |
|   7 |    1 |     3 |
|   8 |    0 |     3 |
|   9 |    0 |     3 |
|  10 |    1 |     4 |
|  11 |    0 |     4 |
|  12 |    0 |     4 |
|  13 |    2 |     4 |
|  14 |    0 |     4 |
|  15 |    0 |     4 |
|  16 |    0 |     4 |
|  17 |    0 |     4 |
|  18 |    0 |     4 |
|  19 |    2 |     4 |
|  20 |    2 |     4 |
|  21 |    1 |     5 |
|  22 |    0 |     5 |
|  23 |    0 |     5 |
|  24 |    0 |     5 |
|  25 |    2 |     5 |
|  26 |    2 |     5 |
|  27 |    0 |     5 |
|  28 |    1 |     6 |
|  29 |    0 |     6 |
|  30 |    0 |     6 |
|  31 |    0 |     6 |
|  32 |    1 |     7 |
|  33 |    0 |     7 |
|  34 |    0 |     7 |
|  35 |    0 |     7 |
|  36 |    0 |     7 |
|  37 |    2 |     7 |
|  38 |    0 |     7 |
|  39 |    0 |     7 |
|  40 |    0 |     7 |
|  41 |    0 |     7 |
|  42 |    0 |     7 |
|  43 |    0 |     7 |
|  44 |    0 |     7 |
|  45 |    0 |     7 |
|  46 |    2 |     7 |
|  47 |    2 |     7 |
|  48 |    2 |     7 |
+-----+------+-------+

遇到1或2我想停止分组,你能帮我解决这个问题吗?谢谢

++

我有一个类似的任务,就是从下到上分组,但条件略有不同。

我需要在数据以“2”开头时对数据进行分组,并在遇到 0 或 1 时关闭该组,但也需要将它们包含在组中(0 或 1)。以下是预期的输出。

+-----+------+-------+
| seq | data | group |
+-----+------+-------+
|   1 |    1 |    11 |
|   2 |    0 |    11 |
|   3 |    0 |    11 |
|   4 |    0 |    11 |
|   5 |    2 |    11 |
|   6 |    2 |    10 |
|   7 |    1 |       |
|   8 |    0 |       |
|   9 |    0 |       |
|  10 |    1 |     9 |
|  11 |    0 |     9 |
|  12 |    0 |     9 |
|  13 |    2 |     9 |
|  14 |    0 |     8 |
|  15 |    0 |     8 |
|  16 |    0 |     8 |
|  17 |    0 |     8 |
|  18 |    0 |     8 |
|  19 |    2 |     8 |
|  20 |    2 |     7 |
|  21 |    1 |     6 |
|  22 |    0 |     6 |
|  23 |    0 |     6 |
|  24 |    0 |     6 |
|  25 |    2 |     6 |
|  26 |    2 |     5 |
|  27 |    0 |       |
|  28 |    1 |       |
|  29 |    0 |       |
|  30 |    0 |       |
|  31 |    0 |       |
|  32 |    1 |     4 |
|  33 |    0 |     4 |
|  34 |    0 |     4 |
|  35 |    0 |     4 |
|  36 |    0 |     4 |
|  37 |    2 |     4 |
|  38 |    0 |     3 |
|  39 |    0 |     3 |
|  40 |    0 |     3 |
|  41 |    0 |     3 |
|  42 |    0 |     3 |
|  43 |    0 |     3 |
|  44 |    0 |     3 |
|  45 |    0 |     3 |
|  46 |    2 |     3 |
|  47 |    2 |     2 |
|  48 |    2 |     1 |
+-----+------+-------+

我使用了与@shawnt00 的回答类似的方法,下面是我得到的。

它给出了我期望看到的结果,但我想尽可能避免左外连接。有什么方法可以使这个查询干净吗?

with g1 as (
    select *,
        count(case when data = 2 then 1 end) over (order by seq desc) as grp
    from mytable
), g2 as (
    select *,
        count(case when data > 0 then 1 end) over (partition by grp order by seq desc) as grp2
    from g1  
)
select curr.seq, curr.data, curr.grp, 
       CASE WHEN curr.data = 1 AND next.grp2 = 1 THEN 1
            ELSE curr.grp2 END AS grp2
from g2 curr left outer join g2 next on curr.seq + 1 = next.seq
-- where grp2 = 1;
order by curr.seq
;


+-----+------+-----+------+
| seq | data | grp | grp2 |
+-----+------+-----+------+
|   1 |    1 |  11 |    1 |
|   2 |    0 |  11 |    1 |
|   3 |    0 |  11 |    1 |
|   4 |    0 |  11 |    1 |
|   5 |    2 |  11 |    1 |
|   6 |    2 |  10 |    1 |
|   7 |    1 |   9 |    3 |
|   8 |    0 |   9 |    2 |
|   9 |    0 |   9 |    2 |
|  10 |    1 |   9 |    1 |
|  11 |    0 |   9 |    1 |
|  12 |    0 |   9 |    1 |
|  13 |    2 |   9 |    1 |
|  14 |    0 |   8 |    1 |
|  15 |    0 |   8 |    1 |
|  16 |    0 |   8 |    1 |
|  17 |    0 |   8 |    1 |
|  18 |    0 |   8 |    1 |
|  19 |    2 |   8 |    1 |
|  20 |    2 |   7 |    1 |
|  21 |    1 |   6 |    1 |
|  22 |    0 |   6 |    1 |
|  23 |    0 |   6 |    1 |
|  24 |    0 |   6 |    1 |
|  25 |    2 |   6 |    1 |
|  26 |    2 |   5 |    1 |
|  27 |    0 |   4 |    3 |
|  28 |    1 |   4 |    3 |
|  29 |    0 |   4 |    2 |
|  30 |    0 |   4 |    2 |
|  31 |    0 |   4 |    2 |
|  32 |    1 |   4 |    1 |
|  33 |    0 |   4 |    1 |
|  34 |    0 |   4 |    1 |
|  35 |    0 |   4 |    1 |
|  36 |    0 |   4 |    1 |
|  37 |    2 |   4 |    1 |
|  38 |    0 |   3 |    1 |
|  39 |    0 |   3 |    1 |
|  40 |    0 |   3 |    1 |
|  41 |    0 |   3 |    1 |
|  42 |    0 |   3 |    1 |
|  43 |    0 |   3 |    1 |
|  44 |    0 |   3 |    1 |
|  45 |    0 |   3 |    1 |
|  46 |    2 |   3 |    1 |
|  47 |    2 |   2 |    1 |
|  48 |    2 |   1 |    1 |
+-----+------+-----+------+
sql postgresql group-by row-number coalesce
2个回答
1
投票

标记行两次。首先计算以 1 开头的块/运行。现在在每个块/运行中,将以非零开头的运行分开并编号。 “分组”行必须是第二个列表中的第一行。

with g1 as (
    select *,
        count(case when data = 1  then 1 end) over (order by seq) as grpA,
        count(case when data = 2  then 1 end) over (order by seq desc) as grpD
    from T
), g2 as (
    select *,
        count(case when data <> 0 then 1 end) over (partition by grpA order by seq) as grpA2,
        count(case when data  = 1 then 1 end) over (partition by grpD order by seq desc
            rows between unbounded preceding and 1 preceding) as grpD2
    from g1
)
select seq, data,
    case when grpA2 = 1 then grpA end as groupA,
    case when grpD2 = 1 then grpD end as groupD
from g2;

这应该比自连接更有效率。


0
投票

使用子查询和

lead
为每个有效的行组构建独特的
id
s 的运行:

with cte as (
   select t.*, (select count(*) from tbl t1 where t1.seq <= t.seq and t1.data > 0) r 
   from tbl t
),
runs as (
   select c.seq, c.data, c.r, case when (c.data = 1 and lead(c.data, 1) over (order by c.seq) = 0) or 
        (c.data = 0 and not exists (select 1 from cte c1 where c1.r = c.r and c1.data > 1)) 
     then c.r else 0 end r1
   from cte c
)
select r.seq, r.data, case when r.r1 = 0 then null else (select count(distinct r1.r1) from runs r1 where r1.seq <= r.seq and r1.r1 > 0) end from runs r

见小提琴

© www.soinside.com 2019 - 2024. All rights reserved.