我需要用 row_number 或 rank 这类分析函数给行编号。我尝试了下面查询中 RNK1、RNK2 两列的两种写法,但不确定这样做是否可行。请对比下面的实际结果和预期结果。
-- Question's first attempt: number the flag = 1 rows with row_number()/rank().
-- Sample data: a single user with one row per startdate and a 0/1 flag.
with tmp as (
select 'username1' as username, to_date('2019-04-01 00:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag from dual union all
select 'username1' as username, to_date('2019-04-01 01:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag from dual union all
select 'username1' as username, to_date('2019-04-01 02:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag from dual union all
select 'username1' as username, to_date('2019-04-01 03:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag from dual union all
select 'username1' as username, to_date('2019-04-01 04:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag from dual union all
select 'username1' as username, to_date('2019-04-02 05:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag from dual union all
select 'username1' as username, to_date('2019-04-02 01:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag from dual union all
select 'username1' as username, to_date('2019-04-02 02:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag from dual union all
select 'username1' as username, to_date('2019-04-02 03:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag from dual union all
select 'username1' as username, to_date('2019-04-02 04:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag from dual
)
select
tmp.*,
-- Partitioning AND ordering by flag means every row in a partition ties on the sort key,
-- so row_number() hands out 1..n in an arbitrary order unrelated to startdate.
case when flag = 1 then row_number() over(partition by flag order by flag) else null end as rnk1,
-- For the same reason rank() gives every flag = 1 row the value 1 (all rows are tied).
case when flag = 1 then rank() over(partition by flag order by flag) else null end as rnk2
from tmp
order by startdate, username
实际:
+-------------+--------------------+--------+--------+--------+
| "USERNAME" | "STARTDATE" | "FLAG" | "RNK1" | "RNK2" |
+-------------+--------------------+--------+--------+--------+
| "username1" | 01-APR-19 00:00:00 | 1 | 6 | 1 |
| "username1" | 01-APR-19 01:00:00 | 0 | | |
| "username1" | 01-APR-19 02:00:00 | 1 | 4 | 1 |
| "username1" | 01-APR-19 03:00:00 | 1 | 3 | 1 |
| "username1" | 01-APR-19 04:00:00 | 0 | | |
| "username1" | 02-APR-19 01:00:00 | 1 | 5 | 1 |
| "username1" | 02-APR-19 02:00:00 | 1 | 1 | 1 |
| "username1" | 02-APR-19 03:00:00 | 1 | 2 | 1 |
| "username1" | 02-APR-19 04:00:00 | 0 | | |
| "username1" | 02-APR-19 05:00:00 | 0 | | |
+-------------+--------------------+--------+--------+--------+
预期:
+-------------+--------------------+--------+--------+--------+
| "USERNAME" | "STARTDATE" | "FLAG" | "RNK1" | "RNK2" |
+-------------+--------------------+--------+--------+--------+
| "username1" | 01-APR-19 00:00:00 | 1 | 1 | 1 |
| "username1" | 01-APR-19 01:00:00 | 0 | | |
| "username1" | 01-APR-19 02:00:00 | 1 | 2 | 2 |
| "username1" | 01-APR-19 03:00:00 | 1 | 2 | 2 |
| "username1" | 01-APR-19 04:00:00 | 0 | | |
| "username1" | 02-APR-19 01:00:00 | 1 | 3 | 3 |
| "username1" | 02-APR-19 02:00:00 | 1 | 3 | 3 |
| "username1" | 02-APR-19 03:00:00 | 1 | 3 | 3 |
| "username1" | 02-APR-19 04:00:00 | 0 | | |
| "username1" | 02-APR-19 05:00:00 | 0 | | |
+-------------+--------------------+--------+--------+--------+
谢谢大家的快速回复。我试用了你们的建议,但在加入 threshold 列之后又卡住了:
-- Question's second attempt: label the flag = 1 rows with a group number (grp),
-- now with an extra threshold column in the sample data.
WITH tmp AS (
    SELECT 'username1' AS username, TO_DATE('2019-04-01 00:00', 'YYYY-MM-DD HH24:MI') AS startdate, 1 AS flag, 1 AS threshold FROM dual UNION ALL
    SELECT 'username1' AS username, TO_DATE('2019-04-01 01:00', 'YYYY-MM-DD HH24:MI') AS startdate, 0 AS flag, NULL AS threshold FROM dual UNION ALL
    SELECT 'username1' AS username, TO_DATE('2019-04-01 02:00', 'YYYY-MM-DD HH24:MI') AS startdate, 1 AS flag, 1 AS threshold FROM dual UNION ALL
    SELECT 'username1' AS username, TO_DATE('2019-04-01 03:00', 'YYYY-MM-DD HH24:MI') AS startdate, 1 AS flag, NULL AS threshold FROM dual UNION ALL
    SELECT 'username1' AS username, TO_DATE('2019-04-01 04:00', 'YYYY-MM-DD HH24:MI') AS startdate, 0 AS flag, NULL AS threshold FROM dual UNION ALL
    SELECT 'username1' AS username, TO_DATE('2019-04-01 05:00', 'YYYY-MM-DD HH24:MI') AS startdate, 0 AS flag, NULL AS threshold FROM dual UNION ALL
    SELECT 'username1' AS username, TO_DATE('2019-04-02 01:00', 'YYYY-MM-DD HH24:MI') AS startdate, 1 AS flag, 1 AS threshold FROM dual UNION ALL
    SELECT 'username1' AS username, TO_DATE('2019-04-02 02:00', 'YYYY-MM-DD HH24:MI') AS startdate, 1 AS flag, NULL AS threshold FROM dual UNION ALL
    SELECT 'username1' AS username, TO_DATE('2019-04-02 03:00', 'YYYY-MM-DD HH24:MI') AS startdate, 1 AS flag, 1 AS threshold FROM dual UNION ALL
    SELECT 'username1' AS username, TO_DATE('2019-04-02 04:00', 'YYYY-MM-DD HH24:MI') AS startdate, 1 AS flag, NULL AS threshold FROM dual UNION ALL
    SELECT 'username1' AS username, TO_DATE('2019-04-02 05:00', 'YYYY-MM-DD HH24:MI') AS startdate, 0 AS flag, NULL AS threshold FROM dual
)
SELECT
    tmp.*,
    -- grp is NULL unless flag = 1. For flag = 1 rows it is the overall position
    -- minus the zero-based position among that user's flag = 1 rows, so it stays
    -- constant across a run of consecutive flag = 1 rows.
    -- threshold is only the last sort key of DENSE_RANK; every startdate in this
    -- sample is distinct, so it never gets to break a tie and cannot split a run.
    CASE
        WHEN flag = 1 THEN
            DENSE_RANK() OVER (ORDER BY startdate, username, threshold)
            - (ROW_NUMBER() OVER (PARTITION BY flag, username ORDER BY startdate, username) - flag)
    END AS grp
FROM tmp
ORDER BY
    startdate,
    username
实际:
+-------------+--------------------+--------+-------------+-------+
| "USERNAME" | "STARTDATE" | "FLAG" | "THRESHOLD" | "GRP" |
+-------------+--------------------+--------+-------------+-------+
| "username1" | 01-APR-19 00:00:00 | 1 | 1 | 1 |
| "username1" | 01-APR-19 01:00:00 | 0 | | |
| "username1" | 01-APR-19 02:00:00 | 1 | 1 | 2 |
| "username1" | 01-APR-19 03:00:00 | 1 | | 2 |
| "username1" | 01-APR-19 04:00:00 | 0 | | |
| "username1" | 01-APR-19 05:00:00 | 0 | | |
| "username1" | 02-APR-19 01:00:00 | 1 | 1 | 4 |
| "username1" | 02-APR-19 02:00:00 | 1 | | 4 |
| "username1" | 02-APR-19 03:00:00 | 1 | 1 | 4 |
| "username1" | 02-APR-19 04:00:00 | 1 | | 4 |
| "username1" | 02-APR-19 05:00:00 | 0 | | |
+-------------+--------------------+--------+-------------+-------+
预期:
+-------------+--------------------+--------+-------------+-------+
| "USERNAME" | "STARTDATE" | "FLAG" | "THRESHOLD" | "GRP" |
+-------------+--------------------+--------+-------------+-------+
| "username1" | 01-APR-19 00:00:00 | 1 | 1 | 1 |
| "username1" | 01-APR-19 01:00:00 | 0 | | |
| "username1" | 01-APR-19 02:00:00 | 1 | 1 | 2 |
| "username1" | 01-APR-19 03:00:00 | 1 | | 2 |
| "username1" | 01-APR-19 04:00:00 | 0 | | |
| "username1" | 01-APR-19 05:00:00 | 0 | | |
| "username1" | 02-APR-19 01:00:00 | 1 | 1 | 4 |
| "username1" | 02-APR-19 02:00:00 | 1 | | 4 |
| "username1" | 02-APR-19 03:00:00 | 1 | 1 | 5 |
| "username1" | 02-APR-19 04:00:00 | 1 | | 5 |
| "username1" | 02-APR-19 05:00:00 | 0 | | |
+-------------+--------------------+--------+-------------+-------+
假设您正在对连续的flag = 1行进行分组,您可以使用Tabibitosan技术和dense_rank的组合来完成此操作,如下所示:
-- Ranks each run of consecutive flag = 1 rows (in startdate order) 1, 2, 3, ...
-- flag = 0 rows get a NULL rank.
-- All window functions are partitioned by username so that every user is
-- numbered independently; with the single-user sample the output is unchanged.
WITH tmp AS (select 'username1' as username, to_date('2019-04-01 00:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag from dual union all
             select 'username1' as username, to_date('2019-04-01 01:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag from dual union all
             select 'username1' as username, to_date('2019-04-01 02:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag from dual union all
             select 'username1' as username, to_date('2019-04-01 03:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag from dual union all
             select 'username1' as username, to_date('2019-04-01 04:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag from dual union all
             select 'username1' as username, to_date('2019-04-02 05:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag from dual union all
             select 'username1' as username, to_date('2019-04-02 01:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag from dual union all
             select 'username1' as username, to_date('2019-04-02 02:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag from dual union all
             select 'username1' as username, to_date('2019-04-02 03:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag from dual union all
             select 'username1' as username, to_date('2019-04-02 04:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag from dual),
-- Tabibitosan step: (position among all of the user's rows) minus (position among
-- the user's rows with the same flag) is constant within a run of consecutive
-- flag = 1 rows and grows from one run to the next.
tabibitosan AS (SELECT tmp.*,
                       CASE
                         WHEN flag = 1 THEN
                           row_number() over(PARTITION BY username ORDER BY startdate)
                           - row_number() over(PARTITION BY username, flag ORDER BY startdate)
                       END grp
                FROM   tmp)
SELECT username,
       startdate,
       flag,
       -- dense_rank turns the (possibly gapped) grp values into 1, 2, 3, ... per user.
       CASE
         WHEN flag = 1 THEN
           dense_rank() over(PARTITION BY username, flag ORDER BY grp)
       END rnk
FROM   tabibitosan
ORDER BY startdate,
         username;
USERNAME STARTDATE FLAG RNK
--------- ------------------- ---------- ----------
username1 01/04/2019 00:00:00 1 1
username1 01/04/2019 01:00:00 0
username1 01/04/2019 02:00:00 1 2
username1 01/04/2019 03:00:00 1 2
username1 01/04/2019 04:00:00 0
username1 02/04/2019 01:00:00 1 3
username1 02/04/2019 02:00:00 1 3
username1 02/04/2019 03:00:00 1 3
username1 02/04/2019 04:00:00 0
username1 02/04/2019 05:00:00 0
我更新了查询以考虑额外的阈值列:
-- Ranks runs of consecutive flag = 1 rows per username, additionally starting a
-- new rank whenever a flag = 1 row carries a threshold. flag = 0 rows get NULL.
WITH tmp AS (select 'username1' as username, to_date('2019-04-01 00:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag, 1 as threshold from dual union all
             select 'username1' as username, to_date('2019-04-01 01:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag, null as threshold from dual union all
             select 'username1' as username, to_date('2019-04-01 02:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag, 1 as threshold from dual union all
             select 'username1' as username, to_date('2019-04-01 03:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag, null as threshold from dual union all
             select 'username1' as username, to_date('2019-04-01 04:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag, null as threshold from dual union all
             select 'username1' as username, to_date('2019-04-01 05:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag, null as threshold from dual union all
             select 'username1' as username, to_date('2019-04-02 01:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag, null as threshold from dual union all
             select 'username1' as username, to_date('2019-04-02 02:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag, 1 as threshold from dual union all
             select 'username1' as username, to_date('2019-04-02 03:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag, 1 as threshold from dual union all
             select 'username1' as username, to_date('2019-04-02 04:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag, null as threshold from dual union all
             select 'username1' as username, to_date('2019-04-02 05:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag, null as threshold from dual),
tabibitosan AS (SELECT tmp.*,
                       -- Tabibitosan: constant within a run of consecutive flag = 1 rows.
                       CASE
                         WHEN flag = 1 THEN
                           row_number() over(PARTITION BY username ORDER BY startdate)
                           - row_number() over(PARTITION BY username, flag ORDER BY startdate)
                       END grp,
                       -- Running count of thresholds seen so far on the user's flag = 1 rows.
                       -- Assumes threshold is 1 or NULL; change the CASE inside the SUM otherwise.
                       -- COALESCE: the running SUM is NULL until the first non-NULL threshold,
                       -- and NULL sorts last in Oracle's default ascending order, which would
                       -- rank those leading rows after later rows of the same run.
                       COALESCE(SUM(CASE WHEN flag = 1 THEN threshold END)
                                  OVER (PARTITION BY username, flag ORDER BY startdate), 0) threshold_sum
                FROM   tmp)
SELECT username,
       startdate,
       flag,
       threshold,
       -- A new rank starts when either the run (grp) or the threshold count changes.
       -- Partitioned by username as well so each user is ranked independently.
       CASE
         WHEN flag = 1 THEN
           dense_rank() over(PARTITION BY username, flag ORDER BY grp, threshold_sum)
       END rnk
FROM   tabibitosan
ORDER BY startdate,
         username;
USERNAME STARTDATE FLAG THRESHOLD RNK
--------- ----------- ---------- ---------- ----------
username1 01/04/2019 1 1 1
username1 01/04/2019 0
username1 01/04/2019 1 1 2
username1 01/04/2019 1 2
username1 01/04/2019 0
username1 01/04/2019 0
username1 02/04/2019 1 3
username1 02/04/2019 1 1 4
username1 02/04/2019 1 1 5
username1 02/04/2019 1 5
username1 02/04/2019 0
注:我假设阈值列只能是1或null;如果不是这种情况,则必须根据您的数据更新条件总和。
我还更新了分析函数的分区以包含用户名列,因为我假设这是数据的主键。
最后,请注意我稍微修改了您的示例数据,用来演示这样一种情况:如果一组 flag = 1 的行中第一行的阈值为 null,而下一行设置了阈值,那么这一行会单独成为一个新的组。如果这不是您想要的行为,请在问题中补充说明您期望的逻辑。
尝试如下
-- Labels each run of consecutive flag = 1 rows with a common group number (grp);
-- flag = 0 rows get NULL.
with tmp as (
select 'username1' as username, to_date('2019-04-01 00:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag from dual union all
select 'username1' as username, to_date('2019-04-01 01:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag from dual union all
select 'username1' as username, to_date('2019-04-01 02:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag from dual union all
select 'username1' as username, to_date('2019-04-01 03:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag from dual union all
select 'username1' as username, to_date('2019-04-01 04:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag from dual union all
select 'username1' as username, to_date('2019-04-02 05:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag from dual union all
select 'username1' as username, to_date('2019-04-02 01:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag from dual union all
select 'username1' as username, to_date('2019-04-02 02:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag from dual union all
select 'username1' as username, to_date('2019-04-02 03:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag from dual union all
select 'username1' as username, to_date('2019-04-02 04:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag from dual
)
select
tmp.*,
-- (position among the user's rows) - (zero-based position among the user's
-- flag = 1 rows) is constant within a run of consecutive flag = 1 rows.
-- Both window functions are partitioned by username; a global dense_rank
-- would break the constant difference once rows of several users interleave.
-- The result is a group label, not a gap-free rank: every extra flag = 0 row
-- between two runs widens the jump between their grp values by one.
case when flag = 1 then
  dense_rank() over(partition by username order by startdate)
  - (row_number() over(partition by username, flag order by startdate) - flag)
end as grp
from tmp
order by
startdate, username
USERNAME STARTDATE FLAG GRP
username1 01-APR-19 1 1
username1 01-APR-19 0
username1 01-APR-19 1 2
username1 01-APR-19 1 2
username1 01-APR-19 0
username1 02-APR-19 1 3
username1 02-APR-19 1 3
username1 02-APR-19 1 3
username1 02-APR-19 0
username1 02-APR-19 0
-- Numbers flag = 1 rows by the running count of non-NULL thresholds seen so far
-- for the user (in startdate order); flag = 0 rows get NULL.
with tmp as (
select 'username1' as username, to_date('2019-04-01 00:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag, 1 as threshold from dual union all
select 'username1' as username, to_date('2019-04-01 01:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag, null as threshold from dual union all
select 'username1' as username, to_date('2019-04-01 02:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag, 1 as threshold from dual union all
select 'username1' as username, to_date('2019-04-01 03:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag, null as threshold from dual union all
select 'username1' as username, to_date('2019-04-01 04:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag, null as threshold from dual union all
select 'username1' as username, to_date('2019-04-01 05:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag, null as threshold from dual union all
select 'username1' as username, to_date('2019-04-02 01:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag, 1 as threshold from dual union all
select 'username1' as username, to_date('2019-04-02 02:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag, null as threshold from dual union all
select 'username1' as username, to_date('2019-04-02 03:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag, 1 as threshold from dual union all
select 'username1' as username, to_date('2019-04-02 04:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag, null as threshold from dual union all
select 'username1' as username, to_date('2019-04-02 05:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag, null as threshold from dual
)
select tmp.*,
       -- count(threshold) ignores NULLs, so the value only steps up on rows that
       -- carry a threshold; rows without one inherit the previous number.
       case
         when flag = 1 then count(threshold) over (partition by username order by startdate)
       end as rn
from tmp
-- Without an explicit ORDER BY the row order of the result is not guaranteed.
order by startdate, username;
USERNAME STARTDATE FLAG THRESHOLD RN
--------- ------------------- ---------- ---------- ----------
username1 2019-04-01 00:00:00 1 1 1
username1 2019-04-01 01:00:00 0
username1 2019-04-01 02:00:00 1 1 2
username1 2019-04-01 03:00:00 1 2
username1 2019-04-01 04:00:00 0
username1 2019-04-01 05:00:00 0
username1 2019-04-02 01:00:00 1 1 3
username1 2019-04-02 02:00:00 1 3
username1 2019-04-02 03:00:00 1 1 4
username1 2019-04-02 04:00:00 1 4
username1 2019-04-02 05:00:00 0
11 rows selected.
-- Numbers runs of consecutive flag = 1 rows using row pattern matching:
-- each match is "any flag = 0 rows followed by any flag = 1 rows", and the
-- match number is reported only on the flag = 1 rows.
WITH tmp AS (
    SELECT 'username1' AS username, TO_DATE('2019-04-01 00:00', 'YYYY-MM-DD HH24:MI') AS startdate, 1 AS flag FROM dual UNION ALL
    SELECT 'username1' AS username, TO_DATE('2019-04-01 01:00', 'YYYY-MM-DD HH24:MI') AS startdate, 0 AS flag FROM dual UNION ALL
    SELECT 'username1' AS username, TO_DATE('2019-04-01 02:00', 'YYYY-MM-DD HH24:MI') AS startdate, 1 AS flag FROM dual UNION ALL
    SELECT 'username1' AS username, TO_DATE('2019-04-01 03:00', 'YYYY-MM-DD HH24:MI') AS startdate, 1 AS flag FROM dual UNION ALL
    SELECT 'username1' AS username, TO_DATE('2019-04-01 04:00', 'YYYY-MM-DD HH24:MI') AS startdate, 0 AS flag FROM dual UNION ALL
    SELECT 'username1' AS username, TO_DATE('2019-04-02 05:00', 'YYYY-MM-DD HH24:MI') AS startdate, 0 AS flag FROM dual UNION ALL
    SELECT 'username1' AS username, TO_DATE('2019-04-02 01:00', 'YYYY-MM-DD HH24:MI') AS startdate, 1 AS flag FROM dual UNION ALL
    SELECT 'username1' AS username, TO_DATE('2019-04-02 02:00', 'YYYY-MM-DD HH24:MI') AS startdate, 1 AS flag FROM dual UNION ALL
    SELECT 'username1' AS username, TO_DATE('2019-04-02 03:00', 'YYYY-MM-DD HH24:MI') AS startdate, 1 AS flag FROM dual UNION ALL
    SELECT 'username1' AS username, TO_DATE('2019-04-02 04:00', 'YYYY-MM-DD HH24:MI') AS startdate, 0 AS flag FROM dual
)
SELECT
    username,
    startdate,
    flag,
    -- Only flag = 1 rows show the match number; flag = 0 rows stay NULL.
    CASE WHEN flag = 1 THEN run_no END AS rn
FROM tmp
MATCH_RECOGNIZE (
    PARTITION BY username
    ORDER BY startdate
    MEASURES MATCH_NUMBER() AS run_no
    ALL ROWS PER MATCH
    -- Leading flag = 0 rows are absorbed into the match of the run that follows them.
    PATTERN (off_row* on_row*)
    DEFINE
        on_row AS on_row.flag = 1,
        off_row AS off_row.flag = 0
);
USERNAME STARTDATE FLAG RN
--------- ------------------- ---------- ----------
username1 2019-04-01 00:00:00 1 1
username1 2019-04-01 01:00:00 0
username1 2019-04-01 02:00:00 1 2
username1 2019-04-01 03:00:00 1 2
username1 2019-04-01 04:00:00 0
username1 2019-04-02 01:00:00 1 3
username1 2019-04-02 02:00:00 1 3
username1 2019-04-02 03:00:00 1 3
username1 2019-04-02 04:00:00 0
username1 2019-04-02 05:00:00 0
10 rows selected.