我有以下数据库表:
日期 | 名字 |
---|---|
2014-08-10 | 鲍勃 |
2014-08-10 | 起诉 |
2014-08-11 | 鲍勃 |
2014-08-11 | 麦克 |
2014-08-12 | 鲍勃 |
2014-08-12 | 麦克 |
2014-08-05 | 鲍勃 |
2014-08-06 | 鲍勃 |
SELECT t.Name,COUNT(*) as frequency
FROM (
SELECT Name,Date,
row_number() OVER (
ORDER BY Date
) - row_number() OVER (
PARTITION BY Name ORDER BY Date
) + 1 seq
FROM orders
) t
GROUP BY Name,seq;
尝试运行 Tabibitosan 方法来查找间隙和岛屿,生成下表不正确。由于第 11 天和第 12 天是连续的,因此名称“mike”实际上应该计数为 2。我该如何解决这个问题?
名字 | 频率 |
---|---|
麦克 | 1 |
鲍勃 | 3 |
鲍勃 | 2 |
麦克 | 1 |
起诉 | 1 |
下面正确的预期输出:
名字 | 频率 |
---|---|
鲍勃 | 3 |
鲍勃 | 2 |
麦克 | 2 |
起诉 | 1 |
您使用了错误的逻辑。基本上,您想要连续的日期,因此您需要从日期中减去序列 :
SELECT t.Name, COUNT(*) as frequency
FROM (SELECT o.*,
row_number() OVER (PARTITION BY Name ORDER BY Date) as seqnum
FROM orders o
) t
GROUP BY Name, date - seqnum * interval '1 day';
drop table if exists foobar;
CREATE TABLE foobar( tick text, date_val date );
insert into foobar values('XYZ', '2021-01-03'); --island 1 has width 2
insert into foobar values('XYZ', '2021-01-04'); --island 1
insert into foobar values('XYZ', '2021-05-09'); --island 2 has width 3
insert into foobar values('XYZ', '2021-05-10'); --island 2
insert into foobar values('XYZ', '2021-05-11'); --island 2
insert into foobar values('XYZ', '2021-07-07'); --island 3 has width 4
insert into foobar values('XYZ', '2021-07-08'); --island 3
insert into foobar values('XYZ', '2021-07-09'); --island 3
insert into foobar values('XYZ', '2021-07-10'); --island 3
insert into foobar values('XYZ', '2022-10-10'); --island 4 has width 1
select * from foobar;
select tick, island_width, min_val, max_val,
min_val - lag(max_val) over (order by max_val)
as gap_width from
(
select tick, count(*) as island_width,
min(date_val) min_val, max(date_val) max_val
from (
select t.*,
row_number() over ( partition by tick order by date_val ) as seqnum
from foobar t where tick = 'XYZ'
) t
group by tick, date_val - seqnum * interval '1 day'
) t2 order by max_val desc
这样打印日期列的间隙和岛屿:
┌──────┬────────────┐
│ tick │ date_val │
├──────┼────────────┤
│ XYZ │ 2021-01-03 │
│ XYZ │ 2021-01-04 │
│ XYZ │ 2021-05-09 │
│ XYZ │ 2021-05-10 │
│ XYZ │ 2021-05-11 │
│ XYZ │ 2021-07-07 │
│ XYZ │ 2021-07-08 │
│ XYZ │ 2021-07-09 │
│ XYZ │ 2021-07-10 │
│ XYZ │ 2022-10-10 │
└──────┴────────────┘
┌──────┬──────────────┬────────────┬────────────┬───────────┐
│ tick │ island_width │ min_val │ max_val │ gap_width │
├──────┼──────────────┼────────────┼────────────┼───────────┤
│ XYZ │ 1 │ 2022-10-10 │ 2022-10-10 │ 457 │
│ XYZ │ 4 │ 2021-07-07 │ 2021-07-10 │ 57 │
│ XYZ │ 3 │ 2021-05-09 │ 2021-05-11 │ 125 │
│ XYZ │ 2 │ 2021-01-03 │ 2021-01-04 │ ¤ │
└──────┴──────────────┴────────────┴────────────┴───────────┘
island_width
列给出了连续数据的宽度。 gap_width 为您提供缺失数据的宽度。Postgresql 中整数数据类型 Demo2 的间隙和孤岛问题:
drop table if exists foobar;
CREATE TABLE foobar( tick text, the_value int);
insert into foobar values('XYZ', -5); --island 1 has width 2
insert into foobar values('XYZ', -4); --end island 1
insert into foobar values('XYZ', 0); --island 2 has width 3
insert into foobar values('XYZ', 1); --island 2
insert into foobar values('XYZ', 2); --end island 2
insert into foobar values('XYZ', 34); --island 3 has width 4
insert into foobar values('XYZ', 35); --island 3
insert into foobar values('XYZ', 36); --island 3
insert into foobar values('XYZ', 37); --island 3
insert into foobar values('XYZ', 85); --island 4 has width 1
select * from foobar;
select tick, island_width, min_val, max_val,
min_val - lag(max_val) over (order by max_val)
as gap_width from
(
select tick, count(*) as island_width,
min(the_value) min_val, max(the_value) max_val
from (
select t.*,
row_number() over ( partition by tick order by the_value) as seqnum
from foobar t where tick = 'XYZ'
) t
group by tick, the_value - seqnum * 1
) t2 order by max_val desc
这样打印整数列的间隙和岛:
┌──────┬───────────┐
│ tick │ the_value │
├──────┼───────────┤
│ XYZ │ -5 │
│ XYZ │ -4 │
│ XYZ │ 0 │
│ XYZ │ 1 │
│ XYZ │ 2 │
│ XYZ │ 34 │
│ XYZ │ 35 │
│ XYZ │ 36 │
│ XYZ │ 37 │
│ XYZ │ 85 │
└──────┴───────────┘
┌──────┬──────────────┬─────────┬─────────┬───────────┐
│ tick │ island_width │ min_val │ max_val │ gap_width │
├──────┼──────────────┼─────────┼─────────┼───────────┤
│ XYZ │ 1 │ 85 │ 85 │ 48 │
│ XYZ │ 4 │ 34 │ 37 │ 32 │
│ XYZ │ 3 │ 0 │ 2 │ 4 │
│ XYZ │ 2 │ -5 │ -4 │ ¤ │
└──────┴──────────────┴─────────┴─────────┴───────────┘
island_width
列给出了连续数据的宽度。
gap_width
为您提供缺失数据的宽度。这允许您说:“显示岛屿之间的间隙”或“显示岛屿大小和开始/停止位置”