我有带有以下列的表:
[name_of_pos] varchar,
[date_from] datetime,
[date_to] datetime
下面是我的示例数据:
name_of_pos date_from date_to
----------------------------------------------------------------
Asystent 2015-08-26 08:57:49.000 2015-09-04 08:57:49.000
Biuro 2015-09-01 08:53:32.000 2015-09-01 08:53:32.000
Biuro 2015-09-02 09:00:41.000 2015-09-02 09:00:41.000
Biuro 2015-09-03 11:46:03.000 2015-09-03 11:46:03.000
Biuro 2015-09-10 09:02:11.000 2015-09-15 09:02:11.000
Koordynator 2015-09-01 09:04:06.000 2015-09-01 09:04:06.000
Projektant 2015-08-31 08:59:46.000 2015-09-01 08:59:46.000
Projektant 2015-09-02 08:00:54.000 2015-09-02 08:00:54.000
Projektant 2015-09-14 12:34:50.000 2015-09-14 12:34:50.000
我要返回的是每个 name_of_pos 的日期范围(从 date_from 的最小值到 date_to 的最大值),但仅当日期值连续时才合并为一个范围(时间部分不重要,可以在结果中忽略)。
所需的输出将是:
name_of_pos date_from date_to
------------------------------------
Asystent 2015-08-26 2015-09-04
Biuro 2015-09-01 2015-09-03
Biuro 2015-09-10 2015-09-15
Koordynator 2015-09-01 2015-09-01
Projektant 2015-08-31 2015-09-02
Projektant 2015-09-14 2015-09-14
我尝试使用类似于此问题的解决方案:
How do I group on continuous ranges
但是运气不好,因为我有两个日期时间列。
下面是一种使用递归 CTE 的解决方案:先对行排序,再逐行遍历并检查日期是否连续,最后进行分组:
-- Demo script: collapse consecutive date ranges per name_of_pos by walking the
-- rows in order with a recursive CTE and bumping a group counter whenever the
-- next row does not start exactly one day after the previous row ends.

-- dummy table
CREATE TABLE #TableA
(
    [name_of_pos] VARCHAR(11),
    [date_from]   DATETIME,
    [date_to]     DATETIME
);

-- insert dummy data
INSERT INTO #TableA
    ( [name_of_pos], [date_from], [date_to] )
VALUES
    ( 'Asystent', '2015-08-26 08:57:49', '2015-09-04 08:57:49' ),
    ( 'Biuro', '2015-09-01 08:53:32', '2015-09-01 08:53:32' ),
    ( 'Biuro', '2015-09-02 09:00:41', '2015-09-02 09:00:41' ),
    ( 'Biuro', '2015-09-03 11:46:03', '2015-09-03 11:46:03' ),
    ( 'Biuro', '2015-09-10 09:02:11', '2015-09-15 09:02:11' ),
    ( 'Koordynator', '2015-09-01 09:04:06', '2015-09-01 09:04:06' ),
    ( 'Projektant', '2015-08-31 08:59:46', '2015-09-01 08:59:46' ),
    ( 'Projektant', '2015-09-02 08:00:54', '2015-09-02 08:00:54' ),
    ( 'Projektant', '2015-09-14 12:34:50', '2015-09-14 12:34:50' );

-- Working copy with the time part stripped and a row number that fixes the
-- order in which the recursive CTE visits the rows. date_to is added as a
-- tiebreaker so rows sharing name_of_pos and date_from are numbered
-- deterministically.
SELECT name_of_pos,
       CAST(date_from AS DATE) AS date_from,
       CAST(date_to AS DATE) AS date_to,
       ROW_NUMBER() OVER ( ORDER BY name_of_pos, date_from, date_to ) AS rn
INTO #temp
FROM #TableA;

-- GroupingColumn in cte used to identify and group consecutive dates:
-- it stays the same while the current row continues the previous one
-- (same name_of_pos, date_from = previous date_to + 1 day) and is
-- incremented otherwise.
WITH cte
AS ( SELECT name_of_pos ,
            date_from ,
            date_to ,
            1 AS GroupingColumn ,
            rn
     FROM #temp
     WHERE rn = 1
     UNION ALL
     SELECT t2.name_of_pos ,
            t2.date_from ,
            t2.date_to ,
            CASE WHEN t2.date_from = DATEADD(day, 1, cte.date_to)
                      AND cte.name_of_pos = t2.name_of_pos
                 THEN cte.GroupingColumn
                 ELSE cte.GroupingColumn + 1
            END AS GroupingColumn ,
            t2.rn
     FROM #temp t2
          INNER JOIN cte ON t2.rn = cte.rn + 1
   )
SELECT name_of_pos,
       MIN(date_from) AS date_from,
       MAX(date_to) AS date_to
FROM cte
GROUP BY name_of_pos, GroupingColumn
ORDER BY name_of_pos, date_from
-- One recursion level is consumed per row, so the default limit of 100 would
-- abort the query on larger inputs. The recursion is bounded by the row count
-- of #temp, so lifting the limit is safe here.
OPTION (MAXRECURSION 0);

DROP TABLE #temp;
DROP TABLE #TableA;
产生所需的输出:
name_of_pos date_from date_to
Asystent 2015-08-26 2015-09-04
Biuro 2015-09-01 2015-09-03
Biuro 2015-09-10 2015-09-15
Koordynator 2015-09-01 2015-09-01
Projektant 2015-08-31 2015-09-02
Projektant 2015-09-14 2015-09-14
这是一个典型的"间隙与孤岛"(gaps and islands)问题。下面是在标准解法基础上调整(tuned)后的实现,可作为解决方案:
-- Gaps-and-islands grouping of consecutive ranges per name_of_pos.
-- Grp is the row's start date shifted back by
--   (position of the row within its name_of_pos) + (days spanned by all earlier rows),
-- so rows that continue each other day-by-day end up with the same Grp value.
-- The dates are cast to DATE before the shift: with DATETIME columns the
-- time-of-day would otherwise stay in Grp and split rows that are consecutive
-- by calendar day but were recorded at different times.
;WITH
cte AS (
    SELECT
        name_of_pos,
        CAST(date_from AS date) AS date_from,
        CAST(date_to AS date) AS date_to,
        DATEADD(
            day,
            -(
                ROW_NUMBER() OVER (
                    PARTITION BY name_of_pos
                    ORDER BY date_from
                )
                -- tuned part: total length (in days) of the earlier ranges;
                -- NULL for the first row of a partition, hence the default of 0
                + COALESCE(
                    SUM(DATEDIFF(day, date_from, date_to)) OVER (
                        PARTITION BY name_of_pos
                        ORDER BY date_from
                        ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
                    ),
                    0
                )
            ),
            CAST(date_from AS date)
        ) AS Grp
    FROM t
)
SELECT
    name_of_pos,
    MIN(date_from) AS date_from,
    MAX(date_to) AS date_to
FROM cte
GROUP BY name_of_pos, Grp
ORDER BY name_of_pos, date_from;
该查询已在 SQL Fiddle 上测试过(使用了略有不同的示例数据)。
您可以为此使用 CTE,但根据我的经验,最快的方法是在循环中反复执行 UPDATE:
-- Merge consecutive date ranges per name_of_pos by repeatedly extending each
-- row's date_to with the date_to of the row that starts on the following day,
-- until no row can be extended any further.

-- Sample data (DATETIME values, time part included).
declare @temp table
(
    name_of_pos varchar(128),
    date_from datetime,
    date_to datetime
);

insert into @temp (
    name_of_pos, date_from, date_to
)
values
    ('Asystent', '2015-08-26 08:57:49', '2015-09-04 08:57:49'),
    ('Biuro', '2015-09-01 08:53:32', '2015-09-01 08:53:32'),
    ('Biuro', '2015-09-02 09:00:41', '2015-09-02 09:00:41'),
    ('Biuro', '2015-09-03 11:46:03', '2015-09-03 11:46:03'),
    ('Biuro', '2015-09-10 09:02:11', '2015-09-15 09:02:11'),
    ('Koordynator', '2015-09-01 09:04:06', '2015-09-01 09:04:06'),
    ('Projektant', '2015-08-31 08:59:46', '2015-09-01 08:59:46'),
    ('Projektant', '2015-09-02 08:00:54', '2015-09-02 08:00:54'),
    ('Projektant', '2015-09-14 12:34:50', '2015-09-14 12:34:50');

----------------------------------------------------------------------------------------------------

-- Working copy whose columns are DATE, so the time part is dropped on insert.
declare @temp_new table (
    name_of_pos varchar(128),
    date_from date,
    date_to date
);

insert into @temp_new (
    name_of_pos, date_from, date_to
)
select
    name_of_pos, date_from, date_to
from @temp;

-- Track the number of extended rows in a variable rather than reading the
-- @@rowcount left over from the insert above, so the loop does not depend on
-- which statement happens to precede it.
declare @rows_extended int = 1;

while @rows_extended > 0
begin
    update t1 set
        date_to = t2.date_to
    from @temp_new as t1
        inner join @temp_new as t2 on
            t2.name_of_pos = t1.name_of_pos and
            dateadd(dd, 1, t1.date_to) = t2.date_from;

    set @rows_extended = @@rowcount;
end;

-- Every row of a merged range now carries the same final date_to, so grouping
-- on it yields one row per range.
select
    name_of_pos,
    min(date_from) as date_from,
    date_to
from @temp_new
group by name_of_pos, date_to
order by name_of_pos, date_to;
第一个解决方案效果很好,但是我遇到了一个它无法处理的问题:我不仅需要对连续的日期进行分组,还需要把相互重叠的时间段合并到同一组。
回到第一个解决方案,下面是一段可运行的代码:
-- Repro data: id 1 has two back-to-back ranges followed by an isolated day;
-- id 2 has ranges that overlap one another.
DECLARE @t2 TABLE (id int, start_date date, end_date date);

INSERT INTO @t2 (id, start_date, end_date)
VALUES
    (1, '2019-01-26', '2019-02-04'),
    (1, '2019-02-05', '2019-02-07'),
    (1, '2019-02-09', '2019-02-09'),
    (2, '2019-02-01', '2019-02-01'),
    (2, '2019-02-02', '2019-02-09'),
    (2, '2019-02-03', '2019-02-05'),
    (2, '2019-02-07', '2019-02-15');

-- Grp = start_date moved back by (row position within the id) plus the number
-- of days spanned by all earlier rows of the same id. Rows that follow each
-- other day-by-day share a Grp value; overlapping rows do not.
WITH islands AS (
    SELECT
        src.id,
        src.start_date,
        src.end_date,
        DATEADD(
            day,
            -(
                ROW_NUMBER() OVER (
                    PARTITION BY src.id
                    ORDER BY src.start_date
                )
                + COALESCE(
                    SUM(DATEDIFF(day, src.start_date, src.end_date)) OVER (
                        PARTITION BY src.id
                        ORDER BY src.start_date
                        ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
                    ),
                    0
                )
            ),
            src.start_date
        ) AS Grp
    FROM @t2 AS src
)
SELECT
    Id,
    MIN(start_date) AS start_date,
    MAX(end_date) AS end_date
FROM islands
GROUP BY Id, Grp
ORDER BY Id, start_date;
结果是:
Id start_date end_date
1 2019-01-26 2019-02-07
1 2019-02-09 2019-02-09
2 2019-02-01 2019-02-09
2 2019-02-03 2019-02-05
2 2019-02-07 2019-02-15
我希望他们成为:
Id start_date end_date
1 2019-01-26 2019-02-07
1 2019-02-09 2019-02-09
2 2019-02-01 2019-02-15
有什么想法吗?
试试这个:
-- List every range, sorted by position name and then by start date, newest first.
-- NOTE(review): [table] is presumably a placeholder for the real table name.
-- It is bracket-quoted because TABLE is a reserved keyword and cannot be used
-- as a bare identifier.
SELECT
    name_of_pos,
    date_from,
    date_to
FROM [table]
ORDER BY
    name_of_pos ASC,
    date_from DESC;