按SQL Server中的连续日期分组

问题描述 投票:3回答:5

我有带有以下列的表:

[name_of_pos] varchar,
[date_from] datetime,
[date_to] datetime

下面是我的示例数据:

name_of_pos  date_from                 date_to
----------------------------------------------------------------
Asystent     2015-08-26 08:57:49.000   2015-09-04 08:57:49.000
Biuro        2015-09-01 08:53:32.000   2015-09-01 08:53:32.000
Biuro        2015-09-02 09:00:41.000   2015-09-02 09:00:41.000
Biuro        2015-09-03 11:46:03.000   2015-09-03 11:46:03.000
Biuro        2015-09-10 09:02:11.000   2015-09-15 09:02:11.000
Koordynator  2015-09-01 09:04:06.000   2015-09-01 09:04:06.000
Projektant   2015-08-31 08:59:46.000   2015-09-01 08:59:46.000
Projektant   2015-09-02 08:00:54.000   2015-09-02 08:00:54.000
Projektant   2015-09-14 12:34:50.000   2015-09-14 12:34:50.000

我想返回每个name_of_pos的日期范围(从date_from的最小值到date_to的最大值),但仅限于日期值连续的情况(时间部分不重要,可以在结果中忽略)。

所需的输出将是:

  name_of_pos   date_from   date_to
  ------------------------------------
  Asystent      2015-08-26  2015-09-04 
  Biuro         2015-09-01  2015-09-03 
  Biuro         2015-09-10  2015-09-15 
  Koordynator   2015-09-01  2015-09-01 
  Projektant    2015-08-31  2015-09-02 
  Projektant    2015-09-14  2015-09-14 

我尝试使用类似于此问题的解决方案:

How do I group on continuous ranges

但是运气不好,因为我有两个日期时间列。

sql sql-server tsql gaps-and-islands
5个回答
1
投票

这是一种使用递归cte的解决方案:先对行排序,再逐行遍历,并在分组之前检查相邻行的日期是否连续:

-- Demo script: merges consecutive date ranges per name_of_pos by walking the
-- rows in order with a recursive CTE and assigning a group number to each row.

-- dummy table
CREATE TABLE #TableA
    (
      [name_of_pos] VARCHAR(11) ,
      [date_from] DATETIME ,
      [date_to] DATETIME
    );

-- insert dummy data
INSERT  INTO #TableA
        ( [name_of_pos], [date_from], [date_to] )
VALUES  ( 'Asystent', '2015-08-26 08:57:49', '2015-09-04 08:57:49' ),
        ( 'Biuro', '2015-09-01 08:53:32', '2015-09-01 08:53:32' ),
        ( 'Biuro', '2015-09-02 09:00:41', '2015-09-02 09:00:41' ),
        ( 'Biuro', '2015-09-03 11:46:03', '2015-09-03 11:46:03' ),
        ( 'Biuro', '2015-09-10 09:02:11', '2015-09-15 09:02:11' ),
        ( 'Koordynator', '2015-09-01 09:04:06', '2015-09-01 09:04:06' ),
        ( 'Projektant', '2015-08-31 08:59:46', '2015-09-01 08:59:46' ),
        ( 'Projektant', '2015-09-02 08:00:54', '2015-09-02 08:00:54' ),
        ( 'Projektant', '2015-09-14 12:34:50', '2015-09-14 12:34:50' );

-- Work table: the time portion is dropped (CAST ... AS DATE) and every row gets
-- a sequence number rn, ordered by position and start date, so that the
-- recursive CTE below can step from row rn to row rn + 1.
SELECT  name_of_pos, CAST(date_from AS DATE) date_from, CAST(date_to AS DATE) date_to,
        ROW_NUMBER() OVER ( ORDER BY name_of_pos, date_from ) rn
INTO    #temp
FROM    #TableA;

-- GroupingColumn identifies runs of consecutive dates: a row keeps the previous
-- row's group number only when it belongs to the same name_of_pos AND starts
-- exactly one day after the previous row ends; otherwise a new group starts.
WITH    cte
          AS ( -- anchor: the first row opens group 1
               SELECT   name_of_pos ,
                        date_from ,
                        date_to ,
                        1 AS GroupingColumn ,
                        rn
               FROM     #temp
               WHERE    rn = 1
               UNION ALL
               -- recursive step: visit the next row in rn order
               SELECT   t2.name_of_pos ,
                        t2.date_from ,
                        t2.date_to ,
                        CASE WHEN t2.date_from = DATEADD(day, 1, cte.date_to)
                                  AND cte.name_of_pos = t2.name_of_pos
                             THEN cte.GroupingColumn
                             ELSE cte.GroupingColumn + 1
                        END AS GroupingColumn ,
                        t2.rn
               FROM     #temp t2
                        INNER JOIN cte ON t2.rn = cte.rn + 1
             )
    SELECT  name_of_pos, MIN(date_from) AS date_from, MAX(date_to) AS date_to
    FROM    cte
    GROUP BY name_of_pos, GroupingColumn
    -- explicit ordering: without it the row order of the result is not guaranteed
    ORDER BY name_of_pos, date_from
    -- The CTE recurses once per input row. SQL Server stops a recursive CTE
    -- after 100 levels by default and raises an error, so inputs with more than
    -- 101 rows would fail; MAXRECURSION 0 removes that limit.
    OPTION  ( MAXRECURSION 0 );

DROP TABLE #temp;
DROP TABLE #TableA;

产生所需的输出:

name_of_pos date_from   date_to
Asystent    2015-08-26  2015-09-04
Biuro       2015-09-01  2015-09-03
Biuro       2015-09-10  2015-09-15
Koordynator 2015-09-01  2015-09-01
Projektant  2015-08-31  2015-09-02
Projektant  2015-09-14  2015-09-14

1
投票

这是一个间隙与孤岛(gaps and islands)问题。下面是我在标准解法的基础上调整后的实现,可作为一种解决方案供参考:

-- Gaps-and-islands over date ranges using window functions.
-- For every row, Grp = date_from - (row number + total length in days of all
-- earlier ranges of the same name_of_pos). Rows whose ranges follow each other
-- day after day end up with the same Grp value and are merged by the GROUP BY.
;WITH

cte AS (
    SELECT
        t.name_of_pos,
        -- Work on calendar dates only. If the time of day were kept, two
        -- consecutive days with different times would yield different Grp
        -- values and would never be merged.
        CAST(t.date_from AS date) AS date_from,
        CAST(t.date_to AS date) AS date_to,
        DATEADD(
            day,
            - (ROW_NUMBER() OVER (
                   PARTITION BY t.name_of_pos
                   ORDER BY t.date_from
               ) +                           -- here starts tuned part --
               ISNULL(
                   -- days covered by all preceding ranges of this position;
                   -- NULL for the first row (empty frame), hence ISNULL(..., 0)
                   SUM(DATEDIFF(day, t.date_from, t.date_to)) OVER (
                       PARTITION BY t.name_of_pos
                       ORDER BY t.date_from
                       ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
                   ), 0)                     -- here ends tuned part --
              ),
            CAST(t.date_from AS date)
        ) AS Grp
    FROM t
)

SELECT name_of_pos
    ,MIN(date_from) AS date_from
    ,MAX(date_to) AS date_to
FROM cte
GROUP BY name_of_pos, Grp
ORDER BY name_of_pos, date_from

已在sqlfiddle上测试过(使用了略有不同的示例数据)。


0
投票

您可以为此使用cte,但是根据我的经验,最快的方法是循环使用update:

-- Demo script: merges consecutive date ranges per name_of_pos by repeatedly
-- extending each range's date_to in an UPDATE loop.

-- Sample data with the original datetime values.
DECLARE @temp TABLE
(
  name_of_pos VARCHAR(128),
  date_from DATETIME,
  date_to DATETIME
);

INSERT INTO @temp (
    name_of_pos, date_from, date_to
)
VALUES
    ('Asystent', '2015-08-26 08:57:49', '2015-09-04 08:57:49'),
    ('Biuro', '2015-09-01 08:53:32', '2015-09-01 08:53:32'),
    ('Biuro', '2015-09-02 09:00:41', '2015-09-02 09:00:41'),
    ('Biuro', '2015-09-03 11:46:03', '2015-09-03 11:46:03'),
    ('Biuro', '2015-09-10 09:02:11', '2015-09-15 09:02:11'),
    ('Koordynator', '2015-09-01 09:04:06', '2015-09-01 09:04:06'),
    ('Projektant', '2015-08-31 08:59:46', '2015-09-01 08:59:46'),
    ('Projektant', '2015-09-02 08:00:54', '2015-09-02 08:00:54'),
    ('Projektant', '2015-09-14 12:34:50', '2015-09-14 12:34:50');

----------------------------------------------------------------------------------------------------
-- Working copy with DATE columns: inserting the datetime values into DATE
-- columns drops the time portion.
DECLARE @temp_new TABLE (
  name_of_pos VARCHAR(128),
  date_from DATE,
  date_to DATE
);

INSERT INTO @temp_new (
    name_of_pos, date_from, date_to
)
SELECT
    name_of_pos, date_from, date_to
FROM @temp;

-- @@ROWCOUNT is first set by the INSERT above and afterwards by each UPDATE.
-- Every pass stretches a range to the end of a range (same name_of_pos) that
-- starts on the day after it ends; the loop stops once a pass updates no rows.
WHILE @@ROWCOUNT > 0
BEGIN
    UPDATE t1 SET
        date_to = t2.date_to
    FROM @temp_new AS t1
        INNER JOIN @temp_new AS t2 ON
            t2.name_of_pos = t1.name_of_pos AND
            DATEADD(dd, 1, t1.date_to) = t2.date_from
END;

-- After the loop, all rows of one merged range share the same date_to, so
-- grouping by it and taking the earliest date_from yields one row per range.
-- The aggregate is aliased so the result column has a name.
SELECT name_of_pos, MIN(date_from) AS date_from, date_to
FROM @temp_new
GROUP BY name_of_pos, date_to
ORDER BY name_of_pos, date_to;

0
投票

第一个解决方案效果很好,但是我遇到了一个超越它的问题:我不仅需要对连续的日期进行分组,还需要对重叠的时间段进行分组。

返回第一个解决方案,这是一段可运行的代码:

-- Repro for the follow-up question: the consecutive-date grouping applied to
-- sample data that also contains overlapping periods (id = 2).
DECLARE @t2 TABLE (id INT, start_date DATE, end_date DATE);

INSERT INTO @t2
VALUES
    (1, '2019-01-26', '2019-02-04'),
    (1, '2019-02-05', '2019-02-07'),  -- first group
    (1, '2019-02-09', '2019-02-09'),  -- second group
    (2, '2019-02-01', '2019-02-01'),
    (2, '2019-02-02', '2019-02-09'),
    (2, '2019-02-03', '2019-02-05'),
    (2, '2019-02-07', '2019-02-15');  -- third group

WITH
-- Per row: its position within the id and the total length (in days) of all
-- earlier periods of the same id.
numbered AS (
    SELECT
        src.id,
        src.start_date,
        src.end_date,
        ROW_NUMBER() OVER (
            PARTITION BY src.id
            ORDER BY src.start_date
        ) AS seq,
        ISNULL(
            SUM(DATEDIFF(day, src.start_date, src.end_date)) OVER (
                PARTITION BY src.id
                ORDER BY src.start_date
                ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
            ), 0) AS prior_days
    FROM @t2 AS src
),
-- Grp is the start date shifted back by (position + earlier period lengths);
-- periods that follow each other day after day get the same Grp.
cte AS (
    SELECT
        id,
        start_date,
        end_date,
        DATEADD(day, -(seq + prior_days), start_date) AS Grp
    FROM numbered
)
SELECT Id
    ,MIN(start_date) AS start_date
    ,MAX(end_date) AS end_date
FROM cte
GROUP BY Id, Grp
ORDER BY Id, start_date

结果是:

Id  start_date  end_date
1   2019-01-26  2019-02-07
1   2019-02-09  2019-02-09
2   2019-02-01  2019-02-09
2   2019-02-03  2019-02-05
2   2019-02-07  2019-02-15

我希望他们成为:

Id  start_date  end_date
1   2019-01-26  2019-02-07
1   2019-02-09  2019-02-09
2   2019-02-01  2019-02-15

有什么想法吗?


-2
投票

尝试一次:

-- Lists every row ordered by position (ascending) and start date (descending).
-- This query only sorts the rows; it does not merge any ranges.
-- TABLE is a reserved keyword in T-SQL, so the name must be bracket-quoted to
-- parse. NOTE(review): [table] is presumably a placeholder for the real table
-- name -- confirm before use.
SELECT name_of_pos, date_from, date_to
FROM [table]
ORDER BY
    name_of_pos ASC, date_from DESC;
© www.soinside.com 2019 - 2024. All rights reserved.