SQL 日期窗口重置后

问题描述 投票:0回答:1

我正在查看某个用户的一系列会话日期,想把事件计数“折叠”起来:从第一个事件开始的 7 天时间窗口内发生的所有事件只计一次;该 7 天窗口结束之后,下一个新的事件再开启一个新的 7 天窗口,依此类推。

我想找出一种逻辑,让计数在窗口结束时“中断”,并重新开始一个新的窗口。谢谢!

-- Sample session dates for a single user.
WITH data1 AS (
    SELECT 'bob' AS user, DATE('2024-04-08') AS session_date
    UNION ALL
    SELECT 'bob', DATE('2024-04-11')
    UNION ALL
    SELECT 'bob', DATE('2024-04-12')
    UNION ALL
    SELECT 'bob', DATE('2024-04-17')
    UNION ALL
    SELECT 'bob', DATE('2024-04-18')
    UNION ALL
    SELECT 'bob', DATE('2024-04-22')
    UNION ALL
    SELECT 'bob', DATE('2024-04-23')
    UNION ALL
    SELECT 'bob', DATE('2024-04-25')
)

-- For every session: the date 7 days later, and how many of the same user's
-- sessions fall on that session's date or within the 6 days after it.
SELECT
    *,
    DATE_ADD(session_date, INTERVAL 7 DAY) AS session_date_end,
    COUNT(session_date) OVER (
        PARTITION BY user
        -- UNIX_DATE turns the date into a day number so the RANGE frame
        -- below is measured in days.
        ORDER BY UNIX_DATE(session_date)
        RANGE BETWEEN CURRENT ROW AND 6 FOLLOWING
    ) AS next_7_days
FROM data1
sql google-bigquery window-functions
1个回答
0
投票

以下查询只使用窗口函数和 CTE。只要用户从日期 X 起每周至少登录一次,我们就把所有登录事件划分为从日期 X 开始、每 7 天一段的区块。例如,假设用户从周一开始每天登录,持续一个月;之后有两天没有访问,然后在周三再次登录并继续每天访问。由于中断时间不到一周,我们仍然按从最初那个周一开始的 7 天区间来归类登录事件。也就是说,中断太短时区间的起点不会改变,因此该区间内统计到的活动天数会较少。

-- Splits each user's sessions into consecutive 7-day blocks. Blocks are
-- anchored at the first session of a "main session"; a new main session
-- starts whenever a session has no earlier session 1 to 8 days before it.
WITH data1 AS (
    SELECT 'bob' AS user, DATE('2024-04-08') AS session_date
    UNION ALL SELECT 'bob', DATE('2024-04-11')
    UNION ALL SELECT 'bob', DATE('2024-04-12')
    UNION ALL SELECT 'bob', DATE('2024-04-17')
    UNION ALL SELECT 'bob', DATE('2024-04-18')
    UNION ALL SELECT 'bob', DATE('2024-04-22')
    UNION ALL SELECT 'bob', DATE('2024-04-23')
    UNION ALL SELECT 'bob', DATE('2024-04-25')
    UNION ALL SELECT 'bob', DATE('2024-01-25')
    UNION ALL SELECT 'bob', DATE('2024-01-29')
),

-- temp1: how many of the user's sessions are dated 1 to 8 days before this one.
-- NOTE(review): the frame reaches back 8 days, not 7 -- confirm this is intended.
prior_activity AS (
    SELECT
        *,
        COUNT(*) OVER (
            PARTITION BY user
            ORDER BY UNIX_DATE(session_date)
            RANGE BETWEEN 8 PRECEDING AND 1 PRECEDING
        ) AS temp1
    FROM data1
),

-- main_session: running count of sessions that had no prior activity
-- (temp1 = 0), so the number goes up by one each time a gap starts a new run.
main_sessions AS (
    SELECT
        *,
        COUNTIF(temp1 = 0) OVER (
            PARTITION BY user
            ORDER BY UNIX_DATE(session_date)
            RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
        ) AS main_session
    FROM prior_activity
),

-- main_session_date_start: earliest session date within each main session.
main_session_starts AS (
    SELECT
        *,
        MIN(session_date) OVER (PARTITION BY user, main_session)
            AS main_session_date_start
    FROM main_sessions
),

-- diff: whole days between the main session's start and this session.
day_offsets AS (
    SELECT
        *,
        DATE_DIFF(session_date, main_session_date_start, DAY) AS diff
    FROM main_session_starts
),

-- temp3: zero-based index of the 7-day block inside the main session.
-- temp4: block identifier built as "<main_session>-<block index>".
block_ids AS (
    SELECT
        *,
        DIV(diff, 7) AS temp3,
        main_session || '-' || DIV(diff, 7) AS temp4
    FROM day_offsets
),

-- temp5: block identifier of the user's previous session (NULL for the first).
previous_block_ids AS (
    SELECT
        *,
        LAG(temp4) OVER (
            PARTITION BY user
            ORDER BY UNIX_DATE(session_date)
        ) AS temp5
    FROM block_ids
),

-- session_counter: running count of block changes; the first row of each user
-- counts as a change because its NULL comparison is mapped to TRUE.
numbered_blocks AS (
    SELECT
        *,
        COUNTIF(IFNULL(temp5 != temp4, TRUE)) OVER (
            PARTITION BY user
            ORDER BY UNIX_DATE(session_date)
            RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
        ) AS session_counter
    FROM previous_block_ids
)

-- Attach the start date of each row's 7-day block and the date 7 days after it.
SELECT
    DATE_ADD(main_session_date_start, INTERVAL 7 * temp3 DAY) AS session_start_end,
    DATE_ADD(main_session_date_start, INTERVAL 7 * (1 + temp3) DAY) AS session_date_end,
    *
FROM numbered_blocks
© www.soinside.com 2019 - 2024. All rights reserved.