我一直在尝试解决这道题:https://www.hackerrank.com/challenges/15-days-of-learning-sql/problem?isFullScreen=true ,但卡在了这一步:如何统计从给定开始日期起、每一天都连续有提交的 hacker_id 的数量。下面是我的解法(共有 2 个版本)。其中 max_submissions 给出每个日期提交次数最多的黑客(若有多人并列最多,则取 id 最小者),这部分是正确的;但在最后的计数查询中,我无法得到正确的计数:对于所有日期,它给出的计数都是 35。输出的第二列应当是截至该日期每天都有提交的唯一黑客数,而我得到的要么是所有日期都为 35,要么是与预期输出不同的其他值,尽管查询在逻辑上看起来是正确的。
-- Asker's attempt (reported in the question as returning the same count for every date).
-- max_submissions: for each day between 2016-03-01 and 2016-03-15, counts submissions per
-- hacker, ranks hackers by that count (highest first, lowest hacker_id on ties), keeps the
-- rank-1 hacker and attaches the name from hackers.
-- NOTE(review): max_submissions is defined here but never referenced by the final SELECT.
with max_submissions
as
(
Select t.submission_date,t.hacker_id,t.cnt,h.name From
(Select * from
(Select submission_date, hacker_id, cnt, dense_rank() over (partition by submission_date order by cnt desc,hacker_id asc) as rn
from
(Select
submission_date, hacker_id, count(submission_id) cnt
from
submissions
where submission_date between '2016-03-01' and '2016-03-15'
group by submission_date, hacker_id
)
)where rn =1
) t join
hackers h on t.hacker_id=h.hacker_id
),
-- t1: hacker_ids whose rows show exactly 14 one-day gaps between a row's date and the
-- following row's date, within the same 2016-03-01 .. 2016-03-15 window.
t1
as
(
select hacker_id
from
(
-- cnt = (date of the following row) - (date of this row).
-- NOTE(review): the window has no PARTITION BY, so the last row of one hacker is compared
-- with the first row of the next hacker in (hacker_id, submission_date) order.
Select
hacker_id, lead(submission_date) over ( order by hacker_id,submission_date)
-submission_date cnt
from
submissions
where submission_date between '2016-03-01' and '2016-03-15'
order by hacker_id asc, submission_date asc)
group by hacker_id having sum(case when cnt=1 then 1 else 0 end) =14)
-- Final result: joins every submission row to t1 and counts joined rows per date.
-- NOTE(review): count(t1.hacker_id) counts one per submission row, not per distinct hacker,
-- and t1 holds one fixed set of hackers for all dates rather than a per-day running set;
-- this looks like the source of the constant count described in the question - confirm.
select s.submission_date,count( t1.hacker_id)
from submissions s
join
t1 on
s.hacker_id=t1.hacker_id
group by s.submission_date;
这应该会给你正确的结果:
-- Per-day report for the 15 days starting 2016-03-01: number of distinct hackers with a
-- submission that day, plus the hacker (lowest hacker_id on ties) with the most submissions
-- that day and that hacker's name. Days without submissions are still listed.
-- NOTE(review): num_hackers counts every hacker active on that day; if only hackers who
-- submitted on every day since the first day should count, a streak filter is still
-- needed - confirm against the problem statement.
WITH calendar (day) AS (
    -- Generate a calendar so we don't need to assume that there will always be a submission
    -- every day.
    SELECT DATE '2016-03-01' + LEVEL - 1 AS day
    FROM DUAL
    CONNECT BY LEVEL <= 15
),
daily_hacker_submissions (submission_date, hacker_id, num_submissions) AS (
    -- Find the number of submissions for hackers on each day.
    -- COUNT(s.submission_id) rather than COUNT(*): the outer join yields one all-NULL row
    -- for a day without submissions, and that row must count as 0 submissions, not 1.
    SELECT c.day,
           s.hacker_id,
           COUNT(s.submission_id) AS num_submissions
    FROM calendar c
    LEFT OUTER JOIN submissions s
        ON (
            -- Don't assume dates are always midnight.
            c.day <= s.submission_date
            AND s.submission_date < c.day + 1
        )
    GROUP BY
        c.day,
        s.hacker_id
),
daily_submissions (submission_date, num_hackers, hacker_id) AS (
    -- Find the number of hackers on each day and the hacker with the greatest number of
    -- submissions and the least hacker id.
    SELECT submission_date,
           COUNT(DISTINCT hacker_id),
           MIN(hacker_id) KEEP (DENSE_RANK LAST ORDER BY num_submissions)
    FROM daily_hacker_submissions
    GROUP BY
        submission_date
)
-- Include the hacker's name; the outer join keeps days that have no top hacker.
SELECT d.submission_date,
       d.num_hackers,
       d.hacker_id,
       h.name
FROM daily_submissions d
LEFT OUTER JOIN hackers h
    ON (d.hacker_id = h.hacker_id)
-- Explicit ordering: without it the row order is not guaranteed.
ORDER BY d.submission_date;
对于样本数据:
-- Sample fixture: ten submissions by hackers 1-3 spread over 2016-03-01 and 2016-03-02.
-- Column names come from the aliases on the first SELECT branch.
CREATE TABLE submissions AS
SELECT DATE '2016-03-01' AS submission_date,
       1                 AS submission_id,
       1                 AS hacker_id,
       80                AS score
FROM DUAL
UNION ALL SELECT DATE '2016-03-01',  2, 1,  90 FROM DUAL
UNION ALL SELECT DATE '2016-03-01',  3, 1, 100 FROM DUAL
UNION ALL SELECT DATE '2016-03-01',  4, 2,  90 FROM DUAL
UNION ALL SELECT DATE '2016-03-01',  5, 2, 100 FROM DUAL
UNION ALL SELECT DATE '2016-03-02',  6, 1, 100 FROM DUAL
UNION ALL SELECT DATE '2016-03-02',  7, 2,  90 FROM DUAL
UNION ALL SELECT DATE '2016-03-02',  8, 2, 100 FROM DUAL
UNION ALL SELECT DATE '2016-03-02',  9, 3,  80 FROM DUAL
UNION ALL SELECT DATE '2016-03-02', 10, 3, 100 FROM DUAL;
-- Sample fixture: the three hackers referenced by the submissions sample data.
-- Column names come from the aliases on the first SELECT branch.
CREATE TABLE hackers AS
SELECT 1       AS hacker_id,
       'Alice' AS name
FROM DUAL
UNION ALL SELECT 2, 'Betty' FROM DUAL
UNION ALL SELECT 3, 'Carol' FROM DUAL;
输出:
| SUBMISSION_DATE | NUM_HACKERS | HACKER_ID | NAME |
| --- | --- | --- | --- |
| 2016-03-01 00:00:00 | 2 | 1 | Alice |
| 2016-03-02 00:00:00 | 3 | 2 | Betty |
| 2016-03-03 00:00:00 | 0 | null | null |
| ... | ... | ... | ... |
db<>fiddle 在这里
深入研究递归查询之后,我解决了这个问题。代码如下:
-- Per-day report: number of distinct hackers who have submitted on every day from
-- 2016-03-01 up to that day, plus the lowest hacker_id among the hackers with the most
-- submissions that day and that hacker's name.
WITH dat AS (
    -- Per day: lowest hacker_id among the hackers tied for the highest submission count,
    -- together with the name from hackers.
    SELECT
        a.submission_date,
        a.mn,
        h.name
    FROM (
        SELECT
            submission_date,
            MIN(hacker_id) AS mn
        FROM (
            SELECT
                submission_date,
                hacker_id,
                cnt,
                -- Ordering by submission_date inside its own partition is a no-op,
                -- so only the count drives the rank.
                RANK() OVER (PARTITION BY submission_date ORDER BY cnt DESC) AS rn
            FROM (
                SELECT
                    submission_date,
                    hacker_id,
                    COUNT(DISTINCT submission_id) AS cnt
                FROM submissions
                GROUP BY submission_date, hacker_id
            )
        )
        WHERE rn = 1
        GROUP BY submission_date
    ) a
    JOIN hackers h
        ON a.mn = h.hacker_id
),
daily_hackers (submission_date, hacker_id) AS (
    -- One row per (day, hacker). Recursing over raw submission rows would multiply
    -- duplicate rows at every level for hackers with several submissions per day.
    SELECT DISTINCT
        submission_date,
        hacker_id
    FROM submissions
),
dat1 (submission_date, hacker_id, lev) AS (
    -- Anchor: hackers active on the first day. A DATE literal avoids depending on the
    -- session's NLS_DATE_FORMAT for the string-to-date conversion.
    SELECT
        submission_date,
        hacker_id,
        0 AS lev
    FROM daily_hackers
    WHERE submission_date = DATE '2016-03-01'
    UNION ALL
    -- Step: keep a hacker only if they also have a row dated exactly one day later.
    SELECT
        s.submission_date,
        s.hacker_id,
        d.lev + 1
    FROM daily_hackers s
    JOIN dat1 d
        ON s.hacker_id = d.hacker_id
        AND s.submission_date = d.submission_date + 1
),
dat2 AS (
    -- Number of hackers whose unbroken run reaches each day.
    SELECT
        submission_date,
        COUNT(DISTINCT hacker_id) AS cnt
    FROM dat1
    GROUP BY submission_date
)
SELECT
    d.submission_date,
    d1.cnt,
    d.mn,
    d.name
FROM dat d
JOIN dat2 d1
    ON d.submission_date = d1.submission_date
-- Explicit ordering: ORDER BY inside the CTEs does not order the final result.
ORDER BY d.submission_date;
不使用递归CTE
-- Per-day report without recursion: number of distinct hackers whose n-th active day is
-- also the n-th day that has any submission (i.e. no day missed so far), plus the lowest
-- hacker_id among the hackers with the most submissions that day and that hacker's name.
WITH join_tables_sub_hacker AS (
    -- Distinct (hacker, day) pairs for submissions whose hacker exists in hackers.
    -- DENSE_RANK ignores duplicates, so collapsing them here does not change the ranks.
    SELECT DISTINCT
        h.hacker_id     AS hacker_id,
        h.name          AS hacker_name,
        s.submission_date AS submission_date
    FROM
        submissions s
        JOIN hackers h ON s.hacker_id = h.hacker_id
), ranked_days AS (
    -- day_num : position of the day among all days that have submissions.
    -- each_day: position of the day among the days this hacker has submissions.
    -- Both ranks are computed in one pass instead of self-joining two per-submission CTEs.
    SELECT
        hacker_id,
        submission_date,
        DENSE_RANK()
        OVER(
            ORDER BY
                submission_date
        ) AS day_num,
        DENSE_RANK()
        OVER(PARTITION BY hacker_id, hacker_name
             ORDER BY
                 submission_date
        ) AS each_day
    FROM
        join_tables_sub_hacker
), get_cnt_of_hker_each_day AS (
    -- Equal ranks mean the hacker has a submission on every ranked day up to this one.
    SELECT
        submission_date,
        COUNT(DISTINCT hacker_id) AS no_of_count_participated
    FROM
        ranked_days
    WHERE
        day_num = each_day
    GROUP BY
        submission_date
), get_submission_count AS (
    -- Submissions per hacker per day (hacker_id is a grouping column, no MIN needed).
    SELECT
        submission_date,
        hacker_id AS mhcker,
        COUNT(*)  AS each_count
    FROM
        submissions
    GROUP BY
        submission_date,
        hacker_id
), final_data_min_hacker AS (
    -- Per day: highest submission count first, lowest hacker id breaks ties.
    SELECT
        sdt,
        hk
    FROM
        (
            SELECT
                submission_date AS sdt,
                mhcker          AS hk,
                ROW_NUMBER()
                OVER(PARTITION BY submission_date
                     ORDER BY
                         each_count DESC,
                         mhcker ASC
                ) AS rn
            FROM
                get_submission_count
        )
    WHERE
        rn = 1
)
SELECT
    a.submission_date,
    a.no_of_count_participated,
    f.hk,
    h.name
FROM
    get_cnt_of_hker_each_day a
    JOIN final_data_min_hacker f ON a.submission_date = f.sdt
    JOIN hackers h ON f.hk = h.hacker_id
ORDER BY
    a.submission_date;
输出:
同样可以通过使用递归 CTE 来解决,但该解决方案不被 hackerrank 接受。可能他们正在使用旧版本的 Oracle。
使用递归 CTE
-- Per-day report using a recursive CTE: number of distinct hackers with a submission on
-- every calendar day from the earliest submission day up to that day, plus the lowest
-- hacker_id among the hackers with the most submissions that day and that hacker's name.
-- Dates are truncated to the day everywhere so that all CTEs join on the same values.
WITH daily_hackers (
    hacker_id,
    submission_date
) AS (
    -- One row per (hacker, day). Recursing over raw submission rows would multiply
    -- duplicate rows at every level for hackers with several submissions per day.
    SELECT DISTINCT
        hacker_id,
        trunc(submission_date)
    FROM
        submissions
), rcte (
    hacker_id,
    submission_date
) AS (
    -- Anchor: hackers active on the earliest day.
    SELECT
        hacker_id,
        submission_date
    FROM
        daily_hackers
    WHERE
        submission_date = (
            SELECT
                MIN(submission_date)
            FROM
                daily_hackers
        )
    UNION ALL
    -- Step: keep a hacker only if they are also active on the following day.
    SELECT
        r.hacker_id,
        e.submission_date
    FROM
        rcte r
        JOIN daily_hackers e ON e.hacker_id = r.hacker_id
                                AND e.submission_date = r.submission_date + 1
), get_count_participated AS (
    -- Number of hackers whose unbroken run reaches each day.
    SELECT
        submission_date,
        COUNT(DISTINCT hacker_id) AS no_of_count_participated
    FROM
        rcte
    GROUP BY
        submission_date
), get_submission_count AS (
    -- Submissions per hacker per day (hacker_id is a grouping column, no MIN needed).
    SELECT
        trunc(submission_date) AS submission_date,
        hacker_id              AS mhcker,
        COUNT(*)               AS each_count
    FROM
        submissions
    GROUP BY
        trunc(submission_date),
        hacker_id
), final_data_min_hacker AS (
    -- Per day: highest submission count first, lowest hacker id breaks ties.
    SELECT
        sdt,
        hk
    FROM
        (
            SELECT
                submission_date AS sdt,
                mhcker          AS hk,
                ROW_NUMBER()
                OVER(PARTITION BY submission_date
                     ORDER BY
                         each_count DESC,
                         mhcker ASC
                ) AS rn
            FROM
                get_submission_count
        )
    WHERE
        rn = 1
)
SELECT
    a.submission_date,
    a.no_of_count_participated,
    f.hk,
    h.name
FROM
    get_count_participated a
    JOIN final_data_min_hacker f ON a.submission_date = f.sdt
    JOIN hackers h ON f.hk = h.hacker_id
ORDER BY
    a.submission_date;
输出: