在PostgreSQL中将数据集分为两种方式

问题描述 投票:1回答:1

我有一个大型数据集,我需要以两种不同的方式进行分组。我希望我能够运行一次查询,这样我就不必再运行两个查询了。

我想这可能是使用ROLLUPGROUPING SETS,但我必须承认我不完全明白我如何使用它们。

这是我正在尝试做的一个基本的例子。我试图用一个查询回答的两个问题是:

  • 每个公司每天创建了多少用户?
  • 哪些公司在整个期间创造了最多的用户?排名前五的公司就足够了。
CREATE TABLE IF NOT EXISTS tmp_users (
    id INTEGER NOT NULL,
    name TEXT NOT NULL,
    created TIMESTAMP NOT NULL,
    companyid INTEGER NOT NULL
);

INSERT INTO tmp_users (id, name, created, companyid)
VALUES 
(1, 'Lindsay', '2019-01-01', 1),
(2, 'Michael', '2019-01-02', 1),
(3, 'Stan', '2019-01-04', 3),
(4, 'Gob', '2019-01-04', 1),
(5, 'Buster', '2019-01-01', 1),
(6, 'Lucille', '2019-01-03', 2),
(7, 'Sally', '2019-01-05', 3);

-- Get users created per day, per company
SELECT
    DATE_TRUNC('DAY', created) AS created,
    companyid,
    COUNT(*) AS numberofusers
FROM tmp_users
GROUP BY 
    DATE_TRUNC('DAY', created),
    companyid
ORDER BY DATE_TRUNC('DAY', created) DESC;

-- Users per company, with filter
SELECT
    companyid,
    COUNT(*) AS numberofusers
FROM tmp_users
GROUP BY 
    companyid
HAVING COUNT(*) > 1
ORDER BY COUNT(*) DESC;
sql postgresql postgresql-9.6
1个回答
1
投票

grouping sets可用于在单个Select中返回多个级别的聚合:

-- Get users created per day, per company
SELECT *
FROM
 (
   SELECT
      DATE_TRUNC('DAY', created) AS created,
      companyid,
      Count(*) AS numberofusers,
      Row_Number() -- instead of TOP n
      Over (PARTITION BY CASE WHEN DATE_TRUNC('DAY', created) IS NULL THEN 0 ELSE 1 END 
            ORDER BY Count(*) DESC) AS rn
   FROM tmp_users
   GROUP BY GROUPING SETS
    (
     (DATE_TRUNC('DAY', created), companyid)  -- daily data
     ,companyid                               -- company data
    )
 ) AS dt
WHERE created IS NOT NULL  -- all daily data
   OR rn <= 5              -- plus the TOP 5 companies
ORDER BY created ASC NULLS FIRST;

db<>fiddle

© www.soinside.com 2019 - 2024. All rights reserved.