-- Raw SKU usage rows, one per (time, device, sku, transaction).
-- This is created as a regular PostgreSQL table.
CREATE TABLE sku_usage (
    time               timestamptz      NOT NULL,
    device_id          uuid             NOT NULL,
    loyalty_program_id text             NOT NULL,
    sku                text             NOT NULL,
    quantity           double precision NOT NULL,
    transaction_id     text             NOT NULL,
    -- sku_usage_pkey is the name PostgreSQL would assign by default;
    -- it is spelled out here so the constraint is easy to grep for.
    CONSTRAINT sku_usage_pkey
        PRIMARY KEY (time, device_id, sku, transaction_id)
);
-- Turn sku_usage into a TimescaleDB hypertable, using "time" as the time column.
SELECT create_hypertable('sku_usage', 'time');
-- Secondary index for per-device, per-SKU lookups over a time range.
-- Fixes: the original referenced a non-existent column (sku_id); the table
-- column is sku. The equality-filtered columns now lead and time comes last
-- (DESC), which matches the index name and avoids merely duplicating the
-- leading columns of the primary key (time, device_id, sku, transaction_id).
CREATE INDEX ix_device_sku_time ON sku_usage (device_id, sku, time DESC);
-- Hourly continuous aggregate over sku_usage: total quantity per device and
-- SKU for each one-hour bucket. Created empty (WITH NO DATA).
CREATE MATERIALIZED VIEW sku_usage_hourly
WITH (timescaledb.continuous)
AS
    SELECT
        time_bucket(INTERVAL '1 hour', time) AS bucket,
        device_id,
        sku,
        -- The output column is named "sum" (the name PostgreSQL assigns to an
        -- unaliased sum() call); it is written out so readers can see it.
        sum(quantity) AS sum
    FROM sku_usage
    GROUP BY
        bucket,
        device_id,
        sku
WITH NO DATA;
-- Refresh policy for sku_usage_hourly: scheduled every hour, with the refresh
-- window running from 3 hours ago (start_offset) to 1 hour ago (end_offset).
SELECT add_continuous_aggregate_policy(
    'sku_usage_hourly',
    start_offset      => INTERVAL '3 hours',
    end_offset        => INTERVAL '1 hour',
    schedule_interval => INTERVAL '1 hour'
);
然后我向表中插入了 2000 万行数据。
当我运行下面这个查询时:
-- Hourly usage from the continuous aggregate for one device and one SKU,
-- restricted to buckets between 110 and 100 days ago. time_bucket_gapfill
-- emits a row for every hour in the range; COALESCE is meant to turn the
-- hours that have no data into 0.
SELECT
    time_bucket_gapfill(INTERVAL '1 hour', bucket) AS time_bucket,
    device_id,
    sku,
    COALESCE(MAX(sum), 0) AS usage
FROM sku_usage_hourly
WHERE bucket < now() - INTERVAL '100 days'
    AND bucket > now() - INTERVAL '110 days'
    AND device_id IN ('111109ee-aaaa-4444-8765-123450273214')
    AND sku IN ('10-49-0920')
GROUP BY
    time_bucket,
    device_id,
    sku;
该查询运行大约需要 6-6.5 秒(0.5 CPU,2 GB RAM)。这么慢让我很震惊,因为它只返回约 2200 行。
如果我改为运行下面的查询:
-- Count raw sku_usage rows for one device and one SKU between 110 and
-- 100 days ago.
SELECT COUNT(*)
FROM sku_usage
WHERE time < now() - INTERVAL '100 days'
    AND time > now() - INTERVAL '110 days'
    AND device_id IN ('111109ee-aaaa-4444-8765-123450273214')
    AND sku IN ('10-49-0920');
它在 400 毫秒内返回,结果约为 1000 条记录。如果我去掉 device_id 和 sku 条件,也只有约 3500000 条记录。
为什么我的 `time_bucket_gapfill` 查询这么慢?我该如何改进?
据我理解,物化视图上应该有 (bucket, device_id, sku) 索引。是这个索引没有被正确使用,还是 `time_bucket_gapfill` 做了什么奇怪的事?
谢谢
编辑:我把查询改成了下面这样:
-- Plain read from the hourly aggregate for one device and one SKU between
-- 110 and 100 days ago, with no time_bucket_gapfill and no aggregation.
-- Fix: removed the stray trailing comma after device_id, which made the
-- statement a syntax error.
SELECT
    device_id
FROM sku_usage_hourly
WHERE bucket < now() - INTERVAL '100 days'
    AND bucket > now() - INTERVAL '110 days'
    AND device_id IN ('111109ee-aaaa-4444-8765-123450273214')
    AND sku IN ('10-49-0920');
仍然需要 6 秒,所以问题不在 `time_bucket_gapfill`。
您能分享一下查询规划器的输出(例如 `EXPLAIN ANALYZE` 的结果)吗?
您是否尝试过将 `AND device_id IN (...)` 改为 `AND device_id = ...`?其他使用 `IN` 的条件也可以做同样的修改。