我使用下面的查询从表 schedule 创建物化视图 schedule_state：
-- schedule_state: one row per key_id, taken from schedule.
-- Rows of each key_id are numbered in descending order of
-- header_event_timestamp, ktimestamp, raw_load_timestamp and update_timestamp
-- (in that priority); only the row numbered 1 is kept.
-- The helper column row_number is part of the view's output.
CREATE MATERIALIZED VIEW schedule_state AS
WITH schedule_latest_events AS (
    -- Attach the per-key_id position of every row under the ordering above.
    SELECT
        s.*,
        ROW_NUMBER() OVER newest_first AS row_number
    FROM schedule AS s
    WINDOW newest_first AS (
        PARTITION BY s.key_id
        ORDER BY
            s.header_event_timestamp DESC,
            s.ktimestamp DESC,
            s.raw_load_timestamp DESC,
            s.update_timestamp DESC
    )
)
SELECT ranked.*
FROM schedule_latest_events AS ranked
WHERE ranked.row_number = 1;
由于我使用多列进行排序，有什么方法可以找出是哪一列决定了排名为 1 的那一行吗？
另外，能否取四个排序列中的最大值，并将结果赋给物化视图中的某一列？谢谢！
Table schedule
key_id, header_event_timestamp, ktimestamp, raw_load_timestamp, update_timestamp
k1, 2023-12-22 08:50:59.930000, 2023-12-22 08:50:59.930000, 2023-12-22 08:52:36.960000, 2023-12-22 08:50:58.100000
k1, 2023-12-22 08:50:37.530000, 2023-12-22 08:50:37.530000, 2023-12-22 08:52:36.960000, 2023-12-22 06:41:02.483000
k2, 2023-12-22 06:41:03.080000, 2023-12-22 06:41:03.080000, 2023-12-22 06:52:33.189000, 2023-12-22 06:41:02.483000
k2, 2023-12-22 06:41:03.080000, 2023-12-22 06:45:03.060000, 2023-12-22 06:52:33.189000, 2023-12-22 06:41:02.483000
k2, 2023-12-22 06:41:03.080000, 2023-12-20 15:09:36.370000, 2023-12-21 02:52:37.557000, 2023-12-20 14:04:37.323000
K3, 2023-12-20 14:39:00.909832, 2023-12-20 14:39:00.909000, 2023-12-21 02:52:37.557000, 2023-12-20 14:04:37.323000
K3, 2023-12-20 14:39:00.909832, 2023-12-20 14:39:00.909000, 2023-12-21 02:55:37.557000, 2023-12-20 14:04:37.323000
k4, 2023-12-22 06:41:03.080000, 2023-12-22 06:41:03.080000, 2023-12-22 06:52:33.189000, 2023-12-22 06:41:02.483000
k4, 2023-12-22 06:41:03.080000, 2023-12-22 06:41:03.080000, 2023-12-22 05:52:33.189000, 2023-12-22 06:41:02.483000
k4, 2023-12-22 06:41:03.080000, 2023-12-22 05:41:03.080000, 2023-12-22 05:52:33.189000, 2023-12-22 06:41:02.483000
k4, 2023-12-22 05:41:03.080000, 2023-12-22 05:41:03.080000, 2023-12-22 05:52:33.189000, 2023-12-22 06:41:02.483000
MATERIALIZED VIEW schedule_state
key_id, header_event_timestamp, ktimestamp, raw_load_timestamp, update_timestamp, row_number
k1, 2023-12-22 08:50:59.930000, 2023-12-22 08:50:59.930000, 2023-12-22 08:52:36.960000, 2023-12-22 08:50:58.100000, 1
k2, 2023-12-22 06:41:03.080000, 2023-12-22 06:45:03.060000, 2023-12-22 06:52:33.189000, 2023-12-22 06:41:02.483000, 1
K3, 2023-12-20 14:39:00.909832, 2023-12-20 14:39:00.909000, 2023-12-21 02:55:37.557000, 2023-12-20 14:04:37.323000, 1
k4, 2023-12-22 06:41:03.080000, 2023-12-22 06:41:03.080000, 2023-12-22 06:52:33.189000, 2023-12-22 06:41:02.483000, 1
在该表中，可以找出定义排序的多个列里第一个能够区分前 2 条记录的列。
我准备了下面的完整示例，并在此逐点说明：
- `Schedule` CTE：您需要将其删除才能得到真正的查询
- 添加了 `key_id = ksingle` 的记录，以说明当一条记录单独存在时会发生什么情况
- 添加了两条完全相同的 `key_id = kequal` 记录，以说明完全平局时会发生什么
- 窗口定义放在 `WINDOW` 子句中，以避免重复，并且为了清楚起见将它们放在一起
- 第二条记录的值通过 `nth_value`（带参数 `2`）取得
- 比较由 `array_position` 完成，它返回第一次出现 `false` 的位置
-- Demo query: for every key_id keep the top-ranked row and report which of the
-- four ORDER BY columns is the first one whose value differs from the
-- runner-up row (the row ranked 2 in the same key_id).
--
-- Output, per key_id:
--   * the top-ranked row and its row_number (always 1),
--   * next_*  : the runner-up's value for each ordering column (NULL if none),
--   * test_*  : TRUE when the top row and the runner-up agree on that column,
--   * first_different_column : 1-based position, within the ORDER BY list, of
--     the first column on which they disagree. It is NULL when there is no
--     runner-up (single-row key_id) and when the two rows are equal on all
--     four columns.
--
-- NOTE(review): if any of the four ordering columns can be NULL, "=" yields
-- NULL for that position and array_position skips it -- confirm the columns
-- are NOT NULL before relying on first_different_column.
WITH schedule (key_id, header_event_timestamp, ktimestamp, raw_load_timestamp, update_timestamp) AS (
    -- Inline sample data standing in for the real table; remove this CTE to
    -- run the query against the actual schedule table.
    VALUES
        ('ksingle', '2023-12-22 08:50:59.930000', '2023-12-22 08:50:59.930000', '2023-12-22 08:52:36.960000', '2023-12-22 08:50:58.100000'),
        ('kequal', '2023-12-22 08:50:59.930000', '2023-12-22 08:50:59.930000', '2023-12-22 08:52:36.960000', '2023-12-22 08:50:58.100000'),
        ('kequal', '2023-12-22 08:50:59.930000', '2023-12-22 08:50:59.930000', '2023-12-22 08:52:36.960000', '2023-12-22 08:50:58.100000'),
        ('k1', '2023-12-22 08:50:59.930000', '2023-12-22 08:50:59.930000', '2023-12-22 08:52:36.960000', '2023-12-22 08:50:58.100000'),
        ('k1', '2023-12-22 08:50:37.530000', '2023-12-22 08:50:37.530000', '2023-12-22 08:52:36.960000', '2023-12-22 06:41:02.483000'),
        ('k2', '2023-12-22 06:41:03.080000', '2023-12-22 06:41:03.080000', '2023-12-22 06:52:33.189000', '2023-12-22 06:41:02.483000'),
        ('k2', '2023-12-22 06:41:03.080000', '2023-12-22 06:45:03.060000', '2023-12-22 06:52:33.189000', '2023-12-22 06:41:02.483000'),
        ('k2', '2023-12-22 06:41:03.080000', '2023-12-20 15:09:36.370000', '2023-12-21 02:52:37.557000', '2023-12-20 14:04:37.323000'),
        ('k3', '2023-12-20 14:39:00.909832', '2023-12-20 14:39:00.909000', '2023-12-21 02:52:37.557000', '2023-12-20 14:04:37.323000'),
        ('k3', '2023-12-20 14:39:00.909832', '2023-12-20 14:39:00.909000', '2023-12-21 02:55:37.557000', '2023-12-20 14:04:37.323000'),
        ('k4', '2023-12-22 06:41:03.080000', '2023-12-22 06:41:03.080000', '2023-12-22 06:52:33.189000', '2023-12-22 06:41:02.483000'),
        ('k4', '2023-12-22 06:41:03.080000', '2023-12-22 06:41:03.080000', '2023-12-22 05:52:33.189000', '2023-12-22 06:41:02.483000'),
        ('k4', '2023-12-22 06:41:03.080000', '2023-12-22 05:41:03.080000', '2023-12-22 05:52:33.189000', '2023-12-22 06:41:02.483000'),
        ('k4', '2023-12-22 05:41:03.080000', '2023-12-22 05:41:03.080000', '2023-12-22 05:52:33.189000', '2023-12-22 06:41:02.483000')
),
schedule_latest_events AS (
    -- Rank the rows of each key_id and fetch, next to every row, the values of
    -- the row that immediately follows it in the same ordering.
    SELECT
        *,
        ROW_NUMBER() OVER w1 AS row_number,
        nth_value(header_event_timestamp, 2) OVER w2 AS next_header_event_timestamp,
        nth_value(ktimestamp, 2) OVER w2 AS next_ktimestamp,
        nth_value(raw_load_timestamp, 2) OVER w2 AS next_raw_load_timestamp,
        nth_value(update_timestamp, 2) OVER w2 AS next_update_timestamp
    FROM schedule
    WINDOW
        w1 AS (
            PARTITION BY key_id
            ORDER BY
                header_event_timestamp DESC,
                ktimestamp DESC,
                raw_load_timestamp DESC,
                update_timestamp DESC
        ),
        -- w2 inherits w1's partitioning and ordering, so the two cannot drift
        -- apart. Its frame starts at the current row, which makes the 2nd
        -- value in the frame the row right after the current one.
        w2 AS (w1 ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)
),
top_vs_runner_up AS (
    -- Keep the top-ranked row of each key_id and compare it, column by column,
    -- with its runner-up. Each comparison is NULL when there is no runner-up.
    SELECT
        *,
        header_event_timestamp = next_header_event_timestamp AS test_header_event_timestamp,
        ktimestamp = next_ktimestamp AS test_ktimestamp,
        raw_load_timestamp = next_raw_load_timestamp AS test_raw_load_timestamp,
        update_timestamp = next_update_timestamp AS test_update_timestamp
    FROM schedule_latest_events
    WHERE row_number = 1
)
SELECT
    *,
    -- Array elements follow the ORDER BY priority, so the position of the
    -- first FALSE identifies the column that decided the ranking.
    array_position(
        ARRAY[
            test_header_event_timestamp,
            test_ktimestamp,
            test_raw_load_timestamp,
            test_update_timestamp
        ],
        FALSE
    ) AS first_different_column
FROM top_vs_runner_up;
由于我编写这个查询是为了演示表达所需结果的几种方式，上述查询返回的内容存在一些冗余；不想保留的列请自行删除。