我运行以下查询:
-- Query as posted in the question: the time-window test sits in WHERE,
-- so it is evaluated on the rows produced by the left join.
create table c.hello as
select
    a.*,
    b.timestamp,
    b.alert
from nice a
left join bye b
    on a.number = b.number_nb
where
    -- AND binds tighter than OR, so this reads:
    -- (difference within 0..86400 seconds) OR (b.alert_timestamp is null)
    (unix_timestamp(a.time) - unix_timestamp(b.timestamp) between 0 and 86400)
    or b.alert_timestamp is null;
为什么我的 hello 表返回的记录少于 nice 表中的记录?我该如何解决这个问题?我希望保留 nice 表中的所有记录。我原以为 WHERE 子句中的 OR 条件可以处理这种情况,但不确定为什么没有生效。我知道 WHERE 子句会使查询实际上变成内连接,但我以为加上 is null 的条件应该可以解决这个问题。您能帮忙吗?
因为您把条件放在了 WHERE 子句中,所以这些行被过滤掉了。
请把该条件改为 JOIN 谓词(ON 子句)的一部分,例如:
-- Keep every row of nice; attach a bye row only when it matches on number
-- and a.time is 0..86400 seconds (at most 24 hours) after b.timestamp.
-- All match conditions live in ON, so nice rows without a qualifying bye row
-- are still returned, with b.timestamp and b.alert set to NULL.
-- The "or b.alert_timestamp is null" test from the WHERE version is dropped on
-- purpose: inside ON it would let any bye row whose alert_timestamp is NULL
-- pair with every nice row, whatever its number (AND binds tighter than OR).
create table c.hello as
select
    a.*,
    b.timestamp,
    b.alert
from nice a
left join bye b
    on a.number = b.number_nb
    and (Unix_Timestamp(a.time) - Unix_Timestamp(b.timestamp) >= 0)
    and (Unix_Timestamp(a.time) - Unix_Timestamp(b.timestamp) <= 86400);
您看到区别了吗?
您可以通过将相关条件移到 ON 子句来解决此问题:
-- Left join with the whole match rule in ON: same number, and a.time is
-- 0..86400 seconds after b.timestamp. nice rows with no such bye row are
-- kept, with b.timestamp and b.alert returned as NULL.
create table c.hello as
select
    a.*,
    b.timestamp,
    b.alert
from nice a
left join bye b
    on a.number = b.number_nb
    and (unix_timestamp(a.time) - unix_timestamp(b.timestamp) between 0 and 86400);
您的代码失败的原因是:a 中的某些行在 b 中有匹配的行,但这些匹配行不满足任何时间条件,因此两个时间限制无法得到满足;并且该值也不是 null,因为确实存在匹配的行。
提供一个最小可复现示例总是有帮助的。让我们考虑以下示例:
-- Minimal reproducible example: two inline tables built with stack(),
-- left-joined on number_nb, with the timestamp difference exposed as ts_diff.
with A as (
    -- three rows; number_nb 4 has no counterpart in B
    select stack(3,
        1, '2019-11-10 10:10:10.000',
        2, '2019-12-10 10:10:10.000',
        4, '2019-25-10 10:10:10.000'
    ) as (number_nb, ts)
),
B as (
    -- NOTE(review): '2019-25-10' has month 25; the sample output's ts_diff of
    -- -34300800 (397 days) is consistent with it being parsed leniently as
    -- 2021-01-10 -- confirm this on your Hive version.
    select stack(2,
        1, '2019-11-10 10:10:10.000',
        2, '2019-25-10 10:10:10.000' --big difference in timestamp here
    ) as (number_nb, ts)
)
select
    a_number_nb,
    a_ts,
    b_number_nb,
    b_ts,
    ts_diff
from (
    -- no filtering here: every row of A comes through, B columns NULL when unmatched
    select
        a.number_nb as a_number_nb,
        a.ts as a_ts,
        b.number_nb as b_number_nb,
        b.ts as b_ts,
        unix_timestamp(a.ts) - unix_timestamp(b.ts) as ts_diff
    from A a
    left join B b
        on a.number_nb = b.number_nb
) joined;
返回:
a_number_nb a_ts b_number_nb b_ts ts_diff
1 2019-11-10 10:10:10.000 1 2019-11-10 10:10:10.000 0
2 2019-12-10 10:10:10.000 2 2019-25-10 10:10:10.000 -34300800
4 2019-25-10 10:10:10.000 NULL NULL NULL
现在,如果您添加WHERE子句
where b_number_nb is null or (ts_diff >= 0 and ts_diff <= 86400)
甚至是这个:
where ts_diff is null or (ts_diff >= 0 and ts_diff <= 86400)
结果是:
a_number_nb a_ts b_number_nb b_ts ts_diff
1 2019-11-10 10:10:10.000 1 2019-11-10 10:10:10.000 0
4 2019-25-10 10:10:10.000 NULL NULL NULL
由于不满足条件 (ts_diff between 0 and 86400),第二行被过滤掉了。
如果您实际上想要此结果:
a_number_nb a_ts b_number_nb b_ts
1 2019-11-10 10:10:10.000 1 2019-11-10 10:10:10.000
2 2019-12-10 10:10:10.000 NULL NULL
4 2019-25-10 10:10:10.000 NULL NULL
那么请删除 WHERE 条件,改用 CASE 表达式:
... --Use same CTEs as in first example, skipped
-- Instead of filtering rows, blank out the B columns whenever ts_diff is not
-- within 0..86400 seconds, so no row of A is dropped.
-- ts_diff itself is computed in the subquery but not projected.
select
    a_number_nb,
    a_ts,
    case
        when ts_diff >= 0 and ts_diff <= 86400 then b_number_nb
        else null  -- out of window, or no B match at all
    end as b_number_nb,
    case
        when ts_diff >= 0 and ts_diff <= 86400 then b_ts
        else null
    end as b_ts
from (
    select
        a.number_nb as a_number_nb,
        a.ts as a_ts,
        b.number_nb as b_number_nb,
        b.ts as b_ts,
        unix_timestamp(a.ts) - unix_timestamp(b.ts) as ts_diff
    from A a
    left join B b
        on a.number_nb = b.number_nb
) s