我正在研究 BigQuery 的存储过程（脚本）。我最担心的问题之一是：即使是只操作单个局部变量、不涉及其他任何东西的纯代码，运行起来也非常慢。
-- Micro-benchmark: time a WHILE loop whose body only increments a script variable.
DECLARE i INT64 DEFAULT 0;     -- loop counter
DECLARE l INT64 DEFAULT 1000;  -- iteration limit
DECLARE started, ended TIMESTAMP;

SET started = CURRENT_TIMESTAMP();

WHILE i < l DO
  SET i = i + 1;
END WHILE;

SET ended = CURRENT_TIMESTAMP();

-- Report the final counter, the limit, both timestamps and the elapsed whole seconds.
SELECT
  i,
  l,
  started,
  ended,
  TIMESTAMP_DIFF(ended, started, SECOND);
-- 12 ms per iteration!!!
-- note that job scheduling is not included
上面的代码不触及任何表，也不会导致任何 IO 或 CPU 缓存未命中。
如何让代码更快?
把计算放进一条 SELECT 查询中执行，例如使用 JavaScript UDF。
-- Same counting benchmark, but the loop runs inside a JavaScript UDF, so the
-- script issues one query instead of one SET statement per iteration.
DECLARE i INT64 DEFAULT 0;      -- start value; overwritten with the UDF result
DECLARE l INT64 DEFAULT 19000;  -- iteration limit
DECLARE started TIMESTAMP;
DECLARE ended TIMESTAMP;

SET started = CURRENT_TIMESTAMP();

-- fun(i, l): counts upward from i in steps of 1 until the value is no longer
-- below l, then returns it (i itself when i >= l).
-- The function is created after `started` is captured, so its creation is part
-- of the measured interval.
CREATE TEMP FUNCTION fun(i INT64, l INT64)
RETURNS INT64
LANGUAGE js AS
"""
// Coerce both arguments once so the loop condition is always a numeric
// comparison, even if the INT64 values reach JavaScript as strings
// (a string-to-string "<" would compare lexicographically).
var tmp = Number(i);
var limit = Number(l);
while (tmp < limit) {
  tmp = tmp + 1;
}
return tmp;
""";

SET i = (SELECT fun(i, l));

SET ended = CURRENT_TIMESTAMP();

-- Report the final counter, the limit, both timestamps and the elapsed whole seconds.
SELECT
  i,
  l,
  started,
  ended,
  TIMESTAMP_DIFF(ended, started, SECOND);