我有一个包含200万行的表。我已经创建了索引 INDEX(基于唯一列的表达式)。该表达式会将列的值映射到一个较小的数值空间,这意味着该表达式的结果不是唯一的。现在,我想在(该表达式的DISTINCT结果)上创建索引。
我想要在表达式上再次创建索引的原因是DISTINCT和GROUP BY查询运行缓慢(约900ms)。
我需要的是这个,
CREATE INDEX idx ON mytable(DISTINCT myColumn|9223372036854775806); -- invalid
我目前已有的是:
CREATE INDEX idx ON mytable(myColumn|9223372036854775806);
我的最终目标是使以下查询更快(<100ms)
SELECT id FROM mytable GROUP BY myColumn|9223372036854775806;
当前执行上述查询需要900毫秒
DISTINCT只能用作SELECT的一部分,即它会删除重复的行。索引必须为其涵盖的每一行都包含一个条目,因此DISTINCT会违背索引的目的。
CREATE INDEX idx ON mytable(myColumn|9223372036854775806);
所做的是将索引中的值分为两组,因为 |9223372036854775806
只会生成9223372036854775806或9223372036854775807,因此DISTINCT将只得到2行,例如使用
SELECT DISTINCT mycolumn,myColumn|9223372036854775806 AS x FROM mytable;
结果(从填充了一百万行随机值的表中得出):-
部分索引(Partial Index)使用WHERE子句来减少被索引的行;对于这种带WHERE子句的索引,只有当查询满足2条规则之一时,查询计划器才会选择该索引。
从理论上讲,您将使用(仅显示2行;注意:SQLite不允许在部分索引的WHERE子句中使用子查询,因此下面的语句仅用于说明思路,无法实际执行):-
CREATE INDEX idx3 ON mytable(myColumn) WHERE myColumn = (SELECT DISTINCT myColumn|9223372036854775806 FROM mytable);
因此,您似乎需要确定一种创建索引的方法,例如,您可以将其分成10个部分索引:-
-- Ten partial indexes, one per tenth of the range 0 .. 9223372036854775806.
-- Bucket k covers [(MAX / 10) * (k - 1), (MAX / 10) * k). The bounds must be
-- multiples of MAX / 10: dividing by 20, 30, ... gives SMALLER numbers, which
-- would make the middle ranges empty and the last index cover almost every row.
CREATE INDEX idx10 ON mytable(mycolumn) WHERE mycolumn < 9223372036854775806 / 10;
CREATE INDEX idx20 ON mytable(mycolumn) WHERE mycolumn >= (9223372036854775806 / 10) * 1 AND mycolumn < (9223372036854775806 / 10) * 2;
CREATE INDEX idx30 ON mytable(mycolumn) WHERE mycolumn >= (9223372036854775806 / 10) * 2 AND mycolumn < (9223372036854775806 / 10) * 3;
-- ... and so on: idx40 through idx90 follow the same pattern ...
CREATE INDEX idx100 ON mytable(mycolumn) WHERE mycolumn >= (9223372036854775806 / 10) * 9;
在这种情况下:-
EXPLAIN QUERY PLAN
SELECT mycolumn FROM mytable WHERE mycolumn < 9223372036854775806 / 10 LIMIT 100;
结果:-
即查询计划器选择了idx10,因为它确定该SELECT的WHERE子句适合使用索引idx10。
以上内容部分是通过使用以下代码进行测试而生成的,您可能会发现有帮助:-
-- Reset state so the script can be re-run from scratch: remove the test table
-- and every index name this script may have created on a previous run.
DROP TABLE IF EXISTS mytable;
-- NOTE(review): SQLite's DROP TABLE documentation says indexes belonging to the
-- table are removed with it, so the statements below are expected to be no-ops;
-- the IF EXISTS guards make them harmless either way.
DROP INDEX IF EXISTS idx1;
-- idx2 is never actually created (its CREATE INDEX is commented out further
-- down as a syntax error); the drop is kept for symmetry.
DROP INDEX IF EXISTS idx2;
-- idx10 .. idx100 are the ten partial indexes on mycolumn.
DROP INDEX IF EXISTS idx10;
DROP INDEX IF EXISTS idx20;
DROP INDEX IF EXISTS idx30;
DROP INDEX IF EXISTS idx40;
DROP INDEX IF EXISTS idx50;
DROP INDEX IF EXISTS idx60;
DROP INDEX IF EXISTS idx70;
DROP INDEX IF EXISTS idx80;
DROP INDEX IF EXISTS idx90;
DROP INDEX IF EXISTS idx100;
/* Test table: mycolumn defaults to a non-negative pseudo-random integer */
CREATE TABLE IF NOT EXISTS mytable (id INTEGER PRIMARY KEY, mycolumn INTEGER DEFAULT (CAST(abs(random()) AS INTEGER)));

/* Expression index from the question. For non-negative values the expression
   yields only 9223372036854775806 or 9223372036854775807. */
CREATE INDEX idx1 ON mytable(myColumn|9223372036854775806);

/* Two forms that do NOT work, kept only as documentation:
   - DISTINCT is not allowed inside an index column list (syntax error).
   - NOTE(review): per the SQLite partial-index documentation, the WHERE clause
     of a partial index may not contain a subquery, so idx3 cannot be created. */
-- CREATE INDEX idx2 ON mytable(DISTINCT myColumn|9223372036854775806); CANNOT BE USED AS DISTINCT is syntax error.
-- CREATE INDEX idx3 ON mytable(myColumn) WHERE myColumn = (SELECT DISTINCT myColumn|9223372036854775806 FROM mytable);

/* Ten partial indexes, one per tenth of the range 0 .. 9223372036854775806.
   Bucket k covers [(MAX / 10) * (k - 1), (MAX / 10) * k).
   The bounds are multiples of MAX / 10. Dividing by 20, 30, ... instead gives
   ever smaller numbers, so ranges such as ">= MAX / 10 AND < MAX / 20" can
   never match a row and ">= MAX / 90" matches nearly all of them.
   idx10 keeps its predicate exactly as "mycolumn < 9223372036854775806 / 10"
   so a query written with that same WHERE term can still use it. */
CREATE INDEX idx10 ON mytable(mycolumn) WHERE mycolumn < 9223372036854775806 / 10;
CREATE INDEX idx20 ON mytable(mycolumn) WHERE mycolumn >= (9223372036854775806 / 10) * 1 AND mycolumn < (9223372036854775806 / 10) * 2;
CREATE INDEX idx30 ON mytable(mycolumn) WHERE mycolumn >= (9223372036854775806 / 10) * 2 AND mycolumn < (9223372036854775806 / 10) * 3;
CREATE INDEX idx40 ON mytable(mycolumn) WHERE mycolumn >= (9223372036854775806 / 10) * 3 AND mycolumn < (9223372036854775806 / 10) * 4;
CREATE INDEX idx50 ON mytable(mycolumn) WHERE mycolumn >= (9223372036854775806 / 10) * 4 AND mycolumn < (9223372036854775806 / 10) * 5;
CREATE INDEX idx60 ON mytable(mycolumn) WHERE mycolumn >= (9223372036854775806 / 10) * 5 AND mycolumn < (9223372036854775806 / 10) * 6;
CREATE INDEX idx70 ON mytable(mycolumn) WHERE mycolumn >= (9223372036854775806 / 10) * 6 AND mycolumn < (9223372036854775806 / 10) * 7;
CREATE INDEX idx80 ON mytable(mycolumn) WHERE mycolumn >= (9223372036854775806 / 10) * 7 AND mycolumn < (9223372036854775806 / 10) * 8;
CREATE INDEX idx90 ON mytable(mycolumn) WHERE mycolumn >= (9223372036854775806 / 10) * 8 AND mycolumn < (9223372036854775806 / 10) * 9;
CREATE INDEX idx100 ON mytable(mycolumn) WHERE mycolumn >= (9223372036854775806 / 10) * 9;
/* Populate mytable with 1,000,000 rows of non-negative pseudo-random integers.
   The recursive CTE is only a row generator; its counter column is not stored. */
WITH RECURSIVE generated(row_no, rand_val) AS (
    SELECT
        1,
        CAST(abs(random()) AS INTEGER)
    UNION ALL
    SELECT
        row_no + 1,
        CAST(abs(random()) AS INTEGER)
    FROM generated
    LIMIT 1000000
)
INSERT INTO mytable (mycolumn)
SELECT rand_val
FROM generated;
/* First 100 stored values next to the value of the indexed expression */
SELECT
    mycolumn,
    myColumn|9223372036854775806 AS x
FROM mytable
LIMIT 100;

/* Ask the query planner how it would run the DISTINCT and GROUP BY forms */
EXPLAIN QUERY PLAN
SELECT DISTINCT
    mycolumn,
    myColumn|9223372036854775806 AS x
FROM mytable;

-- An optional "(SELECT count() FROM mytable) AS rows" column was left disabled here.
EXPLAIN QUERY PLAN
SELECT
    *,
    mycolumn|9223372036854775806 AS x
FROM mytable
GROUP BY myColumn|9223372036854775806;

/* Now actually run the two statements whose plans were shown above */
SELECT DISTINCT
    mycolumn,
    myColumn|9223372036854775806 AS x
FROM mytable
LIMIT 100;

-- An optional "(SELECT count() FROM mytable) AS rows" column was left disabled here.
SELECT
    *,
    mycolumn|9223372036854775806 AS x
FROM mytable
GROUP BY myColumn|9223372036854775806;

/* Plan, then result, for a range query whose WHERE term is written exactly
   like the predicate of partial index idx10 */
EXPLAIN QUERY PLAN
SELECT mycolumn
FROM mytable
WHERE mycolumn < 9223372036854775806 / 10
LIMIT 100;

SELECT mycolumn
FROM mytable
WHERE mycolumn < 9223372036854775806 / 10
LIMIT 100;
/* CLEAN UP */
-- Remove everything the script created so it leaves the database as it found it.
DROP TABLE IF EXISTS mytable;
-- NOTE(review): SQLite's DROP TABLE documentation says indexes belonging to the
-- table are removed with it, so the statements below are expected to be no-ops;
-- the IF EXISTS guards make them harmless either way.
DROP INDEX IF EXISTS idx1;
-- idx2 is never actually created (its CREATE INDEX is commented out as a
-- syntax error); the drop is kept for symmetry.
DROP INDEX IF EXISTS idx2;
-- idx10 .. idx100 are the ten partial indexes on mycolumn.
DROP INDEX IF EXISTS idx10;
DROP INDEX IF EXISTS idx20;
DROP INDEX IF EXISTS idx30;
DROP INDEX IF EXISTS idx40;
DROP INDEX IF EXISTS idx50;
DROP INDEX IF EXISTS idx60;
DROP INDEX IF EXISTS idx70;
DROP INDEX IF EXISTS idx80;
DROP INDEX IF EXISTS idx90;
DROP INDEX IF EXISTS idx100;
以下是上述运行中消息和时间的示例:-
DROP TABLE IF EXISTS mytable
> OK
> Time: 1.173s
DROP INDEX IF EXISTS idx1
> OK
> Time: 0s
DROP INDEX IF EXISTS idx2
> OK
> Time: 0s
DROP INDEX IF EXISTS idx10
> OK
> Time: 0s
DROP INDEX IF EXISTS idx20
> OK
> Time: 0s
DROP INDEX IF EXISTS idx30
> OK
> Time: 0s
DROP INDEX IF EXISTS idx40
> OK
> Time: 0s
DROP INDEX IF EXISTS idx50
> OK
> Time: 0s
DROP INDEX IF EXISTS idx60
> OK
> Time: 0s
DROP INDEX IF EXISTS idx70
> OK
> Time: 0s
DROP INDEX IF EXISTS idx80
> OK
> Time: 0s
DROP INDEX IF EXISTS idx90
> OK
> Time: 0s
DROP INDEX IF EXISTS idx100
> OK
> Time: 0s
CREATE TABLE IF NOT EXISTS mytable (id INTEGER PRIMARY KEY, mycolumn INTEGER DEFAULT (CAST(abs(random()) AS INTEGER)))
> OK
> Time: 0.056s
CREATE INDEX idx1 ON mytable(myColumn|9223372036854775806)
> OK
> Time: 0.024s
-- CREATE INDEX idx2 ON mytable(DISTINCT myColumn|9223372036854775806); CANNOT BE USED AS DISTINCT is syntax error.
-- CREATE INDEX idx3 ON mytable(myColumn) WHERE myColumn = (SELECT DISTINCT myColumn|9223372036854775806 FROM mytable);
CREATE INDEX idx10 ON mytable(mycolumn) WHERE mycolumn < 9223372036854775806 / 10
> OK
> Time: 0.024s
CREATE INDEX idx20 ON mytable(mycolumn) WHERE mycolumn >= 9223372036854775806 / 10 AND mycolumn < 9223372036854775806 / 20
> OK
> Time: 0.024s
CREATE INDEX idx30 ON mytable(mycolumn) WHERE mycolumn >= 9223372036854775806 / 20 AND mycolumn < 9223372036854775806 / 30
> OK
> Time: 0.024s
CREATE INDEX idx40 ON mytable(mycolumn) WHERE mycolumn >= 9223372036854775806 / 30 AND mycolumn < 9223372036854775806 / 40
> OK
> Time: 0.024s
CREATE INDEX idx50 ON mytable(mycolumn) WHERE mycolumn >= 9223372036854775806 / 40 AND mycolumn < 9223372036854775806 / 50
> OK
> Time: 0.024s
CREATE INDEX idx60 ON mytable(mycolumn) WHERE mycolumn >= 9223372036854775806 / 50 AND mycolumn < 9223372036854775806 / 60
> OK
> Time: 0.024s
CREATE INDEX idx70 ON mytable(mycolumn) WHERE mycolumn >= 9223372036854775806 / 60 AND mycolumn < 9223372036854775806 / 70
> OK
> Time: 0.024s
CREATE INDEX idx80 ON mytable(mycolumn) WHERE mycolumn >= 9223372036854775806 / 70 AND mycolumn < 9223372036854775806 / 80
> OK
> Time: 0.024s
CREATE INDEX idx90 ON mytable(mycolumn) WHERE mycolumn >= 9223372036854775806 / 80 AND mycolumn < 9223372036854775806 / 90
> OK
> Time: 0.024s
CREATE INDEX idx100 ON mytable(mycolumn) WHERE mycolumn >= 9223372036854775806 / 90
> OK
> Time: 0.024s
/* Load some data */
WITH RECURSIVE cte(x,y) AS (
SELECT 1,CAST(abs(random()) AS INTEGER)
UNION ALL SELECT x+1,CAST(abs(random()) AS INTEGER) FROM cte LIMIT 1000000
)
INSERT INTO mytable (mycolumn) SELECT y FROM cte
> Affected rows: 1000000
> Time: 14.096s
/* Show some sample data */
SELECT mycolumn,myColumn|9223372036854775806 AS x FROM mytable LIMIT 100
> OK
> Time: 0.001s
/* LOOK AT WHAT THE QUERY PLANNER COMES UP WITH */
EXPLAIN QUERY PLAN
SELECT DISTINCT mycolumn,myColumn|9223372036854775806 AS x FROM mytable
> OK
> Time: 0s
EXPLAIN QUERY PLAN
SELECT /*(SELECT count() FROM mytable) AS rows,*/*, mycolumn|9223372036854775806 AS x FROM mytable GROUP BY myColumn|9223372036854775806
> OK
> Time: 0s
SELECT DISTINCT mycolumn,myColumn|9223372036854775806 AS x FROM mytable LIMIT 100
> OK
> Time: 0.001s
SELECT /*(SELECT count() FROM mytable) AS rows,*/*, mycolumn|9223372036854775806 AS x FROM mytable GROUP BY myColumn|9223372036854775806
> OK
> Time: 0.093s
EXPLAIN QUERY PLAN
SELECT mycolumn FROM mytable WHERE mycolumn < 9223372036854775806 / 10 LIMIT 100
> OK
> Time: 0s
SELECT mycolumn FROM mytable WHERE mycolumn < 9223372036854775806 / 10 LIMIT 100
> OK
> Time: 0s
/* CLEAN UP */
DROP TABLE IF EXISTS mytable
> OK
> Time: 0.793s
DROP INDEX IF EXISTS idx1
> OK
> Time: 0s
DROP INDEX IF EXISTS idx2
> OK
> Time: 0s
DROP INDEX IF EXISTS idx10
> OK
> Time: 0s
DROP INDEX IF EXISTS idx20
> OK
> Time: 0s
DROP INDEX IF EXISTS idx30
> OK
> Time: 0s
DROP INDEX IF EXISTS idx40
> OK
> Time: 0s
DROP INDEX IF EXISTS idx50
> OK
> Time: 0s
DROP INDEX IF EXISTS idx60
> OK
> Time: 0s
DROP INDEX IF EXISTS idx70
> OK
> Time: 0s
DROP INDEX IF EXISTS idx80
> OK
> Time: 0s
DROP INDEX IF EXISTS idx90
> OK
> Time: 0s
DROP INDEX IF EXISTS idx100
> OK
> Time: 0s