MongoDB 多键索引性能

问题描述 投票:0回答:2

背景

我有一组具有如下文档结构的用户:

{
    "_id" : ObjectId("54e61137cca5d2ff0a8b4567"),
    "login" : "test1",
    "emails" : [
        {
            "email" : "[email protected]",
            "is_primary" : true,
            "_id" : ObjectId("57baf3e97323afb2688e639c")
        },
        {
            "email" : "[email protected]",
            "is_primary" : false,
            "_id" : ObjectId("57baf3e97323afb2688e639d")
        }
    ]
}

索引:

{
    "v" : 1,
    "key" : {
        "login" : 1
    },
    "name" : "login_1",
    "ns" : "mydb.users",
    "background" : true
},
{
    "v" : 1,
    "key" : {
        "emails.email" : 1
    },
    "name" : "emails.email_1",
    "ns" : "mydb.users"
}

文档数量约为 700000 份

场景

为了解释通过登录搜索用户,我这样做:

rs0:PRIMARY> db.users.explain('executionStats').find({'login' : /test123123123/})
{
    "queryPlanner" : {
        "plannerVersion" : 1,
        "namespace" : "mydb.users",
        "indexFilterSet" : false,
        "parsedQuery" : {
            "login" : /test123123123/
        },
        "winningPlan" : {
            "stage" : "FETCH",
            "inputStage" : {
                "stage" : "IXSCAN",
                "filter" : {
                    "login" : /test123123123/
                },
                "keyPattern" : {
                    "login" : 1
                },
                "indexName" : "login_1",
                "isMultiKey" : false,
                "direction" : "forward",
                "indexBounds" : {
                    "login" : [
                        "[\"\", {})",
                        "[/test123123123/, /test123123123/]"
                    ]
                }
            }
        },
        "rejectedPlans" : [ ]
    },
    "executionStats" : {
        "executionSuccess" : true,
        "nReturned" : 0,
        "executionTimeMillis" : 1040,
        "totalKeysExamined" : 698993,
        "totalDocsExamined" : 0,
        "executionStages" : {
            "stage" : "FETCH",
            "nReturned" : 0,
            "executionTimeMillisEstimate" : 930,
            "works" : 698994,
            "advanced" : 0,
            "needTime" : 698993,
            "needFetch" : 0,
            "saveState" : 5460,
            "restoreState" : 5460,
            "isEOF" : 1,
            "invalidates" : 0,
            "docsExamined" : 0,
            "alreadyHasObj" : 0,
            "inputStage" : {
                "stage" : "IXSCAN",
                "filter" : {
                    "login" : /test123123123/
                },
                "nReturned" : 0,
                "executionTimeMillisEstimate" : 920,
                "works" : 698993,
                "advanced" : 0,
                "needTime" : 698993,
                "needFetch" : 0,
                "saveState" : 5460,
                "restoreState" : 5460,
                "isEOF" : 1,
                "invalidates" : 0,
                "keyPattern" : {
                    "login" : 1
                },
                "indexName" : "login_1",
                "isMultiKey" : false,
                "direction" : "forward",
                "indexBounds" : {
                    "login" : [
                        "[\"\", {})",
                        "[/test123123123/, /test123123123/]"
                    ]
                },
                "keysExamined" : 698993,
                "dupsTested" : 0,
                "dupsDropped" : 0,
                "seenInvalidated" : 0,
                "matchTested" : 0
            }
        }
    },
    "serverInfo" : {
        "host" : "myhost",
        "port" : 27017,
        "version" : "3.0.12",
        "gitVersion" : "33934938e0e95d534cebbaff656cde916b9c3573"
    },
    "ok" : 1
}

你可以看到executionStats.executionStages.inputStage.nReturned是0,executionStats.totalDocsExamined是0。没关系,我猜没有像输入的登录文档。但如果我想通过电子邮件搜索用户,我接下来会做:

rs0:PRIMARY> db.users.explain('executionStats').find({'emails.email' : /test123123123/})
{
    "queryPlanner" : {
        "plannerVersion" : 1,
        "namespace" : "mydb.users",
        "indexFilterSet" : false,
        "parsedQuery" : {
            "emails.email" : /test123123123/
        },
        "winningPlan" : {
            "stage" : "FETCH",
            "filter" : {
                "emails.email" : /test123123123/
            },
            "inputStage" : {
                "stage" : "IXSCAN",
                "keyPattern" : {
                    "emails.email" : 1
                },
                "indexName" : "emails.email_1",
                "isMultiKey" : true,
                "direction" : "forward",
                "indexBounds" : {
                    "emails.email" : [
                        "[\"\", {})",
                        "[/test123123123/, /test123123123/]"
                    ]
                }
            }
        },
        "rejectedPlans" : [ ]
    },
    "executionStats" : {
        "executionSuccess" : true,
        "nReturned" : 0,
        "executionTimeMillis" : 7666,
        "totalKeysExamined" : 699016,
        "totalDocsExamined" : 698993,
        "executionStages" : {
            "stage" : "FETCH",
            "filter" : {
                "emails.email" : /test123123123/
            },
            "nReturned" : 0,
            "executionTimeMillisEstimate" : 7355,
            "works" : 699017,
            "advanced" : 0,
            "needTime" : 699016,
            "needFetch" : 0,
            "saveState" : 5462,
            "restoreState" : 5462,
            "isEOF" : 1,
            "invalidates" : 0,
            "docsExamined" : 698993,
            "alreadyHasObj" : 0,
            "inputStage" : {
                "stage" : "IXSCAN",
                "nReturned" : 698993,
                "executionTimeMillisEstimate" : 1630,
                "works" : 699016,
                "advanced" : 698993,
                "needTime" : 23,
                "needFetch" : 0,
                "saveState" : 5462,
                "restoreState" : 5462,
                "isEOF" : 1,
                "invalidates" : 0,
                "keyPattern" : {
                    "emails.email" : 1
                },
                "indexName" : "emails.email_1",
                "isMultiKey" : true,
                "direction" : "forward",
                "indexBounds" : {
                    "emails.email" : [
                        "[\"\", {})",
                        "[/test123123123/, /test123123123/]"
                    ]
                },
                "keysExamined" : 699016,
                "dupsTested" : 699016,
                "dupsDropped" : 23,
                "seenInvalidated" : 0,
                "matchTested" : 0
            }
        }
    },
    "serverInfo" : {
        "host" : "myhost",
        "port" : 27017,
        "version" : "3.0.12",
        "gitVersion" : "33934938e0e95d534cebbaff656cde916b9c3573"
    },
    "ok" : 1
}

这里executionStats.executionStages.inputStage.nReturned(和executionStats.totalDocsExamined)等于698993(executionStats.nReturned是0,就像第一个查询一样)

问题

为什么当我在 ixscan 阶段使用多键索引 (users.user) 进行搜索时,会返回所有集合,而 fetch 阶段会出现所有集合。但是,如果我使用非多键索引(登录)搜索,ixscan 阶段会扫描预期值,并在获取阶段给出我想要的内容。

UPD:当我使用正则表达式而不是 /smth/ 时,而是 /^smth/ 然后通过 emails.email 字段扫描也返回 0 个元素。为什么多键和普通索引对于像 /smth/ 这样的正则表达式给出不同的结果?

mongodb mongodb-query database-indexes mongodb-indexes multikey
2个回答
1
投票

因为是多键索引。 在这里解释

当查询过滤器指定整个数组的精确匹配时,MongoDB 可以使用多键索引来查找查询数组的第一个元素,但不能使用多键索引扫描来查找整个数组。相反,在使用多键索引查找查询数组的第一个元素后,MongoDB 会检索关联的文档并筛选其数组与查询中的数组匹配的文档。


0
投票

当我使用正则表达式时,不是/smth/,而是/^smth/,然后扫描 通过 emails.email 字段也返回 0 个元素。为什么多键和 普通索引给我不同的正则表达式结果,例如 /smth/

这是因为 MongoDB 能够优化搜索,因为后一个查询包含一个锚点,特别是它应该以给定的字符串开头。因此它可以构造一个已索引的范围并很快找不到结果。否则,仅使用“包含”之类的搜索,无法完成此类优化(不会构建索引元素范围),因此 MongoDB 必须使用全集合扫描(返回所有元素)。请参阅https://stackoverflow.com/a/33219393/1527469

© www.soinside.com 2019 - 2024. All rights reserved.