查询文本搜索索引上的复合文本搜索和时间戳 mongodb

问题描述 投票:1回答:1

我有一个集合(collection),其中约有 6500 万条如下所示的文档。

    {
    "_id" : ObjectId("5e0b814660da38d499ecf178"),
    "brands" : null,
    "client_id" : null,
    "code_co_owner" : ",7359562, ",
    "code_segment" : "7359562",
    "core" : "",
    "created" : "01-01-2020",
    "created_full" : "01-01-2020 00:00:27",
    "created_int" : NumberLong(1577811627),
    "email" : ",phamthanhlam17_gmail_com, "
.....
}

我创建了一个复合索引 (email, created_int):{"email": "text", "created_int": -1},用于搜索名字并按 created_int 的范围进行过滤,但我发现它的搜索性能很差。

我尝试对该查询使用 explain。

    db.getCollection('profile_20201').explain().find({"$text":{"$search":"phamthanhlam17_gmail_com"},
"created_int":{"$lte":1585627013, "$gte":1583035013}}).count()

explain 的结果是:

{
    "queryPlanner" : {
        "plannerVersion" : 1,
        "indexFilterSet" : false,
        "parsedQuery" : {
            "$and" : [ 
                {
                    "created_int" : {
                        "$lte" : 1585627013.0
                    }
                }, 
                {
                    "created_int" : {
                        "$gte" : 1583035013.0
                    }
                }, 
                {
                    "$text" : {
                        "$search" : "phamthanhlam17_gmail_com",
                        "$language" : "english",
                        "$caseSensitive" : false,
                        "$diacriticSensitive" : false
                    }
                }
            ]
        },
        "winningPlan" : {
            "stage" : "COUNT",
            "inputStage" : {
                "stage" : "TEXT",
                "indexPrefix" : {},
                "indexName" : "email_text_created_int_-1",
                "parsedTextQuery" : {
                    "terms" : [ 
                        "phamthanhlam17_gmail_com"
                    ],
                    "negatedTerms" : [],
                    "phrases" : [],
                    "negatedPhrases" : []
                },
                "textIndexVersion" : 3,
                "inputStage" : {
                    "stage" : "TEXT_MATCH",
                    "inputStage" : {
                        "stage" : "FETCH",
                        "inputStage" : {
                            "stage" : "OR",
                            "filter" : {
                                "$and" : [ 
                                    {
                                        "created_int" : {
                                            "$lte" : 1585627013.0
                                        }
                                    }, 
                                    {
                                        "created_int" : {
                                            "$gte" : 1583035013.0
                                        }
                                    }
                                ]
                            },
                            "inputStage" : {
                                "stage" : "IXSCAN",
                                "keyPattern" : {
                                    "_fts" : "text",
                                    "_ftsx" : 1,
                                    "created_int" : -1.0
                                },
                                "indexName" : "email_text_created_int_-1",
                                "isMultiKey" : true,
                                "isUnique" : false,
                                "isSparse" : false,
                                "isPartial" : false,
                                "indexVersion" : 2,
                                "direction" : "backward",
                                "indexBounds" : {}
                            }
                        }
                    }
                }
            }
        },
        "rejectedPlans" : []
    },
    "serverInfo" : {
    },
    "ok" : 1.0
}

以下是 explain 的执行统计信息(executionStats):

    "queryPlanner" : {
        "plannerVersion" : 1,
        "namespace" : "namespace",
        "indexFilterSet" : false,
        "parsedQuery" : {
            "$and" : [ 
                {
                    "created_int" : {
                        "$lte" : 1585627013.0
                    }
                }, 
                {
                    "created_int" : {
                        "$gte" : 1583035013.0
                    }
                }, 
                {
                    "$text" : {
                        "$search" : "phamthanhlam17_gmail_com",
                        "$language" : "english",
                        "$caseSensitive" : false,
                        "$diacriticSensitive" : false
                    }
                }
            ]
        },
        "winningPlan" : {
            "stage" : "COUNT",
            "inputStage" : {
                "stage" : "TEXT",
                "indexPrefix" : {},
                "indexName" : "email_text_created_int_-1",
                "parsedTextQuery" : {
                    "terms" : [ 
                        "phamthanhlam17_gmail_com"
                    ],
                    "negatedTerms" : [],
                    "phrases" : [],
                    "negatedPhrases" : []
                },
                "textIndexVersion" : 3,
                "inputStage" : {
                    "stage" : "TEXT_MATCH",
                    "inputStage" : {
                        "stage" : "FETCH",
                        "inputStage" : {
                            "stage" : "OR",
                            "filter" : {
                                "$and" : [ 
                                    {
                                        "created_int" : {
                                            "$lte" : 1585627013.0
                                        }
                                    }, 
                                    {
                                        "created_int" : {
                                            "$gte" : 1583035013.0
                                        }
                                    }
                                ]
                            },
                            "inputStage" : {
                                "stage" : "IXSCAN",
                                "keyPattern" : {
                                    "_fts" : "text",
                                    "_ftsx" : 1,
                                    "created_int" : -1.0
                                },
                                "indexName" : "email_text_created_int_-1",
                                "isMultiKey" : true,
                                "isUnique" : false,
                                "isSparse" : false,
                                "isPartial" : false,
                                "indexVersion" : 2,
                                "direction" : "backward",
                                "indexBounds" : {}
                            }
                        }
                    }
                }
            }
        },
        "rejectedPlans" : []
    },
    "executionStats" : {
        "executionSuccess" : true,
        "nReturned" : 0,
        "executionTimeMillis" : 1499057,
        "totalKeysExamined" : 72544123,
        "totalDocsExamined" : 39448083,
        "executionStages" : {
            "stage" : "COUNT",
            "nReturned" : 0,
            "executionTimeMillisEstimate" : 1483861,
            "works" : 72544124,
            "advanced" : 0,
            "needTime" : 72544123,
            "needYield" : 0,
            "saveState" : 578233,
            "restoreState" : 578233,
            "isEOF" : 1,
            "invalidates" : 0,
            "nCounted" : 39448083,
            "nSkipped" : 0,
            "inputStage" : {
                "stage" : "TEXT",
                "nReturned" : 39448083,
                "executionTimeMillisEstimate" : 1475831,
                "works" : 72544124,
                "advanced" : 39448083,
                "needTime" : 33096040,
                "needYield" : 0,
                "saveState" : 578233,
                "restoreState" : 578233,
                "isEOF" : 1,
                "invalidates" : 0,
                "indexPrefix" : {},
                "indexName" : "email_text_created_int_-1",
                "parsedTextQuery" : {
                    "terms" : [ 
                        "phamthanhlam17_gmail_com"
                    ],
                    "negatedTerms" : [],
                    "phrases" : [],
                    "negatedPhrases" : []
                },
                "textIndexVersion" : 3,
                "inputStage" : {
                    "stage" : "TEXT_MATCH",
                    "nReturned" : 39448083,
                    "executionTimeMillisEstimate" : 1473041,
                    "works" : 72544124,
                    "advanced" : 39448083,
                    "needTime" : 33096040,
                    "needYield" : 0,
                    "saveState" : 578233,
                    "restoreState" : 578233,
                    "isEOF" : 1,
                    "invalidates" : 0,
                    "docsRejected" : 0,
                    "inputStage" : {
                        "stage" : "FETCH",
                        "nReturned" : 39448083,
                        "executionTimeMillisEstimate" : 1465951,
                        "works" : 72544124,
                        "advanced" : 39448083,
                        "needTime" : 33096040,
                        "needYield" : 0,
                        "saveState" : 578233,
                        "restoreState" : 578233,
                        "isEOF" : 1,
                        "invalidates" : 0,
                        "docsExamined" : 39448083,
                        "alreadyHasObj" : 0,
                        "inputStage" : {
                            "stage" : "OR",
                            "filter" : {
                                "$and" : [ 
                                    {
                                        "created_int" : {
                                            "$lte" : 1585627013.0
                                        }
                                    }, 
                                    {
                                        "created_int" : {
                                            "$gte" : 1583035013.0
                                        }
                                    }
                                ]
                            },
                            "nReturned" : 39448083,
                            "executionTimeMillisEstimate" : 439664,
                            "works" : 72544124,
                            "advanced" : 39448083,
                            "needTime" : 33096040,
                            "needYield" : 0,
                            "saveState" : 578233,
                            "restoreState" : 578233,
                            "isEOF" : 1,
                            "invalidates" : 0,
                            "dupsTested" : 72544123,
                            "dupsDropped" : 0,
                            "recordIdsForgotten" : 0,
                            "inputStage" : {
                                "stage" : "IXSCAN",
                                "nReturned" : 72544123,
                                "executionTimeMillisEstimate" : 291188,
                                "works" : 72544124,
                                "advanced" : 72544123,
                                "needTime" : 0,
                                "needYield" : 0,
                                "saveState" : 578233,
                                "restoreState" : 578233,
                                "isEOF" : 1,
                                "invalidates" : 0,
                                "keyPattern" : {
                                    "_fts" : "text",
                                    "_ftsx" : 1,
                                    "created_int" : -1.0
                                },
                                "indexName" : "email_text_created_int_-1",
                                "isMultiKey" : true,
                                "isUnique" : false,
                                "isSparse" : false,
                                "isPartial" : false,
                                "indexVersion" : 2,
                                "direction" : "backward",
                                "indexBounds" : {},
                                "keysExamined" : 72544123,
                                "seeks" : 1,
                                "dupsTested" : 72544123,
                                "dupsDropped" : 0,
                                "seenInvalidated" : 0
                            }
                        }
                    }
                }
            }
        }
    },
    "serverInfo" : {
    },
    "ok" : 1.0
}

So, does the index cover the query?

Or which index will give me better performance for this problem?

Thank you.
mongodb mongodb-query full-text-search query-optimization compound-index
1个回答
1
投票

好吧,看来你已经创建了包含 text 键的复合索引。但 MongoDB 官方文档中是这样说的:

一个复合索引可以包括一个文本索引键与升序、降序索引键的组合。但是,这些复合索引有以下限制:

  • 复合文本索引不能包括任何其他特殊索引类型,如多键或地理空间索引字段。
  • 如果复合文本索引在文本索引键之前还包含其他键,那么要执行 $text 搜索,查询谓词必须包含对这些前置键的等值匹配条件。(这里使用的是范围查询)
  • 在创建复合文本索引时,所有文本索引键必须在索引规范文档中相邻列出。

所以,这是第一个问题。


接下来,我建议你看一看索引前缀(Prefixes)的相关内容,它将帮助你理解复合索引在你的查询中是如何被使用的。

希望这能帮助你理解这个问题:)

© www.soinside.com 2019 - 2024. All rights reserved.