在 Java MongoDB 驱动程序中不经过 FETCH 阶段统计文档数量

Question

我正在尝试统计集合中符合给定过滤条件的文档数量。使用

countDocuments

最多耗时

1s per 100'000 documents

，这有点慢，而且这还没有考虑多个用户每隔几秒到几分钟触发一次此计数时对数据库造成的影响。

在底层，

countDocuments()

会被转换为如下的

aggregate

查询：

db.collection.aggregate([
  {
    "$match": {
      "_id": {"$exists": true}
    }
  },
  {
    "$group": {
      "_id": 1,
      "n": {"$sum": 1}
    }
  }
])

调用

explain()

会产生：

{
  explainVersion: '1',
  stages: [
    {
      '$cursor': {
        queryPlanner: {
          namespace: 'collection',
          indexFilterSet: false,
          parsedQuery: { _id: { '$exists': true } },
          queryHash: 'BA029CD5',
          planCacheKey: '4D66BB31',
          maxIndexedOrSolutionsReached: false,
          maxIndexedAndSolutionsReached: false,
          maxScansToExplodeReached: false,
          winningPlan: {
            stage: 'FETCH',
            filter: { _id: { '$exists': true } },
            inputStage: {
              stage: 'IXSCAN',
              keyPattern: { _id: 1 },
              indexName: '_id_',
              isMultiKey: false,
              multiKeyPaths: { _id: [] },
              isUnique: true,
              isSparse: false,
              isPartial: false,
              indexVersion: 2,
              direction: 'forward',
              indexBounds: { _id: [ '[MinKey, MaxKey]' ] }
            }
          },
          rejectedPlans: []
        },
        executionStats: {
          executionSuccess: true,
          nReturned: 614833,
          executionTimeMillis: 6589,
          totalKeysExamined: 614833,
          totalDocsExamined: 614833,
          executionStages: {
            stage: 'FETCH',
            filter: { _id: { '$exists': true } },
            nReturned: 614833,
            executionTimeMillisEstimate: 4219,
            works: 614834,
            advanced: 614833,
            needTime: 0,
            needYield: 0,
            saveState: 668,
            restoreState: 668,
            isEOF: 1,
            docsExamined: 614833,
            alreadyHasObj: 0,
            inputStage: {
              stage: 'IXSCAN',
              nReturned: 614833,
              executionTimeMillisEstimate: 568,
              works: 614834,
              advanced: 614833,
              needTime: 0,
              needYield: 0,
              saveState: 668,
              restoreState: 668,
              isEOF: 1,
              keyPattern: { _id: 1 },
              indexName: '_id_',
              isMultiKey: false,
              multiKeyPaths: { _id: [] },
              isUnique: true,
              isSparse: false,
              isPartial: false,
              indexVersion: 2,
              direction: 'forward',
              indexBounds: { _id: [ '[MinKey, MaxKey]' ] },
              keysExamined: 614833,
              seeks: 1,
              dupsTested: 0,
              dupsDropped: 0
            }
          },
          allPlansExecution: []
        }
      },
      nReturned: Long("614833"),
      executionTimeMillisEstimate: Long("6510")
    },
    {
      '$group': { _id: { '$const': 1 }, n: { '$sum': { '$const': 1 } } },
      maxAccumulatorMemoryUsageBytes: { n: Long("80") },
      totalOutputDataSizeBytes: Long("237"),
      usedDisk: false,
      spills: Long("0"),
      nReturned: Long("1"),
      executionTimeMillisEstimate: Long("6584")
    }
  ],
  serverInfo: {
    host: '04daded1988b',
    port: 27017,
    version: '6.0.2',
    gitVersion: '94fb7dfc8b974f1f5343e7ea394d0d9deedba50e'
  },
  serverParameters: {
    internalQueryFacetBufferSizeBytes: 104857600,
    internalQueryFacetMaxOutputDocSizeBytes: 104857600,
    internalLookupStageIntermediateDocumentMaxSizeBytes: 104857600,
    internalDocumentSourceGroupMaxMemoryBytes: 104857600,
    internalQueryMaxBlockingSortMemoryUsageBytes: 104857600,
    internalQueryProhibitBlockingMergeOnMongoS: 0,
    internalQueryMaxAddToSetBytes: 104857600,
    internalDocumentSourceSetWindowFieldsMaxMemoryBytes: 104857600
  },
  command: {
    aggregate: 'collection',
    pipeline: [
      { '$match': { _id: { '$exists': true } } },
      { '$group': { _id: 1, n: { '$sum': 1 } } }
    ],
    cursor: {},
    '$db': 'ivdataCache'
  },
  ok: 1,
  '$clusterTime': {
    clusterTime: Timestamp({ t: 1677593996, i: 1 }),
    signature: {
      hash: Binary(Buffer.from("0000000000000000000000000000000000000000", "hex"), 0),
      keyId: Long("0")
    }
  },
  operationTime: Timestamp({ t: 1677593996, i: 1 })
}

请注意，获胜计划（winning plan）由两个阶段组成：首先是

FETCH

，然后是

IXSCAN

。而使用

find()

的查询如果在投影中不包含

_id

字段，而是只投影到一个已建立索引的字段（此处名为

origin

），则只需要执行一个

IXSCAN

阶段：

db.collection.find({}, {"_id":0, "origin": 1}).count()

能够计算出正确的结果，加上

filter

时也同样如此。

.explain()

将产生：

{
    explainVersion: '1',
    queryPlanner: {
        namespace: 'db.collection',
        indexFilterSet: false,
        parsedQuery: { origin: { '$eq': 'WF' } },
        queryHash: '2428EDD1',
        planCacheKey: '85C94249',
        maxIndexedOrSolutionsReached: false,
        maxIndexedAndSolutionsReached: false,
        maxScansToExplodeReached: false,
        winningPlan: {
        stage: 'PROJECTION_COVERED',
        transformBy: { _id: 0, origin: 1 },
        inputStage: {
            stage: 'IXSCAN',
            keyPattern: { origin: 1, 'metadata.iv': -1 },
            indexName: 'originAscending',
            isMultiKey: false,
            multiKeyPaths: { origin: [], 'metadata.iv': [] },
            isUnique: false,
            isSparse: false,
            isPartial: false,
            indexVersion: 2,
            direction: 'forward',
            indexBounds: {
            origin: [ '["WF", "WF"]' ],
            'metadata.invoiceNumberPadded': [ '[MaxKey, MinKey]' ]
            }
        }
        },
        rejectedPlans: [
        {
            stage: 'PROJECTION_COVERED',
            transformBy: { _id: 0, origin: 1 },
            inputStage: {
            stage: 'IXSCAN',
            keyPattern: { origin: -1, 'metadata.iv': -1 },
            indexName: 'originDescending',
            isMultiKey: false,
            multiKeyPaths: { origin: [], 'metadata.iv': [] },
            isUnique: false,
            isSparse: false,
            isPartial: false,
            indexVersion: 2,
            direction: 'forward',
            indexBounds: {
                origin: [ '["WF", "WF"]' ],
                'metadata.invoiceNumberPadded': [ '[MaxKey, MinKey]' ]
            }
            }
        }
        ]
    },
    command: {
        find: 'collection',
        filter: { origin: 'WF' },
        projection: { _id: 0, origin: 1 },
        '$db': 'ivdataCache'
    },
    serverInfo: {
        host: '04daded1988b',
        port: 27017,
        version: '6.0.2',
        gitVersion: '94fb7dfc8b974f1f5343e7ea394d0d9deedba50e'
    },
    serverParameters: {
        internalQueryFacetBufferSizeBytes: 104857600,
        internalQueryFacetMaxOutputDocSizeBytes: 104857600,
        internalLookupStageIntermediateDocumentMaxSizeBytes: 104857600,
        internalDocumentSourceGroupMaxMemoryBytes: 104857600,
        internalQueryMaxBlockingSortMemoryUsageBytes: 104857600,
        internalQueryProhibitBlockingMergeOnMongoS: 0,
        internalQueryMaxAddToSetBytes: 104857600,
        internalDocumentSourceSetWindowFieldsMaxMemoryBytes: 104857600
    },
    ok: 1,
    '$clusterTime': {
        clusterTime: Timestamp({ t: 1677593816, i: 1 }),
        signature: {
        hash: Binary(Buffer.from("0000000000000000000000000000000000000000", "hex"), 0),
        keyId: Long("0")
        }
    },
    operationTime: Timestamp({ t: 1677593816, i: 1 })
}

这只会使用一个

IXSCAN

阶段并且执行得更快。

我尝试在 MongoDB Java 驱动程序版本 4.8.0 中构建此查询，方法是编写以下内容（仍在进行中）：

/**
 * Work-in-progress probe: asks the server to explain a find that applies the
 * query's filter and projects only the "origin" field (with _id excluded),
 * then logs the returned plan.
 *
 * @param query invoice query whose filter is converted to BSON
 * @return always 0 for now; no count is computed yet
 */
private int aggregate(InvoiceQuery query) {
    final var criteria = new MongoInvoiceFilterCriteria(query.getFilter()).asBson();
    final var originOnly =
        Projections.fields(Projections.excludeId(), Projections.include("origin"));

    // Take the first item emitted by the explain publisher.
    final var explainResult = Flowable.fromPublisher(
            this.collection.find(criteria).projection(originOnly).explain())
        .blockingFirst();

    LOGGER.info("Aggregate " + explainResult);

    return 0;
}

这将产生以下结果（重点标记是我加的）：

{
  {
    explainVersion = 1, queryPlanner = Document {
      {
        namespace = db.collection, indexFilterSet = false, parsedQuery = Document {
          {
            metadata.responsible.displayNameLowerCase = Document {
              {
                $regex = ^ \Q % user_name % \E
              }
            }
          }
        }, queryHash = AA3D6B5D, planCacheKey = 8569 EF40, maxIndexedOrSolutionsReached = false, maxIndexedAndSolutionsReached = false, maxScansToExplodeReached = false, winningPlan = Document {
          {
            stage = PROJECTION_SIMPLE, transformBy = Document {
              {
                _id = 0, origin = 1
              }
            }, inputStage = Document {
              {
                stage = FETCH, inputStage = Document {
                  {
                    stage = IXSCAN, keyPattern = Document {
                      {
                        metadata.responsible.displayNameLowerCase = 1, metadata.iv = -1
                      }
                    }, indexName = responsibleDisplayNameLowerCaseAscending, isMultiKey = false, multiKeyPaths = Document {
                      {
                        metadata.responsible.displayNameLowerCase = [], metadata.iv = []
                      }
                    }, isUnique = false, isSparse = false, isPartial = false, indexVersion = 2, direction = forward, indexBounds = Document {
                      {
                        metadata.responsible.displayNameLowerCase = [
                          ["%user_name%", "%user_name%"), [/^\Q%user_name%\E/, /^\Q%user_name%\E/]
                        ], metadata.iv = [
                          [MaxKey, MinKey]
                        ]
                      }
                    }
                  }
                }
              }
            }
          }
        }, rejectedPlans = [Document {
          {
            stage = PROJECTION_SIMPLE, transformBy = Document {
              {
                _id = 0, origin = 1
              }
            }, inputStage = Document {
              {
                stage = FETCH, inputStage = Document {
                  {
                    stage = IXSCAN, keyPattern = Document {
                      {
                        metadata.responsible.displayNameLowerCase = -1, metadata.iv = -1
                      }
                    }, indexName = responsibleDisplayNameLowerCaseDescending, isMultiKey = false, multiKeyPaths = Document {
                      {
                        metadata.responsible.displayNameLowerCase = [], metadata.iv = []
                      }
                    }, isUnique = false, isSparse = false, isPartial = false, indexVersion = 2, direction = forward, indexBounds = Document {
                      {
                        metadata.responsible.displayNameLowerCase = [
                          [/^\Q%user_name%\E/, /^\Q%user_name%\E/], ("%user_name%", "%user_name%"]
                        ], metadata.iv = [
                          [MaxKey, MinKey]
                        ]
                      }
                    }
                  }
                }
              }
            }
          }
        }]
      }
    }, executionStats = Document {
      {
        executionSuccess = true, nReturned = 0, executionTimeMillis = 0, totalKeysExamined = 1, totalDocsExamined = 0, executionStages = Document {
          {
            stage = PROJECTION_SIMPLE, nReturned = 0, executionTimeMillisEstimate = 0, works = 3, advanced = 0, needTime = 1, needYield = 0, saveState = 0, restoreState = 0, isEOF = 1, transformBy = Document {
              {
                _id = 0, origin = 1
              }
            }, inputStage = Document {
              {
                stage = FETCH, nReturned = 0, executionTimeMillisEstimate = 0, works = 3, advanced = 0, needTime = 1, needYield = 0, saveState = 0, restoreState = 0, isEOF = 1, docsExamined = 0, alreadyHasObj = 0, inputStage = Document {
                  {
                    stage = IXSCAN, nReturned = 0, executionTimeMillisEstimate = 0, works = 2, advanced = 0, needTime = 1, needYield = 0, saveState = 0, restoreState = 0, isEOF = 1, keyPattern = Document {
                      {
                        metadata.responsible.displayNameLowerCase = 1, metadata.iv = -1
                      }
                    }, indexName = responsibleDisplayNameLowerCaseAscending, isMultiKey = false, multiKeyPaths = Document {
                      {
                        metadata.responsible.displayNameLowerCase = [], metadata.iv = []
                      }
                    }, isUnique = false, isSparse = false, isPartial = false, indexVersion = 2, direction = forward, indexBounds = Document {
                      {
                        metadata.responsible.displayNameLowerCase = [
                          ["%user_name%", "%user_name%"), [/^\Q%user_name%\E/, /^\Q%user_name%\E/]
                        ], metadata.iv = [
                          [MaxKey, MinKey]
                        ]
                      }
                    }, keysExamined = 1, seeks = 2, dupsTested = 0, dupsDropped = 0
                  }
                }
              }
            }
          }
        }, allPlansExecution = [Document {
          {
            nReturned = 0, executionTimeMillisEstimate = 0, totalKeysExamined = 1, totalDocsExamined = 0, score = 2.0002, executionStages = Document {
              {
                stage = PROJECTION_SIMPLE, nReturned = 0, executionTimeMillisEstimate = 0, works = 2, advanced = 0, needTime = 1, needYield = 0, saveState = 0, restoreState = 0, isEOF = 1, transformBy = Document {
                  {
                    _id = 0, origin = 1
                  }
                }, inputStage = Document {
                  {
                    stage = FETCH, nReturned = 0, executionTimeMillisEstimate = 0, works = 2, advanced = 0, needTime = 1, needYield = 0, saveState = 0, restoreState = 0, isEOF = 1, docsExamined = 0, alreadyHasObj = 0, inputStage = Document {
                      {
                        stage = IXSCAN, nReturned = 0, executionTimeMillisEstimate = 0, works = 2, advanced = 0, needTime = 1, needYield = 0, saveState = 0, restoreState = 0, isEOF = 1, keyPattern = Document {
                          {
                            metadata.responsible.displayNameLowerCase = 1, metadata.iv = -1
                          }
                        }, indexName = responsibleDisplayNameLowerCaseAscending, isMultiKey = false, multiKeyPaths = Document {
                          {
                            metadata.responsible.displayNameLowerCase = [], metadata.iv = []
                          }
                        }, isUnique = false, isSparse = false, isPartial = false, indexVersion = 2, direction = forward, indexBounds = Document {
                          {
                            metadata.responsible.displayNameLowerCase = [
                              ["%user_name%", "%user_name%"), [/^\Q%user_name%\E/, /^\Q%user_name%\E/]
                            ], metadata.iv = [
                              [MaxKey, MinKey]
                            ]
                          }
                        }, keysExamined = 1, seeks = 2, dupsTested = 0, dupsDropped = 0
                      }
                    }
                  }
                }
              }
            }
          }
        }, Document {
          {
            nReturned = 0, executionTimeMillisEstimate = 0, totalKeysExamined = 2, totalDocsExamined = 0, score = 2.0002, executionStages = Document {
              {
                stage = PROJECTION_SIMPLE, nReturned = 0, executionTimeMillisEstimate = 0, works = 2, advanced = 0, needTime = 1, needYield = 0, saveState = 1, restoreState = 0, isEOF = 1, transformBy = Document {
                  {
                    _id = 0, origin = 1
                  }
                }, inputStage = Document {
                  {
                    stage = FETCH, nReturned = 0, executionTimeMillisEstimate = 0, works = 2, advanced = 0, needTime = 1, needYield = 0, saveState = 1, restoreState = 0, isEOF = 1, docsExamined = 0, alreadyHasObj = 0, inputStage = Document {
                      {
                        stage = IXSCAN, nReturned = 0, executionTimeMillisEstimate = 0, works = 2, advanced = 0, needTime = 1, needYield = 0, saveState = 1, restoreState = 0, isEOF = 1, keyPattern = Document {
                          {
                            metadata.responsible.displayNameLowerCase = -1, metadata.iv = -1
                          }
                        }, indexName = responsibleDisplayNameLowerCaseDescending, isMultiKey = false, multiKeyPaths = Document {
                          {
                            metadata.responsible.displayNameLowerCase = [], metadata.iv = []
                          }
                        }, isUnique = false, isSparse = false, isPartial = false, indexVersion = 2, direction = forward, indexBounds = Document {
                          {
                            metadata.responsible.displayNameLowerCase = [
                              [/^\Q%user_name%\E/, /^\Q%user_name%\E/], ("%user_name%", "%user_name%"]
                            ], metadata.iv = [
                              [MaxKey, MinKey]
                            ]
                          }
                        }, keysExamined = 2, seeks = 2, dupsTested = 0, dupsDropped = 0
                      }
                    }
                  }
                }
              }
            }
          }
        }]
      }
    }, command = Document {
      {
        find = collection, filter = Document {
          {
            $or = [Document {
              {
                metadata.responsible.displayNameLowerCase = BsonRegularExpression {
                  pattern = '^\Q%user_name%\E', options = ''
                }
              }
            }]
          }
        }, projection = Document {
          {
            _id = 0, origin = 1
          }
        }, $db = db
      }
    }, serverInfo = Document {
      {
        host = brunb513, port = 27147, version = 6.0 .4, gitVersion = 44 ff59461c1353638a71e710f385a566bcd2f547
      }
    }, serverParameters = Document {
      {
        internalQueryFacetBufferSizeBytes = 104857600, internalQueryFacetMaxOutputDocSizeBytes = 104857600, internalLookupStageIntermediateDocumentMaxSizeBytes = 104857600, internalDocumentSourceGroupMaxMemoryBytes = 104857600, internalQueryMaxBlockingSortMemoryUsageBytes = 104857600, internalQueryProhibitBlockingMergeOnMongoS = 0, internalQueryMaxAddToSetBytes = 104857600, internalDocumentSourceSetWindowFieldsMaxMemoryBytes = 104857600
      }
    }, ok = 1.0, $clusterTime = Document {
      {
        clusterTime = Timestamp {
          value = 7205862903914692609, seconds = 1677745698, inc = 1
        }, signature = Document {
          {
            hash = org.bson.types.Binary @c98f581, keyId = 0
          }
        }
      }
    }, operationTime = Timestamp {
      value = 7205862903914692609, seconds = 1677745698, inc = 1
    }
  }
}

因此，出于某种原因，mongo 再次执行了一个

FETCH

和一个

IXSCAN

阶段，尽管根据我的理解，查询应该执行相同的操作。

我考虑过使用

documentCount()

、

estimatedDocumentCount()

、不同的

aggregates()

和

find()

查询。但我找不到任何一种查询，其性能能与直接在 mongosh 中执行

db.collection.find({}, {"_id":0, "origin": 1}).count()

相当。

如何提高在 Java 中统计文档数量的性能？

编辑：根据 @Noel 提供的答案：

// Build the aggregation pipeline: match documents whose _id is >= MinKey,
// then group and sum 1 per document into "n".
List<Bson> pipeline = Arrays.asList(
    Aggregates.match(Filters.gte("_id", new MinKey())),
    // NOTE(review): the string "$1" is presumably interpreted by the server as a
    // field path (the value of a field named "1"), not as the constant 1 used in
    // the shell pipeline; reading a field could explain why a FETCH stage is
    // still planned -- TODO confirm.
    Aggregates.group("$1", Accumulators.sum("n", 1)));

// Request the explain output for the aggregation pipeline.
var findPublisher = 
    this.cachedFormalizedInvoices.aggregate(pipeline).explain();

结果仍然会先出现一个

FETCH

阶段：

{
   {
      explainVersion=2,
      "queryPlanner=Document"{
         {
            "namespace=db.collection",
            "indexFilterSet=false",
            "parsedQuery=Document"{
               {
                  "_id=Document"{
                     {
                        "$gte=MinKey"
                     }
                  }
               }
            },
            queryHash=D1046F5E,
            planCacheKey=7E518BFB,
            "optimizedPipeline=true",
            "maxIndexedOrSolutionsReached=false",
            "maxIndexedAndSolutionsReached=false",
            "maxScansToExplodeReached=false",
            "winningPlan=Document"{
               {
                  "queryPlan=Document"{
                     {
                        "stage=GROUP",
                        planNodeId=3,
                        "inputStage=Document"{
                           {
                              "stage=FETCH",
                              planNodeId=2,
                              "inputStage=Document"{
                                 {
                                    "stage=IXSCAN",
                                    planNodeId=1,
                                    "keyPattern=Document"{
                                       {
                                          _id=1
                                       }
                                    },
                                    "indexName=_id_",
                                    "isMultiKey=false",
                                    "multiKeyPaths=Document"{
                                       {
                                          "_id="[
                                             
                                          ]
                                       }
                                    },
                                    "isUnique=true",
                                    "isSparse=false",
                                    "isPartial=false",
                                    indexVersion=2,
                                    "direction=forward",
                                    "indexBounds=Document"{
                                       {
                                          "_id="[
                                             [
                                                "MinKey",
                                                "MaxKey"
                                             ]
                                          ]
                                       }
                                    }
                                 }
                              }
                           }
                        }
                     }
                  }...
}

Answer 1

我认为您应该以让获胜计划变为 COUNT_SCAN 为目标。下面的查询利用 _id 上的索引来进行计数。

db.collection.explain("executionStats").aggregate([
{
    $match: {
        "_id": {"$gte": MinKey}
    }
},
{
    $group: {
        "_id": 1, "n": {"$sum": 1}
    }
}
]);

注意：这可能不会在分片集合上触发 COUNT_SCAN。

统计：

"winningPlan" : {
    "stage" : "COUNT_SCAN",
    "keyPattern" : {
        "_id" : 1
    },
    "indexName" : "_id_",
    "isMultiKey" : false,
    "multiKeyPaths" : {
        "_id" : []
    },
    "isUnique" : true,
    "isSparse" : false,
    "isPartial" : false,
    "indexVersion" : 2,
    "indexBounds" : {
        "startKey" : {
            "_id" : { "$minKey" : 1 }
        },
        "startKeyInclusive" : true,
        "endKey" : {
            "_id" : { "$maxKey" : 1 }
        },
        "endKeyInclusive" : true
    }
}
                
"executionStats" : {
    "executionSuccess" : true,
    "nReturned" : 9,
    "executionTimeMillis" : 0,
    "totalKeysExamined" : 10,
    "totalDocsExamined" : 0,
    "executionStages" : {
        "stage" : "COUNT_SCAN",
        "nReturned" : 9,
        "executionTimeMillisEstimate" : 0,
        "works" : 10,
        "advanced" : 9,
        "needTime" : 0,
        "needYield" : 0,
        "saveState" : 1,
        "restoreState" : 1,
        "isEOF" : 1,
        "keysExamined" : 10,
        "keyPattern" : {
            "_id" : 1
        },
        "indexName" : "_id_",
        "isMultiKey" : false,
        "multiKeyPaths" : {
            "_id" : []
        },
        "isUnique" : true,
        "isSparse" : false,
        "isPartial" : false,
        "indexVersion" : 2,
        "indexBounds" : {
            "startKey" : {
                "_id" : { "$minKey" : 1 }
            },
            "startKeyInclusive" : true,
            "endKey" : {
                "_id" : { "$maxKey" : 1 }
            },
            "endKeyInclusive" : true
        }
    }

仅在没有 FETCH 阶段的情况下计算 Java MongoDB 驱动程序中的文档

问题描述投票：0回答：1

1个回答

最新问题

仅在没有 FETCH 阶段的情况下计算 Java MongoDB 驱动程序中的文档

问题描述 投票：0回答：1

1个回答

最新问题

问题描述投票：0回答：1