我正在尝试统计集合中符合给定过滤条件的文档数量。使用
countDocuments
时,每 100'000 个文档最多需要约 1 秒
,这有点慢,而且还没有考虑多个用户每隔几秒到几分钟就触发一次该计数时对数据库造成的影响。
在底层,
countDocuments()
会被翻译成如下 aggregate
查询:
// Two-stage pipeline: filter the documents, then count them in a single group.
db.collection.aggregate([
  // Keep every document in which the _id field exists.
  { $match: { _id: { $exists: true } } },
  // A constant group key puts all matches in one group; $sum: 1 adds 1 per document.
  { $group: { _id: 1, n: { $sum: 1 } } }
])
调用
explain()
会产生:
{
explainVersion: '1',
stages: [
{
'$cursor': {
queryPlanner: {
namespace: 'collection',
indexFilterSet: false,
parsedQuery: { _id: { '$exists': true } },
queryHash: 'BA029CD5',
planCacheKey: '4D66BB31',
maxIndexedOrSolutionsReached: false,
maxIndexedAndSolutionsReached: false,
maxScansToExplodeReached: false,
winningPlan: {
stage: 'FETCH',
filter: { _id: { '$exists': true } },
inputStage: {
stage: 'IXSCAN',
keyPattern: { _id: 1 },
indexName: '_id_',
isMultiKey: false,
multiKeyPaths: { _id: [] },
isUnique: true,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: 'forward',
indexBounds: { _id: [ '[MinKey, MaxKey]' ] }
}
},
rejectedPlans: []
},
executionStats: {
executionSuccess: true,
nReturned: 614833,
executionTimeMillis: 6589,
totalKeysExamined: 614833,
totalDocsExamined: 614833,
executionStages: {
stage: 'FETCH',
filter: { _id: { '$exists': true } },
nReturned: 614833,
executionTimeMillisEstimate: 4219,
works: 614834,
advanced: 614833,
needTime: 0,
needYield: 0,
saveState: 668,
restoreState: 668,
isEOF: 1,
docsExamined: 614833,
alreadyHasObj: 0,
inputStage: {
stage: 'IXSCAN',
nReturned: 614833,
executionTimeMillisEstimate: 568,
works: 614834,
advanced: 614833,
needTime: 0,
needYield: 0,
saveState: 668,
restoreState: 668,
isEOF: 1,
keyPattern: { _id: 1 },
indexName: '_id_',
isMultiKey: false,
multiKeyPaths: { _id: [] },
isUnique: true,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: 'forward',
indexBounds: { _id: [ '[MinKey, MaxKey]' ] },
keysExamined: 614833,
seeks: 1,
dupsTested: 0,
dupsDropped: 0
}
},
allPlansExecution: []
}
},
nReturned: Long("614833"),
executionTimeMillisEstimate: Long("6510")
},
{
'$group': { _id: { '$const': 1 }, n: { '$sum': { '$const': 1 } } },
maxAccumulatorMemoryUsageBytes: { n: Long("80") },
totalOutputDataSizeBytes: Long("237"),
usedDisk: false,
spills: Long("0"),
nReturned: Long("1"),
executionTimeMillisEstimate: Long("6584")
}
],
serverInfo: {
host: '04daded1988b',
port: 27017,
version: '6.0.2',
gitVersion: '94fb7dfc8b974f1f5343e7ea394d0d9deedba50e'
},
serverParameters: {
internalQueryFacetBufferSizeBytes: 104857600,
internalQueryFacetMaxOutputDocSizeBytes: 104857600,
internalLookupStageIntermediateDocumentMaxSizeBytes: 104857600,
internalDocumentSourceGroupMaxMemoryBytes: 104857600,
internalQueryMaxBlockingSortMemoryUsageBytes: 104857600,
internalQueryProhibitBlockingMergeOnMongoS: 0,
internalQueryMaxAddToSetBytes: 104857600,
internalDocumentSourceSetWindowFieldsMaxMemoryBytes: 104857600
},
command: {
aggregate: 'collection',
pipeline: [
{ '$match': { _id: { '$exists': true } } },
{ '$group': { _id: 1, n: { '$sum': 1 } } }
],
cursor: {},
'$db': 'ivdataCache'
},
ok: 1,
'$clusterTime': {
clusterTime: Timestamp({ t: 1677593996, i: 1 }),
signature: {
hash: Binary(Buffer.from("0000000000000000000000000000000000000000", "hex"), 0),
keyId: Long("0")
}
},
operationTime: Timestamp({ t: 1677593996, i: 1 })
}
注意获胜计划(winningPlan)由两个阶段组成:一个
FETCH
阶段,以及作为其输入阶段的 IXSCAN。
而使用
find()
的查询如果排除 _id
字段,只投影一个已建立索引的字段(这里名为 origin
),则只需要一个 IXSCAN
阶段:
db.collection.find({}, {"_id":0, "origin": 1}).count()
它能计算出正确的结果,也可以配合
filter
使用。调用 .explain()
将产生:
{
explainVersion: '1',
queryPlanner: {
namespace: 'db.collection',
indexFilterSet: false,
parsedQuery: { origin: { '$eq': 'WF' } },
queryHash: '2428EDD1',
planCacheKey: '85C94249',
maxIndexedOrSolutionsReached: false,
maxIndexedAndSolutionsReached: false,
maxScansToExplodeReached: false,
winningPlan: {
stage: 'PROJECTION_COVERED',
transformBy: { _id: 0, origin: 1 },
inputStage: {
stage: 'IXSCAN',
keyPattern: { origin: 1, 'metadata.iv': -1 },
indexName: 'originAscending',
isMultiKey: false,
multiKeyPaths: { origin: [], 'metadata.iv': [] },
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: 'forward',
indexBounds: {
origin: [ '["WF", "WF"]' ],
'metadata.invoiceNumberPadded': [ '[MaxKey, MinKey]' ]
}
}
},
rejectedPlans: [
{
stage: 'PROJECTION_COVERED',
transformBy: { _id: 0, origin: 1 },
inputStage: {
stage: 'IXSCAN',
keyPattern: { origin: -1, 'metadata.iv': -1 },
indexName: 'originDescending',
isMultiKey: false,
multiKeyPaths: { origin: [], 'metadata.iv': [] },
isUnique: false,
isSparse: false,
isPartial: false,
indexVersion: 2,
direction: 'forward',
indexBounds: {
origin: [ '["WF", "WF"]' ],
'metadata.invoiceNumberPadded': [ '[MaxKey, MinKey]' ]
}
}
}
]
},
command: {
find: 'collection',
filter: { origin: 'WF' },
projection: { _id: 0, origin: 1 },
'$db': 'ivdataCache'
},
serverInfo: {
host: '04daded1988b',
port: 27017,
version: '6.0.2',
gitVersion: '94fb7dfc8b974f1f5343e7ea394d0d9deedba50e'
},
serverParameters: {
internalQueryFacetBufferSizeBytes: 104857600,
internalQueryFacetMaxOutputDocSizeBytes: 104857600,
internalLookupStageIntermediateDocumentMaxSizeBytes: 104857600,
internalDocumentSourceGroupMaxMemoryBytes: 104857600,
internalQueryMaxBlockingSortMemoryUsageBytes: 104857600,
internalQueryProhibitBlockingMergeOnMongoS: 0,
internalQueryMaxAddToSetBytes: 104857600,
internalDocumentSourceSetWindowFieldsMaxMemoryBytes: 104857600
},
ok: 1,
'$clusterTime': {
clusterTime: Timestamp({ t: 1677593816, i: 1 }),
signature: {
hash: Binary(Buffer.from("0000000000000000000000000000000000000000", "hex"), 0),
keyId: Long("0")
}
},
operationTime: Timestamp({ t: 1677593816, i: 1 })
}
这只会使用一个
IXSCAN
阶段并且执行得更快。
我尝试在 MongoDB Java 驱动程序版本 4.8.0 中构建此查询,方法是编写以下内容(仍在进行中):
/**
 * Work-in-progress helper: runs explain() on a find() that applies the
 * query's filter and projects only the "origin" field (with _id excluded),
 * then logs the explain result.
 *
 * @param query the invoice query whose filter is converted to BSON
 * @return always 0 for now; no count is computed yet
 */
private int aggregate(InvoiceQuery query) {
    // Convert the query's filter criteria into a BSON filter document.
    var criteria = new MongoInvoiceFilterCriteria(query.getFilter());
    var bsonFilter = criteria.asBson();

    // Projection: drop _id and keep only "origin".
    var originOnly = Projections.fields(
            Projections.excludeId(),
            Projections.include("origin"));

    // Ask the server for the plan of the projected find instead of its results.
    var explainPublisher = this.collection
            .find(bsonFilter)
            .projection(originOnly)
            .explain();

    // Block until the first (explain) document arrives and log it.
    var explainResult = Flowable.fromPublisher(explainPublisher).blockingFirst();
    LOGGER.info("Aggregate " + explainResult);

    return 0;
}
这将产生以下结果(强调我的):
{
{
explainVersion = 1, queryPlanner = Document {
{
namespace = db.collection, indexFilterSet = false, parsedQuery = Document {
{
metadata.responsible.displayNameLowerCase = Document {
{
$regex = ^ \Q % user_name % \E
}
}
}
}, queryHash = AA3D6B5D, planCacheKey = 8569 EF40, maxIndexedOrSolutionsReached = false, maxIndexedAndSolutionsReached = false, maxScansToExplodeReached = false, winningPlan = Document {
{
stage = PROJECTION_SIMPLE, transformBy = Document {
{
_id = 0, origin = 1
}
}, inputStage = Document {
{
stage = FETCH, inputStage = Document {
{
stage = IXSCAN, keyPattern = Document {
{
metadata.responsible.displayNameLowerCase = 1, metadata.iv = -1
}
}, indexName = responsibleDisplayNameLowerCaseAscending, isMultiKey = false, multiKeyPaths = Document {
{
metadata.responsible.displayNameLowerCase = [], metadata.iv = []
}
}, isUnique = false, isSparse = false, isPartial = false, indexVersion = 2, direction = forward, indexBounds = Document {
{
metadata.responsible.displayNameLowerCase = [
["%user_name%", "%user_name%"), [/^\Q%user_name%\E/, /^\Q%user_name%\E/]
], metadata.iv = [
[MaxKey, MinKey]
]
}
}
}
}
}
}
}
}, rejectedPlans = [Document {
{
stage = PROJECTION_SIMPLE, transformBy = Document {
{
_id = 0, origin = 1
}
}, inputStage = Document {
{
stage = FETCH, inputStage = Document {
{
stage = IXSCAN, keyPattern = Document {
{
metadata.responsible.displayNameLowerCase = -1, metadata.iv = -1
}
}, indexName = responsibleDisplayNameLowerCaseDescending, isMultiKey = false, multiKeyPaths = Document {
{
metadata.responsible.displayNameLowerCase = [], metadata.iv = []
}
}, isUnique = false, isSparse = false, isPartial = false, indexVersion = 2, direction = forward, indexBounds = Document {
{
metadata.responsible.displayNameLowerCase = [
[/^\Q%user_name%\E/, /^\Q%user_name%\E/], ("%user_name%", "%user_name%"]
], metadata.iv = [
[MaxKey, MinKey]
]
}
}
}
}
}
}
}
}]
}
}, executionStats = Document {
{
executionSuccess = true, nReturned = 0, executionTimeMillis = 0, totalKeysExamined = 1, totalDocsExamined = 0, executionStages = Document {
{
stage = PROJECTION_SIMPLE, nReturned = 0, executionTimeMillisEstimate = 0, works = 3, advanced = 0, needTime = 1, needYield = 0, saveState = 0, restoreState = 0, isEOF = 1, transformBy = Document {
{
_id = 0, origin = 1
}
}, inputStage = Document {
{
stage = FETCH, nReturned = 0, executionTimeMillisEstimate = 0, works = 3, advanced = 0, needTime = 1, needYield = 0, saveState = 0, restoreState = 0, isEOF = 1, docsExamined = 0, alreadyHasObj = 0, inputStage = Document {
{
stage = IXSCAN, nReturned = 0, executionTimeMillisEstimate = 0, works = 2, advanced = 0, needTime = 1, needYield = 0, saveState = 0, restoreState = 0, isEOF = 1, keyPattern = Document {
{
metadata.responsible.displayNameLowerCase = 1, metadata.iv = -1
}
}, indexName = responsibleDisplayNameLowerCaseAscending, isMultiKey = false, multiKeyPaths = Document {
{
metadata.responsible.displayNameLowerCase = [], metadata.iv = []
}
}, isUnique = false, isSparse = false, isPartial = false, indexVersion = 2, direction = forward, indexBounds = Document {
{
metadata.responsible.displayNameLowerCase = [
["%user_name%", "%user_name%"), [/^\Q%user_name%\E/, /^\Q%user_name%\E/]
], metadata.iv = [
[MaxKey, MinKey]
]
}
}, keysExamined = 1, seeks = 2, dupsTested = 0, dupsDropped = 0
}
}
}
}
}
}, allPlansExecution = [Document {
{
nReturned = 0, executionTimeMillisEstimate = 0, totalKeysExamined = 1, totalDocsExamined = 0, score = 2.0002, executionStages = Document {
{
stage = PROJECTION_SIMPLE, nReturned = 0, executionTimeMillisEstimate = 0, works = 2, advanced = 0, needTime = 1, needYield = 0, saveState = 0, restoreState = 0, isEOF = 1, transformBy = Document {
{
_id = 0, origin = 1
}
}, inputStage = Document {
{
stage = FETCH, nReturned = 0, executionTimeMillisEstimate = 0, works = 2, advanced = 0, needTime = 1, needYield = 0, saveState = 0, restoreState = 0, isEOF = 1, docsExamined = 0, alreadyHasObj = 0, inputStage = Document {
{
stage = IXSCAN, nReturned = 0, executionTimeMillisEstimate = 0, works = 2, advanced = 0, needTime = 1, needYield = 0, saveState = 0, restoreState = 0, isEOF = 1, keyPattern = Document {
{
metadata.responsible.displayNameLowerCase = 1, metadata.iv = -1
}
}, indexName = responsibleDisplayNameLowerCaseAscending, isMultiKey = false, multiKeyPaths = Document {
{
metadata.responsible.displayNameLowerCase = [], metadata.iv = []
}
}, isUnique = false, isSparse = false, isPartial = false, indexVersion = 2, direction = forward, indexBounds = Document {
{
metadata.responsible.displayNameLowerCase = [
["%user_name%", "%user_name%"), [/^\Q%user_name%\E/, /^\Q%user_name%\E/]
], metadata.iv = [
[MaxKey, MinKey]
]
}
}, keysExamined = 1, seeks = 2, dupsTested = 0, dupsDropped = 0
}
}
}
}
}
}
}
}, Document {
{
nReturned = 0, executionTimeMillisEstimate = 0, totalKeysExamined = 2, totalDocsExamined = 0, score = 2.0002, executionStages = Document {
{
stage = PROJECTION_SIMPLE, nReturned = 0, executionTimeMillisEstimate = 0, works = 2, advanced = 0, needTime = 1, needYield = 0, saveState = 1, restoreState = 0, isEOF = 1, transformBy = Document {
{
_id = 0, origin = 1
}
}, inputStage = Document {
{
stage = FETCH, nReturned = 0, executionTimeMillisEstimate = 0, works = 2, advanced = 0, needTime = 1, needYield = 0, saveState = 1, restoreState = 0, isEOF = 1, docsExamined = 0, alreadyHasObj = 0, inputStage = Document {
{
stage = IXSCAN, nReturned = 0, executionTimeMillisEstimate = 0, works = 2, advanced = 0, needTime = 1, needYield = 0, saveState = 1, restoreState = 0, isEOF = 1, keyPattern = Document {
{
metadata.responsible.displayNameLowerCase = -1, metadata.iv = -1
}
}, indexName = responsibleDisplayNameLowerCaseDescending, isMultiKey = false, multiKeyPaths = Document {
{
metadata.responsible.displayNameLowerCase = [], metadata.iv = []
}
}, isUnique = false, isSparse = false, isPartial = false, indexVersion = 2, direction = forward, indexBounds = Document {
{
metadata.responsible.displayNameLowerCase = [
[/^\Q%user_name%\E/, /^\Q%user_name%\E/], ("%user_name%", "%user_name%"]
], metadata.iv = [
[MaxKey, MinKey]
]
}
}, keysExamined = 2, seeks = 2, dupsTested = 0, dupsDropped = 0
}
}
}
}
}
}
}
}]
}
}, command = Document {
{
find = collection, filter = Document {
{
$or = [Document {
{
metadata.responsible.displayNameLowerCase = BsonRegularExpression {
pattern = '^\Q%user_name%\E', options = ''
}
}
}]
}
}, projection = Document {
{
_id = 0, origin = 1
}
}, $db = db
}
}, serverInfo = Document {
{
host = brunb513, port = 27147, version = 6.0 .4, gitVersion = 44 ff59461c1353638a71e710f385a566bcd2f547
}
}, serverParameters = Document {
{
internalQueryFacetBufferSizeBytes = 104857600, internalQueryFacetMaxOutputDocSizeBytes = 104857600, internalLookupStageIntermediateDocumentMaxSizeBytes = 104857600, internalDocumentSourceGroupMaxMemoryBytes = 104857600, internalQueryMaxBlockingSortMemoryUsageBytes = 104857600, internalQueryProhibitBlockingMergeOnMongoS = 0, internalQueryMaxAddToSetBytes = 104857600, internalDocumentSourceSetWindowFieldsMaxMemoryBytes = 104857600
}
}, ok = 1.0, $clusterTime = Document {
{
clusterTime = Timestamp {
value = 7205862903914692609, seconds = 1677745698, inc = 1
}, signature = Document {
{
hash = org.bson.types.Binary @c98f581, keyId = 0
}
}
}
}, operationTime = Timestamp {
value = 7205862903914692609, seconds = 1677745698, inc = 1
}
}
}
因此,出于某种原因,MongoDB 在这里又执行了
FETCH
和 IXSCAN
两个阶段,尽管按我的理解,这个查询应该与上面 mongosh 中的查询执行相同的操作。
我考虑过使用
countDocuments()
、estimatedDocumentCount()
、不同的 aggregate()
和 find()
查询,但没有找到任何一个能与直接在 mongosh 中执行的 db.collection.find({}, {"_id":0, "origin": 1}).count()
一样高效。
如何提高在 Java 中统计文档数量的性能?
编辑:针对 @Noel 提供的答案,我尝试了以下代码:
// build the aggregation pipeline: match on _id >= MinKey, then group and sum 1 per document into "n"
List<Bson> pipeline = Arrays.asList(
Aggregates.match(Filters.gte("_id", new MinKey())),
// NOTE(review): the group id here is the string "$1". A string starting with '$' is
// presumably interpreted by the server as a field path (a field named "1"), not as the
// constant 1 used in the mongosh pipeline (`_id: 1`). Grouping by a document field would
// require reading each document, which may explain the FETCH stage - confirm, and
// consider passing a constant id (e.g. the integer 1 or null) instead.
Aggregates.group("$1", Accumulators.sum("n", 1)));
// request the explain output for the aggregation pipeline
var findPublisher =
this.cachedFormalizedInvoices.aggregate(pipeline).explain();
结果中仍然包含一个
FETCH
阶段:
{
{
explainVersion=2,
"queryPlanner=Document"{
{
"namespace=db.collection",
"indexFilterSet=false",
"parsedQuery=Document"{
{
"_id=Document"{
{
"$gte=MinKey"
}
}
}
},
queryHash=D1046F5E,
planCacheKey=7E518BFB,
"optimizedPipeline=true",
"maxIndexedOrSolutionsReached=false",
"maxIndexedAndSolutionsReached=false",
"maxScansToExplodeReached=false",
"winningPlan=Document"{
{
"queryPlan=Document"{
{
"stage=GROUP",
planNodeId=3,
"inputStage=Document"{
{
"stage=FETCH",
planNodeId=2,
"inputStage=Document"{
{
"stage=IXSCAN",
planNodeId=1,
"keyPattern=Document"{
{
_id=1
}
},
"indexName=_id_",
"isMultiKey=false",
"multiKeyPaths=Document"{
{
"_id="[
]
}
},
"isUnique=true",
"isSparse=false",
"isPartial=false",
indexVersion=2,
"direction=forward",
"indexBounds=Document"{
{
"_id="[
[
"MinKey",
"MaxKey"
]
]
}
}
}
}
}
}
}
}...
}
我认为您的目标应该是让获胜计划(winningPlan)变为 COUNT_SCAN。下面的查询使用 _id 上的索引来计数。
// Explain (with execution statistics) a count that is expressed as a range
// predicate on _id starting at MinKey, followed by a single-group sum.
db.collection.explain("executionStats").aggregate([
  // Range predicate on _id beginning at the smallest possible key.
  { $match: { _id: { $gte: MinKey } } },
  // Constant group key: one output document whose n is the number of matches.
  { $group: { _id: 1, n: { $sum: 1 } } }
]);
注意:这可能不会在分片集合上触发 COUNT_SCAN。
统计:
"winningPlan" : {
"stage" : "COUNT_SCAN",
"keyPattern" : {
"_id" : 1
},
"indexName" : "_id_",
"isMultiKey" : false,
"multiKeyPaths" : {
"_id" : []
},
"isUnique" : true,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"indexBounds" : {
"startKey" : {
"_id" : { "$minKey" : 1 }
},
"startKeyInclusive" : true,
"endKey" : {
"_id" : { "$maxKey" : 1 }
},
"endKeyInclusive" : true
}
}
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 9,
"executionTimeMillis" : 0,
"totalKeysExamined" : 10,
"totalDocsExamined" : 0,
"executionStages" : {
"stage" : "COUNT_SCAN",
"nReturned" : 9,
"executionTimeMillisEstimate" : 0,
"works" : 10,
"advanced" : 9,
"needTime" : 0,
"needYield" : 0,
"saveState" : 1,
"restoreState" : 1,
"isEOF" : 1,
"keysExamined" : 10,
"keyPattern" : {
"_id" : 1
},
"indexName" : "_id_",
"isMultiKey" : false,
"multiKeyPaths" : {
"_id" : []
},
"isUnique" : true,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"indexBounds" : {
"startKey" : {
"_id" : { "$minKey" : 1 }
},
"startKeyInclusive" : true,
"endKey" : {
"_id" : { "$maxKey" : 1 }
},
"endKeyInclusive" : true
}
}