假设 Elasticsearch 的单个索引中有这样一个数据集:
entityId | created    | status
---------+------------+-----------
1        | 2000/01/01 | 草案
1        | 2001/01/02 | 已批准
2        | 2000/01/01 | 草案
2        | 2000/01/02 | 已批准
2        | 2001/01/03 | 已拒绝
3        | 2000/01/01 | 草案
3        | 2001/01/03 | 已批准
我只想筛选出最新状态为“已批准”的实体。
因此,我一直在尝试使用聚合和子聚合,并且设法得到了每个实体只保留最新状态的结果,如下所示:
{
"size": 0,
"aggs": {
"newest-event-query": {
"terms": {
"field": "entityId"
},
"aggs": {
"newest-event": {
"top_hits": {
"size": 1,
"sort": [
{
"created": {
"order": "desc"
}
}
]
}
}
}
}
}
}
它应该给出这样的结果:
entityId | created    | status
---------+------------+-----------
1        | 2001/01/02 | 已批准
2        | 2001/01/03 | 已拒绝
3        | 2001/01/03 | 已批准
但是我想进一步过滤该结果,使其仅包括批准的记录(1、3),然后最终能够查询该结果。
向top_hits aggs添加额外的aggs似乎不起作用:
{
"size": 0,
"aggs": {
"newest-event-query": {
"terms": {
"field": "entityId"
},
"aggs": {
"newest-event": {
"top_hits": {
"size": 1,
"sort": [
{
"created": {
"order": "desc"
}
}
],
"aggs": {
"approved-only": {
"filter": {
"term": {
"status": "approved"
}
}
}
}
}
}
}
}
}
}
结果:
"error": "SearchPhaseExecutionException[Failed to execute phase [query], all shards failed; shardFailures {[gupa9nwpQWmGa3JqFmF2NA][creations][0]: SearchParseException[[creations][0]: from[-1],size[0]: Parse Failure [Failed to parse source [{"size":0,"aggs":{"newest-event-query":{"terms":{"field":"entityId"},"aggs":{"newest-event":{"top_hits":{"size":1,"sort":[{"created":{"order":"desc"}}],"aggs":{"aproved-only":{"filter":{"term":{"status":"approved"}}}}}}}}}}]]]; nested: SearchParseException[[creations][0]: from[-1],size[0]: Parse Failure [Unknown key for a START_OBJECT in [newest-event]: [aggs].]]; }{[gupa9nwpQWmGa3JqFmF2NA][events][0]: SearchParseException[[events][0]: from[-1],size[0]: Parse Failure [Failed to parse source [{"size":0,"aggs":{"newest-event-query":{"terms":{"field":"entityId"},"aggs":{"newest-event":{"top_hits":{"size":1,"sort":[{"created":{"order":"desc"}}],"aggs":{"aproved-only":{"filter":{"term":{"status":"approved"}}}}}}}}}}]]]; nested: SearchParseException[[events][0]: from[-1],size[0]: Parse Failure [Unknown key for a START_OBJECT in [newest-event]: [aggs].]]; }{[gupa9nwpQWmGa3JqFmF2NA][creations][1]: SearchParseException[[creations][1]: from[-1],size[0]: Parse Failure [Failed to parse source [{"size":0,"aggs":{"newest-event-query":{"terms":{"field":"entityId"},"aggs":{"newest-event":{"top_hits":{"size":1,"sort":[{"created":{"order":"desc"}}],"aggs":{"aproved-only":{"filter":{"term":{"status":"approved"}}}}}}}}}}]]]; nested: SearchParseException[[creations][1]: from[-1],size[0]: Parse Failure [Unknown key for a START_OBJECT in [newest-event]: [aggs].]]; }{[gupa9nwpQWmGa3JqFmF2NA][events][1]: SearchParseException[[events][1]: from[-1],size[0]: Parse Failure [Failed to parse source [{"size":0,"aggs":{"newest-event-query":{"terms":{"field":"entityId"},"aggs":{"newest-event":{"top_hits":{"size":1,"sort":[{"created":{"order":"desc"}}],"aggs":{"aproved-only":{"filter":{"term":{"status":"approved"}}}}}}}}}}]]]; nested: 
SearchParseException[[events][1]: from[-1],size[0]: Parse Failure [Unknown key for a START_OBJECT in [newest-event]: [aggs].]]; }{[gupa9nwpQWmGa3JqFmF2NA][creations][2]: SearchParseException[[creations][2]: from[-1],size[0]: Parse Failure [Failed to parse source [{"size":0,"aggs":{"newest-event-query":{"terms":{"field":"entityId"},"aggs":{"newest-event":{"top_hits":{"size":1,"sort":[{"created":{"order":"desc"}}],"aggs":{"aproved-only":{"filter":{"term":{"status":"approved"}}}}}}}}}}]]]; nested: SearchParseException[[creations][2]: from[-1],size[0]: Parse Failure [Unknown key for a START_OBJECT in [newest-event]: [aggs].]]; }{[gupa9nwpQWmGa3JqFmF2NA][events][2]: SearchParseException[[events][2]: from[-1],size[0]: Parse Failure [Failed to parse source [{"size":0,"aggs":{"newest-event-query":{"terms":{"field":"entityId"},"aggs":{"newest-event":{"top_hits":{"size":1,"sort":[{"created":{"order":"desc"}}],"aggs":{"aproved-only":{"filter":{"term":{"status":"approved"}}}}}}}}}}]]]; nested: SearchParseException[[events][2]: from[-1],size[0]: Parse Failure [Unknown key for a START_OBJECT in [newest-event]: [aggs].]]; }{[gupa9nwpQWmGa3JqFmF2NA][creations][3]: SearchParseException[[creations][3]: from[-1],size[0]: Parse Failure [Failed to parse source [{"size":0,"aggs":{"newest-event-query":{"terms":{"field":"entityId"},"aggs":{"newest-event":{"top_hits":{"size":1,"sort":[{"created":{"order":"desc"}}],"aggs":{"aproved-only":{"filter":{"term":{"status":"approved"}}}}}}}}}}]]]; nested: SearchParseException[[creations][3]: from[-1],size[0]: Parse Failure [Unknown key for a START_OBJECT in [newest-event]: [aggs].]]; }{[gupa9nwpQWmGa3JqFmF2NA][events][3]: SearchParseException[[events][3]: from[-1],size[0]: Parse Failure [Failed to parse source [{"size":0,"aggs":{"newest-event-query":{"terms":{"field":"entityId"},"aggs":{"newest-event":{"top_hits":{"size":1,"sort":[{"created":{"order":"desc"}}],"aggs":{"aproved-only":{"filter":{"term":{"status":"approved"}}}}}}}}}}]]]; nested: 
SearchParseException[[events][3]: from[-1],size[0]: Parse Failure [Unknown key for a START_OBJECT in [newest-event]: [aggs].]]; }{[gupa9nwpQWmGa3JqFmF2NA][creations][4]: SearchParseException[[creations][4]: from[-1],size[0]: Parse Failure [Failed to parse source [{"size":0,"aggs":{"newest-event-query":{"terms":{"field":"entityId"},"aggs":{"newest-event":{"top_hits":{"size":1,"sort":[{"created":{"order":"desc"}}],"aggs":{"aproved-only":{"filter":{"term":{"status":"approved"}}}}}}}}}}]]]; nested: SearchParseException[[creations][4]: from[-1],size[0]: Parse Failure [Unknown key for a START_OBJECT in [newest-event]: [aggs].]]; }{[gupa9nwpQWmGa3JqFmF2NA][events][4]: SearchParseException[[events][4]: from[-1],size[0]: Parse Failure [Failed to parse source [{"size":0,"aggs":{"newest-event-query":{"terms":{"field":"entityId"},"aggs":{"newest-event":{"top_hits":{"size":1,"sort":[{"created":{"order":"desc"}}],"aggs":{"aproved-only":{"filter":{"term":{"status":"approved"}}}}}}}}}}]]]; nested: SearchParseException[[events][4]: from[-1],size[0]: Parse Failure [Unknown key for a START_OBJECT in [newest-event]: [aggs].]]; }]",
"status": 400
感谢任何帮助。
编辑:直接按“已批准”状态预先过滤是行不通的,因为实体可能会从已批准再转为其他状态,而我始终需要按最新状态进行过滤。这样做的出发点是构建一种不可变的数据结构——单个实体可以经历很多阶段,但我们始终只应查询它最新的那条记录。
编辑 2:为了找到解决方案,我还研究了父子(parent/child)结构,虽然已经比较接近,但仍有一些限制,例如 has_parent 或 has_child 需要有固定的“id”。另一个显而易见且高效的方案是在写入时直接标记最新的文档(例如用一个布尔字段),但我需要原子性,而在旧文档上重置该布尔值、再在新文档上设置它,并不是一个原子操作。
我使用了 terms aggregation 和 bucket selector aggregation。在每个 terms 桶(即每个 entityId)下,我用 created 字段上的 max 聚合得到最新条目的日期,并用 filter 聚合加 max 聚合得到状态为 approved 的最新日期。然后通过 bucket selector,只保留最新日期与最新批准日期相同的桶。
Entity: 1                                   --> using terms aggregation
    "Latest created date": "2001-01-02"     --> using max aggregation
    "Latest approved doc":                  --> using filter aggregation
        "Latest approved date": "2001-01-02"    --> using max aggregation
    "Bucket where Latest created date == Latest approved doc>Latest approved date"    --> using bucket selector aggregation
映射:
{ "index90" : { "mappings" : { "properties" : { "created" : { "type" : "date", "format" : "[yyyy-MM-dd]" }, "entityId" : { "type" : "integer" }, "status" : { "type" : "text", "fields" : { "keyword" : { "type" : "text" } } } } } } }
数据:
"hits" : [ { "_index" : "index90", "_type" : "_doc", "_id" : "xZsmY3EBdTQt60iNXDQB", "_score" : 1.0, "_source" : { "entityId" : 1, "created" : "2000-01-01", "status" : "draft" } }, { "_index" : "index90", "_type" : "_doc", "_id" : "xpsmY3EBdTQt60iNojQc", "_score" : 1.0, "_source" : { "entityId" : 1, "created" : "2001-01-02", "status" : "approved" } }, { "_index" : "index90", "_type" : "_doc", "_id" : "x5smY3EBdTQt60iN7DQc", "_score" : 1.0, "_source" : { "entityId" : 2, "created" : "2000-01-01", "status" : "draft" } }, { "_index" : "index90", "_type" : "_doc", "_id" : "yJsnY3EBdTQt60iNAzT7", "_score" : 1.0, "_source" : { "entityId" : 2, "created" : "2000-01-02", "status" : "approved" } }, { "_index" : "index90", "_type" : "_doc", "_id" : "yZsnY3EBdTQt60iNIjQY", "_score" : 1.0, "_source" : { "entityId" : 2, "created" : "2000-01-03", "status" : "rejected" } } ]
查询:
{ "aggs": { "entitites": { "terms": { "field": "entityId", "size": 10 }, "aggs": { "latest_entry": { "max": { "field": "created" } }, "latest_approved_entry":{ "filter": { "term": { "status.keyword": "approved" } }, "aggs": { "approved_date": { "max": { "field": "created" } } } }, "select_bucket_with":{ "bucket_selector": { "buckets_path": { "latest_entry":"latest_entry", "latest_approved_entry":"latest_approved_entry>approved_date" }, "script": "if(params['latest_entry']==params['latest_approved_entry']) return true;" } } } } } }
结果:
"aggregations" : {
"entitites" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 1,
"doc_count" : 2,
"latest_entry" : {
"value" : 9.783936E11,
"value_as_string" : "2001-01-02"
},
"latest_approved_entry" : {
"doc_count" : 1,
"approved_date" : {
"value" : 9.783936E11,
"value_as_string" : "2001-01-02"
}
}
}
]
}
}