在 Elasticsearch 聚合中提升(boost)存储桶

问题描述 投票:0回答:1

我有一个 elasticsearch 查询,用于通过文档中名为

category
的文本字段对结果进行分组。我的 _docs 中的另一个键称为整数类型的
id
。现在,有两个要求:(1)我的聚合结果(存储桶响应)必须先包含某些“id”所属的存储桶/类别,其余的 _docs(属于同一类别或其他类别)排在其后; (2) 结果必须根据 priority 和 itemPriority 字段进行排序,这两个字段也是整数。这是我的查询,一次返回 3 个类别,每个类别一次返回 10 个 _docs:

{
    "size": 0,
    "sort": [
        {
            "itemPriority": "asc"
        },
        {
            "priority": "asc"
        }
    ],
    "query": {
        "bool": {
            "should": [
                {
                    "terms": {
                        "id": [
                            8061848,
                            8061847,
                            8061846,
                            8061845,
                            8061844,
                            8061843,
                            8061842
                        ]
                    }
                }
            ],
            "must": [
                {
                    "match_all": {}
                },
                {
                    "nested": {
                        "path": "zipData.zipDistribution",
                        "query": {
                            "bool": {
                                "must_not": [
                                    {
                                        "match_phrase_prefix": {
                                            "zipData.zipDistribution.flags.itemStatus": "Removed from catalog"
                                        }
                                    },
                                    {
                                        "match_phrase_prefix": {
                                            "zipData.zipDistribution.flags.itemStatus": "Out of Stock"
                                        }
                                    }
                                ],
                                "must": [
                                    {
                                        "match": {
                                            "zipData.zipDistribution.zip": "55311"
                                        }
                                    }
                                ]
                            }
                        }
                    }
                }
            ]
        }
    },
    "aggs": {
        "categories": {
            "terms": {
                "field": "category",
                "size": 100
            },
            "aggs": {
                "filtered_docs": {
                    "top_hits": {
                        "_source": {
                            "includes": [
                                "id",
                                "name"
                            ]
                        },
                        "from": 0,
                        "size": 10
                    }
                },
                "bucket_sort": {
                    "bucket_sort": {
                        "from": 0,
                        "size": 3
                    }
                }
            }
        }
    }
}

...问题是,这不会按照我在
terms
子句中添加的
id
数组重新排列聚合结果。此外,如果不使用
should
,而是将
term
子句推入
must
中,我只会得到带有这些
id
的存储桶,而不会返回其他
_doc
。 总而言之,我需要一个解决方案,首先使用这些
id
获取类别,其余的 _docs/类别排在其后。而整个数据最后需要根据上面提到的两个字段 priority 和 itemPriority 进行排序。请帮忙!

elasticsearch elasticsearch-aggregation
1个回答
0
投票

我通过运行时字段满足了您的第一个要求

简化的映射(mapping)

PUT /categories
{
    "mappings": {
        "properties": {
            "id": {
                "type": "integer"
            },
            "category": {
                "type": "keyword"
            }
        }
    }
}

文档

PUT /categories/_bulk
{"create":{"_id":1}}
{"id": 8001, "category": "1"}
{"create":{"_id":2}}
{"id": 8002, "category": "2"}
{"create":{"_id":3}}
{"id": 8003, "category": "3"}
{"create":{"_id":4}}
{"id": 8004, "category": "2"}
{"create":{"_id":5}}
{"id": 8005, "category": "1"}
{"create":{"_id":6}}
{"id": 8006, "category": "1"}

使用脚本进行聚合查询

GET /categories/_search?filter_path=aggregations
{
    "runtime_mappings": {
        "category_filterable": {
            "type": "keyword",
            "script": {
                "source": """
                    long longDocumentId = doc['id'].value;
                    Integer documentId = Integer.valueOf((int) longDocumentId);
                    List certainIds = params.certain_ids;
                    String documentCategory = doc['category'].value;
                    
                    if (certainIds.contains(documentId)) {
                        emit(documentCategory);
                    } else {
                        String categoryNameFormat = params.other_category_name_format;
                        def[] args = [documentCategory].toArray();
                        emit(String.format(categoryNameFormat, args));
                    }
                """,
                "params": {
                    "certain_ids": [
                        8001,
                        8002,
                        8004,
                        8005
                    ],
                    "other_category_name_format": "%s_other_doc"
                }
            }
        }
    },
    "aggs": {
        "per_category": {
            "terms": {
                "field": "category_filterable"
            }
        }
    }
}

响应

{
    "aggregations" : {
        "per_category" : {
            "doc_count_error_upper_bound" : 0,
            "sum_other_doc_count" : 0,
            "buckets" : [
                {
                    "key" : "1",
                    "doc_count" : 2
                },
                {
                    "key" : "2",
                    "doc_count" : 2
                },
                {
                    "key" : "1_other_doc",
                    "doc_count" : 1
                },
                {
                    "key" : "3_other_doc",
                    "doc_count" : 1
                }
            ]
        }
    }
}
© www.soinside.com 2019 - 2024. All rights reserved.