我希望对 significant terms(重要词项)聚合返回的词项进行分组。
例如,significant terms 聚合返回的响应如下:
[
{
"key" : "ok",
"doc_count" : 200,
"score" : 8.583258052060206E-4,
"bg_count" : 213
},
{
"key" : "okay",
"doc_count" : 117,
"score" : 4.814546694690713E-4,
"bg_count" : 126
},
{
"key" : "something else",
"doc_count" : 100,
"score" : 2.3240213379936128E-4,
"bg_count" : 78
}
]
我希望将其转换为类似下面这样的分组结果:
[
{
"grouped_keys" : ["ok","okay"],
"doc_count" : 317,
"score" : 8.583258052060206E-4,
"bg_count" : 339
},
{
"grouped_keys" : ["something else"],
"doc_count" : 100,
"score" : 2.3240213379936128E-4,
"bg_count" : 78
}
]
我还没有真正做过多少尝试,因为我不知道从哪里入手。我读了一些链接,但不太确定它们与我的问题有多相关:https://discuss.elastic.co/t/group-documents-by-similarity-using-elser/342913/3
我能想到的唯一方法是在运行时字段(runtime field)中手动对词项进行分组。
文档
PUT /grouped_keys/_bulk
{"create":{"_id":1}}
{"key":"ok","fictive":1}
{"create":{"_id":2}}
{"key":"okay","fictive":1}
{"create":{"_id":3}}
{"key":"okay","fictive":1}
{"create":{"_id":4}}
{"key":"ok","fictive":1}
{"create":{"_id":5}}
{"key":"something else","fictive":1}
{"create":{"_id":6}}
{"key":"ok","fictive":1}
{"create":{"_id":7}}
{"key":"something else","fictive":1}
{"create":{"_id":8}}
{"key":"ok","fictive":2}
{"create":{"_id":9}}
{"key":"something else","fictive":2}
significant_terms 聚合查询
GET /grouped_keys/_search?filter_path=aggregations
{
"runtime_mappings": {
"grouped_key": {
"type": "keyword",
"script": {
"source": """
// Runtime-field script: maps each document's raw key onto the label of the
// group that contains it, so that significant_terms buckets by group.
// Each inner list is one group; the label is its members joined by ", ".
// The last entry is the fallback group for keys that match no other group.
List groupedKeys = new ArrayList();
groupedKeys.add(['ok', 'okay']);
groupedKeys.add(['something else']);
groupedKeys.add(['default key']);
List fallbackGroup = groupedKeys[groupedKeys.size() - 1];
// Reading .value on a document that has no value for the field throws and
// fails the whole search, so such documents simply get no grouped_key.
if (doc['key.keyword'].size() == 0) {
  return;
}
String key = doc['key.keyword'].value;
for (List groupedKey : groupedKeys) {
  // contains() works regardless of element order; Collections.binarySearch
  // is only defined for lists sorted in ascending order.
  if (groupedKey.contains(key)) {
    emit(String.join(', ', groupedKey));
    return;
  }
}
emit(String.join(', ', fallbackGroup));
"""
}
}
},
"fields": [
"grouped_key"
],
"query": {
"term": {
"fictive": "1"
}
},
"aggs": {
"by_grouped_key": {
"significant_terms": {
"field": "grouped_key"
}
}
}
}
响应
{
"aggregations" : {
"by_grouped_key" : {
"doc_count" : 7,
"bg_count" : 9,
"buckets" : [
{
"key" : "ok, okay",
"doc_count" : 5,
"score" : 0.05102040816326537,
"bg_count" : 6
}
]
}
}
}