我有一个文档,其中包含一个对象数组,数组中的每个对象都有一个以逗号分隔的标签属性。其他文档中也可能包含相同的玩具。
[
  {
    "documentId": 1,
    "toys": [
      {
        "id": "A",
        "tags": "foo, bar, baz"
      },
      {
        "id": "B",
        "tags": "test"
      },
      {
        "id": "C",
        "tags": "bar"
      }
    ]
  },
  {
    "documentId": 2,
    "toys": [
      {
        "id": "A",
        "tags": "foo, bar, baz"
      },
      {
        "id": "B",
        "tags": "test"
      }
    ]
  }
]
我需要根据用户的搜索(在标签中搜索)得到一个不重复的玩具对象列表。下面是我目前想出的查询,但它的去重是针对整个索引的,并不完全符合我的需求。
GET my-index/_search
{
  "size": 0,
  "query": {
    "nested": {
      "path": "toys",
      "query": {
        "bool": {
          "must": [
            {
              "match": {
                "toys.tags.ngram": {
                  "analyzer": "standard",
                  "query": "bar",
                  "operator": "AND"
                }
              }
            }
          ],
          "should": [
            {
              "match_phrase": {
                "toys.tags": {
                  "query": "bar"
                }
              }
            }
          ]
        }
      }
    }
  },
  "aggs": {
    "top_matches": {
      "nested": {
        "path": "toys"
      },
      "aggs": {
        "top-result": {
          "top_hits": {
            "size": 1
          }
        }
      }
    }
  }
}
我希望根据“bar”的搜索得到 A 和 C。有什么想法吗?
您可以使用父级和子级 `terms` 聚合。查询文本放在父级 `terms` 聚合的 `include` 参数中。搜索查询的结果可能包含重复项,但聚合查询的结果不会有重复。
映射
PUT /my-index
{
  "mappings": {
    "properties": {
      "toys": {
        "type": "nested",
        "properties": {
          "id": {
            "type": "keyword"
          },
          "tags": {
            "type": "text",
            "fielddata": true
          }
        }
      }
    }
  }
}
`tags` 字段必须将 `fielddata` 参数设置为 `true`,因为在该字段上执行 `terms` 聚合需要它。
您的文档
PUT /my-index/_bulk
{"create":{"_id":"1"}}
{"documentId":1,"toys":[{"id":"A","tags":"foo, bar, baz"},{"id":"B","tags":"test"},{"id":"C","tags":"bar"}]}
{"create":{"_id":"2"}}
{"documentId":2,"toys":[{"id":"A","tags":"foo, bar, baz"},{"id":"B","tags":"test"}]}
聚合查询
GET /my-index/_search?filter_path=aggregations.inside_toys.by_tag
{
  "size": 0,
  "aggs": {
    "inside_toys": {
      "nested": {
        "path": "toys"
      },
      "aggs": {
        "by_tag": {
          "terms": {
            "field": "toys.tags",
            "include": ["bar"],
            "size": 10
          },
          "aggs": {
            "by_id": {
              "terms": {
                "field": "toys.id",
                "size": 10
              }
            }
          }
        }
      }
    }
  }
}
响应
{
  "aggregations": {
    "inside_toys": {
      "by_tag": {
        "doc_count_error_upper_bound": 0,
        "sum_other_doc_count": 0,
        "buckets": [
          {
            "key": "bar",
            "doc_count": 3,
            "by_id": {
              "doc_count_error_upper_bound": 0,
              "sum_other_doc_count": 0,
              "buckets": [
                {
                  "key": "A",
                  "doc_count": 2
                },
                {
                  "key": "C",
                  "doc_count": 1
                }
              ]
            }
          }
        ]
      }
    }
  }
}