From 4ff7144b7405d625788c53c504746c645f8d9fa9 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Mon, 23 Jan 2017 16:16:15 -0500 Subject: [PATCH] Add tests for top_hits aggregation Add unit tests for `TopHitsAggregator` and convert some snippets in docs for `top_hits` aggregation to `// CONSOLE`. Relates to #22278 Relates to #18160 --- .../resources/checkstyle_suppressions.xml | 1 - .../metrics/tophits/TopHitsAggregator.java | 15 +- .../aggregations/AggregatorTestCase.java | 18 +- .../tophits/TopHitsAggregatorTests.java | 137 +++++++++---- .../metrics/tophits-aggregation.asciidoc | 183 ++++++++++-------- 5 files changed, 225 insertions(+), 129 deletions(-) diff --git a/buildSrc/src/main/resources/checkstyle_suppressions.xml b/buildSrc/src/main/resources/checkstyle_suppressions.xml index 61e9a205d3d2d..9c477c59c6a9e 100644 --- a/buildSrc/src/main/resources/checkstyle_suppressions.xml +++ b/buildSrc/src/main/resources/checkstyle_suppressions.xml @@ -500,7 +500,6 @@ - diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/tophits/TopHitsAggregator.java b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/tophits/TopHitsAggregator.java index d3647f454b220..92041747730a5 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/metrics/tophits/TopHitsAggregator.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/metrics/tophits/TopHitsAggregator.java @@ -122,7 +122,13 @@ public void collect(int docId, long bucket) throws IOException { // In the QueryPhase we don't need this protection, because it is build into the IndexSearcher, // but here we create collectors ourselves and we need prevent OOM because of crazy an offset and size. topN = Math.min(topN, subSearchContext.searcher().getIndexReader().maxDoc()); - TopDocsCollector topLevelCollector = sort != null ? TopFieldCollector.create(sort.sort, topN, true, subSearchContext.trackScores(), subSearchContext.trackScores()) : TopScoreDocCollector.create(topN); + TopDocsCollector topLevelCollector; + if (sort == null) { + topLevelCollector = TopScoreDocCollector.create(topN); + } else { + topLevelCollector = TopFieldCollector.create(sort.sort, topN, true, subSearchContext.trackScores(), + subSearchContext.trackScores()); + } collectors = new TopDocsAndLeafCollector(topLevelCollector); collectors.leafCollector = collectors.topLevelCollector.getLeafCollector(ctx); collectors.leafCollector.setScorer(scorer); @@ -170,8 +176,8 @@ public InternalAggregation buildAggregation(long owningBucketOrdinal) { searchHitFields.sortValues(fieldDoc.fields, subSearchContext.sort().formats); } } - topHits = new InternalTopHits(name, subSearchContext.from(), subSearchContext.size(), topDocs, fetchResult.hits(), pipelineAggregators(), - metaData()); + topHits = new InternalTopHits(name, subSearchContext.from(), subSearchContext.size(), topDocs, fetchResult.hits(), + pipelineAggregators(), metaData()); } return topHits; } @@ -184,7 +190,8 @@ public InternalTopHits buildEmptyAggregation() { } else { topDocs = Lucene.EMPTY_TOP_DOCS; } - return new InternalTopHits(name, subSearchContext.from(), subSearchContext.size(), topDocs, InternalSearchHits.empty(), pipelineAggregators(), metaData()); + return new InternalTopHits(name, subSearchContext.from(), subSearchContext.size(), topDocs, InternalSearchHits.empty(), + pipelineAggregators(), metaData()); } @Override diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java b/core/src/test/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java index 76031df629808..af6def4aaa557 100644 --- a/core/src/test/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java +++ b/core/src/test/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java @@ -94,6 +94,7 @@ public boolean shouldCache(Query query) throws IOException { CircuitBreakerService circuitBreakerService = new NoneCircuitBreakerService(); SearchContext searchContext = mock(SearchContext.class); + when(searchContext.numberOfShards()).thenReturn(1); when(searchContext.searcher()).thenReturn(contextIndexSearcher); when(searchContext.bigArrays()).thenReturn(new MockBigArrays(Settings.EMPTY, circuitBreakerService)); when(searchContext.fetchPhase()) @@ -163,13 +164,17 @@ protected A searchAndReduc List aggs = new ArrayList<> (); Query rewritten = searcher.rewrite(query); Weight weight = searcher.createWeight(rewritten, true); - try (C root = createAggregator(builder, searcher, fieldTypes)) { + C root = createAggregator(builder, searcher, fieldTypes); + try { for (ShardSearcher subSearcher : subSearchers) { - try (C a = createAggregator(builder, subSearcher, fieldTypes)) { + C a = createAggregator(builder, subSearcher, fieldTypes); + try { a.preCollection(); subSearcher.search(weight, a); a.postCollection(); aggs.add(a.buildAggregation(0L)); + } finally { + closeAgg(a); } } if (aggs.isEmpty()) { @@ -179,6 +184,15 @@ protected A searchAndReduc A internalAgg = (A) aggs.get(0).doReduce(aggs, new InternalAggregation.ReduceContext(root.context().bigArrays(), null)); return internalAgg; } + } finally { + closeAgg(root); + } + } + + private void closeAgg(Aggregator agg) { + agg.close(); + for (Aggregator sub : ((AggregatorBase) agg).subAggregators) { + closeAgg(sub); } } diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/metrics/tophits/TopHitsAggregatorTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/metrics/tophits/TopHitsAggregatorTests.java index b151469f62dbb..da8ba39bfafb2 100644 --- a/core/src/test/java/org/elasticsearch/search/aggregations/metrics/tophits/TopHitsAggregatorTests.java +++ b/core/src/test/java/org/elasticsearch/search/aggregations/metrics/tophits/TopHitsAggregatorTests.java @@ -18,14 +18,18 @@ */ package org.elasticsearch.search.aggregations.metrics.tophits; +import org.apache.lucene.analysis.core.KeywordAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.MatchNoDocsQuery; +import org.apache.lucene.search.Query; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.elasticsearch.index.mapper.KeywordFieldMapper; @@ -33,55 +37,114 @@ import org.elasticsearch.index.mapper.Uid; import org.elasticsearch.index.mapper.UidFieldMapper; import org.elasticsearch.search.SearchHits; +import org.elasticsearch.search.aggregations.Aggregation; +import org.elasticsearch.search.aggregations.AggregationBuilder; import org.elasticsearch.search.aggregations.AggregatorTestCase; +import org.elasticsearch.search.aggregations.bucket.terms.Terms; import org.elasticsearch.search.sort.SortOrder; +import java.io.IOException; + +import static org.elasticsearch.search.aggregations.AggregationBuilders.terms; +import static org.elasticsearch.search.aggregations.AggregationBuilders.topHits; + public class TopHitsAggregatorTests extends AggregatorTestCase { + public void testTopLevel() throws Exception { + Aggregation result; + if (randomBoolean()) { + result = testCase(new MatchAllDocsQuery(), topHits("_name").sort("string", SortOrder.DESC)); + } else { + Query query = new QueryParser("string", new KeywordAnalyzer()).parse("d^1000 c^100 b^10 a^1"); + result = testCase(query, topHits("_name")); + } + SearchHits searchHits = ((TopHits) result).getHits(); + assertEquals(3L, searchHits.getTotalHits()); + assertEquals("3", searchHits.getAt(0).getId()); + assertEquals("type", searchHits.getAt(0).getType()); + assertEquals("2", searchHits.getAt(1).getId()); + assertEquals("type", searchHits.getAt(1).getType()); + assertEquals("1", searchHits.getAt(2).getId()); + assertEquals("type", searchHits.getAt(2).getType()); + } + + public void testNoResults() throws Exception { + TopHits result = (TopHits) testCase(new MatchNoDocsQuery(), topHits("_name").sort("string", SortOrder.DESC)); + SearchHits searchHits = ((TopHits) result).getHits(); + assertEquals(0L, searchHits.getTotalHits()); + } + + /** + * Tests {@code top_hits} inside of {@code terms}. While not strictly a unit test this is a fairly common way to run {@code top_hits} + * and serves as a good example of running {@code top_hits} inside of another aggregation. + */ + public void testInsideTerms() throws Exception { + Aggregation result; + if (randomBoolean()) { + result = testCase(new MatchAllDocsQuery(), + terms("term").field("string") + .subAggregation(topHits("top").sort("string", SortOrder.DESC))); + } else { + Query query = new QueryParser("string", new KeywordAnalyzer()).parse("d^1000 c^100 b^10 a^1"); + result = testCase(query, + terms("term").field("string") + .subAggregation(topHits("top"))); + } + Terms terms = (Terms) result; + + // The "a" bucket + TopHits hits = (TopHits) terms.getBucketByKey("a").getAggregations().get("top"); + SearchHits searchHits = (hits).getHits(); + assertEquals(2L, searchHits.getTotalHits()); + assertEquals("2", searchHits.getAt(0).getId()); + assertEquals("1", searchHits.getAt(1).getId()); + + // The "b" bucket + searchHits = ((TopHits) terms.getBucketByKey("b").getAggregations().get("top")).getHits(); + assertEquals(2L, searchHits.getTotalHits()); + assertEquals("3", searchHits.getAt(0).getId()); + assertEquals("1", searchHits.getAt(1).getId()); - public void testTermsAggregator() throws Exception { + // The "c" bucket + searchHits = ((TopHits) terms.getBucketByKey("c").getAggregations().get("top")).getHits(); + assertEquals(1L, searchHits.getTotalHits()); + assertEquals("2", searchHits.getAt(0).getId()); + + // The "d" bucket + searchHits = ((TopHits) terms.getBucketByKey("d").getAggregations().get("top")).getHits(); + assertEquals(1L, searchHits.getTotalHits()); + assertEquals("3", searchHits.getAt(0).getId()); + } + + private static final MappedFieldType STRING_FIELD_TYPE = new KeywordFieldMapper.KeywordFieldType(); + static { + STRING_FIELD_TYPE.setName("string"); + STRING_FIELD_TYPE.setHasDocValues(true); + } + + private Aggregation testCase(Query query, AggregationBuilder builder) throws IOException { Directory directory = newDirectory(); - RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory); - Document document = new Document(); - document.add(new Field(UidFieldMapper.NAME, Uid.createUid("type", "1"), UidFieldMapper.Defaults.FIELD_TYPE)); - document.add(new SortedSetDocValuesField("string", new BytesRef("a"))); - document.add(new SortedSetDocValuesField("string", new BytesRef("b"))); - indexWriter.addDocument(document); - document = new Document(); - document.add(new Field(UidFieldMapper.NAME, Uid.createUid("type", "2"), UidFieldMapper.Defaults.FIELD_TYPE)); - document.add(new SortedSetDocValuesField("string", new BytesRef("c"))); - document.add(new SortedSetDocValuesField("string", new BytesRef("a"))); - indexWriter.addDocument(document); - document = new Document(); - document.add(new Field(UidFieldMapper.NAME, Uid.createUid("type", "3"), UidFieldMapper.Defaults.FIELD_TYPE)); - document.add(new SortedSetDocValuesField("string", new BytesRef("b"))); - document.add(new SortedSetDocValuesField("string", new BytesRef("d"))); - indexWriter.addDocument(document); - indexWriter.close(); + RandomIndexWriter iw = new RandomIndexWriter(random(), directory); + iw.addDocument(document("1", "a", "b")); + iw.addDocument(document("2", "c", "a")); + iw.addDocument(document("3", "b", "d")); + iw.close(); IndexReader indexReader = DirectoryReader.open(directory); IndexSearcher indexSearcher = newSearcher(indexReader, true, true); - MappedFieldType fieldType = new KeywordFieldMapper.KeywordFieldType(); - fieldType.setName("string"); - fieldType.setHasDocValues(true ); - TopHitsAggregationBuilder aggregationBuilder = new TopHitsAggregationBuilder("_name"); - aggregationBuilder.sort("string", SortOrder.DESC); - try (TopHitsAggregator aggregator = createAggregator(aggregationBuilder, indexSearcher, fieldType)){ - aggregator.preCollection(); - indexSearcher.search(new MatchAllDocsQuery(), aggregator); - aggregator.postCollection(); - TopHits topHits = (TopHits) aggregator.buildAggregation(0L); - SearchHits searchHits = topHits.getHits(); - assertEquals(3L, searchHits.getTotalHits()); - assertEquals("3", searchHits.getAt(0).getId()); - assertEquals("type", searchHits.getAt(0).getType()); - assertEquals("2", searchHits.getAt(1).getId()); - assertEquals("type", searchHits.getAt(1).getType()); - assertEquals("1", searchHits.getAt(2).getId()); - assertEquals("type", searchHits.getAt(2).getType()); - } + Aggregation result = searchAndReduce(indexSearcher, query, builder, STRING_FIELD_TYPE); indexReader.close(); directory.close(); + return result; } + private Document document(String id, String... stringValues) { + Document document = new Document(); + document.add(new Field(UidFieldMapper.NAME, Uid.createUid("type", id), UidFieldMapper.Defaults.FIELD_TYPE)); + for (String stringValue : stringValues) { + document.add(new Field("string", stringValue, STRING_FIELD_TYPE)); + document.add(new SortedSetDocValuesField("string", new BytesRef(stringValue))); + } + return document; + } } diff --git a/docs/reference/aggregations/metrics/tophits-aggregation.asciidoc b/docs/reference/aggregations/metrics/tophits-aggregation.asciidoc index 20312a7de64be..6935354c0acee 100644 --- a/docs/reference/aggregations/metrics/tophits-aggregation.asciidoc +++ b/docs/reference/aggregations/metrics/tophits-aggregation.asciidoc @@ -33,27 +33,26 @@ only the title field is being included in the source. [source,js] -------------------------------------------------- +POST /sales/_search?size=0 { "aggs": { - "top-tags": { + "top_tags": { "terms": { - "field": "tags", + "field": "type", "size": 3 }, "aggs": { - "top_tag_hits": { + "top_sales_hits": { "top_hits": { "sort": [ { - "last_activity_date": { + "date": { "order": "desc" } } ], "_source": { - "includes": [ - "title" - ] + "includes": [ "date", "price" ] }, "size" : 1 } @@ -63,90 +62,105 @@ only the title field is being included in the source. } } -------------------------------------------------- +// CONSOLE +// TEST[setup:sales] -Possible response snippet: +Possible response: [source,js] -------------------------------------------------- -"aggregations": { - "top-tags": { - "buckets": [ - { - "key": "windows-7", - "doc_count": 25365, - "top_tags_hits": { - "hits": { - "total": 25365, - "max_score": 1, - "hits": [ - { - "_index": "stack", - "_type": "question", - "_id": "602679", - "_score": 1, - "_source": { - "title": "Windows port opening" - }, - "sort": [ - 1370143231177 - ] - } - ] - } - } - }, - { - "key": "linux", - "doc_count": 18342, - "top_tags_hits": { - "hits": { - "total": 18342, - "max_score": 1, - "hits": [ - { - "_index": "stack", - "_type": "question", - "_id": "602672", - "_score": 1, - "_source": { - "title": "Ubuntu RFID Screensaver lock-unlock" - }, - "sort": [ - 1370143379747 - ] - } - ] - } - } - }, - { - "key": "windows", - "doc_count": 18119, - "top_tags_hits": { - "hits": { - "total": 18119, - "max_score": 1, - "hits": [ - { - "_index": "stack", - "_type": "question", - "_id": "602678", - "_score": 1, - "_source": { - "title": "If I change my computers date / time, what could be affected?" - }, - "sort": [ - 1370142868283 - ] - } - ] - } - } - } - ] +{ + ... + "aggregations": { + "top_tags": { + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 0, + "buckets": [ + { + "key": "hat", + "doc_count": 3, + "top_sales_hits": { + "hits": { + "total": 3, + "max_score": null, + "hits": [ + { + "_index": "sales", + "_type": "sale", + "_id": "AVnNBmauCQpcRyxw6ChK", + "_source": { + "date": "2015/03/01 00:00:00", + "price": 200 + }, + "sort": [ + 1425168000000 + ], + "_score": null + } + ] + } + } + }, + { + "key": "t-shirt", + "doc_count": 3, + "top_sales_hits": { + "hits": { + "total": 3, + "max_score": null, + "hits": [ + { + "_index": "sales", + "_type": "sale", + "_id": "AVnNBmauCQpcRyxw6ChL", + "_source": { + "date": "2015/03/01 00:00:00", + "price": 175 + }, + "sort": [ + 1425168000000 + ], + "_score": null + } + ] + } + } + }, + { + "key": "bag", + "doc_count": 1, + "top_sales_hits": { + "hits": { + "total": 1, + "max_score": null, + "hits": [ + { + "_index": "sales", + "_type": "sale", + "_id": "AVnNBmatCQpcRyxw6ChH", + "_source": { + "date": "2015/01/01 00:00:00", + "price": 150 + }, + "sort": [ + 1420070400000 + ], + "_score": null + } + ] + } + } + } + ] + } } } -------------------------------------------------- +// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] +// TESTRESPONSE[s/AVnNBmauCQpcRyxw6ChK/$body.aggregations.top_tags.buckets.0.top_sales_hits.hits.hits.0._id/] +// TESTRESPONSE[s/AVnNBmauCQpcRyxw6ChL/$body.aggregations.top_tags.buckets.1.top_sales_hits.hits.hits.0._id/] +// TESTRESPONSE[s/AVnNBmatCQpcRyxw6ChH/$body.aggregations.top_tags.buckets.2.top_sales_hits.hits.hits.0._id/] + ==== Field collapse example @@ -184,7 +198,6 @@ relevancy order of the most relevant document in a bucket. "top_hit" : { "max": { "script": { - "lang": "painless", "inline": "_score" } }