diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/InternalMappedSignificantTerms.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/InternalMappedSignificantTerms.java index 92995d5fab464..34fada101f81e 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/InternalMappedSignificantTerms.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/InternalMappedSignificantTerms.java @@ -28,6 +28,7 @@ import java.io.IOException; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.function.Function; import java.util.stream.Collectors; @@ -99,4 +100,21 @@ protected long getSupersetSize() { protected SignificanceHeuristic getSignificanceHeuristic() { return significanceHeuristic; } + + @Override + protected boolean doEquals(Object obj) { + InternalMappedSignificantTerms that = (InternalMappedSignificantTerms) obj; + return super.doEquals(obj) + && Objects.equals(format, that.format) + && subsetSize == that.subsetSize + && supersetSize == that.supersetSize + && Objects.equals(significanceHeuristic, that.significanceHeuristic) + && Objects.equals(buckets, that.buckets) + && Objects.equals(bucketMap, that.bucketMap); + } + + @Override + protected int doHashCode() { + return Objects.hash(super.doHashCode(), format, subsetSize, supersetSize, significanceHeuristic, buckets, bucketMap); + } } diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/InternalSignificantTerms.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/InternalSignificantTerms.java index 6fcee8e937e53..6414a5eb8255a 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/InternalSignificantTerms.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/InternalSignificantTerms.java @@ -36,6 +36,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Objects; import static java.util.Collections.unmodifiableList; @@ -127,6 +128,27 @@ public B reduce(List buckets, ReduceContext context) { public double getSignificanceScore() { return score; } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + + Bucket that = (Bucket) o; + return bucketOrd == that.bucketOrd && + Double.compare(that.score, score) == 0 && + Objects.equals(aggregations, that.aggregations) && + Objects.equals(format, that.format); + } + + @Override + public int hashCode() { + return Objects.hash(getClass(), bucketOrd, aggregations, score, format); + } } protected final int requiredSize; @@ -226,4 +248,16 @@ public InternalAggregation doReduce(List aggregations, Redu protected abstract long getSupersetSize(); protected abstract SignificanceHeuristic getSignificanceHeuristic(); + + @Override + protected int doHashCode() { + return Objects.hash(minDocCount, requiredSize); + } + + @Override + protected boolean doEquals(Object obj) { + InternalSignificantTerms that = (InternalSignificantTerms) obj; + return Objects.equals(minDocCount, that.minDocCount) + && Objects.equals(requiredSize, that.requiredSize); + } } diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTerms.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTerms.java index ae33cc87308b8..ff2ea5b045b05 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTerms.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTerms.java @@ -29,6 +29,7 @@ import java.io.IOException; import java.util.List; import java.util.Map; +import java.util.Objects; /** * Result of the running the significant terms aggregation on a numeric field. @@ -109,6 +110,16 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.endObject(); return builder; } + + @Override + public boolean equals(Object obj) { + return super.equals(obj) && Objects.equals(term, ((Bucket) obj).term); + } + + @Override + public int hashCode() { + return Objects.hash(super.hashCode(), term); + } } public SignificantLongTerms(String name, int requiredSize, long minDocCount, List pipelineAggregators, diff --git a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantStringTerms.java b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantStringTerms.java index e73da337a0fc0..b2c1f9ce1b047 100644 --- a/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantStringTerms.java +++ b/core/src/main/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantStringTerms.java @@ -30,6 +30,7 @@ import java.io.IOException; import java.util.List; import java.util.Map; +import java.util.Objects; /** * Result of the running the significant terms aggregation on a String field. @@ -111,6 +112,16 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.endObject(); return builder; } + + @Override + public boolean equals(Object obj) { + return super.equals(obj) && Objects.equals(termBytes, ((SignificantStringTerms.Bucket) obj).termBytes); + } + + @Override + public int hashCode() { + return Objects.hash(super.hashCode(), termBytes); + } } public SignificantStringTerms(String name, int requiredSize, long minDocCount, List pipelineAggregators, diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/InternalSignificantTermsTestCase.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/InternalSignificantTermsTestCase.java new file mode 100644 index 0000000000000..dde1562e1afb8 --- /dev/null +++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/InternalSignificantTermsTestCase.java @@ -0,0 +1,68 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.aggregations.bucket.significant; + +import org.elasticsearch.search.aggregations.InternalAggregationTestCase; +import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +public abstract class InternalSignificantTermsTestCase extends InternalAggregationTestCase> { + + @Override + protected InternalSignificantTerms createUnmappedInstance(String name, + List pipelineAggregators, + Map metaData) { + InternalSignificantTerms testInstance = createTestInstance(name, pipelineAggregators, metaData); + return new UnmappedSignificantTerms(name, testInstance.requiredSize, testInstance.minDocCount, pipelineAggregators, metaData); + } + + @Override + protected void assertReduced(InternalSignificantTerms reduced, List> inputs) { + assertEquals(inputs.stream().mapToLong(InternalSignificantTerms::getSubsetSize).sum(), reduced.getSubsetSize()); + assertEquals(inputs.stream().mapToLong(InternalSignificantTerms::getSupersetSize).sum(), reduced.getSupersetSize()); + + List> counts = Arrays.asList( + SignificantTerms.Bucket::getSubsetDf, + SignificantTerms.Bucket::getSupersetDf, + SignificantTerms.Bucket::getDocCount + ); + + for (Function count : counts) { + Map reducedCounts = toCounts(reduced.getBuckets().stream(), count); + Map totalCounts = toCounts(inputs.stream().map(SignificantTerms::getBuckets).flatMap(List::stream), count); + + Map expectedReducedCounts = new HashMap<>(totalCounts); + expectedReducedCounts.keySet().retainAll(reducedCounts.keySet()); + assertEquals(expectedReducedCounts, reducedCounts); + } + } + + private static Map toCounts(Stream buckets, + Function fn) { + return buckets.collect(Collectors.toMap(SignificantTerms.Bucket::getKey, fn, Long::sum)); + } +} diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTermsTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTermsTests.java new file mode 100644 index 0000000000000..7e80cf61608e5 --- /dev/null +++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTermsTests.java @@ -0,0 +1,91 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.aggregations.bucket.significant; + +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.search.DocValueFormat; +import org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare; +import org.elasticsearch.search.aggregations.bucket.significant.heuristics.GND; +import org.elasticsearch.search.aggregations.bucket.significant.heuristics.JLHScore; +import org.elasticsearch.search.aggregations.bucket.significant.heuristics.MutualInformation; +import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic; +import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; +import org.junit.Before; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import static org.elasticsearch.search.aggregations.InternalAggregations.EMPTY; + +public class SignificantLongTermsTests extends InternalSignificantTermsTestCase { + + private SignificanceHeuristic significanceHeuristic; + + @Before + public void setUpSignificanceHeuristic() { + significanceHeuristic = randomSignificanceHeuristic(); + } + + @Override + protected InternalSignificantTerms createTestInstance(String name, + List pipelineAggregators, + Map metaData) { + DocValueFormat format = DocValueFormat.RAW; + int requiredSize = randomIntBetween(1, 5); + int shardSize = requiredSize + 2; + final int numBuckets = randomInt(shardSize); + + long globalSubsetSize = 0; + long globalSupersetSize = 0; + + List buckets = new ArrayList<>(numBuckets); + Set terms = new HashSet<>(); + for (int i = 0; i < numBuckets; ++i) { + long term = randomValueOtherThanMany(l -> terms.add(l) == false, random()::nextLong); + + int subsetDf = randomIntBetween(1, 10); + int supersetDf = randomIntBetween(subsetDf, 20); + int supersetSize = randomIntBetween(supersetDf, 30); + + globalSubsetSize += subsetDf; + globalSupersetSize += supersetSize; + + buckets.add(new SignificantLongTerms.Bucket(subsetDf, subsetDf, supersetDf, supersetSize, term, EMPTY, format)); + } + return new SignificantLongTerms(name, requiredSize, 1L, pipelineAggregators, metaData, format, globalSubsetSize, + globalSupersetSize, significanceHeuristic, buckets); + } + + @Override + protected Writeable.Reader> instanceReader() { + return SignificantLongTerms::new; + } + + private static SignificanceHeuristic randomSignificanceHeuristic() { + return randomFrom( + new JLHScore(), + new MutualInformation(randomBoolean(), randomBoolean()), + new GND(randomBoolean()), + new ChiSquare(randomBoolean(), randomBoolean())); + } +} diff --git a/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantStringTermsTests.java b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantStringTermsTests.java new file mode 100644 index 0000000000000..f957836b3e4c0 --- /dev/null +++ b/core/src/test/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantStringTermsTests.java @@ -0,0 +1,92 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.aggregations.bucket.significant; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.search.DocValueFormat; +import org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare; +import org.elasticsearch.search.aggregations.bucket.significant.heuristics.GND; +import org.elasticsearch.search.aggregations.bucket.significant.heuristics.JLHScore; +import org.elasticsearch.search.aggregations.bucket.significant.heuristics.MutualInformation; +import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic; +import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; +import org.junit.Before; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import static org.elasticsearch.search.aggregations.InternalAggregations.EMPTY; + +public class SignificantStringTermsTests extends InternalSignificantTermsTestCase { + + private SignificanceHeuristic significanceHeuristic; + + @Before + public void setUpSignificanceHeuristic() { + significanceHeuristic = randomSignificanceHeuristic(); + } + + @Override + protected InternalSignificantTerms createTestInstance(String name, + List pipelineAggregators, + Map metaData) { + DocValueFormat format = DocValueFormat.RAW; + int requiredSize = randomIntBetween(1, 5); + int shardSize = requiredSize + 2; + final int numBuckets = randomInt(shardSize); + + long globalSubsetSize = 0; + long globalSupersetSize = 0; + + List buckets = new ArrayList<>(numBuckets); + Set terms = new HashSet<>(); + for (int i = 0; i < numBuckets; ++i) { + BytesRef term = randomValueOtherThanMany(b -> terms.add(b) == false, () -> new BytesRef(randomAsciiOfLength(10))); + + int subsetDf = randomIntBetween(1, 10); + int supersetDf = randomIntBetween(subsetDf, 20); + int supersetSize = randomIntBetween(supersetDf, 30); + + globalSubsetSize += subsetDf; + globalSupersetSize += supersetSize; + + buckets.add(new SignificantStringTerms.Bucket(term, subsetDf, subsetDf, supersetDf, supersetSize, EMPTY, format)); + } + return new SignificantStringTerms(name, requiredSize, 1L, pipelineAggregators, metaData, format, globalSubsetSize, + globalSupersetSize, significanceHeuristic, buckets); + } + + @Override + protected Writeable.Reader> instanceReader() { + return SignificantStringTerms::new; + } + + private static SignificanceHeuristic randomSignificanceHeuristic() { + return randomFrom( + new JLHScore(), + new MutualInformation(randomBoolean(), randomBoolean()), + new GND(randomBoolean()), + new ChiSquare(randomBoolean(), randomBoolean())); + } +}