Tests: Add unit test for SignificantLongTerms and SignificantStringTerms (#23428)

Relates to #22278
elastic · Mar 2, 2017 · 5a668c4 · 5a668c4
1 parent 1228084
commit 5a668c4
Show file tree

Hide file tree

Showing 7 changed files with 325 additions and 0 deletions.
diff --git a/.../elasticsearch/search/aggregations/bucket/significant/InternalMappedSignificantTerms.java b/.../elasticsearch/search/aggregations/bucket/significant/InternalMappedSignificantTerms.java
@@ -28,6 +28,7 @@
 import java.io.IOException;
 import java.util.List;
 import java.util.Map;
+import java.util.Objects;
 import java.util.function.Function;
 import java.util.stream.Collectors;
 
@@ -99,4 +100,21 @@ protected long getSupersetSize() {
     protected SignificanceHeuristic getSignificanceHeuristic() {
         return significanceHeuristic;
     }
+
+    /**
+     * Compares the state this class holds in addition to what the parent compares:
+     * the format, the subset and superset sizes, the significance heuristic,
+     * the bucket list and the bucket map.
+     *
+     * @param obj the object to compare with; it is cast to {@link InternalMappedSignificantTerms}
+     *            without a type check, so the caller is presumably expected to have verified
+     *            the class beforehand (TODO confirm against the base equals implementation)
+     * @return {@code true} if the parent's comparison succeeds and every field listed above matches
+     */
+    @Override
+    protected boolean doEquals(Object obj) {
+        final InternalMappedSignificantTerms<?, ?> other = (InternalMappedSignificantTerms<?, ?>) obj;
+        if (super.doEquals(obj) == false) {
+            return false;
+        }
+        if (Objects.equals(format, other.format) == false) {
+            return false;
+        }
+        if (subsetSize != other.subsetSize || supersetSize != other.supersetSize) {
+            return false;
+        }
+        if (Objects.equals(significanceHeuristic, other.significanceHeuristic) == false) {
+            return false;
+        }
+        return Objects.equals(buckets, other.buckets) && Objects.equals(bucketMap, other.bucketMap);
+    }
+
+    /**
+     * Hashes the parent's hash together with the format, the subset and superset sizes,
+     * the significance heuristic, the bucket list and the bucket map.
+     *
+     * @return the combined hash code
+     */
+    @Override
+    protected int doHashCode() {
+        final int parentHash = super.doHashCode();
+        return Objects.hash(parentHash, format, subsetSize, supersetSize, significanceHeuristic, buckets, bucketMap);
+    }
 }
diff --git a/...va/org/elasticsearch/search/aggregations/bucket/significant/InternalSignificantTerms.java b/...va/org/elasticsearch/search/aggregations/bucket/significant/InternalSignificantTerms.java
@@ -36,6 +36,7 @@
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.util.Objects;
 
 import static java.util.Collections.unmodifiableList;
 
@@ -127,6 +128,27 @@ public B reduce(List<B> buckets, ReduceContext context) {
         public double getSignificanceScore() {
             return score;
         }
+
+        /**
+         * Two buckets are equal when they are of exactly the same class and agree on
+         * {@code bucketOrd}, {@code score}, {@code aggregations} and {@code format}.
+         *
+         * @param o the object to compare with, may be {@code null}
+         * @return {@code true} if {@code o} is an equal bucket
+         */
+        @Override
+        public boolean equals(Object o) {
+            if (o == this) {
+                return true;
+            }
+            if (o == null) {
+                return false;
+            }
+            if (getClass() != o.getClass()) {
+                return false;
+            }
+
+            final Bucket<?> other = (Bucket<?>) o;
+            if (bucketOrd != other.bucketOrd) {
+                return false;
+            }
+            // Double.compare treats NaN as equal to itself and tells 0.0 from -0.0, unlike ==.
+            if (Double.compare(other.score, score) != 0) {
+                return false;
+            }
+            if (Objects.equals(aggregations, other.aggregations) == false) {
+                return false;
+            }
+            return Objects.equals(format, other.format);
+        }
+
+        /**
+         * Hashes the runtime class together with {@code bucketOrd}, {@code aggregations},
+         * {@code score} and {@code format}, the same values that {@code equals} compares.
+         *
+         * @return the combined hash code
+         */
+        @Override
+        public int hashCode() {
+            final Class<?> type = getClass();
+            return Objects.hash(type, bucketOrd, aggregations, score, format);
+        }
     }
 
     protected final int requiredSize;
@@ -226,4 +248,16 @@ public InternalAggregation doReduce(List<InternalAggregation> aggregations, Redu
     protected abstract long getSupersetSize();
 
     protected abstract SignificanceHeuristic getSignificanceHeuristic();
+
+    /**
+     * Hashes the two settings held by this class, {@code minDocCount} and {@code requiredSize}.
+     *
+     * @return the combined hash code
+     */
+    @Override
+    protected int doHashCode() {
+        final int settingsHash = Objects.hash(minDocCount, requiredSize);
+        return settingsHash;
+    }
+
+    /**
+     * Compares the two settings held by this class, {@code minDocCount} and {@code requiredSize}.
+     *
+     * @param obj the object to compare with; it is cast to {@link InternalSignificantTerms}
+     *            without a type check, so the caller is presumably expected to have verified
+     *            the class beforehand (TODO confirm against the base equals implementation)
+     * @return {@code true} if both settings match
+     */
+    @Override
+    protected boolean doEquals(Object obj) {
+        InternalSignificantTerms<?, ?> that = (InternalSignificantTerms<?, ?>) obj;
+        // requiredSize is a primitive int, so compare it directly rather than boxing it for Objects.equals.
+        return Objects.equals(minDocCount, that.minDocCount)
+                && requiredSize == that.requiredSize;
+    }
 }
diff --git a/...n/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTerms.java b/...n/java/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTerms.java
@@ -29,6 +29,7 @@
 import java.io.IOException;
 import java.util.List;
 import java.util.Map;
+import java.util.Objects;
 
 /**
  * Result of the running the significant terms aggregation on a numeric field.
@@ -109,6 +110,16 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
             builder.endObject();
             return builder;
         }
+
+        /**
+         * Extends the parent's equality check with a comparison of {@code term}.
+         *
+         * @param obj the object to compare with; it is cast to {@code Bucket} only after
+         *            the parent's {@code equals} has accepted it
+         * @return {@code true} if the parent considers the buckets equal and the terms match
+         */
+        @Override
+        public boolean equals(Object obj) {
+            if (super.equals(obj) == false) {
+                return false;
+            }
+            final Bucket other = (Bucket) obj;
+            return Objects.equals(term, other.term);
+        }
+
+        /**
+         * Hashes the parent's hash code together with {@code term}.
+         *
+         * @return the combined hash code
+         */
+        @Override
+        public int hashCode() {
+            final int parentHash = super.hashCode();
+            return Objects.hash(parentHash, term);
+        }
     }
 
     public SignificantLongTerms(String name, int requiredSize, long minDocCount, List<PipelineAggregator> pipelineAggregators,

diff --git a/...java/org/elasticsearch/search/aggregations/bucket/significant/SignificantStringTerms.java b/...java/org/elasticsearch/search/aggregations/bucket/significant/SignificantStringTerms.java
@@ -30,6 +30,7 @@
 import java.io.IOException;
 import java.util.List;
 import java.util.Map;
+import java.util.Objects;
 
 /**
  * Result of the running the significant terms aggregation on a String field.
@@ -111,6 +112,16 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
             builder.endObject();
             return builder;
         }
+
+        /**
+         * Extends the parent's equality check with a comparison of {@code termBytes}.
+         *
+         * @param obj the object to compare with; it is cast to {@code SignificantStringTerms.Bucket}
+         *            only after the parent's {@code equals} has accepted it
+         * @return {@code true} if the parent considers the buckets equal and the term bytes match
+         */
+        @Override
+        public boolean equals(Object obj) {
+            if (super.equals(obj) == false) {
+                return false;
+            }
+            final SignificantStringTerms.Bucket other = (SignificantStringTerms.Bucket) obj;
+            return Objects.equals(termBytes, other.termBytes);
+        }
+
+        /**
+         * Hashes the parent's hash code together with {@code termBytes}.
+         *
+         * @return the combined hash code
+         */
+        @Override
+        public int hashCode() {
+            final int parentHash = super.hashCode();
+            return Objects.hash(parentHash, termBytes);
+        }
     }
 
     public SignificantStringTerms(String name, int requiredSize, long minDocCount, List<PipelineAggregator> pipelineAggregators,

diff --git a/...lasticsearch/search/aggregations/bucket/significant/InternalSignificantTermsTestCase.java b/...lasticsearch/search/aggregations/bucket/significant/InternalSignificantTermsTestCase.java
@@ -0,0 +1,68 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.aggregations.bucket.significant;
+
+import org.elasticsearch.search.aggregations.InternalAggregationTestCase;
+import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+/**
+ * Shared base for the unit tests of {@link InternalSignificantTerms} implementations. It provides
+ * the unmapped instance and the assertions applied to a reduced result.
+ */
+public abstract class InternalSignificantTermsTestCase extends InternalAggregationTestCase<InternalSignificantTerms<?, ?>> {
+
+    /**
+     * Builds an {@link UnmappedSignificantTerms} that reuses the {@code requiredSize} and
+     * {@code minDocCount} of a freshly created test instance.
+     */
+    @Override
+    protected InternalSignificantTerms<?, ?> createUnmappedInstance(String name,
+                                                                    List<PipelineAggregator> pipelineAggregators,
+                                                                    Map<String, Object> metaData) {
+        InternalSignificantTerms<?, ?> testInstance = createTestInstance(name, pipelineAggregators, metaData);
+        return new UnmappedSignificantTerms(name, testInstance.requiredSize, testInstance.minDocCount, pipelineAggregators, metaData);
+    }
+
+    /**
+     * Checks a reduced aggregation against the inputs it was reduced from:
+     * <ul>
+     * <li>the reduced subset and superset sizes must equal the sums over all inputs;</li>
+     * <li>for every key present in the reduced result, the subset df, superset df and doc count
+     * must each equal the sum of that value over all input buckets with the same key.</li>
+     * </ul>
+     *
+     * @param reduced the result of the reduction
+     * @param inputs  the aggregations that were reduced
+     */
+    @Override
+    protected void assertReduced(InternalSignificantTerms<?, ?> reduced, List<InternalSignificantTerms<?, ?>> inputs) {
+        assertEquals(inputs.stream().mapToLong(InternalSignificantTerms::getSubsetSize).sum(), reduced.getSubsetSize());
+        assertEquals(inputs.stream().mapToLong(InternalSignificantTerms::getSupersetSize).sum(), reduced.getSupersetSize());
+
+        List<Function<SignificantTerms.Bucket, Long>> counts = Arrays.asList(
+                SignificantTerms.Bucket::getSubsetDf,
+                SignificantTerms.Bucket::getSupersetDf,
+                SignificantTerms.Bucket::getDocCount
+        );
+
+        for (Function<SignificantTerms.Bucket, Long> count : counts) {
+            Map<Object, Long> reducedCounts = toCounts(reduced.getBuckets().stream(), count);
+            Map<Object, Long> totalCounts = toCounts(inputs.stream().map(SignificantTerms::getBuckets).flatMap(List::stream), count);
+
+            // Keys absent from the reduced result are dropped from the expectation before comparing.
+            Map<Object, Long> expectedReducedCounts = new HashMap<>(totalCounts);
+            expectedReducedCounts.keySet().retainAll(reducedCounts.keySet());
+            assertEquals(expectedReducedCounts, reducedCounts);
+        }
+    }
+
+    /**
+     * Groups the buckets by key and sums the value extracted by {@code fn} over buckets sharing a key.
+     *
+     * @param buckets the buckets to group
+     * @param fn      extracts the count to sum from a bucket
+     * @return a map from bucket key to the summed count
+     */
+    private static Map<Object, Long> toCounts(Stream<? extends SignificantTerms.Bucket> buckets,
+                                              Function<SignificantTerms.Bucket, Long> fn) {
+        return buckets.collect(Collectors.toMap(SignificantTerms.Bucket::getKey, fn, Long::sum));
+    }
+}
diff --git a/...a/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTermsTests.java b/...a/org/elasticsearch/search/aggregations/bucket/significant/SignificantLongTermsTests.java
@@ -0,0 +1,91 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.aggregations.bucket.significant;
+
+import org.elasticsearch.common.io.stream.Writeable;
+import org.elasticsearch.search.DocValueFormat;
+import org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare;
+import org.elasticsearch.search.aggregations.bucket.significant.heuristics.GND;
+import org.elasticsearch.search.aggregations.bucket.significant.heuristics.JLHScore;
+import org.elasticsearch.search.aggregations.bucket.significant.heuristics.MutualInformation;
+import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic;
+import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
+import org.junit.Before;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import static org.elasticsearch.search.aggregations.InternalAggregations.EMPTY;
+
+/**
+ * Unit tests for {@link SignificantLongTerms}, built on the shared checks of
+ * {@link InternalSignificantTermsTestCase}.
+ */
+public class SignificantLongTermsTests extends InternalSignificantTermsTestCase {
+
+    /** Heuristic chosen once per test in {@link #setUpSignificanceHeuristic()} and used by every instance created. */
+    private SignificanceHeuristic significanceHeuristic;
+
+    @Before
+    public void setUpSignificanceHeuristic() {
+        significanceHeuristic = randomSignificanceHeuristic();
+    }
+
+    /**
+     * Creates a {@link SignificantLongTerms} with a random number of buckets carrying distinct terms.
+     * The subset and superset sizes of the aggregation are the sums of the per-bucket
+     * {@code subsetDf} and {@code supersetSize} values.
+     */
+    @Override
+    protected InternalSignificantTerms<?, ?> createTestInstance(String name,
+                                                                List<PipelineAggregator> pipelineAggregators,
+                                                                Map<String, Object> metaData) {
+        DocValueFormat format = DocValueFormat.RAW;
+        int requiredSize = randomIntBetween(1, 5);
+        int shardSize = requiredSize + 2;
+        final int numBuckets = randomInt(shardSize);
+
+        long globalSubsetSize = 0;
+        long globalSupersetSize = 0;
+
+        List<SignificantLongTerms.Bucket> buckets = new ArrayList<>(numBuckets);
+        Set<Long> terms = new HashSet<>();
+        for (int i = 0; i < numBuckets; ++i) {
+            // Set.add returns false for a value already in the set, so the predicate flags duplicate terms.
+            long term = randomValueOtherThanMany(l -> terms.add(l) == false, random()::nextLong);
+
+            int subsetDf = randomIntBetween(1, 10);
+            int supersetDf = randomIntBetween(subsetDf, 20);
+            int supersetSize = randomIntBetween(supersetDf, 30);
+
+            globalSubsetSize += subsetDf;
+            globalSupersetSize += supersetSize;
+
+            // NOTE(review): subsetDf is passed for both of the first two arguments; the second is
+            // presumably the bucket's subset size - confirm against the Bucket constructor.
+            buckets.add(new SignificantLongTerms.Bucket(subsetDf, subsetDf, supersetDf, supersetSize, term, EMPTY, format));
+        }
+        return new SignificantLongTerms(name, requiredSize, 1L, pipelineAggregators, metaData, format, globalSubsetSize,
+                globalSupersetSize, significanceHeuristic, buckets);
+    }
+
+    @Override
+    protected Writeable.Reader<InternalSignificantTerms<?, ?>> instanceReader() {
+        return SignificantLongTerms::new;
+    }
+
+    /** Picks one of the JLHScore, MutualInformation, GND or ChiSquare heuristics, with random flags where they take any. */
+    private static SignificanceHeuristic randomSignificanceHeuristic() {
+        return randomFrom(
+                new JLHScore(),
+                new MutualInformation(randomBoolean(), randomBoolean()),
+                new GND(randomBoolean()),
+                new ChiSquare(randomBoolean(), randomBoolean()));
+    }
+}
diff --git a/...org/elasticsearch/search/aggregations/bucket/significant/SignificantStringTermsTests.java b/...org/elasticsearch/search/aggregations/bucket/significant/SignificantStringTermsTests.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.search.aggregations.bucket.significant;
+
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.io.stream.Writeable;
+import org.elasticsearch.search.DocValueFormat;
+import org.elasticsearch.search.aggregations.bucket.significant.heuristics.ChiSquare;
+import org.elasticsearch.search.aggregations.bucket.significant.heuristics.GND;
+import org.elasticsearch.search.aggregations.bucket.significant.heuristics.JLHScore;
+import org.elasticsearch.search.aggregations.bucket.significant.heuristics.MutualInformation;
+import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic;
+import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
+import org.junit.Before;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import static org.elasticsearch.search.aggregations.InternalAggregations.EMPTY;
+
+/**
+ * Unit tests for {@link SignificantStringTerms}, built on the shared checks of
+ * {@link InternalSignificantTermsTestCase}.
+ */
+public class SignificantStringTermsTests extends InternalSignificantTermsTestCase {
+
+    /** Heuristic chosen once per test in {@link #setUpSignificanceHeuristic()} and used by every instance created. */
+    private SignificanceHeuristic significanceHeuristic;
+
+    @Before
+    public void setUpSignificanceHeuristic() {
+        significanceHeuristic = randomSignificanceHeuristic();
+    }
+
+    /**
+     * Creates a {@link SignificantStringTerms} with a random number of buckets carrying distinct terms.
+     * The subset and superset sizes of the aggregation are the sums of the per-bucket
+     * {@code subsetDf} and {@code supersetSize} values.
+     */
+    @Override
+    protected InternalSignificantTerms<?, ?> createTestInstance(String name,
+                                                                List<PipelineAggregator> pipelineAggregators,
+                                                                Map<String, Object> metaData) {
+        DocValueFormat format = DocValueFormat.RAW;
+        int requiredSize = randomIntBetween(1, 5);
+        int shardSize = requiredSize + 2;
+        final int numBuckets = randomInt(shardSize);
+
+        long globalSubsetSize = 0;
+        long globalSupersetSize = 0;
+
+        List<SignificantStringTerms.Bucket> buckets = new ArrayList<>(numBuckets);
+        Set<BytesRef> terms = new HashSet<>();
+        for (int i = 0; i < numBuckets; ++i) {
+            // Set.add returns false for a value already in the set, so the predicate flags duplicate terms.
+            BytesRef term = randomValueOtherThanMany(b -> terms.add(b) == false, () -> new BytesRef(randomAsciiOfLength(10)));
+
+            int subsetDf = randomIntBetween(1, 10);
+            int supersetDf = randomIntBetween(subsetDf, 20);
+            int supersetSize = randomIntBetween(supersetDf, 30);
+
+            globalSubsetSize += subsetDf;
+            globalSupersetSize += supersetSize;
+
+            // NOTE(review): subsetDf is passed for both the second and third arguments; the third is
+            // presumably the bucket's subset size - confirm against the Bucket constructor.
+            buckets.add(new SignificantStringTerms.Bucket(term, subsetDf, subsetDf, supersetDf, supersetSize, EMPTY, format));
+        }
+        return new SignificantStringTerms(name, requiredSize, 1L, pipelineAggregators, metaData, format, globalSubsetSize,
+                globalSupersetSize, significanceHeuristic, buckets);
+    }
+
+    @Override
+    protected Writeable.Reader<InternalSignificantTerms<?, ?>> instanceReader() {
+        return SignificantStringTerms::new;
+    }
+
+    /** Picks one of the JLHScore, MutualInformation, GND or ChiSquare heuristics, with random flags where they take any. */
+    private static SignificanceHeuristic randomSignificanceHeuristic() {
+        return randomFrom(
+                new JLHScore(),
+                new MutualInformation(randomBoolean(), randomBoolean()),
+                new GND(randomBoolean()),
+                new ChiSquare(randomBoolean(), randomBoolean()));
+    }
+}