From 76410335a4e4a224daca4f165ec15b64fbc844d4 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Mon, 3 Apr 2017 09:50:44 +0200 Subject: [PATCH] Synchronized CollapseTopFieldDocs with lucenes relatives (#23854) TopDocs et.al. got additional parameters to incrementally reduce top docs. In order to add incremental reduction `CollapseTopFieldDocs` needs to have the same properties. --- .../search/grouping/CollapseTopFieldDocs.java | 69 +++++++++++++------ .../action/search/SearchPhaseController.java | 2 +- .../CollapsingTopDocsCollectorTests.java | 2 +- 3 files changed, 50 insertions(+), 23 deletions(-) diff --git a/core/src/main/java/org/apache/lucene/search/grouping/CollapseTopFieldDocs.java b/core/src/main/java/org/apache/lucene/search/grouping/CollapseTopFieldDocs.java index 169a89edbcf63..b4d3c82343957 100644 --- a/core/src/main/java/org/apache/lucene/search/grouping/CollapseTopFieldDocs.java +++ b/core/src/main/java/org/apache/lucene/search/grouping/CollapseTopFieldDocs.java @@ -26,7 +26,6 @@ import org.apache.lucene.search.TopFieldDocs; import org.apache.lucene.util.PriorityQueue; -import java.io.IOException; import java.util.ArrayList; import java.util.HashSet; import java.util.List; @@ -35,7 +34,7 @@ /** * Represents hits returned by {@link CollapsingTopDocsCollector#getTopDocs()}. */ -public class CollapseTopFieldDocs extends TopFieldDocs { +public final class CollapseTopFieldDocs extends TopFieldDocs { /** The field used for collapsing **/ public final String field; /** The collapse value for each top doc */ @@ -49,22 +48,59 @@ public CollapseTopFieldDocs(String field, int totalHits, ScoreDoc[] scoreDocs, } // Refers to one hit: - private static class ShardRef { + private static final class ShardRef { // Which shard (index into shardHits[]): final int shardIndex; + // True if we should use the incoming ScoreDoc.shardIndex for sort order + final boolean useScoreDocIndex; + // Which hit within the shard: int hitIndex; - ShardRef(int shardIndex) { + ShardRef(int shardIndex, boolean useScoreDocIndex) { this.shardIndex = shardIndex; + this.useScoreDocIndex = useScoreDocIndex; } @Override public String toString() { return "ShardRef(shardIndex=" + shardIndex + " hitIndex=" + hitIndex + ")"; } - }; + + int getShardIndex(ScoreDoc scoreDoc) { + if (useScoreDocIndex) { + if (scoreDoc.shardIndex == -1) { + throw new IllegalArgumentException("setShardIndex is false but TopDocs[" + + shardIndex + "].scoreDocs[" + hitIndex + "] is not set"); + } + return scoreDoc.shardIndex; + } else { + // NOTE: we don't assert that shardIndex is -1 here, because caller could in fact have set it but asked us to ignore it now + return shardIndex; + } + } + } + + /** + * if we need to tie-break since score / sort value are the same we first compare shard index (lower shard wins) + * and then iff shard index is the same we use the hit index. + */ + static boolean tieBreakLessThan(ShardRef first, ScoreDoc firstDoc, ShardRef second, ScoreDoc secondDoc) { + final int firstShardIndex = first.getShardIndex(firstDoc); + final int secondShardIndex = second.getShardIndex(secondDoc); + // Tie break: earlier shard wins + if (firstShardIndex < secondShardIndex) { + return true; + } else if (firstShardIndex > secondShardIndex) { + return false; + } else { + // Tie break in same shard: resolve however the + // shard had resolved it: + assert first.hitIndex != second.hitIndex; + return first.hitIndex < second.hitIndex; + } + } private static class MergeSortQueue extends PriorityQueue { // These are really FieldDoc instances: @@ -72,7 +108,7 @@ private static class MergeSortQueue extends PriorityQueue { final FieldComparator[] comparators; final int[] reverseMul; - MergeSortQueue(Sort sort, CollapseTopFieldDocs[] shardHits) throws IOException { + MergeSortQueue(Sort sort, CollapseTopFieldDocs[] shardHits) { super(shardHits.length); this.shardHits = new ScoreDoc[shardHits.length][]; for (int shardIDX = 0; shardIDX < shardHits.length; shardIDX++) { @@ -115,18 +151,7 @@ public boolean lessThan(ShardRef first, ShardRef second) { return cmp < 0; } } - - // Tie break: earlier shard wins - if (first.shardIndex < second.shardIndex) { - return true; - } else if (first.shardIndex > second.shardIndex) { - return false; - } else { - // Tie break in same shard: resolve however the - // shard had resolved it: - assert first.hitIndex != second.hitIndex; - return first.hitIndex < second.hitIndex; - } + return tieBreakLessThan(first, firstFD, second, secondFD); } } @@ -135,7 +160,7 @@ public boolean lessThan(ShardRef first, ShardRef second) { * the provided CollapseTopDocs, sorting by score. Each {@link CollapseTopFieldDocs} instance must be sorted. **/ public static CollapseTopFieldDocs merge(Sort sort, int start, int size, - CollapseTopFieldDocs[] shardHits) throws IOException { + CollapseTopFieldDocs[] shardHits, boolean setShardIndex) { String collapseField = shardHits[0].field; for (int i = 1; i < shardHits.length; i++) { if (collapseField.equals(shardHits[i].field) == false) { @@ -155,7 +180,7 @@ public static CollapseTopFieldDocs merge(Sort sort, int start, int size, totalHitCount += shard.totalHits; if (shard.scoreDocs != null && shard.scoreDocs.length > 0) { availHitCount += shard.scoreDocs.length; - queue.add(new ShardRef(shardIDX)); + queue.add(new ShardRef(shardIDX, setShardIndex == false)); maxScore = Math.max(maxScore, shard.getMaxScore()); } } @@ -192,7 +217,9 @@ public static CollapseTopFieldDocs merge(Sort sort, int start, int size, continue; } seen.add(collapseValue); - hit.shardIndex = ref.shardIndex; + if (setShardIndex) { + hit.shardIndex = ref.shardIndex; + } if (hitUpto >= start) { hitList.add(hit); collapseList.add(collapseValue); diff --git a/core/src/main/java/org/elasticsearch/action/search/SearchPhaseController.java b/core/src/main/java/org/elasticsearch/action/search/SearchPhaseController.java index f3f9313af3d38..810530b5507e9 100644 --- a/core/src/main/java/org/elasticsearch/action/search/SearchPhaseController.java +++ b/core/src/main/java/org/elasticsearch/action/search/SearchPhaseController.java @@ -234,7 +234,7 @@ public ScoreDoc[] sortDocs(boolean ignoreFrom, Collection void assertSearchCollapse(CollapsingDocValuesProd subSearcher.search(weight, c); shardHits[shardIDX] = c.getTopDocs(); } - CollapseTopFieldDocs mergedFieldDocs = CollapseTopFieldDocs.merge(sort, 0, expectedNumGroups, shardHits); + CollapseTopFieldDocs mergedFieldDocs = CollapseTopFieldDocs.merge(sort, 0, expectedNumGroups, shardHits, true); assertTopDocsEquals(mergedFieldDocs, collapseTopFieldDocs); w.close(); reader.close();