Skip to content

Commit

Permalink
Synchronized CollapseTopFieldDocs with lucenes relatives (#23854)
Browse files Browse the repository at this point in the history
TopDocs et.al. got additional parameters to incrementally reduce
top docs. In order to add incremental reduction `CollapseTopFieldDocs`
needs to have the same properties.
  • Loading branch information
s1monw committed Apr 3, 2017
1 parent fa81e2a commit 7641033
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 23 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.util.PriorityQueue;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
Expand All @@ -35,7 +34,7 @@
/**
* Represents hits returned by {@link CollapsingTopDocsCollector#getTopDocs()}.
*/
public class CollapseTopFieldDocs extends TopFieldDocs {
public final class CollapseTopFieldDocs extends TopFieldDocs {
/** The field used for collapsing **/
public final String field;
/** The collapse value for each top doc */
Expand All @@ -49,30 +48,67 @@ public CollapseTopFieldDocs(String field, int totalHits, ScoreDoc[] scoreDocs,
}

// Refers to one hit:
private static class ShardRef {
private static final class ShardRef {
// Which shard (index into shardHits[]):
final int shardIndex;

// True if we should use the incoming ScoreDoc.shardIndex for sort order
final boolean useScoreDocIndex;

// Which hit within the shard:
int hitIndex;

ShardRef(int shardIndex) {
ShardRef(int shardIndex, boolean useScoreDocIndex) {
this.shardIndex = shardIndex;
this.useScoreDocIndex = useScoreDocIndex;
}

@Override
public String toString() {
return "ShardRef(shardIndex=" + shardIndex + " hitIndex=" + hitIndex + ")";
}
};

int getShardIndex(ScoreDoc scoreDoc) {
if (useScoreDocIndex) {
if (scoreDoc.shardIndex == -1) {
throw new IllegalArgumentException("setShardIndex is false but TopDocs["
+ shardIndex + "].scoreDocs[" + hitIndex + "] is not set");
}
return scoreDoc.shardIndex;
} else {
// NOTE: we don't assert that shardIndex is -1 here, because caller could in fact have set it but asked us to ignore it now
return shardIndex;
}
}
}

/**
* if we need to tie-break since score / sort value are the same we first compare shard index (lower shard wins)
* and then iff shard index is the same we use the hit index.
*/
static boolean tieBreakLessThan(ShardRef first, ScoreDoc firstDoc, ShardRef second, ScoreDoc secondDoc) {
final int firstShardIndex = first.getShardIndex(firstDoc);
final int secondShardIndex = second.getShardIndex(secondDoc);
// Tie break: earlier shard wins
if (firstShardIndex < secondShardIndex) {
return true;
} else if (firstShardIndex > secondShardIndex) {
return false;
} else {
// Tie break in same shard: resolve however the
// shard had resolved it:
assert first.hitIndex != second.hitIndex;
return first.hitIndex < second.hitIndex;
}
}

private static class MergeSortQueue extends PriorityQueue<ShardRef> {
// These are really FieldDoc instances:
final ScoreDoc[][] shardHits;
final FieldComparator<?>[] comparators;
final int[] reverseMul;

MergeSortQueue(Sort sort, CollapseTopFieldDocs[] shardHits) throws IOException {
MergeSortQueue(Sort sort, CollapseTopFieldDocs[] shardHits) {
super(shardHits.length);
this.shardHits = new ScoreDoc[shardHits.length][];
for (int shardIDX = 0; shardIDX < shardHits.length; shardIDX++) {
Expand Down Expand Up @@ -115,18 +151,7 @@ public boolean lessThan(ShardRef first, ShardRef second) {
return cmp < 0;
}
}

// Tie break: earlier shard wins
if (first.shardIndex < second.shardIndex) {
return true;
} else if (first.shardIndex > second.shardIndex) {
return false;
} else {
// Tie break in same shard: resolve however the
// shard had resolved it:
assert first.hitIndex != second.hitIndex;
return first.hitIndex < second.hitIndex;
}
return tieBreakLessThan(first, firstFD, second, secondFD);
}
}

Expand All @@ -135,7 +160,7 @@ public boolean lessThan(ShardRef first, ShardRef second) {
* the provided CollapseTopDocs, sorting by score. Each {@link CollapseTopFieldDocs} instance must be sorted.
**/
public static CollapseTopFieldDocs merge(Sort sort, int start, int size,
CollapseTopFieldDocs[] shardHits) throws IOException {
CollapseTopFieldDocs[] shardHits, boolean setShardIndex) {
String collapseField = shardHits[0].field;
for (int i = 1; i < shardHits.length; i++) {
if (collapseField.equals(shardHits[i].field) == false) {
Expand All @@ -155,7 +180,7 @@ public static CollapseTopFieldDocs merge(Sort sort, int start, int size,
totalHitCount += shard.totalHits;
if (shard.scoreDocs != null && shard.scoreDocs.length > 0) {
availHitCount += shard.scoreDocs.length;
queue.add(new ShardRef(shardIDX));
queue.add(new ShardRef(shardIDX, setShardIndex == false));
maxScore = Math.max(maxScore, shard.getMaxScore());
}
}
Expand Down Expand Up @@ -192,7 +217,9 @@ public static CollapseTopFieldDocs merge(Sort sort, int start, int size,
continue;
}
seen.add(collapseValue);
hit.shardIndex = ref.shardIndex;
if (setShardIndex) {
hit.shardIndex = ref.shardIndex;
}
if (hitUpto >= start) {
hitList.add(hit);
collapseList.add(collapseValue);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ public ScoreDoc[] sortDocs(boolean ignoreFrom, Collection<? extends SearchPhaseR
final CollapseTopFieldDocs[] shardTopDocs = new CollapseTopFieldDocs[numShards];
fillTopDocs(shardTopDocs, results, new CollapseTopFieldDocs(firstTopDocs.field, 0, new FieldDoc[0],
sort.getSort(), new Object[0], Float.NaN));
mergedTopDocs = CollapseTopFieldDocs.merge(sort, from, topN, shardTopDocs);
mergedTopDocs = CollapseTopFieldDocs.merge(sort, from, topN, shardTopDocs, true);
} else if (result.queryResult().topDocs() instanceof TopFieldDocs) {
TopFieldDocs firstTopDocs = (TopFieldDocs) result.queryResult().topDocs();
final Sort sort = new Sort(firstTopDocs.fields);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ private <T extends Comparable> void assertSearchCollapse(CollapsingDocValuesProd
subSearcher.search(weight, c);
shardHits[shardIDX] = c.getTopDocs();
}
CollapseTopFieldDocs mergedFieldDocs = CollapseTopFieldDocs.merge(sort, 0, expectedNumGroups, shardHits);
CollapseTopFieldDocs mergedFieldDocs = CollapseTopFieldDocs.merge(sort, 0, expectedNumGroups, shardHits, true);
assertTopDocsEquals(mergedFieldDocs, collapseTopFieldDocs);
w.close();
reader.close();
Expand Down

0 comments on commit 7641033

Please sign in to comment.