Skip to content

Commit

Permalink
Backport: Support hits to log count (#805)
Browse files Browse the repository at this point in the history
* Support hits to log count

* Updated PR with last changes
  • Loading branch information
fragosoluana authored Jan 7, 2025
1 parent 321d299 commit beddcad
Show file tree
Hide file tree
Showing 7 changed files with 362 additions and 41 deletions.
3 changes: 3 additions & 0 deletions clientlib/src/main/proto/yelp/nrtsearch/search.proto
Original file line number Diff line number Diff line change
Expand Up @@ -1085,6 +1085,9 @@ message LoggingHits {
string name = 1;
//Optional logging parameters
google.protobuf.Struct params = 2;
// Number of hits to log. Fewer hits may be logged if the query matches fewer hits than
// this number.
int32 hitsToLog = 3;
}

// Specify how to highlight matched text in SearchRequest
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -220,14 +220,27 @@ public SearchResponse handle(IndexState indexState, SearchRequest searchRequest)

long t0 = System.nanoTime();

hits = getHitsFromOffset(hits, searchContext.getStartHit(), searchContext.getTopHits());
// hits to be logged also need to have their fields fetched
hits =
getHitsFromOffset(
hits,
searchContext.getStartHit(),
Math.max(
searchContext.getTopHits(),
searchContext.getHitsToLog() + searchContext.getStartHit()));

// create Hit.Builder for each hit, and populate with lucene doc id and ranking info
setResponseHits(searchContext, hits);

// fill Hit.Builder with requested fields
fetchFields(searchContext);

// if there were extra hits for the logging, the response size needs to be reduced to match
// the topHits
if (searchContext.getFetchTasks().getHitsLoggerFetchTask() != null) {
setResponseTopHits(searchContext);
}

SearchState.Builder searchState = SearchState.newBuilder();
searchContext.getResponseBuilder().setSearchState(searchState);
searchState.setTimestamp(searchContext.getTimestampSec());
Expand Down Expand Up @@ -444,17 +457,17 @@ private void fetchFields(SearchContext searchContext)

/**
* Given all the top documents, produce a slice of the documents starting from a start offset and
* going up to the query needed maximum hits. There may be more top docs than the topHits limit,
* going up to the query needed maximum hits. There may be more top docs than the hitsCount limit,
* if top docs sampling facets are used.
*
* @param hits all hits
* @param startHit offset into top docs
* @param topHits maximum number of hits needed for search response
* @param hitsCount maximum number of hits needed for the query
* @return slice of hits starting at given offset, or empty slice if there are less than startHit
* docs
*/
public static TopDocs getHitsFromOffset(TopDocs hits, int startHit, int topHits) {
int retrieveHits = Math.min(topHits, hits.scoreDocs.length);
public static TopDocs getHitsFromOffset(TopDocs hits, int startHit, int hitsCount) {
int retrieveHits = Math.min(hitsCount, hits.scoreDocs.length);
if (startHit != 0 || retrieveHits != hits.scoreDocs.length) {
// Slice:
int count = Math.max(0, retrieveHits - startHit);
Expand All @@ -467,6 +480,20 @@ public static TopDocs getHitsFromOffset(TopDocs hits, int startHit, int topHits)
return hits;
}

/**
 * Trim the response so that it only holds the hits requested by the client. Any extra hits at
 * the end of the response (present only so they could be logged) are removed.
 *
 * <p>The number of hits kept is {@code topHits - startHit}, clamped at zero so that a start
 * offset larger than {@code topHits} results in an empty response instead of an invalid removal
 * index.
 *
 * @param context search context holding the response builder, top hits and start hit
 */
private static void setResponseTopHits(SearchContext context) {
  // Clamp at zero: if startHit exceeds topHits the difference is negative, and comparing the
  // hit count against it would keep looping on an empty response and attempt removeHits(-1).
  int hitsToKeep = Math.max(0, context.getTopHits() - context.getStartHit());

  // Remove from the tail so the indices of the hits being kept never shift.
  for (int lastIdx = context.getResponseBuilder().getHitsCount() - 1;
      lastIdx >= hitsToKeep;
      lastIdx--) {
    context.getResponseBuilder().removeHits(lastIdx);
  }
}

/**
* Add {@link com.yelp.nrtsearch.server.grpc.SearchResponse.Hit.Builder}s to the context {@link
* SearchResponse.Builder} for each of the query hits. Populate the builders with the lucene doc
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,12 @@
public class HitsLoggerFetchTask implements FetchTask {
private static final double TEN_TO_THE_POWER_SIX = Math.pow(10, 6);
private final HitsLogger hitsLogger;
private final int hitsToLog;
private final DoubleAdder timeTakenMs = new DoubleAdder();

/**
 * Build a fetch task from the request logging configuration.
 *
 * @param loggingHits logging definition; supplies the number of hits to log and is passed to
 *     {@link HitsLoggerCreator} to create the {@link HitsLogger}
 */
public HitsLoggerFetchTask(LoggingHits loggingHits) {
  hitsToLog = loggingHits.getHitsToLog();
  hitsLogger = HitsLoggerCreator.getInstance().createHitsLogger(loggingHits);
}

/**
Expand All @@ -46,7 +48,15 @@ public HitsLoggerFetchTask(LoggingHits loggingHits) {
@Override
public void processAllHits(SearchContext searchContext, List<SearchResponse.Hit.Builder> hits) {
  long startTime = System.nanoTime();

  // The hits list can contain more entries than should be logged, so log at most the
  // configured number of hits. The count is clamped to [0, hits.size()]: a negative value
  // would otherwise be rejected by subList, and a value at or above the list size means the
  // whole list is logged as-is.
  int logCount = Math.min(hits.size(), Math.max(0, searchContext.getHitsToLog()));
  List<SearchResponse.Hit.Builder> hitsToBeLogged =
      logCount < hits.size() ? hits.subList(0, logCount) : hits;
  hitsLogger.log(searchContext, hitsToBeLogged);

  // nanoseconds -> milliseconds
  timeTakenMs.add((System.nanoTime() - startTime) / TEN_TO_THE_POWER_SIX);
}

Expand All @@ -58,4 +68,13 @@ public void processAllHits(SearchContext searchContext, List<SearchResponse.Hit.
public double getTimeTakenMs() {
  // DoubleAdder.doubleValue() is defined as equivalent to sum()
  return timeTakenMs.sum();
}

/**
 * Get the number of hits to log, as read from the {@code LoggingHits} definition this task was
 * constructed with.
 *
 * @return number of hits to log
 */
public int getHitsToLog() {
  return this.hitsToLog;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,14 @@ public int getTopHits() {
return topHits;
}

/**
 * Get the number of hits to log.
 *
 * @return the value reported by the hits logger fetch task, or 0 if no hits logger fetch task
 *     is present
 */
public int getHitsToLog() {
  return getFetchTasks().getHitsLoggerFetchTask() == null
      ? 0
      : getFetchTasks().getHitsLoggerFetchTask().getHitsToLog();
}

/**
* Get map of all fields usable for this query. This includes all fields defined in the index and
* dynamic fields from the request.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,12 @@ public DocCollector(
this.indexState = context.getIndexState();
this.additionalCollectors = additionalCollectors;

// determine how many hits to collect based on request, facets and rescore window
// determine how many hits to collect based on request, facets, rescore window and hits to log
int collectHits = request.getTopHits();
if (request.hasLoggingHits()) {
collectHits =
Math.max(collectHits, request.getLoggingHits().getHitsToLog() + request.getStartHit());
}
for (Facet facet : request.getFacetsList()) {
int facetSample = facet.getSampleTopDocs();
if (facetSample > 0 && facetSample > collectHits) {
Expand Down
Loading

0 comments on commit beddcad

Please sign in to comment.