Skip to content

Commit

Permalink
change into topBoostOnly
Browse files Browse the repository at this point in the history
  • Loading branch information
waziqi89 committed Jan 9, 2025
1 parent fdbbca7 commit 26c0db4
Show file tree
Hide file tree
Showing 8 changed files with 148 additions and 128 deletions.
4 changes: 2 additions & 2 deletions clientlib/src/main/proto/yelp/nrtsearch/search.proto
Original file line number Diff line number Diff line change
Expand Up @@ -1132,8 +1132,8 @@ message Highlight {
google.protobuf.UInt32Value boundary_max_scan = 15;
// Locale used in boundary scanner when using "word" or "sentence" boundary_scanner. Must be an IETF BCP 47 language tag. Examples: "en-US", "zh-CN".
google.protobuf.StringValue boundary_scanner_locale = 16;
// Only highlight the top matched phrase (with the highest boost value) once per segment. By default, it is false.
google.protobuf.BoolValue top_phrase_once = 17;
// Only highlight the top matched phrases (with the highest boost value) per fragment. By default, it is false.
google.protobuf.BoolValue top_boost_only = 17;
}

// Highlight settings
Expand Down
4 changes: 2 additions & 2 deletions grpc-gateway/luceneserver.swagger.json
Original file line number Diff line number Diff line change
Expand Up @@ -1766,9 +1766,9 @@
"type": "string",
"description": "Locale used in boundary scanner when using \"word\" or \"sentence\" boundary_scanner. Examples: \"en-US\", \"ch-ZH\"."
},
"topPhraseOnce": {
"topBoostOnly": {
"type": "boolean",
"description": "Only highlight the top matched phrase (with the highest boost value) once per segment. By default, it is false."
"description": "Only highlight the top matched phrases (with the highest boost value) per fragment. By default, it is false."
}
}
},
Expand Down
170 changes: 85 additions & 85 deletions grpc-gateway/search.pb.go

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ public class HighlightSettings {
private final Character[] boundaryChars;
private final int boundaryMaxScan;
private final Locale boundaryScannerLocale;
private final boolean topPhraseOnce;
private final boolean topBoostOnly;

public HighlightSettings(
Highlighter highlighter,
Expand All @@ -55,7 +55,7 @@ public HighlightSettings(
Character[] boundaryChars,
int boundaryMaxScan,
Locale boundaryScannerLocale,
boolean topPhraseOnce,
boolean topBoostOnly,
Map<String, Object> customHighlighterParams) {
this.highlighter = highlighter;
this.preTags = preTags;
Expand All @@ -71,7 +71,7 @@ public HighlightSettings(
this.boundaryChars = boundaryChars;
this.boundaryMaxScan = boundaryMaxScan;
this.boundaryScannerLocale = boundaryScannerLocale;
this.topPhraseOnce = topPhraseOnce;
this.topBoostOnly = topBoostOnly;
this.customHighlighterParams = customHighlighterParams;
}

Expand All @@ -91,7 +91,7 @@ public Builder toBuilder() {
.withBoundaryChars(this.boundaryChars)
.withBoundaryMaxScan(this.boundaryMaxScan)
.withBoundaryScannerLocale(this.boundaryScannerLocale)
.withTopPhraseOnce(this.topPhraseOnce)
.withTopBoostOnly(this.topBoostOnly)
.withCustomHighlighterParams(this.customHighlighterParams);
}

Expand Down Expand Up @@ -151,8 +151,8 @@ public Locale getBoundaryScannerLocale() {
return boundaryScannerLocale;
}

public boolean getTopPhraseOnce() {
return topPhraseOnce;
public boolean getTopBoostOnly() {
return topBoostOnly;
}

public Map<String, Object> getCustomHighlighterParams() {
Expand Down Expand Up @@ -194,8 +194,8 @@ public String toString() {
+ boundaryMaxScan
+ ", boundaryScannerLocale="
+ boundaryScannerLocale
+ ", topPhraseOnce="
+ topPhraseOnce
+ ", topBoostOnly="
+ topBoostOnly
+ '}';
}

Expand All @@ -215,7 +215,7 @@ public static final class Builder {
private Character[] boundaryChars;
private int boundaryMaxScan;
private Locale boundaryScannerLocale;
private boolean topPhraseOnce;
private boolean topBoostOnly;
private Map<String, Object> customHighlighterParams;

public Builder() {}
Expand Down Expand Up @@ -290,8 +290,8 @@ public Builder withBoundaryScannerLocale(Locale boundaryScannerLocale) {
return this;
}

public Builder withTopPhraseOnce(boolean topPhraseOnce) {
this.topPhraseOnce = topPhraseOnce;
public Builder withTopBoostOnly(boolean topBoostOnly) {
this.topBoostOnly = topBoostOnly;
return this;
}

Expand All @@ -316,7 +316,7 @@ public HighlightSettings build() {
boundaryChars,
boundaryMaxScan,
boundaryScannerLocale,
topPhraseOnce,
topBoostOnly,
customHighlighterParams);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,10 +133,10 @@ static Map<String, HighlightSettings> createPerFieldSettings(
settings.hasBoundaryScannerLocale()
? Locale.forLanguageTag(settings.getBoundaryScannerLocale().getValue())
: globalSettings.getBoundaryScannerLocale())
.withTopPhraseOnce(
settings.hasTopPhraseOnce()
? settings.getTopPhraseOnce().getValue()
: globalSettings.getTopPhraseOnce())
.withTopBoostOnly(
settings.hasTopBoostOnly()
? settings.getTopBoostOnly().getValue()
: globalSettings.getTopBoostOnly())
.withCustomHighlighterParams(
settings.hasCustomHighlighterParams()
? StructValueTransformer.transformStruct(
Expand Down Expand Up @@ -217,9 +217,9 @@ private static HighlightSettings createGlobalFieldSettings(
settings.hasBoundaryScannerLocale()
? Locale.forLanguageTag(settings.getBoundaryScannerLocale().getValue())
: DEFAULT_BOUNDARY_SCANNER_LOCALE)
.withTopPhraseOnce(
settings.hasTopPhraseOnce()
? settings.getTopPhraseOnce().getValue()
.withTopBoostOnly(
settings.hasTopBoostOnly()
? settings.getTopBoostOnly().getValue()
: DEFAULT_TOP_PHRASE_ONCE)
.withCustomHighlighterParams(
settings.hasCustomHighlighterParams()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,12 +120,12 @@ public String[] getHighlights(
}

BaseFragmentsBuilder fragmentsBuilder =
new TopPhraseOnceFragmentsBuilderAdaptor(
new TopBoostOnlyFragmentsBuilderAdaptor(
settings.isScoreOrdered()
? new ScoreOrderFragmentsBuilder()
: new SimpleFragmentsBuilder(),
boundaryScanner,
settings.getTopPhraseOnce());
settings.getTopBoostOnly());
fragmentsBuilder.setDiscreteMultiValueHighlighting(settings.getDiscreteMultivalue());

try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
*/
package com.yelp.nrtsearch.server.luceneserver.highlights;

import java.util.Comparator;
import java.util.List;
import org.apache.lucene.document.Field;
import org.apache.lucene.search.highlight.Encoder;
Expand All @@ -25,18 +24,18 @@
import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo.SubInfo;
import org.apache.lucene.search.vectorhighlight.FieldPhraseList.WeightedPhraseInfo.Toffs;

public class TopPhraseOnceFragmentsBuilderAdaptor extends BaseFragmentsBuilder {
public class TopBoostOnlyFragmentsBuilderAdaptor extends BaseFragmentsBuilder {
private final BaseFragmentsBuilder innerBaseFragmentsBuilder;
private final boolean topPhraseOnce;
private final boolean topBoostOnly;

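/**
 * Creates an adaptor around an existing fragments builder.
 *
 * @param baseFragmentsBuilder builder kept as the inner builder of this adaptor
 * @param boundaryScanner boundary scanner handed to the {@link BaseFragmentsBuilder} constructor
 * @param topBoostOnly when {@code false}, {@code makeFragment} defers to the superclass
 *     implementation; when {@code true}, only the sub-infos carrying the highest boost value in
 *     the fragment are highlighted
 */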
public TopPhraseOnceFragmentsBuilderAdaptor(
public TopBoostOnlyFragmentsBuilderAdaptor(
BaseFragmentsBuilder baseFragmentsBuilder,
BoundaryScanner boundaryScanner,
boolean topPhraseOnce) {
boolean topBoostOnly) {
super(boundaryScanner);
this.innerBaseFragmentsBuilder = baseFragmentsBuilder;
this.topPhraseOnce = topPhraseOnce;
this.topBoostOnly = topBoostOnly;
}

@Override
Expand All @@ -53,7 +52,7 @@ protected String makeFragment(
String[] preTags,
String[] postTags,
Encoder encoder) {
if (!topPhraseOnce) {
if (!topBoostOnly) {
return super.makeFragment(buffer, index, values, fragInfo, preTags, postTags, encoder);
}
StringBuilder fragment = new StringBuilder();
Expand All @@ -63,11 +62,12 @@ protected String makeFragment(
getFragmentSourceMSO(
buffer, index, values, s, fragInfo.getEndOffset(), modifiedStartOffset);
int srcIndex = 0;
// get a list of the first highest boosted phrase only
List<SubInfo> subInfos =
fragInfo.getSubInfos().stream().max(Comparator.comparingDouble(SubInfo::getBoost)).stream()
.toList();
for (SubInfo subInfo : subInfos) {
double topBoostValue =
fragInfo.getSubInfos().stream().map(SubInfo::getBoost).max(Float::compare).orElse(0f);
for (SubInfo subInfo : fragInfo.getSubInfos()) {
if (subInfo.getBoost() < topBoostValue) {
continue;
}
for (Toffs to : subInfo.getTermsOffsets()) {
fragment
.append(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ protected void initIndex(String name) throws Exception {
"The food is good there, but the service is terrible.",
"I personally don't like the staff at this place.",
"Not all food are good.",
"This pizza place was one of my favorites in downtown, and as promised by other users, the pepperoni pizza is the best pepperoni pizza in town."))
"The margarita pizza and the marinara pizza in this pizzeria are yummy and inexpensive."))
.build())
.putFields(
"boundary_scanner_field",
Expand Down Expand Up @@ -194,7 +194,7 @@ public void testHighlightMultivalueField() {
}

@Test
public void testHighlightMultivalueFieldWithTopPhraseOnly() {
public void testHighlightMultivalueFieldWithTopBoostOnly() {
Highlight highlight =
Highlight.newBuilder()
.addFields("comment_multivalue")
Expand All @@ -212,21 +212,41 @@ public void testHighlightMultivalueFieldWithTopPhraseOnly() {
PhraseQuery.newBuilder()
.setField("comment_multivalue")
.addAllTerms(
List.of("pepperoni", "pizza")))
List.of("margarita", "pizza")))
.setBoost(3))
.setOccurValue(BooleanClause.Occur.SHOULD_VALUE))
.addClauses(
BooleanClause.newBuilder()
.setQuery(
Query.newBuilder()
.setPhraseQuery(
PhraseQuery.newBuilder()
.setField("comment_multivalue")
.addAllTerms(
List.of("marinara", "pizza")))
.setBoost(3))
.setOccurValue(BooleanClause.Occur.SHOULD_VALUE))
.addClauses(
BooleanClause.newBuilder()
.setQuery(
Query.newBuilder()
.setTermQuery(
TermQuery.newBuilder()
.setField("comment_multivalue")
.setTextValue("delicious"))
.setBoost(4)))
.addClauses(
BooleanClause.newBuilder()
.setQuery(
Query.newBuilder()
.setTermQuery(
TermQuery.newBuilder()
.setField("comment_multivalue")
.setTextValue("pizza"))
.setTextValue("yummy"))
.setBoost(2)))))
.setMaxNumberOfFragments(UInt32Value.of(1))
.setFragmentSize(UInt32Value.of(250))
.setTopPhraseOnce(BoolValue.of(true))
.setTopBoostOnly(BoolValue.of(true))
.setScoreOrdered(BoolValue.of(true))
.setDiscreteMultivalue(BoolValue.of(true)))
.build();
Expand All @@ -236,7 +256,7 @@ public void testHighlightMultivalueFieldWithTopPhraseOnly() {

assertThat(response.getHits(0).getHighlightsMap().get("comment_multivalue").getFragmentsList())
.containsExactly(
"This pizza place was one of my favorites in downtown, and as promised by other users, the <em>pepperoni pizza</em> is the best pepperoni pizza in town.");
"The <em>margarita pizza</em> and the <em>marinara pizza</em> in this pizzeria are yummy and inexpensive.");
assertThat(response.getDiagnostics().getHighlightTimeMs()).isGreaterThan(0);
}

Expand Down

0 comments on commit 26c0db4

Please sign in to comment.