From 321d29911470c77839db29174be602a3665d1896 Mon Sep 17 00:00:00 2001
From: Andrew Prudhomme
Date: Mon, 6 Jan 2025 13:22:55 -0800
Subject: [PATCH] Backport: Add Term/TermInSet query support for DATE_TIME field (#802)

---
Reviewer notes (placed after the "---" separator, so they are not part of the
commit message):
 * DateTimeFieldDef now implements TermQueryable. Long terms are turned into
   LongPoint.newExactQuery / LongPoint.newSetQuery on the field name; text
   terms are passed through getTimeToIndex(...) and then delegated to the
   long-valued methods.
 * The tests register three extra epoch_millis fields (single_stored,
   multi_stored, stored_only) and exercise TermQuery / TermInSetQuery with
   both text and long terms, including the "is not searchable" error that is
   expected for stored_only.
 * Line breaks and indentation in this patch were reconstructed from a
   whitespace-collapsed copy; hunk line counts match the hunk headers, but
   leading whitespace of context lines should be confirmed against the tree.

 .../luceneserver/field/DateTimeFieldDef.java  |  27 ++-
 .../field/DateTimeFieldDefTest.java           | 227 ++++++++++++++++++
 .../field/registerFieldsDateTime.json         |  21 ++
 3 files changed, 274 insertions(+), 1 deletion(-)

diff --git a/src/main/java/com/yelp/nrtsearch/server/luceneserver/field/DateTimeFieldDef.java b/src/main/java/com/yelp/nrtsearch/server/luceneserver/field/DateTimeFieldDef.java
index e853ba8d6..e60946f2b 100644
--- a/src/main/java/com/yelp/nrtsearch/server/luceneserver/field/DateTimeFieldDef.java
+++ b/src/main/java/com/yelp/nrtsearch/server/luceneserver/field/DateTimeFieldDef.java
@@ -24,6 +24,7 @@
 import com.yelp.nrtsearch.server.luceneserver.doc.LoadedDocValues;
 import com.yelp.nrtsearch.server.luceneserver.field.properties.RangeQueryable;
 import com.yelp.nrtsearch.server.luceneserver.field.properties.Sortable;
+import com.yelp.nrtsearch.server.luceneserver.field.properties.TermQueryable;
 import java.io.IOException;
 import java.time.Instant;
 import java.time.LocalDateTime;
@@ -34,6 +35,7 @@
 import java.time.format.DateTimeParseException;
 import java.time.format.ResolverStyle;
 import java.time.temporal.ChronoField;
+import java.util.ArrayList;
 import java.util.List;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.LongPoint;
@@ -50,7 +52,8 @@
 import org.apache.lucene.search.SortField;
 
 /** Field class for 'DATE_TIME' field type. */
-public class DateTimeFieldDef extends IndexableFieldDef implements Sortable, RangeQueryable {
+public class DateTimeFieldDef extends IndexableFieldDef
+    implements Sortable, RangeQueryable, TermQueryable {
   private static final String EPOCH_MILLIS = "epoch_millis";
   private static final String STRICT_DATE_OPTIONAL_TIME = "strict_date_optional_time";
 
@@ -147,6 +150,28 @@ private long convertDateStringToMillis(String dateString) {
         .toEpochMilli();
   }
 
+  @Override
+  public Query getTermQueryFromLongValue(long longValue) {
+    return LongPoint.newExactQuery(getName(), longValue);
+  }
+
+  @Override
+  public Query getTermInSetQueryFromLongValues(List<Long> longValues) {
+    return LongPoint.newSetQuery(getName(), longValues);
+  }
+
+  @Override
+  public Query getTermQueryFromTextValue(String textValue) {
+    return getTermQueryFromLongValue(getTimeToIndex(textValue));
+  }
+
+  @Override
+  public Query getTermInSetQueryFromTextValues(List<String> textValues) {
+    List<Long> longTerms = new ArrayList<>(textValues.size());
+    textValues.forEach((s) -> longTerms.add(getTimeToIndex(s)));
+    return getTermInSetQueryFromLongValues(longTerms);
+  }
+
   private void ensureUpperIsMoreThanLower(RangeQuery rangeQuery, long lower, long upper) {
     if (lower > upper) {
       throw new IllegalArgumentException(
diff --git a/src/test/java/com/yelp/nrtsearch/server/luceneserver/field/DateTimeFieldDefTest.java b/src/test/java/com/yelp/nrtsearch/server/luceneserver/field/DateTimeFieldDefTest.java
index ec8a09e19..e2b10edab 100644
--- a/src/test/java/com/yelp/nrtsearch/server/luceneserver/field/DateTimeFieldDefTest.java
+++ b/src/test/java/com/yelp/nrtsearch/server/luceneserver/field/DateTimeFieldDefTest.java
@@ -16,6 +16,8 @@
 package com.yelp.nrtsearch.server.luceneserver.field;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
 
 import com.yelp.nrtsearch.server.grpc.AddDocumentRequest;
 import com.yelp.nrtsearch.server.grpc.AddDocumentRequest.MultiValuedField;
@@ -24,7 +26,10 @@
 import com.yelp.nrtsearch.server.grpc.RangeQuery;
 import com.yelp.nrtsearch.server.grpc.SearchRequest;
 import com.yelp.nrtsearch.server.grpc.SearchResponse;
+import com.yelp.nrtsearch.server.grpc.TermInSetQuery;
+import com.yelp.nrtsearch.server.grpc.TermQuery;
 import com.yelp.nrtsearch.server.luceneserver.ServerTestCase;
+import io.grpc.StatusRuntimeException;
 import io.grpc.testing.GrpcCleanupRule;
 import java.io.IOException;
 import java.util.ArrayList;
@@ -96,6 +101,14 @@ private AddDocumentRequest buildDocument(
         .putFields(
             "timestamp_string_format",
             MultiValuedField.newBuilder().addValue(timestampFormatted).build())
+        .putFields("single_stored", MultiValuedField.newBuilder().addValue(timestampMillis).build())
+        .putFields("stored_only", MultiValuedField.newBuilder().addValue(timestampMillis).build())
+        .putFields(
+            "multi_stored",
+            MultiValuedField.newBuilder()
+                .addValue(timestampMillis)
+                .addValue(String.valueOf(Long.parseLong(timestampMillis) + 2))
+                .build())
         .build();
   }
 
@@ -472,6 +485,186 @@ public void testRangeQueryWithCombinationOfSpecifiedBoundsAndExclusive() {
     assertRangeQuery(rangeQuery, "3", "4", "6");
   }
 
+  @Test
+  public void testTermQuery() {
+    TermQuery termQuery =
+        TermQuery.newBuilder()
+            .setField("timestamp_epoch_millis")
+            .setTextValue("1611742000")
+            .build();
+    assertTermQuery(termQuery, "1");
+    termQuery =
+        TermQuery.newBuilder().setField("timestamp_epoch_millis").setLongValue(1611742000).build();
+    assertTermQuery(termQuery, "1");
+  }
+
+  @Test
+  public void testTermQuery_stringFormat() {
+    TermQuery termQuery =
+        TermQuery.newBuilder()
+            .setField("timestamp_string_format")
+            .setTextValue("2021-02-15 20:20:00")
+            .build();
+    assertTermQuery(termQuery, "3");
+    termQuery =
+        TermQuery.newBuilder()
+            .setField("timestamp_string_format")
+            .setLongValue(1613420400000L)
+            .build();
+    assertTermQuery(termQuery, "3");
+  }
+
+  @Test
+  public void testTermQuery_single() {
+    TermQuery termQuery =
+        TermQuery.newBuilder().setField("single_stored").setTextValue("1611742000").build();
+    assertTermQuery(termQuery, "1");
+    termQuery = TermQuery.newBuilder().setField("single_stored").setLongValue(1611742000).build();
+    assertTermQuery(termQuery, "1");
+  }
+
+  @Test
+  public void testTermQuery_multi() {
+    TermQuery termQuery =
+        TermQuery.newBuilder().setField("multi_stored").setTextValue("1613742002").build();
+    assertTermQuery(termQuery, "4");
+    termQuery = TermQuery.newBuilder().setField("multi_stored").setLongValue(1613742002).build();
+    assertTermQuery(termQuery, "4");
+  }
+
+  @Test
+  public void testTermQuery_notSearchable() {
+    try {
+      TermQuery termQuery =
+          TermQuery.newBuilder().setField("stored_only").setTextValue("1611742000").build();
+      assertTermQuery(termQuery);
+      fail();
+    } catch (StatusRuntimeException e) {
+      assertTrue(
+          e.getMessage()
+              .contains(
+                  "Field stored_only is not searchable, which is required for TermQuery / TermInSetQuery"));
+    }
+  }
+
+  @Test
+  public void testTermInSetQuery() {
+    TermInSetQuery termInSetQuery =
+        TermInSetQuery.newBuilder()
+            .setField("timestamp_epoch_millis")
+            .setTextTerms(
+                TermInSetQuery.TextTerms.newBuilder()
+                    .addTerms("1611742000")
+                    .addTerms("1612742000")
+                    .build())
+            .build();
+    assertTermInSetQuery(termInSetQuery, "1", "3");
+    termInSetQuery =
+        TermInSetQuery.newBuilder()
+            .setField("timestamp_epoch_millis")
+            .setLongTerms(
+                TermInSetQuery.LongTerms.newBuilder()
+                    .addTerms(1611742000)
+                    .addTerms(1612742000)
+                    .build())
+            .build();
+    assertTermInSetQuery(termInSetQuery, "1", "3");
+  }
+
+  @Test
+  public void testTermInSetQuery_stringFormat() {
+    TermInSetQuery termInSetQuery =
+        TermInSetQuery.newBuilder()
+            .setField("timestamp_string_format")
+            .setTextTerms(
+                TermInSetQuery.TextTerms.newBuilder()
+                    .addTerms("2021-02-15 20:20:00")
+                    .addTerms("2021-03-15 20:20:00")
+                    .build())
+            .build();
+    assertTermInSetQuery(termInSetQuery, "3", "4");
+    termInSetQuery =
+        TermInSetQuery.newBuilder()
+            .setField("timestamp_string_format")
+            .setLongTerms(
+                TermInSetQuery.LongTerms.newBuilder()
+                    .addTerms(1613420400000L)
+                    .addTerms(1615839600000L)
+                    .build())
+            .build();
+    assertTermInSetQuery(termInSetQuery, "3", "4");
+  }
+
+  @Test
+  public void testTermInSetQuery_single() {
+    TermInSetQuery termInSetQuery =
+        TermInSetQuery.newBuilder()
+            .setField("single_stored")
+            .setTextTerms(
+                TermInSetQuery.TextTerms.newBuilder()
+                    .addTerms("1611742000")
+                    .addTerms("1612742000")
+                    .build())
+            .build();
+    assertTermInSetQuery(termInSetQuery, "1", "3");
+    termInSetQuery =
+        TermInSetQuery.newBuilder()
+            .setField("single_stored")
+            .setLongTerms(
+                TermInSetQuery.LongTerms.newBuilder()
+                    .addTerms(1611742000)
+                    .addTerms(1612742000)
+                    .build())
+            .build();
+    assertTermInSetQuery(termInSetQuery, "1", "3");
+  }
+
+  @Test
+  public void testTermInSetQuery_multi() {
+    TermInSetQuery termInSetQuery =
+        TermInSetQuery.newBuilder()
+            .setField("multi_stored")
+            .setTextTerms(
+                TermInSetQuery.TextTerms.newBuilder()
+                    .addTerms("1612742002")
+                    .addTerms("1613742002")
+                    .build())
+            .build();
+    assertTermInSetQuery(termInSetQuery, "3", "4");
+    termInSetQuery =
+        TermInSetQuery.newBuilder()
+            .setField("multi_stored")
+            .setLongTerms(
+                TermInSetQuery.LongTerms.newBuilder()
+                    .addTerms(1612742002)
+                    .addTerms(1613742002)
+                    .build())
+            .build();
+    assertTermInSetQuery(termInSetQuery, "3", "4");
+  }
+
+  @Test
+  public void testTermInSetQuery_notSearchable() {
+    try {
+      TermInSetQuery termInSetQuery =
+          TermInSetQuery.newBuilder()
+              .setField("stored_only")
+              .setTextTerms(
+                  TermInSetQuery.TextTerms.newBuilder()
+                      .addTerms("1611742000")
+                      .addTerms("1612742000")
+                      .build())
+              .build();
+      assertTermInSetQuery(termInSetQuery);
+      fail();
+    } catch (StatusRuntimeException e) {
+      assertTrue(
+          e.getMessage()
+              .contains(
+                  "Field stored_only is not searchable, which is required for TermQuery / TermInSetQuery"));
+    }
+  }
+
   private void assertRangeQuery(RangeQuery rangeQuery, String... expectedIds) {
     String idFieldName = "doc_id";
     Query query = Query.newBuilder().setRangeQuery(rangeQuery).build();
@@ -489,6 +682,40 @@ private void assertRangeQuery(RangeQuery rangeQuery, String... expectedIds) {
     assertEquals(expected, actualValues);
   }
 
+  private void assertTermQuery(TermQuery termQuery, String... expectedIds) {
+    String idFieldName = "doc_id";
+    Query query = Query.newBuilder().setTermQuery(termQuery).build();
+    SearchResponse searchResponse = doQuery(query, List.of(idFieldName));
+    assertEquals(expectedIds.length, searchResponse.getHitsCount());
+    List<String> actualValues =
+        searchResponse.getHitsList().stream()
+            .map(
+                hit ->
+                    hit.getFieldsMap().get(idFieldName).getFieldValueList().get(0).getTextValue())
+            .sorted()
+            .collect(Collectors.toList());
+    List<String> expected = Arrays.asList(expectedIds);
+    expected.sort(Comparator.comparing(Function.identity()));
+    assertEquals(expected, actualValues);
+  }
+
+  private void assertTermInSetQuery(TermInSetQuery termInSetQuery, String... expectedIds) {
+    String idFieldName = "doc_id";
+    Query query = Query.newBuilder().setTermInSetQuery(termInSetQuery).build();
+    SearchResponse searchResponse = doQuery(query, List.of(idFieldName));
+    assertEquals(expectedIds.length, searchResponse.getHitsCount());
+    List<String> actualValues =
+        searchResponse.getHitsList().stream()
+            .map(
+                hit ->
+                    hit.getFieldsMap().get(idFieldName).getFieldValueList().get(0).getTextValue())
+            .sorted()
+            .collect(Collectors.toList());
+    List<String> expected = Arrays.asList(expectedIds);
+    expected.sort(Comparator.comparing(Function.identity()));
+    assertEquals(expected, actualValues);
+  }
+
   private SearchResponse doQuery(Query query, List<String> fields) {
     return getGrpcServer()
         .getBlockingStub()
diff --git a/src/test/resources/field/registerFieldsDateTime.json b/src/test/resources/field/registerFieldsDateTime.json
index 56842b794..d283b8179 100644
--- a/src/test/resources/field/registerFieldsDateTime.json
+++ b/src/test/resources/field/registerFieldsDateTime.json
@@ -26,6 +26,27 @@
       "dateTimeFormat": "yyyy-MM-dd HH:mm:ss",
       "storeDocValues": true,
       "search": true
+    },
+    {
+      "name": "single_stored",
+      "type": "DATE_TIME",
+      "dateTimeFormat": "epoch_millis",
+      "search": true,
+      "store": true
+    },
+    {
+      "name": "multi_stored",
+      "type": "DATE_TIME",
+      "dateTimeFormat": "epoch_millis",
+      "search": true,
+      "multiValued": true,
+      "store": true
+    },
+    {
+      "name": "stored_only",
+      "type": "DATE_TIME",
+      "dateTimeFormat": "epoch_millis",
+      "store": true
     }
   ]
 }
\ No newline at end of file