Skip to content

Commit

Permalink
refactor star tree filter
Browse files Browse the repository at this point in the history
Signed-off-by: Sandesh Kumar <sandeshkr419@gmail.com>
  • Loading branch information
sandeshkr419 committed Oct 15, 2024
1 parent 66876e8 commit fc2ea8b
Show file tree
Hide file tree
Showing 9 changed files with 63 additions and 40 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

package org.opensearch.index.compositeindex.datacube;

import org.apache.lucene.index.DocValuesType;
import org.opensearch.common.Rounding;
import org.opensearch.common.annotation.ExperimentalApi;
import org.opensearch.common.time.DateUtils;
Expand Down Expand Up @@ -169,4 +170,8 @@ public int compare(DateTimeUnitRounding unit1, DateTimeUnitRounding unit2) {
public static List<DateTimeUnitRounding> getSortedDateTimeUnits(List<DateTimeUnitRounding> dateTimeUnits) {
return dateTimeUnits.stream().sorted(new DateTimeUnitComparator()).collect(Collectors.toList());
}

public DocValuesType getDocValuesType() {
return DocValuesType.SORTED_NUMERIC;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

package org.opensearch.index.compositeindex.datacube;

import org.apache.lucene.index.DocValuesType;
import org.opensearch.common.annotation.ExperimentalApi;
import org.opensearch.core.xcontent.ToXContent;

Expand Down Expand Up @@ -42,4 +43,6 @@ public interface Dimension extends ToXContent {
* Returns the list of dimension fields that represent the dimension
*/
List<String> getSubDimensionNames();

DocValuesType getDocValuesType();
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

package org.opensearch.index.compositeindex.datacube;

import org.apache.lucene.index.DocValuesType;
import org.opensearch.common.annotation.ExperimentalApi;
import org.opensearch.core.xcontent.XContentBuilder;
import org.opensearch.index.mapper.CompositeDataCubeFieldType;
Expand Down Expand Up @@ -71,4 +72,9 @@ public boolean equals(Object o) {
public int hashCode() {
return Objects.hash(field);
}

@Override
public DocValuesType getDocValuesType() {
return DocValuesType.SORTED_NUMERIC;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

package org.opensearch.index.compositeindex.datacube;

import org.apache.lucene.index.DocValuesType;
import org.opensearch.core.xcontent.XContentBuilder;
import org.opensearch.index.mapper.CompositeDataCubeFieldType;

Expand Down Expand Up @@ -69,4 +70,9 @@ public boolean equals(Object o) {
public int hashCode() {
return Objects.hash(field);
}

@Override
public DocValuesType getDocValuesType() {
return DocValuesType.SORTED_NUMERIC;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

package org.opensearch.index.compositeindex.datacube.startree.utils;

import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.search.CollectionTerminatedException;
Expand Down Expand Up @@ -99,6 +100,12 @@ private static StarTreeQueryContext tryCreateStarTreeQueryContext(
if (queryBuilder == null || queryBuilder instanceof MatchAllQueryBuilder) {
queryMap = null;
} else if (queryBuilder instanceof TermQueryBuilder) {
// TODO: Add support for keyword fields
if (compositeFieldType.getDimensions().stream().anyMatch(d -> d.getDocValuesType() != DocValuesType.SORTED_NUMERIC)) {
// return null for non-numeric fields
return null;
}

List<String> supportedDimensions = compositeFieldType.getDimensions()
.stream()
.map(Dimension::getField)
Expand Down Expand Up @@ -233,8 +240,7 @@ public static FixedBitSet getStarTreeFilteredValues(SearchContext context, LeafR
throws IOException {
FixedBitSet result = context.getStarTreeQueryContext().getStarTreeValues(ctx);
if (result == null) {
StarTreeFilter filter = new StarTreeFilter(starTreeValues, context.getStarTreeQueryContext().getQueryMap());
result = filter.getStarTreeResult();
result = StarTreeFilter.getStarTreeResult(starTreeValues, context.getStarTreeQueryContext().getQueryMap());
context.getStarTreeQueryContext().setStarTreeValues(ctx, result);
}
return result;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,28 +42,18 @@
public class StarTreeFilter {
private static final Logger logger = LogManager.getLogger(StarTreeFilter.class);

private final Map<String, Long> queryMap;
private final StarTreeValues starTreeValues;

public StarTreeFilter(StarTreeValues starTreeAggrStructure, Map<String, Long> predicateEvaluators) {
// This filter operator does not support AND/OR/NOT operations as of now.
starTreeValues = starTreeAggrStructure;
queryMap = predicateEvaluators != null ? predicateEvaluators : Collections.emptyMap();
}

/**
* <ul>
* <li>First go over the star tree and try to match as many dimensions as possible
* <li>For the remaining columns, use star-tree doc values to match them
* </ul>
* First go over the star tree and try to match as many dimensions as possible
* For the remaining columns, use star-tree doc values to match them
*/
public FixedBitSet getStarTreeResult() throws IOException {
StarTreeResult starTreeResult = traverseStarTree();
public static FixedBitSet getStarTreeResult(StarTreeValues starTreeValues, Map<String, Long> predicateEvaluators) throws IOException {
Map<String, Long> queryMap = predicateEvaluators != null ? predicateEvaluators : Collections.emptyMap();
StarTreeResult starTreeResult = traverseStarTree(starTreeValues, queryMap);

// Initialize FixedBitSet with size maxMatchedDoc + 1
FixedBitSet bitSet = new FixedBitSet(starTreeResult.maxMatchedDoc + 1);
SortedNumericStarTreeValuesIterator starTreeValuesIterator = new SortedNumericStarTreeValuesIterator(
starTreeResult._matchedDocIds.build().iterator()
starTreeResult.matchedDocIds.build().iterator()
);

// No matches, return an empty FixedBitSet
Expand All @@ -80,10 +70,10 @@ public FixedBitSet getStarTreeResult() throws IOException {
FixedBitSet tempBitSet = new FixedBitSet(starTreeResult.maxMatchedDoc + 1);

// Process remaining predicate columns to further filter the results
for (String remainingPredicateColumn : starTreeResult._remainingPredicateColumns) {
for (String remainingPredicateColumn : starTreeResult.remainingPredicateColumns) {
logger.debug("remainingPredicateColumn : {}, maxMatchedDoc : {} ", remainingPredicateColumn, starTreeResult.maxMatchedDoc);

SortedNumericStarTreeValuesIterator ndv = (SortedNumericStarTreeValuesIterator) this.starTreeValues.getDimensionValuesIterator(
SortedNumericStarTreeValuesIterator ndv = (SortedNumericStarTreeValuesIterator) starTreeValues.getDimensionValuesIterator(
remainingPredicateColumn
);

Expand Down Expand Up @@ -118,8 +108,8 @@ public FixedBitSet getStarTreeResult() throws IOException {
* Helper method to traverse the star tree, get matching documents and keep track of all the
* predicate dimensions that are not matched.
*/
private StarTreeResult traverseStarTree() throws IOException {
DocIdSetBuilder docsWithField = new DocIdSetBuilder(this.starTreeValues.getStarTreeDocumentCount());
private static StarTreeResult traverseStarTree(StarTreeValues starTreeValues, Map<String, Long> queryMap) throws IOException {
DocIdSetBuilder docsWithField = new DocIdSetBuilder(starTreeValues.getStarTreeDocumentCount());
DocIdSetBuilder.BulkAdder adder;
Set<String> globalRemainingPredicateColumns = null;
StarTreeNode starTree = starTreeValues.getRoot();
Expand Down Expand Up @@ -214,15 +204,20 @@ private StarTreeResult traverseStarTree() throws IOException {
/**
* Helper class to wrap the result from traversing the star tree.
* */
static class StarTreeResult {
final DocIdSetBuilder _matchedDocIds;
final Set<String> _remainingPredicateColumns;
final int numOfMatchedDocs;
final int maxMatchedDoc;

StarTreeResult(DocIdSetBuilder matchedDocIds, Set<String> remainingPredicateColumns, int numOfMatchedDocs, int maxMatchedDoc) {
_matchedDocIds = matchedDocIds;
_remainingPredicateColumns = remainingPredicateColumns;
public static class StarTreeResult {
public final DocIdSetBuilder matchedDocIds;
public final Set<String> remainingPredicateColumns;
public final int numOfMatchedDocs;
public final int maxMatchedDoc;

public StarTreeResult(
DocIdSetBuilder matchedDocIds,
Set<String> remainingPredicateColumns,
int numOfMatchedDocs,
int maxMatchedDoc
) {
this.matchedDocIds = matchedDocIds;
this.remainingPredicateColumns = remainingPredicateColumns;
this.numOfMatchedDocs = numOfMatchedDocs;
this.maxMatchedDoc = maxMatchedDoc;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,15 @@ public class StarTreeQueryContext {
* This is used to cache the results for each leaf reader context
* to avoid reading the filtered values from the leaf reader context multiple times
*/
private FixedBitSet[] starTreeValues;
private final FixedBitSet[] starTreeValues;

public StarTreeQueryContext(CompositeIndexFieldInfo starTree, Map<String, Long> queryMap, int cacheStarTreeValuesSize) {
public StarTreeQueryContext(CompositeIndexFieldInfo starTree, Map<String, Long> queryMap, int numSegmentsCache) {
this.starTree = starTree;
this.queryMap = queryMap;
if (cacheStarTreeValuesSize > -1) {
starTreeValues = new FixedBitSet[cacheStarTreeValuesSize];
if (numSegmentsCache > -1) {
starTreeValues = new FixedBitSet[numSegmentsCache];
} else {
starTreeValues = null;
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import org.opensearch.core.common.Strings;
import org.opensearch.index.IndexService;
import org.opensearch.index.codec.composite.CompositeIndexFieldInfo;
import org.opensearch.index.codec.composite99.datacube.startree.StarTreeDocValuesFormatTests;
import org.opensearch.index.codec.composite912.datacube.startree.StarTreeDocValuesFormatTests;
import org.opensearch.index.compositeindex.CompositeIndexSettings;
import org.opensearch.index.compositeindex.datacube.startree.StarTreeIndexSettings;
import org.opensearch.index.mapper.CompositeMappedFieldType;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.lucene99.Lucene99Codec;
import org.apache.lucene.codecs.lucene912.Lucene912Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.index.DirectoryReader;
Expand All @@ -30,8 +30,8 @@
import org.opensearch.common.util.FeatureFlags;
import org.opensearch.index.codec.composite.CompositeIndexFieldInfo;
import org.opensearch.index.codec.composite.CompositeIndexReader;
import org.opensearch.index.codec.composite.composite99.Composite99Codec;
import org.opensearch.index.codec.composite99.datacube.startree.StarTreeDocValuesFormatTests;
import org.opensearch.index.codec.composite.composite912.Composite912Codec;
import org.opensearch.index.codec.composite912.datacube.startree.StarTreeDocValuesFormatTests;
import org.opensearch.index.compositeindex.datacube.Dimension;
import org.opensearch.index.compositeindex.datacube.NumericDimension;
import org.opensearch.index.mapper.MappedFieldType;
Expand Down Expand Up @@ -94,7 +94,7 @@ protected Codec getCodec() {
} catch (IOException e) {
throw new RuntimeException(e);
}
return new Composite99Codec(Lucene99Codec.Mode.BEST_SPEED, mapperService, testLogger);
return new Composite912Codec(Lucene912Codec.Mode.BEST_SPEED, mapperService, testLogger);
}

public void testStarTreeDocValues() throws IOException {
Expand Down

0 comments on commit fc2ea8b

Please sign in to comment.