diff --git a/CHANGELOG.md b/CHANGELOG.md index 7013660ee44f7..030a1504d11c6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Added new Setting property UnmodifiableOnRestore to prevent updating settings on restore snapshot ([#16957](https://github.com/opensearch-project/OpenSearch/pull/16957)) - Introduce Template query ([#16818](https://github.com/opensearch-project/OpenSearch/pull/16818)) - Propagate the sourceIncludes and excludes fields from fetchSourceContext to FieldsVisitor. ([#17080](https://github.com/opensearch-project/OpenSearch/pull/17080)) +- [Star Tree] [Search] Resolving Date histogram with metric aggregation using star-tree ([#16674](https://github.com/opensearch-project/OpenSearch/pull/16674)) ### Dependencies - Bump `com.google.cloud:google-cloud-core-http` from 2.23.0 to 2.47.0 ([#16504](https://github.com/opensearch-project/OpenSearch/pull/16504)) diff --git a/server/src/main/java/org/opensearch/common/Rounding.java b/server/src/main/java/org/opensearch/common/Rounding.java index 83411dea9dad3..df911ecbbafe8 100644 --- a/server/src/main/java/org/opensearch/common/Rounding.java +++ b/server/src/main/java/org/opensearch/common/Rounding.java @@ -271,6 +271,10 @@ public void writeTo(StreamOutput out) throws IOException { public abstract byte id(); + public DateTimeUnit unit() { + return null; + } + /** * A strategy for rounding milliseconds since epoch. * @@ -525,6 +529,11 @@ public byte id() { return ID; } + @Override + public DateTimeUnit unit() { + return unit; + } + private LocalDateTime truncateLocalDateTime(LocalDateTime localDateTime) { switch (unit) { case SECOND_OF_MINUTE: diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/DateDimension.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/DateDimension.java index 88a67e1134067..36dd42122c60e 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/DateDimension.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/DateDimension.java @@ -169,6 +169,24 @@ public int compare(DateTimeUnitRounding unit1, DateTimeUnitRounding unit2) { } } + /** + * Returns the closest valid calendar interval to be used for the search interval + */ + public DateTimeUnitRounding findClosestValidInterval(DateTimeUnitRounding searchInterval) { + DateTimeUnitComparator comparator = new DateTimeUnitComparator(); + DateTimeUnitRounding closestValidInterval = null; + + // Find the largest interval that is less than or equal to search interval + for (DateTimeUnitRounding interval : sortedCalendarIntervals) { + if (comparator.compare(interval, searchInterval) <= 0) { + closestValidInterval = interval; + } else { + break; + } + } + return closestValidInterval; + } + /** * Returns a sorted list of dateTimeUnits based on the DateTimeUnitComparator */ diff --git a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeQueryHelper.java b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeQueryHelper.java index e46cf6f56b36e..ce4f8ba7b2d00 100644 --- a/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeQueryHelper.java +++ b/server/src/main/java/org/opensearch/index/compositeindex/datacube/startree/utils/StarTreeQueryHelper.java @@ -17,10 +17,13 @@ import org.opensearch.common.lucene.Lucene; import org.opensearch.index.codec.composite.CompositeIndexFieldInfo; import org.opensearch.index.codec.composite.CompositeIndexReader; +import org.opensearch.index.compositeindex.datacube.DateDimension; import org.opensearch.index.compositeindex.datacube.Dimension; import org.opensearch.index.compositeindex.datacube.Metric; import org.opensearch.index.compositeindex.datacube.MetricStat; import org.opensearch.index.compositeindex.datacube.startree.index.StarTreeValues; +import org.opensearch.index.compositeindex.datacube.startree.utils.date.DateTimeUnitAdapter; +import org.opensearch.index.compositeindex.datacube.startree.utils.date.DateTimeUnitRounding; import org.opensearch.index.compositeindex.datacube.startree.utils.iterator.SortedNumericStarTreeValuesIterator; import org.opensearch.index.mapper.CompositeDataCubeFieldType; import org.opensearch.index.query.MatchAllQueryBuilder; @@ -28,7 +31,8 @@ import org.opensearch.index.query.TermQueryBuilder; import org.opensearch.search.aggregations.AggregatorFactory; import org.opensearch.search.aggregations.LeafBucketCollector; -import org.opensearch.search.aggregations.LeafBucketCollectorBase; +import org.opensearch.search.aggregations.StarTreeBucketCollector; +import org.opensearch.search.aggregations.bucket.histogram.DateHistogramAggregatorFactory; import org.opensearch.search.aggregations.metrics.MetricAggregatorFactory; import org.opensearch.search.aggregations.support.ValuesSource; import org.opensearch.search.builder.SearchSourceBuilder; @@ -37,9 +41,10 @@ import org.opensearch.search.startree.StarTreeQueryContext; import java.io.IOException; -import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Set; +import java.util.function.BiConsumer; import java.util.function.Consumer; import java.util.stream.Collectors; @@ -74,10 +79,16 @@ public static StarTreeQueryContext getStarTreeQueryContext(SearchContext context ); for (AggregatorFactory aggregatorFactory : context.aggregations().factories().getFactories()) { - MetricStat metricStat = validateStarTreeMetricSupport(compositeMappedFieldType, aggregatorFactory); - if (metricStat == null) { - return null; + // first check for aggregation is a metric aggregation + if (validateStarTreeMetricSupport(compositeMappedFieldType, aggregatorFactory)) { + continue; + } + + // if not a metric aggregation, check for applicable date histogram shape + if (validateDateHistogramSupport(compositeMappedFieldType, aggregatorFactory)) { + continue; } + return null; } // need to cache star tree values only for multiple aggregations @@ -100,63 +111,86 @@ private static StarTreeQueryContext tryCreateStarTreeQueryContext( if (queryBuilder == null || queryBuilder instanceof MatchAllQueryBuilder) { queryMap = null; } else if (queryBuilder instanceof TermQueryBuilder) { + TermQueryBuilder termQueryBuilder = (TermQueryBuilder) queryBuilder; // TODO: Add support for keyword fields - if (compositeFieldType.getDimensions().stream().anyMatch(d -> d.getDocValuesType() != DocValuesType.SORTED_NUMERIC)) { - // return null for non-numeric fields - return null; - } - - List supportedDimensions = compositeFieldType.getDimensions() + Dimension matchedDimension = compositeFieldType.getDimensions() .stream() - .map(Dimension::getField) - .collect(Collectors.toList()); - queryMap = getStarTreePredicates(queryBuilder, supportedDimensions); - if (queryMap == null) { + .filter(d -> (d.getField().equals(termQueryBuilder.fieldName()) && d.getDocValuesType() == DocValuesType.SORTED_NUMERIC)) + .findFirst() + .orElse(null); + if (matchedDimension == null) { return null; } + queryMap = Map.of(termQueryBuilder.fieldName(), Long.parseLong(termQueryBuilder.value().toString())); } else { return null; } return new StarTreeQueryContext(compositeIndexFieldInfo, queryMap, cacheStarTreeValuesSize); } - /** - * Parse query body to star-tree predicates - * @param queryBuilder to match star-tree supported query shape - * @return predicates to match - */ - private static Map getStarTreePredicates(QueryBuilder queryBuilder, List supportedDimensions) { - TermQueryBuilder tq = (TermQueryBuilder) queryBuilder; - String field = tq.fieldName(); - if (!supportedDimensions.contains(field)) { - return null; - } - long inputQueryVal = Long.parseLong(tq.value().toString()); - - // Create a map with the field and the value - Map predicateMap = new HashMap<>(); - predicateMap.put(field, inputQueryVal); - return predicateMap; - } - - private static MetricStat validateStarTreeMetricSupport( + private static boolean validateStarTreeMetricSupport( CompositeDataCubeFieldType compositeIndexFieldInfo, AggregatorFactory aggregatorFactory ) { if (aggregatorFactory instanceof MetricAggregatorFactory && aggregatorFactory.getSubFactories().getFactories().length == 0) { + MetricAggregatorFactory metricAggregatorFactory = (MetricAggregatorFactory) aggregatorFactory; String field; Map> supportedMetrics = compositeIndexFieldInfo.getMetrics() .stream() .collect(Collectors.toMap(Metric::getField, Metric::getMetrics)); - MetricStat metricStat = ((MetricAggregatorFactory) aggregatorFactory).getMetricStat(); - field = ((MetricAggregatorFactory) aggregatorFactory).getField(); + MetricStat metricStat = metricAggregatorFactory.getMetricStat(); + field = metricAggregatorFactory.getField(); + + return supportedMetrics.containsKey(field) && supportedMetrics.get(field).contains(metricStat); + } + return false; + } + + private static boolean validateDateHistogramSupport( + CompositeDataCubeFieldType compositeIndexFieldInfo, + AggregatorFactory aggregatorFactory + ) { + if (!(aggregatorFactory instanceof DateHistogramAggregatorFactory) + || aggregatorFactory.getSubFactories().getFactories().length < 1) { + return false; + } + DateHistogramAggregatorFactory dateHistogramAggregatorFactory = (DateHistogramAggregatorFactory) aggregatorFactory; + + // Find the DateDimension in the dimensions list + DateDimension starTreeDateDimension = null; + for (Dimension dimension : compositeIndexFieldInfo.getDimensions()) { + if (dimension instanceof DateDimension) { + starTreeDateDimension = (DateDimension) dimension; + break; + } + } + + // If no DateDimension is found, validation fails + if (starTreeDateDimension == null) { + return false; + } + + // Ensure the rounding is not null + if (dateHistogramAggregatorFactory.getRounding() == null) { + return false; + } + + // Find the closest valid interval in the DateTimeUnitRounding class associated with star tree + DateTimeUnitRounding rounding = starTreeDateDimension.findClosestValidInterval( + new DateTimeUnitAdapter(dateHistogramAggregatorFactory.getRounding()) + ); + if (rounding == null) { + return false; + } - if (field != null && supportedMetrics.containsKey(field) && supportedMetrics.get(field).contains(metricStat)) { - return metricStat; + // Validate all sub-factories + for (AggregatorFactory subFactory : aggregatorFactory.getSubFactories().getFactories()) { + if (!validateStarTreeMetricSupport(compositeIndexFieldInfo, subFactory)) { + return false; } } - return null; + return true; } public static CompositeIndexFieldInfo getSupportedStarTree(SearchContext context) { @@ -222,11 +256,37 @@ public static LeafBucketCollector getStarTreeLeafCollector( // Call the final consumer after processing all entries finalConsumer.run(); - // Return a LeafBucketCollector that terminates collection - return new LeafBucketCollectorBase(sub, valuesSource.doubleValues(ctx)) { + // Terminate after pre-computing aggregation + throw new CollectionTerminatedException(); + } + + public static StarTreeBucketCollector getStarTreeBucketMetricCollector( + CompositeIndexFieldInfo starTree, + String metric, + ValuesSource.Numeric valuesSource, + StarTreeBucketCollector parentCollector, + Consumer growArrays, + BiConsumer updateBucket + ) throws IOException { + assert parentCollector != null; + return new StarTreeBucketCollector(parentCollector) { + String metricName = StarTreeUtils.fullyQualifiedFieldNameForStarTreeMetricsDocValues( + starTree.getField(), + ((ValuesSource.Numeric.FieldData) valuesSource).getIndexFieldName(), + metric + ); + SortedNumericStarTreeValuesIterator metricValuesIterator = (SortedNumericStarTreeValuesIterator) starTreeValues + .getMetricValuesIterator(metricName); + @Override - public void collect(int doc, long bucket) { - throw new CollectionTerminatedException(); + public void collectStarTreeEntry(int starTreeEntryBit, long bucket) throws IOException { + growArrays.accept(bucket); + // Advance the valuesIterator to the current bit + if (!metricValuesIterator.advanceExact(starTreeEntryBit)) { + return; // Skip if no entries for this document + } + long metricValue = metricValuesIterator.nextValue(); + updateBucket.accept(bucket, metricValue); } }; } @@ -240,7 +300,7 @@ public static FixedBitSet getStarTreeFilteredValues(SearchContext context, LeafR throws IOException { FixedBitSet result = context.getStarTreeQueryContext().getStarTreeValues(ctx); if (result == null) { - result = StarTreeFilter.getStarTreeResult(starTreeValues, context.getStarTreeQueryContext().getQueryMap()); + result = StarTreeFilter.getStarTreeResult(starTreeValues, context.getStarTreeQueryContext().getQueryMap(), Set.of()); context.getStarTreeQueryContext().setStarTreeValues(ctx, result); } return result; diff --git a/server/src/main/java/org/opensearch/search/aggregations/StarTreeBucketCollector.java b/server/src/main/java/org/opensearch/search/aggregations/StarTreeBucketCollector.java new file mode 100644 index 0000000000000..e994b65442a49 --- /dev/null +++ b/server/src/main/java/org/opensearch/search/aggregations/StarTreeBucketCollector.java @@ -0,0 +1,75 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.aggregations; + +import org.apache.lucene.util.FixedBitSet; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.compositeindex.datacube.startree.index.StarTreeValues; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +/** + * Collector for star tree aggregation + * This abstract class exposes utilities to help avoid traversing star-tree multiple times and + * collect relevant metrics across nested aggregations in a single traversal + * @opensearch.internal + */ +@ExperimentalApi +public abstract class StarTreeBucketCollector { + + protected final StarTreeValues starTreeValues; + protected final FixedBitSet matchingDocsBitSet; + protected final List subCollectors = new ArrayList<>(); + + public StarTreeBucketCollector(StarTreeValues starTreeValues, FixedBitSet matchingDocsBitSet) throws IOException { + this.starTreeValues = starTreeValues; + this.matchingDocsBitSet = matchingDocsBitSet; + this.setSubCollectors(); + } + + public StarTreeBucketCollector(StarTreeBucketCollector parent) throws IOException { + this.starTreeValues = parent.getStarTreeValues(); + this.matchingDocsBitSet = parent.getMatchingDocsBitSet(); + this.setSubCollectors(); + } + + /** + * Sets the sub-collectors to track nested aggregators + */ + public void setSubCollectors() throws IOException {}; + + /** + * Returns a list of sub-collectors to track nested aggregators + */ + public List getSubCollectors() { + return subCollectors; + } + + /** + * Returns the tree values to iterate + */ + public StarTreeValues getStarTreeValues() { + return starTreeValues; + } + + /** + * Returns the matching docs bitset to iterate upon the star-tree values based on search query + */ + public FixedBitSet getMatchingDocsBitSet() { + return matchingDocsBitSet; + } + + /** + * Collects the star tree entry and bucket ordinal to update + * The method implementation should identify the metrics to collect from that star-tree entry to the specified bucket + */ + public abstract void collectStarTreeEntry(int starTreeEntry, long bucket) throws IOException; +} diff --git a/server/src/main/java/org/opensearch/search/aggregations/StarTreePreComputeCollector.java b/server/src/main/java/org/opensearch/search/aggregations/StarTreePreComputeCollector.java new file mode 100644 index 0000000000000..c2f2017997c4d --- /dev/null +++ b/server/src/main/java/org/opensearch/search/aggregations/StarTreePreComputeCollector.java @@ -0,0 +1,32 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.aggregations; + +import org.apache.lucene.index.LeafReaderContext; +import org.opensearch.index.codec.composite.CompositeIndexFieldInfo; + +import java.io.IOException; + +/** + * This interface is used to pre-compute the star tree bucket collector for each segment/leaf. + * It is utilized by parent aggregation to retrieve a StarTreeBucketCollector which can be used to + * pre-compute the associated aggregation along with its parent pre-computation using star-tree + * + * @opensearch.internal + */ +public interface StarTreePreComputeCollector { + /** + * Get the star tree bucket collector for the specified segment/leaf + */ + StarTreeBucketCollector getStarTreeBucketCollector( + LeafReaderContext ctx, + CompositeIndexFieldInfo starTree, + StarTreeBucketCollector parentCollector + ) throws IOException; +} diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/BucketsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/BucketsAggregator.java index 5420d8c7f6dbf..f075d67b0f48d 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/BucketsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/BucketsAggregator.java @@ -43,6 +43,7 @@ import org.opensearch.search.aggregations.InternalAggregation; import org.opensearch.search.aggregations.InternalAggregations; import org.opensearch.search.aggregations.LeafBucketCollector; +import org.opensearch.search.aggregations.StarTreeBucketCollector; import org.opensearch.search.aggregations.bucket.global.GlobalAggregator; import org.opensearch.search.aggregations.bucket.terms.LongKeyedBucketOrds; import org.opensearch.search.aggregations.support.AggregationPath; @@ -129,6 +130,20 @@ public final void collectExistingBucket(LeafBucketCollector subCollector, int do subCollector.collect(doc, bucketOrd); } + /** + * Utility method to collect doc count in the given bucket (identified by the bucket ordinal) + * After collecting doc count, invoke collectStarTreeEntry() for sub-collectors to update their relevant buckets + */ + public final void collectStarTreeBucket(StarTreeBucketCollector collector, long docCount, long bucketOrd, int entryBit) + throws IOException { + if (docCounts.increment(bucketOrd, docCount) == docCount) { + multiBucketConsumer.accept(0); + } + for (StarTreeBucketCollector subCollector : collector.getSubCollectors()) { + subCollector.collectStarTreeEntry(entryBit, bucketOrd); + } + } + /** * This only tidies up doc counts. Call {@link MergingBucketsDeferringCollector#mergeBuckets(long[])} to merge the actual * ordinals and doc ID deltas. diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java index 96a49bc3fd5f6..23fbacc979224 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java @@ -34,11 +34,23 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.search.CollectionTerminatedException; +import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.ScoreMode; import org.apache.lucene.util.CollectionUtil; +import org.apache.lucene.util.FixedBitSet; import org.opensearch.common.Nullable; import org.opensearch.common.Rounding; import org.opensearch.common.lease.Releasables; +import org.opensearch.index.codec.composite.CompositeIndexFieldInfo; +import org.opensearch.index.compositeindex.datacube.DateDimension; +import org.opensearch.index.compositeindex.datacube.MetricStat; +import org.opensearch.index.compositeindex.datacube.startree.index.StarTreeValues; +import org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeQueryHelper; +import org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeUtils; +import org.opensearch.index.compositeindex.datacube.startree.utils.date.DateTimeUnitAdapter; +import org.opensearch.index.compositeindex.datacube.startree.utils.date.DateTimeUnitRounding; +import org.opensearch.index.compositeindex.datacube.startree.utils.iterator.SortedNumericStarTreeValuesIterator; +import org.opensearch.index.mapper.CompositeDataCubeFieldType; import org.opensearch.search.DocValueFormat; import org.opensearch.search.aggregations.Aggregator; import org.opensearch.search.aggregations.AggregatorFactories; @@ -47,6 +59,8 @@ import org.opensearch.search.aggregations.InternalAggregation; import org.opensearch.search.aggregations.LeafBucketCollector; import org.opensearch.search.aggregations.LeafBucketCollectorBase; +import org.opensearch.search.aggregations.StarTreeBucketCollector; +import org.opensearch.search.aggregations.StarTreePreComputeCollector; import org.opensearch.search.aggregations.bucket.BucketsAggregator; import org.opensearch.search.aggregations.bucket.filterrewrite.DateHistogramAggregatorBridge; import org.opensearch.search.aggregations.bucket.filterrewrite.FilterRewriteOptimizationContext; @@ -54,13 +68,17 @@ import org.opensearch.search.aggregations.support.ValuesSource; import org.opensearch.search.aggregations.support.ValuesSourceConfig; import org.opensearch.search.internal.SearchContext; +import org.opensearch.search.startree.StarTreeFilter; import java.io.IOException; import java.util.Collections; +import java.util.Locale; import java.util.Map; +import java.util.Set; import java.util.function.BiConsumer; import java.util.function.Function; +import static org.opensearch.index.compositeindex.datacube.startree.utils.StarTreeQueryHelper.getSupportedStarTree; import static org.opensearch.search.aggregations.bucket.filterrewrite.DateHistogramAggregatorBridge.segmentMatchAll; /** @@ -71,7 +89,7 @@ * * @opensearch.internal */ -class DateHistogramAggregator extends BucketsAggregator implements SizedBucketAggregator { +class DateHistogramAggregator extends BucketsAggregator implements SizedBucketAggregator, StarTreePreComputeCollector { private final ValuesSource.Numeric valuesSource; private final DocValueFormat formatter; private final Rounding rounding; @@ -85,8 +103,11 @@ class DateHistogramAggregator extends BucketsAggregator implements SizedBucketAg private final LongBounds extendedBounds; private final LongBounds hardBounds; private final LongKeyedBucketOrds bucketOrds; + private final String starTreeDateDimension; + private boolean starTreeDateRoundingRequired = true; private final FilterRewriteOptimizationContext filterRewriteOptimizationContext; + public final String STARTREE_TIMESTAMP_FIELD = "@timestamp"; DateHistogramAggregator( String name, @@ -151,6 +172,7 @@ protected Function bucketOrdProducer() { } }; filterRewriteOptimizationContext = new FilterRewriteOptimizationContext(bridge, parent, subAggregators.length, context); + this.starTreeDateDimension = (context.getStarTreeQueryContext() != null) ? fetchStarTreeCalendarUnit() : null; } @Override @@ -171,6 +193,13 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCol if (optimized) throw new CollectionTerminatedException(); SortedNumericDocValues values = valuesSource.longValues(ctx); + CompositeIndexFieldInfo supportedStarTree = getSupportedStarTree(this.context); + if (supportedStarTree != null) { + if (preComputeWithStarTree(ctx, supportedStarTree) == true) { + throw new CollectionTerminatedException(); + } + } + return new LeafBucketCollectorBase(sub, values) { @Override public void collect(int doc, long owningBucketOrd) throws IOException { @@ -201,6 +230,88 @@ public void collect(int doc, long owningBucketOrd) throws IOException { }; } + private String fetchStarTreeCalendarUnit() { + if (this.rounding.unit() == null) { + return null; + } + + CompositeDataCubeFieldType compositeMappedFieldType = (CompositeDataCubeFieldType) context.mapperService() + .getCompositeFieldTypes() + .iterator() + .next(); + DateDimension starTreeDateDimension = (DateDimension) compositeMappedFieldType.getDimensions() + .stream() + .filter(dim -> dim.getField().equals(STARTREE_TIMESTAMP_FIELD)) + .findFirst() // Get the first matching time dimension + .orElseThrow(() -> new AssertionError(String.format(Locale.ROOT, "Date dimension '%s' not found", STARTREE_TIMESTAMP_FIELD))); + + DateTimeUnitAdapter dateTimeUnitRounding = new DateTimeUnitAdapter(this.rounding.unit()); + DateTimeUnitRounding rounding = starTreeDateDimension.findClosestValidInterval(dateTimeUnitRounding); + String dimensionName = STARTREE_TIMESTAMP_FIELD + "_" + rounding.shortName(); + if (rounding.shortName().equals(this.rounding.unit().shortName())) { + this.starTreeDateRoundingRequired = false; + } + return dimensionName; + } + + @Override + public StarTreeBucketCollector getStarTreeBucketCollector( + LeafReaderContext ctx, + CompositeIndexFieldInfo starTree, + StarTreeBucketCollector parentCollector + ) throws IOException { + assert parentCollector == null; + StarTreeValues starTreeValues = StarTreeQueryHelper.getStarTreeValues(ctx, starTree); + return new StarTreeBucketCollector( + starTreeValues, + StarTreeFilter.getStarTreeResult(starTreeValues, context.getStarTreeQueryContext().getQueryMap(), Set.of(starTreeDateDimension)) + ) { + @Override + public void setSubCollectors() throws IOException { + for (Aggregator aggregator : subAggregators) { + this.subCollectors.add(((StarTreePreComputeCollector) aggregator).getStarTreeBucketCollector(ctx, starTree, this)); + } + } + + SortedNumericStarTreeValuesIterator valuesIterator = (SortedNumericStarTreeValuesIterator) starTreeValues + .getDimensionValuesIterator(starTreeDateDimension); + + String metricName = StarTreeUtils.fullyQualifiedFieldNameForStarTreeMetricsDocValues( + starTree.getField(), + "_doc_count", + MetricStat.DOC_COUNT.getTypeName() + ); + SortedNumericStarTreeValuesIterator docCountsIterator = (SortedNumericStarTreeValuesIterator) starTreeValues + .getMetricValuesIterator(metricName); + + @Override + public void collectStarTreeEntry(int starTreeEntry, long owningBucketOrd) throws IOException { + if (!valuesIterator.advanceExact(starTreeEntry)) { + return; + } + + for (int i = 0, count = valuesIterator.entryValueCount(); i < count; i++) { + long dimensionValue = starTreeDateRoundingRequired + ? preparedRounding.round(valuesIterator.nextValue()) + : valuesIterator.nextValue(); + + if (docCountsIterator.advanceExact(starTreeEntry)) { + long metricValue = docCountsIterator.nextValue(); + + long bucketOrd = bucketOrds.add(owningBucketOrd, dimensionValue); + if (bucketOrd < 0) { + bucketOrd = -1 - bucketOrd; + collectStarTreeBucket(this, metricValue, bucketOrd, starTreeEntry); + } else { + grow(bucketOrd + 1); + collectStarTreeBucket(this, metricValue, bucketOrd, starTreeEntry); + } + } + } + } + }; + } + @Override public InternalAggregation[] buildAggregations(long[] owningBucketOrds) throws IOException { return buildAggregationsForVariableBuckets(owningBucketOrds, bucketOrds, (bucketValue, docCount, subAggregationResults) -> { @@ -268,4 +379,20 @@ public double bucketSize(long bucket, Rounding.DateTimeUnit unitSize) { return 1.0; } } + + private boolean preComputeWithStarTree(LeafReaderContext ctx, CompositeIndexFieldInfo starTree) throws IOException { + StarTreeBucketCollector starTreeBucketCollector = getStarTreeBucketCollector(ctx, starTree, null); + FixedBitSet matchingDocsBitSet = starTreeBucketCollector.getMatchingDocsBitSet(); + + int numBits = matchingDocsBitSet.length(); + + if (numBits > 0) { + for (int bit = matchingDocsBitSet.nextSetBit(0); bit != DocIdSetIterator.NO_MORE_DOCS; bit = (bit + 1 < numBits) + ? matchingDocsBitSet.nextSetBit(bit + 1) + : DocIdSetIterator.NO_MORE_DOCS) { + starTreeBucketCollector.collectStarTreeEntry(bit, 0); + } + } + return true; + } } diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregatorFactory.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregatorFactory.java index 807ec1ab4e4b7..55da3189a9fb8 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregatorFactory.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregatorFactory.java @@ -153,4 +153,8 @@ protected Aggregator createUnmapped(SearchContext searchContext, Aggregator pare protected boolean supportsConcurrentSegmentSearch() { return true; } + + public Rounding.DateTimeUnit getRounding() { + return this.rounding.unit(); + } } diff --git a/server/src/main/java/org/opensearch/search/aggregations/metrics/AvgAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/metrics/AvgAggregator.java index 2970c5ca851e7..c9f5bb7f3534b 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/metrics/AvgAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/metrics/AvgAggregator.java @@ -53,6 +53,8 @@ import org.opensearch.search.aggregations.InternalAggregation; import org.opensearch.search.aggregations.LeafBucketCollector; import org.opensearch.search.aggregations.LeafBucketCollectorBase; +import org.opensearch.search.aggregations.StarTreeBucketCollector; +import org.opensearch.search.aggregations.StarTreePreComputeCollector; import org.opensearch.search.aggregations.support.ValuesSource; import org.opensearch.search.aggregations.support.ValuesSourceConfig; import org.opensearch.search.internal.SearchContext; @@ -68,7 +70,7 @@ * * @opensearch.internal */ -class AvgAggregator extends NumericMetricsAggregator.SingleValue { +class AvgAggregator extends NumericMetricsAggregator.SingleValue implements StarTreePreComputeCollector { final ValuesSource.Numeric valuesSource; @@ -108,6 +110,11 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBuc } CompositeIndexFieldInfo supportedStarTree = getSupportedStarTree(this.context); if (supportedStarTree != null) { + if (parent != null && subAggregators.length == 0) { + // If this a child aggregator, then the parent will trigger star-tree pre-computation. + // Returning NO_OP_COLLECTOR explicitly because the getLeafCollector() are invoked starting from innermost aggregators + return LeafBucketCollector.NO_OP_COLLECTOR; + } return getStarTreeLeafCollector(ctx, sub, supportedStarTree); } return getDefaultLeafCollector(ctx, sub); @@ -164,7 +171,7 @@ public LeafBucketCollector getStarTreeLeafCollector(LeafReaderContext ctx, LeafB MetricStat.VALUE_COUNT.getTypeName() ); - final CompensatedSum kahanSummation = new CompensatedSum(sums.get(0), 0); + final CompensatedSum kahanSummation = new CompensatedSum(sums.get(0), compensations.get(0)); SortedNumericStarTreeValuesIterator sumValuesIterator = (SortedNumericStarTreeValuesIterator) starTreeValues .getMetricValuesIterator(sumMetricName); SortedNumericStarTreeValuesIterator countValueIterator = (SortedNumericStarTreeValuesIterator) starTreeValues @@ -192,6 +199,7 @@ public LeafBucketCollector getStarTreeLeafCollector(LeafReaderContext ctx, LeafB } sums.set(0, kahanSummation.value()); + compensations.set(0, kahanSummation.delta()); return new LeafBucketCollectorBase(sub, valuesSource.doubleValues(ctx)) { @Override public void collect(int doc, long bucket) { @@ -226,4 +234,47 @@ public void doClose() { Releasables.close(counts, sums, compensations); } + public StarTreeBucketCollector getStarTreeBucketCollector( + LeafReaderContext ctx, + CompositeIndexFieldInfo starTree, + StarTreeBucketCollector parentCollector + ) throws IOException { + assert parentCollector != null; + return new StarTreeBucketCollector(parentCollector) { + String sumMetricName = StarTreeUtils.fullyQualifiedFieldNameForStarTreeMetricsDocValues( + starTree.getField(), + ((ValuesSource.Numeric.FieldData) valuesSource).getIndexFieldName(), + MetricStat.SUM.getTypeName() + ); + String valueCountMetricName = StarTreeUtils.fullyQualifiedFieldNameForStarTreeMetricsDocValues( + starTree.getField(), + ((ValuesSource.Numeric.FieldData) valuesSource).getIndexFieldName(), + MetricStat.VALUE_COUNT.getTypeName() + ); + SortedNumericStarTreeValuesIterator sumMetricValuesIterator = (SortedNumericStarTreeValuesIterator) starTreeValues + .getMetricValuesIterator(sumMetricName); + SortedNumericStarTreeValuesIterator valueCountMetricValuesIterator = (SortedNumericStarTreeValuesIterator) starTreeValues + .getMetricValuesIterator(valueCountMetricName); + + final CompensatedSum kahanSummation = new CompensatedSum(0, 0); + + @Override + public void collectStarTreeEntry(int starTreeEntryBit, long bucket) throws IOException { + counts = context.bigArrays().grow(counts, bucket + 1); + sums = context.bigArrays().grow(sums, bucket + 1); + compensations = context.bigArrays().grow(compensations, bucket + 1); + // Advance the valuesIterator to the current bit + if (!sumMetricValuesIterator.advanceExact(starTreeEntryBit) + || !valueCountMetricValuesIterator.advanceExact(starTreeEntryBit)) { + return; // Skip if no entries for this document + } + kahanSummation.reset(sums.get(bucket), compensations.get(bucket)); + kahanSummation.add(NumericUtils.sortableLongToDouble(sumMetricValuesIterator.nextValue())); + + sums.set(bucket, kahanSummation.value()); + compensations.set(bucket, kahanSummation.delta()); + counts.increment(bucket, valueCountMetricValuesIterator.nextValue()); + } + }; + } } diff --git a/server/src/main/java/org/opensearch/search/aggregations/metrics/MaxAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/metrics/MaxAggregator.java index 257109bca54bb..49aaf5e0670bb 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/metrics/MaxAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/metrics/MaxAggregator.java @@ -52,6 +52,8 @@ import org.opensearch.search.aggregations.InternalAggregation; import org.opensearch.search.aggregations.LeafBucketCollector; import org.opensearch.search.aggregations.LeafBucketCollectorBase; +import org.opensearch.search.aggregations.StarTreeBucketCollector; +import org.opensearch.search.aggregations.StarTreePreComputeCollector; import org.opensearch.search.aggregations.support.ValuesSource; import org.opensearch.search.aggregations.support.ValuesSourceConfig; import org.opensearch.search.internal.SearchContext; @@ -69,7 +71,7 @@ * * @opensearch.internal */ -class MaxAggregator extends NumericMetricsAggregator.SingleValue { +class MaxAggregator extends NumericMetricsAggregator.SingleValue implements StarTreePreComputeCollector { final ValuesSource.Numeric valuesSource; final DocValueFormat formatter; @@ -130,6 +132,11 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBuc CompositeIndexFieldInfo supportedStarTree = getSupportedStarTree(this.context); if (supportedStarTree != null) { + if (parent != null && subAggregators.length == 0) { + // If this a child aggregator, then the parent will trigger star-tree pre-computation. + // Returning NO_OP_COLLECTOR explicitly because the getLeafCollector() are invoked starting from innermost aggregators + return LeafBucketCollector.NO_OP_COLLECTOR; + } return getStarTreeCollector(ctx, sub, supportedStarTree); } return getDefaultLeafCollector(ctx, sub); @@ -249,4 +256,27 @@ public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue }); return result[0] != null ? converter.apply(result[0]) : null; } + + /** + * The parent aggregator invokes this method to get a StarTreeBucketCollector, + * which exposes collectStarTreeEntry() to be evaluated on filtered star tree entries + */ + public StarTreeBucketCollector getStarTreeBucketCollector( + LeafReaderContext ctx, + CompositeIndexFieldInfo starTree, + StarTreeBucketCollector parentCollector + ) throws IOException { + return StarTreeQueryHelper.getStarTreeBucketMetricCollector( + starTree, + MetricStat.MAX.getTypeName(), + valuesSource, + parentCollector, + (bucket) -> { + long from = maxes.size(); + maxes = context.bigArrays().grow(maxes, bucket + 1); + maxes.fill(from, maxes.size(), Double.NEGATIVE_INFINITY); + }, + (bucket, metricValue) -> maxes.set(bucket, Math.max(maxes.get(bucket), (NumericUtils.sortableLongToDouble(metricValue)))) + ); + } } diff --git a/server/src/main/java/org/opensearch/search/aggregations/metrics/MinAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/metrics/MinAggregator.java index a9f20bdeb5fd5..febb227dd4e2a 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/metrics/MinAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/metrics/MinAggregator.java @@ -52,6 +52,8 @@ import org.opensearch.search.aggregations.InternalAggregation; import org.opensearch.search.aggregations.LeafBucketCollector; import org.opensearch.search.aggregations.LeafBucketCollectorBase; +import org.opensearch.search.aggregations.StarTreeBucketCollector; +import org.opensearch.search.aggregations.StarTreePreComputeCollector; import org.opensearch.search.aggregations.support.ValuesSource; import org.opensearch.search.aggregations.support.ValuesSourceConfig; import org.opensearch.search.internal.SearchContext; @@ -68,7 +70,7 @@ * * @opensearch.internal */ -class MinAggregator extends NumericMetricsAggregator.SingleValue { +class MinAggregator extends NumericMetricsAggregator.SingleValue implements StarTreePreComputeCollector { private static final int MAX_BKD_LOOKUPS = 1024; final ValuesSource.Numeric valuesSource; @@ -129,6 +131,11 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBuc CompositeIndexFieldInfo supportedStarTree = getSupportedStarTree(this.context); if (supportedStarTree != null) { + if (parent != null && subAggregators.length == 0) { + // If this a child aggregator, then the parent will trigger star-tree pre-computation. + // Returning NO_OP_COLLECTOR explicitly because the getLeafCollector() are invoked starting from innermost aggregators + return LeafBucketCollector.NO_OP_COLLECTOR; + } return getStarTreeCollector(ctx, sub, supportedStarTree); } return getDefaultLeafCollector(ctx, sub); @@ -243,4 +250,27 @@ public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue } catch (CollectionTerminatedException e) {} return result[0]; } + + /** + * The parent aggregator invokes this method to get a StarTreeBucketCollector, + * which exposes collectStarTreeEntry() to be evaluated on filtered star tree entries + */ + public StarTreeBucketCollector getStarTreeBucketCollector( + LeafReaderContext ctx, + CompositeIndexFieldInfo starTree, + StarTreeBucketCollector parentCollector + ) throws IOException { + return StarTreeQueryHelper.getStarTreeBucketMetricCollector( + starTree, + MetricStat.MIN.getTypeName(), + valuesSource, + parentCollector, + (bucket) -> { + long from = mins.size(); + mins = context.bigArrays().grow(mins, bucket + 1); + mins.fill(from, mins.size(), Double.POSITIVE_INFINITY); + }, + (bucket, metricValue) -> mins.set(bucket, Math.min(mins.get(bucket), NumericUtils.sortableLongToDouble(metricValue))) + ); + } } diff --git a/server/src/main/java/org/opensearch/search/aggregations/metrics/SumAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/metrics/SumAggregator.java index 3d237a94c5699..7376cc1e93b41 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/metrics/SumAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/metrics/SumAggregator.java @@ -46,6 +46,8 @@ import org.opensearch.search.aggregations.InternalAggregation; import org.opensearch.search.aggregations.LeafBucketCollector; import org.opensearch.search.aggregations.LeafBucketCollectorBase; +import org.opensearch.search.aggregations.StarTreeBucketCollector; +import org.opensearch.search.aggregations.StarTreePreComputeCollector; import org.opensearch.search.aggregations.support.ValuesSource; import org.opensearch.search.aggregations.support.ValuesSourceConfig; import org.opensearch.search.internal.SearchContext; @@ -60,7 +62,7 @@ * * @opensearch.internal */ -public class SumAggregator extends NumericMetricsAggregator.SingleValue { +public class SumAggregator extends NumericMetricsAggregator.SingleValue implements StarTreePreComputeCollector { private final ValuesSource.Numeric valuesSource; private final DocValueFormat format; @@ -98,6 +100,11 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBuc CompositeIndexFieldInfo supportedStarTree = getSupportedStarTree(this.context); if (supportedStarTree != null) { + if (parent != null && subAggregators.length == 0) { + // If this a child aggregator, then the parent will trigger star-tree pre-computation. + // Returning NO_OP_COLLECTOR explicitly because the getLeafCollector() are invoked starting from innermost aggregators + return LeafBucketCollector.NO_OP_COLLECTOR; + } return getStarTreeCollector(ctx, sub, supportedStarTree); } return getDefaultLeafCollector(ctx, sub); @@ -135,7 +142,8 @@ public void collect(int doc, long bucket) throws IOException { public LeafBucketCollector getStarTreeCollector(LeafReaderContext ctx, LeafBucketCollector sub, CompositeIndexFieldInfo starTree) throws IOException { - final CompensatedSum kahanSummation = new CompensatedSum(sums.get(0), 0); + final CompensatedSum kahanSummation = new CompensatedSum(sums.get(0), compensations.get(0)); + return StarTreeQueryHelper.getStarTreeLeafCollector( context, valuesSource, @@ -144,7 +152,38 @@ public LeafBucketCollector getStarTreeCollector(LeafReaderContext ctx, LeafBucke starTree, MetricStat.SUM.getTypeName(), value -> kahanSummation.add(NumericUtils.sortableLongToDouble(value)), - () -> sums.set(0, kahanSummation.value()) + () -> { + sums.set(0, kahanSummation.value()); + compensations.set(0, kahanSummation.delta()); + } + ); + } + + /** + * The parent aggregator invokes this method to get a StarTreeBucketCollector, + * which exposes collectStarTreeEntry() to be evaluated on filtered star tree entries + */ + public StarTreeBucketCollector getStarTreeBucketCollector( + LeafReaderContext ctx, + CompositeIndexFieldInfo starTree, + StarTreeBucketCollector parentCollector + ) throws IOException { + final CompensatedSum kahanSummation = new CompensatedSum(0, 0); + return StarTreeQueryHelper.getStarTreeBucketMetricCollector( + starTree, + MetricStat.SUM.getTypeName(), + valuesSource, + parentCollector, + (bucket) -> { + sums = context.bigArrays().grow(sums, bucket + 1); + compensations = context.bigArrays().grow(compensations, bucket + 1); + }, + (bucket, metricValue) -> { + kahanSummation.reset(sums.get(bucket), compensations.get(bucket)); + kahanSummation.add(NumericUtils.sortableLongToDouble(metricValue)); + sums.set(bucket, kahanSummation.value()); + compensations.set(bucket, kahanSummation.delta()); + } ); } diff --git a/server/src/main/java/org/opensearch/search/aggregations/metrics/ValueCountAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/metrics/ValueCountAggregator.java index a156ec49983fa..f6f4a8a56eddc 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/metrics/ValueCountAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/metrics/ValueCountAggregator.java @@ -46,6 +46,8 @@ import org.opensearch.search.aggregations.InternalAggregation; import org.opensearch.search.aggregations.LeafBucketCollector; import org.opensearch.search.aggregations.LeafBucketCollectorBase; +import org.opensearch.search.aggregations.StarTreeBucketCollector; +import org.opensearch.search.aggregations.StarTreePreComputeCollector; import org.opensearch.search.aggregations.support.ValuesSource; import org.opensearch.search.aggregations.support.ValuesSourceConfig; import org.opensearch.search.internal.SearchContext; @@ -63,7 +65,7 @@ * * @opensearch.internal */ -public class ValueCountAggregator extends NumericMetricsAggregator.SingleValue { +public class ValueCountAggregator extends NumericMetricsAggregator.SingleValue implements StarTreePreComputeCollector { final ValuesSource valuesSource; @@ -96,6 +98,11 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBuc CompositeIndexFieldInfo supportedStarTree = getSupportedStarTree(this.context); if (supportedStarTree != null) { + if (parent != null && subAggregators.length == 0) { + // If this a child aggregator, then the parent will trigger star-tree pre-computation. + // Returning NO_OP_COLLECTOR explicitly because the getLeafCollector() are invoked starting from innermost aggregators + return LeafBucketCollector.NO_OP_COLLECTOR; + } return getStarTreeCollector(ctx, sub, supportedStarTree); } @@ -180,4 +187,22 @@ public void doClose() { Releasables.close(counts); } + /** + * The parent aggregator invokes this method to get a StarTreeBucketCollector, + * which exposes collectStarTreeEntry() to be evaluated on filtered star tree entries + */ + public StarTreeBucketCollector getStarTreeBucketCollector( + LeafReaderContext ctx, + CompositeIndexFieldInfo starTree, + StarTreeBucketCollector parentCollector + ) throws IOException { + return StarTreeQueryHelper.getStarTreeBucketMetricCollector( + starTree, + MetricStat.VALUE_COUNT.getTypeName(), + (ValuesSource.Numeric) valuesSource, + parentCollector, + (bucket) -> counts = context.bigArrays().grow(counts, bucket + 1), + (bucket, metricValue) -> counts.increment(bucket, metricValue) + ); + } } diff --git a/server/src/main/java/org/opensearch/search/startree/StarTreeFilter.java b/server/src/main/java/org/opensearch/search/startree/StarTreeFilter.java index 261430f542d02..1629b9d0c1db4 100644 --- a/server/src/main/java/org/opensearch/search/startree/StarTreeFilter.java +++ b/server/src/main/java/org/opensearch/search/startree/StarTreeFilter.java @@ -47,9 +47,13 @@ public class StarTreeFilter { * First go over the star tree and try to match as many dimensions as possible * For the remaining columns, use star-tree doc values to match them */ - public static FixedBitSet getStarTreeResult(StarTreeValues starTreeValues, Map predicateEvaluators) throws IOException { + public static FixedBitSet getStarTreeResult( + StarTreeValues starTreeValues, + Map predicateEvaluators, + Set groupByField + ) throws IOException { Map queryMap = predicateEvaluators != null ? predicateEvaluators : Collections.emptyMap(); - StarTreeResult starTreeResult = traverseStarTree(starTreeValues, queryMap); + StarTreeResult starTreeResult = traverseStarTree(starTreeValues, queryMap, groupByField); // Initialize FixedBitSet with size maxMatchedDoc + 1 FixedBitSet bitSet = new FixedBitSet(starTreeResult.maxMatchedDoc + 1); @@ -113,7 +117,8 @@ public static FixedBitSet getStarTreeResult(StarTreeValues starTreeValues, Map queryMap) throws IOException { + private static StarTreeResult traverseStarTree(StarTreeValues starTreeValues, Map queryMap, Set groupbyField) + throws IOException { DocIdSetBuilder docsWithField = new DocIdSetBuilder(starTreeValues.getStarTreeDocumentCount()); DocIdSetBuilder.BulkAdder adder; Set globalRemainingPredicateColumns = null; @@ -126,6 +131,7 @@ private static StarTreeResult traverseStarTree(StarTreeValues starTreeValues, Ma queue.add(starTree); int currentDimensionId = -1; Set remainingPredicateColumns = new HashSet<>(queryMap.keySet()); + Set remainingGroupByColumns = new HashSet<>(groupbyField); int matchedDocsCountInStarTree = 0; int maxDocNum = -1; StarTreeNode starTreeNode; @@ -136,13 +142,14 @@ private static StarTreeResult traverseStarTree(StarTreeValues starTreeValues, Ma if (dimensionId > currentDimensionId) { String dimension = dimensionNames.get(dimensionId); remainingPredicateColumns.remove(dimension); + remainingGroupByColumns.remove(dimension); if (foundLeafNode && globalRemainingPredicateColumns == null) { globalRemainingPredicateColumns = new HashSet<>(remainingPredicateColumns); } currentDimensionId = dimensionId; } - if (remainingPredicateColumns.isEmpty()) { + if (remainingPredicateColumns.isEmpty() && remainingGroupByColumns.isEmpty()) { int docId = starTreeNode.getAggregatedDocId(); docIds.add(docId); matchedDocsCountInStarTree++; @@ -161,7 +168,8 @@ private static StarTreeResult traverseStarTree(StarTreeValues starTreeValues, Ma String childDimension = dimensionNames.get(dimensionId + 1); StarTreeNode starNode = null; - if (globalRemainingPredicateColumns == null || !globalRemainingPredicateColumns.contains(childDimension)) { + if (((globalRemainingPredicateColumns == null || !globalRemainingPredicateColumns.contains(childDimension)) + && !remainingGroupByColumns.contains(childDimension))) { starNode = starTreeNode.getChildStarNode(); } diff --git a/server/src/test/java/org/opensearch/search/SearchServiceStarTreeTests.java b/server/src/test/java/org/opensearch/search/SearchServiceStarTreeTests.java index 3b32e9e4ac6b7..1beec828e849e 100644 --- a/server/src/test/java/org/opensearch/search/SearchServiceStarTreeTests.java +++ b/server/src/test/java/org/opensearch/search/SearchServiceStarTreeTests.java @@ -25,6 +25,12 @@ import org.opensearch.index.shard.IndexShard; import org.opensearch.indices.IndicesService; import org.opensearch.search.aggregations.AggregationBuilders; +import org.opensearch.search.aggregations.bucket.histogram.DateHistogramAggregationBuilder; +import org.opensearch.search.aggregations.bucket.histogram.DateHistogramInterval; +import org.opensearch.search.aggregations.metrics.MaxAggregationBuilder; +import org.opensearch.search.aggregations.metrics.MedianAbsoluteDeviationAggregationBuilder; +import org.opensearch.search.aggregations.metrics.SumAggregationBuilder; +import org.opensearch.search.aggregations.startree.DateHistogramAggregatorTests; import org.opensearch.search.aggregations.startree.StarTreeFilterTests; import org.opensearch.search.builder.SearchSourceBuilder; import org.opensearch.search.internal.AliasFilter; @@ -37,12 +43,27 @@ import java.io.IOException; import java.util.Map; +import static org.opensearch.search.aggregations.AggregationBuilders.dateHistogram; +import static org.opensearch.search.aggregations.AggregationBuilders.max; +import static org.opensearch.search.aggregations.AggregationBuilders.medianAbsoluteDeviation; +import static org.opensearch.search.aggregations.AggregationBuilders.sum; import static org.hamcrest.CoreMatchers.notNullValue; import static org.hamcrest.CoreMatchers.nullValue; +/** + * Tests for validating query shapes which can be resolved using star-tree index + * For valid resolvable (with star-tree) cases, StarTreeQueryContext is created and populated with the SearchContext + * For non-resolvable (with star-tree) cases, StarTreeQueryContext is null + */ public class SearchServiceStarTreeTests extends OpenSearchSingleNodeTestCase { - public void testParseQueryToOriginalOrStarTreeQuery() throws IOException { + private static final String TIMESTAMP_FIELD = "@timestamp"; + private static final String FIELD_NAME = "status"; + + /** + * Test query parsing for non-nested metric aggregations, with/without numeric term query + */ + public void testQueryParsingForMetricAggregations() throws IOException { FeatureFlags.initializeFeatureFlags(Settings.builder().put(FeatureFlags.STAR_TREE_INDEX, true).build()); setStarTreeIndexSetting("true"); @@ -81,10 +102,8 @@ public void testParseQueryToOriginalOrStarTreeQuery() throws IOException { sourceBuilder = new SearchSourceBuilder().query(new MatchAllQueryBuilder()); assertStarTreeContext(request, sourceBuilder, null, -1); - // Case 3: MatchAllQuery and aggregations present, should use star tree - sourceBuilder = new SearchSourceBuilder().size(0) - .query(new MatchAllQueryBuilder()) - .aggregation(AggregationBuilders.max("test").field("field")); + // Case 3: MatchAllQuery and metric aggregations present, should use star tree + sourceBuilder = new SearchSourceBuilder().size(0).query(new MatchAllQueryBuilder()).aggregation(max("test").field("field")); CompositeIndexFieldInfo expectedStarTree = new CompositeIndexFieldInfo( "startree", CompositeMappedFieldType.CompositeFieldType.STAR_TREE @@ -92,36 +111,198 @@ public void testParseQueryToOriginalOrStarTreeQuery() throws IOException { Map expectedQueryMap = null; assertStarTreeContext(request, sourceBuilder, new StarTreeQueryContext(expectedStarTree, expectedQueryMap, -1), -1); - // Case 4: MatchAllQuery and aggregations present, but postFilter specified, should not use star tree + // Case 4: MatchAllQuery and metric aggregations present, but postFilter specified, should not use star tree sourceBuilder = new SearchSourceBuilder().size(0) .query(new MatchAllQueryBuilder()) - .aggregation(AggregationBuilders.max("test").field("field")) + .aggregation(max("test").field("field")) .postFilter(new MatchAllQueryBuilder()); assertStarTreeContext(request, sourceBuilder, null, -1); - // Case 5: TermQuery and single aggregation, should use star tree, but not initialize query cache - sourceBuilder = new SearchSourceBuilder().size(0) - .query(new TermQueryBuilder("sndv", 1)) - .aggregation(AggregationBuilders.max("test").field("field")); + // Case 5: TermQuery and single metric aggregation, should use star tree, but not initialize query cache + sourceBuilder = new SearchSourceBuilder().size(0).query(new TermQueryBuilder("sndv", 1)).aggregation(max("test").field("field")); expectedQueryMap = Map.of("sndv", 1L); assertStarTreeContext(request, sourceBuilder, new StarTreeQueryContext(expectedStarTree, expectedQueryMap, -1), -1); - // Case 6: TermQuery and multiple aggregations present, should use star tree & initialize cache + // Case 6: TermQuery and multiple metric aggregations present, should use star tree & initialize cache sourceBuilder = new SearchSourceBuilder().size(0) .query(new TermQueryBuilder("sndv", 1)) - .aggregation(AggregationBuilders.max("test").field("field")) + .aggregation(max("test").field("field")) .aggregation(AggregationBuilders.sum("test2").field("field")); expectedQueryMap = Map.of("sndv", 1L); assertStarTreeContext(request, sourceBuilder, new StarTreeQueryContext(expectedStarTree, expectedQueryMap, 0), 0); // Case 7: No query, metric aggregations present, should use star tree - sourceBuilder = new SearchSourceBuilder().size(0).aggregation(AggregationBuilders.max("test").field("field")); + sourceBuilder = new SearchSourceBuilder().size(0).aggregation(max("test").field("field")); assertStarTreeContext(request, sourceBuilder, new StarTreeQueryContext(expectedStarTree, null, -1), -1); setStarTreeIndexSetting(null); } - private void setStarTreeIndexSetting(String value) throws IOException { + /** + * Test query parsing for date histogram aggregations, with/without numeric term query + */ + public void testQueryParsingForDateHistogramAggregations() throws IOException { + FeatureFlags.initializeFeatureFlags(Settings.builder().put(FeatureFlags.STAR_TREE_INDEX, true).build()); + setStarTreeIndexSetting("true"); + + Settings settings = Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) + .put(StarTreeIndexSettings.IS_COMPOSITE_INDEX_SETTING.getKey(), true) + .build(); + CreateIndexRequestBuilder builder = client().admin() + .indices() + .prepareCreate("test") + .setSettings(settings) + .setMapping(DateHistogramAggregatorTests.getExpandedMapping(1, false)); + createIndex("test", builder); + + IndicesService indicesService = getInstanceFromNode(IndicesService.class); + IndexService indexService = indicesService.indexServiceSafe(resolveIndex("test")); + IndexShard indexShard = indexService.getShard(0); + ShardSearchRequest request = new ShardSearchRequest( + OriginalIndices.NONE, + new SearchRequest().allowPartialSearchResults(true), + indexShard.shardId(), + 1, + new AliasFilter(null, Strings.EMPTY_ARRAY), + 1.0f, + -1, + null, + null + ); + + MaxAggregationBuilder maxAggNoSub = max("max").field(FIELD_NAME); + MaxAggregationBuilder sumAggNoSub = max("sum").field(FIELD_NAME); + SumAggregationBuilder sumAggSub = sum("sum").field(FIELD_NAME).subAggregation(maxAggNoSub); + MedianAbsoluteDeviationAggregationBuilder medianAgg = medianAbsoluteDeviation("median").field(FIELD_NAME); + + // Case 1: No query or aggregations, should not use star tree + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); + assertStarTreeContext(request, sourceBuilder, null, -1); + + // Case 2: MatchAllQuery present but no aggregations, should not use star tree + sourceBuilder = new SearchSourceBuilder().query(new MatchAllQueryBuilder()); + assertStarTreeContext(request, sourceBuilder, null, -1); + + // Case 3: MatchAllQuery and non-nested metric aggregations is nested within date-histogram aggregation, should use star tree + DateHistogramAggregationBuilder dateHistogramAggregationBuilder = dateHistogram("by_day").field(TIMESTAMP_FIELD) + .calendarInterval(DateHistogramInterval.DAY) + .subAggregation(maxAggNoSub); + sourceBuilder = new SearchSourceBuilder().size(0).query(new MatchAllQueryBuilder()).aggregation(dateHistogramAggregationBuilder); + CompositeIndexFieldInfo expectedStarTree = new CompositeIndexFieldInfo( + "startree1", + CompositeMappedFieldType.CompositeFieldType.STAR_TREE + ); + Map expectedQueryMap = null; + assertStarTreeContext(request, sourceBuilder, new StarTreeQueryContext(expectedStarTree, expectedQueryMap, -1), -1); + + // Case 4: MatchAllQuery and nested-metric aggregations is nested within date-histogram aggregation, should not use star tree + dateHistogramAggregationBuilder = dateHistogram("by_day").field(TIMESTAMP_FIELD) + .calendarInterval(DateHistogramInterval.DAY) + .subAggregation(sumAggSub); + sourceBuilder = new SearchSourceBuilder().size(0).query(new MatchAllQueryBuilder()).aggregation(dateHistogramAggregationBuilder); + assertStarTreeContext(request, sourceBuilder, null, -1); + + // Case 5: MatchAllQuery and non-startree supported aggregation nested within date-histogram aggregation, should not use star tree + dateHistogramAggregationBuilder = dateHistogram("by_day").field(TIMESTAMP_FIELD) + .calendarInterval(DateHistogramInterval.DAY) + .subAggregation(medianAgg); + sourceBuilder = new SearchSourceBuilder().size(0).query(new MatchAllQueryBuilder()).aggregation(dateHistogramAggregationBuilder); + assertStarTreeContext(request, sourceBuilder, null, -1); + + // Case 6: NumericTermQuery and date-histogram aggregation present, should use star tree + dateHistogramAggregationBuilder = dateHistogram("by_day").field(TIMESTAMP_FIELD) + .calendarInterval(DateHistogramInterval.DAY) + .subAggregation(maxAggNoSub); + sourceBuilder = new SearchSourceBuilder().size(0) + .query(new TermQueryBuilder(FIELD_NAME, 1)) + .aggregation(dateHistogramAggregationBuilder); + expectedQueryMap = Map.of(FIELD_NAME, 1L); + assertStarTreeContext(request, sourceBuilder, new StarTreeQueryContext(expectedStarTree, expectedQueryMap, -1), -1); + + // Case 7: Date histogram with non calendar interval: rounding is null for DateHistogramFactory - cannot use star-tree + dateHistogramAggregationBuilder = dateHistogram("non_cal").field(TIMESTAMP_FIELD) + .fixedInterval(DateHistogramInterval.DAY) + .subAggregation(maxAggNoSub); + sourceBuilder = new SearchSourceBuilder().size(0).aggregation(dateHistogramAggregationBuilder); + assertStarTreeContext(request, sourceBuilder, null, -1); + + // Case 8: Date histogram with no metric aggregation - does not use star-tree + dateHistogramAggregationBuilder = dateHistogram("by_day").field(TIMESTAMP_FIELD).calendarInterval(DateHistogramInterval.DAY); + sourceBuilder = new SearchSourceBuilder().size(0).aggregation(dateHistogramAggregationBuilder); + assertStarTreeContext(request, sourceBuilder, null, -1); + + // Case 9: Date histogram with no valid time interval to resolve aggregation - should not use star-tree + dateHistogramAggregationBuilder = dateHistogram("by_sec").field(TIMESTAMP_FIELD) + .calendarInterval(DateHistogramInterval.SECOND) + .subAggregation(maxAggNoSub); + sourceBuilder = new SearchSourceBuilder().size(0).aggregation(dateHistogramAggregationBuilder); + assertStarTreeContext(request, sourceBuilder, null, -1); + + // Case 10: Date histogram nested with multiple non-nested metric aggregations - should use star-tree + dateHistogramAggregationBuilder = dateHistogram("by_day").field(TIMESTAMP_FIELD) + .calendarInterval(DateHistogramInterval.DAY) + .subAggregation(maxAggNoSub) + .subAggregation(sumAggNoSub); + expectedQueryMap = null; + sourceBuilder = new SearchSourceBuilder().size(0).aggregation(dateHistogramAggregationBuilder); + assertStarTreeContext(request, sourceBuilder, new StarTreeQueryContext(expectedStarTree, expectedQueryMap, -1), -1); + + setStarTreeIndexSetting(null); + } + + /** + * Test query parsing for date histogram aggregations on star-tree index when @timestamp field does not exist + */ + public void testInvalidQueryParsingForDateHistogramAggregations() throws IOException { + FeatureFlags.initializeFeatureFlags(Settings.builder().put(FeatureFlags.STAR_TREE_INDEX, true).build()); + setStarTreeIndexSetting("true"); + + Settings settings = Settings.builder() + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) + .put(StarTreeIndexSettings.IS_COMPOSITE_INDEX_SETTING.getKey(), true) + .build(); + CreateIndexRequestBuilder builder = client().admin() + .indices() + .prepareCreate("test") + .setSettings(settings) + .setMapping(StarTreeFilterTests.getExpandedMapping(1, false)); + createIndex("test", builder); + + IndicesService indicesService = getInstanceFromNode(IndicesService.class); + IndexService indexService = indicesService.indexServiceSafe(resolveIndex("test")); + IndexShard indexShard = indexService.getShard(0); + ShardSearchRequest request = new ShardSearchRequest( + OriginalIndices.NONE, + new SearchRequest().allowPartialSearchResults(true), + indexShard.shardId(), + 1, + new AliasFilter(null, Strings.EMPTY_ARRAY), + 1.0f, + -1, + null, + null + ); + + MaxAggregationBuilder maxAggNoSub = max("max").field(FIELD_NAME); + DateHistogramAggregationBuilder dateHistogramAggregationBuilder = dateHistogram("by_day").field(TIMESTAMP_FIELD) + .calendarInterval(DateHistogramInterval.DAY) + .subAggregation(maxAggNoSub); + SearchSourceBuilder sourceBuilder = new SearchSourceBuilder().size(0) + .query(new MatchAllQueryBuilder()) + .aggregation(dateHistogramAggregationBuilder); + CompositeIndexFieldInfo expectedStarTree = new CompositeIndexFieldInfo( + "startree1", + CompositeMappedFieldType.CompositeFieldType.STAR_TREE + ); + assertStarTreeContext(request, sourceBuilder, null, -1); + + setStarTreeIndexSetting(null); + } + + private void setStarTreeIndexSetting(String value) { client().admin() .cluster() .prepareUpdateSettings() diff --git a/server/src/test/java/org/opensearch/search/aggregations/startree/DateHistogramAggregatorTests.java b/server/src/test/java/org/opensearch/search/aggregations/startree/DateHistogramAggregatorTests.java new file mode 100644 index 0000000000000..a32d00550f24b --- /dev/null +++ b/server/src/test/java/org/opensearch/search/aggregations/startree/DateHistogramAggregatorTests.java @@ -0,0 +1,361 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.aggregations.startree; + +import com.carrotsearch.randomizedtesting.RandomizedTest; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.lucene912.Lucene912Codec; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.SegmentReader; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.index.RandomIndexWriter; +import org.opensearch.common.Rounding; +import org.opensearch.common.lucene.Lucene; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.util.FeatureFlags; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.index.codec.composite.CompositeIndexFieldInfo; +import org.opensearch.index.codec.composite.CompositeIndexReader; +import org.opensearch.index.codec.composite.composite912.Composite912Codec; +import org.opensearch.index.codec.composite912.datacube.startree.StarTreeDocValuesFormatTests; +import org.opensearch.index.compositeindex.datacube.DateDimension; +import org.opensearch.index.compositeindex.datacube.Dimension; +import org.opensearch.index.compositeindex.datacube.NumericDimension; +import org.opensearch.index.compositeindex.datacube.startree.utils.date.DateTimeUnitAdapter; +import org.opensearch.index.mapper.DateFieldMapper; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.MapperService; +import org.opensearch.index.mapper.NumberFieldMapper; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.TermQueryBuilder; +import org.opensearch.search.aggregations.bucket.histogram.DateHistogramAggregationBuilder; +import org.opensearch.search.aggregations.bucket.histogram.DateHistogramAggregatorTestCase; +import org.opensearch.search.aggregations.bucket.histogram.DateHistogramInterval; +import org.opensearch.search.aggregations.bucket.histogram.InternalDateHistogram; +import org.opensearch.search.aggregations.support.ValuesSourceAggregationBuilder; +import org.junit.After; +import org.junit.Before; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; +import java.util.Random; + +import static org.opensearch.index.codec.composite912.datacube.startree.AbstractStarTreeDVFormatTests.topMapping; +import static org.opensearch.search.aggregations.AggregationBuilders.avg; +import static org.opensearch.search.aggregations.AggregationBuilders.count; +import static org.opensearch.search.aggregations.AggregationBuilders.dateHistogram; +import static org.opensearch.search.aggregations.AggregationBuilders.max; +import static org.opensearch.search.aggregations.AggregationBuilders.min; +import static org.opensearch.search.aggregations.AggregationBuilders.sum; +import static org.opensearch.test.InternalAggregationTestCase.DEFAULT_MAX_BUCKETS; + +public class DateHistogramAggregatorTests extends DateHistogramAggregatorTestCase { + private static final String TIMESTAMP_FIELD = "@timestamp"; + private static final MappedFieldType TIMESTAMP_FIELD_TYPE = new DateFieldMapper.DateFieldType(TIMESTAMP_FIELD); + + private static final String FIELD_NAME = "status"; + private static final MappedFieldType NUMBER_FIELD_TYPE = new NumberFieldMapper.NumberFieldType( + FIELD_NAME, + NumberFieldMapper.NumberType.LONG + ); + + @Before + public void setup() { + FeatureFlags.initializeFeatureFlags(Settings.builder().put(FeatureFlags.STAR_TREE_INDEX, true).build()); + } + + @After + public void teardown() throws IOException { + FeatureFlags.initializeFeatureFlags(Settings.EMPTY); + } + + protected Codec getCodec() { + final Logger testLogger = LogManager.getLogger(MetricAggregatorTests.class); + MapperService mapperService; + try { + mapperService = StarTreeDocValuesFormatTests.createMapperService(getExpandedMapping(1, false)); + } catch (IOException e) { + throw new RuntimeException(e); + } + return new Composite912Codec(Lucene912Codec.Mode.BEST_SPEED, mapperService, testLogger); + } + + public void testStarTreeDateHistogram() throws IOException { + Directory directory = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig(null); + conf.setCodec(getCodec()); + conf.setMergePolicy(newLogMergePolicy()); + RandomIndexWriter iw = new RandomIndexWriter(random(), directory, conf); + + Random random = RandomizedTest.getRandom(); + int totalDocs = 100; + final String STATUS = "status"; + final String SIZE = "size"; + int val; + long date; + + List docs = new ArrayList<>(); + // Index 100 random documents + for (int i = 0; i < totalDocs; i++) { + Document doc = new Document(); + if (random.nextBoolean()) { + val = random.nextInt(10); // Random int between 0 and 9 for status + doc.add(new SortedNumericDocValuesField(STATUS, val)); + } + if (random.nextBoolean()) { + val = random.nextInt(100); // Random int between 0 and 99 for size + doc.add(new SortedNumericDocValuesField(SIZE, val)); + } + date = random.nextInt(180) * 24 * 60 * 60 * 1000L; // Random date within 180 days + doc.add(new SortedNumericDocValuesField(TIMESTAMP_FIELD, date)); + doc.add(new LongPoint(TIMESTAMP_FIELD, date)); + iw.addDocument(doc); + docs.add(doc); + } + + if (randomBoolean()) { + iw.forceMerge(1); + } + iw.close(); + + DirectoryReader ir = DirectoryReader.open(directory); + initValuesSourceRegistry(); + LeafReaderContext context = ir.leaves().get(0); + + SegmentReader reader = Lucene.segmentReader(context.reader()); + IndexSearcher indexSearcher = newSearcher(reader, false, false); + CompositeIndexReader starTreeDocValuesReader = (CompositeIndexReader) reader.getDocValuesReader(); + + List compositeIndexFields = starTreeDocValuesReader.getCompositeIndexFields(); + CompositeIndexFieldInfo starTree = compositeIndexFields.get(0); + + ValuesSourceAggregationBuilder[] agggBuilders = { + sum("_name").field(FIELD_NAME), + max("_name").field(FIELD_NAME), + min("_name").field(FIELD_NAME), + count("_name").field(FIELD_NAME), + avg("_name").field(FIELD_NAME) }; + + List supportedDimensions = new LinkedList<>(); + supportedDimensions.add(new NumericDimension(STATUS)); + supportedDimensions.add(new NumericDimension(SIZE)); + supportedDimensions.add( + new DateDimension( + TIMESTAMP_FIELD, + List.of( + new DateTimeUnitAdapter(Rounding.DateTimeUnit.MONTH_OF_YEAR), + new DateTimeUnitAdapter(Rounding.DateTimeUnit.DAY_OF_MONTH) + ), + DateFieldMapper.Resolution.MILLISECONDS + ) + ); + + for (ValuesSourceAggregationBuilder aggregationBuilder : agggBuilders) { + Query query = new MatchAllDocsQuery(); + QueryBuilder queryBuilder = null; + + DateHistogramAggregationBuilder dateHistogramAggregationBuilder = dateHistogram("by_day").field(TIMESTAMP_FIELD) + .calendarInterval(DateHistogramInterval.DAY) + .subAggregation(aggregationBuilder); + testCase(indexSearcher, query, queryBuilder, dateHistogramAggregationBuilder, starTree, supportedDimensions); + + dateHistogramAggregationBuilder = dateHistogram("by_month").field(TIMESTAMP_FIELD) + .calendarInterval(DateHistogramInterval.MONTH) + .subAggregation(aggregationBuilder); + testCase(indexSearcher, query, queryBuilder, dateHistogramAggregationBuilder, starTree, supportedDimensions); + + // year not present in star-tree, but should be able to compute using @timestamp_day dimension + dateHistogramAggregationBuilder = dateHistogram("by_year").field(TIMESTAMP_FIELD) + .calendarInterval(DateHistogramInterval.YEAR) + .subAggregation(aggregationBuilder); + testCase(indexSearcher, query, queryBuilder, dateHistogramAggregationBuilder, starTree, supportedDimensions); + + // Numeric-terms query with date histogram + for (int cases = 0; cases < 100; cases++) { + String queryField; + long queryValue; + if (randomBoolean()) { + queryField = STATUS; + queryValue = random.nextInt(10); + } else { + queryField = SIZE; + queryValue = random.nextInt(20) - 15; + } + dateHistogramAggregationBuilder = dateHistogram("by_month").field(TIMESTAMP_FIELD) + .calendarInterval(DateHistogramInterval.MONTH) + .subAggregation(aggregationBuilder); + query = SortedNumericDocValuesField.newSlowExactQuery(queryField, queryValue); + queryBuilder = new TermQueryBuilder(queryField, queryValue); + testCase(indexSearcher, query, queryBuilder, dateHistogramAggregationBuilder, starTree, supportedDimensions); + + // year not present in star-tree, but should be able to compute using @timestamp_day dimension + dateHistogramAggregationBuilder = dateHistogram("by_year").field(TIMESTAMP_FIELD) + .calendarInterval(DateHistogramInterval.YEAR) + .subAggregation(aggregationBuilder); + testCase(indexSearcher, query, queryBuilder, dateHistogramAggregationBuilder, starTree, supportedDimensions); + } + } + ir.close(); + directory.close(); + } + + private void testCase( + IndexSearcher indexSearcher, + Query query, + QueryBuilder queryBuilder, + DateHistogramAggregationBuilder dateHistogramAggregationBuilder, + CompositeIndexFieldInfo starTree, + List supportedDimensions + ) throws IOException { + InternalDateHistogram starTreeAggregation = searchAndReduceStarTree( + createIndexSettings(), + indexSearcher, + query, + queryBuilder, + dateHistogramAggregationBuilder, + starTree, + supportedDimensions, + null, + DEFAULT_MAX_BUCKETS, + false, + null, + true, + TIMESTAMP_FIELD_TYPE, + NUMBER_FIELD_TYPE + ); + + InternalDateHistogram defaultAggregation = searchAndReduceStarTree( + createIndexSettings(), + indexSearcher, + query, + queryBuilder, + dateHistogramAggregationBuilder, + null, + null, + null, + DEFAULT_MAX_BUCKETS, + false, + null, + false, + TIMESTAMP_FIELD_TYPE, + NUMBER_FIELD_TYPE + ); + + assertEquals(defaultAggregation.getBuckets().size(), starTreeAggregation.getBuckets().size()); + assertEquals(defaultAggregation.getBuckets(), starTreeAggregation.getBuckets()); + } + + public static XContentBuilder getExpandedMapping(int maxLeafDocs, boolean skipStarNodeCreationForStatusDimension) throws IOException { + return topMapping(b -> { + b.startObject("composite"); + b.startObject("startree1"); // Use the same name as the provided mapping + b.field("type", "star_tree"); + b.startObject("config"); + b.field("max_leaf_docs", maxLeafDocs); + if (skipStarNodeCreationForStatusDimension) { + b.startArray("skip_star_node_creation_for_dimensions"); + b.value("status"); // Skip for "status" dimension + b.endArray(); + } + b.startArray("ordered_dimensions"); + b.startObject(); + b.field("name", "status"); + b.endObject(); + b.startObject(); + b.field("name", "size"); + b.endObject(); + b.startObject(); + b.field("name", TIMESTAMP_FIELD); + b.startArray("calendar_intervals"); + b.value("month"); + b.value("day"); + b.endArray(); + b.endObject(); + b.endArray(); + b.startArray("metrics"); + b.startObject(); + b.field("name", "size"); + b.startArray("stats"); + b.value("sum"); + b.value("value_count"); + b.value("min"); + b.value("max"); + b.endArray(); + b.endObject(); + b.startObject(); + b.field("name", "status"); + b.startArray("stats"); + b.value("sum"); + b.value("value_count"); + b.value("min"); + b.value("max"); + b.endArray(); + b.endObject(); + b.endArray(); + b.endObject(); + b.endObject(); + b.endObject(); + b.startObject("properties"); + b.startObject("@timestamp"); + b.field("type", "date"); + b.field("format", "strict_date_optional_time||epoch_second"); + b.endObject(); + b.startObject("message"); + b.field("type", "keyword"); + b.field("index", false); + b.field("doc_values", false); + b.endObject(); + b.startObject("clientip"); + b.field("type", "ip"); + b.endObject(); + b.startObject("request"); + b.field("type", "text"); + b.startObject("fields"); + b.startObject("raw"); + b.field("type", "keyword"); + b.field("ignore_above", 256); + b.endObject(); + b.endObject(); + b.endObject(); + b.startObject("status"); + b.field("type", "integer"); + b.endObject(); + b.startObject("size"); + b.field("type", "integer"); + b.endObject(); + b.startObject("geoip"); + b.startObject("properties"); + b.startObject("country_name"); + b.field("type", "keyword"); + b.endObject(); + b.startObject("city_name"); + b.field("type", "keyword"); + b.endObject(); + b.startObject("location"); + b.field("type", "geo_point"); + b.endObject(); + b.endObject(); + b.endObject(); + b.endObject(); + }); + } +} diff --git a/server/src/test/java/org/opensearch/search/aggregations/startree/StarTreeFilterTests.java b/server/src/test/java/org/opensearch/search/aggregations/startree/StarTreeFilterTests.java index c1cb19b9576e4..02ce70b234ab9 100644 --- a/server/src/test/java/org/opensearch/search/aggregations/startree/StarTreeFilterTests.java +++ b/server/src/test/java/org/opensearch/search/aggregations/startree/StarTreeFilterTests.java @@ -46,6 +46,7 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; +import java.util.Set; import static org.opensearch.index.codec.composite912.datacube.startree.AbstractStarTreeDVFormatTests.topMapping; @@ -229,7 +230,7 @@ private long getDocCountFromStarTree(CompositeIndexReader starTreeDocValuesReade List compositeIndexFields = starTreeDocValuesReader.getCompositeIndexFields(); CompositeIndexFieldInfo starTree = compositeIndexFields.get(0); StarTreeValues starTreeValues = StarTreeQueryHelper.getStarTreeValues(context, starTree); - FixedBitSet filteredValues = StarTreeFilter.getStarTreeResult(starTreeValues, filters); + FixedBitSet filteredValues = StarTreeFilter.getStarTreeResult(starTreeValues, filters, Set.of()); SortedNumericStarTreeValuesIterator valuesIterator = (SortedNumericStarTreeValuesIterator) starTreeValues.getMetricValuesIterator( StarTreeUtils.fullyQualifiedFieldNameForStarTreeMetricsDocValues( diff --git a/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java b/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java index 27142b298db52..f3105eae20f79 100644 --- a/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java +++ b/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java @@ -411,7 +411,8 @@ protected SearchContext createSearchContextWithStarTreeContext( // Mock SearchContextAggregations SearchContextAggregations searchContextAggregations = mock(SearchContextAggregations.class); AggregatorFactories aggregatorFactories = mock(AggregatorFactories.class); - when(searchContext.aggregations()).thenReturn(searchContextAggregations); + when(searchContext.aggregations()).thenReturn(new SearchContextAggregations(AggregatorFactories.EMPTY, bucketConsumer)); + when(searchContextAggregations.factories()).thenReturn(aggregatorFactories); if (aggregatorFactory != null) {