Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add histogram field type support to boxplot aggs #52265

Merged
merged 2 commits into from
Feb 13, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/reference/aggregations/metrics.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ include::metrics/valuecount-aggregation.asciidoc[]

include::metrics/median-absolute-deviation-aggregation.asciidoc[]

include::metrics/boxplot-aggregation.asciidoc[]



Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
=== Boxplot Aggregation

A `boxplot` metrics aggregation that computes boxplot of numeric values extracted from the aggregated documents.
These values can be extracted either from specific numeric fields in the documents, or be generated by a provided script.
These values can be generated by a provided script or extracted from specific numeric or
<<histogram,histogram fields>> in the documents.

The `boxplot` aggregation returns essential information for making a https://en.wikipedia.org/wiki/Box_plot[box plot]: minimum, maximum
median, first quartile (25th percentile) and third quartile (75th percentile) values.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ GET latency/_search

<1> Compression controls memory usage and approximation error

// tag::[t-digest]
// tag::t-digest[]
The TDigest algorithm uses a number of "nodes" to approximate percentiles -- the
more nodes available, the higher the accuracy (and large memory footprint) proportional
to the volume of data. The `compression` parameter limits the maximum number of
Expand All @@ -301,7 +301,7 @@ A "node" uses roughly 32 bytes of memory, so under worst-case scenarios (large a
of data which arrives sorted and in-order) the default settings will produce a
TDigest roughly 64KB in size. In practice data tends to be more random and
the TDigest will use less memory.
// tag::[t-digest]
// end::t-digest[]

==== HDR Histogram

Expand Down
1 change: 1 addition & 0 deletions docs/reference/mapping/types/histogram.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ following aggregations and queries:

* <<search-aggregations-metrics-percentile-aggregation,percentiles>> aggregation
* <<search-aggregations-metrics-percentile-rank-aggregation,percentile ranks>> aggregation
* <<search-aggregations-metrics-boxplot-aggregation,boxplot>> aggregation
* <<query-dsl-exists-query,exists>> query

[[mapping-types-histogram-building-histogram]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,15 @@

import static org.elasticsearch.search.aggregations.metrics.PercentilesMethod.COMPRESSION_FIELD;

public class BoxplotAggregationBuilder extends ValuesSourceAggregationBuilder.LeafOnly<ValuesSource.Numeric,
public class BoxplotAggregationBuilder extends ValuesSourceAggregationBuilder.LeafOnly<ValuesSource,
BoxplotAggregationBuilder> {
public static final String NAME = "boxplot";

private static final ObjectParser<BoxplotAggregationBuilder, Void> PARSER;

static {
PARSER = new ObjectParser<>(BoxplotAggregationBuilder.NAME);
ValuesSourceParserHelper.declareNumericFields(PARSER, true, true, false);
ValuesSourceParserHelper.declareAnyFields(PARSER, true, true);
PARSER.declareDouble(BoxplotAggregationBuilder::compression, COMPRESSION_FIELD);
}

Expand Down Expand Up @@ -98,7 +98,7 @@ public double compression() {

@Override
protected BoxplotAggregatorFactory innerBuild(QueryShardContext queryShardContext,
ValuesSourceConfig<ValuesSource.Numeric> config,
ValuesSourceConfig<ValuesSource> config,
AggregatorFactory parent,
AggregatorFactories.Builder subFactoriesBuilder) throws IOException {
return new BoxplotAggregatorFactory(name, config, compression, queryShardContext, parent, subFactoriesBuilder, metaData);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
import org.elasticsearch.common.lease.Releasables;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.common.util.ObjectArray;
import org.elasticsearch.index.fielddata.HistogramValue;
import org.elasticsearch.index.fielddata.HistogramValues;
import org.elasticsearch.index.fielddata.SortedNumericDoubleValues;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.Aggregator;
Expand All @@ -29,12 +31,12 @@

public class BoxplotAggregator extends NumericMetricsAggregator.MultiValue {

private final ValuesSource.Numeric valuesSource;
private final ValuesSource valuesSource;
private final DocValueFormat format;
protected ObjectArray<TDigestState> states;
protected final double compression;

BoxplotAggregator(String name, ValuesSource.Numeric valuesSource, DocValueFormat formatter, double compression,
BoxplotAggregator(String name, ValuesSource valuesSource, DocValueFormat formatter, double compression,
SearchContext context, Aggregator parent, List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metaData) throws IOException {
super(name, context, parent, pipelineAggregators, metaData);
Expand All @@ -58,23 +60,38 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx,
return LeafBucketCollector.NO_OP_COLLECTOR;
}
final BigArrays bigArrays = context.bigArrays();
final SortedNumericDoubleValues values = valuesSource.doubleValues(ctx);
return new LeafBucketCollectorBase(sub, values) {
@Override
public void collect(int doc, long bucket) throws IOException {
states = bigArrays.grow(states, bucket + 1);

if (values.advanceExact(doc)) {
if (valuesSource instanceof ValuesSource.Histogram) {
final HistogramValues values = ((ValuesSource.Histogram)valuesSource).getHistogramValues(ctx);
return new LeafBucketCollectorBase(sub, values) {
@Override
public void collect(int doc, long bucket) throws IOException {
TDigestState state = getExistingOrNewHistogram(bigArrays, bucket);
if (values.advanceExact(doc)) {
final int valueCount = values.docValueCount();
for (int i = 0; i < valueCount; i++) {
state.add(values.nextValue());
final HistogramValue sketch = values.histogram();
while(sketch.next()) {
state.add(sketch.value(), sketch.count());
}
}
}
}
};
};
} else {
final SortedNumericDoubleValues values = ((ValuesSource.Numeric)valuesSource).doubleValues(ctx);
return new LeafBucketCollectorBase(sub, values) {
@Override
public void collect(int doc, long bucket) throws IOException {
states = bigArrays.grow(states, bucket + 1);
if (values.advanceExact(doc)) {
TDigestState state = getExistingOrNewHistogram(bigArrays, bucket);
if (values.advanceExact(doc)) {
final int valueCount = values.docValueCount();
for (int i = 0; i < valueCount; i++) {
state.add(values.nextValue());
}
}
}
}
};
}
}

private TDigestState getExistingOrNewHistogram(final BigArrays bigArrays, long bucket) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,12 @@
import java.util.List;
import java.util.Map;

public class BoxplotAggregatorFactory extends ValuesSourceAggregatorFactory<ValuesSource.Numeric> {
public class BoxplotAggregatorFactory extends ValuesSourceAggregatorFactory<ValuesSource> {

private final double compression;

BoxplotAggregatorFactory(String name,
ValuesSourceConfig<ValuesSource.Numeric> config,
ValuesSourceConfig<ValuesSource> config,
double compression,
QueryShardContext queryShardContext,
AggregatorFactory parent,
Expand All @@ -46,7 +46,7 @@ protected Aggregator createUnmapped(SearchContext searchContext,
}

@Override
protected Aggregator doCreateInternal(ValuesSource.Numeric valuesSource,
protected Aggregator doCreateInternal(ValuesSource valuesSource,
SearchContext searchContext,
Aggregator parent,
boolean collectsFromSingleBucket,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
import org.elasticsearch.search.aggregations.metrics.TDigestState;
import org.elasticsearch.test.ESSingleNodeTestCase;
import org.elasticsearch.xpack.analytics.AnalyticsPlugin;
import org.elasticsearch.xpack.analytics.boxplot.Boxplot;
import org.elasticsearch.xpack.analytics.boxplot.BoxplotAggregationBuilder;
import org.elasticsearch.xpack.core.LocalStateCompositeXPackPlugin;

import java.util.ArrayList;
Expand Down Expand Up @@ -131,8 +133,7 @@ public void testHDRHistogram() throws Exception {
}
}

public void testTDigestHistogram() throws Exception {

private void setupTDigestHistogram(int compression) throws Exception {
XContentBuilder xContentBuilder = XContentFactory.jsonBuilder()
.startObject()
.startObject("_doc")
Expand Down Expand Up @@ -170,8 +171,6 @@ public void testTDigestHistogram() throws Exception {
PutMappingRequest request2 = new PutMappingRequest("pre_agg").source(xContentBuilder2);
client().admin().indices().putMapping(request2).actionGet();


int compression = TestUtil.nextInt(random(), 200, 300);
TDigestState histogram = new TDigestState(compression);
BulkRequest bulkRequest = new BulkRequest();

Expand Down Expand Up @@ -218,6 +217,11 @@ public void testTDigestHistogram() throws Exception {

response = client().prepareSearch("pre_agg").get();
assertEquals(numDocs / frq, response.getHits().getTotalHits().value);
}

public void testTDigestHistogram() throws Exception {
int compression = TestUtil.nextInt(random(), 200, 300);
setupTDigestHistogram(compression);

PercentilesAggregationBuilder builder =
AggregationBuilders.percentiles("agg").field("inner.data").method(PercentilesMethod.TDIGEST)
Expand All @@ -236,6 +240,31 @@ public void testTDigestHistogram() throws Exception {
}
}

public void testBoxplotHistogram() throws Exception {
int compression = TestUtil.nextInt(random(), 200, 300);
setupTDigestHistogram(compression);
BoxplotAggregationBuilder bpBuilder = new BoxplotAggregationBuilder("agg").field("inner.data").compression(compression);

SearchResponse bpResponseRaw = client().prepareSearch("raw").addAggregation(bpBuilder).get();
SearchResponse bpResponsePreAgg = client().prepareSearch("pre_agg").addAggregation(bpBuilder).get();
SearchResponse bpResponseBoth = client().prepareSearch("raw", "pre_agg").addAggregation(bpBuilder).get();

Boxplot bpRaw = bpResponseRaw.getAggregations().get("agg");
Boxplot bpPreAgg = bpResponsePreAgg.getAggregations().get("agg");
Boxplot bpBoth = bpResponseBoth.getAggregations().get("agg");
assertEquals(bpRaw.getMax(), bpPreAgg.getMax(), 0.0);
assertEquals(bpRaw.getMax(), bpBoth.getMax(), 0.0);
assertEquals(bpRaw.getMin(), bpPreAgg.getMin(), 0.0);
assertEquals(bpRaw.getMin(), bpBoth.getMin(), 0.0);

assertEquals(bpRaw.getQ1(), bpPreAgg.getQ1(), 1.0);
assertEquals(bpRaw.getQ1(), bpBoth.getQ1(), 1.0);
assertEquals(bpRaw.getQ2(), bpPreAgg.getQ2(), 1.0);
assertEquals(bpRaw.getQ2(), bpBoth.getQ2(), 1.0);
assertEquals(bpRaw.getQ3(), bpPreAgg.getQ3(), 1.0);
assertEquals(bpRaw.getQ3(), bpBoth.getQ3(), 1.0);
}

@Override
protected Collection<Class<? extends Plugin>> getPlugins() {
List<Class<? extends Plugin>> plugins = new ArrayList<>(super.getPlugins());
Expand Down