Skip to content

Commit

Permalink
Add Sampler & Diversified Sampler aggs to AggConfigs (#120135)
Browse files Browse the repository at this point in the history
  • Loading branch information
Dosant committed Dec 8, 2021
1 parent 6fb7325 commit 2e57f00
Show file tree
Hide file tree
Showing 27 changed files with 798 additions and 38 deletions.
4 changes: 4 additions & 0 deletions src/plugins/data/common/search/aggs/agg_types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ export const getAggTypes = () => ({
{ name: BUCKET_TYPES.SIGNIFICANT_TERMS, fn: buckets.getSignificantTermsBucketAgg },
{ name: BUCKET_TYPES.GEOHASH_GRID, fn: buckets.getGeoHashBucketAgg },
{ name: BUCKET_TYPES.GEOTILE_GRID, fn: buckets.getGeoTitleBucketAgg },
{ name: BUCKET_TYPES.SAMPLER, fn: buckets.getSamplerBucketAgg },
{ name: BUCKET_TYPES.DIVERSIFIED_SAMPLER, fn: buckets.getDiversifiedSamplerBucketAgg },
],
});

Expand All @@ -79,6 +81,8 @@ export const getAggTypesFunctions = () => [
buckets.aggDateHistogram,
buckets.aggTerms,
buckets.aggMultiTerms,
buckets.aggSampler,
buckets.aggDiversifiedSampler,
metrics.aggAvg,
metrics.aggBucketAvg,
metrics.aggBucketMax,
Expand Down
4 changes: 4 additions & 0 deletions src/plugins/data/common/search/aggs/aggs_service.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ describe('Aggs service', () => {
"significant_terms",
"geohash_grid",
"geotile_grid",
"sampler",
"diversified_sampler",
"foo",
]
`);
Expand Down Expand Up @@ -122,6 +124,8 @@ describe('Aggs service', () => {
"significant_terms",
"geohash_grid",
"geotile_grid",
"sampler",
"diversified_sampler",
]
`);
expect(bStart.types.getAll().metrics.map((t) => t(aggTypesDependencies).name))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,6 @@ export enum BUCKET_TYPES {
GEOHASH_GRID = 'geohash_grid',
GEOTILE_GRID = 'geotile_grid',
DATE_HISTOGRAM = 'date_histogram',
SAMPLER = 'sampler',
DIVERSIFIED_SAMPLER = 'diversified_sampler',
}
62 changes: 62 additions & 0 deletions src/plugins/data/common/search/aggs/buckets/diversified_sampler.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

import { i18n } from '@kbn/i18n';
import { BucketAggType } from './bucket_agg_type';
import { BaseAggParams } from '../types';
import { aggDiversifiedSamplerFnName } from './diversified_sampler_fn';

/** Identifier used as the `name` of the diversified sampler bucket agg type. */
export const DIVERSIFIED_SAMPLER_AGG_NAME = 'diversified_sampler';

// Translated, human-readable title handed to the agg type definition in this file.
const title = i18n.translate('data.search.aggs.buckets.diversifiedSamplerTitle', {
defaultMessage: 'Diversified sampler',
description: 'Diversified sampler aggregation title',
});

/**
 * Parameters of the diversified sampler aggregation, in addition to those
 * inherited from `BaseAggParams`.
 */
export interface AggParamsDiversifiedSampler extends BaseAggParams {
/**
* Field that provides the values used for de-duplication.
*/
field: string;

/**
* Limits how many top-scoring documents are collected in the sample processed on each shard.
*/
shard_size?: number;

/**
* Limits how many documents are permitted per de-duplicating value.
*/
max_docs_per_value?: number;
}

/**
 * Creates the bucket agg type for the `diversified_sampler` aggregation.
 *
 * Like the sampler aggregation, this is a filtering aggregation that limits
 * the processing of any sub aggregations to a sample of the top-scoring
 * documents. On top of that, it can limit the number of matches that share a
 * common value.
 *
 * @returns a new `BucketAggType` exposing the `shard_size`,
 * `max_docs_per_value` and `field` params, in that order.
 */
export const getDiversifiedSamplerBucketAgg = () => {
  const diversifiedSamplerAggType = new BucketAggType({
    name: DIVERSIFIED_SAMPLER_AGG_NAME,
    title,
    customLabels: false,
    // Expression function that serializes a config of this agg type.
    expressionName: aggDiversifiedSamplerFnName,
    params: [
      { name: 'shard_size', type: 'number' },
      { name: 'max_docs_per_value', type: 'number' },
      { name: 'field', type: 'field' },
    ],
  });

  return diversifiedSamplerAggType;
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

import { functionWrapper } from '../test_helpers';
import { aggDiversifiedSampler } from './diversified_sampler_fn';

// Unit tests for the `aggDiversifiedSampler` expression function: each case
// calls the wrapped function with a set of args and snapshots the result.
describe('aggDiversifiedSampler', () => {
// `functionWrapper` comes from the shared test helpers and wraps the function definition so it can be called with args only.
const fn = functionWrapper(aggDiversifiedSampler());

// Only `id`, `schema` and `field` are passed; the snapshot expects `enabled`
// to be `true` and the omitted numeric params to be `undefined`.
test('fills in defaults when only required args are provided', () => {
const actual = fn({ id: 'sampler', schema: 'bucket', field: 'author' });
expect(actual).toMatchInlineSnapshot(`
Object {
"type": "agg_type",
"value": Object {
"enabled": true,
"id": "sampler",
"params": Object {
"field": "author",
"max_docs_per_value": undefined,
"shard_size": undefined,
},
"schema": "bucket",
"type": "diversified_sampler",
},
}
`);
});

// `shard_size` and `max_docs_per_value` are passed explicitly and are expected
// to show up unchanged under `params`.
test('includes optional params when they are provided', () => {
const actual = fn({
id: 'sampler',
schema: 'bucket',
shard_size: 300,
field: 'author',
max_docs_per_value: 3,
});

expect(actual.value).toMatchInlineSnapshot(`
Object {
"enabled": true,
"id": "sampler",
"params": Object {
"field": "author",
"max_docs_per_value": 3,
"shard_size": 300,
},
"schema": "bucket",
"type": "diversified_sampler",
}
`);
});
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

import { i18n } from '@kbn/i18n';
import { ExpressionFunctionDefinition } from 'src/plugins/expressions/common';
import { AggExpressionFunctionArgs, AggExpressionType, BUCKET_TYPES } from '../';
import { DIVERSIFIED_SAMPLER_AGG_NAME } from './diversified_sampler';

/** Name of the expression function that serializes a diversified sampler agg config. */
export const aggDiversifiedSamplerFnName = 'aggDiversifiedSampler';

// The function's `fn` does not read its input, so the input type is left open.
type Input = any;
// Argument shape derived from the params registered for the diversified sampler bucket type.
type Arguments = AggExpressionFunctionArgs<typeof BUCKET_TYPES.DIVERSIFIED_SAMPLER>;

// The function produces an `agg_type` expression value.
type Output = AggExpressionType;
// Full definition type: function name, input, arguments and output tied together.
type FunctionDefinition = ExpressionFunctionDefinition<
typeof aggDiversifiedSamplerFnName,
Input,
Arguments,
Output
>;

/**
 * Expression function definition that turns its arguments into a serialized
 * agg config for the diversified sampler aggregation.
 *
 * `id`, `enabled` and `schema` become top-level properties of the config;
 * every other argument (`shard_size`, `max_docs_per_value`, `field`) is
 * collected under `params`. `enabled` defaults to `true`.
 *
 * @returns the function definition; its `fn` returns an `agg_type` value.
 */
export const aggDiversifiedSampler = (): FunctionDefinition => ({
  name: aggDiversifiedSamplerFnName,
  help: i18n.translate('data.search.aggs.function.buckets.diversifiedSampler.help', {
    defaultMessage: 'Generates a serialized agg config for a Diversified sampler agg',
  }),
  type: 'agg_type',
  args: {
    id: {
      types: ['string'],
      help: i18n.translate('data.search.aggs.buckets.diversifiedSampler.id.help', {
        defaultMessage: 'ID for this aggregation',
      }),
    },
    enabled: {
      types: ['boolean'],
      default: true,
      help: i18n.translate('data.search.aggs.buckets.diversifiedSampler.enabled.help', {
        defaultMessage: 'Specifies whether this aggregation should be enabled',
      }),
    },
    schema: {
      types: ['string'],
      help: i18n.translate('data.search.aggs.buckets.diversifiedSampler.schema.help', {
        defaultMessage: 'Schema to use for this aggregation',
      }),
    },
    shard_size: {
      types: ['number'],
      help: i18n.translate('data.search.aggs.buckets.diversifiedSampler.shardSize.help', {
        defaultMessage:
          'The shard_size parameter limits how many top-scoring documents are collected in the sample processed on each shard.',
      }),
    },
    max_docs_per_value: {
      types: ['number'],
      help: i18n.translate('data.search.aggs.buckets.diversifiedSampler.maxDocsPerValue.help', {
        defaultMessage:
          'Limits how many documents are permitted per choice of de-duplicating value.',
      }),
    },
    field: {
      types: ['string'],
      help: i18n.translate('data.search.aggs.buckets.diversifiedSampler.field.help', {
        defaultMessage: 'Used to provide values used for de-duplication.',
      }),
    },
  },
  fn: (input, args) => {
    // Split the generic agg config properties from the agg-specific params.
    const { id, enabled, schema, ...params } = args;

    const value = {
      id,
      enabled,
      schema,
      type: DIVERSIFIED_SAMPLER_AGG_NAME,
      params,
    };

    return { type: 'agg_type', value };
  },
});
4 changes: 4 additions & 0 deletions src/plugins/data/common/search/aggs/buckets/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,7 @@ export * from './terms_fn';
export * from './terms';
export * from './multi_terms_fn';
export * from './multi_terms';
export * from './sampler_fn';
export * from './sampler';
export * from './diversified_sampler_fn';
export * from './diversified_sampler';
43 changes: 43 additions & 0 deletions src/plugins/data/common/search/aggs/buckets/sampler.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

import { i18n } from '@kbn/i18n';
import { BucketAggType } from './bucket_agg_type';
import { BaseAggParams } from '../types';
import { aggSamplerFnName } from './sampler_fn';

/** Identifier used as the `name` of the sampler bucket agg type. */
export const SAMPLER_AGG_NAME = 'sampler';

// Translated, human-readable title handed to the agg type definition in this file.
const title = i18n.translate('data.search.aggs.buckets.samplerTitle', {
defaultMessage: 'Sampler',
description: 'Sampler aggregation title',
});

/**
 * Parameters of the sampler aggregation, in addition to those inherited from
 * `BaseAggParams`.
 */
export interface AggParamsSampler extends BaseAggParams {
/**
* Limits how many top-scoring documents are collected in the sample processed on each shard.
*/
shard_size?: number;
}

/**
 * Creates the bucket agg type for the `sampler` aggregation: a filtering
 * aggregation that limits the processing of any sub aggregations to a sample
 * of the top-scoring documents.
 *
 * @returns a new `BucketAggType` whose only param is the numeric `shard_size`.
 */
export const getSamplerBucketAgg = () => {
  const samplerAggType = new BucketAggType({
    name: SAMPLER_AGG_NAME,
    title,
    customLabels: false,
    // Expression function that serializes a config of this agg type.
    expressionName: aggSamplerFnName,
    params: [{ name: 'shard_size', type: 'number' }],
  });

  return samplerAggType;
};
52 changes: 52 additions & 0 deletions src/plugins/data/common/search/aggs/buckets/sampler_fn.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

import { functionWrapper } from '../test_helpers';
import { aggSampler } from './sampler_fn';

// Unit tests for the `aggSampler` expression function: each case calls the
// wrapped function with a set of args and snapshots the result.
describe('aggSampler', () => {
// `functionWrapper` comes from the shared test helpers and wraps the function definition so it can be called with args only.
const fn = functionWrapper(aggSampler());

// Only `id` and `schema` are passed; the snapshot expects `enabled` to be
// `true` and `shard_size` to be `undefined`.
test('fills in defaults when only required args are provided', () => {
const actual = fn({ id: 'sampler', schema: 'bucket' });
expect(actual).toMatchInlineSnapshot(`
Object {
"type": "agg_type",
"value": Object {
"enabled": true,
"id": "sampler",
"params": Object {
"shard_size": undefined,
},
"schema": "bucket",
"type": "sampler",
},
}
`);
});

// `shard_size` is passed explicitly and is expected to show up unchanged under `params`.
test('includes optional params when they are provided', () => {
const actual = fn({
id: 'sampler',
schema: 'bucket',
shard_size: 300,
});

expect(actual.value).toMatchInlineSnapshot(`
Object {
"enabled": true,
"id": "sampler",
"params": Object {
"shard_size": 300,
},
"schema": "bucket",
"type": "sampler",
}
`);
});
});
Loading

0 comments on commit 2e57f00

Please sign in to comment.