Skip to content

Commit

Permalink
Add the Diversified Sampler aggregation (#1735)
Browse files Browse the repository at this point in the history
  • Loading branch information
panychek authored and ruflin committed Dec 16, 2019
1 parent 5428351 commit 98e5e92
Show file tree
Hide file tree
Showing 5 changed files with 175 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ All notable changes to this project will be documented in this file based on the
* Fixed handling of Search::OPTION_SEARCH_IGNORE_UNAVAILABLE inside Scroll object

### Added
* Added `DiversifiedSampler` aggregation [#1735](https://github.com/ruflin/Elastica/pull/1735)

* Added `\Elastica\Query\DistanceFeature` [#1730](https://github.com/ruflin/Elastica/pull/1730)

Expand Down
42 changes: 42 additions & 0 deletions lib/Elastica/Aggregation/DiversifiedSampler.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
<?php

namespace Elastica\Aggregation;

/**
 * Diversified sampler aggregation.
 *
 * Exposes fluent setters for the `shard_size`, `max_docs_per_value` and
 * `execution_hint` parameters of the aggregation.
 *
 * @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-diversified-sampler-aggregation.html
 */
class DiversifiedSampler extends AbstractSimpleAggregation
{
    /**
     * Limit how many top-scoring documents each shard contributes to the sample.
     *
     * @param int $shardSize value stored under the `shard_size` parameter
     *
     * @return $this
     */
    public function setShardSize(int $shardSize): self
    {
        return $this->setParam('shard_size', $shardSize);
    }

    /**
     * Cap the number of sampled documents that may share the same value.
     *
     * @param int $max value stored under the `max_docs_per_value` parameter
     *
     * @return $this
     */
    public function setMaxDocsPerValue(int $max): self
    {
        return $this->setParam('max_docs_per_value', $max);
    }

    /**
     * Tell Elasticsearch which strategy to use when executing this aggregation:
     * direct field data, or ordinals/hashes of the field values.
     * Elasticsearch ignores the hint when it is not applicable.
     *
     * @param string $hint value stored under the `execution_hint` parameter (e.g. `map`)
     *
     * @return $this
     */
    public function setExecutionHint(string $hint): self
    {
        return $this->setParam('execution_hint', $hint);
    }
}
11 changes: 11 additions & 0 deletions lib/Elastica/QueryBuilder/DSL/Aggregation.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
use Elastica\Aggregation\Cardinality;
use Elastica\Aggregation\DateHistogram;
use Elastica\Aggregation\DateRange;
use Elastica\Aggregation\DiversifiedSampler;
use Elastica\Aggregation\ExtendedStats;
use Elastica\Aggregation\Filter;
use Elastica\Aggregation\Filters;
Expand Down Expand Up @@ -441,4 +442,14 @@ public function sampler($name): Sampler
{
return new Sampler($name);
}

/**
 * Create a diversified sampler aggregation.
 *
 * @param string $name name passed to the DiversifiedSampler constructor
 *
 * @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-diversified-sampler-aggregation.html
 */
public function diversified_sampler(string $name): DiversifiedSampler
{
    $aggregation = new DiversifiedSampler($name);

    return $aggregation;
}
}
120 changes: 120 additions & 0 deletions test/Elastica/Aggregation/DiversifiedSamplerTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
<?php

namespace Elastica\Test\Aggregation;

use Elastica\Aggregation\DiversifiedSampler;
use Elastica\Aggregation\Sum;
use Elastica\Document;
use Elastica\Index;
use Elastica\Mapping;
use Elastica\Query;

class DiversifiedSamplerTest extends BaseAggregationTest
{
    /**
     * Create the fixture index: an integer `price` and keyword `color` mapping,
     * populated with ten documents spread over two routing values.
     *
     * NOTE(review): the third argument of `_createIndex()` is presumably the
     * shard count — confirm against the base test case.
     */
    protected function _getIndexForTest(): Index
    {
        $index = $this->_createIndex(null, true, 2);

        $properties = [
            'price' => ['type' => 'integer'],
            'color' => ['type' => 'keyword'],
        ];
        $index->setMapping(new Mapping($properties));

        $firstRouting = 'first_routing';
        $secondRouting = 'second_routing';

        // Builds one routed fixture document; $price is untyped because the
        // fixture mixes integer and float prices.
        $makeDocument = static function (int $id, $price, string $color, string $routing) {
            return (new Document($id, ['price' => $price, 'color' => $color]))->setRouting($routing);
        };

        $index->addDocuments([
            // Documents sharing the first routing value.
            $makeDocument(1, 5, 'blue', $firstRouting),
            $makeDocument(2, 8, 'blue', $firstRouting),
            $makeDocument(3, 1, 'blue', $firstRouting),
            $makeDocument(4, 3, 'red', $firstRouting),
            $makeDocument(5, 1.5, 'red', $firstRouting),
            $makeDocument(6, 2, 'green', $firstRouting),
            // Documents sharing the second routing value.
            $makeDocument(7, 5, 'blue', $secondRouting),
            $makeDocument(8, 8, 'blue', $secondRouting),
            $makeDocument(9, 1, 'red', $secondRouting),
            $makeDocument(10, 3, 'red', $secondRouting),
        ]);

        $index->refresh();

        return $index;
    }

    /**
     * The aggregation serializes its own parameters and its nested sum aggregation.
     *
     * @group unit
     */
    public function testToArray()
    {
        $priceSum = new Sum('price_sum');
        $priceSum->setField('price');

        $sampler = new DiversifiedSampler('price_diversified_sampler');
        $sampler->setField('color');

        $sampler->setShardSize(1);
        $sampler->setMaxDocsPerValue(2);
        $sampler->setExecutionHint('map');

        $sampler->addAggregation($priceSum);

        $expected = [
            'diversified_sampler' => [
                'field' => 'color',
                'shard_size' => 1,
                'max_docs_per_value' => 2,
                'execution_hint' => 'map',
            ],
            'aggs' => [
                'price_sum' => [
                    'sum' => [
                        'field' => 'price',
                    ],
                ],
            ],
        ];

        $this->assertEquals($expected, $sampler->toArray());
    }

    /**
     * Run the aggregation against the fixture index and check the sampled `doc_count`.
     *
     * @dataProvider shardSizeAndMaxDocPerValueProvider
     * @group functional
     */
    public function testSamplerAggregation(int $shardSize, int $maxDocPerValue, int $docCount)
    {
        $priceSum = new Sum('price_sum');
        $priceSum->setField('price');

        $sampler = new DiversifiedSampler('price_diversified_sampler');
        $sampler->setField('color');

        $sampler->setShardSize($shardSize);
        $sampler->setMaxDocsPerValue($maxDocPerValue);

        $sampler->addAggregation($priceSum);

        $query = new Query();
        $query->addAggregation($sampler);

        $index = $this->_getIndexForTest();
        $resultSet = $index->search($query);
        $aggregationResult = $resultSet->getAggregation('price_diversified_sampler');

        $this->assertEquals($docCount, $aggregationResult['doc_count']);
    }

    /**
     * Data sets for testSamplerAggregation().
     *
     * Each row is [shard size, max docs per value, expected doc_count].
     */
    public function shardSizeAndMaxDocPerValueProvider()
    {
        $oneDocPerValue = [
            [1, 1, 2],
            [2, 1, 4],
            [3, 1, 5],
            [4, 1, 5],
        ];

        $twoDocsPerValue = [
            [1, 2, 2],
            [2, 2, 4],
            [3, 2, 6],
            [4, 2, 8],
            [5, 2, 9],
            [6, 2, 9],
        ];

        return \array_merge($oneDocPerValue, $twoDocsPerValue);
    }
}
1 change: 1 addition & 0 deletions test/Elastica/QueryBuilder/DSL/AggregationTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ public function testInterface()
$this->_assertImplemented($aggregationDSL, 'sum_bucket', Aggregation\SumBucket::class, ['name']);
$this->_assertImplemented($aggregationDSL, 'adjacency_matrix', Aggregation\AdjacencyMatrix::class, ['name']);
$this->_assertImplemented($aggregationDSL, 'sampler', Aggregation\Sampler::class, ['name']);
$this->_assertImplemented($aggregationDSL, 'diversified_sampler', Aggregation\DiversifiedSampler::class, ['name']);

$this->_assertNotImplemented($aggregationDSL, 'children', ['name']);
$this->_assertNotImplemented($aggregationDSL, 'geo_bounds', ['name']);
Expand Down

0 comments on commit 98e5e92

Please sign in to comment.