From 98e5e92ec8e9cc82c47d1719628106a5874d067f Mon Sep 17 00:00:00 2001 From: Igor Panychek Date: Mon, 16 Dec 2019 19:24:07 +0400 Subject: [PATCH] Add the Diversified Sampler aggregation (#1735) --- CHANGELOG.md | 1 + .../Aggregation/DiversifiedSampler.php | 42 ++++++ lib/Elastica/QueryBuilder/DSL/Aggregation.php | 11 ++ .../Aggregation/DiversifiedSamplerTest.php | 120 ++++++++++++++++++ .../QueryBuilder/DSL/AggregationTest.php | 1 + 5 files changed, 175 insertions(+) create mode 100755 lib/Elastica/Aggregation/DiversifiedSampler.php create mode 100755 test/Elastica/Aggregation/DiversifiedSamplerTest.php diff --git a/CHANGELOG.md b/CHANGELOG.md index abc34a8dac..e33ea24e69 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ All notable changes to this project will be documented in this file based on the * Fixed handling of Search::OPTION_SEARCH_IGNORE_UNAVAILABLE inside Scroll object ### Added +* Added `DiversifiedSampler` aggregation [#1735](https://github.com/ruflin/Elastica/pull/1735) * Added `\Elastica\Query\DistanceFeature` [#1730](https://github.com/ruflin/Elastica/pull/1730) diff --git a/lib/Elastica/Aggregation/DiversifiedSampler.php b/lib/Elastica/Aggregation/DiversifiedSampler.php new file mode 100755 index 0000000000..da585c356f --- /dev/null +++ b/lib/Elastica/Aggregation/DiversifiedSampler.php @@ -0,0 +1,42 @@ +setParam('shard_size', $shardSize); + } + + /** + * Set the maximum number of documents to be returned per value. + * + * @return $this + */ + public function setMaxDocsPerValue(int $max): self + { + return $this->setParam('max_docs_per_value', $max); + } + + /** + * Instruct Elasticsearch to use direct field data or ordinals/hashes of the field values to execute this aggregation. + * The execution hint will be ignored if it is not applicable. 
+ * + * @return $this + */ + public function setExecutionHint(string $hint): self + { + return $this->setParam('execution_hint', $hint); + } +} diff --git a/lib/Elastica/QueryBuilder/DSL/Aggregation.php b/lib/Elastica/QueryBuilder/DSL/Aggregation.php index aa48473cf2..74d8c86bcd 100644 --- a/lib/Elastica/QueryBuilder/DSL/Aggregation.php +++ b/lib/Elastica/QueryBuilder/DSL/Aggregation.php @@ -9,6 +9,7 @@ use Elastica\Aggregation\Cardinality; use Elastica\Aggregation\DateHistogram; use Elastica\Aggregation\DateRange; +use Elastica\Aggregation\DiversifiedSampler; use Elastica\Aggregation\ExtendedStats; use Elastica\Aggregation\Filter; use Elastica\Aggregation\Filters; @@ -441,4 +442,14 @@ public function sampler($name): Sampler { return new Sampler($name); } + + /** + * diversified sampler aggregation. + * + * @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-diversified-sampler-aggregation.html + */ + public function diversified_sampler(string $name): DiversifiedSampler + { + return new DiversifiedSampler($name); + } } diff --git a/test/Elastica/Aggregation/DiversifiedSamplerTest.php b/test/Elastica/Aggregation/DiversifiedSamplerTest.php new file mode 100755 index 0000000000..a0c9f34674 --- /dev/null +++ b/test/Elastica/Aggregation/DiversifiedSamplerTest.php @@ -0,0 +1,120 @@ +_createIndex(null, true, 2); + + $mapping = new Mapping([ + 'price' => ['type' => 'integer'], + 'color' => ['type' => 'keyword'], + ]); + $index->setMapping($mapping); + + $routing1 = 'first_routing'; + $routing2 = 'second_routing'; + + $index->addDocuments([ + (new Document(1, ['price' => 5, 'color' => 'blue']))->setRouting($routing1), + (new Document(2, ['price' => 8, 'color' => 'blue']))->setRouting($routing1), + (new Document(3, ['price' => 1, 'color' => 'blue']))->setRouting($routing1), + (new Document(4, ['price' => 3, 'color' => 'red']))->setRouting($routing1), + (new Document(5, ['price' => 1.5, 'color' => 'red']))->setRouting($routing1), + 
(new Document(6, ['price' => 2, 'color' => 'green']))->setRouting($routing1), + (new Document(7, ['price' => 5, 'color' => 'blue']))->setRouting($routing2), + (new Document(8, ['price' => 8, 'color' => 'blue']))->setRouting($routing2), + (new Document(9, ['price' => 1, 'color' => 'red']))->setRouting($routing2), + (new Document(10, ['price' => 3, 'color' => 'red']))->setRouting($routing2), + ]); + + $index->refresh(); + + return $index; + } + + /** + * @group unit + */ + public function testToArray() + { + $expected = [ + 'diversified_sampler' => [ + 'field' => 'color', + 'shard_size' => 1, + 'max_docs_per_value' => 2, + 'execution_hint' => 'map', + ], + 'aggs' => [ + 'price_sum' => [ + 'sum' => [ + 'field' => 'price', + ], + ], + ], + ]; + + $agg = new DiversifiedSampler('price_diversified_sampler'); + $agg->setField('color'); + + $agg->setShardSize(1); + $agg->setMaxDocsPerValue(2); + $agg->setExecutionHint('map'); + + $childAgg = new Sum('price_sum'); + $childAgg->setField('price'); + + $agg->addAggregation($childAgg); + + $this->assertEquals($expected, $agg->toArray()); + } + + /** + * @dataProvider shardSizeAndMaxDocPerValueProvider + * @group functional + */ + public function testSamplerAggregation(int $shardSize, int $maxDocPerValue, int $docCount) + { + $agg = new DiversifiedSampler('price_diversified_sampler'); + $agg->setField('color'); + + $agg->setShardSize($shardSize); + $agg->setMaxDocsPerValue($maxDocPerValue); + + $childAgg = new Sum('price_sum'); + $childAgg->setField('price'); + + $agg->addAggregation($childAgg); + + $query = new Query(); + $query->addAggregation($agg); + $results = $this->_getIndexForTest()->search($query)->getAggregation('price_diversified_sampler'); + + $this->assertEquals($docCount, $results['doc_count']); + } + + public function shardSizeAndMaxDocPerValueProvider() + { + return [ + [1, 1, 2], + [2, 1, 4], + [3, 1, 5], + [4, 1, 5], + [1, 2, 2], + [2, 2, 4], + [3, 2, 6], + [4, 2, 8], + [5, 2, 9], + [6, 2, 9], + ]; + } +} diff 
--git a/test/Elastica/QueryBuilder/DSL/AggregationTest.php b/test/Elastica/QueryBuilder/DSL/AggregationTest.php index 98415a2b63..c69e4791b5 100644 --- a/test/Elastica/QueryBuilder/DSL/AggregationTest.php +++ b/test/Elastica/QueryBuilder/DSL/AggregationTest.php @@ -58,6 +58,7 @@ public function testInterface() $this->_assertImplemented($aggregationDSL, 'sum_bucket', Aggregation\SumBucket::class, ['name']); $this->_assertImplemented($aggregationDSL, 'adjacency_matrix', Aggregation\AdjacencyMatrix::class, ['name']); $this->_assertImplemented($aggregationDSL, 'sampler', Aggregation\Sampler::class, ['name']); + $this->_assertImplemented($aggregationDSL, 'diversified_sampler', Aggregation\DiversifiedSampler::class, ['name']); $this->_assertNotImplemented($aggregationDSL, 'children', ['name']); $this->_assertNotImplemented($aggregationDSL, 'geo_bounds', ['name']);