Skip to content

Commit

Permalink
Add Presto Aggregate function: geometric_mean(real) -> real (#6993)
Browse files Browse the repository at this point in the history
Summary:
Fixes #6992

Pull Request resolved: #6993

Reviewed By: xiaoxmeng

Differential Revision: D50297714

Pulled By: mbasmanova

fbshipit-source-id: 7c38b2e3a45d9601e480db0ff8c398b53757b1c3
  • Loading branch information
xumingming authored and facebook-github-bot committed Oct 16, 2023
1 parent 94917a5 commit 9314277
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 47 deletions.
6 changes: 3 additions & 3 deletions velox/docs/functions/presto/aggregate.rst
Original file line number Diff line number Diff line change
Expand Up @@ -82,12 +82,12 @@ General Aggregate Functions
each input value occurs. Supports integral, floating-point,
boolean, timestamp, and date input types.

.. function:: geometric_mean(x) -> double
.. function:: geometric_mean(bigint) -> double
geometric_mean(double) -> double
geometric_mean(real) -> real

Returns the `geometric mean <https://en.wikipedia.org/wiki/Geometric_mean>`_ of all input values.

Supported types are BIGINT, DOUBLE, and REAL.

.. function:: max_by(x, y) -> [same as x]

Returns the value of ``x`` associated with the maximum value of ``y`` over all input values.
Expand Down
65 changes: 32 additions & 33 deletions velox/functions/prestosql/aggregates/GeometricMeanAggregate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,20 +24,20 @@ namespace facebook::velox::aggregate::prestosql {

namespace {

template <typename T>
template <typename TInput, typename TResult>
class GeometricMeanAggregate {
public:
using InputType = Row<T>;
using InputType = Row<TInput>;

using IntermediateType =
Row</*logSum*/ double,
/*count*/ int64_t>;

using OutputType = double;
using OutputType = TResult;

static bool toIntermediate(
exec::out_type<Row<double, int64_t>>& out,
exec::arg_type<T> in) {
exec::arg_type<TInput> in) {
out.copy_from(std::make_tuple(std::log(in), 1));
return true;
}
Expand All @@ -52,7 +52,9 @@ class GeometricMeanAggregate {

explicit AccumulatorType(HashStringAllocator* /*allocator*/) {}

void addInput(HashStringAllocator* /*allocator*/, exec::arg_type<T> data) {
void addInput(
HashStringAllocator* /*allocator*/,
exec::arg_type<TInput> data) {
logSum_ += std::log(data);
count_ = checkedPlus<int64_t>(count_, 1);
}
Expand Down Expand Up @@ -93,6 +95,13 @@ void registerGeometricMeanAggregate(const std::string& prefix) {
.build());
}

// Register for real input type.
signatures.push_back(exec::AggregateFunctionSignatureBuilder()
.returnType("real")
.intermediateType("row(double,bigint)")
.argumentType("real")
.build());

exec::registerAggregateFunction(
name,
std::move(signatures),
Expand All @@ -104,34 +113,24 @@ void registerGeometricMeanAggregate(const std::string& prefix) {
-> std::unique_ptr<exec::Aggregate> {
VELOX_USER_CHECK_EQ(argTypes.size(), 1, "{} takes one argument", name);
auto inputType = argTypes[0];
if (exec::isRawInput(step)) {
switch (inputType->kind()) {
case TypeKind::BIGINT:
return std::make_unique<
SimpleAggregateAdapter<GeometricMeanAggregate<int64_t>>>(
resultType);
case TypeKind::DOUBLE:
return std::make_unique<
SimpleAggregateAdapter<GeometricMeanAggregate<double>>>(
resultType);
default:
VELOX_USER_FAIL(
"Unknown input type for {} aggregation {}",
name,
inputType->toString());
}
} else {
switch (resultType->kind()) {
case TypeKind::DOUBLE:
case TypeKind::ROW:
return std::make_unique<
SimpleAggregateAdapter<GeometricMeanAggregate<double>>>(
resultType);
default:
VELOX_USER_FAIL(
"Unsupported result type for final aggregation: {}",
resultType->toString());
}

switch (inputType->kind()) {
case TypeKind::BIGINT:
return std::make_unique<SimpleAggregateAdapter<
GeometricMeanAggregate<int64_t, double>>>(resultType);
case TypeKind::DOUBLE:
return std::make_unique<
SimpleAggregateAdapter<GeometricMeanAggregate<double, double>>>(
resultType);
case TypeKind::REAL:
return std::make_unique<
SimpleAggregateAdapter<GeometricMeanAggregate<float, float>>>(
resultType);
default:
VELOX_USER_FAIL(
"Unknown input type for {} aggregation {}",
name,
inputType->toString());
}
},
false);
Expand Down
55 changes: 44 additions & 11 deletions velox/functions/prestosql/aggregates/tests/GeometricMeanTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,18 +34,19 @@ class GeometricMeanTest : public AggregationTestBase {
}
};

/// Reference implementation used by the tests to compute expected results.
///
/// Returns the geometric mean of valueConverter(i) for
/// i = start, start + steps, start + 2 * steps, ... while i < end, computed as
/// exp(sum(log(value)) / count).
///
/// The running log sum is kept in a double regardless of T; only the final
/// result is narrowed to T.
///
/// @tparam T Result type, and the type produced by valueConverter.
/// @param start First index fed to valueConverter (inclusive).
/// @param end Upper bound for the index (exclusive).
/// @param steps Increment applied to the index after each iteration.
/// @param valueConverter Maps an index to the value that enters the mean.
/// @return exp(logSum / count) converted to T. For an empty range
/// (start >= end) this evaluates exp(0.0 / 0), i.e. no value was averaged.
template <typename T>
T geometricMean(
    int32_t start,
    int32_t end,
    int32_t steps,
    std::function<T(int32_t)> valueConverter) {
  double logSum = 0;
  int64_t count = 0;
  for (int32_t i = start; i < end; i += steps) {
    // std::log is invoked on a T, so for T = float the float overload is used
    // and its result is widened when added to the double accumulator.
    logSum += std::log(valueConverter(i));
    ++count;
  }
  return static_cast<T>(std::exp(logSum / count));
}

TEST_F(GeometricMeanTest, globalEmpty) {
Expand All @@ -67,7 +68,7 @@ TEST_F(GeometricMeanTest, globalNulls) {
testAggregations({data}, {}, {"geometric_mean(c0)"}, "SELECT NULL");

auto expected = makeRowVector({
makeConstant(geometricMean(1, 100, 2, folly::identity), 1),
makeConstant(geometricMean<double>(1, 100, 2, folly::identity), 1),
});

testAggregations({data}, {}, {"geometric_mean(c1)"}, {expected});
Expand All @@ -80,7 +81,7 @@ TEST_F(GeometricMeanTest, globalIntegers) {

auto expected = makeRowVector({
makeFlatVector(std::vector<double>{
geometricMean(1, 100, 1, [](int32_t i) { return i / 7; }),
geometricMean<double>(1, 100, 1, [](int32_t i) { return i / 7; }),
}),
});

Expand Down Expand Up @@ -109,7 +110,7 @@ TEST_F(GeometricMeanTest, groupByNulls) {
makeFlatVector<double>(
10,
[](auto row) {
return geometricMean(
return geometricMean<double>(
1, 10, 2, [&](int32_t i) { return row * 10 + i; });
},
[](auto row) { return row == 3; }),
Expand All @@ -129,7 +130,7 @@ TEST_F(GeometricMeanTest, groupByIntegers) {
makeFlatVector<double>(
10,
[](auto row) {
return geometricMean(
return geometricMean<double>(
0, 10, 1, [&](int32_t i) { return row * 10 + i; });
}),
});
Expand All @@ -143,8 +144,8 @@ TEST_F(GeometricMeanTest, globalDoubles) {
});

auto expected = makeRowVector({
makeFlatVector(std::vector<double>{
geometricMean(0, 100, 1, [&](int32_t i) { return i * 0.1 / 7; })}),
makeFlatVector(std::vector<double>{geometricMean<double>(
0, 100, 1, [&](int32_t i) { return i * 0.1 / 7; })}),
});

testAggregations({data}, {}, {"geometric_mean(c0)"}, {expected});
Expand All @@ -161,7 +162,39 @@ TEST_F(GeometricMeanTest, groupByDoubles) {
makeFlatVector<double>(
10,
[](auto row) {
return geometricMean(
return geometricMean<double>(
0, 10, 1, [&](int32_t i) { return row + i * 0.1; });
}),
});

testAggregations({data}, {"c0"}, {"geometric_mean(c1)"}, {expected});
}

// Global (no grouping keys) geometric_mean over a single REAL column; the
// expected result is a REAL computed by the geometricMean<float> helper.
TEST_F(GeometricMeanTest, globalReals) {
  // 100 rows holding row * 0.1 / 7, stored as float.
  auto realColumn =
      makeFlatVector<float>(100, [](auto row) { return row * 0.1 / 7; });
  auto input = makeRowVector({realColumn});

  // NOTE(review): the index range starts at 0, so the first value is 0 and
  // its log is -inf; the helper therefore yields an expected mean of 0.
  // Consider whether a strictly positive range would make this test stronger.
  const float expectedMean = geometricMean<float>(
      0, 100, 1, [](int32_t i) { return i * 0.1 / 7; });
  auto expected = makeRowVector({
      makeFlatVector(std::vector<float>{expectedMean}),
  });

  testAggregations({input}, {}, {"geometric_mean(c0)"}, {expected});
}

TEST_F(GeometricMeanTest, groupByReals) {
auto data = makeRowVector({
makeFlatVector<int32_t>(100, [](auto row) { return row / 10; }),
makeFlatVector<float>(100, [](auto row) { return row * 0.1; }),
});

auto expected = makeRowVector({
makeFlatVector<int32_t>({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}),
makeFlatVector<float>(
10,
[](auto row) {
return geometricMean<float>(
0, 10, 1, [&](int32_t i) { return row + i * 0.1; });
}),
});
Expand All @@ -185,7 +218,7 @@ TEST_F(GeometricMeanTest, groupByMultipleBatches) {
makeFlatVector<double>(
10,
[](auto row) {
return geometricMean(
return geometricMean<double>(
0, 10, 1, [&](int32_t i) { return row * 10 + i; });
}),
});
Expand Down

0 comments on commit 9314277

Please sign in to comment.