Add Presto Aggregate function: geometric_mean(real) -> real (#6993)

Summary: Fixes #6992. Pull Request resolved: #6993. Reviewed By: xiaoxmeng. Differential Revision: D50297714. Pulled By: mbasmanova. fbshipit-source-id: 7c38b2e3a45d9601e480db0ff8c398b53757b1c3
facebookincubator · Oct 16, 2023 · 9314277 · 9314277
1 parent 94917a5
commit 9314277
Show file tree

Hide file tree

Showing 3 changed files with 79 additions and 47 deletions.
diff --git a/velox/docs/functions/presto/aggregate.rst b/velox/docs/functions/presto/aggregate.rst
@@ -82,12 +82,12 @@ General Aggregate Functions
     each input value occurs. Supports integral, floating-point,
     boolean, timestamp, and date input types.
 
-.. function:: geometric_mean(x) -> double
+.. function:: geometric_mean(bigint) -> double
+              geometric_mean(double) -> double
+              geometric_mean(real) -> real
 
     Returns the `geometric mean <https://en.wikipedia.org/wiki/Geometric_mean>`_ of all input values.
 
-    Supported types are BIGINT and DOUBLE.
-
 .. function:: max_by(x, y) -> [same as x]
 
     Returns the value of ``x`` associated with the maximum value of ``y`` over all input values.

diff --git a/velox/functions/prestosql/aggregates/GeometricMeanAggregate.cpp b/velox/functions/prestosql/aggregates/GeometricMeanAggregate.cpp
@@ -24,20 +24,20 @@ namespace facebook::velox::aggregate::prestosql {
 
 namespace {
 
-template <typename T>
+template <typename TInput, typename TResult>
 class GeometricMeanAggregate {
  public:
-  using InputType = Row<T>;
+  using InputType = Row<TInput>;
 
   using IntermediateType =
       Row</*logSum*/ double,
           /*count*/ int64_t>;
 
-  using OutputType = double;
+  using OutputType = TResult;
 
   static bool toIntermediate(
       exec::out_type<Row<double, int64_t>>& out,
-      exec::arg_type<T> in) {
+      exec::arg_type<TInput> in) {
     out.copy_from(std::make_tuple(std::log(in), 1));
     return true;
   }
@@ -52,7 +52,9 @@ class GeometricMeanAggregate {
 
     explicit AccumulatorType(HashStringAllocator* /*allocator*/) {}
 
-    void addInput(HashStringAllocator* /*allocator*/, exec::arg_type<T> data) {
+    void addInput(
+        HashStringAllocator* /*allocator*/,
+        exec::arg_type<TInput> data) {
       logSum_ += std::log(data);
       count_ = checkedPlus<int64_t>(count_, 1);
     }
@@ -93,6 +95,13 @@ void registerGeometricMeanAggregate(const std::string& prefix) {
                              .build());
   }
 
+  // Register for real input type.
+  signatures.push_back(exec::AggregateFunctionSignatureBuilder()
+                           .returnType("real")
+                           .intermediateType("row(double,bigint)")
+                           .argumentType("real")
+                           .build());
+
   exec::registerAggregateFunction(
       name,
       std::move(signatures),
@@ -104,34 +113,24 @@ void registerGeometricMeanAggregate(const std::string& prefix) {
           -> std::unique_ptr<exec::Aggregate> {
         VELOX_USER_CHECK_EQ(argTypes.size(), 1, "{} takes one argument", name);
         auto inputType = argTypes[0];
-        if (exec::isRawInput(step)) {
-          switch (inputType->kind()) {
-            case TypeKind::BIGINT:
-              return std::make_unique<
-                  SimpleAggregateAdapter<GeometricMeanAggregate<int64_t>>>(
-                  resultType);
-            case TypeKind::DOUBLE:
-              return std::make_unique<
-                  SimpleAggregateAdapter<GeometricMeanAggregate<double>>>(
-                  resultType);
-            default:
-              VELOX_USER_FAIL(
-                  "Unknown input type for {} aggregation {}",
-                  name,
-                  inputType->toString());
-          }
-        } else {
-          switch (resultType->kind()) {
-            case TypeKind::DOUBLE:
-            case TypeKind::ROW:
-              return std::make_unique<
-                  SimpleAggregateAdapter<GeometricMeanAggregate<double>>>(
-                  resultType);
-            default:
-              VELOX_USER_FAIL(
-                  "Unsupported result type for final aggregation: {}",
-                  resultType->toString());
-          }
+
+        switch (inputType->kind()) {
+          case TypeKind::BIGINT:
+            return std::make_unique<SimpleAggregateAdapter<
+                GeometricMeanAggregate<int64_t, double>>>(resultType);
+          case TypeKind::DOUBLE:
+            return std::make_unique<
+                SimpleAggregateAdapter<GeometricMeanAggregate<double, double>>>(
+                resultType);
+          case TypeKind::REAL:
+            return std::make_unique<
+                SimpleAggregateAdapter<GeometricMeanAggregate<float, float>>>(
+                resultType);
+          default:
+            VELOX_USER_FAIL(
+                "Unknown input type for {} aggregation {}",
+                name,
+                inputType->toString());
         }
       },
       false);

diff --git a/velox/functions/prestosql/aggregates/tests/GeometricMeanTest.cpp b/velox/functions/prestosql/aggregates/tests/GeometricMeanTest.cpp
@@ -34,18 +34,19 @@ class GeometricMeanTest : public AggregationTestBase {
   }
 };
 
-double geometricMean(
+template <typename T>
+T geometricMean(
     int32_t start,
     int32_t end,
     int32_t steps,
-    std::function<double(int32_t)> valueConverter) {
+    std::function<T(int32_t)> valueConverter) {
   double logSum = 0;
   int64_t count = 0;
   for (int32_t i = start; i < end; i += steps) {
     logSum += std::log(valueConverter(i));
     count++;
   }
-  return std::exp(logSum / count);
+  return static_cast<T>(std::exp(logSum / count));
 }
 
 TEST_F(GeometricMeanTest, globalEmpty) {
@@ -67,7 +68,7 @@ TEST_F(GeometricMeanTest, globalNulls) {
   testAggregations({data}, {}, {"geometric_mean(c0)"}, "SELECT NULL");
 
   auto expected = makeRowVector({
-      makeConstant(geometricMean(1, 100, 2, folly::identity), 1),
+      makeConstant(geometricMean<double>(1, 100, 2, folly::identity), 1),
   });
 
   testAggregations({data}, {}, {"geometric_mean(c1)"}, {expected});
@@ -80,7 +81,7 @@ TEST_F(GeometricMeanTest, globalIntegers) {
 
   auto expected = makeRowVector({
       makeFlatVector(std::vector<double>{
-          geometricMean(1, 100, 1, [](int32_t i) { return i / 7; }),
+          geometricMean<double>(1, 100, 1, [](int32_t i) { return i / 7; }),
       }),
   });
 
@@ -109,7 +110,7 @@ TEST_F(GeometricMeanTest, groupByNulls) {
       makeFlatVector<double>(
           10,
           [](auto row) {
-            return geometricMean(
+            return geometricMean<double>(
                 1, 10, 2, [&](int32_t i) { return row * 10 + i; });
           },
           [](auto row) { return row == 3; }),
@@ -129,7 +130,7 @@ TEST_F(GeometricMeanTest, groupByIntegers) {
       makeFlatVector<double>(
           10,
           [](auto row) {
-            return geometricMean(
+            return geometricMean<double>(
                 0, 10, 1, [&](int32_t i) { return row * 10 + i; });
           }),
   });
@@ -143,8 +144,8 @@ TEST_F(GeometricMeanTest, globalDoubles) {
   });
 
   auto expected = makeRowVector({
-      makeFlatVector(std::vector<double>{
-          geometricMean(0, 100, 1, [&](int32_t i) { return i * 0.1 / 7; })}),
+      makeFlatVector(std::vector<double>{geometricMean<double>(
+          0, 100, 1, [&](int32_t i) { return i * 0.1 / 7; })}),
   });
 
   testAggregations({data}, {}, {"geometric_mean(c0)"}, {expected});
@@ -161,7 +162,39 @@ TEST_F(GeometricMeanTest, groupByDoubles) {
       makeFlatVector<double>(
           10,
           [](auto row) {
-            return geometricMean(
+            return geometricMean<double>(
+                0, 10, 1, [&](int32_t i) { return row + i * 0.1; });
+          }),
+  });
+
+  testAggregations({data}, {"c0"}, {"geometric_mean(c1)"}, {expected});
+}
+
+TEST_F(GeometricMeanTest, globalReals) {
+  auto data = makeRowVector({
+      makeFlatVector<float>(100, [](auto row) { return row * 0.1 / 7; }),
+  });
+
+  auto expected = makeRowVector({
+      makeFlatVector(std::vector<float>{geometricMean<float>(
+          0, 100, 1, [&](int32_t i) { return i * 0.1 / 7; })}),
+  });
+
+  testAggregations({data}, {}, {"geometric_mean(c0)"}, {expected});
+}
+
+TEST_F(GeometricMeanTest, groupByReals) {
+  auto data = makeRowVector({
+      makeFlatVector<int32_t>(100, [](auto row) { return row / 10; }),
+      makeFlatVector<float>(100, [](auto row) { return row * 0.1; }),
+  });
+
+  auto expected = makeRowVector({
+      makeFlatVector<int32_t>({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}),
+      makeFlatVector<float>(
+          10,
+          [](auto row) {
+            return geometricMean<float>(
                 0, 10, 1, [&](int32_t i) { return row + i * 0.1; });
           }),
   });
@@ -185,7 +218,7 @@ TEST_F(GeometricMeanTest, groupByMultipleBatches) {
       makeFlatVector<double>(
           10,
           [](auto row) {
-            return geometricMean(
+            return geometricMean<double>(
                 0, 10, 1, [&](int32_t i) { return row * 10 + i; });
           }),
   });