diff --git a/velox/functions/prestosql/aggregates/tests/MinMaxByAggregationTest.cpp b/velox/functions/prestosql/aggregates/tests/MinMaxByAggregationTest.cpp index ecc17ad1d7ec..10008c618556 100644 --- a/velox/functions/prestosql/aggregates/tests/MinMaxByAggregationTest.cpp +++ b/velox/functions/prestosql/aggregates/tests/MinMaxByAggregationTest.cpp @@ -18,7 +18,6 @@ #include "velox/exec/tests/utils/PlanBuilder.h" #include "velox/functions/lib/aggregates/tests/utils/AggregationTestBase.h" #include "velox/functions/prestosql/aggregates/AggregateNames.h" -#include "velox/vector/fuzzer/VectorFuzzer.h" #include @@ -28,30 +27,43 @@ using facebook::velox::VectorFuzzer; namespace facebook::velox::aggregate::test { namespace { - +const int kTestPrecision = 38; +const int kTestScale = 10; +const std::vector kColumnNames = + {"c0", "c1", "c2", "c3", "c4", "c5", "c6", "c7", "c8", "c9"}; +const std::vector kSupportedTypes = { + BOOLEAN(), + TINYINT(), + SMALLINT(), + INTEGER(), + BIGINT(), + REAL(), + DOUBLE(), + VARCHAR(), + TIMESTAMP(), + DATE()}; + +// Test parmeters with value and compare types for min_by and max_by functions. struct TestParam { - // Specify the value type of minmax_by in test. - TypeKind valueType; - // Specify the comparison value type of minmax_by in test. - TypeKind comparisonType; + // Specify the index in kSupportedTypes for value type. + int valueTypeIndex; + // Specify the index in kSupportedTypes for compare type. + int compareTypeIndex; }; -const std::unordered_set kSupportedTypes = { - TypeKind::BOOLEAN, - TypeKind::TINYINT, - TypeKind::SMALLINT, - TypeKind::INTEGER, - TypeKind::BIGINT, - TypeKind::REAL, - TypeKind::DOUBLE, - TypeKind::VARCHAR, - TypeKind::TIMESTAMP}; +#define GET_VAL_TYPE_COL_NAME kColumnNames[GetParam().valueTypeIndex] +#define GET_COMPARE_TYPE_COL_NAME kColumnNames[GetParam().compareTypeIndex] +#define GET_VALUE_TYPEKIND kSupportedTypes[GetParam().valueTypeIndex]->kind() +#define GET_VALUE_TYPE kSupportedTypes[GetParam().valueTypeIndex] +#define GET_COMPARE_TYPE kSupportedTypes[GetParam().compareTypeIndex] +#define GET_COMPARE_TYPEKIND \ + kSupportedTypes[GetParam().compareTypeIndex]->kind() std::vector getTestParams() { std::vector params; - for (TypeKind valueType : kSupportedTypes) { - for (TypeKind comparisonType : kSupportedTypes) { - params.push_back({valueType, comparisonType}); + for (auto valueIndex = 0; valueIndex < kSupportedTypes.size(); ++valueIndex) { + for (auto colIndex = 0; colIndex < kSupportedTypes.size(); ++colIndex) { + params.push_back({valueIndex, colIndex}); } } return params; @@ -59,7 +71,7 @@ std::vector getTestParams() { #define EXECUTE_TEST_BY_VALUE_TYPE(testFunc, valueType) \ do { \ - switch (GetParam().comparisonType) { \ + switch (GET_COMPARE_TYPEKIND) { \ case TypeKind::BOOLEAN: \ testFunc(); \ break; \ @@ -89,13 +101,13 @@ std::vector getTestParams() { break; \ default: \ LOG(FATAL) << "Unsupported comparison type of minmax_by(): " \ - << mapTypeKindToName(GetParam().comparisonType); \ + << mapTypeKindToName(GET_COMPARE_TYPEKIND); \ } \ } while (0); #define EXECUTE_TEST(testFunc) \ do { \ - switch (GetParam().valueType) { \ + switch (GET_VALUE_TYPEKIND) { \ case TypeKind::BOOLEAN: \ EXECUTE_TEST_BY_VALUE_TYPE(testFunc, bool); \ break; \ @@ -125,7 +137,7 @@ std::vector getTestParams() { break; \ default: \ LOG(FATAL) << "Unsupported value type of minmax_by(): " \ - << mapTypeKindToName(GetParam().valueType); \ + << mapTypeKindToName(GET_VALUE_TYPEKIND); \ } \ } while (0); @@ -171,16 +183,16 @@ class MinMaxByAggregationTestBase : public AggregationTestBase { } // Get the column name in 'rowType_' for the given 'kind'. - std::string getColumnName(TypeKind kind) const { + std::string getColumnName(TypePtr type) const { for (int childIndex = 0; childIndex < rowType_->size(); ++childIndex) { const auto& childType = rowType_->childAt(childIndex); - if (childType->kind() == kind) { + if (childType == type) { return rowType_->nameOf(childIndex); } } VELOX_FAIL( "Type {} is not found in rowType_: ", - mapTypeKindToName(kind), + mapTypeKindToName(type->kind()), rowType_->toString()); } @@ -190,19 +202,8 @@ class MinMaxByAggregationTestBase : public AggregationTestBase { folly::Range values); const RowTypePtr rowType_{ - ROW({"c0", "c1", "c2", "c3", "c4", "c5", "c6", "c7", "c8", "c9"}, - { - BOOLEAN(), - TINYINT(), - SMALLINT(), - INTEGER(), - BIGINT(), - REAL(), - DOUBLE(), - VARCHAR(), - DATE(), - TIMESTAMP(), - })}; + ROW(std::vector(kColumnNames), + std::vector(kSupportedTypes))}; // Specify the number of values in each typed data vector in // 'dataVectorsByType_'. const int numValues_; @@ -310,42 +311,42 @@ void MinMaxByAggregationTestBase::SetUp() { AggregationTestBase::SetUp(); AggregationTestBase::disallowInputShuffle(); - for (const TypeKind type : kSupportedTypes) { - switch (type) { + for (const TypePtr type : kSupportedTypes) { + auto kind = type->kind(); + switch (kind) { case TypeKind::BOOLEAN: - dataVectorsByType_.emplace(type, buildDataVector(numValues_)); + dataVectorsByType_.emplace(kind, buildDataVector(numValues_)); break; case TypeKind::TINYINT: - dataVectorsByType_.emplace(type, buildDataVector(numValues_)); + dataVectorsByType_.emplace(kind, buildDataVector(numValues_)); break; case TypeKind::SMALLINT: - dataVectorsByType_.emplace(type, buildDataVector(numValues_)); + dataVectorsByType_.emplace(kind, buildDataVector(numValues_)); break; case TypeKind::INTEGER: - dataVectorsByType_.emplace(type, buildDataVector(numValues_)); + dataVectorsByType_.emplace(kind, buildDataVector(numValues_)); break; case TypeKind::BIGINT: - dataVectorsByType_.emplace(type, buildDataVector(numValues_)); + dataVectorsByType_.emplace(kind, buildDataVector(numValues_)); break; case TypeKind::REAL: - dataVectorsByType_.emplace(type, buildDataVector(numValues_)); + dataVectorsByType_.emplace(kind, buildDataVector(numValues_)); break; case TypeKind::DOUBLE: - dataVectorsByType_.emplace(type, buildDataVector(numValues_)); + dataVectorsByType_.emplace(kind, buildDataVector(numValues_)); break; case TypeKind::TIMESTAMP: dataVectorsByType_.emplace( - type, buildDataVector(numValues_)); + kind, buildDataVector(numValues_)); break; case TypeKind::VARCHAR: dataVectorsByType_.emplace( - type, buildDataVector(numValues_)); + kind, buildDataVector(numValues_)); break; default: - LOG(FATAL) << "Unsupported data type: " << mapTypeKindToName(type); + LOG(FATAL) << "Unsupported data type: " << mapTypeKindToName(kind); } } - ASSERT_EQ(dataVectorsByType_.size(), kSupportedTypes.size()); rowVectors_ = makeVectors(rowType_, 5, 10); createDuckDbTable(rowVectors_); }; @@ -405,11 +406,11 @@ class MinMaxByGlobalByAggregationTest // All null cases. {makeRowVector( {makeConstant(std::optional(dataAt(0)), 10), - makeNullConstant(GetParam().comparisonType, 10)}), + makeNullConstant(GET_COMPARE_TYPEKIND, 10)}), "SELECT NULL"}, {makeRowVector( - {makeNullConstant(GetParam().valueType, 10), + {makeNullConstant(GET_VALUE_TYPEKIND, 10), makeConstant(std::optional(dataAt(0)), 10)}), "SELECT NULL"}, @@ -503,11 +504,11 @@ class MinMaxByGlobalByAggregationTest // All null cases. {makeRowVector( {makeConstant(std::optional(dataAt(0)), 10), - makeNullConstant(GetParam().comparisonType, 10)}), + makeNullConstant(GET_COMPARE_TYPEKIND, 10)}), "SELECT NULL"}, {makeRowVector( - {makeNullConstant(GetParam().valueType, 10), + {makeNullConstant(GET_VALUE_TYPEKIND, 10), makeConstant(std::optional(dataAt(0)), 10)}), "SELECT NULL"}, @@ -589,50 +590,41 @@ TEST_P(MinMaxByGlobalByAggregationTest, randomMinByGlobalBy) { // to generate only valid Timestamp values. Currently, VectorFuzzer may // generate values that are too large (and therefore are not supported by // DuckDB). - if (GetParam().comparisonType == TypeKind::TIMESTAMP || - GetParam().valueType == TypeKind::TIMESTAMP) { + if (GET_COMPARE_TYPEKIND == TypeKind::TIMESTAMP || + GET_VALUE_TYPEKIND == TypeKind::TIMESTAMP) { return; } testGlobalAggregation( - rowVectors_, - kMinBy, - getColumnName(GetParam().valueType), - getColumnName(GetParam().comparisonType)); + rowVectors_, kMinBy, GET_VAL_TYPE_COL_NAME, GET_COMPARE_TYPE_COL_NAME); } TEST_P(MinMaxByGlobalByAggregationTest, randomMaxByGlobalBy) { - if (GetParam().comparisonType == TypeKind::TIMESTAMP || - GetParam().valueType == TypeKind::TIMESTAMP) { + if (GET_COMPARE_TYPEKIND == TypeKind::TIMESTAMP || + GET_VALUE_TYPEKIND == TypeKind::TIMESTAMP) { return; } testGlobalAggregation( - rowVectors_, - kMaxBy, - getColumnName(GetParam().valueType), - getColumnName(GetParam().comparisonType)); + rowVectors_, kMaxBy, GET_VAL_TYPE_COL_NAME, GET_COMPARE_TYPE_COL_NAME); } TEST_P( MinMaxByGlobalByAggregationTest, randomMaxByGlobalByWithDistinctCompareValue) { - if (GetParam().comparisonType == TypeKind::TIMESTAMP || - GetParam().valueType == TypeKind::TIMESTAMP || - GetParam().comparisonType == TypeKind::BOOLEAN) { + if (GET_COMPARE_TYPEKIND == TypeKind::TIMESTAMP || + GET_VALUE_TYPEKIND == TypeKind::TIMESTAMP || + GET_COMPARE_TYPEKIND == TypeKind::BOOLEAN) { return; } // Enable disk spilling test with distinct comparison values. AggregationTestBase::allowInputShuffle(); - auto rowType = - ROW({"c0", "c1"}, - {fromKindToScalerType(GetParam().valueType), - fromKindToScalerType(GetParam().comparisonType)}); + auto rowType = ROW({"c0", "c1"}, {GET_VALUE_TYPE, GET_COMPARE_TYPE}); - const bool isSmallInt = GetParam().comparisonType == TypeKind::TINYINT || - GetParam().comparisonType == TypeKind::SMALLINT; + const bool isSmallInt = GET_COMPARE_TYPEKIND == TypeKind::TINYINT || + GET_COMPARE_TYPEKIND == TypeKind::SMALLINT; const int kBatchSize = isSmallInt ? 1 << 4 : 1 << 10; const int kNumBatches = isSmallInt ? 4 : 10; const int kNumValues = kNumBatches * kBatchSize; @@ -652,9 +644,9 @@ TEST_P( for (int i = 0; i < kNumBatches; ++i) { // Generate a non-lazy vector so that it can be written out as a duckDB // table. - auto valueVector = fuzzer.fuzz(fromKindToScalerType(GetParam().valueType)); + auto valueVector = fuzzer.fuzz(GET_VALUE_TYPE); auto comparisonVector = buildDataVector( - GetParam().comparisonType, + GET_COMPARE_TYPEKIND, kBatchSize, folly::range(rawValues, rawValues + kBatchSize)); rawValues += kBatchSize; @@ -688,7 +680,7 @@ class MinMaxByGroupByAggregationTest const std::string aggregate = fmt::format( "{}({}, {})", aggName, valueColumnName, comparisonColumnName); std::string verifyDuckDbSql; - if (GetParam().valueType == TypeKind::BOOLEAN) { + if (GET_VALUE_TYPEKIND == TypeKind::BOOLEAN) { verifyDuckDbSql = fmt::format( "SELECT {}, CAST({} as BOOLEAN) FROM tmp GROUP BY {}", groupByColumnName, @@ -746,7 +738,7 @@ class MinMaxByGroupByAggregationTest // All null cases. {makeRowVector( - {makeNullConstant(GetParam().valueType, 6), + {makeNullConstant(GET_VALUE_TYPEKIND, 6), makeNullableFlatVector( {dataAt(4), dataAt(5), @@ -775,7 +767,7 @@ class MinMaxByGroupByAggregationTest dataAt(1), std::nullopt, dataAt(0)}), - makeNullConstant(GetParam().valueType, 6), + makeNullConstant(GET_VALUE_TYPEKIND, 6), makeNullableFlatVector( {dataAt(0), dataAt(0), @@ -899,7 +891,7 @@ class MinMaxByGroupByAggregationTest // All null cases. {makeRowVector( - {makeNullConstant(GetParam().valueType, 6), + {makeNullConstant(GET_VALUE_TYPEKIND, 6), makeNullableFlatVector( {dataAt(4), dataAt(5), @@ -928,7 +920,7 @@ class MinMaxByGroupByAggregationTest dataAt(1), std::nullopt, dataAt(0)}), - makeNullConstant(GetParam().valueType, 6), + makeNullConstant(GET_VALUE_TYPEKIND, 6), makeNullableFlatVector( {dataAt(0), dataAt(0), @@ -1027,39 +1019,39 @@ TEST_P(MinMaxByGroupByAggregationTest, maxByGroupBy) { } TEST_P(MinMaxByGroupByAggregationTest, randomMinByGroupBy) { - if (GetParam().comparisonType == TypeKind::TIMESTAMP || - GetParam().valueType == TypeKind::TIMESTAMP) { + if (GET_COMPARE_TYPEKIND == TypeKind::TIMESTAMP || + GET_VALUE_TYPEKIND == TypeKind::TIMESTAMP) { return; } testGroupByAggregation( rowVectors_, kMinBy, - getColumnName(GetParam().valueType), - getColumnName(GetParam().comparisonType), - getColumnName(TypeKind::INTEGER)); + getColumnName(GET_VALUE_TYPE), + getColumnName(GET_COMPARE_TYPE), + getColumnName(INTEGER())); } TEST_P(MinMaxByGroupByAggregationTest, randomMaxByGroupBy) { - if (GetParam().comparisonType == TypeKind::TIMESTAMP || - GetParam().valueType == TypeKind::TIMESTAMP) { + if (GET_COMPARE_TYPEKIND == TypeKind::TIMESTAMP || + GET_VALUE_TYPEKIND == TypeKind::TIMESTAMP) { return; } testGroupByAggregation( rowVectors_, kMaxBy, - getColumnName(GetParam().valueType), - getColumnName(GetParam().comparisonType), - getColumnName(TypeKind::INTEGER)); + getColumnName(GET_VALUE_TYPE), + getColumnName(GET_COMPARE_TYPE), + getColumnName(INTEGER())); } TEST_P( MinMaxByGroupByAggregationTest, randomMinMaxByGroupByWithDistinctCompareValue) { - if (GetParam().comparisonType == TypeKind::TIMESTAMP || - GetParam().valueType == TypeKind::TIMESTAMP || - GetParam().comparisonType == TypeKind::BOOLEAN) { + if (GET_COMPARE_TYPEKIND == TypeKind::TIMESTAMP || + GET_VALUE_TYPEKIND == TypeKind::TIMESTAMP || + GET_COMPARE_TYPEKIND == TypeKind::BOOLEAN) { return; } @@ -1067,13 +1059,10 @@ TEST_P( AggregationTestBase::allowInputShuffle(); auto rowType = - ROW({"c0", "c1", "c2"}, - {fromKindToScalerType(GetParam().valueType), - fromKindToScalerType(GetParam().comparisonType), - INTEGER()}); + ROW({"c0", "c1", "c2"}, {GET_VALUE_TYPE, GET_COMPARE_TYPE, INTEGER()}); - const bool isSmallInt = GetParam().comparisonType == TypeKind::TINYINT || - GetParam().comparisonType == TypeKind::SMALLINT; + const bool isSmallInt = GET_COMPARE_TYPEKIND == TypeKind::TINYINT || + GET_COMPARE_TYPEKIND == TypeKind::SMALLINT; const int kBatchSize = isSmallInt ? 1 << 4 : 1 << 10; const int kNumBatches = isSmallInt ? 3 : 10; const int kNumValues = kNumBatches * kBatchSize; @@ -1093,10 +1082,10 @@ TEST_P( for (int i = 0; i < kNumBatches; ++i) { // Generate a non-lazy vector so that it can be written out as a duckDB // table. - auto valueVector = fuzzer.fuzz(fromKindToScalerType(GetParam().valueType)); + auto valueVector = fuzzer.fuzz(GET_VALUE_TYPE); auto groupByVector = makeFlatVector(kBatchSize); auto comparisonVector = buildDataVector( - GetParam().comparisonType, + GET_COMPARE_TYPEKIND, kBatchSize, folly::range(rawValues, rawValues + kBatchSize)); rawValues += kBatchSize;