From d7f9ca453c33ff0ba75e3b5605e63bbb4e35d442 Mon Sep 17 00:00:00 2001 From: xzhangxian1008 Date: Wed, 29 Jun 2022 16:49:26 +0800 Subject: [PATCH 01/12] init agg works --- .../tests/gtest_aggregation_executor.cpp | 97 +++++++++++++++++++ .../Flash/tests/gtest_projection_executor.cpp | 42 ++++---- dbms/src/Flash/tests/gtest_topn_executor.cpp | 14 ++- dbms/src/TestUtils/ExecutorTestUtils.cpp | 8 ++ dbms/src/TestUtils/ExecutorTestUtils.h | 6 ++ 5 files changed, 140 insertions(+), 27 deletions(-) create mode 100644 dbms/src/Flash/tests/gtest_aggregation_executor.cpp diff --git a/dbms/src/Flash/tests/gtest_aggregation_executor.cpp b/dbms/src/Flash/tests/gtest_aggregation_executor.cpp new file mode 100644 index 00000000000..920fa3e7ce6 --- /dev/null +++ b/dbms/src/Flash/tests/gtest_aggregation_executor.cpp @@ -0,0 +1,97 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include + +namespace DB +{ +namespace tests +{ + +class ExecutorAggTestRunner : public DB::tests::ExecutorTest +{ +public: + using ColStringType = std::optional::FieldType>; + using ColInt32Type = std::optional::FieldType>; + using ColumnWithString = std::vector; + using ColumnWithInt32 = std::vector; + + void initializeContext() override + { + ExecutorTest::initializeContext(); + + context.addMockTable({db_name, table_name}, + {{col_name[0], TiDB::TP::TypeLong}, + {col_name[1], TiDB::TP::TypeString}, + {col_name[2], TiDB::TP::TypeString}, + {col_name[3], TiDB::TP::TypeLong}}, + {toNullableVec(col_name[0], col_age), + toNullableVec(col_name[1], col_gender), + toNullableVec(col_name[2], col_country), + toNullableVec(col_name[3], col_salary)}); + } + + std::shared_ptr buildDAGRequest(MockAsts agg_funcs, MockAsts group_by_exprs, MockOrderByItems order_by_items, MockColumnNames proj) + { + /// We can filter the group by column with project operator. + /// topN is applied to get stable results in concurrency environment. 
+ return context.scan(db_name, table_name).aggregation(agg_funcs, group_by_exprs).topN(order_by_items, 100).project(proj).build(context); + } + + /// Prepare some data and names + const String db_name{"test_db"}; + const String table_name{"clerk"}; + const std::vector col_name{"age", "gender", "country", "salary"}; + ColumnWithInt32 col_age{30, {}, 27, 32, 25, 36, {}, 22, 34}; + ColumnWithString col_gender{"male", "female", "female", "male", "female", "female", "male", "female", "male", }; + ColumnWithString col_country{"russia", "korea", "usa", "usa", "usa", "china", "china", "china", "china"}; + ColumnWithInt32 col_salary{1000, 1300, 0, {}, -200, 900, -999, 2000, -300}; +}; + +TEST_F(ExecutorAggTestRunner, Aggregation) +try +{ + std::shared_ptr request; + auto agg_func0 = Max(col(col_name[0])); /// select max(age) from clerk group by country order by max(age) DESC limit 100; + auto agg_func1 = Max(col(col_name[3])); /// select max(salary) from clerk group by country, gender order by max(salary) DESC limit 100; + + auto group_by_expr0 = col(col_name[2]); + auto group_by_expr10 = col(col_name[2]); + auto group_by_expr11 = col(col_name[1]); + + /// Prepare some data for test + std::vector expect_cols{ + {toNullableVec("max(age)", ColumnWithInt32{36, 32, 30, {}})}, + {toNullableVec("max(salary)", ColumnWithInt32{2000, 1300, 1000, 0, -300, {}})} + }; + std::vector group_by_exprs{{group_by_expr0}, {group_by_expr10, group_by_expr11}}; + std::vector projections{{"max(age)"}, {"max(salary)"}}; + std::vector order_by_items{{MockOrderByItem("max(age)", true)}, {MockOrderByItem("max(salary)", true)}}; + std::vector agg_funcs{{agg_func0}, {agg_func1}}; + const size_t test_num = expect_cols.size(); + + /// Start to test + for (size_t i = 0; i < test_num; ++i) + { + request = buildDAGRequest(agg_funcs[i], group_by_exprs[i], order_by_items[i], projections[i]); + executeStreamsWithMultiConcurrency(request, expect_cols[i]); + } + + // TODO more aggregation functions... 
+} +CATCH + +} // namespace tests +} // namespace DB diff --git a/dbms/src/Flash/tests/gtest_projection_executor.cpp b/dbms/src/Flash/tests/gtest_projection_executor.cpp index 4f6401eb483..9da89a18786 100644 --- a/dbms/src/Flash/tests/gtest_projection_executor.cpp +++ b/dbms/src/Flash/tests/gtest_projection_executor.cpp @@ -50,14 +50,6 @@ class ExecutorProjectionTestRunner : public DB::tests::ExecutorTest return context.scan(db_name, table_name).project(param).topN(sort_col, false, 100).build(context); }; - void executeWithConcurrency(const std::shared_ptr & request, const ColumnsWithTypeAndName & expect_columns) - { - for (size_t i = 1; i < 10; i += 2) - { - executeStreams(request, expect_columns, i); - } - } - /// Prepare column data const ColDataString col0{"col0-0", "col0-1", "", "col0-2", {}, "col0-3", ""}; const ColDataString col1{"col1-0", {}, "", "col1-1", "", "col1-2", "col1-3"}; @@ -88,11 +80,11 @@ try { /// Check single column auto request = buildDAGRequest({col_names[4]}, col_names[4]); - executeWithConcurrency(request, {toNullableVec(col_names[4], col4_sorted_asc)}); + executeStreamsWithMultiConcurrency(request, {toNullableVec(col_names[4], col4_sorted_asc)}); /// Check multi columns request = buildDAGRequest({col_names[0], col_names[4]}, col_names[4]); - executeWithConcurrency(request, + executeStreamsWithMultiConcurrency(request, { toNullableVec(col_names[0], col0_sorted_asc), toNullableVec(col_names[4], col4_sorted_asc), @@ -100,14 +92,14 @@ try /// Check multi columns request = buildDAGRequest({col_names[0], col_names[1], col_names[4]}, col_names[4]); - executeWithConcurrency(request, + executeStreamsWithMultiConcurrency(request, {toNullableVec(col_names[0], col0_sorted_asc), toNullableVec(col_names[1], col1_sorted_asc), toNullableVec(col_names[4], col4_sorted_asc)}); /// Check duplicate columns request = buildDAGRequest({col_names[4], col_names[4], col_names[4]}, col_names[4]); - executeWithConcurrency(request, + 
executeStreamsWithMultiConcurrency(request, {toNullableVec(col_names[4], col4_sorted_asc), toNullableVec(col_names[4], col4_sorted_asc), toNullableVec(col_names[4], col4_sorted_asc)}); @@ -126,7 +118,7 @@ try } request = buildDAGRequest(projection_input, col_names[4]); - executeWithConcurrency(request, columns); + executeStreamsWithMultiConcurrency(request, columns); } } CATCH @@ -140,18 +132,18 @@ try /// Data type: TypeString request = buildDAGRequest({eq(col(col_names[0]), col(col_names[0])), col(col_names[4])}, col_names[4]); - executeWithConcurrency(request, + executeStreamsWithMultiConcurrency(request, {toNullableVec({{}, 1, 1, 1, 1, 1, 1}), toNullableVec(col_names[4], col4_sorted_asc)}); request = buildDAGRequest({eq(col(col_names[0]), col(col_names[1])), col(col_names[4])}, col_names[4]); - executeWithConcurrency(request, + executeStreamsWithMultiConcurrency(request, {toNullableVec({{}, 0, 1, 0, {}, 0, 0}), toNullableVec(col_names[4], col4_sorted_asc)}); /// Data type: TypeLong request = buildDAGRequest({eq(col(col_names[3]), col(col_names[4])), col(col_names[4])}, col_names[4]); - executeWithConcurrency(request, + executeStreamsWithMultiConcurrency(request, {toNullableVec({{}, 0, 0, 0, {}, 1, 0}), toNullableVec(col_names[4], col4_sorted_asc)}); @@ -160,23 +152,23 @@ try /// Data type: TypeString request = buildDAGRequest({gt(col(col_names[0]), col(col_names[1])), col(col_names[4])}, col_names[4]); - executeWithConcurrency(request, + executeStreamsWithMultiConcurrency(request, {toNullableVec({{}, 0, 0, 0, {}, 0, 0}), toNullableVec(col_names[4], col4_sorted_asc)}); request = buildDAGRequest({gt(col(col_names[1]), col(col_names[0])), col(col_names[4])}, col_names[4]); - executeWithConcurrency(request, + executeStreamsWithMultiConcurrency(request, {toNullableVec({{}, 1, 0, 1, {}, 1, 1}), toNullableVec(col_names[4], col4_sorted_asc)}); /// Data type: TypeLong request = buildDAGRequest({gt(col(col_names[3]), col(col_names[4])), col(col_names[4])}, col_names[4]); 
- executeWithConcurrency(request, + executeStreamsWithMultiConcurrency(request, {toNullableVec({{}, 0, 1, 1, {}, 0, 0}), toNullableVec(col_names[4], col4_sorted_asc)}); request = buildDAGRequest({gt(col(col_names[4]), col(col_names[3])), col(col_names[4])}, col_names[4]); - executeWithConcurrency(request, + executeStreamsWithMultiConcurrency(request, {toNullableVec({{}, 1, 0, 0, {}, 0, 1}), toNullableVec(col_names[4], col4_sorted_asc)}); @@ -185,18 +177,18 @@ try /// Data type: TypeString request = buildDAGRequest({And(col(col_names[0]), col(col_names[0])), col(col_names[4])}, col_names[4]); - executeWithConcurrency(request, + executeStreamsWithMultiConcurrency(request, {toNullableVec({{}, 0, 0, 0, 0, 0, 0}), toNullableVec(col_names[4], col4_sorted_asc)}); request = buildDAGRequest({And(col(col_names[0]), col(col_names[1])), col(col_names[4])}, col_names[4]); - executeWithConcurrency(request, + executeStreamsWithMultiConcurrency(request, {toNullableVec({0, 0, 0, 0, 0, 0, 0}), toNullableVec(col_names[4], col4_sorted_asc)}); /// Data type: TypeLong request = buildDAGRequest({And(col(col_names[3]), col(col_names[4])), col(col_names[4])}, col_names[4]); - executeWithConcurrency(request, + executeStreamsWithMultiConcurrency(request, {toNullableVec({{}, 1, 0, 0, {}, 1, 0}), toNullableVec(col_names[4], col4_sorted_asc)}); @@ -204,7 +196,7 @@ try /// Data type: TypeString request = buildDAGRequest({NOT(col(col_names[0])), NOT(col(col_names[1])), NOT(col(col_names[2])), col(col_names[4])}, col_names[4]); - executeWithConcurrency(request, + executeStreamsWithMultiConcurrency(request, {toNullableVec({{}, 1, 1, 1, 1, 1, 1}), toNullableVec({1, 1, 1, 1, {}, 1, 1}), toNullableVec({1, {}, 1, 1, 1, 1, {}}), @@ -212,7 +204,7 @@ try /// Data type: TypeLong request = buildDAGRequest({NOT(col(col_names[3])), NOT(col(col_names[4])), col(col_names[4])}, col_names[4]); - executeWithConcurrency(request, + executeStreamsWithMultiConcurrency(request, {toNullableVec({{}, 0, 1, 0, {}, 0, 1}), 
toNullableVec({{}, 0, 0, 1, 0, 0, 0}), toNullableVec(col_names[4], col4_sorted_asc)}); diff --git a/dbms/src/Flash/tests/gtest_topn_executor.cpp b/dbms/src/Flash/tests/gtest_topn_executor.cpp index 0e55702795d..74bdab73cc0 100644 --- a/dbms/src/Flash/tests/gtest_topn_executor.cpp +++ b/dbms/src/Flash/tests/gtest_topn_executor.cpp @@ -44,7 +44,7 @@ class ExecutorTopNTestRunner : public DB::tests::ExecutorTest {toNullableVec(col_name[0], col_age), toNullableVec(col_name[1], col_gender), toNullableVec(col_name[2], col_country), - toNullableVec(col_name[3], c0l_salary)}); + toNullableVec(col_name[3], col_salary)}); } std::shared_ptr buildDAGRequest(const String & table_name, const String & col_name, bool is_desc, int limit_num) @@ -72,7 +72,7 @@ class ExecutorTopNTestRunner : public DB::tests::ExecutorTest ColumnWithInt32 col_age{{}, 27, 32, 36, {}, 34}; ColumnWithString col_gender{"female", "female", "male", "female", "male", "male"}; ColumnWithString col_country{"korea", "usa", "usa", "china", "china", "china"}; - ColumnWithInt32 c0l_salary{1300, 0, {}, 900, {}, -300}; + ColumnWithInt32 col_salary{1300, 0, {}, 900, {}, -300}; }; TEST_F(ExecutorTopNTestRunner, TopN) @@ -217,5 +217,15 @@ try } CATCH +TEST_F(ExecutorTopNTestRunner, T) +try +{ + auto request = context.scan(db_name, table_name).aggregation({Max(col(col_name[3]))}, {col(col_name[1]), col(col_name[2])}).build(context); + std::vector expect_cols{{toNullableVec(col_name[0], ColumnWithInt32{1300})}}; + + executeStreams(request, expect_cols[0]); +} +CATCH + } // namespace tests } // namespace DB diff --git a/dbms/src/TestUtils/ExecutorTestUtils.cpp b/dbms/src/TestUtils/ExecutorTestUtils.cpp index 881ebaf88db..b86dedf5baa 100644 --- a/dbms/src/TestUtils/ExecutorTestUtils.cpp +++ b/dbms/src/TestUtils/ExecutorTestUtils.cpp @@ -134,6 +134,14 @@ void ExecutorTest::executeStreams(const std::shared_ptr & requ executeStreams(request, context.executorIdColumnsMap(), expect_columns, concurrency); } +void 
ExecutorTest::executeStreamsWithMultiConcurrency(const std::shared_ptr & request, const ColumnsWithTypeAndName & expect_columns, size_t max_concurrency, size_t step) +{ + for (size_t i = 1; i < max_concurrency; i += step) + { + executeStreams(request, expect_columns, i); + } +} + void ExecutorTest::executeStreamsWithSingleSource(const std::shared_ptr & request, const ColumnsWithTypeAndName & source_columns, const ColumnsWithTypeAndName & expect_columns, SourceType type, size_t concurrency) { std::unordered_map source_columns_map; diff --git a/dbms/src/TestUtils/ExecutorTestUtils.h b/dbms/src/TestUtils/ExecutorTestUtils.h index 87bb7115bed..a663355303b 100644 --- a/dbms/src/TestUtils/ExecutorTestUtils.h +++ b/dbms/src/TestUtils/ExecutorTestUtils.h @@ -89,6 +89,12 @@ class ExecutorTest : public ::testing::Test SourceType type = TableScan, size_t concurrency = 1); + void executeStreamsWithMultiConcurrency( + const std::shared_ptr & request, + const ColumnsWithTypeAndName & expect_columns, + size_t max_concurrency = 10, + size_t step = 2); + protected: MockDAGRequestContext context; std::unique_ptr dag_context_ptr; From 9e42c8e655317eaf5df20e5410be16daf61b7dff Mon Sep 17 00:00:00 2001 From: xzhangxian1008 Date: Wed, 13 Jul 2022 16:02:03 +0800 Subject: [PATCH 02/12] add more tests --- .../tests/gtest_aggregation_executor.cpp | 105 ++++++++++++++---- dbms/src/Flash/tests/gtest_topn_executor.cpp | 10 -- dbms/src/TestUtils/ExecutorTestUtils.h | 7 -- dbms/src/TestUtils/mockExecutor.h | 5 + 4 files changed, 89 insertions(+), 38 deletions(-) diff --git a/dbms/src/Flash/tests/gtest_aggregation_executor.cpp b/dbms/src/Flash/tests/gtest_aggregation_executor.cpp index 920fa3e7ce6..188e790d99d 100644 --- a/dbms/src/Flash/tests/gtest_aggregation_executor.cpp +++ b/dbms/src/Flash/tests/gtest_aggregation_executor.cpp @@ -23,10 +23,12 @@ namespace tests class ExecutorAggTestRunner : public DB::tests::ExecutorTest { public: - using ColStringType = std::optional::FieldType>; - using 
ColInt32Type = std::optional::FieldType>; - using ColumnWithString = std::vector; - using ColumnWithInt32 = std::vector; + using ColStringNullableType = std::optional::FieldType>; + using ColInt32NullableType = std::optional::FieldType>; + using ColUInt64Type = typename TypeTraits::FieldType; + using ColumnWithNullableString = std::vector; + using ColumnWithNullableInt32 = std::vector; + using ColumnWithUInt64 = std::vector; void initializeContext() override { @@ -43,24 +45,33 @@ class ExecutorAggTestRunner : public DB::tests::ExecutorTest toNullableVec(col_name[3], col_salary)}); } - std::shared_ptr buildDAGRequest(MockAsts agg_funcs, MockAsts group_by_exprs, MockOrderByItems order_by_items, MockColumnNames proj) + std::shared_ptr buildDAGRequest(MockAstVec agg_funcs, MockAstVec group_by_exprs, MockOrderByItemVec order_by_items, MockColumnNameVec proj) { /// We can filter the group by column with project operator. /// topN is applied to get stable results in concurrency environment. return context.scan(db_name, table_name).aggregation(agg_funcs, group_by_exprs).topN(order_by_items, 100).project(proj).build(context); } + void executeWithConcurrency(const std::shared_ptr & request, const ColumnsWithTypeAndName & expect_columns) + { + for (size_t i = 1; i < max_concurrency; i += step) + ASSERT_COLUMNS_EQ_R(expect_columns, executeStreams(request, i)); + } + + size_t max_concurrency = 10; + size_t step = 2; + /// Prepare some data and names const String db_name{"test_db"}; const String table_name{"clerk"}; const std::vector col_name{"age", "gender", "country", "salary"}; - ColumnWithInt32 col_age{30, {}, 27, 32, 25, 36, {}, 22, 34}; - ColumnWithString col_gender{"male", "female", "female", "male", "female", "female", "male", "female", "male", }; - ColumnWithString col_country{"russia", "korea", "usa", "usa", "usa", "china", "china", "china", "china"}; - ColumnWithInt32 col_salary{1000, 1300, 0, {}, -200, 900, -999, 2000, -300}; + ColumnWithNullableInt32 col_age{30, {}, 
27, 32, 25, 36, {}, 22, 34}; + ColumnWithNullableString col_gender{"male", "female", "female", "male", "female", "female", "male", "female", "male", }; + ColumnWithNullableString col_country{"russia", "korea", "usa", "usa", "usa", "china", "china", "china", "china"}; + ColumnWithNullableInt32 col_salary{1000, 1300, 0, {}, -200, 900, -999, 2000, -300}; }; -TEST_F(ExecutorAggTestRunner, Aggregation) +TEST_F(ExecutorAggTestRunner, AggregationMaxAndMin) try { std::shared_ptr request; @@ -71,27 +82,79 @@ try auto group_by_expr10 = col(col_name[2]); auto group_by_expr11 = col(col_name[1]); - /// Prepare some data for test + /// Prepare some data for max function test std::vector expect_cols{ - {toNullableVec("max(age)", ColumnWithInt32{36, 32, 30, {}})}, - {toNullableVec("max(salary)", ColumnWithInt32{2000, 1300, 1000, 0, -300, {}})} + {toNullableVec("max(age)", ColumnWithNullableInt32{36, 32, 30, {}})}, + {toNullableVec("max(salary)", ColumnWithNullableInt32{2000, 1300, 1000, 0, -300, {}})} }; - std::vector group_by_exprs{{group_by_expr0}, {group_by_expr10, group_by_expr11}}; - std::vector projections{{"max(age)"}, {"max(salary)"}}; - std::vector order_by_items{{MockOrderByItem("max(age)", true)}, {MockOrderByItem("max(salary)", true)}}; - std::vector agg_funcs{{agg_func0}, {agg_func1}}; - const size_t test_num = expect_cols.size(); + std::vector group_by_exprs{{group_by_expr0}, {group_by_expr10, group_by_expr11}}; + std::vector projections{{"max(age)"}, {"max(salary)"}}; + std::vector order_by_items{{MockOrderByItem("max(age)", true)}, {MockOrderByItem("max(salary)", true)}}; + std::vector agg_funcs{{agg_func0}, {agg_func1}}; + size_t test_num = expect_cols.size(); - /// Start to test + /// Start to test max function for (size_t i = 0; i < test_num; ++i) { request = buildDAGRequest(agg_funcs[i], group_by_exprs[i], order_by_items[i], projections[i]); - executeStreamsWithMultiConcurrency(request, expect_cols[i]); + executeWithConcurrency(request, expect_cols[i]); } - // 
TODO more aggregation functions... + /// Min function tests + + agg_func0 = Min(col(col_name[0])); /// select min(age) from clerk group by country order by min(age) DESC limit 100; + agg_func1 = Min(col(col_name[3])); /// select min(salary) from clerk group by country, gender order by min(salary) DESC limit 100; + + expect_cols = { + {toNullableVec("min(age)", ColumnWithNullableInt32{30, 25, 22, {}})}, + {toNullableVec("min(salary)", ColumnWithNullableInt32{1300, 1000, 900, -200, -999, {}})} + }; + projections = {{"min(age)"}, {"min(salary)"}}; + order_by_items = {{MockOrderByItem("min(age)", true)}, {MockOrderByItem("min(salary)", true)}}; + agg_funcs = {{agg_func0}, {agg_func1}}; + test_num = expect_cols.size(); + + /// Start to test min function + for (size_t i = 0; i < test_num; ++i) + { + request = buildDAGRequest(agg_funcs[i], group_by_exprs[i], order_by_items[i], projections[i]); + executeWithConcurrency(request, expect_cols[i]); + } } CATCH +TEST_F(ExecutorAggTestRunner, AggregationCount) +try +{ + /// Prepare some data + std::shared_ptr request; + auto agg_func0 = Count(col(col_name[0])); /// select count(age) from clerk group by country order by count(age) DESC limit 100; + auto agg_func1 = Count(col(col_name[1])); /// select count(gender) from clerk group by country, gender order by count(gender) DESC limit 100; + std::vector agg_funcs = {{agg_func0}, {agg_func1}}; + + auto group_by_expr0 = col(col_name[2]); + auto group_by_expr10 = col(col_name[2]); + auto group_by_expr11 = col(col_name[1]); + + std::vector expect_cols { + {toVec("count(age)", ColumnWithUInt64{3, 3, 1, 0})}, + {toVec("count(gender)", ColumnWithUInt64{2, 2, 2, 1, 1, 1})} + }; + std::vector group_by_exprs{{group_by_expr0}, {group_by_expr10, group_by_expr11}}; + std::vector projections{{"count(age)"}, {"count(gender)"}}; + std::vector order_by_items{{MockOrderByItem("count(age)", true)}, {MockOrderByItem("count(gender)", true)}}; + size_t test_num = expect_cols.size(); + + /// Start to 
test + for (size_t i = 0; i < test_num; ++i) + { + request = buildDAGRequest({agg_funcs[i]}, group_by_exprs[i], order_by_items[i], projections[i]); + executeWithConcurrency(request, expect_cols[i]); + } +} +CATCH + +// TODO more aggregation functions... + } // namespace tests } // namespace DB diff --git a/dbms/src/Flash/tests/gtest_topn_executor.cpp b/dbms/src/Flash/tests/gtest_topn_executor.cpp index a1270075129..d79573524ad 100644 --- a/dbms/src/Flash/tests/gtest_topn_executor.cpp +++ b/dbms/src/Flash/tests/gtest_topn_executor.cpp @@ -217,15 +217,5 @@ try } CATCH -TEST_F(ExecutorTopNTestRunner, T) -try -{ - auto request = context.scan(db_name, table_name).aggregation({Max(col(col_name[3]))}, {col(col_name[1]), col(col_name[2])}).build(context); - std::vector expect_cols{{toNullableVec(col_name[0], ColumnWithInt32{1300})}}; - - executeStreams(request, expect_cols[0]); -} -CATCH - } // namespace tests } // namespace DB diff --git a/dbms/src/TestUtils/ExecutorTestUtils.h b/dbms/src/TestUtils/ExecutorTestUtils.h index a99f27b29bc..50df99e6dcc 100644 --- a/dbms/src/TestUtils/ExecutorTestUtils.h +++ b/dbms/src/TestUtils/ExecutorTestUtils.h @@ -88,13 +88,6 @@ class ExecutorTest : public ::testing::Test const ColumnsWithTypeAndName & source_columns, SourceType type = TableScan, size_t concurrency = 1); - - void executeStreamsWithMultiConcurrency( - const std::shared_ptr & request, - const ColumnsWithTypeAndName & expect_columns, - size_t max_concurrency = 10, - size_t step = 2); - protected: MockDAGRequestContext context; std::unique_ptr dag_context_ptr; diff --git a/dbms/src/TestUtils/mockExecutor.h b/dbms/src/TestUtils/mockExecutor.h index 8b5a6d300ff..26876b3b21d 100644 --- a/dbms/src/TestUtils/mockExecutor.h +++ b/dbms/src/TestUtils/mockExecutor.h @@ -174,8 +174,13 @@ MockWindowFrame buildDefaultRowsFrame(); #define And(expr1, expr2) makeASTFunction("and", (expr1), (expr2)) #define Or(expr1, expr2) makeASTFunction("or", (expr1), (expr2)) #define NOT(expr) 
makeASTFunction("not", (expr)) + +// Aggregation functions #define Max(expr) makeASTFunction("max", (expr)) +#define Min(expr) makeASTFunction("min", (expr)) #define Sum(expr) makeASTFunction("sum", (expr)) +#define Count(expr) makeASTFunction("count", (expr)) + /// Window functions #define RowNumber() makeASTFunction("RowNumber") #define Rank() makeASTFunction("Rank") From 9e81bd2701cbc03929350c7e702c234b144d6f98 Mon Sep 17 00:00:00 2001 From: xzhangxian1008 Date: Wed, 13 Jul 2022 16:23:15 +0800 Subject: [PATCH 03/12] format --- .../tests/gtest_aggregation_executor.cpp | 23 ++++++++++++------- dbms/src/TestUtils/ExecutorTestUtils.h | 1 + 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/dbms/src/Flash/tests/gtest_aggregation_executor.cpp b/dbms/src/Flash/tests/gtest_aggregation_executor.cpp index 188e790d99d..b2911b52526 100644 --- a/dbms/src/Flash/tests/gtest_aggregation_executor.cpp +++ b/dbms/src/Flash/tests/gtest_aggregation_executor.cpp @@ -66,7 +66,17 @@ class ExecutorAggTestRunner : public DB::tests::ExecutorTest const String table_name{"clerk"}; const std::vector col_name{"age", "gender", "country", "salary"}; ColumnWithNullableInt32 col_age{30, {}, 27, 32, 25, 36, {}, 22, 34}; - ColumnWithNullableString col_gender{"male", "female", "female", "male", "female", "female", "male", "female", "male", }; + ColumnWithNullableString col_gender{ + "male", + "female", + "female", + "male", + "female", + "female", + "male", + "female", + "male", + }; ColumnWithNullableString col_country{"russia", "korea", "usa", "usa", "usa", "china", "china", "china", "china"}; ColumnWithNullableInt32 col_salary{1000, 1300, 0, {}, -200, 900, -999, 2000, -300}; }; @@ -85,8 +95,7 @@ try /// Prepare some data for max function test std::vector expect_cols{ {toNullableVec("max(age)", ColumnWithNullableInt32{36, 32, 30, {}})}, - {toNullableVec("max(salary)", ColumnWithNullableInt32{2000, 1300, 1000, 0, -300, {}})} - }; + {toNullableVec("max(salary)", 
ColumnWithNullableInt32{2000, 1300, 1000, 0, -300, {}})}}; std::vector group_by_exprs{{group_by_expr0}, {group_by_expr10, group_by_expr11}}; std::vector projections{{"max(age)"}, {"max(salary)"}}; std::vector order_by_items{{MockOrderByItem("max(age)", true)}, {MockOrderByItem("max(salary)", true)}}; @@ -107,8 +116,7 @@ try expect_cols = { {toNullableVec("min(age)", ColumnWithNullableInt32{30, 25, 22, {}})}, - {toNullableVec("min(salary)", ColumnWithNullableInt32{1300, 1000, 900, -200, -999, {}})} - }; + {toNullableVec("min(salary)", ColumnWithNullableInt32{1300, 1000, 900, -200, -999, {}})}}; projections = {{"min(age)"}, {"min(salary)"}}; order_by_items = {{MockOrderByItem("min(age)", true)}, {MockOrderByItem("min(salary)", true)}}; agg_funcs = {{agg_func0}, {agg_func1}}; @@ -136,10 +144,9 @@ try auto group_by_expr10 = col(col_name[2]); auto group_by_expr11 = col(col_name[1]); - std::vector expect_cols { + std::vector expect_cols{ {toVec("count(age)", ColumnWithUInt64{3, 3, 1, 0})}, - {toVec("count(gender)", ColumnWithUInt64{2, 2, 2, 1, 1, 1})} - }; + {toVec("count(gender)", ColumnWithUInt64{2, 2, 2, 1, 1, 1})}}; std::vector group_by_exprs{{group_by_expr0}, {group_by_expr10, group_by_expr11}}; std::vector projections{{"count(age)"}, {"count(gender)"}}; std::vector order_by_items{{MockOrderByItem("count(age)", true)}, {MockOrderByItem("count(gender)", true)}}; diff --git a/dbms/src/TestUtils/ExecutorTestUtils.h b/dbms/src/TestUtils/ExecutorTestUtils.h index 50df99e6dcc..59b829e04b5 100644 --- a/dbms/src/TestUtils/ExecutorTestUtils.h +++ b/dbms/src/TestUtils/ExecutorTestUtils.h @@ -88,6 +88,7 @@ class ExecutorTest : public ::testing::Test const ColumnsWithTypeAndName & source_columns, SourceType type = TableScan, size_t concurrency = 1); + protected: MockDAGRequestContext context; std::unique_ptr dag_context_ptr; From 49d74b763fa99169d5ab61babe1e228d238ff5bd Mon Sep 17 00:00:00 2001 From: xzhangxian1008 Date: Thu, 14 Jul 2022 14:35:06 +0800 Subject: [PATCH 04/12] 
update --- .../tests/gtest_aggregation_executor.cpp | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/dbms/src/Flash/tests/gtest_aggregation_executor.cpp b/dbms/src/Flash/tests/gtest_aggregation_executor.cpp index b2911b52526..517b81adc70 100644 --- a/dbms/src/Flash/tests/gtest_aggregation_executor.cpp +++ b/dbms/src/Flash/tests/gtest_aggregation_executor.cpp @@ -25,9 +25,12 @@ class ExecutorAggTestRunner : public DB::tests::ExecutorTest public: using ColStringNullableType = std::optional::FieldType>; using ColInt32NullableType = std::optional::FieldType>; + using ColFloat64NullableType = std::optional::FieldType>; using ColUInt64Type = typename TypeTraits::FieldType; + using ColumnWithNullableString = std::vector; using ColumnWithNullableInt32 = std::vector; + using ColumnWithNullableFloat64 = std::vector; using ColumnWithUInt64 = std::vector; void initializeContext() override @@ -38,24 +41,24 @@ class ExecutorAggTestRunner : public DB::tests::ExecutorTest {{col_name[0], TiDB::TP::TypeLong}, {col_name[1], TiDB::TP::TypeString}, {col_name[2], TiDB::TP::TypeString}, - {col_name[3], TiDB::TP::TypeLong}}, + {col_name[3], TiDB::TP::TypeDouble}}, {toNullableVec(col_name[0], col_age), toNullableVec(col_name[1], col_gender), toNullableVec(col_name[2], col_country), - toNullableVec(col_name[3], col_salary)}); + toNullableVec(col_name[3], col_salary)}); } - std::shared_ptr buildDAGRequest(MockAstVec agg_funcs, MockAstVec group_by_exprs, MockOrderByItemVec order_by_items, MockColumnNameVec proj) + std::shared_ptr buildDAGRequest(MockAstVec agg_funcs, MockAstVec group_by_exprs, MockColumnNameVec proj) { /// We can filter the group by column with project operator. - /// topN is applied to get stable results in concurrency environment. 
- return context.scan(db_name, table_name).aggregation(agg_funcs, group_by_exprs).topN(order_by_items, 100).project(proj).build(context); + /// project is applied to get single column for comparison + return context.scan(db_name, table_name).aggregation(agg_funcs, group_by_exprs).project(proj).build(context); } void executeWithConcurrency(const std::shared_ptr & request, const ColumnsWithTypeAndName & expect_columns) { - for (size_t i = 1; i < max_concurrency; i += step) - ASSERT_COLUMNS_EQ_R(expect_columns, executeStreams(request, i)); + for (size_t i = 1; i <= max_concurrency; i += step) + ASSERT_COLUMNS_EQ_UR(expect_columns, executeStreams(request, i)); } size_t max_concurrency = 10; @@ -78,7 +81,7 @@ class ExecutorAggTestRunner : public DB::tests::ExecutorTest "male", }; ColumnWithNullableString col_country{"russia", "korea", "usa", "usa", "usa", "china", "china", "china", "china"}; - ColumnWithNullableInt32 col_salary{1000, 1300, 0, {}, -200, 900, -999, 2000, -300}; + ColumnWithNullableFloat64 col_salary{1000.1, 1300.2, 0.3, {}, -200.4, 900.5, -999.6, 2000.7, -300.8}; }; TEST_F(ExecutorAggTestRunner, AggregationMaxAndMin) @@ -95,17 +98,16 @@ try /// Prepare some data for max function test std::vector expect_cols{ {toNullableVec("max(age)", ColumnWithNullableInt32{36, 32, 30, {}})}, - {toNullableVec("max(salary)", ColumnWithNullableInt32{2000, 1300, 1000, 0, -300, {}})}}; + {toNullableVec("max(salary)", ColumnWithNullableFloat64{2000.7, 1300.2, 1000.1, 0.3, -300.8, {}})}}; std::vector group_by_exprs{{group_by_expr0}, {group_by_expr10, group_by_expr11}}; std::vector projections{{"max(age)"}, {"max(salary)"}}; - std::vector order_by_items{{MockOrderByItem("max(age)", true)}, {MockOrderByItem("max(salary)", true)}}; std::vector agg_funcs{{agg_func0}, {agg_func1}}; size_t test_num = expect_cols.size(); /// Start to test max function for (size_t i = 0; i < test_num; ++i) { - request = buildDAGRequest(agg_funcs[i], group_by_exprs[i], order_by_items[i], 
projections[i]); + request = buildDAGRequest(agg_funcs[i], group_by_exprs[i], projections[i]); executeWithConcurrency(request, expect_cols[i]); } @@ -116,16 +118,15 @@ try expect_cols = { {toNullableVec("min(age)", ColumnWithNullableInt32{30, 25, 22, {}})}, - {toNullableVec("min(salary)", ColumnWithNullableInt32{1300, 1000, 900, -200, -999, {}})}}; + {toNullableVec("min(salary)", ColumnWithNullableFloat64{1300.2, 1000.1, 900.5, -200.4, -999.6, {}})}}; projections = {{"min(age)"}, {"min(salary)"}}; - order_by_items = {{MockOrderByItem("min(age)", true)}, {MockOrderByItem("min(salary)", true)}}; agg_funcs = {{agg_func0}, {agg_func1}}; test_num = expect_cols.size(); /// Start to test min function for (size_t i = 0; i < test_num; ++i) { - request = buildDAGRequest(agg_funcs[i], group_by_exprs[i], order_by_items[i], projections[i]); + request = buildDAGRequest(agg_funcs[i], group_by_exprs[i], projections[i]); executeWithConcurrency(request, expect_cols[i]); } } @@ -149,13 +150,12 @@ try {toVec("count(gender)", ColumnWithUInt64{2, 2, 2, 1, 1, 1})}}; std::vector group_by_exprs{{group_by_expr0}, {group_by_expr10, group_by_expr11}}; std::vector projections{{"count(age)"}, {"count(gender)"}}; - std::vector order_by_items{{MockOrderByItem("count(age)", true)}, {MockOrderByItem("count(gender)", true)}}; size_t test_num = expect_cols.size(); /// Start to test for (size_t i = 0; i < test_num; ++i) { - request = buildDAGRequest({agg_funcs[i]}, group_by_exprs[i], order_by_items[i], projections[i]); + request = buildDAGRequest({agg_funcs[i]}, group_by_exprs[i], projections[i]); executeWithConcurrency(request, expect_cols[i]); } } From afbcb6e8823e7229ea02cc88c9d1c2f65e7a57ef Mon Sep 17 00:00:00 2001 From: xzhangxian1008 Date: Fri, 15 Jul 2022 16:04:34 +0800 Subject: [PATCH 05/12] refine --- dbms/src/Flash/tests/gtest_aggregation_executor.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dbms/src/Flash/tests/gtest_aggregation_executor.cpp 
b/dbms/src/Flash/tests/gtest_aggregation_executor.cpp index 517b81adc70..857282ab81a 100644 --- a/dbms/src/Flash/tests/gtest_aggregation_executor.cpp +++ b/dbms/src/Flash/tests/gtest_aggregation_executor.cpp @@ -37,11 +37,13 @@ class ExecutorAggTestRunner : public DB::tests::ExecutorTest { ExecutorTest::initializeContext(); - context.addMockTable({db_name, table_name}, + context.addMockTable(/* name= */ {db_name, table_name}, + /* columnInfos= */ {{col_name[0], TiDB::TP::TypeLong}, {col_name[1], TiDB::TP::TypeString}, {col_name[2], TiDB::TP::TypeString}, {col_name[3], TiDB::TP::TypeDouble}}, + /* columns= */ {toNullableVec(col_name[0], col_age), toNullableVec(col_name[1], col_gender), toNullableVec(col_name[2], col_country), From 68ba6861ac4e38109ef62bc69ff660c537e668e7 Mon Sep 17 00:00:00 2001 From: xzhangxian1008 Date: Tue, 19 Jul 2022 11:22:32 +0800 Subject: [PATCH 06/12] update --- .../tests/gtest_aggregation_executor.cpp | 140 +++++++++++++++++- 1 file changed, 134 insertions(+), 6 deletions(-) diff --git a/dbms/src/Flash/tests/gtest_aggregation_executor.cpp b/dbms/src/Flash/tests/gtest_aggregation_executor.cpp index 857282ab81a..00ba8366a3b 100644 --- a/dbms/src/Flash/tests/gtest_aggregation_executor.cpp +++ b/dbms/src/Flash/tests/gtest_aggregation_executor.cpp @@ -20,23 +20,69 @@ namespace DB namespace tests { +#define DT DecimalField +#define COL_GROUP2(a, b) {col(types_col_name[a]), col(types_col_name[b])} +#define COL_PROJ2(a, b) {types_col_name[a], types_col_name[b]} + class ExecutorAggTestRunner : public DB::tests::ExecutorTest { public: using ColStringNullableType = std::optional::FieldType>; + using ColInt8NullableType = std::optional::FieldType>; + using ColInt16NullableType = std::optional::FieldType>; using ColInt32NullableType = std::optional::FieldType>; + using ColInt64NullableType = std::optional::FieldType>; + using ColFloat32NullableType = std::optional::FieldType>; using ColFloat64NullableType = std::optional::FieldType>; + using 
ColMyDateNullableType = std::optional::FieldType>; + using ColMyDateTimeNullableType = std::optional::FieldType>; + using ColDecimalNullableType = std::optional>::FieldType>; using ColUInt64Type = typename TypeTraits::FieldType; using ColumnWithNullableString = std::vector; + using ColumnWithNullableInt8 = std::vector; + using ColumnWithNullableInt16 = std::vector; using ColumnWithNullableInt32 = std::vector; + using ColumnWithNullableInt64 = std::vector; + using ColumnWithNullableFloat32 = std::vector; using ColumnWithNullableFloat64 = std::vector; + using ColumnWithNullableMyDate = std::vector; + using ColumnWithNullableMyDateTime = std::vector; + using ColumnWithNullableDecimal = std::vector; using ColumnWithUInt64 = std::vector; void initializeContext() override { ExecutorTest::initializeContext(); + /// Create table for tests of group by + context.addMockTable(/* name= */ {db_name, table_types}, + /* columnInfos= */ + {{types_col_name[0], TiDB::TP::TypeLong}, + {types_col_name[1], TiDB::TP::TypeDecimal}, + {types_col_name[2], TiDB::TP::TypeTiny}, + {types_col_name[3], TiDB::TP::TypeShort}, + {types_col_name[4], TiDB::TP::TypeLong}, + {types_col_name[5], TiDB::TP::TypeLongLong}, + {types_col_name[6], TiDB::TP::TypeFloat}, + {types_col_name[7], TiDB::TP::TypeDouble}, + {types_col_name[8], TiDB::TP::TypeDate}, + {types_col_name[9], TiDB::TP::TypeDatetime}, + {types_col_name[10], TiDB::TP::TypeString}}, + /* columns= */ + {toNullableVec(types_col_name[0], col_id), + toNullableVec(types_col_name[1], col_decimal), + toNullableVec(types_col_name[2], col_tinyint), + toNullableVec(types_col_name[3], col_smallint), + toNullableVec(types_col_name[4], col_int), + toNullableVec(types_col_name[5], col_bigint), + toNullableVec(types_col_name[6], col_float), + toNullableVec(types_col_name[7], col_double), + toNullableVec(types_col_name[8], col_mydate), + toNullableVec(types_col_name[9], col_mydatetime), + toNullableVec(types_col_name[10], col_string)}); + + /// Create table 
for tests of aggregation functions context.addMockTable(/* name= */ {db_name, table_name}, /* columnInfos= */ {{col_name[0], TiDB::TP::TypeLong}, @@ -50,11 +96,11 @@ class ExecutorAggTestRunner : public DB::tests::ExecutorTest toNullableVec(col_name[3], col_salary)}); } - std::shared_ptr buildDAGRequest(MockAstVec agg_funcs, MockAstVec group_by_exprs, MockColumnNameVec proj) + std::shared_ptr buildDAGRequest(std::pair src, MockAstVec agg_funcs, MockAstVec group_by_exprs, MockColumnNameVec proj) { /// We can filter the group by column with project operator. /// project is applied to get single column for comparison - return context.scan(db_name, table_name).aggregation(agg_funcs, group_by_exprs).project(proj).build(context); + return context.scan(src.first, src.second).aggregation(agg_funcs, group_by_exprs).project(proj).build(context); } void executeWithConcurrency(const std::shared_ptr & request, const ColumnsWithTypeAndName & expect_columns) @@ -66,8 +112,24 @@ class ExecutorAggTestRunner : public DB::tests::ExecutorTest size_t max_concurrency = 10; size_t step = 2; - /// Prepare some data and names const String db_name{"test_db"}; + + /// Prepare some data and names for tests of group by + const String table_types{"types"}; + const std::vector types_col_name{"id", "decimal_", "tinyint_", "smallint_", "int_", "bigint_", "float_", "double_", "date_", "datetime_", "string_"}; + ColumnWithNullableInt32 col_id{1, 2, 3, 4, 5, 6, 7, 8, 9}; + ColumnWithNullableDecimal col_decimal{DT(55, 1), {}, DT(-24, 1), DT(40, 1), DT(-40, 1), DT(40, 1), {}, DT(55, 1), DT(0, 1)}; + ColumnWithNullableInt8 col_tinyint{1,2,3,{},{},0,0,-1,-2}; + ColumnWithNullableInt16 col_smallint{2, 3, {}, {}, 0, -1, -2, 4, 0}; + ColumnWithNullableInt32 col_int{4, {}, {}, 0, 123, -1, -1, 123, 4}; + ColumnWithNullableInt64 col_bigint{2, 2, {}, 0, -1, {}, -1, 0, 123}; + ColumnWithNullableFloat32 col_float{3.3, {}, 0, 4.0, 3.3, 5.6, -0.1, -0.1, {}}; + ColumnWithNullableFloat64 col_double{0.1, 0, 1.1, 1.1, 
1.2, {}, {}, -1.2, -1.2}; + ColumnWithNullableMyDate col_mydate{1000000, 2000000, {}, 300000, 1000000, {}, 0, 2000000, {}}; + ColumnWithNullableMyDateTime col_mydatetime{2000000, 0, {}, 3000000, 1000000, {}, 0, 2000000, 1000000}; + ColumnWithNullableString col_string{{}, "pingcap", "PingCAP", {}, "PINGCAP", "PingCAP", {}, "Shanghai", "Shanghai"}; + + /// Prepare some data and names for aggregation functions const String table_name{"clerk"}; const std::vector col_name{"age", "gender", "country", "salary"}; ColumnWithNullableInt32 col_age{30, {}, 27, 32, 25, 36, {}, 22, 34}; @@ -86,6 +148,72 @@ class ExecutorAggTestRunner : public DB::tests::ExecutorTest ColumnWithNullableFloat64 col_salary{1000.1, 1300.2, 0.3, {}, -200.4, 900.5, -999.6, 2000.7, -300.8}; }; +/// Guarantee the correctness of group by +TEST_F(ExecutorAggTestRunner, GroupBy) +try +{ + std::shared_ptr request; + std::vector group_by_exprs; + std::vector projections; + std::vector expect_cols; + size_t test_num; + + { + /// group by single column + group_by_exprs = {{col(types_col_name[2])}, {col(types_col_name[3])}, {col(types_col_name[4])}, {col(types_col_name[5])}, {col(types_col_name[6])}, {col(types_col_name[7])}, {col(types_col_name[8])}, {col(types_col_name[9])}, {col(types_col_name[10])}}; + projections = {{types_col_name[2]}, {types_col_name[3]}, {types_col_name[4]}, {types_col_name[5]}, {types_col_name[6]}, {types_col_name[7]}, {types_col_name[8]}, {types_col_name[9]}, {types_col_name[10]}}; + expect_cols = { + {toNullableVec(types_col_name[2], ColumnWithNullableInt8{-1, 2, {}, 0, 1, 3, -2})}, /// select tinyint_ from test_db.types group by tinyint_; + {toNullableVec(types_col_name[3], ColumnWithNullableInt16{-1, 2, -2, {}, 0, 4, 3})}, /// select smallint_ from test_db.types group by smallint_; + {toNullableVec(types_col_name[4], ColumnWithNullableInt32{-1, {}, 4, 0, 123})}, /// select int_ from test_db.types group by int_; + {toNullableVec(types_col_name[5], ColumnWithNullableInt64{2, -1, 0, 
123, {}})}, /// select bigint_ from test_db.types group by bigint_; + {toNullableVec(types_col_name[6], ColumnWithNullableFloat32{0, 4, 3.3, {}, 5.6, -0.1})}, /// select float_ from test_db.types group by float_; + {toNullableVec(types_col_name[7], ColumnWithNullableFloat64{0, {}, -1.2, 1.1, 1.2, 0.1})}, /// select double_ from test_db.types group by double_; + {toNullableVec(types_col_name[8], ColumnWithNullableMyDate{{}, 0, 300000, 1000000, 2000000})}, /// select date_ from test_db.types group by date_; + {toNullableVec(types_col_name[9], ColumnWithNullableMyDateTime{{}, 0, 1000000, 2000000, 3000000})}, /// select datetime_ from test_db.types group by datetime_; + {toNullableVec(types_col_name[10], ColumnWithNullableString{{}, "pingcap", "PingCAP", "PINGCAP", "Shanghai"})}}; /// select string_ from test_db.types group by string_; + test_num = expect_cols.size(); + ASSERT_EQ(group_by_exprs.size(), test_num); + ASSERT_EQ(projections.size(), test_num); + + for (size_t i = 0; i < test_num; ++i) + { + request = buildDAGRequest(std::make_pair(db_name, table_types), {}, group_by_exprs[i], projections[i]); + executeWithConcurrency(request, expect_cols[i]); + } + } + + { + /// group by two columns + group_by_exprs = {COL_GROUP2(2, 6), COL_GROUP2(3, 9), COL_GROUP2(4, 7), COL_GROUP2(5, 10), COL_GROUP2(8, 9), COL_GROUP2(9, 10)}; + projections = {COL_PROJ2(2, 6), COL_PROJ2(3, 9), COL_PROJ2(4, 7), COL_PROJ2(5, 10), COL_PROJ2(8, 9), COL_PROJ2(9, 10)}; + expect_cols = {{toNullableVec(types_col_name[2], ColumnWithNullableInt8{1, 2, {}, 3, 0, 0, -1, {}, -2}), + toNullableVec(types_col_name[6], ColumnWithNullableFloat32{3.3, {}, 4, 0, -0.1, 5.6, -0.1, 3.3, {}})}, + {toNullableVec(types_col_name[3], ColumnWithNullableInt16{2, 3, {}, {}, 0, -1, -2, 4}), + toNullableVec(types_col_name[9], ColumnWithNullableMyDateTime{2000000, 0, {}, 3000000, 1000000, {}, 0, 2000000})}, + {toNullableVec(types_col_name[4], ColumnWithNullableInt32{{}, 123, -1, 0, {}, 4, 4, 123}), + 
toNullableVec(types_col_name[7], ColumnWithNullableFloat64{0, -1.2, {}, 1.1, 1.1, -1.2, 0.1, 1.2})}, + {toNullableVec(types_col_name[5], ColumnWithNullableInt64{-1, 0, 0, 123, 2, {}, -1, 2}), + toNullableVec(types_col_name[10], ColumnWithNullableString{{}, {}, "Shanghai", "Shanghai", {}, "PingCAP", "PINGCAP", "pingcap"})}, + {toNullableVec(types_col_name[8], ColumnWithNullableMyDate{1000000, 2000000, {}, 300000, 1000000, 0, 2000000, {}}), + toNullableVec(types_col_name[9], ColumnWithNullableMyDateTime{2000000, 0, {}, 3000000, 1000000, 0, 2000000, 1000000})}, + {toNullableVec(types_col_name[9], ColumnWithNullableMyDateTime{2000000, 0, {}, 3000000, 1000000, 0, 2000000, 1000000}), + toNullableVec(types_col_name[10], ColumnWithNullableString{{}, "pingcap", "PingCAP", {}, "PINGCAP", {}, "Shanghai", "Shanghai"})}}; + test_num = expect_cols.size(); + ASSERT_EQ(group_by_exprs.size(), test_num); + ASSERT_EQ(projections.size(), test_num); + + for (size_t i = 0; i < test_num; ++i) + { + request = buildDAGRequest(std::make_pair(db_name, table_types), {}, group_by_exprs[i], projections[i]); + executeWithConcurrency(request, expect_cols[i]); + } + } + + /// TODO type: decimal, enum and unsigned numbers +} +CATCH + TEST_F(ExecutorAggTestRunner, AggregationMaxAndMin) try { @@ -109,7 +237,7 @@ try /// Start to test max function for (size_t i = 0; i < test_num; ++i) { - request = buildDAGRequest(agg_funcs[i], group_by_exprs[i], projections[i]); + request = buildDAGRequest(std::make_pair(db_name, table_name), agg_funcs[i], group_by_exprs[i], projections[i]); executeWithConcurrency(request, expect_cols[i]); } @@ -128,7 +256,7 @@ try /// Start to test min function for (size_t i = 0; i < test_num; ++i) { - request = buildDAGRequest(agg_funcs[i], group_by_exprs[i], projections[i]); + request = buildDAGRequest(std::make_pair(db_name, table_name), agg_funcs[i], group_by_exprs[i], projections[i]); executeWithConcurrency(request, expect_cols[i]); } } @@ -157,7 +285,7 @@ try /// Start to test 
for (size_t i = 0; i < test_num; ++i) { - request = buildDAGRequest({agg_funcs[i]}, group_by_exprs[i], projections[i]); + request = buildDAGRequest(std::make_pair(db_name, table_name), {agg_funcs[i]}, group_by_exprs[i], projections[i]); executeWithConcurrency(request, expect_cols[i]); } } From 80c412d28a6f0f74f32f5fc74847905b524cdbe8 Mon Sep 17 00:00:00 2001 From: xzhangxian1008 Date: Tue, 19 Jul 2022 11:34:31 +0800 Subject: [PATCH 07/12] update --- dbms/src/Flash/tests/gtest_aggregation_executor.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dbms/src/Flash/tests/gtest_aggregation_executor.cpp b/dbms/src/Flash/tests/gtest_aggregation_executor.cpp index 00ba8366a3b..917a4bf56e1 100644 --- a/dbms/src/Flash/tests/gtest_aggregation_executor.cpp +++ b/dbms/src/Flash/tests/gtest_aggregation_executor.cpp @@ -243,8 +243,8 @@ try /// Min function tests - agg_func0 = Min(col(col_name[0])); /// select min(age) from clerk group by country order by min(age) DESC limit 100; - agg_func1 = Min(col(col_name[3])); /// select min(salary) from clerk group by country, gender order by min(salary) DESC limit 100; + agg_func0 = Min(col(col_name[0])); /// select min(age) from clerk group by country; + agg_func1 = Min(col(col_name[3])); /// select min(salary) from clerk group by country, gender; expect_cols = { {toNullableVec("min(age)", ColumnWithNullableInt32{30, 25, 22, {}})}, @@ -267,8 +267,8 @@ try { /// Prepare some data std::shared_ptr request; - auto agg_func0 = Count(col(col_name[0])); /// select count(age) from clerk group by country order by count(age) DESC limit 100; - auto agg_func1 = Count(col(col_name[1])); /// select count(gender) from clerk group by country, gender order by count(gender) DESC limit 100; + auto agg_func0 = Count(col(col_name[0])); /// select count(age) from clerk group by country; + auto agg_func1 = Count(col(col_name[1])); /// select count(gender) from clerk group by country, gender; std::vector agg_funcs = {{agg_func0}, 
{agg_func1}}; auto group_by_expr0 = col(col_name[2]); From 6db334bd4c0f4f7e1bd1b5de97c35fb7fa5a3dcd Mon Sep 17 00:00:00 2001 From: xzhangxian1008 Date: Tue, 19 Jul 2022 11:38:25 +0800 Subject: [PATCH 08/12] format --- .../tests/gtest_aggregation_executor.cpp | 44 ++++++++++++------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/dbms/src/Flash/tests/gtest_aggregation_executor.cpp b/dbms/src/Flash/tests/gtest_aggregation_executor.cpp index 917a4bf56e1..a94b9c50969 100644 --- a/dbms/src/Flash/tests/gtest_aggregation_executor.cpp +++ b/dbms/src/Flash/tests/gtest_aggregation_executor.cpp @@ -21,8 +21,14 @@ namespace tests { #define DT DecimalField -#define COL_GROUP2(a, b) {col(types_col_name[a]), col(types_col_name[b])} -#define COL_PROJ2(a, b) {types_col_name[a], types_col_name[b]} +#define COL_GROUP2(a, b) \ + { \ + col(types_col_name[a]), col(types_col_name[b]) \ + } +#define COL_PROJ2(a, b) \ + { \ + types_col_name[a], types_col_name[b] \ + } class ExecutorAggTestRunner : public DB::tests::ExecutorTest { @@ -113,13 +119,13 @@ class ExecutorAggTestRunner : public DB::tests::ExecutorTest size_t step = 2; const String db_name{"test_db"}; - + /// Prepare some data and names for tests of group by const String table_types{"types"}; const std::vector types_col_name{"id", "decimal_", "tinyint_", "smallint_", "int_", "bigint_", "float_", "double_", "date_", "datetime_", "string_"}; ColumnWithNullableInt32 col_id{1, 2, 3, 4, 5, 6, 7, 8, 9}; ColumnWithNullableDecimal col_decimal{DT(55, 1), {}, DT(-24, 1), DT(40, 1), DT(-40, 1), DT(40, 1), {}, DT(55, 1), DT(0, 1)}; - ColumnWithNullableInt8 col_tinyint{1,2,3,{},{},0,0,-1,-2}; + ColumnWithNullableInt8 col_tinyint{1, 2, 3, {}, {}, 0, 0, -1, -2}; ColumnWithNullableInt16 col_smallint{2, 3, {}, {}, 0, -1, -2, 4, 0}; ColumnWithNullableInt32 col_int{4, {}, {}, 0, 123, -1, -1, 123, 4}; ColumnWithNullableInt64 col_bigint{2, 2, {}, 0, -1, {}, -1, 0, 123}; @@ -157,25 +163,25 @@ try std::vector projections; std::vector 
expect_cols; size_t test_num; - + { /// group by single column group_by_exprs = {{col(types_col_name[2])}, {col(types_col_name[3])}, {col(types_col_name[4])}, {col(types_col_name[5])}, {col(types_col_name[6])}, {col(types_col_name[7])}, {col(types_col_name[8])}, {col(types_col_name[9])}, {col(types_col_name[10])}}; projections = {{types_col_name[2]}, {types_col_name[3]}, {types_col_name[4]}, {types_col_name[5]}, {types_col_name[6]}, {types_col_name[7]}, {types_col_name[8]}, {types_col_name[9]}, {types_col_name[10]}}; expect_cols = { - {toNullableVec(types_col_name[2], ColumnWithNullableInt8{-1, 2, {}, 0, 1, 3, -2})}, /// select tinyint_ from test_db.types group by tinyint_; - {toNullableVec(types_col_name[3], ColumnWithNullableInt16{-1, 2, -2, {}, 0, 4, 3})}, /// select smallint_ from test_db.types group by smallint_; - {toNullableVec(types_col_name[4], ColumnWithNullableInt32{-1, {}, 4, 0, 123})}, /// select int_ from test_db.types group by int_; - {toNullableVec(types_col_name[5], ColumnWithNullableInt64{2, -1, 0, 123, {}})}, /// select bigint_ from test_db.types group by bigint_; - {toNullableVec(types_col_name[6], ColumnWithNullableFloat32{0, 4, 3.3, {}, 5.6, -0.1})}, /// select float_ from test_db.types group by float_; - {toNullableVec(types_col_name[7], ColumnWithNullableFloat64{0, {}, -1.2, 1.1, 1.2, 0.1})}, /// select double_ from test_db.types group by double_; - {toNullableVec(types_col_name[8], ColumnWithNullableMyDate{{}, 0, 300000, 1000000, 2000000})}, /// select date_ from test_db.types group by date_; - {toNullableVec(types_col_name[9], ColumnWithNullableMyDateTime{{}, 0, 1000000, 2000000, 3000000})}, /// select datetime_ from test_db.types group by datetime_; - {toNullableVec(types_col_name[10], ColumnWithNullableString{{}, "pingcap", "PingCAP", "PINGCAP", "Shanghai"})}}; /// select string_ from test_db.types group by string_; + {toNullableVec(types_col_name[2], ColumnWithNullableInt8{-1, 2, {}, 0, 1, 3, -2})}, /// select tinyint_ from 
test_db.types group by tinyint_; + {toNullableVec(types_col_name[3], ColumnWithNullableInt16{-1, 2, -2, {}, 0, 4, 3})}, /// select smallint_ from test_db.types group by smallint_; + {toNullableVec(types_col_name[4], ColumnWithNullableInt32{-1, {}, 4, 0, 123})}, /// select int_ from test_db.types group by int_; + {toNullableVec(types_col_name[5], ColumnWithNullableInt64{2, -1, 0, 123, {}})}, /// select bigint_ from test_db.types group by bigint_; + {toNullableVec(types_col_name[6], ColumnWithNullableFloat32{0, 4, 3.3, {}, 5.6, -0.1})}, /// select float_ from test_db.types group by float_; + {toNullableVec(types_col_name[7], ColumnWithNullableFloat64{0, {}, -1.2, 1.1, 1.2, 0.1})}, /// select double_ from test_db.types group by double_; + {toNullableVec(types_col_name[8], ColumnWithNullableMyDate{{}, 0, 300000, 1000000, 2000000})}, /// select date_ from test_db.types group by date_; + {toNullableVec(types_col_name[9], ColumnWithNullableMyDateTime{{}, 0, 1000000, 2000000, 3000000})}, /// select datetime_ from test_db.types group by datetime_; + {toNullableVec(types_col_name[10], ColumnWithNullableString{{}, "pingcap", "PingCAP", "PINGCAP", "Shanghai"})}}; /// select string_ from test_db.types group by string_; test_num = expect_cols.size(); ASSERT_EQ(group_by_exprs.size(), test_num); ASSERT_EQ(projections.size(), test_num); - + for (size_t i = 0; i < test_num; ++i) { request = buildDAGRequest(std::make_pair(db_name, table_types), {}, group_by_exprs[i], projections[i]); @@ -187,16 +193,22 @@ try /// group by two columns group_by_exprs = {COL_GROUP2(2, 6), COL_GROUP2(3, 9), COL_GROUP2(4, 7), COL_GROUP2(5, 10), COL_GROUP2(8, 9), COL_GROUP2(9, 10)}; projections = {COL_PROJ2(2, 6), COL_PROJ2(3, 9), COL_PROJ2(4, 7), COL_PROJ2(5, 10), COL_PROJ2(8, 9), COL_PROJ2(9, 10)}; - expect_cols = {{toNullableVec(types_col_name[2], ColumnWithNullableInt8{1, 2, {}, 3, 0, 0, -1, {}, -2}), + expect_cols = {/// select tinyint_, float_ from test_db.types group by tinyint_, float_; + 
{toNullableVec(types_col_name[2], ColumnWithNullableInt8{1, 2, {}, 3, 0, 0, -1, {}, -2}), toNullableVec(types_col_name[6], ColumnWithNullableFloat32{3.3, {}, 4, 0, -0.1, 5.6, -0.1, 3.3, {}})}, + /// select smallint_, datetime_ from test_db.types group by smallint_, datetime_; {toNullableVec(types_col_name[3], ColumnWithNullableInt16{2, 3, {}, {}, 0, -1, -2, 4}), toNullableVec(types_col_name[9], ColumnWithNullableMyDateTime{2000000, 0, {}, 3000000, 1000000, {}, 0, 2000000})}, + /// select int_, double_ from test_db.types group by int_, double_; {toNullableVec(types_col_name[4], ColumnWithNullableInt32{{}, 123, -1, 0, {}, 4, 4, 123}), toNullableVec(types_col_name[7], ColumnWithNullableFloat64{0, -1.2, {}, 1.1, 1.1, -1.2, 0.1, 1.2})}, + /// select bigint_, string_ from test_db.types group by bigint_, string_; {toNullableVec(types_col_name[5], ColumnWithNullableInt64{-1, 0, 0, 123, 2, {}, -1, 2}), toNullableVec(types_col_name[10], ColumnWithNullableString{{}, {}, "Shanghai", "Shanghai", {}, "PingCAP", "PINGCAP", "pingcap"})}, + /// select date_, datetime_ from test_db.types group by date_, datetime_; {toNullableVec(types_col_name[8], ColumnWithNullableMyDate{1000000, 2000000, {}, 300000, 1000000, 0, 2000000, {}}), toNullableVec(types_col_name[9], ColumnWithNullableMyDateTime{2000000, 0, {}, 3000000, 1000000, 0, 2000000, 1000000})}, + /// select datetime_, string_ from test_db.types group by datetime_, string_; {toNullableVec(types_col_name[9], ColumnWithNullableMyDateTime{2000000, 0, {}, 3000000, 1000000, 0, 2000000, 1000000}), toNullableVec(types_col_name[10], ColumnWithNullableString{{}, "pingcap", "PingCAP", {}, "PINGCAP", {}, "Shanghai", "Shanghai"})}}; test_num = expect_cols.size(); From 1bb7904cdeb16e51bedce1def785e2d3d447eadb Mon Sep 17 00:00:00 2001 From: xzhangxian1008 Date: Tue, 19 Jul 2022 11:41:16 +0800 Subject: [PATCH 09/12] update --- dbms/src/Flash/tests/gtest_aggregation_executor.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/dbms/src/Flash/tests/gtest_aggregation_executor.cpp b/dbms/src/Flash/tests/gtest_aggregation_executor.cpp index a94b9c50969..3cc76c1f912 100644 --- a/dbms/src/Flash/tests/gtest_aggregation_executor.cpp +++ b/dbms/src/Flash/tests/gtest_aggregation_executor.cpp @@ -230,8 +230,8 @@ TEST_F(ExecutorAggTestRunner, AggregationMaxAndMin) try { std::shared_ptr request; - auto agg_func0 = Max(col(col_name[0])); /// select max(age) from clerk group by country order by max(age) DESC limit 100; - auto agg_func1 = Max(col(col_name[3])); /// select max(salary) from clerk group by country, gender order by max(salary) DESC limit 100; + auto agg_func0 = Max(col(col_name[0])); /// select max(age) from clerk group by country; + auto agg_func1 = Max(col(col_name[3])); /// select max(salary) from clerk group by country, gender; auto group_by_expr0 = col(col_name[2]); auto group_by_expr10 = col(col_name[2]); From e831683807e14f3d3c3880bd1e0d8f9ae5cc549f Mon Sep 17 00:00:00 2001 From: xzhangxian1008 Date: Tue, 19 Jul 2022 12:33:16 +0800 Subject: [PATCH 10/12] tweaking --- dbms/src/Flash/tests/gtest_aggregation_executor.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/src/Flash/tests/gtest_aggregation_executor.cpp b/dbms/src/Flash/tests/gtest_aggregation_executor.cpp index 3cc76c1f912..ebb7ab60648 100644 --- a/dbms/src/Flash/tests/gtest_aggregation_executor.cpp +++ b/dbms/src/Flash/tests/gtest_aggregation_executor.cpp @@ -105,7 +105,7 @@ class ExecutorAggTestRunner : public DB::tests::ExecutorTest std::shared_ptr buildDAGRequest(std::pair src, MockAstVec agg_funcs, MockAstVec group_by_exprs, MockColumnNameVec proj) { /// We can filter the group by column with project operator. - /// project is applied to get single column for comparison + /// project is applied to get partial aggregation output, so that we can remove redundant outputs and compare results with less handwritten code. 
return context.scan(src.first, src.second).aggregation(agg_funcs, group_by_exprs).project(proj).build(context); } @@ -115,8 +115,8 @@ class ExecutorAggTestRunner : public DB::tests::ExecutorTest ASSERT_COLUMNS_EQ_UR(expect_columns, executeStreams(request, i)); } - size_t max_concurrency = 10; - size_t step = 2; + static const size_t max_concurrency = 10; + static const size_t step = 2; const String db_name{"test_db"}; From c1a18c883ed25af3ded891d4c88a8e95864c1245 Mon Sep 17 00:00:00 2001 From: xzhangxian1008 Date: Tue, 19 Jul 2022 17:26:24 +0800 Subject: [PATCH 11/12] update --- dbms/src/Flash/tests/gtest_aggregation_executor.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/src/Flash/tests/gtest_aggregation_executor.cpp b/dbms/src/Flash/tests/gtest_aggregation_executor.cpp index ebb7ab60648..6e8d21111c4 100644 --- a/dbms/src/Flash/tests/gtest_aggregation_executor.cpp +++ b/dbms/src/Flash/tests/gtest_aggregation_executor.cpp @@ -303,7 +303,8 @@ try } CATCH -// TODO more aggregation functions... +// TODO support more types of min, max, count. 
+// support more aggregation functions: sum, first_row, group_concat } // namespace tests } // namespace DB From 2b01a23b80c0ed8196c310853203b4a879932d78 Mon Sep 17 00:00:00 2001 From: xzhangxian1008 Date: Tue, 19 Jul 2022 17:38:46 +0800 Subject: [PATCH 12/12] Update dbms/src/Flash/tests/gtest_aggregation_executor.cpp Co-authored-by: SeaRise --- dbms/src/Flash/tests/gtest_aggregation_executor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Flash/tests/gtest_aggregation_executor.cpp b/dbms/src/Flash/tests/gtest_aggregation_executor.cpp index 6e8d21111c4..53a6591ac96 100644 --- a/dbms/src/Flash/tests/gtest_aggregation_executor.cpp +++ b/dbms/src/Flash/tests/gtest_aggregation_executor.cpp @@ -42,7 +42,7 @@ class ExecutorAggTestRunner : public DB::tests::ExecutorTest using ColFloat64NullableType = std::optional::FieldType>; using ColMyDateNullableType = std::optional::FieldType>; using ColMyDateTimeNullableType = std::optional::FieldType>; - using ColDecimalNullableType = std::optional>::FieldType>; + using ColDecimalNullableType = std::optional::FieldType>; using ColUInt64Type = typename TypeTraits::FieldType; using ColumnWithNullableString = std::vector;