From 767844960683c9229d9461cf06dbd4fcc7e7e560 Mon Sep 17 00:00:00 2001 From: Roman Udovichenko Date: Thu, 28 Dec 2023 10:44:13 +0300 Subject: [PATCH] [yt provider] Don't omit YtMerge with KeepSorted setting (YQL-17413) (#755) * [yt provider] Don't omit YtMerge with KeepSorted setting * More fixes + tests --- .../yt/provider/yql_yt_logical_optimize.cpp | 12 +++-- .../provider/yql_yt_physical_finalizing.cpp | 6 +++ .../yt/provider/yql_yt_physical_optimize.cpp | 18 +++++++ .../sql/dq_file/part19/canondata/result.json | 22 +++++++++ .../tests/sql/sql2yql/canondata/result.json | 14 ++++++ .../sql/suites/optimizers/sorted_desc.txt | 10 ++++ .../suites/optimizers/sorted_desc.txt.attr | 47 +++++++++++++++++++ .../suites/optimizers/yql-17413-topsort.cfg | 2 + .../suites/optimizers/yql-17413-topsort.sql | 9 ++++ .../part19/canondata/result.json | 21 +++++++++ 10 files changed, 158 insertions(+), 3 deletions(-) create mode 100644 ydb/library/yql/tests/sql/suites/optimizers/sorted_desc.txt create mode 100644 ydb/library/yql/tests/sql/suites/optimizers/sorted_desc.txt.attr create mode 100644 ydb/library/yql/tests/sql/suites/optimizers/yql-17413-topsort.cfg create mode 100644 ydb/library/yql/tests/sql/suites/optimizers/yql-17413-topsort.sql diff --git a/ydb/library/yql/providers/yt/provider/yql_yt_logical_optimize.cpp b/ydb/library/yql/providers/yt/provider/yql_yt_logical_optimize.cpp index 19632907f4f5..35ca38e8c0d7 100644 --- a/ydb/library/yql/providers/yt/provider/yql_yt_logical_optimize.cpp +++ b/ydb/library/yql/providers/yt/provider/yql_yt_logical_optimize.cpp @@ -54,6 +54,7 @@ class TYtLogicalOptProposalTransformer : public TOptimizeTransformerBase { AddHandler(0, &TCoUnorderedBase::Match, HNDL(Unordered)); AddHandler(0, &TCoAggregate::Match, HNDL(CountAggregate)); AddHandler(0, &TYtReadTable::Match, HNDL(ZeroSampleToZeroLimit)); + AddHandler(0, &TCoMatchRecognize::Match, HNDL(MatchRecognize)); AddHandler(1, &TCoFilterNullMembers::Match, HNDL(FilterNullMemebers)); AddHandler(1, &TCoSkipNullMembers::Match, HNDL(FilterNullMemebers)); @@ -75,8 +76,6 @@ class TYtLogicalOptProposalTransformer : public TOptimizeTransformerBase { AddHandler(2, &TCoEquiJoin::Match, HNDL(ConvertToCommonTypeForForcedMergeJoin)); AddHandler(2, &TCoShuffleByKeys::Match, HNDL(ShuffleByKeys)); - - AddHandler(0, &TCoMatchRecognize::Match, HNDL(MatchRecognize)); #undef HNDL } @@ -139,13 +138,20 @@ class TYtLogicalOptProposalTransformer : public TOptimizeTransformerBase { effectiveColumns.insert(column.Name); } + if (NYql::HasSetting(op.Settings().Ref(), EYtSettingType::KeepSorted)) { + for (size_t i = 0; i < rowSpec->SortedBy.size(); ++i) { + const bool inserted = effectiveColumns.insert(rowSpec->SortedBy[i]).second; + keepColumns = keepColumns || inserted; + } + } + if (!path.Ranges().Maybe()) { // add columns which are implicitly used by path.Ranges(), but not included in path.Columns(); const auto ranges = TYtRangesInfo(path.Ranges()); const size_t usedKeyPrefix = ranges.GetUsedKeyPrefixLength(); YQL_ENSURE(usedKeyPrefix <= rowSpec->SortedBy.size()); for (size_t i = 0; i < usedKeyPrefix; ++i) { - bool inserted = effectiveColumns.insert(rowSpec->SortedBy[i]).second; + const bool inserted = effectiveColumns.insert(rowSpec->SortedBy[i]).second; keepColumns = keepColumns || inserted; } } diff --git a/ydb/library/yql/providers/yt/provider/yql_yt_physical_finalizing.cpp b/ydb/library/yql/providers/yt/provider/yql_yt_physical_finalizing.cpp index 16784b31aee6..935ea9e00d2a 100644 --- a/ydb/library/yql/providers/yt/provider/yql_yt_physical_finalizing.cpp +++ b/ydb/library/yql/providers/yt/provider/yql_yt_physical_finalizing.cpp @@ -631,6 +631,12 @@ class TYtPhysicalFinalizingTransformer : public TSyncTransformerBase { bool good = true; THashSet usedColumns; + if (NYql::HasSetting(*writer->Child(TYtTransientOpBase::idx_Settings), EYtSettingType::KeepSorted)) { + for (size_t i = 0; i < rowSpec.SortedBy.size(); ++i) { + usedColumns.insert(rowSpec.SortedBy[i]); + } + } + for (auto& item: x.second) { if (auto rawSection = std::get<1>(item)) { if (HasNonEmptyKeyFilter(TYtSection(rawSection))) { diff --git a/ydb/library/yql/providers/yt/provider/yql_yt_physical_optimize.cpp b/ydb/library/yql/providers/yt/provider/yql_yt_physical_optimize.cpp index 051a66cfc440..428913649c35 100644 --- a/ydb/library/yql/providers/yt/provider/yql_yt_physical_optimize.cpp +++ b/ydb/library/yql/providers/yt/provider/yql_yt_physical_optimize.cpp @@ -6934,6 +6934,18 @@ class TYtPhysicalOptProposalTransformer : public TOptimizeTransformerBase { continue; } + if (NYql::HasSetting(innerMerge.Settings().Ref(), EYtSettingType::KeepSorted)) { + if (!AllOf(innerMergeSection.Paths(), [](const auto& path) { + auto op = path.Table().template Maybe().Operation(); + return op && (op.template Maybe() || (op.Raw()->HasResult() && op.Raw()->GetResult().IsWorld())); + })) { + continue; + } + } + if (hasTakeSkip && AnyOf(innerMergeSection.Paths(), [](const auto& path) { return !path.Ranges().template Maybe(); })) { + continue; + } + const bool unordered = IsUnorderedOutput(path.Table().Cast()); auto mergeOutRowSpec = TYqlRowSpecInfo(innerMerge.Output().Item(0).RowSpec()); if (innerMergeSection.Paths().Size() > 1) { @@ -7461,6 +7473,12 @@ class TYtPhysicalOptProposalTransformer : public TOptimizeTransformerBase { if (NYql::HasNonEmptyKeyFilter(section)) { return node; } + if (NYql::HasSetting(merge.Settings().Ref(), EYtSettingType::KeepSorted)) { + auto op = path.Table().Maybe().Operation().Cast(); + if (!(op.Ref().HasResult() && op.Ref().GetResult().Type() == TExprNode::World || op.Maybe())) { + return node; + } + } TYtOutTableInfo outTableInfo(merge.Output().Item(0)); if (!tableInfo->RowSpec->CompareSortness(*outTableInfo.RowSpec)) { return node; diff --git a/ydb/library/yql/tests/sql/dq_file/part19/canondata/result.json b/ydb/library/yql/tests/sql/dq_file/part19/canondata/result.json index 8d939fd964a0..abb3de896cd6 100644 --- a/ydb/library/yql/tests/sql/dq_file/part19/canondata/result.json +++ b/ydb/library/yql/tests/sql/dq_file/part19/canondata/result.json @@ -1713,6 +1713,28 @@ } ], "test.test[optimizers-unused_columns_group_one_of_multi--Results]": [], + "test.test[optimizers-yql-17413-topsort--Analyze]": [ + { + "checksum": "7e96e5445456cbfd2e3622966d474857", + "size": 8796, + "uri": "https://{canondata_backend}/1903885/35226bfda385a540a941f0d8ba61073fa4188835/resource.tar.gz#test.test_optimizers-yql-17413-topsort--Analyze_/plan.txt" + } + ], + "test.test[optimizers-yql-17413-topsort--Debug]": [ + { + "checksum": "8958442943f25fd8531144117a7279ba", + "size": 3948, + "uri": "https://{canondata_backend}/1903885/35226bfda385a540a941f0d8ba61073fa4188835/resource.tar.gz#test.test_optimizers-yql-17413-topsort--Debug_/opt.yql_patched" + } + ], + "test.test[optimizers-yql-17413-topsort--Plan]": [ + { + "checksum": "7e96e5445456cbfd2e3622966d474857", + "size": 8796, + "uri": "https://{canondata_backend}/1903885/35226bfda385a540a941f0d8ba61073fa4188835/resource.tar.gz#test.test_optimizers-yql-17413-topsort--Plan_/plan.txt" + } + ], + "test.test[optimizers-yql-17413-topsort--Results]": [], "test.test[optimizers-yql-9297_publish_ytcopy--Analyze]": [ { "checksum": "ea9a6185cef1a4220dfde9576ddc292c", diff --git a/ydb/library/yql/tests/sql/sql2yql/canondata/result.json b/ydb/library/yql/tests/sql/sql2yql/canondata/result.json index 98d2155d4cdf..751d5bcfa332 100644 --- a/ydb/library/yql/tests/sql/sql2yql/canondata/result.json +++ b/ydb/library/yql/tests/sql/sql2yql/canondata/result.json @@ -9988,6 +9988,13 @@ "uri": "https://{canondata_backend}/1936997/00f46808be87e2ae2d4ac3ac45675b659c5ace45/resource.tar.gz#test_sql2yql.test_optimizers-yql-16134_/sql.yql" } ], + "test_sql2yql.test[optimizers-yql-17413-topsort]": [ + { + "checksum": "aa2443a9da120b817151ed97e57ee43c", + "size": 3527, + "uri": "https://{canondata_backend}/1781765/7c17af1b33d2fbf006a6d558af8cb8ec9ef4e4a9/resource.tar.gz#test_sql2yql.test_optimizers-yql-17413-topsort_/sql.yql" + } + ], "test_sql2yql.test[optimizers-yql-2171_aggregate_desc_sort_and_extract]": [ { "checksum": "e46724f353c724da2d05f34ac86ebc12", @@ -27229,6 +27236,13 @@ "uri": "https://{canondata_backend}/1880306/64654158d6bfb1289c66c626a8162239289559d0/resource.tar.gz#test_sql_format.test_optimizers-yql-16134_/formatted.sql" } ], + "test_sql_format.test[optimizers-yql-17413-topsort]": [ + { + "checksum": "ecd0eaf6cba2da258637dba1c3c1ccec", + "size": 326, + "uri": "https://{canondata_backend}/1931696/e0f966d90e1f131295800c00caf45bfec971964e/resource.tar.gz#test_sql_format.test_optimizers-yql-17413-topsort_/formatted.sql" + } + ], "test_sql_format.test[optimizers-yql-2171_aggregate_desc_sort_and_extract]": [ { "checksum": "ea7ad8b12dbf78644e2a7ac94e10159b", diff --git a/ydb/library/yql/tests/sql/suites/optimizers/sorted_desc.txt b/ydb/library/yql/tests/sql/suites/optimizers/sorted_desc.txt new file mode 100644 index 000000000000..235fc7cd495c --- /dev/null +++ b/ydb/library/yql/tests/sql/suites/optimizers/sorted_desc.txt @@ -0,0 +1,10 @@ +{"_yql_column_0"="\xE0\xC6\xCE\xCE\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFC";"key"="911";"subkey"="2";"value"="kkk"}; +{"_yql_column_0"="\xE0\xC8\xC9\xCE\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFC";"key"="761";"subkey"="6";"value"="ccc"}; +{"_yql_column_0"="\xE0\xCA\xCD\xC8\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFC";"key"="527";"subkey"="4";"value"="bbb"}; +{"_yql_column_0"="\xE0\xCD\xCF\xCF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFC";"key"="200";"subkey"="7";"value"="qqq"}; +{"_yql_column_0"="\xE0\xCE\xCA\xCF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFC";"key"="150";"subkey"="1";"value"="aaa"}; +{"_yql_column_0"="\xE0\xCE\xCA\xCF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFC";"key"="150";"subkey"="3";"value"="iii"}; +{"_yql_column_0"="\xE0\xCE\xCA\xCF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFC";"key"="150";"subkey"="8";"value"="zzz"}; +{"_yql_column_0"="\xE0\xCF\xC8\xCA\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFC";"key"="075";"subkey"="1";"value"="abc"}; +{"_yql_column_0"="\xE0\xCF\xCC\xC8\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFC";"key"="037";"subkey"="5";"value"="ddd"}; +{"_yql_column_0"="\xE0\xCF\xCD\xCC\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFC";"key"="023";"subkey"="3";"value"="aaa"}; diff --git a/ydb/library/yql/tests/sql/suites/optimizers/sorted_desc.txt.attr b/ydb/library/yql/tests/sql/suites/optimizers/sorted_desc.txt.attr new file mode 100644 index 000000000000..5a6ca5761ed6 --- /dev/null +++ b/ydb/library/yql/tests/sql/suites/optimizers/sorted_desc.txt.attr @@ -0,0 +1,47 @@ +{ + "_yql_row_spec" = { + "SortMembers" = [ + "key" + ]; + "SortDirections" = [ + 0 + ]; + "UniqueKeys" = %false; + "SortedByTypes" = [ + [ + "DataType"; + "String" + ] + ]; + "StrictSchema" = %true; + "Type" = [ + "StructType"; + [ + [ + "key"; + [ + "DataType"; + "String" + ] + ]; + [ + "subkey"; + [ + "DataType"; + "String" + ] + ]; + [ + "value"; + [ + "DataType"; + "String" + ] + ] + ] + ]; + "SortedBy" = [ + "_yql_column_0" + ] + } +} \ No newline at end of file diff --git a/ydb/library/yql/tests/sql/suites/optimizers/yql-17413-topsort.cfg b/ydb/library/yql/tests/sql/suites/optimizers/yql-17413-topsort.cfg new file mode 100644 index 000000000000..c85115a8e43c --- /dev/null +++ b/ydb/library/yql/tests/sql/suites/optimizers/yql-17413-topsort.cfg @@ -0,0 +1,2 @@ +in Input sorted_desc.txt +res result.txt diff --git a/ydb/library/yql/tests/sql/suites/optimizers/yql-17413-topsort.sql b/ydb/library/yql/tests/sql/suites/optimizers/yql-17413-topsort.sql new file mode 100644 index 000000000000..3e291d25195c --- /dev/null +++ b/ydb/library/yql/tests/sql/suites/optimizers/yql-17413-topsort.sql @@ -0,0 +1,9 @@ +-- ignore runonopt plan diff, skip force_blocks +USE plato; + +$filtered = select * from Input where value != "xxx"; + +select distinct(subkey) as subkey +from (select * from $filtered order by key desc limit 3); + +select sum(cast(subkey as int32)) as c from $filtered; diff --git a/ydb/library/yql/tests/sql/yt_native_file/part19/canondata/result.json b/ydb/library/yql/tests/sql/yt_native_file/part19/canondata/result.json index 8a7b1988eda2..aa475e74683e 100644 --- a/ydb/library/yql/tests/sql/yt_native_file/part19/canondata/result.json +++ b/ydb/library/yql/tests/sql/yt_native_file/part19/canondata/result.json @@ -1490,6 +1490,27 @@ "uri": "https://{canondata_backend}/1937027/16b7289b1b8f5fdff728155d836fa2b238949b2d/resource.tar.gz#test.test_optimizers-unused_columns_group_one_of_multi--Results_/results.txt" } ], + "test.test[optimizers-yql-17413-topsort--Debug]": [ + { + "checksum": "a6b253a26321a218f3e46aa86dbe4c50", + "size": 5712, + "uri": "https://{canondata_backend}/1784117/b3e9aeb7a41a8fdbf9c6bfb4a2025eafe94d4a19/resource.tar.gz#test.test_optimizers-yql-17413-topsort--Debug_/opt.yql" + } + ], + "test.test[optimizers-yql-17413-topsort--Plan]": [ + { + "checksum": "0372943ac566f8ee02229da75bd4c1d2", + "size": 10305, + "uri": "https://{canondata_backend}/1784117/b3e9aeb7a41a8fdbf9c6bfb4a2025eafe94d4a19/resource.tar.gz#test.test_optimizers-yql-17413-topsort--Plan_/plan.txt" + } + ], + "test.test[optimizers-yql-17413-topsort--Results]": [ + { + "checksum": "d800fdb51897877b5908555f9f9499db", + "size": 1661, + "uri": "https://{canondata_backend}/1784117/b3e9aeb7a41a8fdbf9c6bfb4a2025eafe94d4a19/resource.tar.gz#test.test_optimizers-yql-17413-topsort--Results_/results.txt" + } + ], "test.test[optimizers-yql-9297_publish_ytcopy--Debug]": [ { "checksum": "4dff550757ceeaceb30f02aae54b5d63",