Skip to content

Commit

Permalink
[yt provider] Don't omit YtMerge with KeepSorted setting (YQL-17413) (ydb-platform#755)
Browse files Browse the repository at this point in the history

* [yt provider] Don't omit YtMerge with KeepSorted setting

* More fixes + tests
  • Loading branch information
rvu1024 authored and adameat committed Dec 29, 2023
1 parent abf23c0 commit 7678449
Show file tree
Hide file tree
Showing 10 changed files with 158 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ class TYtLogicalOptProposalTransformer : public TOptimizeTransformerBase {
AddHandler(0, &TCoUnorderedBase::Match, HNDL(Unordered));
AddHandler(0, &TCoAggregate::Match, HNDL(CountAggregate));
AddHandler(0, &TYtReadTable::Match, HNDL(ZeroSampleToZeroLimit));
AddHandler(0, &TCoMatchRecognize::Match, HNDL(MatchRecognize));

AddHandler(1, &TCoFilterNullMembers::Match, HNDL(FilterNullMemebers<TCoFilterNullMembers>));
AddHandler(1, &TCoSkipNullMembers::Match, HNDL(FilterNullMemebers<TCoSkipNullMembers>));
Expand All @@ -75,8 +76,6 @@ class TYtLogicalOptProposalTransformer : public TOptimizeTransformerBase {

AddHandler(2, &TCoEquiJoin::Match, HNDL(ConvertToCommonTypeForForcedMergeJoin));
AddHandler(2, &TCoShuffleByKeys::Match, HNDL(ShuffleByKeys));

AddHandler(0, &TCoMatchRecognize::Match, HNDL(MatchRecognize));
#undef HNDL
}

Expand Down Expand Up @@ -139,13 +138,20 @@ class TYtLogicalOptProposalTransformer : public TOptimizeTransformerBase {
effectiveColumns.insert(column.Name);
}

if (NYql::HasSetting(op.Settings().Ref(), EYtSettingType::KeepSorted)) {
for (size_t i = 0; i < rowSpec->SortedBy.size(); ++i) {
const bool inserted = effectiveColumns.insert(rowSpec->SortedBy[i]).second;
keepColumns = keepColumns || inserted;
}
}

if (!path.Ranges().Maybe<TCoVoid>()) {
// add columns which are implicitly used by path.Ranges(), but not included in path.Columns();
const auto ranges = TYtRangesInfo(path.Ranges());
const size_t usedKeyPrefix = ranges.GetUsedKeyPrefixLength();
YQL_ENSURE(usedKeyPrefix <= rowSpec->SortedBy.size());
for (size_t i = 0; i < usedKeyPrefix; ++i) {
bool inserted = effectiveColumns.insert(rowSpec->SortedBy[i]).second;
const bool inserted = effectiveColumns.insert(rowSpec->SortedBy[i]).second;
keepColumns = keepColumns || inserted;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -631,6 +631,12 @@ class TYtPhysicalFinalizingTransformer : public TSyncTransformerBase {

bool good = true;
THashSet<TString> usedColumns;
if (NYql::HasSetting(*writer->Child(TYtTransientOpBase::idx_Settings), EYtSettingType::KeepSorted)) {
for (size_t i = 0; i < rowSpec.SortedBy.size(); ++i) {
usedColumns.insert(rowSpec.SortedBy[i]);
}
}

for (auto& item: x.second) {
if (auto rawSection = std::get<1>(item)) {
if (HasNonEmptyKeyFilter(TYtSection(rawSection))) {
Expand Down
18 changes: 18 additions & 0 deletions ydb/library/yql/providers/yt/provider/yql_yt_physical_optimize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6934,6 +6934,18 @@ class TYtPhysicalOptProposalTransformer : public TOptimizeTransformerBase {
continue;
}

if (NYql::HasSetting(innerMerge.Settings().Ref(), EYtSettingType::KeepSorted)) {
if (!AllOf(innerMergeSection.Paths(), [](const auto& path) {
auto op = path.Table().template Maybe<TYtOutput>().Operation();
return op && (op.template Maybe<TYtTouch>() || (op.Raw()->HasResult() && op.Raw()->GetResult().IsWorld()));
})) {
continue;
}
}
if (hasTakeSkip && AnyOf(innerMergeSection.Paths(), [](const auto& path) { return !path.Ranges().template Maybe<TCoVoid>(); })) {
continue;
}

const bool unordered = IsUnorderedOutput(path.Table().Cast<TYtOutput>());
auto mergeOutRowSpec = TYqlRowSpecInfo(innerMerge.Output().Item(0).RowSpec());
if (innerMergeSection.Paths().Size() > 1) {
Expand Down Expand Up @@ -7461,6 +7473,12 @@ class TYtPhysicalOptProposalTransformer : public TOptimizeTransformerBase {
if (NYql::HasNonEmptyKeyFilter(section)) {
return node;
}
if (NYql::HasSetting(merge.Settings().Ref(), EYtSettingType::KeepSorted)) {
auto op = path.Table().Maybe<TYtOutput>().Operation().Cast();
if (!(op.Ref().HasResult() && op.Ref().GetResult().Type() == TExprNode::World || op.Maybe<TYtTouch>())) {
return node;
}
}
TYtOutTableInfo outTableInfo(merge.Output().Item(0));
if (!tableInfo->RowSpec->CompareSortness(*outTableInfo.RowSpec)) {
return node;
Expand Down
22 changes: 22 additions & 0 deletions ydb/library/yql/tests/sql/dq_file/part19/canondata/result.json
Original file line number Diff line number Diff line change
Expand Up @@ -1713,6 +1713,28 @@
}
],
"test.test[optimizers-unused_columns_group_one_of_multi--Results]": [],
"test.test[optimizers-yql-17413-topsort--Analyze]": [
{
"checksum": "7e96e5445456cbfd2e3622966d474857",
"size": 8796,
"uri": "https://{canondata_backend}/1903885/35226bfda385a540a941f0d8ba61073fa4188835/resource.tar.gz#test.test_optimizers-yql-17413-topsort--Analyze_/plan.txt"
}
],
"test.test[optimizers-yql-17413-topsort--Debug]": [
{
"checksum": "8958442943f25fd8531144117a7279ba",
"size": 3948,
"uri": "https://{canondata_backend}/1903885/35226bfda385a540a941f0d8ba61073fa4188835/resource.tar.gz#test.test_optimizers-yql-17413-topsort--Debug_/opt.yql_patched"
}
],
"test.test[optimizers-yql-17413-topsort--Plan]": [
{
"checksum": "7e96e5445456cbfd2e3622966d474857",
"size": 8796,
"uri": "https://{canondata_backend}/1903885/35226bfda385a540a941f0d8ba61073fa4188835/resource.tar.gz#test.test_optimizers-yql-17413-topsort--Plan_/plan.txt"
}
],
"test.test[optimizers-yql-17413-topsort--Results]": [],
"test.test[optimizers-yql-9297_publish_ytcopy--Analyze]": [
{
"checksum": "ea9a6185cef1a4220dfde9576ddc292c",
Expand Down
14 changes: 14 additions & 0 deletions ydb/library/yql/tests/sql/sql2yql/canondata/result.json
Original file line number Diff line number Diff line change
Expand Up @@ -9988,6 +9988,13 @@
"uri": "https://{canondata_backend}/1936997/00f46808be87e2ae2d4ac3ac45675b659c5ace45/resource.tar.gz#test_sql2yql.test_optimizers-yql-16134_/sql.yql"
}
],
"test_sql2yql.test[optimizers-yql-17413-topsort]": [
{
"checksum": "aa2443a9da120b817151ed97e57ee43c",
"size": 3527,
"uri": "https://{canondata_backend}/1781765/7c17af1b33d2fbf006a6d558af8cb8ec9ef4e4a9/resource.tar.gz#test_sql2yql.test_optimizers-yql-17413-topsort_/sql.yql"
}
],
"test_sql2yql.test[optimizers-yql-2171_aggregate_desc_sort_and_extract]": [
{
"checksum": "e46724f353c724da2d05f34ac86ebc12",
Expand Down Expand Up @@ -27229,6 +27236,13 @@
"uri": "https://{canondata_backend}/1880306/64654158d6bfb1289c66c626a8162239289559d0/resource.tar.gz#test_sql_format.test_optimizers-yql-16134_/formatted.sql"
}
],
"test_sql_format.test[optimizers-yql-17413-topsort]": [
{
"checksum": "ecd0eaf6cba2da258637dba1c3c1ccec",
"size": 326,
"uri": "https://{canondata_backend}/1931696/e0f966d90e1f131295800c00caf45bfec971964e/resource.tar.gz#test_sql_format.test_optimizers-yql-17413-topsort_/formatted.sql"
}
],
"test_sql_format.test[optimizers-yql-2171_aggregate_desc_sort_and_extract]": [
{
"checksum": "ea7ad8b12dbf78644e2a7ac94e10159b",
Expand Down
10 changes: 10 additions & 0 deletions ydb/library/yql/tests/sql/suites/optimizers/sorted_desc.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{"_yql_column_0"="\xE0\xC6\xCE\xCE\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFC";"key"="911";"subkey"="2";"value"="kkk"};
{"_yql_column_0"="\xE0\xC8\xC9\xCE\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFC";"key"="761";"subkey"="6";"value"="ccc"};
{"_yql_column_0"="\xE0\xCA\xCD\xC8\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFC";"key"="527";"subkey"="4";"value"="bbb"};
{"_yql_column_0"="\xE0\xCD\xCF\xCF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFC";"key"="200";"subkey"="7";"value"="qqq"};
{"_yql_column_0"="\xE0\xCE\xCA\xCF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFC";"key"="150";"subkey"="1";"value"="aaa"};
{"_yql_column_0"="\xE0\xCE\xCA\xCF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFC";"key"="150";"subkey"="3";"value"="iii"};
{"_yql_column_0"="\xE0\xCE\xCA\xCF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFC";"key"="150";"subkey"="8";"value"="zzz"};
{"_yql_column_0"="\xE0\xCF\xC8\xCA\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFC";"key"="075";"subkey"="1";"value"="abc"};
{"_yql_column_0"="\xE0\xCF\xCC\xC8\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFC";"key"="037";"subkey"="5";"value"="ddd"};
{"_yql_column_0"="\xE0\xCF\xCD\xCC\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFC";"key"="023";"subkey"="3";"value"="aaa"};
47 changes: 47 additions & 0 deletions ydb/library/yql/tests/sql/suites/optimizers/sorted_desc.txt.attr
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
{
"_yql_row_spec" = {
"SortMembers" = [
"key"
];
"SortDirections" = [
0
];
"UniqueKeys" = %false;
"SortedByTypes" = [
[
"DataType";
"String"
]
];
"StrictSchema" = %true;
"Type" = [
"StructType";
[
[
"key";
[
"DataType";
"String"
]
];
[
"subkey";
[
"DataType";
"String"
]
];
[
"value";
[
"DataType";
"String"
]
]
]
];
"SortedBy" = [
"_yql_column_0"
]
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
in Input sorted_desc.txt
res result.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
-- ignore runonopt plan diff, skip force_blocks
USE plato;

-- Shared named subquery: every row of Input whose value is not "xxx".
-- It is consumed by both SELECT statements below.
$filtered = select * from Input where value != "xxx";

-- First consumer: take the 3 rows of $filtered with the largest key
-- (order by key desc limit 3), then return their distinct subkey values.
select distinct(subkey) as subkey
from (select * from $filtered order by key desc limit 3);

-- Second consumer: reuse $filtered as-is and sum subkey after casting it
-- to int32; the result column is named c.
select sum(cast(subkey as int32)) as c from $filtered;
Original file line number Diff line number Diff line change
Expand Up @@ -1490,6 +1490,27 @@
"uri": "https://{canondata_backend}/1937027/16b7289b1b8f5fdff728155d836fa2b238949b2d/resource.tar.gz#test.test_optimizers-unused_columns_group_one_of_multi--Results_/results.txt"
}
],
"test.test[optimizers-yql-17413-topsort--Debug]": [
{
"checksum": "a6b253a26321a218f3e46aa86dbe4c50",
"size": 5712,
"uri": "https://{canondata_backend}/1784117/b3e9aeb7a41a8fdbf9c6bfb4a2025eafe94d4a19/resource.tar.gz#test.test_optimizers-yql-17413-topsort--Debug_/opt.yql"
}
],
"test.test[optimizers-yql-17413-topsort--Plan]": [
{
"checksum": "0372943ac566f8ee02229da75bd4c1d2",
"size": 10305,
"uri": "https://{canondata_backend}/1784117/b3e9aeb7a41a8fdbf9c6bfb4a2025eafe94d4a19/resource.tar.gz#test.test_optimizers-yql-17413-topsort--Plan_/plan.txt"
}
],
"test.test[optimizers-yql-17413-topsort--Results]": [
{
"checksum": "d800fdb51897877b5908555f9f9499db",
"size": 1661,
"uri": "https://{canondata_backend}/1784117/b3e9aeb7a41a8fdbf9c6bfb4a2025eafe94d4a19/resource.tar.gz#test.test_optimizers-yql-17413-topsort--Results_/results.txt"
}
],
"test.test[optimizers-yql-9297_publish_ytcopy--Debug]": [
{
"checksum": "4dff550757ceeaceb30f02aae54b5d63",
Expand Down

0 comments on commit 7678449

Please sign in to comment.