diff --git a/ydb/library/yql/core/type_ann/type_ann_core.cpp b/ydb/library/yql/core/type_ann/type_ann_core.cpp index 654ec85e42ec..9571bd75a8f3 100644 --- a/ydb/library/yql/core/type_ann/type_ann_core.cpp +++ b/ydb/library/yql/core/type_ann/type_ann_core.cpp @@ -12715,7 +12715,7 @@ template ColumnOrderFunctions["Merge"] = ColumnOrderFunctions["Extend"] = &OrderForMergeExtend; ColumnOrderFunctions[RightName] = &OrderFromFirst; - ColumnOrderFunctions["UnionAll"] = &OrderForUnionAll; + ColumnOrderFunctions["UnionMerge"] = ColumnOrderFunctions["UnionAll"] = &OrderForUnionAll; ColumnOrderFunctions["Union"] = &OrderForUnionAll; ColumnOrderFunctions["EquiJoin"] = &OrderForEquiJoin; ColumnOrderFunctions["CalcOverWindow"] = &OrderForCalcOverWindow; diff --git a/ydb/library/yql/sql/pg/pg_sql.cpp b/ydb/library/yql/sql/pg/pg_sql.cpp index a599e8dc242b..0896b8fedb56 100644 --- a/ydb/library/yql/sql/pg/pg_sql.cpp +++ b/ydb/library/yql/sql/pg/pg_sql.cpp @@ -1520,12 +1520,9 @@ class TConverter : public IPGParseEvents { } } else if (NodeTag(r->val) == T_FuncCall) { auto func = CAST_NODE(FuncCall, r->val); - TVector names; - if (!ExtractFuncName(func, names)) { + if (!ExtractFuncName(func, name, nullptr)) { return nullptr; } - - name = names.back(); } } @@ -3427,12 +3424,13 @@ class TConverter : public IPGParseEvents { return {}; } - auto func = ParseFuncCall(CAST_NODE(FuncCall, node), settings, true); + bool injectRead = false; + auto func = ParseFuncCall(CAST_NODE(FuncCall, node), settings, true, injectRead); if (!func) { return {}; } - return TFromDesc{ func, alias, colnames, false }; + return TFromDesc{ func, alias, colnames, injectRead }; } TMaybe ParseRangeSubselect(const RangeSubselect* value) { @@ -3723,7 +3721,8 @@ class TConverter : public IPGParseEvents { return ParseNullTestExpr(CAST_NODE(NullTest, node), settings); } case T_FuncCall: { - return ParseFuncCall(CAST_NODE(FuncCall, node), settings, false); + bool injectRead; + return ParseFuncCall(CAST_NODE(FuncCall, node), 
settings, false, injectRead);
         }
         case T_A_ArrayExpr: {
             return ParseAArrayExpr(CAST_NODE(A_ArrayExpr, node), settings);
@@ -4009,7 +4008,146 @@ class TConverter : public IPGParseEvents {
         return L(A("PgSubLink"), QA(linkType), L(A("Void")), L(A("Void")), rowTest, L(A("lambda"), QL(), select));
     }
 
-    TAstNode* ParseFuncCall(const FuncCall* value, const TExprSettings& settings, bool rangeFunction) {
+    // Builds the Read! node for a schema-qualified table function used in FROM
+    // (plato.concat(...), plato.range(...), ...).
+    //
+    // name   - function name, matched case-insensitively against concat,
+    //          concat_view, range, regexp and like.
+    // schema - qualifier of the call; it is handed to
+    //          BuildClusterSinkOrSourceExpression to obtain the read source.
+    // args   - call arguments; each must be a string constant and at least one
+    //          is required.
+    //
+    // Accepted shapes, by argument position:
+    //   concat(table, ...)
+    //   concat_view(table, view, ...)              - table/view pairs
+    //   range(a0[, lower[, upper[, a3[, view]]]])
+    //   regexp(a0, pattern[, a2[, view]])
+    //   like(a0, pattern[, a2[, view]])
+    // a0 and a3/a2 are forwarded to MrTableRange as its first and third
+    // arguments (presumably a path prefix and a name suffix - confirm against
+    // MrTableRange).
+    //
+    // Returns nullptr if the source cannot be built, or after reporting an
+    // invalid call through AddError.
+    TAstNode* ParseTableRangeFunction(const TString& name, const TString& schema, List* args) {
+        auto source = BuildClusterSinkOrSourceExpression(false, schema);
+        if (!source) {
+            return nullptr;
+        }
+
+        TVector<TString> argStrs;
+        for (int i = 0; i < ListLength(args); ++i) {
+            auto arg = ListNodeNth(args, i);
+            if (NodeTag(arg) == T_A_Const && (NodeTag(CAST_NODE(A_Const, arg)->val) == T_String)) {
+                TString rawStr = StrVal(CAST_NODE(A_Const, arg)->val);
+                argStrs.push_back(rawStr);
+            } else {
+                AddError("Expected String argument for table function");
+                return nullptr;
+            }
+        }
+
+        if (argStrs.empty()) {
+            AddError("Expected at least one argument for table function");
+            return nullptr;
+        }
+
+        TAstNode* key;
+        auto lowerName = to_lower(name);
+        auto options = QL();
+        if (lowerName == "concat") {
+            TVector<TAstNode*> concatArgs;
+            concatArgs.push_back(A("MrTableConcat"));
+            for (const auto& s : argStrs) {
+                concatArgs.push_back(L(A("Key"), QL(QA("table"),L(A("String"), QAX(s)))));
+            }
+
+            key = VL(concatArgs);
+        } else if (lowerName == "concat_view") {
+            if (argStrs.size() % 2 != 0) {
+                AddError("Expected sequence of pairs of table and view for concat_view");
+                return nullptr;
+            }
+
+            TVector<TAstNode*> concatArgs;
+            concatArgs.push_back(A("MrTableConcat"));
+            for (ui32 i = 0; i < argStrs.size(); i += 2) {
+                concatArgs.push_back(L(A("Key"),
+                    QL(QA("table"),L(A("String"), QAX(argStrs[i]))),
+                    QL(QA("view"),L(A("String"), QAX(argStrs[i + 1])))));
+            }
+
+            key = VL(concatArgs);
+        } else if (lowerName == "range") {
+            if (argStrs.size() > 5) {
+                AddError("Too many arguments");
+                return nullptr;
+            }
+
+            options = QL(QL(QA("ignorenonexisting")));
+            TAstNode* expr;
+            if (argStrs.size() == 1) {
+                expr = L(A("Bool"),QA("true"));
+            } else if (argStrs.size() == 2) {
+                expr = L(A(">="),A("item"),L(A("String"),QAX(argStrs[1])));
+            } else {
+                expr = L(A("And"),
+                    L(A(">="),A("item"),L(A("String"),QAX(argStrs[1]))),
+                    L(A("<="),A("item"),L(A("String"),QAX(argStrs[2])))
+                );
+            }
+
+            auto lambda = L(A("lambda"), QL(A("item")), expr);
+            auto range = L(A("MrTableRange"), QAX(argStrs[0]), lambda, QAX(argStrs.size() < 4 ? "" : argStrs[3]));
+            if (argStrs.size() < 5) {
+                key = L(A("Key"), QL(QA("table"),range));
+            } else {
+                key = L(A("Key"), QL(QA("table"),range), QL(QA("view"),L(A("String"), QAX(argStrs[4]))));
+            }
+        } else if (lowerName == "regexp" || lowerName == "like") {
+            if (argStrs.size() < 2 || argStrs.size() > 4) {
+                AddError("Expected from 2 to 4 arguments");
+                return nullptr;
+            }
+
+            options = QL(QL(QA("ignorenonexisting")));
+            TAstNode* expr;
+            if (lowerName == "regexp") {
+                expr = L(A("Apply"),L(A("Udf"),QA("Re2.Grep"),
+                    QL(L(A("String"),QAX(argStrs[1])),L(A("Null")))),
+                    A("item"));
+            } else {
+                expr = L(A("Apply"),L(A("Udf"),QA("Re2.Match"),
+                    QL(L(A("Apply"),
+                        L(A("Udf"), QA("Re2.PatternFromLike")),
+                        L(A("String"),QAX(argStrs[1]))),L(A("Null")))),
+                    A("item"));
+            }
+
+            auto lambda = L(A("lambda"), QL(A("item")), expr);
+            auto range = L(A("MrTableRange"), QAX(argStrs[0]), lambda, QAX(argStrs.size() < 3 ? "" : argStrs[2]));
+            if (argStrs.size() < 4) {
+                key = L(A("Key"), QL(QA("table"),range));
+            } else {
+                key = L(A("Key"), QL(QA("table"),range), QL(QA("view"),L(A("String"), QAX(argStrs[3]))));
+            }
+        } else {
+            AddError(TStringBuilder() << "Unknown table function: " << name);
+            return nullptr;
+        }
+
+        return L(
+            A("Read!"),
+            A("world"),
+            source,
+            key,
+            L(A("Void")),
+            options
+        );
+    }
+
+    TAstNode* ParseFuncCall(const FuncCall* value, const TExprSettings& settings, bool rangeFunction, bool& injectRead) {
         AT_LOCATION(value);
         if (ListLength(value->agg_order) > 0) {
             AddError("FuncCall: unsupported agg_order");
@@ -4052,12 +4190,19 @@ class TConverter : public IPGParseEvents {
             }
         }
 
-        TVector names;
-        if (!ExtractFuncName(value, names)) {
+        TString name;
+        TString schema;
+        if (!ExtractFuncName(value, name, rangeFunction ? &schema : nullptr)) {
             return nullptr;
         }
 
-        auto name = names.back();
+        // Only an explicit schema other than pg_catalog names a table function;
+        // unqualified calls leave schema empty and must stay ordinary PG functions.
+        if (rangeFunction && !schema.empty() && schema != "pg_catalog") {
+            injectRead = true;
+            return ParseTableRangeFunction(name, schema, value->args);
+        }
+
         if (name == "shobj_description" || name == "obj_description") {
             AddWarning(TIssuesIds::PG_COMPAT, name + " function forced to NULL");
             return L(A("Null"));
@@ -4159,7 +4304,8 @@ class TConverter : public IPGParseEvents {
         return VL(args.data(), args.size());
     }
 
-    bool ExtractFuncName(const FuncCall* value, TVector& names) {
+    bool ExtractFuncName(const FuncCall* value, TString& name, TString* schemaName) {
+        TVector<TString> names;
         for (int i = 0; i < ListLength(value->funcname); ++i) {
             auto x = ListNodeNth(value->funcname, i);
             if (NodeTag(x) != T_String) {
@@ -4180,11 +4326,18 @@ class TConverter : public IPGParseEvents {
             return false;
         }
 
-        if (names.size() == 2 && names[0] != "pg_catalog") {
-            AddError(TStringBuilder() << "FuncCall: expected pg_catalog, but got: " << names[0]);
-            return false;
+        if (names.size() == 2) {
+            if (!schemaName && names[0] != "pg_catalog") {
+                AddError(TStringBuilder() << "FuncCall: expected pg_catalog, but got: " << names[0]);
+
return false; + } + + if (schemaName) { + *schemaName = names[0]; + } } + name = names.back(); return true; } diff --git a/ydb/library/yql/tests/sql/dq_file/part17/canondata/result.json b/ydb/library/yql/tests/sql/dq_file/part17/canondata/result.json index 1e901fa579bb..5c2c84a6b293 100644 --- a/ydb/library/yql/tests/sql/dq_file/part17/canondata/result.json +++ b/ydb/library/yql/tests/sql/dq_file/part17/canondata/result.json @@ -2288,6 +2288,28 @@ } ], "test.test[pg-sublink_having_any-default.txt-Results]": [], + "test.test[pg-table_func-default.txt-Analyze]": [ + { + "checksum": "90f90cb0bb8d60304471e5cf9a37436a", + "size": 22788, + "uri": "https://{canondata_backend}/1917492/73fd38e9ffcd658585f52c248a634ae9046b0ff5/resource.tar.gz#test.test_pg-table_func-default.txt-Analyze_/plan.txt" + } + ], + "test.test[pg-table_func-default.txt-Debug]": [ + { + "checksum": "009e570dc4b46891c5263130b7e90036", + "size": 6644, + "uri": "https://{canondata_backend}/1917492/73fd38e9ffcd658585f52c248a634ae9046b0ff5/resource.tar.gz#test.test_pg-table_func-default.txt-Debug_/opt.yql_patched" + } + ], + "test.test[pg-table_func-default.txt-Plan]": [ + { + "checksum": "90f90cb0bb8d60304471e5cf9a37436a", + "size": 22788, + "uri": "https://{canondata_backend}/1917492/73fd38e9ffcd658585f52c248a634ae9046b0ff5/resource.tar.gz#test.test_pg-table_func-default.txt-Plan_/plan.txt" + } + ], + "test.test[pg-table_func-default.txt-Results]": [], "test.test[pg-tpcds-q20-default.txt-Analyze]": [ { "checksum": "212be881133a20b5b73ef1250dbeda51", diff --git a/ydb/library/yql/tests/sql/hybrid_file/part10/canondata/result.json b/ydb/library/yql/tests/sql/hybrid_file/part10/canondata/result.json index a0037920143c..6488a67d02de 100644 --- a/ydb/library/yql/tests/sql/hybrid_file/part10/canondata/result.json +++ b/ydb/library/yql/tests/sql/hybrid_file/part10/canondata/result.json @@ -2295,6 +2295,20 @@ "uri": 
"https://{canondata_backend}/1775319/3515b86fb929979a6751f93bd43a0291eaa01262/resource.tar.gz#test.test_pg-sublink_projection_exists_corr-default.txt-Plan_/plan.txt" } ], + "test.test[pg-table_func-default.txt-Debug]": [ + { + "checksum": "f58d79752c5632a904d7c675fd2cd887", + "size": 6681, + "uri": "https://{canondata_backend}/1871182/03581f8f43b6630387f93dcffb64efda102a5104/resource.tar.gz#test.test_pg-table_func-default.txt-Debug_/opt.yql_patched" + } + ], + "test.test[pg-table_func-default.txt-Plan]": [ + { + "checksum": "95e2fb9330b8431fa9d166b01b6a47b0", + "size": 19319, + "uri": "https://{canondata_backend}/1871182/03581f8f43b6630387f93dcffb64efda102a5104/resource.tar.gz#test.test_pg-table_func-default.txt-Plan_/plan.txt" + } + ], "test.test[pg-tpcds-q07-default.txt-Debug]": [ { "checksum": "f61d3822f18e6a66d0991534554f20fb", diff --git a/ydb/library/yql/tests/sql/sql2yql/canondata/result.json b/ydb/library/yql/tests/sql/sql2yql/canondata/result.json index b0e06b3a4857..1febcffbf951 100644 --- a/ydb/library/yql/tests/sql/sql2yql/canondata/result.json +++ b/ydb/library/yql/tests/sql/sql2yql/canondata/result.json @@ -14076,6 +14076,13 @@ "uri": "https://{canondata_backend}/1881367/79a71c1478c556da1931a7565c12bdd14cc63567/resource.tar.gz#test_sql2yql.test_pg-sublink_where_in_corr_/sql.yql" } ], + "test_sql2yql.test[pg-table_func]": [ + { + "checksum": "52fc030d0a5ec71d08efd26d9f101c65", + "size": 8198, + "uri": "https://{canondata_backend}/1784826/4a52e4f284dee1aa5ddb5ef05566fbf6d624ec38/resource.tar.gz#test_sql2yql.test_pg-table_func_/sql.yql" + } + ], "test_sql2yql.test[pg-tpcds-q01]": [ { "checksum": "d7a119a877ea0e8b9211601d372e99b9", diff --git a/ydb/library/yql/tests/sql/suites/pg/table_func.sql b/ydb/library/yql/tests/sql/suites/pg/table_func.sql new file mode 100644 index 000000000000..3d481c9cf891 --- /dev/null +++ b/ydb/library/yql/tests/sql/suites/pg/table_func.sql @@ -0,0 +1,15 @@ +--!syntax_pg +select count(*) from plato.concat('Input','Input'); 
+select count(*) from plato.concat_view('Input','raw','Input','raw'); +select count(*) from plato.range(''); +select count(*) from plato.range('','A'); +select count(*) from plato.range('','A','Z'); +select count(*) from plato.range('','A','Z',''); +select count(*) from plato.range('','A','Z','','raw'); +select count(*) from plato.regexp('','Inpu.?'); +select count(*) from plato.regexp('','Inpu.?',''); +select count(*) from plato.regexp('','Inpu.?','','raw'); +select count(*) from plato.like('','Inpu%'); +select count(*) from plato.like('','Inpu%',''); +select count(*) from plato.like('','Inpu%','','raw'); + diff --git a/ydb/library/yql/tests/sql/yt_native_file/part17/canondata/result.json b/ydb/library/yql/tests/sql/yt_native_file/part17/canondata/result.json index 1aade14c399e..4ef8f8834a22 100644 --- a/ydb/library/yql/tests/sql/yt_native_file/part17/canondata/result.json +++ b/ydb/library/yql/tests/sql/yt_native_file/part17/canondata/result.json @@ -2042,6 +2042,27 @@ "uri": "https://{canondata_backend}/1942415/9dc26178536314feaac77333a6a0e27c8703d1e2/resource.tar.gz#test.test_pg-sublink_having_any-default.txt-Results_/results.txt" } ], + "test.test[pg-table_func-default.txt-Debug]": [ + { + "checksum": "afed4824bc574f8c4d4470e01e377627", + "size": 4991, + "uri": "https://{canondata_backend}/1784826/f75802903846b7b25514887495c6eaa1859db07a/resource.tar.gz#test.test_pg-table_func-default.txt-Debug_/opt.yql" + } + ], + "test.test[pg-table_func-default.txt-Plan]": [ + { + "checksum": "bf2b0c772eaf69c15399605d7fbd7b0e", + "size": 14773, + "uri": "https://{canondata_backend}/1784826/f75802903846b7b25514887495c6eaa1859db07a/resource.tar.gz#test.test_pg-table_func-default.txt-Plan_/plan.txt" + } + ], + "test.test[pg-table_func-default.txt-Results]": [ + { + "checksum": "db24edd3094d41f02121a7b1f3629af3", + "size": 9490, + "uri": 
"https://{canondata_backend}/1784826/f75802903846b7b25514887495c6eaa1859db07a/resource.tar.gz#test.test_pg-table_func-default.txt-Results_/results.txt" + } + ], "test.test[pg-tpcds-q20-default.txt-Debug]": [ { "checksum": "3d98e10d734329d04d97423b4026d52d",