Skip to content

Commit

Permalink
Support of table functions in pg syntax (#9040)
Browse files Browse the repository at this point in the history
  • Loading branch information
vitstn authored Sep 10, 2024
1 parent f9cd248 commit a8fb87e
Show file tree
Hide file tree
Showing 7 changed files with 224 additions and 16 deletions.
2 changes: 1 addition & 1 deletion ydb/library/yql/core/type_ann/type_ann_core.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12715,7 +12715,7 @@ template <NKikimr::NUdf::EDataSlot DataSlot>

ColumnOrderFunctions["Merge"] = ColumnOrderFunctions["Extend"] = &OrderForMergeExtend;
ColumnOrderFunctions[RightName] = &OrderFromFirst;
ColumnOrderFunctions["UnionAll"] = &OrderForUnionAll;
ColumnOrderFunctions["UnionMerge"] = ColumnOrderFunctions["UnionAll"] = &OrderForUnionAll;
ColumnOrderFunctions["Union"] = &OrderForUnionAll;
ColumnOrderFunctions["EquiJoin"] = &OrderForEquiJoin;
ColumnOrderFunctions["CalcOverWindow"] = &OrderForCalcOverWindow;
Expand Down
159 changes: 144 additions & 15 deletions ydb/library/yql/sql/pg/pg_sql.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1520,12 +1520,9 @@ class TConverter : public IPGParseEvents {
}
} else if (NodeTag(r->val) == T_FuncCall) {
auto func = CAST_NODE(FuncCall, r->val);
TVector<TString> names;
if (!ExtractFuncName(func, names)) {
if (!ExtractFuncName(func, name, nullptr)) {
return nullptr;
}

name = names.back();
}
}

Expand Down Expand Up @@ -3427,12 +3424,13 @@ class TConverter : public IPGParseEvents {
return {};
}

auto func = ParseFuncCall(CAST_NODE(FuncCall, node), settings, true);
bool injectRead = false;
auto func = ParseFuncCall(CAST_NODE(FuncCall, node), settings, true, injectRead);
if (!func) {
return {};
}

return TFromDesc{ func, alias, colnames, false };
return TFromDesc{ func, alias, colnames, injectRead };
}

TMaybe<TFromDesc> ParseRangeSubselect(const RangeSubselect* value) {
Expand Down Expand Up @@ -3723,7 +3721,8 @@ class TConverter : public IPGParseEvents {
return ParseNullTestExpr(CAST_NODE(NullTest, node), settings);
}
case T_FuncCall: {
return ParseFuncCall(CAST_NODE(FuncCall, node), settings, false);
bool injectRead;
return ParseFuncCall(CAST_NODE(FuncCall, node), settings, false, injectRead);
}
case T_A_ArrayExpr: {
return ParseAArrayExpr(CAST_NODE(A_ArrayExpr, node), settings);
Expand Down Expand Up @@ -4009,7 +4008,124 @@ class TConverter : public IPGParseEvents {
return L(A("PgSubLink"), QA(linkType), L(A("Void")), L(A("Void")), rowTest, L(A("lambda"), QL(), select));
}

TAstNode* ParseFuncCall(const FuncCall* value, const TExprSettings& settings, bool rangeFunction) {
// Translates a schema-qualified table function from a FROM clause into a
// `Read!` expression.
//
// `schema` is passed to BuildClusterSinkOrSourceExpression and the result is
// used as the source argument of `Read!`. `name` selects the function and is
// compared case-insensitively. Every element of `args` must be a string
// constant and at least one argument is required.
//
// Supported functions (aN denotes the N-th string argument, counted from 0):
//   concat(t0, t1, ...)               - MrTableConcat with one Key per table;
//   concat_view(t0, v0, t1, v1, ...)  - MrTableConcat with one Key per
//                                       (table, view) pair;
//   range(a0[, lo[, hi[, a3[, view]]]])
//                                     - MrTableRange(a0, filter, a3) where the
//                                       filter is `item >= lo` and, when given,
//                                       `item <= hi`;
//   regexp(a0, pattern[, a2[, view]]) - MrTableRange(a0, filter, a2) filtering
//                                       through the Re2.Grep udf;
//   like(a0, pattern[, a2[, view]])   - same, but through Re2.Match over
//                                       Re2.PatternFromLike(pattern).
// The range/regexp/like variants also set the `ignorenonexisting` read option.
//
// Returns nullptr when the source expression could not be built, or after
// reporting a problem through AddError (non-string argument, no arguments,
// wrong argument count, unknown function name).
TAstNode* ParseTableRangeFunction(const TString& name, const TString& schema, List* args) {
    auto clusterSource = BuildClusterSinkOrSourceExpression(false, schema);
    if (!clusterSource) {
        return nullptr;
    }

    // Collect the raw values of the arguments, rejecting anything that is not
    // a string constant.
    TVector<TString> literals;
    const auto argCount = ListLength(args);
    for (int idx = 0; idx < argCount; ++idx) {
        auto node = ListNodeNth(args, idx);
        const bool isStringConst = NodeTag(node) == T_A_Const
            && NodeTag(CAST_NODE(A_Const, node)->val) == T_String;
        if (!isStringConst) {
            AddError("Expected String argument for table function");
            return nullptr;
        }

        literals.emplace_back(StrVal(CAST_NODE(A_Const, node)->val));
    }

    if (literals.empty()) {
        AddError("Expected at least one argument for table function");
        return nullptr;
    }

    // (String '<value>)
    const auto strNode = [&](const TString& value) -> TAstNode* {
        return L(A("String"), QAX(value));
    };

    // The argument at `pos`, or an empty string when it was not supplied.
    const auto literalOrEmpty = [&](size_t pos) -> TString {
        return pos < literals.size() ? literals[pos] : TString();
    };

    // Wraps a predicate over `item` into a Key built around MrTableRange.
    // `extraPos` is the index of the optional argument forwarded as the last
    // MrTableRange parameter; `viewPos` is the index of the optional view.
    const auto rangeKey = [&](TAstNode* predicate, size_t extraPos, size_t viewPos) -> TAstNode* {
        auto filter = L(A("lambda"), QL(A("item")), predicate);
        auto tableRange = L(A("MrTableRange"), QAX(literals[0]), filter, QAX(literalOrEmpty(extraPos)));
        if (literals.size() <= viewPos) {
            return L(A("Key"), QL(QA("table"), tableRange));
        }

        return L(A("Key"), QL(QA("table"), tableRange), QL(QA("view"), strNode(literals[viewPos])));
    };

    const auto funcName = to_lower(name);
    auto readOptions = QL();
    TAstNode* tableKey = nullptr;

    if (funcName == "concat") {
        TVector<TAstNode*> parts;
        parts.push_back(A("MrTableConcat"));
        for (const auto& table : literals) {
            parts.push_back(L(A("Key"), QL(QA("table"), strNode(table))));
        }

        tableKey = VL(parts);
    } else if (funcName == "concat_view") {
        if (literals.size() % 2 != 0) {
            AddError("Expected sequence of pairs of table and view for concat_view");
            return nullptr;
        }

        TVector<TAstNode*> parts;
        parts.push_back(A("MrTableConcat"));
        for (size_t pos = 0; pos < literals.size(); pos += 2) {
            auto tablePart = QL(QA("table"), strNode(literals[pos]));
            auto viewPart = QL(QA("view"), strNode(literals[pos + 1]));
            parts.push_back(L(A("Key"), tablePart, viewPart));
        }

        tableKey = VL(parts);
    } else if (funcName == "range") {
        if (literals.size() > 5) {
            AddError("Too many arguments");
            return nullptr;
        }

        readOptions = QL(QL(QA("ignorenonexisting")));

        // The number of supplied bounds decides the shape of the filter.
        TAstNode* predicate = nullptr;
        switch (literals.size()) {
            case 1:
                predicate = L(A("Bool"), QA("true"));
                break;
            case 2:
                predicate = L(A(">="), A("item"), strNode(literals[1]));
                break;
            default: {
                auto lowerBound = L(A(">="), A("item"), strNode(literals[1]));
                auto upperBound = L(A("<="), A("item"), strNode(literals[2]));
                predicate = L(A("And"), lowerBound, upperBound);
                break;
            }
        }

        tableKey = rangeKey(predicate, 3, 4);
    } else if (funcName == "regexp" || funcName == "like") {
        if (literals.size() < 2 || literals.size() > 4) {
            AddError("Expected from 2 to 4 arguments");
            return nullptr;
        }

        readOptions = QL(QL(QA("ignorenonexisting")));

        auto pattern = strNode(literals[1]);
        TAstNode* matcher = nullptr;
        if (funcName == "regexp") {
            matcher = L(A("Udf"), QA("Re2.Grep"), QL(pattern, L(A("Null"))));
        } else {
            // LIKE patterns are first converted to a regular expression.
            auto converted = L(A("Apply"), L(A("Udf"), QA("Re2.PatternFromLike")), pattern);
            matcher = L(A("Udf"), QA("Re2.Match"), QL(converted, L(A("Null"))));
        }

        tableKey = rangeKey(L(A("Apply"), matcher, A("item")), 2, 3);
    } else {
        AddError(TStringBuilder() << "Unknown table function: " << name);
        return nullptr;
    }

    return L(
        A("Read!"),
        A("world"),
        clusterSource,
        tableKey,
        L(A("Void")),
        readOptions
    );
}

TAstNode* ParseFuncCall(const FuncCall* value, const TExprSettings& settings, bool rangeFunction, bool& injectRead) {
AT_LOCATION(value);
if (ListLength(value->agg_order) > 0) {
AddError("FuncCall: unsupported agg_order");
Expand Down Expand Up @@ -4052,12 +4168,17 @@ class TConverter : public IPGParseEvents {
}
}

TVector<TString> names;
if (!ExtractFuncName(value, names)) {
TString name;
TString schema;
if (!ExtractFuncName(value, name, rangeFunction ? &schema : nullptr)) {
return nullptr;
}

auto name = names.back();
if (rangeFunction && !schema.empty() && schema != "pg_catalog") {
injectRead = true;
return ParseTableRangeFunction(name, schema, value->args);
}

if (name == "shobj_description" || name == "obj_description") {
AddWarning(TIssuesIds::PG_COMPAT, name + " function forced to NULL");
return L(A("Null"));
Expand Down Expand Up @@ -4159,7 +4280,8 @@ class TConverter : public IPGParseEvents {
return VL(args.data(), args.size());
}

bool ExtractFuncName(const FuncCall* value, TVector<TString>& names) {
bool ExtractFuncName(const FuncCall* value, TString& name, TString* schemaName) {
TVector<TString> names;
for (int i = 0; i < ListLength(value->funcname); ++i) {
auto x = ListNodeNth(value->funcname, i);
if (NodeTag(x) != T_String) {
Expand All @@ -4180,11 +4302,18 @@ class TConverter : public IPGParseEvents {
return false;
}

if (names.size() == 2 && names[0] != "pg_catalog") {
AddError(TStringBuilder() << "FuncCall: expected pg_catalog, but got: " << names[0]);
return false;
if (names.size() == 2) {
if (!schemaName && names[0] != "pg_catalog") {
AddError(TStringBuilder() << "FuncCall: expected pg_catalog, but got: " << names[0]);
return false;
}

if (schemaName) {
*schemaName = names[0];
}
}

name = names.back();
return true;
}

Expand Down
22 changes: 22 additions & 0 deletions ydb/library/yql/tests/sql/dq_file/part17/canondata/result.json
Original file line number Diff line number Diff line change
Expand Up @@ -2288,6 +2288,28 @@
}
],
"test.test[pg-sublink_having_any-default.txt-Results]": [],
"test.test[pg-table_func-default.txt-Analyze]": [
{
"checksum": "90f90cb0bb8d60304471e5cf9a37436a",
"size": 22788,
"uri": "https://{canondata_backend}/1917492/73fd38e9ffcd658585f52c248a634ae9046b0ff5/resource.tar.gz#test.test_pg-table_func-default.txt-Analyze_/plan.txt"
}
],
"test.test[pg-table_func-default.txt-Debug]": [
{
"checksum": "009e570dc4b46891c5263130b7e90036",
"size": 6644,
"uri": "https://{canondata_backend}/1917492/73fd38e9ffcd658585f52c248a634ae9046b0ff5/resource.tar.gz#test.test_pg-table_func-default.txt-Debug_/opt.yql_patched"
}
],
"test.test[pg-table_func-default.txt-Plan]": [
{
"checksum": "90f90cb0bb8d60304471e5cf9a37436a",
"size": 22788,
"uri": "https://{canondata_backend}/1917492/73fd38e9ffcd658585f52c248a634ae9046b0ff5/resource.tar.gz#test.test_pg-table_func-default.txt-Plan_/plan.txt"
}
],
"test.test[pg-table_func-default.txt-Results]": [],
"test.test[pg-tpcds-q20-default.txt-Analyze]": [
{
"checksum": "212be881133a20b5b73ef1250dbeda51",
Expand Down
14 changes: 14 additions & 0 deletions ydb/library/yql/tests/sql/hybrid_file/part10/canondata/result.json
Original file line number Diff line number Diff line change
Expand Up @@ -2295,6 +2295,20 @@
"uri": "https://{canondata_backend}/1775319/3515b86fb929979a6751f93bd43a0291eaa01262/resource.tar.gz#test.test_pg-sublink_projection_exists_corr-default.txt-Plan_/plan.txt"
}
],
"test.test[pg-table_func-default.txt-Debug]": [
{
"checksum": "f58d79752c5632a904d7c675fd2cd887",
"size": 6681,
"uri": "https://{canondata_backend}/1871182/03581f8f43b6630387f93dcffb64efda102a5104/resource.tar.gz#test.test_pg-table_func-default.txt-Debug_/opt.yql_patched"
}
],
"test.test[pg-table_func-default.txt-Plan]": [
{
"checksum": "95e2fb9330b8431fa9d166b01b6a47b0",
"size": 19319,
"uri": "https://{canondata_backend}/1871182/03581f8f43b6630387f93dcffb64efda102a5104/resource.tar.gz#test.test_pg-table_func-default.txt-Plan_/plan.txt"
}
],
"test.test[pg-tpcds-q07-default.txt-Debug]": [
{
"checksum": "f61d3822f18e6a66d0991534554f20fb",
Expand Down
7 changes: 7 additions & 0 deletions ydb/library/yql/tests/sql/sql2yql/canondata/result.json
Original file line number Diff line number Diff line change
Expand Up @@ -14076,6 +14076,13 @@
"uri": "https://{canondata_backend}/1881367/79a71c1478c556da1931a7565c12bdd14cc63567/resource.tar.gz#test_sql2yql.test_pg-sublink_where_in_corr_/sql.yql"
}
],
"test_sql2yql.test[pg-table_func]": [
{
"checksum": "52fc030d0a5ec71d08efd26d9f101c65",
"size": 8198,
"uri": "https://{canondata_backend}/1784826/4a52e4f284dee1aa5ddb5ef05566fbf6d624ec38/resource.tar.gz#test_sql2yql.test_pg-table_func_/sql.yql"
}
],
"test_sql2yql.test[pg-tpcds-q01]": [
{
"checksum": "d7a119a877ea0e8b9211601d372e99b9",
Expand Down
15 changes: 15 additions & 0 deletions ydb/library/yql/tests/sql/suites/pg/table_func.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
--!syntax_pg
-- Exercises table functions in PG syntax: each query reads through a
-- schema-qualified function call in FROM and counts the resulting rows.
-- concat: every argument is a table name.
select count(*) from plato.concat('Input','Input');
-- concat_view: arguments are (table, view) pairs.
select count(*) from plato.concat_view('Input','raw','Input','raw');
-- range: one to five arguments; the 2nd and 3rd are the lower and upper
-- bounds of the filter, the 5th is a view.
select count(*) from plato.range('');
select count(*) from plato.range('','A');
select count(*) from plato.range('','A','Z');
select count(*) from plato.range('','A','Z','');
select count(*) from plato.range('','A','Z','','raw');
-- regexp: two to four arguments; the 2nd is the pattern, the 4th is a view.
select count(*) from plato.regexp('','Inpu.?');
select count(*) from plato.regexp('','Inpu.?','');
select count(*) from plato.regexp('','Inpu.?','','raw');
-- like: same argument layout as regexp, with a LIKE pattern.
select count(*) from plato.like('','Inpu%');
select count(*) from plato.like('','Inpu%','');
select count(*) from plato.like('','Inpu%','','raw');

Original file line number Diff line number Diff line change
Expand Up @@ -2042,6 +2042,27 @@
"uri": "https://{canondata_backend}/1942415/9dc26178536314feaac77333a6a0e27c8703d1e2/resource.tar.gz#test.test_pg-sublink_having_any-default.txt-Results_/results.txt"
}
],
"test.test[pg-table_func-default.txt-Debug]": [
{
"checksum": "afed4824bc574f8c4d4470e01e377627",
"size": 4991,
"uri": "https://{canondata_backend}/1784826/f75802903846b7b25514887495c6eaa1859db07a/resource.tar.gz#test.test_pg-table_func-default.txt-Debug_/opt.yql"
}
],
"test.test[pg-table_func-default.txt-Plan]": [
{
"checksum": "bf2b0c772eaf69c15399605d7fbd7b0e",
"size": 14773,
"uri": "https://{canondata_backend}/1784826/f75802903846b7b25514887495c6eaa1859db07a/resource.tar.gz#test.test_pg-table_func-default.txt-Plan_/plan.txt"
}
],
"test.test[pg-table_func-default.txt-Results]": [
{
"checksum": "db24edd3094d41f02121a7b1f3629af3",
"size": 9490,
"uri": "https://{canondata_backend}/1784826/f75802903846b7b25514887495c6eaa1859db07a/resource.tar.gz#test.test_pg-table_func-default.txt-Results_/results.txt"
}
],
"test.test[pg-tpcds-q20-default.txt-Debug]": [
{
"checksum": "3d98e10d734329d04d97423b4026d52d",
Expand Down

0 comments on commit a8fb87e

Please sign in to comment.