Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support of table functions in pg syntax #9040

Merged
merged 3 commits into from
Sep 10, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ydb/library/yql/core/type_ann/type_ann_core.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12715,7 +12715,7 @@ template <NKikimr::NUdf::EDataSlot DataSlot>

ColumnOrderFunctions["Merge"] = ColumnOrderFunctions["Extend"] = &OrderForMergeExtend;
ColumnOrderFunctions[RightName] = &OrderFromFirst;
ColumnOrderFunctions["UnionAll"] = &OrderForUnionAll;
ColumnOrderFunctions["UnionMerge"] = ColumnOrderFunctions["UnionAll"] = &OrderForUnionAll;
ColumnOrderFunctions["Union"] = &OrderForUnionAll;
ColumnOrderFunctions["EquiJoin"] = &OrderForEquiJoin;
ColumnOrderFunctions["CalcOverWindow"] = &OrderForCalcOverWindow;
Expand Down
159 changes: 144 additions & 15 deletions ydb/library/yql/sql/pg/pg_sql.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1520,12 +1520,9 @@ class TConverter : public IPGParseEvents {
}
} else if (NodeTag(r->val) == T_FuncCall) {
auto func = CAST_NODE(FuncCall, r->val);
TVector<TString> names;
if (!ExtractFuncName(func, names)) {
if (!ExtractFuncName(func, name, nullptr)) {
return nullptr;
}

name = names.back();
}
}

Expand Down Expand Up @@ -3427,12 +3424,13 @@ class TConverter : public IPGParseEvents {
return {};
}

auto func = ParseFuncCall(CAST_NODE(FuncCall, node), settings, true);
bool injectRead = false;
auto func = ParseFuncCall(CAST_NODE(FuncCall, node), settings, true, injectRead);
if (!func) {
return {};
}

return TFromDesc{ func, alias, colnames, false };
return TFromDesc{ func, alias, colnames, injectRead };
}

TMaybe<TFromDesc> ParseRangeSubselect(const RangeSubselect* value) {
Expand Down Expand Up @@ -3723,7 +3721,8 @@ class TConverter : public IPGParseEvents {
return ParseNullTestExpr(CAST_NODE(NullTest, node), settings);
}
case T_FuncCall: {
return ParseFuncCall(CAST_NODE(FuncCall, node), settings, false);
bool injectRead;
return ParseFuncCall(CAST_NODE(FuncCall, node), settings, false, injectRead);
}
case T_A_ArrayExpr: {
return ParseAArrayExpr(CAST_NODE(A_ArrayExpr, node), settings);
Expand Down Expand Up @@ -4009,7 +4008,124 @@ class TConverter : public IPGParseEvents {
return L(A("PgSubLink"), QA(linkType), L(A("Void")), L(A("Void")), rowTest, L(A("lambda"), QL(), select));
}

TAstNode* ParseFuncCall(const FuncCall* value, const TExprSettings& settings, bool rangeFunction) {
// Builds the `Read!` node for a table function call (`concat`, `concat_view`,
// `range`, `regexp` or `like`; the name is compared case-insensitively).
//
// name   - function name; echoed in the "Unknown table function" error.
// schema - schema qualifier, handed to BuildClusterSinkOrSourceExpression to
//          obtain the source expression of the read.
// args   - call arguments; every one of them must be a string constant and at
//          least one is required.
//
// Returns the `Read!` node, or nullptr when the source expression cannot be
// built or the arguments are rejected (argument problems are reported through
// AddError).
TAstNode* ParseTableRangeFunction(const TString& name, const TString& schema, List* args) {
    auto clusterSource = BuildClusterSinkOrSourceExpression(false, schema);
    if (!clusterSource) {
        return nullptr;
    }

    // Collect the arguments as plain strings; anything but a string constant is an error.
    TVector<TString> params;
    const auto argCount = ListLength(args);
    for (int idx = 0; idx < argCount; ++idx) {
        auto node = ListNodeNth(args, idx);
        if (NodeTag(node) != T_A_Const || NodeTag(CAST_NODE(A_Const, node)->val) != T_String) {
            AddError("Expected String argument for table function");
            return nullptr;
        }

        params.push_back(StrVal(CAST_NODE(A_Const, node)->val));
    }

    if (params.empty()) {
        AddError("Expected at least one argument for table function");
        return nullptr;
    }

    // (String '<value>)
    const auto stringNode = [&](const TString& value) -> TAstNode* {
        return L(A("String"), QAX(value));
    };

    // Shared tail of range/regexp/like: wraps `predicate` into a lambda over `item`,
    // feeds it to MrTableRange together with params[0] and the optional string at
    // `suffixPos` (empty when absent), and attaches a view when params[viewPos] exists.
    const auto makeRangeKey = [&](TAstNode* predicate, size_t suffixPos, size_t viewPos) -> TAstNode* {
        auto itemFilter = L(A("lambda"), QL(A("item")), predicate);
        auto tableRange = L(A("MrTableRange"), QAX(params[0]), itemFilter,
            QAX(params.size() <= suffixPos ? "" : params[suffixPos]));
        auto tableAttr = QL(QA("table"), tableRange);
        if (params.size() <= viewPos) {
            return L(A("Key"), tableAttr);
        }

        return L(A("Key"), tableAttr, QL(QA("view"), stringNode(params[viewPos])));
    };

    const auto funcKind = to_lower(name);
    auto readOptions = QL();
    TAstNode* tableKey = nullptr;

    if (funcKind == "concat" || funcKind == "concat_view") {
        // concat takes a flat list of tables, concat_view takes (table, view) pairs.
        const bool withViews = (funcKind == "concat_view");
        if (withViews && params.size() % 2 != 0) {
            AddError("Expected sequence of pairs of table and view for concat_view");
            return nullptr;
        }

        TVector<TAstNode*> parts;
        parts.push_back(A("MrTableConcat"));
        const size_t step = withViews ? 2 : 1;
        for (size_t pos = 0; pos < params.size(); pos += step) {
            auto tableAttr = QL(QA("table"), stringNode(params[pos]));
            if (withViews) {
                parts.push_back(L(A("Key"), tableAttr, QL(QA("view"), stringNode(params[pos + 1]))));
            } else {
                parts.push_back(L(A("Key"), tableAttr));
            }
        }

        tableKey = VL(parts);
    } else if (funcKind == "range") {
        if (params.size() > 5) {
            AddError("Too many arguments");
            return nullptr;
        }

        readOptions = QL(QL(QA("ignorenonexisting")));

        // params[1] / params[2] are the optional lower / upper bounds compared against `item`.
        TAstNode* predicate = nullptr;
        switch (params.size()) {
            case 1:
                predicate = L(A("Bool"), QA("true"));
                break;
            case 2:
                predicate = L(A(">="), A("item"), stringNode(params[1]));
                break;
            default:
                predicate = L(A("And"),
                    L(A(">="), A("item"), stringNode(params[1])),
                    L(A("<="), A("item"), stringNode(params[2])));
                break;
        }

        tableKey = makeRangeKey(predicate, 3, 4);
    } else if (funcKind == "regexp" || funcKind == "like") {
        if (params.size() < 2 || params.size() > 4) {
            AddError("Expected from 2 to 4 arguments");
            return nullptr;
        }

        readOptions = QL(QL(QA("ignorenonexisting")));

        // regexp passes params[1] to Re2.Grep as is; like first converts it with
        // Re2.PatternFromLike and matches with Re2.Match.
        const bool isRegexp = (funcKind == "regexp");
        auto pattern = stringNode(params[1]);
        if (!isRegexp) {
            pattern = L(A("Apply"), L(A("Udf"), QA("Re2.PatternFromLike")), pattern);
        }

        auto udfName = isRegexp ? QA("Re2.Grep") : QA("Re2.Match");
        auto matcher = L(A("Udf"), udfName, QL(pattern, L(A("Null"))));
        tableKey = makeRangeKey(L(A("Apply"), matcher, A("item")), 2, 3);
    } else {
        AddError(TStringBuilder() << "Unknown table function: " << name);
        return nullptr;
    }

    return L(A("Read!"), A("world"), clusterSource, tableKey, L(A("Void")), readOptions);
}

TAstNode* ParseFuncCall(const FuncCall* value, const TExprSettings& settings, bool rangeFunction, bool& injectRead) {
AT_LOCATION(value);
if (ListLength(value->agg_order) > 0) {
AddError("FuncCall: unsupported agg_order");
Expand Down Expand Up @@ -4052,12 +4168,17 @@ class TConverter : public IPGParseEvents {
}
}

TVector<TString> names;
if (!ExtractFuncName(value, names)) {
TString name;
TString schema;
if (!ExtractFuncName(value, name, rangeFunction ? &schema : nullptr)) {
return nullptr;
}

auto name = names.back();
if (rangeFunction && schema != "pg_catalog") {
injectRead = true;
return ParseTableRangeFunction(name, schema, value->args);
}

if (name == "shobj_description" || name == "obj_description") {
AddWarning(TIssuesIds::PG_COMPAT, name + " function forced to NULL");
return L(A("Null"));
Expand Down Expand Up @@ -4159,7 +4280,8 @@ class TConverter : public IPGParseEvents {
return VL(args.data(), args.size());
}

bool ExtractFuncName(const FuncCall* value, TVector<TString>& names) {
bool ExtractFuncName(const FuncCall* value, TString& name, TString* schemaName) {
TVector<TString> names;
for (int i = 0; i < ListLength(value->funcname); ++i) {
auto x = ListNodeNth(value->funcname, i);
if (NodeTag(x) != T_String) {
Expand All @@ -4180,11 +4302,18 @@ class TConverter : public IPGParseEvents {
return false;
}

if (names.size() == 2 && names[0] != "pg_catalog") {
AddError(TStringBuilder() << "FuncCall: expected pg_catalog, but got: " << names[0]);
return false;
if (names.size() == 2) {
if (!schemaName && names[0] != "pg_catalog") {
AddError(TStringBuilder() << "FuncCall: expected pg_catalog, but got: " << names[0]);
return false;
}

if (schemaName) {
*schemaName = names[0];
}
}

name = names.back();
return true;
}

Expand Down
22 changes: 22 additions & 0 deletions ydb/library/yql/tests/sql/dq_file/part17/canondata/result.json
Original file line number Diff line number Diff line change
Expand Up @@ -2288,6 +2288,28 @@
}
],
"test.test[pg-sublink_having_any-default.txt-Results]": [],
"test.test[pg-table_func-default.txt-Analyze]": [
{
"checksum": "90f90cb0bb8d60304471e5cf9a37436a",
"size": 22788,
"uri": "https://{canondata_backend}/1917492/73fd38e9ffcd658585f52c248a634ae9046b0ff5/resource.tar.gz#test.test_pg-table_func-default.txt-Analyze_/plan.txt"
}
],
"test.test[pg-table_func-default.txt-Debug]": [
{
"checksum": "009e570dc4b46891c5263130b7e90036",
"size": 6644,
"uri": "https://{canondata_backend}/1917492/73fd38e9ffcd658585f52c248a634ae9046b0ff5/resource.tar.gz#test.test_pg-table_func-default.txt-Debug_/opt.yql_patched"
}
],
"test.test[pg-table_func-default.txt-Plan]": [
{
"checksum": "90f90cb0bb8d60304471e5cf9a37436a",
"size": 22788,
"uri": "https://{canondata_backend}/1917492/73fd38e9ffcd658585f52c248a634ae9046b0ff5/resource.tar.gz#test.test_pg-table_func-default.txt-Plan_/plan.txt"
}
],
"test.test[pg-table_func-default.txt-Results]": [],
"test.test[pg-tpcds-q20-default.txt-Analyze]": [
{
"checksum": "212be881133a20b5b73ef1250dbeda51",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2295,6 +2295,20 @@
"uri": "https://{canondata_backend}/1775319/3515b86fb929979a6751f93bd43a0291eaa01262/resource.tar.gz#test.test_pg-sublink_projection_exists_corr-default.txt-Plan_/plan.txt"
}
],
"test.test[pg-table_func-default.txt-Debug]": [
{
"checksum": "f58d79752c5632a904d7c675fd2cd887",
"size": 6681,
"uri": "https://{canondata_backend}/1871182/03581f8f43b6630387f93dcffb64efda102a5104/resource.tar.gz#test.test_pg-table_func-default.txt-Debug_/opt.yql_patched"
}
],
"test.test[pg-table_func-default.txt-Plan]": [
{
"checksum": "95e2fb9330b8431fa9d166b01b6a47b0",
"size": 19319,
"uri": "https://{canondata_backend}/1871182/03581f8f43b6630387f93dcffb64efda102a5104/resource.tar.gz#test.test_pg-table_func-default.txt-Plan_/plan.txt"
}
],
"test.test[pg-tpcds-q07-default.txt-Debug]": [
{
"checksum": "f61d3822f18e6a66d0991534554f20fb",
Expand Down
7 changes: 7 additions & 0 deletions ydb/library/yql/tests/sql/sql2yql/canondata/result.json
Original file line number Diff line number Diff line change
Expand Up @@ -14076,6 +14076,13 @@
"uri": "https://{canondata_backend}/1881367/79a71c1478c556da1931a7565c12bdd14cc63567/resource.tar.gz#test_sql2yql.test_pg-sublink_where_in_corr_/sql.yql"
}
],
"test_sql2yql.test[pg-table_func]": [
{
"checksum": "52fc030d0a5ec71d08efd26d9f101c65",
"size": 8198,
"uri": "https://{canondata_backend}/1784826/4a52e4f284dee1aa5ddb5ef05566fbf6d624ec38/resource.tar.gz#test_sql2yql.test_pg-table_func_/sql.yql"
}
],
"test_sql2yql.test[pg-tpcds-q01]": [
{
"checksum": "d7a119a877ea0e8b9211601d372e99b9",
Expand Down
15 changes: 15 additions & 0 deletions ydb/library/yql/tests/sql/suites/pg/table_func.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
--!syntax_pg
select count(*) from plato.concat('Input','Input'); -- concat: every argument is a table name
select count(*) from plato.concat_view('Input','raw','Input','raw'); -- concat_view: (table, view) pairs
select count(*) from plato.range(''); -- range, 1 arg: item filter is constant true
select count(*) from plato.range('','A'); -- range, 2 args: item >= 'A'
select count(*) from plato.range('','A','Z'); -- range, 3 args: 'A' <= item <= 'Z'
select count(*) from plato.range('','A','Z',''); -- range, 4 args: 4th is the trailing MrTableRange string (presumably a suffix)
select count(*) from plato.range('','A','Z','','raw'); -- range, 5 args: 5th is the view
select count(*) from plato.regexp('','Inpu.?'); -- regexp: 2nd arg is the Re2.Grep pattern
select count(*) from plato.regexp('','Inpu.?',''); -- regexp, 3 args: 3rd is the trailing MrTableRange string
select count(*) from plato.regexp('','Inpu.?','','raw'); -- regexp, 4 args: 4th is the view
select count(*) from plato.like('','Inpu%'); -- like: 2nd arg goes through Re2.PatternFromLike, then Re2.Match
select count(*) from plato.like('','Inpu%',''); -- like, 3 args: 3rd is the trailing MrTableRange string
select count(*) from plato.like('','Inpu%','','raw'); -- like, 4 args: 4th is the view

Original file line number Diff line number Diff line change
Expand Up @@ -2042,6 +2042,27 @@
"uri": "https://{canondata_backend}/1942415/9dc26178536314feaac77333a6a0e27c8703d1e2/resource.tar.gz#test.test_pg-sublink_having_any-default.txt-Results_/results.txt"
}
],
"test.test[pg-table_func-default.txt-Debug]": [
{
"checksum": "afed4824bc574f8c4d4470e01e377627",
"size": 4991,
"uri": "https://{canondata_backend}/1784826/f75802903846b7b25514887495c6eaa1859db07a/resource.tar.gz#test.test_pg-table_func-default.txt-Debug_/opt.yql"
}
],
"test.test[pg-table_func-default.txt-Plan]": [
{
"checksum": "bf2b0c772eaf69c15399605d7fbd7b0e",
"size": 14773,
"uri": "https://{canondata_backend}/1784826/f75802903846b7b25514887495c6eaa1859db07a/resource.tar.gz#test.test_pg-table_func-default.txt-Plan_/plan.txt"
}
],
"test.test[pg-table_func-default.txt-Results]": [
{
"checksum": "db24edd3094d41f02121a7b1f3629af3",
"size": 9490,
"uri": "https://{canondata_backend}/1784826/f75802903846b7b25514887495c6eaa1859db07a/resource.tar.gz#test.test_pg-table_func-default.txt-Results_/results.txt"
}
],
"test.test[pg-tpcds-q20-default.txt-Debug]": [
{
"checksum": "3d98e10d734329d04d97423b4026d52d",
Expand Down
Loading