diff --git a/ydb/library/yql/core/issue/protos/issue_id.proto b/ydb/library/yql/core/issue/protos/issue_id.proto index c8ecb8f7ae3f..be254a95a234 100644 --- a/ydb/library/yql/core/issue/protos/issue_id.proto +++ b/ydb/library/yql/core/issue/protos/issue_id.proto @@ -141,6 +141,7 @@ message TIssuesIds { YQL_OFFSET_WITHOUT_SORT = 4537; YQL_DEPRECATED_BINDINGS = 4538; YQL_HINT_INVALID_PARAMETERS = 4539; + YQL_UNTYPED_STRING_LITERALS = 4540; // yql parser errors YQL_NOT_ALLOWED_IN_DISCOVERY = 4600; diff --git a/ydb/library/yql/core/issue/yql_issue.txt b/ydb/library/yql/core/issue/yql_issue.txt index a46832511d2e..675f1ae66991 100644 --- a/ydb/library/yql/core/issue/yql_issue.txt +++ b/ydb/library/yql/core/issue/yql_issue.txt @@ -646,4 +646,8 @@ ids { ids { code: PG_NO_LOCKING_SUPPORT severity: S_WARNING -} \ No newline at end of file +} +ids { + code: YQL_UNTYPED_STRING_LITERALS + severity: S_WARNING +} diff --git a/ydb/library/yql/sql/settings/translation_settings.h b/ydb/library/yql/sql/settings/translation_settings.h index 2dad2e646dda..9ea53b923a0b 100644 --- a/ydb/library/yql/sql/settings/translation_settings.h +++ b/ydb/library/yql/sql/settings/translation_settings.h @@ -114,6 +114,7 @@ namespace NSQLTranslation { THashSet AutoParametrizeExprDisabledScopes = {}; TGUCSettings::TPtr GUCSettings = std::make_shared(); + bool UnicodeLiterals = false; }; bool ParseTranslationSettings(const TString& query, NSQLTranslation::TTranslationSettings& settings, NYql::TIssues& issues); diff --git a/ydb/library/yql/sql/v1/SQLv1.g.in b/ydb/library/yql/sql/v1/SQLv1.g.in index 9be5921ef070..05da99626b15 100644 --- a/ydb/library/yql/sql/v1/SQLv1.g.in +++ b/ydb/library/yql/sql/v1/SQLv1.g.in @@ -1783,7 +1783,7 @@ fragment STRING_SINGLE: (QUOTE_SINGLE STRING_CORE_SINGLE* QUOTE_SINGLE); fragment STRING_DOUBLE: (QUOTE_DOUBLE STRING_CORE_DOUBLE* QUOTE_DOUBLE); fragment STRING_MULTILINE: (DOUBLE_AT .* DOUBLE_AT)+ AT?; -STRING_VALUE: ((STRING_SINGLE | STRING_DOUBLE | STRING_MULTILINE) (U | 
Y | J | P (T | B | V)?)?); +STRING_VALUE: ((STRING_SINGLE | STRING_DOUBLE | STRING_MULTILINE) (S | B | T | U | Y | J | P (T | B | V)?)?); ID_PLAIN: ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '_' | DIGIT)*; diff --git a/ydb/library/yql/sql/v1/context.cpp b/ydb/library/yql/sql/v1/context.cpp index b85e015789e6..4adbbf2d0c91 100644 --- a/ydb/library/yql/sql/v1/context.cpp +++ b/ydb/library/yql/sql/v1/context.cpp @@ -94,6 +94,7 @@ TContext::TContext(const NSQLTranslation::TTranslationSettings& settings, Scoped = MakeIntrusive(); AllScopes.push_back(Scoped); + Scoped->UnicodeLiterals = settings.UnicodeLiterals; if (settings.DefaultCluster) { Scoped->CurrCluster = TDeferredAtom({}, settings.DefaultCluster); auto provider = GetClusterProvider(settings.DefaultCluster); diff --git a/ydb/library/yql/sql/v1/context.h b/ydb/library/yql/sql/v1/context.h index 0ab493a62a1f..c33c574b5f07 100644 --- a/ydb/library/yql/sql/v1/context.h +++ b/ydb/library/yql/sql/v1/context.h @@ -50,6 +50,8 @@ namespace NSQLTranslationV1 { bool PragmaClassicDivision = true; bool PragmaCheckedOps = false; bool StrictJoinKeyTypes = false; + bool UnicodeLiterals = false; + bool WarnUntypedStringLiterals = false; TNamedNodesMap NamedNodes; struct TLocal { diff --git a/ydb/library/yql/sql/v1/node.cpp b/ydb/library/yql/sql/v1/node.cpp index e8fe845cd6e1..84df7e5b608b 100644 --- a/ydb/library/yql/sql/v1/node.cpp +++ b/ydb/library/yql/sql/v1/node.cpp @@ -1406,10 +1406,7 @@ StringContentInternal(TContext& ctx, TPosition pos, const TString& input, EStrin TString str = input; if (mode == EStringContentMode::TypedStringLiteral) { auto lower = to_lower(str); - if (lower.EndsWith("u")) { - str = str.substr(0, str.Size() - 1); - result.Type = NKikimr::NUdf::EDataSlot::Utf8; - } else if (lower.EndsWith("y")) { + if (lower.EndsWith("y")) { str = str.substr(0, str.Size() - 1); result.Type = NKikimr::NUdf::EDataSlot::Yson; } else if (lower.EndsWith("j")) { @@ -1427,6 +1424,21 @@ StringContentInternal(TContext& 
ctx, TPosition pos, const TString& input, EStrin } else if (lower.EndsWith("pv")) { str = str.substr(0, str.Size() - 2); result.PgType = "PgVarchar"; + } else if (lower.EndsWith("s") || lower.EndsWith("b")) { + str = str.substr(0, str.Size() - 1); + result.Type = NKikimr::NUdf::EDataSlot::String; + } else if (lower.EndsWith("u") || lower.EndsWith("t")) { + str = str.substr(0, str.Size() - 1); + result.Type = NKikimr::NUdf::EDataSlot::Utf8; + } else { + if (ctx.Scoped->WarnUntypedStringLiterals) { + ctx.Warning(pos, TIssuesIds::YQL_UNTYPED_STRING_LITERALS) + << "Please add suffix u or t for Utf8 strings or s or b for arbitrary binary strings"; + } + + if (ctx.Scoped->UnicodeLiterals) { + result.Type = NKikimr::NUdf::EDataSlot::Utf8; + } } } diff --git a/ydb/library/yql/sql/v1/sql_query.cpp b/ydb/library/yql/sql/v1/sql_query.cpp index 79f9c3469644..9689e8beeab4 100644 --- a/ydb/library/yql/sql/v1/sql_query.cpp +++ b/ydb/library/yql/sql/v1/sql_query.cpp @@ -1620,7 +1620,16 @@ TNodePtr TSqlQuery::PragmaStatement(const TRule_pragma_stmt& stmt, bool& success } const bool withConfigure = prefix || normalizedPragma == "file" || normalizedPragma == "folder" || normalizedPragma == "udf"; - static const THashSet lexicalScopePragmas = {"classicdivision", "strictjoinkeytypes", "disablestrictjoinkeytypes", "checkedops"}; + static const THashSet lexicalScopePragmas = { + "classicdivision", + "strictjoinkeytypes", + "disablestrictjoinkeytypes", + "checkedops", + "unicodeliterals", + "disableunicodeliterals", + "warnuntypedstringliterals", + "disablewarnuntypedstringliterals", /* must be spelled exactly as the normalizedPragma value compared in this function's else-if chain, otherwise hasLexicalScope stays false for this pragma */ + }; const bool hasLexicalScope = withConfigure || lexicalScopePragmas.contains(normalizedPragma); const bool withFileAlias = normalizedPragma == "file" || normalizedPragma == "folder" || normalizedPragma == "library" || normalizedPragma == "udf"; for (auto pragmaValue : pragmaValues) { @@ -2190,6 +2199,18 @@ TNodePtr TSqlQuery::PragmaStatement(const TRule_pragma_stmt& stmt, bool& success } else if 
(normalizedPragma == "disablestrictjoinkeytypes") { Ctx.Scoped->StrictJoinKeyTypes = false; Ctx.IncrementMonCounter("sql_pragma", "DisableStrictJoinKeyTypes"); + } else if (normalizedPragma == "unicodeliterals") { + Ctx.Scoped->UnicodeLiterals = true; + Ctx.IncrementMonCounter("sql_pragma", "UnicodeLiterals"); + } else if (normalizedPragma == "disableunicodeliterals") { + Ctx.Scoped->UnicodeLiterals = false; + Ctx.IncrementMonCounter("sql_pragma", "DisableUnicodeLiterals"); + } else if (normalizedPragma == "warnuntypedstringliterals") { + Ctx.Scoped->WarnUntypedStringLiterals = true; + Ctx.IncrementMonCounter("sql_pragma", "WarnUntypedStringLiterals"); + } else if (normalizedPragma == "disablewarnuntypedstringliterals") { + Ctx.Scoped->WarnUntypedStringLiterals = false; + Ctx.IncrementMonCounter("sql_pragma", "DisableWarnUntypedStringLiterals"); } else if (normalizedPragma == "unorderedsubqueries") { Ctx.UnorderedSubqueries = true; Ctx.IncrementMonCounter("sql_pragma", "UnorderedSubqueries"); diff --git a/ydb/library/yql/tests/sql/dq_file/part14/canondata/result.json b/ydb/library/yql/tests/sql/dq_file/part14/canondata/result.json index c5cbc0f2e045..f2861f50e2a6 100644 --- a/ydb/library/yql/tests/sql/dq_file/part14/canondata/result.json +++ b/ydb/library/yql/tests/sql/dq_file/part14/canondata/result.json @@ -1011,6 +1011,35 @@ } ], "test.test[expr-to_sorted_set_tuple_key-default.txt-Results]": [], + "test.test[expr-unicode_literals-default.txt-Analyze]": [ + { + "checksum": "a3b64a2cf9903b3868a2dd88a18fc46e", + "size": 922, + "uri": "https://{canondata_backend}/1871002/fb6fb37c565974a6f0c497e8b3e58f6b5bf320b2/resource.tar.gz#test.test_expr-unicode_literals-default.txt-Analyze_/plan.txt" + }, + { + "uri": "file://test.test_expr-unicode_literals-default.txt-Analyze_/extracted" + } + ], + "test.test[expr-unicode_literals-default.txt-Debug]": [ + { + "checksum": "9201dbe44a3334deb0a063d58468a160", + "size": 522, + "uri": 
"https://{canondata_backend}/1871002/fb6fb37c565974a6f0c497e8b3e58f6b5bf320b2/resource.tar.gz#test.test_expr-unicode_literals-default.txt-Debug_/opt.yql_patched" + } + ], + "test.test[expr-unicode_literals-default.txt-Plan]": [ + { + "checksum": "a3b64a2cf9903b3868a2dd88a18fc46e", + "size": 922, + "uri": "https://{canondata_backend}/1871002/fb6fb37c565974a6f0c497e8b3e58f6b5bf320b2/resource.tar.gz#test.test_expr-unicode_literals-default.txt-Plan_/plan.txt" + } + ], + "test.test[expr-unicode_literals-default.txt-Results]": [ + { + "uri": "file://test.test_expr-unicode_literals-default.txt-Results_/extracted" + } + ], "test.test[expr-variant_tuple_comp-default.txt-Analyze]": [ { "checksum": "01775e7c945a56ebf0edc2d478f4f68d", diff --git a/ydb/library/yql/tests/sql/sql2yql/canondata/result.json b/ydb/library/yql/tests/sql/sql2yql/canondata/result.json index 0a281e083086..d8f3feb34e58 100644 --- a/ydb/library/yql/tests/sql/sql2yql/canondata/result.json +++ b/ydb/library/yql/tests/sql/sql2yql/canondata/result.json @@ -5795,6 +5795,13 @@ "uri": "https://{canondata_backend}/1871182/6b10ad6d9884e5faf3a77187ffb9b38b59b46458/resource.tar.gz#test_sql2yql.test_expr-udaf_with_list_zip_/sql.yql" } ], + "test_sql2yql.test[expr-unicode_literals]": [ + { + "checksum": "9be93914e3d28b675e0eee080ef248ec", + "size": 1964, + "uri": "https://{canondata_backend}/1937367/9f749035d8f07b7ae5537f5aebd224641b378134/resource.tar.gz#test_sql2yql.test_expr-unicode_literals_/sql.yql" + } + ], "test_sql2yql.test[expr-untag]": [ { "checksum": "e83bb3d6e0abd1069a2c5e30a7ec6409", @@ -23267,6 +23274,13 @@ "uri": "https://{canondata_backend}/1880306/64654158d6bfb1289c66c626a8162239289559d0/resource.tar.gz#test_sql_format.test_expr-udaf_with_list_zip_/formatted.sql" } ], + "test_sql_format.test[expr-unicode_literals]": [ + { + "checksum": "b470490a33e28dd2537f12d80329216a", + "size": 374, + "uri": 
"https://{canondata_backend}/1937367/9f749035d8f07b7ae5537f5aebd224641b378134/resource.tar.gz#test_sql_format.test_expr-unicode_literals_/formatted.sql" + } + ], "test_sql_format.test[expr-untag]": [ { "checksum": "af1b548d1c51945be876993b053bcc11", diff --git a/ydb/library/yql/tests/sql/suites/expr/unicode_literals.sql b/ydb/library/yql/tests/sql/suites/expr/unicode_literals.sql new file mode 100644 index 000000000000..bf7379e784d0 --- /dev/null +++ b/ydb/library/yql/tests/sql/suites/expr/unicode_literals.sql @@ -0,0 +1,26 @@ +pragma WarnUntypedStringLiterals; +pragma UnicodeLiterals; +$f = ()->{ + return ( + "a"s, + "b"b, + "c"t, + "d"u, + "e"); +}; + +select $f(); + +pragma DisableWarnUntypedStringLiterals; +pragma DisableUnicodeLiterals; +$g = ()->{ + return ( + "a"s, + "b"b, + "c"t, + "d"u, + "e"); +}; + +select $g(); + diff --git a/ydb/library/yql/tests/sql/yt_native_file/part14/canondata/result.json b/ydb/library/yql/tests/sql/yt_native_file/part14/canondata/result.json index caab1b1a3f5c..ad4a13a1ae2e 100644 --- a/ydb/library/yql/tests/sql/yt_native_file/part14/canondata/result.json +++ b/ydb/library/yql/tests/sql/yt_native_file/part14/canondata/result.json @@ -1025,6 +1025,30 @@ "uri": "https://{canondata_backend}/1937367/40af353047a2965dc4907c6a6b7a0b86a14045dd/resource.tar.gz#test.test_expr-to_sorted_set_tuple_key-default.txt-Results_/results.txt" } ], + "test.test[expr-unicode_literals-default.txt-Debug]": [ + { + "checksum": "b21fde16b24ef5500d9c21f811fc800b", + "size": 452, + "uri": "https://{canondata_backend}/1942671/fe81aca6675f95264895c6b4c3bafedf6b92cfd5/resource.tar.gz#test.test_expr-unicode_literals-default.txt-Debug_/opt.yql" + } + ], + "test.test[expr-unicode_literals-default.txt-Plan]": [ + { + "checksum": "a3b64a2cf9903b3868a2dd88a18fc46e", + "size": 922, + "uri": "https://{canondata_backend}/1942671/fe81aca6675f95264895c6b4c3bafedf6b92cfd5/resource.tar.gz#test.test_expr-unicode_literals-default.txt-Plan_/plan.txt" + } + ], + 
"test.test[expr-unicode_literals-default.txt-Results]": [ + { + "checksum": "634838888e147228dfbca0438c1c75d5", + "size": 3698, + "uri": "https://{canondata_backend}/1942671/fe81aca6675f95264895c6b4c3bafedf6b92cfd5/resource.tar.gz#test.test_expr-unicode_literals-default.txt-Results_/results.txt" + }, + { + "uri": "file://test.test_expr-unicode_literals-default.txt-Results_/extracted" + } + ], "test.test[expr-variant_tuple_comp-default.txt-Debug]": [ { "checksum": "535e6582b45481ccb48fdce0a827a92d",