From 26ed08b1b003cb7a4ed27010286a2117f7e8ec8a Mon Sep 17 00:00:00 2001 From: Pisarenko Grigoriy Date: Tue, 19 Nov 2024 16:50:40 +0300 Subject: [PATCH] YQ RD added brackets during pushdown (#11742) --- .../yql_generic_predicate_pushdown.cpp | 8 +-- ydb/tests/fq/yds/test_row_dispatcher.py | 52 ++++++++++--------- 2 files changed, 32 insertions(+), 28 deletions(-) diff --git a/ydb/library/yql/providers/generic/provider/yql_generic_predicate_pushdown.cpp b/ydb/library/yql/providers/generic/provider/yql_generic_predicate_pushdown.cpp index 7dbbf2fb6925..028401af2a57 100644 --- a/ydb/library/yql/providers/generic/provider/yql_generic_predicate_pushdown.cpp +++ b/ydb/library/yql/providers/generic/provider/yql_generic_predicate_pushdown.cpp @@ -629,7 +629,7 @@ namespace NYql { auto left = FormatExpression(comparison.left_value()); auto right = FormatExpression(comparison.right_value()); - return left + operation + right; + return TStringBuilder() << "(" << left << operation << right << ")"; } TString FormatIn(const TPredicate_TIn& in) { @@ -639,7 +639,7 @@ namespace NYql { if (!list.empty()) { list << ", "; } else { - list << value << " IN ("; + list << "(" << value << " IN ("; } list << FormatExpression(expr); } @@ -648,14 +648,14 @@ namespace NYql { throw yexception() << "failed to format IN statement, no operands"; } - list << ")"; + list << "))"; return list.Str(); } TString FormatRegexp(const TPredicate::TRegexp& regexp) { const auto& value = FormatExpression(regexp.value()); const auto& pattern = FormatExpression(regexp.pattern()); - return TStringBuilder() << value << " REGEXP " << pattern; + return TStringBuilder() << "(" << value << " REGEXP " << pattern << ")"; } TString FormatPredicate(const TPredicate& predicate) { diff --git a/ydb/tests/fq/yds/test_row_dispatcher.py b/ydb/tests/fq/yds/test_row_dispatcher.py index 1f121edc8d6b..94e31a4f7983 100644 --- a/ydb/tests/fq/yds/test_row_dispatcher.py +++ b/ydb/tests/fq/yds/test_row_dispatcher.py @@ -314,21 +314,23 @@ def test_filters_non_optional_field(self, kikimr, client): '{"time": 102, "data": "hello2", "event": "event2"}'] filter = "time > 101;" expected = ['102'] - self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE `time` > 101') + self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE (`time` > 101)') filter = 'data = "hello2"' - self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE `data` = \\"hello2\\"') + self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE (`data` = \\"hello2\\")') filter = ' event IS NOT DISTINCT FROM "event2"' - self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE `event` IS NOT DISTINCT FROM \\"event2\\"') + self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE (`event` IS NOT DISTINCT FROM \\"event2\\")') filter = ' event IS DISTINCT FROM "event1"' - self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE `event` IS DISTINCT FROM \\"event1\\"') + self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE (`event` IS DISTINCT FROM \\"event1\\")') filter = 'event IN ("event2")' - self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE `event` IN (\\"event2\\")') + self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE (`event` IN (\\"event2\\"))') + filter = 'event NOT IN ("event1", "event3")' + self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE (NOT (`event` IN (\\"event1\\", \\"event3\\")))') filter = 'event IN ("1", "2", "3", "4", "5", "6", "7", "event2")' - self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE `event` IN (\\"1\\"') + self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE (`event` IN (\\"1\\"') filter = ' event IS DISTINCT FROM data AND event IN ("1", "2", "3", "4", "5", "6", "7", "event2")' - self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE (`event` IS DISTINCT FROM `data` AND `event` IN (\\"1\\"') + self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE ((`event` IS DISTINCT FROM `data`) AND (`event` IN (\\"1\\"') filter = ' IF(event = "event2", event IS DISTINCT FROM data, FALSE)' - self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE IF(`event` = \\"event2\\", `event` IS DISTINCT FROM `data`, FALSE)') + self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE IF((`event` = \\"event2\\"), (`event` IS DISTINCT FROM `data`), FALSE)') @yq_v1 def test_filters_optional_field(self, kikimr, client): @@ -346,37 +348,39 @@ def test_filters_optional_field(self, kikimr, client): '{"time": 102, "data": "hello2", "event": "event2", "flag": true, "field1": 5, "field2": 1005}'] expected = ['102'] filter = 'data = "hello2"' - self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE `data` = \\"hello2\\"') + self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE (`data` = \\"hello2\\")') filter = 'flag' self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE `flag`') filter = 'time * (field2 - field1) != 0' - self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE (`time` * (`field2` - `field1`)) <> 0') + self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE ((`time` * (`field2` - `field1`)) <> 0)') filter = '(field1 % field2) / 5 = 1' - self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE ((`field1` % `field2`) / 5) = 1') + self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE (((`field1` % `field2`) / 5) = 1)') filter = ' event IS NOT DISTINCT FROM "event2"' - self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE `event` IS NOT DISTINCT FROM \\"event2\\"') + self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE (`event` IS NOT DISTINCT FROM \\"event2\\")') filter = ' event IS DISTINCT FROM "event1"' - self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE `event` IS DISTINCT FROM \\"event1\\"') + self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE (`event` IS DISTINCT FROM \\"event1\\")') filter = ' field1 IS DISTINCT FROM field2' - self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE `field1` IS DISTINCT FROM `field2`') + self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE (`field1` IS DISTINCT FROM `field2`)') filter = 'time == 102 OR (field2 IS NOT DISTINCT FROM 1005 AND Random(field1) < 10.0)' - self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE (`time` = 102 OR `field2` IS NOT DISTINCT FROM 1005)') + self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE ((`time` = 102) OR (`field2` IS NOT DISTINCT FROM 1005))') filter = 'event IN ("event2")' - self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE `event` IN (\\"event2\\")') + self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE (`event` IN (\\"event2\\"))') filter = 'event IN ("1", "2", "3", "4", "5", "6", "7", "event2")' - self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE `event` IN (\\"1\\"') + self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE (`event` IN (\\"1\\"') filter = ' event IS DISTINCT FROM data AND event IN ("1", "2", "3", "4", "5", "6", "7", "event2")' - self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE (`event` IS DISTINCT FROM `data` AND COALESCE(`event` IN (\\"1\\"') + self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE ((`event` IS DISTINCT FROM `data`) AND COALESCE((`event` IN (\\"1\\"') filter = ' IF(event == "event2", event IS DISTINCT FROM data, FALSE)' - self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE IF(COALESCE(`event` = \\"event2\\", FALSE), `event` IS DISTINCT FROM `data`, FALSE)') + self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE IF(COALESCE((`event` = \\"event2\\"), FALSE), (`event` IS DISTINCT FROM `data`), FALSE)') filter = ' COALESCE(event = "event2", TRUE)' - self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE COALESCE(`event` = \\"event2\\", TRUE)') + self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE COALESCE((`event` = \\"event2\\"), TRUE)') filter = ' COALESCE(event = "event2", data = "hello2", TRUE)' - self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE COALESCE(`event` = \\"event2\\", `data` = \\"hello2\\", TRUE)') + self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE COALESCE((`event` = \\"event2\\"), (`data` = \\"hello2\\"), TRUE)') filter = " event ?? '' REGEXP @@e.*e.*t2@@" - self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE COALESCE(`event`, \\"\\") REGEXP \\"e.*e.*t2\\"') + self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE (COALESCE(`event`, \\"\\") REGEXP \\"e.*e.*t2\\")') + filter = " event ?? '' NOT REGEXP @@e.*e.*t1@@" + self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE (NOT (COALESCE(`event`, \\"\\") REGEXP \\"e.*e.*t1\\"))') filter = " event ?? '' REGEXP data ?? '' OR time = 102" - self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE (COALESCE(`event`, \\"\\") REGEXP COALESCE(`data`, \\"\\") OR `time` = 102)') + self.run_and_check(kikimr, client, sql + filter, data, expected, 'predicate: WHERE ((COALESCE(`event`, \\"\\") REGEXP COALESCE(`data`, \\"\\")) OR (`time` = 102))') @yq_v1 def test_filter_missing_fields(self, kikimr, client): @@ -487,7 +491,7 @@ def test_filter_with_mr(self, kikimr, client): stop_yds_query(client, query_id) issues = str(client.describe_query(query_id).result.query.transient_issue) - assert "Row dispatcher will use the predicate: WHERE `event_class` =" in issues, "Incorrect Issues: " + issues + assert "Row dispatcher will use the predicate: WHERE (`event_class` =" in issues, "Incorrect Issues: " + issues @yq_v1 def test_start_new_query(self, kikimr, client):