From ea6a93e9283ce3cd1a6a40034985b4ecdd92e839 Mon Sep 17 00:00:00 2001 From: Dmitry Kardymon Date: Fri, 17 May 2024 16:02:50 +0300 Subject: [PATCH] YQ-3221 Result set row is empty with empty schema (#4607) --- ydb/core/external_sources/object_storage.cpp | 2 +- .../providers/common/provider/yql_provider.cpp | 2 +- ydb/tests/fq/s3/test_bindings.py | 14 ++++++++++++++ ydb/tests/fq/s3/test_formats.py | 17 +++++++++++++++++ ydb/tests/fq/yds/test_yds_bindings.py | 15 +++++++++++++++ 5 files changed, 48 insertions(+), 2 deletions(-) diff --git a/ydb/core/external_sources/object_storage.cpp b/ydb/core/external_sources/object_storage.cpp index 40c2c324e8b5..cb3cebe24aa7 100644 --- a/ydb/core/external_sources/object_storage.cpp +++ b/ydb/core/external_sources/object_storage.cpp @@ -248,7 +248,7 @@ struct TObjectStorageExternalSource : public IExternalSource { ++realSchemaColumnsCount; } - if (realSchemaColumnsCount > 1) { + if (realSchemaColumnsCount != 1) { issues.AddIssue(MakeErrorIssue(Ydb::StatusIds::BAD_REQUEST, TStringBuilder{} << TStringBuilder() << "Only one column in schema supported in raw format (you have " << realSchemaColumnsCount << " fields)")); } diff --git a/ydb/library/yql/providers/common/provider/yql_provider.cpp b/ydb/library/yql/providers/common/provider/yql_provider.cpp index ed354a3af4fb..a3d50c675d35 100644 --- a/ydb/library/yql/providers/common/provider/yql_provider.cpp +++ b/ydb/library/yql/providers/common/provider/yql_provider.cpp @@ -1532,7 +1532,7 @@ bool ValidateFormatForInput( ++realSchemaColumnsCount; } - if (realSchemaColumnsCount > 1) { + if (realSchemaColumnsCount != 1) { ctx.AddError(TIssue(TStringBuilder() << "Only one column in schema supported in raw format (you have " << realSchemaColumnsCount << " fields)")); return false; diff --git a/ydb/tests/fq/s3/test_bindings.py b/ydb/tests/fq/s3/test_bindings.py index 0f378a88d20d..b901cecdf676 100644 --- a/ydb/tests/fq/s3/test_bindings.py +++ b/ydb/tests/fq/s3/test_bindings.py @@ -645,3 +645,17 @@ def test_ast_in_failed_query_compilation(self, kikimr, s3, client, unique_prefix ast = client.describe_query(query_id).result.query.ast.data assert "(\'columns \'(\'\"some_unknown_column\"))" in ast, "Invalid query ast" + + @yq_all + @pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True) + def test_raw_empty_schema_binding(self, kikimr, client, unique_prefix): + kikimr.control_plane.wait_bootstrap(1) + connection_response = client.create_storage_connection(unique_prefix + "fruitbucket", "fbucket") + binding_response = client.create_object_storage_binding(name=unique_prefix + "my_binding", + path="fruits.csv", + format="raw", + connection_id=connection_response.result.connection_id, + columns=[], + check_issues=False) + assert "Only one column in schema supported in raw format" in str(binding_response.issues), str( + binding_response.issues) \ No newline at end of file diff --git a/ydb/tests/fq/s3/test_formats.py b/ydb/tests/fq/s3/test_formats.py index dac932a7e2a6..56b4b6eee792 100644 --- a/ydb/tests/fq/s3/test_formats.py +++ b/ydb/tests/fq/s3/test_formats.py @@ -492,3 +492,20 @@ def test_precompute(self, kikimr, s3, client, unique_prefix): assert len(result_set.rows) == 1 assert result_set.rows[0].items[0].bytes_value == b"Pear" assert result_set.rows[0].items[1].int32_value == 15 + + @yq_all + @pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True) + def test_raw_empty_schema_query(self, kikimr, s3, client, unique_prefix): + self.create_bucket_and_upload_file("test.parquet", s3, kikimr) + storage_connection_name = unique_prefix + "fruitbucket" + client.create_storage_connection(storage_connection_name, "fbucket") + sql = f''' + SELECT * FROM `{storage_connection_name}`.`*` + WITH (format=raw, SCHEMA ()); + ''' + + query_id = client.create_query("test_raw_empty_schema", sql, type=fq.QueryContent.QueryType.ANALYTICS).result.query_id + client.wait_query_status(query_id, fq.QueryMeta.FAILED) + describe_result = client.describe_query(query_id).result + describe_string = "{}".format(describe_result) + assert r"Only one column in schema supported in raw format" in describe_string diff --git a/ydb/tests/fq/yds/test_yds_bindings.py b/ydb/tests/fq/yds/test_yds_bindings.py index 4b64cd648297..ab9974cae5ee 100644 --- a/ydb/tests/fq/yds/test_yds_bindings.py +++ b/ydb/tests/fq/yds/test_yds_bindings.py @@ -55,3 +55,18 @@ def test_yds_insert(self, client): assert result_set.rows[0].items[1].text_value == 'xxx' assert result_set.rows[1].items[0].int32_value == 456 assert result_set.rows[1].items[1].text_value == 'yyy' + + @yq_v1 + def test_raw_empty_schema_binding(self, kikimr, client, yq_version): + self.init_topics(f"pq_test_raw_empty_schema_binding_{yq_version}") + connection_response = client.create_yds_connection("myyds2", os.getenv("YDB_DATABASE"), + os.getenv("YDB_ENDPOINT")) + assert not connection_response.issues, str(connection_response.issues) + binding_response = client.create_yds_binding(name="my_binding", + stream=self.input_topic, + format="raw", + connection_id=connection_response.result.connection_id, + columns=[], + check_issues=False) + assert "Only one column in schema supported in raw format" in str(binding_response.issues), str( + binding_response.issues)