From 609949d1011849a3c828e49e899ae30288ac91e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Santos?= Date: Wed, 18 Sep 2024 14:24:51 +0100 Subject: [PATCH 1/2] Comparison row count check secondary datasource filter fix --- .../check/row_count_comparison_check.py | 7 +++++ .../data_source/test_row_count_comparison.py | 30 +++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/soda/core/soda/execution/check/row_count_comparison_check.py b/soda/core/soda/execution/check/row_count_comparison_check.py index 03cdbaee5..2fef849c2 100644 --- a/soda/core/soda/execution/check/row_count_comparison_check.py +++ b/soda/core/soda/execution/check/row_count_comparison_check.py @@ -26,6 +26,7 @@ def __init__( ) data_source_scan = self.data_source_scan + data_source_scan_cfg = self.data_source_scan.data_source_scan_cfg scan = data_source_scan.scan row_count_comparison_check_cfg: RowCountComparisonCheckCfg = self.check_cfg @@ -38,6 +39,12 @@ def __init__( other_table: Table = data_source_scan.get_or_create_table(row_count_comparison_check_cfg.other_table_name) self.other_partition = other_table.get_or_create_partition(row_count_comparison_check_cfg.other_partition_name) + # If the other partition is None, we ignore the partition_cfg setup + if row_count_comparison_check_cfg.other_partition_name: + other_table_cfg = data_source_scan_cfg.get_or_create_table_cfg(row_count_comparison_check_cfg.other_table_name) + other_partition_cfg = other_table_cfg.find_partition(row_count_comparison_check_cfg.location.file_path, row_count_comparison_check_cfg.other_partition_name) + self.other_partition.set_partition_cfg(other_partition_cfg) + self.metrics["row_count"] = self.data_source_scan.resolve_metric( NumericQueryMetric( data_source_scan=self.data_source_scan, diff --git a/soda/core/tests/data_source/test_row_count_comparison.py b/soda/core/tests/data_source/test_row_count_comparison.py index c117faae0..10bbd0af3 100644 --- a/soda/core/tests/data_source/test_row_count_comparison.py +++ b/soda/core/tests/data_source/test_row_count_comparison.py @@ -61,3 +61,33 @@ def test_row_count_comparison_cross_data_source(data_source_fixture: DataSourceF scan.execute() scan.assert_all_checks_pass() + +def test_row_count_comparison_cross_data_source_with_filter(data_source_fixture: DataSourceFixture): + """Does not really create two connections and test cross data sources with filtering, that is handled in integration tests. + + Tests syntax parsing and check execution. + """ + customers_table_name = data_source_fixture.ensure_test_table(customers_test_table) + rawcustomers_table_name = data_source_fixture.ensure_test_table(raw_customers_test_table) + + # Reuse the same data source name + other_data_source_name = data_source_fixture.data_source.data_source_name + + scan = data_source_fixture.create_test_scan() + scan.add_sodacl_yaml_str( + f""" + filter {customers_table_name} [daily]: + where: cst_size IS NULL + + filter {rawcustomers_table_name} [daily-ref]: + where: cst_size IS NULL + + checks for {customers_table_name} [daily]: + - row_count same as {rawcustomers_table_name} daily-ref in {other_data_source_name} + """ + ) + + scan.execute() + print(scan.get_passing_queries()) + + scan.assert_all_checks_pass() From 425ee8de31b55f1cfd3bb3bf4e93d04cf16ce96e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 18 Sep 2024 13:53:14 +0000 Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../soda/execution/check/row_count_comparison_check.py | 8 ++++++-- soda/core/tests/data_source/test_row_count_comparison.py | 1 + 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/soda/core/soda/execution/check/row_count_comparison_check.py b/soda/core/soda/execution/check/row_count_comparison_check.py index 2fef849c2..9adc4f0ad 100644 --- a/soda/core/soda/execution/check/row_count_comparison_check.py +++ b/soda/core/soda/execution/check/row_count_comparison_check.py @@ -41,8 +41,12 @@ def __init__( # If the other partition is None, we ignore the partition_cfg setup if row_count_comparison_check_cfg.other_partition_name: - other_table_cfg = data_source_scan_cfg.get_or_create_table_cfg(row_count_comparison_check_cfg.other_table_name) - other_partition_cfg = other_table_cfg.find_partition(row_count_comparison_check_cfg.location.file_path, row_count_comparison_check_cfg.other_partition_name) + other_table_cfg = data_source_scan_cfg.get_or_create_table_cfg( + row_count_comparison_check_cfg.other_table_name + ) + other_partition_cfg = other_table_cfg.find_partition( + row_count_comparison_check_cfg.location.file_path, row_count_comparison_check_cfg.other_partition_name + ) self.other_partition.set_partition_cfg(other_partition_cfg) self.metrics["row_count"] = self.data_source_scan.resolve_metric( diff --git a/soda/core/tests/data_source/test_row_count_comparison.py b/soda/core/tests/data_source/test_row_count_comparison.py index 10bbd0af3..9186b3386 100644 --- a/soda/core/tests/data_source/test_row_count_comparison.py +++ b/soda/core/tests/data_source/test_row_count_comparison.py @@ -62,6 +62,7 @@ def test_row_count_comparison_cross_data_source(data_source_fixture: DataSourceF scan.assert_all_checks_pass() + def test_row_count_comparison_cross_data_source_with_filter(data_source_fixture: DataSourceFixture): """Does not really create two connections and test cross data sources with filtering, that is handled in integration tests.