From b91b1a92b43baafc71090a71226d8271a4a63dbf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Santos?=
Date: Tue, 22 Oct 2024 20:05:50 +0100
Subject: [PATCH] Comparison row count check secondary datasource filter fix (#2165)

* Comparison row count check secondary datasource filter fix

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Milan Lukac
---
 .../check/row_count_comparison_check.py       | 11 +++++++
 .../data_source/test_row_count_comparison.py  | 31 +++++++++++++++++++
 2 files changed, 42 insertions(+)

diff --git a/soda/core/soda/execution/check/row_count_comparison_check.py b/soda/core/soda/execution/check/row_count_comparison_check.py
index 03cdbaee5..9adc4f0ad 100644
--- a/soda/core/soda/execution/check/row_count_comparison_check.py
+++ b/soda/core/soda/execution/check/row_count_comparison_check.py
@@ -26,6 +26,7 @@ def __init__(
         )
 
         data_source_scan = self.data_source_scan
+        data_source_scan_cfg = self.data_source_scan.data_source_scan_cfg
         scan = data_source_scan.scan
 
         row_count_comparison_check_cfg: RowCountComparisonCheckCfg = self.check_cfg
@@ -38,6 +39,16 @@ def __init__(
         other_table: Table = data_source_scan.get_or_create_table(row_count_comparison_check_cfg.other_table_name)
         self.other_partition = other_table.get_or_create_partition(row_count_comparison_check_cfg.other_partition_name)
 
+        # If the other partition is None, we ignore the partition_cfg setup
+        if row_count_comparison_check_cfg.other_partition_name:
+            other_table_cfg = data_source_scan_cfg.get_or_create_table_cfg(
+                row_count_comparison_check_cfg.other_table_name
+            )
+            other_partition_cfg = other_table_cfg.find_partition(
+                row_count_comparison_check_cfg.location.file_path, row_count_comparison_check_cfg.other_partition_name
+            )
+            self.other_partition.set_partition_cfg(other_partition_cfg)
+
         self.metrics["row_count"] = self.data_source_scan.resolve_metric(
             NumericQueryMetric(
                 data_source_scan=self.data_source_scan,
diff --git a/soda/core/tests/data_source/test_row_count_comparison.py b/soda/core/tests/data_source/test_row_count_comparison.py
index c117faae0..9186b3386 100644
--- a/soda/core/tests/data_source/test_row_count_comparison.py
+++ b/soda/core/tests/data_source/test_row_count_comparison.py
@@ -61,3 +61,34 @@ def test_row_count_comparison_cross_data_source(data_source_fixture: DataSourceF
     scan.execute()
 
     scan.assert_all_checks_pass()
+
+
+def test_row_count_comparison_cross_data_source_with_filter(data_source_fixture: DataSourceFixture):
+    """Does not really create two connections and test cross data sources with filtering, that is handled in integration tests.
+
+    Tests syntax parsing and check execution.
+    """
+    customers_table_name = data_source_fixture.ensure_test_table(customers_test_table)
+    rawcustomers_table_name = data_source_fixture.ensure_test_table(raw_customers_test_table)
+
+    # Reuse the same data source name
+    other_data_source_name = data_source_fixture.data_source.data_source_name
+
+    scan = data_source_fixture.create_test_scan()
+    scan.add_sodacl_yaml_str(
+        f"""
+          filter {customers_table_name} [daily]:
+            where: cst_size IS NULL
+
+          filter {rawcustomers_table_name} [daily-ref]:
+            where: cst_size IS NULL
+
+          checks for {customers_table_name} [daily]:
+            - row_count same as {rawcustomers_table_name} daily-ref in {other_data_source_name}
+        """
+    )
+
+    scan.execute()
+    print(scan.get_passing_queries())
+
+    scan.assert_all_checks_pass()