Skip to content

Commit

Permalink
Comparison row count check secondary datasource filter fix (#2165)
Browse files Browse the repository at this point in the history
* Comparison row count check secondary datasource filter fix

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Milan Lukac <m1n0@users.noreply.github.com>
  • Loading branch information
3 people authored Oct 22, 2024
1 parent fb60b83 commit b91b1a9
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 0 deletions.
11 changes: 11 additions & 0 deletions soda/core/soda/execution/check/row_count_comparison_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ def __init__(
)

data_source_scan = self.data_source_scan
data_source_scan_cfg = self.data_source_scan.data_source_scan_cfg
scan = data_source_scan.scan

row_count_comparison_check_cfg: RowCountComparisonCheckCfg = self.check_cfg
Expand All @@ -38,6 +39,16 @@ def __init__(
other_table: Table = data_source_scan.get_or_create_table(row_count_comparison_check_cfg.other_table_name)
self.other_partition = other_table.get_or_create_partition(row_count_comparison_check_cfg.other_partition_name)

# Only look up and attach a partition cfg when an other-partition name was given; without a name this setup is skipped
if row_count_comparison_check_cfg.other_partition_name:
other_table_cfg = data_source_scan_cfg.get_or_create_table_cfg(
row_count_comparison_check_cfg.other_table_name
)
other_partition_cfg = other_table_cfg.find_partition(
row_count_comparison_check_cfg.location.file_path, row_count_comparison_check_cfg.other_partition_name
)
self.other_partition.set_partition_cfg(other_partition_cfg)

self.metrics["row_count"] = self.data_source_scan.resolve_metric(
NumericQueryMetric(
data_source_scan=self.data_source_scan,
Expand Down
31 changes: 31 additions & 0 deletions soda/core/tests/data_source/test_row_count_comparison.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,34 @@ def test_row_count_comparison_cross_data_source(data_source_fixture: DataSourceF
scan.execute()

scan.assert_all_checks_pass()


def test_row_count_comparison_cross_data_source_with_filter(data_source_fixture: DataSourceFixture):
    """Run a cross data source row count comparison with a filter on both sides.

    This does not really create two connections; genuine cross data source
    filtering is handled in the integration tests. Here only SodaCL syntax
    parsing and check execution are exercised.
    """
    customers_table_name = data_source_fixture.ensure_test_table(customers_test_table)
    raw_customers_table_name = data_source_fixture.ensure_test_table(raw_customers_test_table)

    # Reuse the fixture's own data source name as the "other" data source.
    other_data_source_name = data_source_fixture.data_source.data_source_name

    scan = data_source_fixture.create_test_scan()
    # YAML nesting is significant: each `where` belongs under its `filter`
    # header and the check item belongs under `checks for`.
    scan.add_sodacl_yaml_str(
        f"""
          filter {customers_table_name} [daily]:
            where: cst_size IS NULL
          filter {raw_customers_table_name} [daily-ref]:
            where: cst_size IS NULL
          checks for {customers_table_name} [daily]:
            - row_count same as {raw_customers_table_name} daily-ref in {other_data_source_name}
        """
    )

    scan.execute()

    scan.assert_all_checks_pass()

0 comments on commit b91b1a9

Please sign in to comment.