From ff841c32fa9bc7849c7d38506ca5cd56f75321dc Mon Sep 17 00:00:00 2001 From: Claire Carroll Date: Wed, 10 Mar 2021 10:52:02 -0500 Subject: [PATCH] Add fewer_rows_than schema test (#343) --- CHANGELOG.md | 1 + README.md | 14 ++++++ .../data_test_fewer_rows_than_table_1.csv | 4 ++ .../data_test_fewer_rows_than_table_2.csv | 5 +++ .../schema_tests/test_fewer_rows_than.sql | 9 ++++ macros/schema_tests/fewer_rows_than.sql | 43 +++++++++++++++++++ 6 files changed, 76 insertions(+) create mode 100644 integration_tests/data/schema_tests/data_test_fewer_rows_than_table_1.csv create mode 100644 integration_tests/data/schema_tests/data_test_fewer_rows_than_table_2.csv create mode 100644 integration_tests/models/schema_tests/test_fewer_rows_than.sql create mode 100644 macros/schema_tests/fewer_rows_than.sql diff --git a/CHANGELOG.md b/CHANGELOG.md index 46e03eb2..9bc34c80 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ * Add new schema test, `sequential_values` ([#318](https://github.com/fishtown-analytics/dbt-utils/pull/318), inspired by [@hundredwatt](https://github.com/hundredwatt)) * Support `quarter` in the `postgres__last_day` macro ([#333](https://github.com/fishtown-analytics/dbt-utils/pull/333/files), [@seunghanhong](https://github.com/seunghanhong)) * Add new argument, `unit`, to `haversine_distance` [#340](https://github.com/fishtown-analytics/dbt-utils/pull/340) [@bastienboutonnet](https://github.com/bastienboutonnet) +* Add new schema test, `fewer_rows_than` (code originally in [#221](https://github.com/fishtown-analytics/dbt-utils/pull/230/) from [@dmarts](https://github.com/dmarts), merged via [#343](https://github.com/fishtown-analytics/dbt-utils/pull/343/)) ## Fixes diff --git a/README.md b/README.md index 61a853a1..61a093d0 100644 --- a/README.md +++ b/README.md @@ -138,6 +138,20 @@ models: ``` +#### fewer_rows_than ([source](macros/schema_tests/fewer_rows_than.sql)) +This schema test asserts that this model has fewer rows than the referenced model. 
+
+Usage:
+```yaml
+version: 2
+
+models:
+  - name: model_name
+    tests:
+      - dbt_utils.fewer_rows_than:
+          compare_model: ref('other_table_name')
+```
+
 #### equality ([source](macros/schema_tests/equality.sql))
 This schema test asserts the equality of two relations. Optionally specify a subset of columns to compare.
 
diff --git a/integration_tests/data/schema_tests/data_test_fewer_rows_than_table_1.csv b/integration_tests/data/schema_tests/data_test_fewer_rows_than_table_1.csv
new file mode 100644
index 00000000..ba0e5ec6
--- /dev/null
+++ b/integration_tests/data/schema_tests/data_test_fewer_rows_than_table_1.csv
@@ -0,0 +1,4 @@
+field
+1
+2
+3
diff --git a/integration_tests/data/schema_tests/data_test_fewer_rows_than_table_2.csv b/integration_tests/data/schema_tests/data_test_fewer_rows_than_table_2.csv
new file mode 100644
index 00000000..eb0f035a
--- /dev/null
+++ b/integration_tests/data/schema_tests/data_test_fewer_rows_than_table_2.csv
@@ -0,0 +1,5 @@
+field
+1
+2
+3
+4
diff --git a/integration_tests/models/schema_tests/test_fewer_rows_than.sql b/integration_tests/models/schema_tests/test_fewer_rows_than.sql
new file mode 100644
index 00000000..c2ad4cbd
--- /dev/null
+++ b/integration_tests/models/schema_tests/test_fewer_rows_than.sql
@@ -0,0 +1,9 @@
+with data as (
+
+    select * from {{ ref('data_test_fewer_rows_than_table_1') }}
+
+)
+
+select
+    field
+from data
\ No newline at end of file
diff --git a/macros/schema_tests/fewer_rows_than.sql b/macros/schema_tests/fewer_rows_than.sql
new file mode 100644
index 00000000..328c0ea0
--- /dev/null
+++ b/macros/schema_tests/fewer_rows_than.sql
@@ -0,0 +1,43 @@
+{# Schema test: asserts that `model` has strictly fewer rows than `compare_model`; passes when the query returns 0. #}
+{% macro test_fewer_rows_than(model) %}
+  {# Forward kwargs: the implementation reads `compare_model` from them and accepts no other positional argument. #}
+  {{ return(adapter.dispatch('test_fewer_rows_than', packages = dbt_utils._get_utils_namespaces())(model, **kwargs)) }}
+{% endmacro %}
+
+{# Default implementation: returns a single `row_count_delta` row (0 = pass, any positive number = failure). #}
+{% macro default__test_fewer_rows_than(model) %}
+
+{# `compare_model` is the relation this model must stay smaller than; `arg` is the fallback kwarg key. #}
+{% set compare_model = kwargs.get('compare_model', kwargs.get('arg')) %}
+
+-- row count of the model under test
+with a as (
+    select count(*) as count_ourmodel from {{ model }}
+),
+-- row count of the model it is compared against
+b as (
+    select count(*) as count_comparisonmodel from {{ compare_model }}
+),
+-- both counts side by side on a single row
+counts as (
+    select
+        (select count_ourmodel from a) as count_model_with_fewer_rows,
+        (select count_comparisonmodel from b) as count_model_with_more_rows
+),
+-- turn the comparison into the number the test reports
+final as (
+    select
+        case
+            -- fail the test if we have more rows than the reference model and return the row count delta
+            when count_model_with_fewer_rows > count_model_with_more_rows then (count_model_with_fewer_rows - count_model_with_more_rows)
+            -- fail the test if they are the same number
+            when count_model_with_fewer_rows = count_model_with_more_rows then 1
+            -- pass the test if this model has strictly fewer rows (i.e. return the number 0)
+            else 0
+        end as row_count_delta
+    from counts
+)
+
+select row_count_delta from final
+
+{% endmacro %}