Skip to content

Commit

Permalink
Fix: column comparison logic for check-strategy snapshots (#5223)
Browse files Browse the repository at this point in the history
* Add test case

* Update comparison in snapshot_check_all_get_existing_columns

* Add changelog entry
  • Loading branch information
jtcohen6 authored May 10, 2022
1 parent aa8115a commit 3996a69
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 22 deletions.
8 changes: 8 additions & 0 deletions .changes/unreleased/Fixes-20220509-130021.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
kind: Fixes
body: Fix column comparison in snapshot_check_all_get_existing_columns for check-strategy
snapshots with explicit check_cols defined
time: 2022-05-09T13:00:21.649028+02:00
custom:
Author: jtcohen6
Issue: "5222"
PR: "5223"
Original file line number Diff line number Diff line change
Expand Up @@ -104,22 +104,30 @@


{% macro snapshot_check_all_get_existing_columns(node, target_exists, check_cols_config) -%}
{%- if not target_exists -%}
{#-- no table yet -> return whatever the query does --#}
{{ return((false, query_columns)) }}
{%- endif -%}

{#-- handle any schema changes --#}
{%- set target_relation = adapter.get_relation(database=node.database, schema=node.schema, identifier=node.alias) -%}

{% if check_cols_config == 'all' %}
{%- set query_columns = get_columns_in_query(node['compiled_sql']) -%}

{% elif check_cols_config is iterable and (check_cols_config | length) > 0 %}
{% set query_columns = check_cols_config %}
{#-- query for proper casing/quoting, to support comparison below --#}
{%- set select_check_cols_from_target -%}
select {{ check_cols_config | join(', ') }} from ({{ node['compiled_sql'] }}) subq
{%- endset -%}
{% set query_columns = get_columns_in_query(select_check_cols_from_target) %}

{% else %}
{% do exceptions.raise_compiler_error("Invalid value for 'check_cols': " ~ check_cols_config) %}
{% endif %}
{%- if not target_exists -%}
{# no table yet -> return whatever the query does #}
{{ return([false, query_columns]) }}
{%- endif -%}
{# handle any schema changes #}
{%- set target_table = node.get('alias', node.get('name')) -%}
{%- set target_relation = adapter.get_relation(database=node.database, schema=node.schema, identifier=target_table) -%}
{%- set existing_cols = get_columns_in_query('select * from ' ~ target_relation) -%}
{%- set ns = namespace() -%} {# handle for-loop scoping with a namespace #}

{%- set existing_cols = get_columns_in_relation(target_relation) | map(attribute = 'name') | list -%}
{%- set ns = namespace() -%} {#-- handle for-loop scoping with a namespace --#}
{%- set ns.column_added = false -%}

{%- set intersection = [] -%}
Expand All @@ -130,7 +138,7 @@
{% set ns.column_added = true %}
{%- endif -%}
{%- endfor -%}
{{ return([ns.column_added, intersection]) }}
{{ return((ns.column_added, intersection)) }}
{%- endmacro %}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def project_config_update():
}


def test_simple_snapshot(project):
def run_check_cols_snapshot_with_schema_change(project, check_cols_override=None):
"""
Test that snapshots using the "check" strategy and explicit check_cols support adding columns.
Expand All @@ -74,28 +74,53 @@ def test_simple_snapshot(project):
As long as no error is thrown, then the snapshot was successful
"""

check_cols = check_cols_override or ["name", "other"]

# 1. Create a table that represents the expected data after a series of snapshots
results = run_dbt(["seed", "--show", "--vars", "{version: 1, updated_at: 2016-07-01}"])
vars_dict = {"version": 1, "updated_at": "2016-07-01"}
results = run_dbt(["seed", "--show", "--vars", str(vars_dict)])
assert len(results) == 1

# Snapshot 1
results = run_dbt(
["snapshot", "--vars", "{version: 1, check_cols: ['name'], updated_at: 2016-07-01}"]
)
# Use only 'name' for check_cols
vars_dict = {"version": 1, "check_cols": [check_cols[0]], "updated_at": "2016-07-01"}
results = run_dbt(["snapshot", "--vars", str(vars_dict)])
assert len(results) == 1

# Snapshot 2
results = run_dbt(
[
"snapshot",
"--vars",
"{version: 2, check_cols: ['name', 'other'], updated_at: 2016-07-02}",
]
# Use both 'name' and 'other' for check_cols
vars_dict = {"version": 2, "check_cols": check_cols, "updated_at": "2016-07-02"}
results = run_dbt(["snapshot", "--vars", str(vars_dict)])
assert len(results) == 1

check_relations_equal(
project.adapter,
["snapshot_check_cols_new_column", "snapshot_check_cols_new_column_expected"],
compare_snapshot_cols=True,
)

# Snapshot 3
# Run it again. Nothing has changed — ensure we don't detect changes
vars_dict = {"version": 2, "check_cols": check_cols, "updated_at": "2016-07-02"}
results = run_dbt(["snapshot", "--vars", str(vars_dict)])
assert len(results) == 1

check_relations_equal(
project.adapter,
["snapshot_check_cols_new_column", "snapshot_check_cols_new_column_expected"],
compare_snapshot_cols=True,
)


def test_check_cols_snapshot_with_schema_change(project):
run_check_cols_snapshot_with_schema_change(project)


def test_check_cols_snapshot_with_schema_change_and_mismatched_casing(project):
"""
Test that this still works if the database-stored version of 'name' + 'other'
differs from the user-configured 'NAME' and 'OTHER'
"""
run_check_cols_snapshot_with_schema_change(
project=project, check_cols_override=["NAME", "OTHER"]
)

0 comments on commit 3996a69

Please sign in to comment.