diff --git a/.changes/unreleased/Fixes-20220509-130021.yaml b/.changes/unreleased/Fixes-20220509-130021.yaml new file mode 100644 index 00000000000..97480239154 --- /dev/null +++ b/.changes/unreleased/Fixes-20220509-130021.yaml @@ -0,0 +1,8 @@ +kind: Fixes +body: Fix column comparison in snapshot_check_all_get_existing_columns for check-strategy + snapshots with explicit check_cols defined +time: 2022-05-09T13:00:21.649028+02:00 +custom: + Author: jtcohen6 + Issue: "5222" + PR: "5223" diff --git a/core/dbt/include/global_project/macros/materializations/snapshots/strategies.sql b/core/dbt/include/global_project/macros/materializations/snapshots/strategies.sql index fd8beddcb7e..d2dc9e2f83f 100644 --- a/core/dbt/include/global_project/macros/materializations/snapshots/strategies.sql +++ b/core/dbt/include/global_project/macros/materializations/snapshots/strategies.sql @@ -104,22 +104,30 @@ {% macro snapshot_check_all_get_existing_columns(node, target_exists, check_cols_config) -%} + {%- if not target_exists -%} + {#-- no table yet -> return whatever the query does --#} + {{ return((false, query_columns)) }} + {%- endif -%} + + {#-- handle any schema changes --#} + {%- set target_relation = adapter.get_relation(database=node.database, schema=node.schema, identifier=node.alias) -%} + {% if check_cols_config == 'all' %} {%- set query_columns = get_columns_in_query(node['compiled_sql']) -%} + {% elif check_cols_config is iterable and (check_cols_config | length) > 0 %} - {% set query_columns = check_cols_config %} + {#-- query for proper casing/quoting, to support comparison below --#} + {%- set select_check_cols_from_target -%} + select {{ check_cols_config | join(', ') }} from ({{ node['compiled_sql'] }}) subq + {%- endset -%} + {% set query_columns = get_columns_in_query(select_check_cols_from_target) %} + {% else %} {% do exceptions.raise_compiler_error("Invalid value for 'check_cols': " ~ check_cols_config) %} {% endif %} - {%- if not target_exists -%} - {# no table yet -> return whatever the query does #} - {{ return([false, query_columns]) }} - {%- endif -%} - {# handle any schema changes #} - {%- set target_table = node.get('alias', node.get('name')) -%} - {%- set target_relation = adapter.get_relation(database=node.database, schema=node.schema, identifier=target_table) -%} - {%- set existing_cols = get_columns_in_query('select * from ' ~ target_relation) -%} - {%- set ns = namespace() -%} {# handle for-loop scoping with a namespace #} + + {%- set existing_cols = get_columns_in_relation(target_relation) | map(attribute = 'name') | list -%} + {%- set ns = namespace() -%} {#-- handle for-loop scoping with a namespace --#} {%- set ns.column_added = false -%} {%- set intersection = [] -%} @@ -130,7 +138,7 @@ {% set ns.column_added = true %} {%- endif -%} {%- endfor -%} - {{ return([ns.column_added, intersection]) }} + {{ return((ns.column_added, intersection)) }} {%- endmacro %} diff --git a/tests/functional/simple_snapshot/test_changing_check_cols_snapshot.py b/tests/functional/simple_snapshot/test_changing_check_cols_snapshot.py index 26c4b901413..0aee4aedb99 100644 --- a/tests/functional/simple_snapshot/test_changing_check_cols_snapshot.py +++ b/tests/functional/simple_snapshot/test_changing_check_cols_snapshot.py @@ -62,7 +62,7 @@ def project_config_update(): } -def test_simple_snapshot(project): +def run_check_cols_snapshot_with_schema_change(project, check_cols_override=None): """ Test that snapshots using the "check" strategy and explicit check_cols support adding columns. @@ -74,24 +74,35 @@ def test_simple_snapshot(project): As long as no error is thrown, then the snapshot was successful """ + check_cols = check_cols_override or ["name", "other"] + # 1. Create a table that represents the expected data after a series of snapshots - results = run_dbt(["seed", "--show", "--vars", "{version: 1, updated_at: 2016-07-01}"]) + vars_dict = {"version": 1, "updated_at": "2016-07-01"} + results = run_dbt(["seed", "--show", "--vars", str(vars_dict)]) assert len(results) == 1 # Snapshot 1 - results = run_dbt( - ["snapshot", "--vars", "{version: 1, check_cols: ['name'], updated_at: 2016-07-01}"] - ) + # Use only 'name' for check_cols + vars_dict = {"version": 1, "check_cols": [check_cols[0]], "updated_at": "2016-07-01"} + results = run_dbt(["snapshot", "--vars", str(vars_dict)]) assert len(results) == 1 # Snapshot 2 - results = run_dbt( - [ - "snapshot", - "--vars", - "{version: 2, check_cols: ['name', 'other'], updated_at: 2016-07-02}", - ] + # Use both 'name' and 'other' for check_cols + vars_dict = {"version": 2, "check_cols": check_cols, "updated_at": "2016-07-02"} + results = run_dbt(["snapshot", "--vars", str(vars_dict)]) + assert len(results) == 1 + + check_relations_equal( + project.adapter, + ["snapshot_check_cols_new_column", "snapshot_check_cols_new_column_expected"], + compare_snapshot_cols=True, ) + + # Snapshot 3 + # Run it again. Nothing has changed — ensure we don't detect changes + vars_dict = {"version": 2, "check_cols": check_cols, "updated_at": "2016-07-02"} + results = run_dbt(["snapshot", "--vars", str(vars_dict)]) assert len(results) == 1 check_relations_equal( @@ -99,3 +110,17 @@ def test_simple_snapshot(project): ["snapshot_check_cols_new_column", "snapshot_check_cols_new_column_expected"], compare_snapshot_cols=True, ) + + +def test_check_cols_snapshot_with_schema_change(project): + run_check_cols_snapshot_with_schema_change(project) + + +def test_check_cols_snapshot_with_schema_change_and_mismatched_casing(project): + """ + Test that this still works if the database-stored version of 'name' + 'other' + differs from the user-configured 'NAME' and 'OTHER' + """ + run_check_cols_snapshot_with_schema_change( + project=project, check_cols_override=["NAME", "OTHER"] + )