
Feature/incremental non full refresh #3387

Merged Jul 21, 2021 (50 commits; changes shown from 29 commits)

Commits
f9f1a96
detect and act on schema changes
matt-winkler Apr 22, 2021
82950fa
update incremental helpers code
matt-winkler Apr 22, 2021
e06f09f
update changelog
matt-winkler Apr 22, 2021
0be265b
fix error in diff_columns from testing
matt-winkler Apr 23, 2021
8c531fb
abstract code a bit further
matt-winkler Apr 23, 2021
1f014de
address matching names vs. data types
matt-winkler Apr 26, 2021
59e10ce
Update CHANGELOG.md
matt-winkler Apr 27, 2021
e26c220
updates from Jeremy's feedback
matt-winkler Apr 27, 2021
c073b45
updates from Jeremy's feedback
matt-winkler Apr 27, 2021
4ab16d5
Merge branch 'develop' into feature/incremental-schema-changes
matt-winkler Apr 27, 2021
335eb15
multi-column add / remove with full_refresh
matt-winkler Apr 28, 2021
7c7b5a0
pulled in updates from remote
matt-winkler Apr 28, 2021
1549af6
simple changes from JC's feedback
matt-winkler Apr 28, 2021
86a7536
updated for snowflake
matt-winkler May 7, 2021
49f5319
merging develop
matt-winkler May 7, 2021
d5edd65
Merge branch 'develop' into feature/incremental-schema-changes
matt-winkler May 18, 2021
3d99fa3
reorganize postgres code
matt-winkler May 19, 2021
7ba1286
reorganize approach
matt-winkler May 21, 2021
1430834
updated full refresh trigger logic
matt-winkler May 23, 2021
9b0029f
fixed unintentional wipe behavior
matt-winkler May 24, 2021
1f1d65f
catch final else condition
matt-winkler May 24, 2021
52e2c97
Merge branch 'feature/incremental-schema-changes' into feature/increm…
matt-winkler May 24, 2021
4188e08
remove WHERE string replace
matt-winkler May 24, 2021
2dd3756
touch ups
matt-winkler May 24, 2021
5f9af84
port core to snowflake
matt-winkler May 24, 2021
b55f5c7
added bigquery code
matt-winkler May 24, 2021
4ce23d1
updated impacted unit tests
matt-winkler May 26, 2021
1f7b882
updates from linting tests
matt-winkler May 26, 2021
e24ca37
updates from linting again
matt-winkler May 26, 2021
0124efe
snowflake updates from further testing
matt-winkler May 27, 2021
66d4dff
fix logging
matt-winkler May 27, 2021
105ab4a
clean up incremental logic
matt-winkler May 27, 2021
7613656
updated for bigquery
matt-winkler Jun 1, 2021
c9ab845
update postgres with new strategy
matt-winkler Jun 1, 2021
bb9cc46
update nodeconfig
matt-winkler Jun 3, 2021
d695f1b
starting integration tests
matt-winkler Jun 3, 2021
69d31ba
integration test for ignore case
matt-winkler Jun 4, 2021
6270ee5
add test for append_new_columns
matt-winkler Jun 4, 2021
cd24472
add integration test for sync
matt-winkler Jun 4, 2021
2463b94
remove extra tests
matt-winkler Jun 4, 2021
4c54429
add unique key and snowflake test
matt-winkler Jun 14, 2021
64b0877
fix merge conflicts
matt-winkler Jun 14, 2021
93938f4
move incremental integration test dir
matt-winkler Jun 14, 2021
8fa33ec
update integration tests
matt-winkler Jun 14, 2021
e8b18b5
update integration tests
matt-winkler Jun 14, 2021
73f4d31
Suggestions for #3387 (#3558)
jtcohen6 Jul 15, 2021
c32f3eb
fix merge conflicts with develop
matt-winkler Jul 21, 2021
bb5eab5
rename integration test folder
matt-winkler Jul 21, 2021
ff3fab8
Update core/dbt/include/global_project/macros/materializations/increm…
matt-winkler Jul 21, 2021
f32c8b2
Update changelog [skip ci]
jtcohen6 Jul 21, 2021
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -37,6 +37,7 @@ Contributors:
- Support disabling schema tests, and configuring tests from `dbt_project.yml` ([#3252](https://github.com/fishtown-analytics/dbt/issues/3252),
[#3253](https://github.com/fishtown-analytics/dbt/issues/3253), [#3257](https://github.com/fishtown-analytics/dbt/pull/3257))
- Add Jinja tag for tests ([#1173](https://github.com/fishtown-analytics/dbt/issues/1173), [#3261](https://github.com/fishtown-analytics/dbt/pull/3261))
- Support detecting schema changes on incremental models ([#1132](https://github.com/fishtown-analytics/dbt/issues/1132), [#3288](https://github.com/fishtown-analytics/dbt/issues/3288))
- Add native support for Postgres index creation ([#804](https://github.com/fishtown-analytics/dbt/issues/804), [3106](https://github.com/fishtown-analytics/dbt/pull/3106))
- Less greedy test selection: expand to select unselected tests if and only if all parents are selected ([#2891](https://github.com/fishtown-analytics/dbt/issues/2891), [#3235](https://github.com/fishtown-analytics/dbt/pull/3235))
- Prevent locks in Redshift during full refresh in incremental materialization. ([#2426](https://github.com/fishtown-analytics/dbt/issues/2426), [#2998](https://github.com/fishtown-analytics/dbt/pull/2998))
2 changes: 1 addition & 1 deletion core/dbt/adapters/base/impl.py
@@ -513,7 +513,7 @@ def rename_relation(
def get_columns_in_relation(
self, relation: BaseRelation
) -> List[BaseColumn]:
"""Get a list of the columns in the given Relation."""
"""Get a list of the columns in the given Relation. """
raise NotImplementedException(
'`get_columns_in_relation` is not implemented for this adapter!'
)
1 change: 1 addition & 0 deletions core/dbt/contracts/graph/model_config.py
@@ -394,6 +394,7 @@ class NodeConfig(BaseConfig):
CompareBehavior.Exclude),
)
full_refresh: Optional[bool] = None
on_schema_change: str = 'ignore'

@classmethod
def __pre_deserialize__(cls, data):
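As a usage sketch of the new `on_schema_change` config added to `NodeConfig` above (model name, `unique_key`, and source table are invented for illustration), a model could opt in per file:

```sql
{{
    config(
        materialized='incremental',
        unique_key='id',
        on_schema_change='append_new_columns'
    )
}}

select * from {{ ref('raw_events') }}
```

Leaving the config unset keeps the default `'ignore'`, preserving existing behavior; `'fail'`, `'append_new_columns'`, and `'sync_all_columns'` opt into the new handling.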
32 changes: 32 additions & 0 deletions core/dbt/include/global_project/macros/adapters/common.sql
@@ -311,3 +311,35 @@
{{ config.set('sql_header', caller()) }}
{%- endmacro %}

{% macro alter_relation_add_remove_columns(relation, add_columns = none, remove_columns = none) -%}
{{ return(adapter.dispatch('alter_relation_add_remove_columns')(relation, add_columns, remove_columns)) }}
{% endmacro %}

{% macro default__alter_relation_add_remove_columns(relation, add_columns = none, remove_columns = none) -%}

{% set sql -%}

alter {{ relation.type }} {{ relation }}
{% if add_columns %}
add
{% endif %}
{% for column in add_columns %}
column {{ column.name }} {{ column.data_type }}{{ ',' if not loop.last }}
{% endfor %}

{{ ', ' if add_columns and remove_columns }}

Reviewer comment: I did a test run with a column add and a column drop; default__alter_relation_add_remove_columns got dispatched to take care of the alters, and Snowflake did not like the comma-separated alters.

Here is the SQL that ran on the database side, followed by the model config:

alter table xyz
            add column TEST_COL2 character varying(250), 
            drop column TEST_COL1
{{ 
    config(
	  alias = 'xyz',
      materialized='incremental',
      incremental_strategy='merge',
      unique_key='record_id',
	  transient=false,
      full_refresh=false,
      on_schema_change='sync_all_columns'
  )
}}


{% if remove_columns %}
drop
{% for column in remove_columns %}
column {{ column.name }}{{ ',' if not loop.last }}
{% endfor %}
{% endif %}

{%- endset %}

{{ return(run_query(sql)) }}

{% endmacro %}

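For illustration, the default macro above would render DDL shaped like the following for a hypothetical relation (table and column names invented). Note that this is the comma-joined form the review comment above reports Snowflake rejecting:

```sql
alter table analytics.my_model
    add
        column col_b varchar,
        column col_c integer
    ,
    drop
        column col_a
```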
@@ -72,3 +72,5 @@
{% endif %}
{% do return(config_full_refresh) %}
{% endmacro %}


@@ -1,5 +1,6 @@

{% macro incremental_upsert(tmp_relation, target_relation, unique_key=none, statement_name="main") %}

Reviewer comment (@b-luu):
@matt-winkler @jtcohen6 do you have context on why the existing "target" columns are used as dest_columns below? Why not use the "tmp" increment's own columns?

Sorry, I stumbled on this PR because 1. I was trying to implement schema evolution on incremental runs as well, and 2. I had issues with some of my increments not necessarily having all the columns of the existing "target".

So I've used tmp_relation instead in the line below, in a custom materialization on my side.

If the reason is unclear, I'll open an issue to have a broader discussion on this topic.

Author reply (@matt-winkler):
Hi @b-luu here are my thoughts on use of target columns for each scenario:

  • If we ignore or fail in the case of a schema change (or leave off the on_schema_change entirely), we assume that the UPSERT is targeted towards the existing columns in the target table. Basically the thinking here is that the target schema should not be changing under these configuration options.
  • If we append_new_columns or sync_all_columns, the column addition / syncing actions have already been performed by the time we reach this point in the code.
  • I'd be interested to see more examples of the issue you describe @b-luu to see if we're missing something here; let us know!

@jtcohen6 Please comment if the above seems off

{%- set dest_columns = adapter.get_columns_in_relation(target_relation) -%}
{%- set dest_cols_csv = dest_columns | map(attribute='quoted') | join(', ') -%}

@@ -5,19 +5,29 @@

{% set target_relation = this.incorporate(type='table') %}
{% set existing_relation = load_relation(this) %}
{%- set full_refresh_mode = (should_full_refresh()) -%}

{% set on_schema_change = incremental_validate_on_schema_change(config.get('on_schema_change'), default='ignore') %}

{{ run_hooks(pre_hooks, inside_transaction=False) }}

-- `BEGIN` happens here:
{{ run_hooks(pre_hooks, inside_transaction=True) }}

{% set to_drop = [] %}

{# -- first check whether we want to full refresh for source view or config reasons #}
{% set trigger_full_refresh = (full_refresh_mode or existing_relation.is_view) %}
{% do log('full refresh mode: %s' % trigger_full_refresh) %}

{% if existing_relation is none %}
{% set build_sql = create_table_as(False, target_relation, sql) %}
{% elif existing_relation.is_view or should_full_refresh() %}

{% elif trigger_full_refresh %}
{#-- Make sure the backup doesn't exist so we don't encounter issues with the rename below #}
{% do log('running full refresh procedure', info=true) %}
{% set tmp_identifier = model['name'] + '__dbt_tmp' %}
{% set backup_identifier = model['name'] + "__dbt_backup" %}
{% set backup_identifier = model['name'] + '__dbt_backup' %}

{% set intermediate_relation = existing_relation.incorporate(path={"identifier": tmp_identifier}) %}
{% set backup_relation = existing_relation.incorporate(path={"identifier": backup_identifier}) %}
@@ -28,13 +38,32 @@
{% set build_sql = create_table_as(False, intermediate_relation, sql) %}
{% set need_swap = true %}
{% do to_drop.append(backup_relation) %}

{% else %}
{% set tmp_relation = make_temp_relation(target_relation) %}
{% do run_query(create_table_as(True, tmp_relation, sql)) %}
{% set tmp_relation = make_temp_relation(target_relation) %}
{% do run_query(create_table_as(True, tmp_relation, sql)) %}

{% if on_schema_change != 'ignore' %}
{% set schema_changed = check_for_schema_changes(tmp_relation, target_relation) %}
{% do log('schema changed: %s' % schema_changed, info=true) %}
{% if schema_changed %}
{% do process_schema_changes(on_schema_change, tmp_relation, existing_relation) %}
{% set build_sql = incremental_upsert(tmp_relation, target_relation, unique_key=unique_key) %}

{% else %}
{% do adapter.expand_target_column_types(
Reviewer comment: Let's always expand_target_column_types before check_for_schema_changes, even if it means running a few extra metadata queries, in order to avoid over-aggressively dropping and recreating columns that just need to be varchar/numeric-type expanded.

Author reply (@matt-winkler): Great call - I tested this on Snowflake and it works beautifully.

from_relation=tmp_relation,
to_relation=target_relation) %}
{% set build_sql = incremental_upsert(tmp_relation, target_relation, unique_key=unique_key) %}
{% endif %}

{% else %}
{% do adapter.expand_target_column_types(
from_relation=tmp_relation,
to_relation=target_relation) %}
{% set build_sql = incremental_upsert(tmp_relation, target_relation, unique_key=unique_key) %}
{% endif %}

{% endif %}

{% call statement("main") %}
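The branching in the materialization above can be summarized in a small sketch (plain Python, not dbt's actual API; relation objects are reduced to flags):

```python
def plan_incremental_build(existing_relation, is_view, full_refresh_mode, on_schema_change):
    """Mirror the materialization's top-level branches as plain data."""
    if existing_relation is None:
        # No target yet: build it from scratch.
        return "create table"
    if full_refresh_mode or is_view:
        # Rebuild via intermediate + backup relations, then swap.
        return "full refresh"
    if on_schema_change != "ignore":
        # Compare tmp vs. target schemas, possibly alter the target, then upsert.
        return "check schema changes, then upsert"
    # Default path: expand column types and upsert, as before this PR.
    return "expand column types, then upsert"

print(plan_incremental_build(None, False, False, "ignore"))  # create table
```

Note that a view target or `--full-refresh` wins over any `on_schema_change` setting, matching the `trigger_full_refresh` check above.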
@@ -0,0 +1,138 @@
{% macro incremental_validate_on_schema_change(on_schema_change, default='ignore') %}

{% if on_schema_change not in ['sync_all_columns', 'append_new_columns', 'fail', 'ignore'] %}

{% set log_message = 'invalid value for on_schema_change (%s) specified. Setting default value of %s.' % (on_schema_change, default) %}
{% do log(log_message, info=true) %}

{{ return(default) }}

{% else %}
{{ return(on_schema_change) }}

{% endif %}

{% endmacro %}
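The validation logic above is simple enough to restate in plain Python (a sketch, not dbt code):

```python
VALID_VALUES = {"sync_all_columns", "append_new_columns", "fail", "ignore"}

def validate_on_schema_change(on_schema_change, default="ignore"):
    """Fall back to the default when an unrecognized value is configured."""
    if on_schema_change not in VALID_VALUES:
        print(f"invalid value for on_schema_change ({on_schema_change}); "
              f"using {default}")
        return default
    return on_schema_change

validate_on_schema_change("sync_all_columns")  # returns "sync_all_columns"
validate_on_schema_change("typo")              # logs and returns "ignore"
```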

{% macro get_column_names(columns) %}

{% set result = [] %}

{% for col in columns %}
{{ result.append(col.column) }}
{% endfor %}

{{ return(result) }}

{% endmacro %}
Reviewer comment: I think this macro is effectively the same as columns | map(attribute = 'name'), right?

Author reply (@matt-winkler): Boom, map for the win.

Author follow-up (@matt-winkler): On further testing, I found that an additional | list was needed.


{% macro diff_arrays(source_array, target_array) %}

{% set result = [] %}
{%- for elem in source_array -%}
{% if elem not in target_array %}

{{ result.append(elem) }}

{% endif %}

{%- endfor -%}

{{ return(result) }}

{% endmacro %}

{% macro diff_columns(source_columns, target_columns) %}

{% set result = [] %}
{% set source_names = get_column_names(source_columns) %}
{% set target_names = get_column_names(target_columns) %}

{# --check whether the name attribute exists in the target, but dont worry about data type differences #}
{%- for col in source_columns -%}
{%- if col.column not in target_names -%}
{{ result.append(col) }}
{%- endif -%}
{%- endfor -%}

{{ return(result) }}

{% endmacro %}
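The name-only comparison in diff_columns can be sketched in plain Python (columns reduced to dicts; this is not dbt's Column class):

```python
def diff_columns(source_columns, target_columns):
    """Columns whose names appear in source but not in target.
    Data-type differences are deliberately ignored, as in the macro."""
    target_names = {col["name"] for col in target_columns}
    return [col for col in source_columns if col["name"] not in target_names]

source = [{"name": "id", "data_type": "integer"},
          {"name": "new_col", "data_type": "text"}]
target = [{"name": "id", "data_type": "integer"}]

print(diff_columns(source, target))  # [{'name': 'new_col', 'data_type': 'text'}]
```

check_for_schema_changes then calls this in both directions: a non-empty result either way flags a schema change.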

{% macro check_for_schema_changes(source_relation, target_relation) %}

{% set schema_changed = False %}

{%- set source_columns = adapter.get_columns_in_relation(source_relation) -%}
{%- set target_columns = adapter.get_columns_in_relation(target_relation) -%}
{%- set source_not_in_target = diff_columns(source_columns, target_columns) -%}
{%- set target_not_in_source = diff_columns(target_columns, source_columns) -%}

{% if source_not_in_target != [] %}
{% set schema_changed = True %}
{% elif target_not_in_source != [] %}
{% set schema_changed = True %}
{% endif %}

Reviewer comment: Could we centralize all schema-change-related logging in this macro, to avoid repeating it in the materialization code?

Author reply (@matt-winkler): done

{{ return(schema_changed) }}

{% endmacro %}

{% macro sync_schemas(source_relation, target_relation, on_schema_change='append_new_columns') %}

{%- set source_columns = adapter.get_columns_in_relation(source_relation) -%}
{%- set target_columns = adapter.get_columns_in_relation(target_relation) -%}
{%- set add_to_target_arr = diff_columns(source_columns, target_columns) -%}
{%- set remove_from_target_arr = diff_columns(target_columns, source_columns) -%}

-- Validates on_schema_change config vs. whether there are column differences
{% if on_schema_change=='append_new_columns' and add_to_target_arr == [] %}

{{
exceptions.raise_compiler_error('append_new_columns was set, but no new columns to append.
This can occur when columns are removed from the source dataset unintentionally.
Review the schemas in the source and target relations, and consider re-running with the --full-refresh option.')
}}

{% endif %}

{%- if on_schema_change == 'append_new_columns' -%}
{%- do alter_relation_add_remove_columns(target_relation, add_to_target_arr) -%}
{% elif on_schema_change == 'sync_all_columns' %}
{%- do alter_relation_add_remove_columns(target_relation, add_to_target_arr, remove_from_target_arr) -%}
{% endif %}

{{
return(
{
'columns_added': add_to_target_arr,
'columns_removed': remove_from_target_arr
}
)
}}

{% endmacro %}

{% macro process_schema_changes(on_schema_change, source_relation, target_relation) %}

{% if on_schema_change=='fail' %}

{{
exceptions.raise_compiler_error("The source and target schemas on this incremental model are out of sync!
You can specify one of ['fail', 'ignore', 'append_new_columns', 'sync_all_columns'] in the on_schema_change config to control this behavior.
Please re-run the incremental model with full_refresh set to True to update the target schema.
Alternatively, you can update the schema manually and re-run the process.")
}}

{# unless we ignore, run the sync operation per the config #}
{% else %}

{% set schema_changes = sync_schemas(source_relation, target_relation, on_schema_change) %}
{% set columns_added = schema_changes['columns_added'] %}
{% set columns_removed = schema_changes['columns_removed'] %}
{% do log('columns added: ' + columns_added|join(', '), info=true) %}
{% do log('columns removed: ' + columns_removed|join(', '), info=true) %}

{% endif %}

{% endmacro %}
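The dispatch between fail/append/sync above can be sketched as follows (plain Python; the ALTER execution performed by sync_schemas is stubbed out):

```python
def process_schema_changes(on_schema_change, columns_added, columns_removed):
    """fail raises; append only adds columns; sync adds and removes them."""
    if on_schema_change == "fail":
        raise RuntimeError(
            "The source and target schemas on this incremental model "
            "are out of sync!")
    if on_schema_change == "append_new_columns":
        # Removed columns are left in place on the target.
        return {"columns_added": columns_added, "columns_removed": []}
    # sync_all_columns: both additions and removals are applied to the target.
    return {"columns_added": columns_added, "columns_removed": columns_removed}

print(process_schema_changes("append_new_columns", ["col_b"], ["col_a"]))
# {'columns_added': ['col_b'], 'columns_removed': []}
```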
31 changes: 31 additions & 0 deletions plugins/bigquery/dbt/include/bigquery/macros/adapters.sql
@@ -128,6 +128,37 @@
{% do adapter.rename_relation(from_relation, to_relation) %}
{% endmacro %}

{% macro alter_relation_add_remove_columns(relation, add_columns = none, remove_columns = none) -%}
{{ return(adapter.dispatch('alter_relation_add_remove_columns')(relation, add_columns, remove_columns)) }}
{% endmacro %}
Reviewer comment: You can remove this dispatch call and just keep the bigquery__ implementation below.


{% macro bigquery__alter_relation_add_remove_columns(relation, add_columns = none, remove_columns = none) -%}

{% set sql -%}

alter {{ relation.type }} {{ relation }}
{% if add_columns %}

{% for column in add_columns %}
add column {{ column.name }} {{ column.data_type }}{{ ',' if not loop.last }}
{% endfor %}
{% endif %}

{{ ', ' if add_columns and remove_columns }}

{% if remove_columns %}
{% for column in remove_columns %}
drop column {{ column.name }}{{ ',' if not loop.last }}
{% endfor %}
{% endif %}

{%- endset %}

{{ return(run_query(sql)) }}

{% endmacro %}


{% macro bigquery__alter_column_type(relation, column_name, new_column_type) -%}
{#
Changing a column's data type using a query requires you to scan the entire table.