Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CT 2196, CT2121 constraints column order #7161

Merged
merged 12 commits into from
Mar 19, 2023
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20230313-135917.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Features
body: Make model contracts agnostic to ordering
time: 2023-03-13T13:59:17.255368-04:00
custom:
Author: gshank
Issue: 6975 7064
2 changes: 1 addition & 1 deletion core/dbt/clients/jinja.py
Original file line number Diff line number Diff line change
Expand Up @@ -483,7 +483,7 @@ def get_environment(
native: bool = False,
) -> jinja2.Environment:
args: Dict[str, List[Union[str, Type[jinja2.ext.Extension]]]] = {
"extensions": ["jinja2.ext.do"]
"extensions": ["jinja2.ext.do", "jinja2.ext.loopcontrols"]
}

if capture_macros:
Expand Down
6 changes: 6 additions & 0 deletions core/dbt/context/exceptions_jinja.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
PropertyYMLError,
NotImplementedError,
RelationWrongTypeError,
ContractError,
ColumnTypeMissingError,
)

Expand Down Expand Up @@ -66,6 +67,10 @@ def raise_compiler_error(msg, node=None) -> NoReturn:
raise CompilationError(msg, node)


def raise_contract_error(yaml_columns, sql_columns) -> NoReturn:
raise ContractError(yaml_columns, sql_columns)


def raise_database_error(msg, node=None) -> NoReturn:
raise DbtDatabaseError(msg, node)

Expand Down Expand Up @@ -124,6 +129,7 @@ def column_type_missing(column_names) -> NoReturn:
raise_invalid_property_yml_version,
raise_not_implemented,
relation_wrong_type,
raise_contract_error,
column_type_missing,
]
}
Expand Down
17 changes: 17 additions & 0 deletions core/dbt/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2124,6 +2124,23 @@ def get_message(self) -> str:
return msg


class ContractError(CompilationError):
def __init__(self, yaml_columns, sql_columns):
self.yaml_columns = yaml_columns
self.sql_columns = sql_columns
super().__init__(msg=self.get_message())

def get_message(self) -> str:
msg = (
"Contracts are enabled for this model. "
"Please ensure the name, data_type, and number of columns in your `yml` file "
"match the columns in your SQL file.\n"
f"Schema File Columns: {self.yaml_columns}\n"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be useful to sort these both alphabetically (message only) so that it's easier for the user to spot the difference.

f"SQL File Columns: {self.sql_columns}"
)
return msg


# not modifying these since rpc should be deprecated soon
class UnknownAsyncIDException(Exception):
CODE = 10012
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,31 +29,60 @@

{#
Compares the column schema provided by a model's sql file to the column schema provided by a model's schema file.
If any differences in name, data_type or order of columns exist between the two schemas, raises a compiler error
If any differences in name, data_type or number of columns exist between the two schemas, raises a compiler error
#}
{% macro assert_columns_equivalent(sql) %}
{#-- Obtain the column schema provided by sql file. #}
{%- set sql_file_provided_columns = get_column_schema_from_query(sql) -%}
{#--Obtain the column schema provided by the schema file by generating an 'empty schema' query from the model's columns. #}
{%- set schema_file_provided_columns = get_column_schema_from_query(get_empty_schema_sql(model['columns'])) -%}

{%- set sql_file_provided_columns_formatted = format_columns(sql_file_provided_columns) -%}
{%- set schema_file_provided_columns_formatted = format_columns(schema_file_provided_columns) -%}
{#-- create dictionaries with name and formatted data type and strings for exception #}
{%- set sql_columns = format_columns(sql_file_provided_columns) -%}
{%- set string_sql_columns = stringify_formatted_columns(sql_columns) -%}
{%- set yaml_columns = format_columns(schema_file_provided_columns) -%}
{%- set string_yaml_columns = stringify_formatted_columns(yaml_columns) -%}

{%- if sql_file_provided_columns_formatted != schema_file_provided_columns_formatted -%}
{%- do exceptions.raise_compiler_error('Please ensure the name, data_type, order, and number of columns in your `yml` file match the columns in your SQL file.\nSchema File Columns: ' ~ (schema_file_provided_columns_formatted|trim) ~ '\n\nSQL File Columns: ' ~ (sql_file_provided_columns_formatted|trim) ~ ' ' ) %}
{%- if sql_columns|length != yaml_columns|length -%}
{%- do exceptions.raise_contract_error(string_yaml_columns, string_sql_columns) -%}
{%- endif -%}

{%- for sql_col in sql_columns -%}
{%- set yaml_col = [] -%}
{%- for this_col in yaml_columns -%}
{%- if this_col['name'] == sql_col['name'] -%}
{%- do yaml_col.append(this_col) -%}
{%- break -%}
{%- endif -%}
{%- endfor -%}
{%- if not yaml_col -%}
{#-- Column with name not found in yaml #}
{%- do exceptions.raise_contract_error(string_yaml_columns, string_sql_columns) -%}
{%- endif -%}
{%- if sql_col['formatted'] != yaml_col[0]['formatted'] -%}
{#-- Column data types don't match #}
{%- do exceptions.raise_contract_error(string_yaml_columns, string_sql_columns) -%}
{%- endif -%}
{%- endfor -%}

{% endmacro %}

{% macro format_columns(columns) %}
{% set formatted_columns = [] %}
{% for column in columns %}
{%- set formatted_column = adapter.dispatch('format_column', 'dbt')(column) -%}
{%- do formatted_columns.append(formatted_column) -%}
{%- do formatted_columns.append({'name': column.name, 'formatted': formatted_column}) -%}
{% endfor %}
{{ return(formatted_columns|join(', ')) }}
{%- endmacro -%}
{{ return(formatted_columns) }}
{% endmacro %}

{% macro stringify_formatted_columns(formatted_columns) %}
{% set column_strings = [] %}
{% for column in formatted_columns %}
{% do column_strings.append(column['formatted']) %}
{% endfor %}
{{ return(column_strings|join(', ')) }}
{% endmacro %}

{% macro default__format_column(column) -%}
{{ return(column.column.lower() ~ " " ~ column.dtype) }}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,26 @@

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm thinking through what this might look like across all potential adapters, and if we need to add contract-related items in the future. With that context, I have the following questions:

  1. Do you think this could benefit from becoming a "dispatch" method? The goal of this would be to create a hard divide between contracted models and non-contracted models.
  2. Do we need to include the column select statement in default__get_select_subquery if we already validated get_assert_columns_equivalent? We validate that the number of columns are the same, and the names are the same. So I think the subquery already limits to just the columns that we want.
  3. It looks like get_assert_columns_equivalent might have been renamed assert_columns_equivalent.

With my assumptions (not necessarily true of course):

{% macro default__create_table_as(temporary, relation, sql) -%}
  {% if config.get('contract', False) %}
    {{ default__create_table_as_with_contract(temporary, relation, sql }}
  {% else %}
    {{ default__create_table_as_without_contract(temporary, relation, sql }}
  {% endif %}
{% endmacro %}

{% macro default__create_table_as_with_contract(temporary, relation, sql) %}
  {{ get_assert_columns_equivalent(sql) }}
  
  create {% if temporary: -%}temporary{%- endif %} table
    {{ relation.include(database=(not temporary), schema=(not temporary)) }}
    {{ get_columns_spec_ddl() }}
  as ({{ sql }})

{% endmacro %}

{% macro default__create_table_as_without_contract(temporary, relation, sql) %}
  create {% if temporary: -%}temporary{%- endif %} table
    {{ relation.include(database=(not temporary), schema=(not temporary)) }}
  as ({{ sql }})
{% endmacro %}

This would maintain backwards compatibility because we're keeping the macro create_table_as, which is what would have been overridden. And if we need to update only one of these in the future, it isolates the updates, instead of impacting all "create table" workflows. I'm open to feedback and I'm just trying to communicate a thought here. Please let me know what you think of these recommendations.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need the get_select_subquery in order to put the columns in the right order. We removed validating the order because we added the subquery. As far as splitting out the macro into two, probably @jtcohen6 and @MichelleArk should weigh in on that.

create {% if temporary: -%}temporary{%- endif %} table
{{ relation.include(database=(not temporary), schema=(not temporary)) }}
{% if config.get('contract', False) %}
{{ get_assert_columns_equivalent(sql) }}
{{ get_columns_spec_ddl() }}
{% endif %}
{% if config.get('contract', False) %}
{{ get_assert_columns_equivalent(sql) }}
{{ get_columns_spec_ddl() }}
{%- set sql = get_select_subquery(sql) %}
{% endif %}
as (
{{ sql }}
);
{%- endmacro %}

{% macro get_select_subquery(sql) %}
{{ return(adapter.dispatch('get_select_subquery', 'dbt')(sql)) }}
{% endmacro %}

{% macro default__get_select_subquery(sql) %}
select
{% for column in model['columns'] %}
{{ column }}{{ ", " if not loop.last }}
{% endfor %}
from (
{{ sql }}
) as model_subq
{%- endmacro %}
7 changes: 4 additions & 3 deletions plugins/postgres/dbt/include/postgres/macros/adapters.sql
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,11 @@
{{ get_assert_columns_equivalent(sql) }}
{{ get_columns_spec_ddl() }} ;
insert into {{ relation }} {{ get_column_names() }}
{% else %}
as
{%- set sql = get_select_subquery(sql) %}
{% else %}
as
{% endif %}
(
(
{{ sql }}
);
{%- endmacro %}
Expand Down
30 changes: 15 additions & 15 deletions tests/adapter/dbt/tests/adapter/constraints/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
}}

select
1 as id,
'blue' as color,
cast('2019-01-01' as date) as date_day
1 as id,
'2019-01-01' as date_day
"""

my_model_wrong_order_sql = """
Expand All @@ -21,7 +21,7 @@
select
'blue' as color,
1 as id,
cast('2019-01-01' as date) as date_day
'2019-01-01' as date_day
"""

my_model_wrong_name_sql = """
Expand All @@ -32,9 +32,9 @@
}}

select
1 as error,
'blue' as color,
cast('2019-01-01' as date) as date_day
1 as error,
'2019-01-01' as date_day
"""

my_model_data_type_sql = """
Expand All @@ -60,7 +60,7 @@
cast(null as {{ dbt.type_int() }}) as id,
-- change the color as well (to test rollback)
'red' as color,
cast('2019-01-01' as date) as date_day
'2019-01-01' as date_day
"""

model_schema_yml = """
Expand All @@ -81,7 +81,7 @@
- name: color
data_type: text
- name: date_day
data_type: date
data_type: text
- name: my_model_error
config:
contract: true
Expand All @@ -96,7 +96,7 @@
- name: color
data_type: text
- name: date_day
data_type: date
data_type: text
- name: my_model_wrong_order
config:
contract: true
Expand All @@ -111,7 +111,7 @@
- name: color
data_type: text
- name: date_day
data_type: date
data_type: text
- name: my_model_wrong_name
config:
contract: true
Expand All @@ -126,7 +126,7 @@
- name: color
data_type: text
- name: date_day
data_type: date
data_type: text
"""

model_data_type_schema_yml = """
Expand All @@ -150,7 +150,7 @@
select
1 as id,
'blue' as color,
cast('2019-01-01' as date) as date_day
'2019-01-01' as date_day
"""

my_model_view_wrong_order_sql = """
Expand All @@ -163,7 +163,7 @@
select
'blue' as color,
1 as id,
cast('2019-01-01' as date) as date_day
'2019-01-01' as date_day
"""

my_model_view_wrong_name_sql = """
Expand All @@ -174,9 +174,9 @@
}}

select
1 as error,
'blue' as color,
cast('2019-01-01' as date) as date_day
1 as error,
'2019-01-01' as date_day
"""

my_model_view_with_nulls_sql = """
Expand All @@ -191,5 +191,5 @@
cast(null as {{ dbt.type_int() }}) as id,
-- change the color as well (to test rollback)
'red' as color,
cast('2019-01-01' as date) as date_day
'2019-01-01' as date_day
"""
Loading