From e74d8c9f4ae3234c5f6d9466e9e6015c3f8b2fbb Mon Sep 17 00:00:00 2001 From: Jacek Date: Thu, 18 May 2023 17:03:35 +0200 Subject: [PATCH 1/7] Add support for Vertica database --- macros/vertica__create_constraints.sql | 339 +++++++++++++++++++++++++ 1 file changed, 339 insertions(+) create mode 100644 macros/vertica__create_constraints.sql diff --git a/macros/vertica__create_constraints.sql b/macros/vertica__create_constraints.sql new file mode 100644 index 0000000..1900da3 --- /dev/null +++ b/macros/vertica__create_constraints.sql @@ -0,0 +1,339 @@
+{# Vertica specific implementation to create a primary key. Only logs when a matching PK/UK is already known or the ownership check fails; otherwise runs ALTER TABLE ... ADD CONSTRAINT ... PRIMARY KEY and records the key in lookup_cache.unique_keys. The constraint name defaults to <identifier>_<columns joined by _>_PK and is always upper-cased. #}
+{%- macro vertica__create_primary_key(table_relation, column_names, verify_permissions, quote_columns=false, constraint_name=none, lookup_cache=none) -%}
+    {%- set constraint_name = (constraint_name or table_relation.identifier ~ "_" ~ column_names|join('_') ~ "_PK") | upper -%}
+    {%- set columns_csv = dbt_constraints.get_quoted_column_csv(column_names, quote_columns) -%}
+
+    {#- Check that the table does not already have this PK/UK -#}
+    {%- if not dbt_constraints.unique_constraint_exists(table_relation, column_names, lookup_cache) -%}
+
+        {%- if dbt_constraints.have_ownership_priv(table_relation, verify_permissions, lookup_cache) -%}
+
+            {%- set query -%}
+            ALTER TABLE {{table_relation}} ADD CONSTRAINT {{constraint_name}} PRIMARY KEY ( {{columns_csv}} )
+            {%- endset -%}
+            {%- do log("Creating primary key: " ~ constraint_name, info=true) -%}
+            {%- do run_query(query) -%}
+            {#- Add this constraint to the lookup cache; merge into the table's entry so keys cached earlier for the same relation are kept -#}
+            {%- do lookup_cache.unique_keys.setdefault(table_relation, {}).update({constraint_name: column_names}) -%}
+
+        {%- else -%}
+            {%- do log("Skipping " ~ constraint_name ~ " because of insufficient privileges: " ~ table_relation, info=true) -%}
+        {%- endif -%}
+
+    {%- else -%}
+        {%- do log("Skipping " ~ constraint_name ~ " because PK/UK already exists: " ~ table_relation ~ " " ~ column_names, info=false) -%}
+    {%- endif -%}
+{%- endmacro -%}
+
+
+
+{# 
Vertica specific implementation to create a unique key #}
+{%- macro vertica__create_unique_key(table_relation, column_names, verify_permissions, quote_columns=false, constraint_name=none, lookup_cache=none) -%}
+    {%- set constraint_name = (constraint_name or table_relation.identifier ~ "_" ~ column_names|join('_') ~ "_UK") | upper -%}
+    {%- set columns_csv = dbt_constraints.get_quoted_column_csv(column_names, quote_columns) -%}
+
+    {#- Check that the table does not already have this PK/UK -#}
+    {%- if not dbt_constraints.unique_constraint_exists(table_relation, column_names, lookup_cache) -%}
+
+        {%- if dbt_constraints.have_ownership_priv(table_relation, verify_permissions, lookup_cache) -%}
+
+            {%- set query -%}
+            ALTER TABLE {{table_relation}} ADD CONSTRAINT {{constraint_name}} UNIQUE ( {{columns_csv}} )
+            {%- endset -%}
+            {%- do log("Creating unique key: " ~ constraint_name, info=true) -%}
+            {%- do run_query(query) -%}
+            {#- Add this constraint to the lookup cache; merge into the table's entry so keys cached earlier for the same relation are kept -#}
+            {%- do lookup_cache.unique_keys.setdefault(table_relation, {}).update({constraint_name: column_names}) -%}
+
+        {%- else -%}
+            {%- do log("Skipping " ~ constraint_name ~ " because of insufficient privileges: " ~ table_relation, info=true) -%}
+        {%- endif -%}
+
+    {%- else -%}
+        {%- do log("Skipping " ~ constraint_name ~ " because PK/UK already exists: " ~ table_relation ~ " " ~ column_names, info=false) -%}
+    {%- endif -%}
+{%- endmacro -%}
+
+
+
+{# Vertica specific implementation to create a foreign key. Requires a PK/UK on the referenced columns, no existing FK on the referencing columns, ownership of the FK table and REFERENCES (or ownership) on the PK table; every other case is only logged. #}
+{%- macro vertica__create_foreign_key(pk_table_relation, pk_column_names, fk_table_relation, fk_column_names, verify_permissions, quote_columns, constraint_name, lookup_cache) -%}
+    {%- set constraint_name = (constraint_name or fk_table_relation.identifier ~ "_" ~ fk_column_names|join('_') ~ "_FK") | upper -%}
+    {%- set fk_columns_csv = dbt_constraints.get_quoted_column_csv(fk_column_names, quote_columns) -%}
+    {%- set pk_columns_csv = dbt_constraints.get_quoted_column_csv(pk_column_names, quote_columns) -%}
+    {#- Check that the PK table has a PK or UK -#}
+    {%- if dbt_constraints.unique_constraint_exists(pk_table_relation, pk_column_names, lookup_cache) -%}
+        {#- Check if the table already has this foreign key -#}
+        {%- if not dbt_constraints.foreign_key_exists(fk_table_relation, fk_column_names, lookup_cache) -%}
+
+            {%- if dbt_constraints.have_ownership_priv(fk_table_relation, verify_permissions, lookup_cache) and dbt_constraints.have_references_priv(pk_table_relation, verify_permissions, lookup_cache) -%}
+
+                {%- set query -%}
+                ALTER TABLE {{fk_table_relation}} ADD CONSTRAINT {{constraint_name}} FOREIGN KEY ( {{fk_columns_csv}} ) REFERENCES {{pk_table_relation}} ( {{pk_columns_csv}} )
+                {%- endset -%}
+                {%- do log("Creating foreign key: " ~ constraint_name ~ " referencing " ~ pk_table_relation.identifier ~ " " ~ pk_column_names, info=true) -%}
+                {%- do run_query(query) -%}
+                {#- Add this constraint to the lookup cache; merge into the table's entry so FKs cached earlier for the same relation are kept -#}
+                {%- do lookup_cache.foreign_keys.setdefault(fk_table_relation, {}).update({constraint_name: fk_column_names}) -%}
+
+            {%- else -%}
+                {%- do log("Skipping " ~ constraint_name ~ " because of insufficient privileges: " ~ fk_table_relation ~ " referencing " ~ pk_table_relation, info=true) -%}
+            {%- endif -%}
+
+        {%- else -%}
+            {%- do log("Skipping " ~ constraint_name ~ " because FK already exists: " ~ fk_table_relation ~ " " ~ fk_column_names, info=false) -%}
+        {%- endif -%}
+    {%- else -%}
+        {%- do log("Skipping " ~ constraint_name ~ " because a PK/UK was not found on the PK table: " ~ pk_table_relation ~ " " ~ pk_column_names, info=true) -%}
+    {%- endif -%}
+{%- endmacro -%}
+
+
+
+{# Vertica specific implementation to create a not null constraint. Only columns not already listed in lookup_cache.not_null_col are altered, all in a single ALTER TABLE statement. #}
+{%- macro vertica__create_not_null(table_relation, column_names, verify_permissions, quote_columns, lookup_cache) -%}
+    {#- NOTE(review): assumes lookup_cache.not_null_col already holds an entry for table_relation (vertica__lookup_table_columns fills it) - confirm callers guarantee that -#}
+    {%- set existing_not_null_col = lookup_cache.not_null_col[table_relation] -%}
+
+    {%- set columns_to_change = [] -%}
+    {%- for column_name in column_names if column_name|upper not in existing_not_null_col -%}
+        {%- do columns_to_change.append(column_name) -%}
+        {#- The cache holds upper-cased names, so store the same form the filter above compares against -#}
+        {%- do existing_not_null_col.append(column_name|upper) -%}
+    {%- endfor -%}
+    {%- if columns_to_change|count > 0 -%}
+        {%- set columns_list = dbt_constraints.get_quoted_column_list(columns_to_change, quote_columns) -%}
+
+        {%- if dbt_constraints.have_ownership_priv(table_relation, verify_permissions, lookup_cache) -%}
+            {%- set modify_statements = [] -%}
+            {%- for column in columns_list -%}
+                {%- do modify_statements.append( "ALTER COLUMN " ~ column ~ " SET NOT NULL" ) -%}
+            {%- endfor -%}
+            {%- set modify_statement_csv = modify_statements | join(", ") -%}
+            {%- set query -%}
+                ALTER TABLE {{table_relation}} {{ modify_statement_csv }};
+            {%- endset -%}
+            {%- do log("Creating not null constraint for: " ~ columns_to_change | join(", ") ~ " in " ~ table_relation, info=true) -%}
+            {%- do run_query(query) -%}
+            {#- Add this constraint to the lookup cache -#}
+            {#- existing_not_null_col already contains the columns appended in the loop above -#}
+            {%- do lookup_cache.not_null_col.update({table_relation: existing_not_null_col }) -%}
+        {%- else -%}
+            {%- do log("Skipping not null constraint for " ~ columns_to_change | join(", ") ~ " in " ~ table_relation ~ " because of insufficient privileges: " ~ table_relation, info=true) -%}
+        {%- endif -%}
+    {%- else -%}
+        {%- do log("Skipping not null constraint for " ~ column_names | join(", ") ~ " in " ~ table_relation ~ " because all columns are already not null", info=false) -%}
+    {%- endif -%}
+{%- endmacro -%}
+
+
+
+{#- This macro is used in create macros to avoid duplicate PK/UK constraints
+    and to skip FK where no PK/UK constraint exists on the parent table.
+    Checks the lookup cache first, then unique ('u') and primary ('p') constraints in constraint_columns -#}
+{%- macro vertica__unique_constraint_exists(table_relation, column_names, lookup_cache) -%}
+
+    {#- Check if we can find this constraint in the lookup cache -#}
+    {%- if table_relation in lookup_cache.unique_keys -%}
+        {%- set cached_unique_keys = lookup_cache.unique_keys[table_relation] -%}
+        {%- for cached_columns in cached_unique_keys.values() -%}
+            {%- if dbt_constraints.column_list_matches(cached_columns, column_names ) -%}
+                {%- do log("Found UK key: " ~ table_relation ~ " " ~ column_names, info=false) -%}
+                {{ return(true) }}
+            {%- endif -%}
+        {% endfor %}
+    {%- endif -%}
+    {#- Names are compared with lower(...) = lower(...) rather than ILIKE so "_" and "%" in identifiers are not treated as wildcards -#}
+    {%- set lookup_query -%}
+    select constraint_name, column_name
+    from constraint_columns
+    where
+        lower(table_schema) = lower('{{table_relation.schema}}')
+        and lower(table_name) = lower('{{table_relation.identifier}}')
+        and constraint_type in ('u')
+    order by constraint_name
+    {%- endset -%}
+    {%- set constraint_list = run_query(lookup_query) -%}
+    {%- if constraint_list.columns["column_name"].values() | count > 0 -%}
+        {%- for constraint in constraint_list.group_by("constraint_name") -%}
+            {#- Add this constraint to the lookup cache without replacing the table's existing entry -#}
+            {%- do lookup_cache.unique_keys.setdefault(table_relation, {}).update({constraint.key_name: constraint.columns["column_name"].values()}) -%}
+        {% endfor %}
+        {%- for constraint in constraint_list.group_by("constraint_name") -%}
+            {%- if dbt_constraints.column_list_matches(constraint.columns["column_name"].values(), column_names ) -%}
+                {%- do log("Found UK key: " ~ table_relation ~ " " ~ column_names, info=false) -%}
+                {{ return(true) }}
+            {%- endif -%}
+        {% endfor %}
+    {%- endif -%}
+
+    {%- set lookup_query -%}
+    select constraint_name, column_name
+    from constraint_columns
+    where
+        lower(table_schema) = lower('{{table_relation.schema}}')
+        and lower(table_name) = lower('{{table_relation.identifier}}')
+        and constraint_type in ('p')
+    order by constraint_name
+    {%- endset -%}
+    {%- set constraint_list = run_query(lookup_query) -%}
+    {%- if constraint_list.columns["column_name"].values() | count > 0 -%}
+        {%- for constraint in constraint_list.group_by("constraint_name") -%}
+            {#- Add this constraint to the lookup cache without replacing the table's existing entry -#}
+            {%- do lookup_cache.unique_keys.setdefault(table_relation, {}).update({constraint.key_name: constraint.columns["column_name"].values()}) -%}
+        {% endfor %}
+        {%- for constraint in constraint_list.group_by("constraint_name") -%}
+            {%- if dbt_constraints.column_list_matches(constraint.columns["column_name"].values(), column_names ) -%}
+                {%- do log("Found PK key: " ~ table_relation ~ " " ~ column_names, info=false) -%}
+                {{ return(true) }}
+            {%- endif -%}
+        {% endfor %}
+    {%- endif -%}
+
+    {#- If we get this far then the table does not have either constraint -#}
+    {%- do log("No PK/UK key: " ~ table_relation ~ " " ~ column_names, info=false) -%}
+    {{ return(false) }}
+{%- endmacro -%}
+
+
+
+{#- This macro is used in create macros to avoid duplicate FK constraints. Checks the lookup cache first, then foreign-key ('f') rows in constraint_columns -#}
+{%- macro vertica__foreign_key_exists(table_relation, column_names, lookup_cache) -%}
+
+    {#- Check if we can find this constraint in the lookup cache -#}
+    {%- if table_relation in lookup_cache.foreign_keys -%}
+        {%- set cached_foreign_keys = lookup_cache.foreign_keys[table_relation] -%}
+        {%- for cached_columns in cached_foreign_keys.values() -%}
+            {%- if dbt_constraints.column_list_matches(cached_columns, column_names ) -%}
+                {%- do log("Found FK key: " ~ table_relation ~ " " ~ column_names, info=false) -%}
+                {{ return(true) }}
+            {%- endif -%}
+        {% endfor %}
+    {%- endif -%}
+    {#- Names are compared with lower(...) = lower(...) rather than ILIKE so "_" and "%" in identifiers are not treated as wildcards -#}
+    {%- set lookup_query -%}
+    select constraint_name as fk_name, column_name as fk_column_name
+    from constraint_columns
+    where
+        lower(table_schema) = lower('{{table_relation.schema}}')
+        and lower(table_name) = lower('{{table_relation.identifier}}')
+        and constraint_type in ('f')
+    order by constraint_name
+    {%- endset -%}
+    {%- set constraint_list = run_query(lookup_query) -%}
+    {%- if constraint_list.columns["fk_column_name"].values() | count > 0 -%}
+        {%- for constraint in constraint_list.group_by("fk_name") -%}
+            {#- Add this constraint to the lookup cache without replacing the table's existing entry -#}
+            {%- do lookup_cache.foreign_keys.setdefault(table_relation, {}).update({constraint.key_name: constraint.columns["fk_column_name"].values()}) -%}
+        {% endfor %}
+        {%- for constraint in constraint_list.group_by("fk_name") -%}
+            {%- if dbt_constraints.column_list_matches(constraint.columns["fk_column_name"].values(), column_names ) -%}
+                {%- do log("Found FK key: " ~ table_relation ~ " " ~ column_names, info=false) -%}
+                {{ return(true) }}
+            {%- endif -%}
+        {% endfor %}
+    {%- endif -%}
+
+    {#- If we get this far then the table does not have this constraint -#}
+    {%- do log("No FK key: " ~ table_relation ~ " " ~ column_names, info=false) -%}
+    {{ return(false) }}
+{%- endmacro -%}
+
+
+{#- Returns true when verify_permissions is not exactly true, or when the privileges found for table_relation include REFERENCES or OWNERSHIP -#}
+{%- macro vertica__have_references_priv(table_relation, verify_permissions, lookup_cache) -%}
+    {%- if verify_permissions is sameas true -%}
+
+        {%- set table_privileges = vertica__lookup_table_privileges(table_relation, lookup_cache) -%}
+        {%- if "REFERENCES" in table_privileges or "OWNERSHIP" in table_privileges -%}
+            {{ return(true) }}
+        {%- else -%}
+            {{ return(false) }}
+        {%- endif -%}
+
+    {%- else -%}
+        {{ return(true) }}
+    {%- endif -%}
+{%- endmacro -%}
+
+
+{#- Returns true when verify_permissions is not exactly true, or when the privileges found for table_relation include OWNERSHIP -#}
+{%- macro vertica__have_ownership_priv(table_relation, verify_permissions, lookup_cache) -%}
+    {%- if verify_permissions is sameas true -%}
+
+        {%- set table_privileges = vertica__lookup_table_privileges(table_relation, lookup_cache) -%}
+        {%- if "OWNERSHIP" in table_privileges -%}
+            {{ return(true) }}
+        {%- else -%}
+            {{ return(false) }}
+        {%- endif -%}
+
+    {%- else -%}
+        {{ return(true) }}
+    {%- endif -%}
+{%- endmacro -%}
+
+
+{#- Returns the privilege_type values found for table_relation. The privilege query runs once per database and its result is kept in lookup_cache.table_privileges. NOTE(review): is_role_in_session() and <database>.information_schema.table_privileges may not be available on Vertica - confirm before relying on verify_permissions=true -#}
+{%- macro vertica__lookup_table_privileges(table_relation, lookup_cache) -%}
+
+    {%- if table_relation.database not in lookup_cache.table_privileges -%}
+        {%- set lookup_query -%}
+        select distinct
+            upper(tp.table_schema) as "table_schema",
+            upper(tp.table_name) as "table_name",
+            tp.privilege_type as "privilege_type"
+        from {{table_relation.database}}.information_schema.table_privileges tp
+        where is_role_in_session(tp.grantee)
+            and tp.privilege_type in ('OWNERSHIP', 'REFERENCES')
+        {%- endset -%}
+        {%- do log("Caching privileges for database: " ~ table_relation.database, info=false) -%}
+        {%- set privilege_list = run_query(lookup_query) -%}
+        {%- do lookup_cache.table_privileges.update({ table_relation.database: privilege_list }) -%}
+    {%- endif -%}
+    {#- Filter the cached rows down to this table; the query upper-cases schema/table, so compare against upper-cased names -#}
+    {%- set tab_priv_list = [] -%}
+    {%- set schema_name = table_relation.schema|upper -%}
+    {%- set table_name = table_relation.identifier|upper -%}
+    {%- for row in lookup_cache.table_privileges[table_relation.database].rows -%}
+        {%- if row["table_schema"] == schema_name and row["table_name"] == table_name -%}
+            {%- do tab_priv_list.append(row["privilege_type"]) -%}
+        {%- endif -%}
+    {%- endfor -%}
+    {{ return(tab_priv_list) }}
+
+{%- endmacro -%}
+
+
+{#- Returns the upper-cased column names of table_relation. On first use it queries the columns table and caches both the column list (lookup_cache.table_columns) and the columns whose is_nullable is False (lookup_cache.not_null_col) -#}
+{%- macro vertica__lookup_table_columns(table_relation, lookup_cache) -%}
+
+    {%- if table_relation not in lookup_cache.table_columns -%}
+        {%- set lookup_query -%}
+        select column_name, is_nullable
+        from columns
+        where lower(table_schema) = lower('{{table_relation.schema}}')
+            and lower(table_name) = lower('{{table_relation.identifier}}')
+        {%- endset -%}
+        {%- set results = run_query(lookup_query) -%}
+        {%- set not_null_col = [] -%}
+        {%- set upper_column_list = [] -%}
+        {%- for row in results.rows -%}
+            {%- do upper_column_list.append(row["column_name"]|upper) -%}
+            {%- if row['is_nullable'] == False -%}
+                {%- do not_null_col.append(row["column_name"]|upper) -%}
+            {%- endif -%}
+        {%- endfor -%}
+        {%- do lookup_cache.table_columns.update({ table_relation: upper_column_list }) -%}
+
+        {%- do lookup_cache.not_null_col.update({ table_relation: not_null_col }) -%}
+    {%- endif -%}
+    {{ return(lookup_cache.table_columns[table_relation]) }}
+
+{%- endmacro -%}
+
+
+{#- Index hook for Vertica: produces no SQL and only logs that index creation is skipped -#}
+{%- macro vertica__get_create_index_sql(table_relation, lookup_cache) -%}
+
+    {%- do log("Skipping creation of indexes, they are not supported by Vertica", info=true) -%}
+
+{%- endmacro -%}
From 8572cd6996763477989011df3c33fce836d86f12 Mon Sep 17 00:00:00 2001 From: Dan Flippo Date: Fri, 28 Jul 2023 17:27:29 -0400 Subject: [PATCH 2/7] Excluding error_if and warn_if 
tests --- integration_tests/models/schema.yml | 2 +- macros/create_constraints.sql | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/integration_tests/models/schema.yml b/integration_tests/models/schema.yml index 491de1c..ac642bd 100644 --- a/integration_tests/models/schema.yml +++ b/integration_tests/models/schema.yml @@ -225,7 +225,7 @@ models: to: ref('supplier') field: s_suppkey tests: - - dbt_constraints.unique_key: + - dbt_constraints.primary_key: column_names: - ps_partkey - ps_suppkey diff --git a/macros/create_constraints.sql b/macros/create_constraints.sql index f175036..7b7e9dd 100644 --- a/macros/create_constraints.sql +++ b/macros/create_constraints.sql @@ -172,6 +172,9 @@ and res.node.config.materialized == "test" and res.node.test_metadata and res.node.test_metadata.name is in( constraint_types ) + and res.failures == 0 + and res.node.config.error_if == '!= 0' + and res.node.config.warn_if == '!= 0' and res.node.config.where is none -%} {%- set test_model = res.node -%} From f2c4329f5f3ec6ae7b338d7dcbbac5069c5da2b3 Mon Sep 17 00:00:00 2001 From: Dan Flippo Date: Mon, 31 Jul 2023 11:55:12 -0400 Subject: [PATCH 3/7] Added always_create_constraint config option --- integration_tests/models/schema.yml | 10 ++++++++++ macros/create_constraints.sql | 12 ++++++------ 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/integration_tests/models/schema.yml b/integration_tests/models/schema.yml index ac642bd..12df991 100644 --- a/integration_tests/models/schema.yml +++ b/integration_tests/models/schema.yml @@ -147,14 +147,24 @@ models: - name: o_orderkey_seq description: "duplicate seq column to test UK" tests: + # This constraint should be skipped because it has failures - dbt_constraints.primary_key: column_name: o_orderkey config: severity: warn + # This constraint should be still generated because always_create_constraint=true + - dbt_constraints.unique_key: + column_name: o_orderkey + config: + warn_if: ">= 5000" + error_if: 
">= 10000" + always_create_constraint: true + # This constraint should be still generated because always_create_constraint=true - dbt_constraints.unique_key: column_name: o_orderkey_seq config: severity: warn + always_create_constraint: true - name: fact_order_line_missing_orders description: "Test that we do not create FK on failed tests" diff --git a/macros/create_constraints.sql b/macros/create_constraints.sql index 7b7e9dd..faf6299 100644 --- a/macros/create_constraints.sql +++ b/macros/create_constraints.sql @@ -168,14 +168,14 @@ {#- Loop through the results and find all tests that passed and match the constraint_types -#} {#- Issue #2: added condition that the where config must be empty -#} {%- for res in results - if res.status == "pass" - and res.node.config.materialized == "test" + if res.node.config.materialized == "test" + and res.status in ("pass", "warn") and res.node.test_metadata and res.node.test_metadata.name is in( constraint_types ) - and res.failures == 0 - and res.node.config.error_if == '!= 0' - and res.node.config.warn_if == '!= 0' - and res.node.config.where is none -%} + and ( res.failures == 0 or + res.node.config.get("always_create_constraint", false) ) + and ( res.node.config.where is none or + res.node.config.get("always_create_constraint", false) ) -%} {%- set test_model = res.node -%} {%- set test_parameters = test_model.test_metadata.kwargs -%} From 52f8773a749f8900fbdd653d6bc08931321a9859 Mon Sep 17 00:00:00 2001 From: Dan Flippo Date: Mon, 31 Jul 2023 12:18:20 -0400 Subject: [PATCH 4/7] Initially comment out feature --- integration_tests/models/schema.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/integration_tests/models/schema.yml b/integration_tests/models/schema.yml index 12df991..8e498a2 100644 --- a/integration_tests/models/schema.yml +++ b/integration_tests/models/schema.yml @@ -152,19 +152,19 @@ models: column_name: o_orderkey config: severity: warn - # This constraint should be still 
generated because always_create_constraint=true + # This constraint can be generated if you uncomment always_create_constraint=true - dbt_constraints.unique_key: column_name: o_orderkey config: warn_if: ">= 5000" error_if: ">= 10000" - always_create_constraint: true - # This constraint should be still generated because always_create_constraint=true + # always_create_constraint: true + # This constraint can be generated if you uncomment always_create_constraint=true - dbt_constraints.unique_key: column_name: o_orderkey_seq config: severity: warn - always_create_constraint: true + # always_create_constraint: true - name: fact_order_line_missing_orders description: "Test that we do not create FK on failed tests" From 52c1f276bfd0240f101c29e33dfb8da7aa61d70f Mon Sep 17 00:00:00 2001 From: Dan Flippo Date: Mon, 31 Jul 2023 12:42:32 -0400 Subject: [PATCH 5/7] Updated docs for always_create_constraint --- README.md | 47 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 43 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index ddf4aec..fd24392 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # dbt Constraints Package -This package generates database constraints based on the tests in a dbt project. It is currently compatible with Snowflake, PostgreSQL, and Oracle only. +This package generates database constraints based on the tests in a dbt project. It is currently compatible with Snowflake, PostgreSQL, Oracle, Redshift, and Vertica only. ## Why data engineers should add referential integrity constraints @@ -117,13 +117,13 @@ packages: Generally, if you don't meet a requirement, tests are still executed but the constraint is skipped rather than producing an error. -- All models involved in a constraint must be materialized as table, incremental, or snapshot. +- All models involved in a constraint must be materialized as table, incremental, snapshot, or seed. - If source constraints are enabled, the source must be a table. 
You must also have the `OWNERSHIP` table privilege to add a constraint. For foreign keys you also need the `REFERENCES` privilege on the parent table with the primary or unique key. The package will identify when you lack these privileges on Snowflake and PostgreSQL. Oracle does not provide an easy way to look up your effective privileges so it has an exception handler and will display Oracle's error messages. - All columns on constraints must be individual column names, not expressions. You can reference columns on a model that come from an expression. -- Constraints are not created for failed tests +- Constraints are not created for failed tests. See how to get around this using severity and `config: always_create_constraint: true` in the next section. - `primary_key`, `unique_key`, and `foreign_key` tests are considered first and duplicate constraints are skipped. One exception is that you will get an error if you add two different `primary_key` tests to the same model. @@ -133,7 +133,46 @@ Generally, if you don't meet a requirement, tests are still executed but the con - The `foreign_key` test will ignore any rows with a null column, even if only one of two columns in a compound key is null. If you also want to ensure FK columns are not null, you should add standard `not_null` tests to your model which will add not null constraints to the table. -- Referential constraints must apply to all the rows in a table so any tests with a `config: where:` property will be skipped when creating constraints. +- Referential constraints must apply to all the rows in a table so any tests with a `config: where:` property will be skipped when creating constraints. See how to disable this rule using `config: always_create_constraint: true` in the next section. + + +## Advanced: `config: always_create_constraint: true` property +There is an advanced option to force a constraint to be generated when there is a `config: where:` property or if the constraint has a threshold. 
The `config: always_create_constraint: true` property will override those exclusions. When this setting is in effect, you can create constraints even when you have excluded some records or have a number of failures below a threshold. If your test has a status of 'failed', it will still be skipped. Please see [dbt's documentation on how to set a threshold for failures](https://docs.getdbt.com/reference/resource-configs/severity). + +__Caveat Emptor:__ +* You will get an error if you try to force constraints to be generated that are enforced by your database. On Snowflake that is only a not_null constraint but on databases like Oracle, all the generated constraints are enforced. +* This feature could cause unexpected query results on Snowflake due to [join elimination](https://docs.snowflake.com/en/user-guide/join-elimination). + +This is an example using the feature: +```yml + - name: dim_duplicate_orders + description: "Test that we do not try to create PK/UK on failed tests" + columns: + - name: o_orderkey + description: "The primary key for this table" + - name: o_orderkey_seq + description: "duplicate seq column to test UK" + tests: + # This constraint should be skipped because it has failures + - dbt_constraints.primary_key: + column_name: o_orderkey + config: + severity: warn + # This constraint should be still generated because always_create_constraint=true + - dbt_constraints.unique_key: + column_name: o_orderkey + config: + warn_if: ">= 5000" + error_if: ">= 10000" + always_create_constraint: true + # This constraint should be still generated because always_create_constraint=true + - dbt_constraints.unique_key: + column_name: o_orderkey_seq + config: + severity: warn + always_create_constraint: true +``` + ## Primary Maintainers From 156d923df21013c5bbc5310c770090d4f53ccc34 Mon Sep 17 00:00:00 2001 From: Dan Flippo Date: Mon, 31 Jul 2023 13:27:13 -0400 Subject: [PATCH 6/7] Added differences from model contracts --- README.md | 4 ++++ 1 file changed, 4 
insertions(+) diff --git a/README.md b/README.md index fd24392..ec73eb7 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,10 @@ This package generates database constraints based on the tests in a dbt project. It is currently compatible with Snowflake, PostgreSQL, Oracle, Redshift, and Vertica only. +## How the dbt Constraints Package differs from dbt's Model Contracts feature + +This package focuses on automatically generating constraints base on the tests already in a user's dbt project. In most cases, merely adding the dbt Constraints package is all that is needed to generate constraints. dbt's recent [model contracts feature](https://docs.getdbt.com/docs/collaborate/govern/model-contracts) allows users to explicitly document constraints for models in yml. This package and the core feature are 100% compatible with one another and the dbt Constraints package will skip generating constraints already created by a model constract. However, the dbt Constraints package will also generate constraints for any tests that are not documented as model contracts. As decribed in the next section dbt Constraints is also designed to provide join elimination on Snowflake. + ## Why data engineers should add referential integrity constraints The primary reason to add constraints to your database tables is that many tools including [DBeaver](https://dbeaver.io) and [Oracle SQL Developer Data Modeler](https://community.snowflake.com/s/article/How-To-Customizing-Oracle-SQL-Developer-Data-Modeler-SDDM-to-Support-Snowflake-Variant) can correctly reverse-engineer data model diagrams if there are primary keys, unique keys, and foreign keys on tables. Most BI tools will also add joins automatically between tables when you import tables that have foreign keys. This can both save time and avoid mistakes. 
From 6ae918a23afc56d7bd87cba6de1dfccc97f02b51 Mon Sep 17 00:00:00 2001 From: Andrew Coleman Date: Tue, 15 Aug 2023 10:07:35 -0400 Subject: [PATCH 7/7] Update README.md Minor grammar/spelling fixes --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index ec73eb7..b110c35 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ This package generates database constraints based on the tests in a dbt project. ## How the dbt Constraints Package differs from dbt's Model Contracts feature -This package focuses on automatically generating constraints base on the tests already in a user's dbt project. In most cases, merely adding the dbt Constraints package is all that is needed to generate constraints. dbt's recent [model contracts feature](https://docs.getdbt.com/docs/collaborate/govern/model-contracts) allows users to explicitly document constraints for models in yml. This package and the core feature are 100% compatible with one another and the dbt Constraints package will skip generating constraints already created by a model constract. However, the dbt Constraints package will also generate constraints for any tests that are not documented as model contracts. As decribed in the next section dbt Constraints is also designed to provide join elimination on Snowflake. +This package focuses on automatically generating constraints based on the tests already in a user's dbt project. In most cases, merely adding the dbt Constraints package is all that is needed to generate constraints. dbt's recent [model contracts feature](https://docs.getdbt.com/docs/collaborate/govern/model-contracts) allows users to explicitly document constraints for models in yml. This package and the core feature are 100% compatible with one another and the dbt Constraints package will skip generating constraints already created by a model contract. 
However, the dbt Constraints package will also generate constraints for any tests that are not documented as model contracts. As described in the next section, dbt Constraints is also designed to provide join elimination on Snowflake. ## Why data engineers should add referential integrity constraints @@ -14,7 +14,7 @@ In addition, although Snowflake doesn't enforce most constraints, the [query opt Many databases including [Snowflake](https://docs.snowflake.com/en/user-guide/join-elimination.html), PostgreSQL, Oracle, SQL Server, MySQL, and DB2 can use referential integrity constraints to perform "[Join Elimination](https://blog.jooq.org/join-elimination-an-essential-optimiser-feature-for-advanced-sql-usage/)" to remove tables from an execution plan. This commonly occurs when you query a subset of columns from a view and some of the tables in the view are unnecessary. In addition, on databases that do not support join elimination, some [BI and visualization tools will also rewrite their queries](https://docs.snowflake.com/en/user-guide/table-considerations.html#referential-integrity-constraints) based on constraint information, producing the same effect. -Finally, although most columnar databases including Snowflake do not use or need indexes, most row-oriented databases including PostgreSQL and Oracle require indexes on their primary key columns in order to perform efficient joins between tables. Typically a primary key or unique key constraint is enforced on such databases using such indexes. Having dbt create the unique indexes automatically can slightly reduce the degree of performance tuning necessary for row-oriented databases. Row-oriented databases frequently also need indexes on foreign key columns but [that is something best added manually](https://docs.getdbt.com/reference/resource-configs/postgres-configs#indexes). 
+Finally, although most columnar databases including Snowflake do not use or need indexes, most row-oriented databases including PostgreSQL and Oracle require indexes on their primary key columns in order to perform efficient joins between tables. A primary key or unique key constraint is typically enforced on databases using such indexes. Having dbt create the unique indexes automatically can slightly reduce the degree of performance tuning necessary for row-oriented databases. Row-oriented databases frequently also need indexes on foreign key columns but [that is something best added manually](https://docs.getdbt.com/reference/resource-configs/postgres-configs#indexes). ## Please note @@ -123,7 +123,7 @@ Generally, if you don't meet a requirement, tests are still executed but the con - All models involved in a constraint must be materialized as table, incremental, snapshot, or seed. -- If source constraints are enabled, the source must be a table. You must also have the `OWNERSHIP` table privilege to add a constraint. For foreign keys you also need the `REFERENCES` privilege on the parent table with the primary or unique key. The package will identify when you lack these privileges on Snowflake and PostgreSQL. Oracle does not provide an easy way to look up your effective privileges so it has an exception handler and will display Oracle's error messages. +- If source constraints are enabled, the source must be a table. You must also have the `OWNERSHIP` table privilege to add a constraint. For foreign keys you also need the `REFERENCES` privilege on the parent table with the primary or unique key. The package will identify when you lack these privileges on Snowflake and PostgreSQL. Oracle does not provide an easy way to look up your effective privileges so it has an exception handler and will display Oracle's error messages. - All columns on constraints must be individual column names, not expressions. 
You can reference columns on a model that come from an expression.