From bc80c6585c63aacec7a6d6247a87a93fe0beea29 Mon Sep 17 00:00:00 2001 From: avishalom Date: Fri, 10 Apr 2020 15:38:38 -0400 Subject: [PATCH 1/4] Adding some logic to handle unpivoting boolean columns --- .../data/sql/data_unpivot_bool.csv | 4 +++ .../data/sql/data_unpivot_bool_expected.csv | 10 ++++++ integration_tests/models/sql/schema.yml | 5 +++ .../models/sql/test_unpivot_bool.sql | 33 +++++++++++++++++++ macros/sql/unpivot.sql | 7 +++- 5 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 integration_tests/data/sql/data_unpivot_bool.csv create mode 100644 integration_tests/data/sql/data_unpivot_bool_expected.csv create mode 100644 integration_tests/models/sql/test_unpivot_bool.sql diff --git a/integration_tests/data/sql/data_unpivot_bool.csv b/integration_tests/data/sql/data_unpivot_bool.csv new file mode 100644 index 00000000..e1562a68 --- /dev/null +++ b/integration_tests/data/sql/data_unpivot_bool.csv @@ -0,0 +1,4 @@ +customer_id,created_at,status,segment,is_updated +123,2017-01-01,active,tier 1,TRUE +234,2017-02-01,active,tier 3,FALSE +567,2017-03-01,churned,tier 2,TRUE diff --git a/integration_tests/data/sql/data_unpivot_bool_expected.csv b/integration_tests/data/sql/data_unpivot_bool_expected.csv new file mode 100644 index 00000000..302ba6b0 --- /dev/null +++ b/integration_tests/data/sql/data_unpivot_bool_expected.csv @@ -0,0 +1,10 @@ +customer_id,created_at,prop,val +123,"2017-01-01","segment","tier 1" +123,"2017-01-01","status","active" +123,"2017-01-01","is_updated","true" +234,"2017-02-01","segment","tier 3" +234,"2017-02-01","status","active" +234,"2017-02-01","is_updated","false" +567,"2017-03-01","status","churned" +567,"2017-03-01","is_updated","true" +567,"2017-03-01","segment","tier 2" diff --git a/integration_tests/models/sql/schema.yml b/integration_tests/models/sql/schema.yml index dcf0b8ad..07ce68ba 100644 --- a/integration_tests/models/sql/schema.yml +++ b/integration_tests/models/sql/schema.yml @@ -96,6 +96,11 @@ models: - 
dbt_utils.equality: compare_model: ref('data_unpivot_expected') + - name: test_unpivot_bool + tests: + - dbt_utils.equality: + compare_model: ref('data_unpivot_bool_expected') + - name: test_star tests: - dbt_utils.equality: diff --git a/integration_tests/models/sql/test_unpivot_bool.sql b/integration_tests/models/sql/test_unpivot_bool.sql new file mode 100644 index 00000000..9a7161f5 --- /dev/null +++ b/integration_tests/models/sql/test_unpivot_bool.sql @@ -0,0 +1,33 @@ + +-- snowflake messes with these tests pretty badly since the +-- output of the macro considers the casing of the source +-- table columns. Using some hacks here to get this to work, +-- but we should consider lowercasing the unpivot macro output +-- at some point in the future for consistency + +{% if target.name == 'snowflake' %} + {% set exclude = ['CUSTOMER_ID', 'CREATED_AT'] %} +{% else %} + {% set exclude = ['customer_id', 'created_at'] %} +{% endif %} + + +select + customer_id, + created_at, + case + when '{{ target.name }}' = 'snowflake' then lower(prop) + else prop + end as prop, + val + +from ( + {{ dbt_utils.unpivot( + relation=ref('data_unpivot_bool'), + cast_to=dbt_utils.type_string(), + exclude=exclude, + remove='is_updated', + field_name='prop', + value_name='val' + ) }} +) as sbq diff --git a/macros/sql/unpivot.sql b/macros/sql/unpivot.sql index de1a1b5c..348f8797 100644 --- a/macros/sql/unpivot.sql +++ b/macros/sql/unpivot.sql @@ -58,7 +58,12 @@ Arguments: {%- endfor %} cast('{{ col.column }}' as {{ dbt_utils.type_string() }}) as {{ field_name }}, - cast({{ col.column }} as {{ cast_to }}) as {{ value_name }} + cast( {% if col.data_type == 'boolean' %} + case when {{ col.column }} then 'true' else 'false' end + {% else %} + {{ col.column }} + {% endif %} + as {{ cast_to }}) as {{ value_name }} from {{ relation }} From 7c1082d6552d50338d0108231cbbd0445ee0afad Mon Sep 17 00:00:00 2001 From: avishalom Date: Fri, 10 Apr 2020 16:07:09 -0400 Subject: [PATCH 2/4] Readme & deleted the 
'remove' line which does nothing. --- README.md | 1 + integration_tests/models/sql/test_unpivot_bool.sql | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ebe1ddfe..f1aebadb 100644 --- a/README.md +++ b/README.md @@ -663,6 +663,7 @@ Arguments: #### unpivot ([source](macros/sql/unpivot.sql)) This macro "un-pivots" a table from wide format to long format. Functionality is similar to pandas [melt](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.melt.html) function. +Boolean values are replaced with the strings 'true' or 'false'. Usage: ``` diff --git a/integration_tests/models/sql/test_unpivot_bool.sql b/integration_tests/models/sql/test_unpivot_bool.sql index 9a7161f5..26842c01 100644 --- a/integration_tests/models/sql/test_unpivot_bool.sql +++ b/integration_tests/models/sql/test_unpivot_bool.sql @@ -26,7 +26,6 @@ from ( relation=ref('data_unpivot_bool'), cast_to=dbt_utils.type_string(), exclude=exclude, - remove='is_updated', field_name='prop', value_name='val' ) }} From f67875f655a4184d5ad8c31c1f44183adec1880a Mon Sep 17 00:00:00 2001 From: Claire Carroll <20294432+clrcrl@users.noreply.github.com> Date: Mon, 14 Dec 2020 17:05:04 -0500 Subject: [PATCH 3/4] Finish off unpivot boolean logic --- .../data/cross_db/data_cast_bool_to_text | 4 ++++ .../data/sql/data_unpivot_bool.csv | 2 +- .../data/sql/data_unpivot_bool_expected.csv | 18 +++++++++--------- macros/cross_db_utils/cast_bool_to_text.sql | 15 +++++++++++++++ macros/sql/unpivot.sql | 2 +- 5 files changed, 30 insertions(+), 11 deletions(-) create mode 100644 integration_tests/data/cross_db/data_cast_bool_to_text create mode 100644 macros/cross_db_utils/cast_bool_to_text.sql diff --git a/integration_tests/data/cross_db/data_cast_bool_to_text b/integration_tests/data/cross_db/data_cast_bool_to_text new file mode 100644 index 00000000..23090b82 --- /dev/null +++ b/integration_tests/data/cross_db/data_cast_bool_to_text @@ -0,0 +1,4 @@ +id,my_bool +1,true +2,false 
+3, diff --git a/integration_tests/data/sql/data_unpivot_bool.csv b/integration_tests/data/sql/data_unpivot_bool.csv index e1562a68..51277792 100644 --- a/integration_tests/data/sql/data_unpivot_bool.csv +++ b/integration_tests/data/sql/data_unpivot_bool.csv @@ -1,4 +1,4 @@ customer_id,created_at,status,segment,is_updated 123,2017-01-01,active,tier 1,TRUE 234,2017-02-01,active,tier 3,FALSE -567,2017-03-01,churned,tier 2,TRUE +567,2017-03-01,churned,tier 2, diff --git a/integration_tests/data/sql/data_unpivot_bool_expected.csv b/integration_tests/data/sql/data_unpivot_bool_expected.csv index 302ba6b0..4e688a6b 100644 --- a/integration_tests/data/sql/data_unpivot_bool_expected.csv +++ b/integration_tests/data/sql/data_unpivot_bool_expected.csv @@ -1,10 +1,10 @@ customer_id,created_at,prop,val -123,"2017-01-01","segment","tier 1" -123,"2017-01-01","status","active" -123,"2017-01-01","is_updated","true" -234,"2017-02-01","segment","tier 3" -234,"2017-02-01","status","active" -234,"2017-02-01","is_updated","false" -567,"2017-03-01","status","churned" -567,"2017-03-01","is_updated","true" -567,"2017-03-01","segment","tier 2" +123,2017-01-01,segment,tier 1 +123,2017-01-01,status,active +123,2017-01-01,is_updated,true +234,2017-02-01,segment,tier 3 +234,2017-02-01,status,active +234,2017-02-01,is_updated,false +567,2017-03-01,status,churned +567,2017-03-01,is_updated, +567,2017-03-01,segment,tier 2 diff --git a/macros/cross_db_utils/cast_bool_to_text.sql b/macros/cross_db_utils/cast_bool_to_text.sql new file mode 100644 index 00000000..1953c526 --- /dev/null +++ b/macros/cross_db_utils/cast_bool_to_text.sql @@ -0,0 +1,15 @@ +{% macro cast_bool_to_text(field) %} {# Renders SQL that casts the boolean expression `field` to a string; the implementation is picked per adapter via dispatch. #} + {{ adapter.dispatch('cast_bool_to_text', packages = dbt_utils._get_utils_namespaces()) (field) }} +{% endmacro %} + + +{% macro default__cast_bool_to_text(field) %} {# Fallback implementation: a plain cast to the adapter's string type. #} + cast({{ field }} as {{ dbt_utils.type_string() }}) {# type_string() instead of a literal "text", which is not a valid type name on every warehouse #} +{% endmacro %} + +{% macro redshift__cast_bool_to_text(field) %} {# Spelled out as a case expression; with no else branch a null input stays null. NOTE(review): presumably Redshift cannot cast boolean to text directly - confirm. #} + case + when {{ field }} is true then 'true' + when {{ 
field }} is false then 'false' + end::text +{% endmacro %} diff --git a/macros/sql/unpivot.sql b/macros/sql/unpivot.sql index 348f8797..0087cdd2 100644 --- a/macros/sql/unpivot.sql +++ b/macros/sql/unpivot.sql @@ -59,7 +59,7 @@ Arguments: cast('{{ col.column }}' as {{ dbt_utils.type_string() }}) as {{ field_name }}, cast( {% if col.data_type == 'boolean' %} - case when {{ col.column }} then 'true' else 'false' end + {{ dbt_utils.cast_bool_to_text(col.column) }} {% else %} {{ col.column }} {% endif %} From 3f477bb90f81642b385fc8939b22f4b0446d5e3c Mon Sep 17 00:00:00 2001 From: Claire Carroll <20294432+clrcrl@users.noreply.github.com> Date: Mon, 14 Dec 2020 17:10:42 -0500 Subject: [PATCH 4/4] Update changelog [ci skip] --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2efb0d2c..818e7381 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,10 @@ ## Features * Add new `accepted_range` test ([#276](https://github.com/fishtown-analytics/dbt-utils/pull/276) [@joellabes](https://github.com/joellabes)) + +## Fixes +* Handle booleans gracefully in the unpivot macro ([#305](https://github.com/fishtown-analytics/dbt-utils/pull/305) [@avishalom](https://github.com/avishalom)) + # dbt-utils v0.6.2 ## Fixes