Skip to content

Commit

Permalink
removed tag in each single model and updated the README.md
Browse files Browse the repository at this point in the history
  • Loading branch information
inazr committed Feb 9, 2025
1 parent b600288 commit 76c2f5f
Show file tree
Hide file tree
Showing 8 changed files with 152 additions and 13 deletions.
10 changes: 7 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
Welcome to the snap_join macro project!

[![dbt - >=1.7.0](https://img.shields.io/static/v1?label=dbt&message=>%3D1.7.0&color=%23FF694B&logo=dbt)](https://getdbt.com)

tested databases:

[![db - BigQuery](https://img.shields.io/static/v1?label=db&message=BigQuery&color=%23669DF6&logo=googlebigquery)](https://cloud.google.com/bigquery)
[![db - duckdb](https://img.shields.io/static/v1?label=db&message=duckdb&color=%23FFF000&logo=duckdb)](https://motherduck.com)
[![db - Snowflake](https://img.shields.io/static/v1?label=db&message=Snowflake&color=%2329B5E8&logo=Snowflake)](https://www.snowflake.com)
Expand All @@ -23,10 +26,11 @@ Each record in a snapshot table has a valid_from and a valid_to value.

### Features of this macro:

- You can join as many source snapshots into a single output snapshot as you like.
- There are no naming restrictions for any column. e.g. the column that represents the valid_from point in time can be named `valid_from` in one of the tables and `dbt_valid_from` in any of the other source tables.
- The resulting snapshot will be reduced to the minimum number of records needed to represent the state of any given column set. If you join two tables the number of records in the output snapshot depends on the selected column set.
- Column names must be unique within each source table, but the same column name may appear in more than one source table.
- You can join as many source snapshots into a single output snapshot as you like.
- There are no naming restrictions for any column. e.g. the column that represents the valid_from point in time can be named `valid_from` in one of the tables and `dbt_valid_from` in any of the other source tables.


### How to use the macro

Expand All @@ -36,7 +40,7 @@ This is the model code from one of the examples:
{{ snap_join(['stg_seed_a', 'stg_seed_b','stg_seed_c'],
['valid_from', 'valid_from', 'dbt_valid_from'],
['valid_to', 'valid_to', 'dbt_valid_to'],
['primary_key', 'primary_key', 'primary_key_with_another_name'],
['primary_key', 'any_column', 'primary_key_with_another_name'],
[['dim_a', 'dim_b', 'dim_c'], ['dim_d', 'dim_e', 'dim_f'], ['dim_a', 'dim_h']]
) }}
Expand Down
120 changes: 120 additions & 0 deletions macros/snap_join_inc.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
{% macro snap_join_inc(list_of_models, valid_from_columns, valid_to_columns, list_of_join_keys, list_of_columns, max_valid_from=none) %}

{#-
    Renders a chain of CTEs that joins several snapshot-style models into one
    combined snapshot. The last CTE is named `snap_join`; the calling model is
    expected to append its own `SELECT ... FROM snap_join`.

    Arguments (all lists are positional and parallel to `list_of_models`):
      list_of_models     - model names; each is passed to ref().
      valid_from_columns - name of the "valid from" column in each model.
      valid_to_columns   - name of the "valid to" column in each model
                           (NULL is treated as "still open").
      list_of_join_keys  - name of the join key column in each model.
      list_of_columns    - list of lists: the columns to carry over from each
                           model. They are exposed as <model>_<column>.
      max_valid_from     - optional cutoff. When given, change points later
                           than this value are ignored. Default: no cutoff.
                           The value is rendered as a quoted SQL literal.

    Output columns of `snap_join`: dbt_valid_from, dbt_valid_to (NULL for open
    records), join_key, one <model>_<column> per requested column, and
    _surrogate_key.

    In incremental runs only source rows that are still open, or that were
    closed after MAX(dbt_valid_to) of the target relation, are considered.
-#}

{%- set column_list_alias = [] -%}
{%- set column_list_labels = [] -%}
{%- set column_list_with_join_key = [] -%}

{%- for model in list_of_models -%}
    {%- for column in list_of_columns[loop.index0] -%}
        {%- do column_list_alias.append(model ~ "." ~ column ~ " AS " ~ model ~ "_" ~ column) -%}
        {%- do column_list_labels.append(model ~ "_" ~ column) -%}
        {%- do column_list_with_join_key.append(model ~ "." ~ column) -%}
    {%- endfor -%}
{%- endfor -%}

{%- do column_list_with_join_key.append('join_key') -%}

{#- Hash over all carried columns plus the join key; rendered once, reused below. -#}
{%- set row_hash = dbt_utils.generate_surrogate_key(column_list_with_join_key) -%}

{#- DuckDB and Snowflake spell the star modifier EXCLUDE; BigQuery (and the fallback) use EXCEPT. -#}
{%- if target.type in ('duckdb', 'snowflake') -%}
    {%- set exclude_except = 'EXCLUDE' -%}
{%- else -%}
    {%- set exclude_except = 'EXCEPT' -%}
{%- endif %}

WITH
-- Every distinct point in time at which any source model changed, per join key.
all_distinct_valid_from AS (

    {% for model in list_of_models %}

    SELECT
        {{ model }}.{{ valid_from_columns[loop.index0] }} AS valid_from,
        {{ model }}.{{ list_of_join_keys[loop.index0] }} AS join_key
    FROM
        {{ ref(model) }}

    {% if is_incremental() %}
    WHERE 1=1
        AND ({{ model }}.{{ valid_to_columns[loop.index0] }} IS NULL OR {{ model }}.{{ valid_to_columns[loop.index0] }} > (SELECT MAX(dbt_valid_to) FROM {{ this }}))
    {% endif -%}

    {% if not loop.last %}
    UNION DISTINCT
    {% endif -%}

    {% endfor %}
)

-- Turn the change points into consecutive intervals: each interval ends where
-- the next one for the same join key starts (NULL for the latest interval).
, valid_from_to AS (

    SELECT
        all_distinct_valid_from.valid_from,
        LEAD(all_distinct_valid_from.valid_from) OVER (PARTITION BY all_distinct_valid_from.join_key ORDER BY all_distinct_valid_from.valid_from) AS valid_to,
        all_distinct_valid_from.join_key
    FROM
        all_distinct_valid_from

    {# No QUALIFY on valid_to is needed: WHERE is applied before the window
       function, so LEAD can only return values that already passed the cutoff. #}
    {% if max_valid_from is not none %}
    WHERE 1=1
        AND all_distinct_valid_from.valid_from <= '{{ max_valid_from }}'
    {% endif -%}
)

-- Attach to every interval the source record that fully covers it.
, joining_data AS (

    SELECT
        valid_from_to.valid_from,
        valid_from_to.valid_to,
        valid_from_to.join_key,
        {{ column_list_alias | join(",\n") }},
        -- NULL when the row carries the same values as the previous interval,
        -- otherwise the row hash: marks the intervals where something changed.
        CASE WHEN {{ row_hash }} = LAG({{ row_hash }}) OVER (PARTITION BY valid_from_to.join_key ORDER BY valid_from_to.valid_from ASC)
            THEN NULL
            ELSE {{ row_hash }}
        END AS _surrogate_key
    FROM
        valid_from_to

    {% for model in list_of_models -%}

    LEFT JOIN
        {{ ref(model) }}
        ON valid_from_to.join_key = {{ model }}.{{ list_of_join_keys[loop.index0] }}
        AND valid_from_to.valid_from >= {{ model }}.{{ valid_from_columns[loop.index0] }}
        -- The sentinels differ on purpose: an open interval ('8888-12-31')
        -- still fits inside an open source record ('9999-12-31').
        AND COALESCE(valid_from_to.valid_to, '8888-12-31') <= COALESCE({{ model }}.{{ valid_to_columns[loop.index0] }}, '9999-12-31')

    {% endfor %}
)

-- Running aggregate of the change markers: consecutive unchanged intervals end
-- up with the same key (this relies on STRING_AGG skipping NULL inputs).
, surrogate_to_primary_key AS (

    SELECT
        joining_data.* {{ exclude_except }}(_surrogate_key),
        TO_HEX(MD5(STRING_AGG(joining_data._surrogate_key) OVER (PARTITION BY joining_data.join_key ORDER BY joining_data.valid_from ASC))) AS _surrogate_key
    FROM
        joining_data

)

-- Collapse each run of unchanged intervals into a single record.
, snap_join AS (

    SELECT
        MIN(surrogate_to_primary_key.valid_from) AS dbt_valid_from,
        -- Open intervals are mapped to a sentinel so MAX keeps them "latest",
        -- then mapped back to NULL.
        NULLIF(MAX(COALESCE(surrogate_to_primary_key.valid_to, '9999-12-31')), '9999-12-31') AS dbt_valid_to,
        surrogate_to_primary_key.* {{ exclude_except }}(valid_from, valid_to)
    FROM
        surrogate_to_primary_key

    GROUP BY
        surrogate_to_primary_key.join_key,
        {{ column_list_labels | join(",\n") }},
        surrogate_to_primary_key._surrogate_key

)
{% endmacro %}
2 changes: 0 additions & 2 deletions models/example/core_snap__a_b.sql
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
{{ config(tag='snap_join_example') }}

{{ snap_join(['stg_seed_a', 'stg_seed_b'],
['valid_from', 'valid_from'],
['valid_to', 'valid_to'],
Expand Down
2 changes: 0 additions & 2 deletions models/example/core_snap__a_b_c.sql
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
{{ config(tag='snap_join_example') }}

{{ snap_join(['stg_seed_a', 'stg_seed_b','stg_seed_c'],
['valid_from', 'valid_from', 'dbt_valid_from'],
['valid_to', 'valid_to', 'dbt_valid_to'],
Expand Down
25 changes: 25 additions & 0 deletions models/example/core_snap__a_b_c_inc.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{#-
    Example model: incremental variant of the three-way snapshot join.
    The snap_join_inc macro renders the CTE chain; this model only selects
    from its final CTE, `snap_join`.

    NOTE(review): dbt_valid_to is part of unique_key, but the macro emits NULL
    there for open records - confirm that the merge matches those rows as
    intended on the target warehouse.
-#}
{{ config(
    materialized='incremental',
    incremental_strategy='merge',
    unique_key=['dbt_valid_to', 'join_key'],
    cluster_by=['dbt_valid_to', 'join_key']
) }}

{{ snap_join_inc(
    list_of_models=['stg_seed_a', 'stg_seed_b', 'stg_seed_c'],
    valid_from_columns=['valid_from', 'valid_from', 'dbt_valid_from'],
    valid_to_columns=['valid_to', 'valid_to', 'dbt_valid_to'],
    list_of_join_keys=['primary_key', 'primary_key', 'primary_key_with_another_name'],
    list_of_columns=[
        ['dim_a', 'dim_b', 'dim_c'],
        ['dim_d', 'dim_e', 'dim_f'],
        ['dim_a', 'dim_h']
    ]
) }}

SELECT
    *
FROM
    snap_join
2 changes: 0 additions & 2 deletions models/example/stg_seed_a.sql
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
{{ config(tag='snap_join_example') }}

SELECT
*
FROM
Expand Down
2 changes: 0 additions & 2 deletions models/example/stg_seed_b.sql
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
{{ config(tag='snap_join_example') }}

SELECT
*
FROM
Expand Down
2 changes: 0 additions & 2 deletions models/example/stg_seed_c.sql
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
{{ config(tag='snap_join_example') }}

SELECT
*
FROM
Expand Down

0 comments on commit 76c2f5f

Please sign in to comment.