Skip to content

Commit

Permalink
Fix invalid generated column names in conversion events (#327)
Browse files Browse the repository at this point in the history
* Add valid_column_name macro

* Update models using conversion_events

* Add tests

* Update project name in tests to call macros

* Add new macro to unit tests

* Fix invalid escape character

* Fix page test by harmonizing event name

* Fix jinja range error in test

---------

Co-authored-by: Adam Ribaudo <adam.ribaudo@velir.com>
  • Loading branch information
jerome-laurent-pro and adamribaudo-velir authored Jun 5, 2024
1 parent 3ef51b6 commit df85449
Show file tree
Hide file tree
Showing 8 changed files with 131 additions and 15 deletions.
14 changes: 14 additions & 0 deletions macros/valid_column_name.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{% macro valid_column_name(column_name) %}
    {#
        Turn an arbitrary value into a string that can be used as a column name.

        The argument is cast to a string, every character other than an ASCII
        letter, a digit or an underscore is replaced by "_", and the result is
        prefixed with "_" when it would otherwise begin with a digit.
    #}
    {% set re = modules.re %}
    {% set cleaned_name = re.sub('[^a-zA-Z0-9_]', '_', column_name|string) %}
    {# A column name may not begin with a digit, so guard those with an underscore. #}
    {% set prefix = '_' if re.match('^\\d', cleaned_name) else '' %}
    {{ return(prefix ~ cleaned_name) }}
{% endmacro %}
3 changes: 2 additions & 1 deletion models/marts/core/fct_ga4__client_keys.sql
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ select
count(distinct session_key) as count_sessions
{% if var('conversion_events', false) %}
{% for ce in var('conversion_events',[]) %}
, sum(count_{{ce}}) as count_{{ce}}
{% set clean_ce = ga4.valid_column_name(ce) %}
, sum(count_{{clean_ce}}) as count_{{clean_ce}}
{% endfor %}
{% endif %}
from {{ref('fct_ga4__sessions')}}
Expand Down
3 changes: 2 additions & 1 deletion models/marts/core/fct_ga4__sessions.sql
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ select
min(session_number) as session_number
{% if var('conversion_events', false) %}
{% for ce in var('conversion_events',[]) %}
, sum({{ce}}_count) as count_{{ce}}
{% set clean_ce = ga4.valid_column_name(ce) %}
, sum({{clean_ce}}_count) as count_{{clean_ce}}
{% endfor %}
{% endif %}
from {{ref('fct_ga4__sessions_daily')}}
Expand Down
3 changes: 2 additions & 1 deletion models/marts/core/fct_ga4__user_ids.sql
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ select
sum(count_sessions) as count_sessions
{% if var('conversion_events', false) %}
{% for ce in var('conversion_events',[]) %}
, sum(count_{{ce}}) as count_{{ce}}
{% set clean_ce = ga4.valid_column_name(ce) %}
, sum(count_{{clean_ce}}) as count_{{clean_ce}}
{% endfor %}
{% endif %}
from user_id_mapped
Expand Down
2 changes: 1 addition & 1 deletion models/staging/stg_ga4__page_conversions.sql
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
select
page_key
{% for ce in var('conversion_events',[]) %}
, countif(event_name = '{{ce}}') as {{ce}}_count
, countif(event_name = '{{ce}}') as {{ga4.valid_column_name(ce)}}_count
{% endfor %}
from {{ref('stg_ga4__events')}}
group by 1
2 changes: 1 addition & 1 deletion models/staging/stg_ga4__session_conversions_daily.sql
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ with event_counts as (
session_partition_key,
min(event_date_dt) as session_partition_date -- The date of this session partition
{% for ce in var('conversion_events',[]) %}
, countif(event_name = '{{ce}}') as {{ce}}_count
, countif(event_name = '{{ce}}') as {{ga4.valid_column_name(ce)}}_count
{% endfor %}
from {{ref('stg_ga4__events')}}
where 1=1
Expand Down
58 changes: 53 additions & 5 deletions unit_tests/test_stg_ga4__page_conversions.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import pytest
from dbt.tests.util import read_file,check_relations_equal,run_dbt
from dbt.tests.util import check_relations_equal, read_file, run_dbt

# Define mocks via CSV (seeds) or SQL (models)
mock_stg_ga4__events_csv = """event_name,page_key
Expand All @@ -8,14 +8,33 @@
page_view,B
""".lstrip()

mock_stg_ga4__nonstandard_events_csv = """event_name,page_key
page-view,A
page-view,A
page-view,B
""".lstrip()

expected_csv = """page_key,page_view_count
A,2
B,1
""".lstrip()

actual = read_file('../models/staging/stg_ga4__page_conversions.sql')
actual = read_file("../models/staging/stg_ga4__page_conversions.sql")


class TestPageConversions:
# Update project name to ga4 so we can call macros with ga4.macro_name
@pytest.fixture(scope="class")
def project_config_update(self):
return {"name": "ga4"}

# everything that goes in the "macros"
@pytest.fixture(scope="class")
def macros(self):
return {
"valid_column_name.sql": read_file("../macros/valid_column_name.sql"),
}

class TestPageConversions():
# everything that goes in the "seeds" directory (= CSV format)
@pytest.fixture(scope="class")
def seeds(self):
Expand All @@ -30,8 +49,37 @@ def models(self):
return {
"actual.sql": actual,
}

def test_mock_run_and_check(self, project):
run_dbt(["build", "--vars", "conversion_events: ['page_view']"])
#breakpoint()
# breakpoint()
check_relations_equal(project.adapter, ["actual", "expected"])


class TestPageConversionsNonStandardEventName:
    """Build the page conversions model for an event name containing a hyphen.

    The seeded events are all named ``page-view`` and the build is run with
    ``conversion_events: ['page-view']``. The resulting ``actual`` relation is
    compared with ``expected``, whose count column is ``page_view_count``.
    """

    # Update project name to ga4 so we can call macros with ga4.macro_name;
    # the model under test references ga4.valid_column_name.
    @pytest.fixture(scope="class")
    def project_config_update(self):
        return {"name": "ga4"}

    # everything that goes in the "seeds" directory (= CSV format)
    @pytest.fixture(scope="class")
    def seeds(self):
        return {
            "stg_ga4__events.csv": mock_stg_ga4__nonstandard_events_csv,
            "expected.csv": expected_csv,
        }

    # everything that goes in the "macros"
    @pytest.fixture(scope="class")
    def macros(self):
        return {
            "valid_column_name.sql": read_file("../macros/valid_column_name.sql"),
        }

    # everything that goes in the "models" directory (= SQL)
    @pytest.fixture(scope="class")
    def models(self):
        return {
            "actual.sql": actual,
        }

    def test_mock_run_and_check(self, project):
        """Run ``dbt build`` with the hyphenated event and compare relations."""
        run_dbt(["build", "--vars", "conversion_events: ['page-view']"])
        check_relations_equal(project.adapter, ["actual", "expected"])
61 changes: 56 additions & 5 deletions unit_tests/test_stg_ga4__session_conversions_daily.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import pytest
from dbt.tests.util import read_file,check_relations_equal,run_dbt
from dbt.tests.util import check_relations_equal, read_file, run_dbt

# Define mocks via CSV (seeds) or SQL (models)
mock_stg_ga4__events_csv = """session_key,session_partition_key,event_name,event_date_dt
Expand All @@ -11,16 +11,31 @@
A,A2022-01-02,my_conversion,2022-01-02
""".lstrip()

mock_stg_ga4__nonstandard_events_csv = """session_key,session_partition_key,event_name,event_date_dt
A,A2022-01-01,page_view,2022-01-01
A,A2022-01-01,my-conversion,2022-01-01
A,A2022-01-01,my-conversion,2022-01-01
B,B2022-01-01,my-conversion,2022-01-01
C,C2022-01-01,some_other_event,2022-01-01
A,A2022-01-02,my-conversion,2022-01-02
""".lstrip()

expected_csv = """session_key,session_partition_key,session_partition_date,my_conversion_count
A,A2022-01-01,2022-01-01,2
B,B2022-01-01,2022-01-01,1
C,C2022-01-01,2022-01-01,0
A,A2022-01-02,2022-01-02,1
""".lstrip()

actual = read_file('../models/staging/stg_ga4__session_conversions_daily.sql')
actual = read_file("../models/staging/stg_ga4__session_conversions_daily.sql")


class TestUsersFirstLastEvents:
# Update project name to ga4 so we can call macros with ga4.macro_name
@pytest.fixture(scope="class")
def project_config_update(self):
return {"name": "ga4", "vars": {"static_incremental_days": 3}}

class TestUsersFirstLastEvents():
# everything that goes in the "seeds" directory (= CSV format)
@pytest.fixture(scope="class")
def seeds(self):
Expand All @@ -29,14 +44,50 @@ def seeds(self):
"expected.csv": expected_csv,
}

# everything that goes in the "macros"
@pytest.fixture(scope="class")
def macros(self):
return {
"valid_column_name.sql": read_file("../macros/valid_column_name.sql"),
}

# everything that goes in the "models" directory (= SQL)
@pytest.fixture(scope="class")
def models(self):
return {
"actual.sql": actual,
}

def test_mock_run_and_check(self, project):
run_dbt(["build", "--vars", "conversion_events: ['my_conversion']"])
#breakpoint()
# breakpoint()
check_relations_equal(project.adapter, ["actual", "expected"])


class TestUsersNonStandardEventName:
    """Build the daily session conversions model for a hyphenated event name.

    The seeded events use ``my-conversion`` and the build is run with
    ``conversion_events: ['my-conversion']``. The resulting ``actual`` relation
    is compared with ``expected``, whose count column is
    ``my_conversion_count``.
    """

    # Update project name to ga4 so we can call macros with ga4.macro_name, and
    # supply the same static_incremental_days var as the standard-name test
    # class so both classes build the model under the same project config.
    @pytest.fixture(scope="class")
    def project_config_update(self):
        return {"name": "ga4", "vars": {"static_incremental_days": 3}}

    # everything that goes in the "seeds" directory (= CSV format)
    @pytest.fixture(scope="class")
    def seeds(self):
        return {
            "stg_ga4__events.csv": mock_stg_ga4__nonstandard_events_csv,
            "expected.csv": expected_csv,
        }

    # everything that goes in the "macros"
    @pytest.fixture(scope="class")
    def macros(self):
        return {
            "valid_column_name.sql": read_file("../macros/valid_column_name.sql"),
        }

    # everything that goes in the "models" directory (= SQL)
    @pytest.fixture(scope="class")
    def models(self):
        return {
            "actual.sql": actual,
        }

    def test_mock_run_and_check(self, project):
        """Run ``dbt build`` with the hyphenated event and compare relations."""
        run_dbt(["build", "--vars", "conversion_events: ['my-conversion']"])
        check_relations_equal(project.adapter, ["actual", "expected"])

0 comments on commit df85449

Please sign in to comment.