From ac730a0203daf66a47231b7cd3bd1f28cd1c302a Mon Sep 17 00:00:00 2001 From: Bastien Boutonnet Date: Sat, 27 Feb 2021 13:21:13 +0100 Subject: [PATCH 01/10] add unit argument and apply conversion for kms multiply instead of divide and move logic in core macro rename things a bit update readme --- README.md | 7 +++++-- macros/geo/haversine_distance.sql | 16 +++++++++++----- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index fd665086..f2d88a49 100644 --- a/README.md +++ b/README.md @@ -95,7 +95,7 @@ Notes: ### Date/Time #### date_spine ([source](macros/datetime/date_spine.sql)) -This macro returns the sql required to build a date spine. The spine will include the `start_date` (if it is aligned to the `datepart`), but it will not include the `end_date`. +This macro returns the sql required to build a date spine. The spine will include the `start_date` (if it is aligned to the `datepart`), but it will not include the `end_date`. **Usage:** @@ -114,9 +114,12 @@ This macro returns the sql required to build a date spine. The spine will includ #### haversine_distance ([source](macros/geo/haversine_distance.sql)) This macro calculates the [haversine distance](http://daynebatten.com/2015/09/latitude-longitude-distance-sql/) between a pair of x/y coordinates. +Optionally takes a `unit` string parameter ('km' or 'mi') which defaults to miles (imperial system). + **Usage:** + ``` -{{ dbt_utils.haversine_distance(lat1=,lon1=,lat2=,lon2=) }} +{{ dbt_utils.haversine_distance(lat1=,lon1=,lat2=,lon2=, unit='mi') }} ``` --- ### Schema Tests diff --git a/macros/geo/haversine_distance.sql b/macros/geo/haversine_distance.sql index ed6c8281..8680e2ee 100644 --- a/macros/geo/haversine_distance.sql +++ b/macros/geo/haversine_distance.sql @@ -3,17 +3,23 @@ This calculates the distance between two sets of latitude and longitude. The formula is from the following blog post: http://daynebatten.com/2015/09/latitude-longitude-distance-sql/ -The arguments should be float type. +The arguments should be float type. #} -{% macro haversine_distance(lat1,lon1,lat2,lon2) -%} - {{ return(adapter.dispatch('haversine_distance', packages = dbt_utils._get_utils_namespaces())(lat1,lon1,lat2,lon2)) }} +{% macro haversine_distance(lat1,lon1,lat2,lon2,unit='mi') -%} + {{ return(adapter.dispatch('haversine_distance', packages = dbt_utils._get_utils_namespaces())(lat1,lon1,lat2,lon2,unit)) }} {% endmacro %} -{% macro default__haversine_distance(lat1,lon1,lat2,lon2) -%} +{% macro default__haversine_distance(lat1,lon1,lat2,lon2,unit='km') -%} +{# vanilla macro is in miles #} + {% set conversion = '' %} +{% if unit == 'km' %} +{# we multiply miles result to get it in kms #} + {% set conversion = '* 1.60934' %} +{% endif %} 2 * 3961 * asin(sqrt((sin(radians(({{lat2}} - {{lat1}}) / 2))) ^ 2 + cos(radians({{lat1}})) * cos(radians({{lat2}})) * - (sin(radians(({{lon2}} - {{lon1}}) / 2))) ^ 2)) + (sin(radians(({{lon2}} - {{lon1}}) / 2))) ^ 2)) {{conversion_rate}} {%- endmacro %} From 85c7d8f338534aaa1331767f6b5b2fbe764af6c1 Mon Sep 17 00:00:00 2001 From: Bastien Boutonnet Date: Sat, 6 Mar 2021 11:04:39 +0100 Subject: [PATCH 02/10] add test data and test for haversine too many curly brackets make params be strings pass strings remove round to see if this was the problem let's try casting round the output field instead add commas cast the macro result directly rename expected to output is it a rounding error swap the units what is going on... throw a log to see what we get remove curlies try computing in a CTE first' add comma I'm sleeping... fixup! I'm sleeping... rename in CTE dont quote? fix macro and separate unit in two tests fix yaml indent fix indentation in schema yaml alias the field clean up fixup! clean up make the macro log stuff remove logging in test macro remove the select star --- .../data/geo/data_haversine_km.csv | 2 ++ .../data/geo/data_haversine_mi.csv | 2 ++ integration_tests/dbt_project.yml | 4 ++-- integration_tests/macros/tests.sql | 1 - integration_tests/models/geo/schema.yml | 13 +++++++++++ .../models/geo/test_haversine_distance_km.sql | 23 +++++++++++++++++++ .../models/geo/test_haversine_distance_mi.sql | 23 +++++++++++++++++++ macros/geo/haversine_distance.sql | 7 +++--- 8 files changed, 69 insertions(+), 6 deletions(-) create mode 100644 integration_tests/data/geo/data_haversine_km.csv create mode 100644 integration_tests/data/geo/data_haversine_mi.csv create mode 100644 integration_tests/models/geo/schema.yml create mode 100644 integration_tests/models/geo/test_haversine_distance_km.sql create mode 100644 integration_tests/models/geo/test_haversine_distance_mi.sql diff --git a/integration_tests/data/geo/data_haversine_km.csv b/integration_tests/data/geo/data_haversine_km.csv new file mode 100644 index 00000000..636442f2 --- /dev/null +++ b/integration_tests/data/geo/data_haversine_km.csv @@ -0,0 +1,2 @@ +lat_1,lon_1,lat_2,lon_2,output +48.864716,2.349014,52.379189,4.899431,430 diff --git a/integration_tests/data/geo/data_haversine_mi.csv b/integration_tests/data/geo/data_haversine_mi.csv new file mode 100644 index 00000000..cc4757f7 --- /dev/null +++ b/integration_tests/data/geo/data_haversine_mi.csv @@ -0,0 +1,2 @@ +lat_1,lon_1,lat_2,lon_2,output +48.864716,2.349014,52.379189,4.899431,267 diff --git a/integration_tests/dbt_project.yml b/integration_tests/dbt_project.yml index 3a8c4feb..39493443 100644 --- a/integration_tests/dbt_project.yml +++ b/integration_tests/dbt_project.yml @@ -54,8 +54,8 @@ seeds: sql: data_events_20180103: +schema: events - + schema_tests: data_test_sequential_timestamps: +column_types: - my_timestamp: timestamp \ No newline at end of file + my_timestamp: timestamp diff --git a/integration_tests/macros/tests.sql b/integration_tests/macros/tests.sql index 9ca536e4..caeb5d6e 100644 --- a/integration_tests/macros/tests.sql +++ b/integration_tests/macros/tests.sql @@ -1,6 +1,5 @@ {% macro test_assert_equal(model, actual, expected) %} - select count(*) from {{ model }} where {{ actual }} != {{ expected }} {% endmacro %} diff --git a/integration_tests/models/geo/schema.yml b/integration_tests/models/geo/schema.yml new file mode 100644 index 00000000..50dd0241 --- /dev/null +++ b/integration_tests/models/geo/schema.yml @@ -0,0 +1,13 @@ +version: 2 + +models: + - name: test_haversine_distance_km + tests: + - assert_equal: + actual: actual + expected: expected + - name: test_haversine_distance_mi + tests: + - assert_equal: + actual: actual + expected: expected diff --git a/integration_tests/models/geo/test_haversine_distance_km.sql b/integration_tests/models/geo/test_haversine_distance_km.sql new file mode 100644 index 00000000..a00993f0 --- /dev/null +++ b/integration_tests/models/geo/test_haversine_distance_km.sql @@ -0,0 +1,23 @@ +with data as ( + select * from {{ ref('data_haversine_km') }} +), +final as ( + select + output as expected, + cast( + {{ + dbt_utils.haversine_distance( + lat1='lat_1', + lon1='lon_1', + lat2='lat_2', + lon2='lon_2', + unit='km' + ) + }} as numeric + ) as actual + from data +) +select + expected, + round(actual,0) as actual +from final diff --git a/integration_tests/models/geo/test_haversine_distance_mi.sql b/integration_tests/models/geo/test_haversine_distance_mi.sql new file mode 100644 index 00000000..58378d14 --- /dev/null +++ b/integration_tests/models/geo/test_haversine_distance_mi.sql @@ -0,0 +1,23 @@ +with data as ( + select * from {{ ref('data_haversine_mi') }} +), +final as ( + select + output as expected, + cast( + {{ + dbt_utils.haversine_distance( + lat1='lat_1', + lon1='lon_1', + lat2='lat_2', + lon2='lon_2', + unit='mi' + ) + }} as numeric + ) as actual + from data +) +select + expected, + round(actual,0) as actual +from final diff --git a/macros/geo/haversine_distance.sql b/macros/geo/haversine_distance.sql index 8680e2ee..8c1ed9d0 100644 --- a/macros/geo/haversine_distance.sql +++ b/macros/geo/haversine_distance.sql @@ -10,12 +10,13 @@ The arguments should be float type. {{ return(adapter.dispatch('haversine_distance', packages = dbt_utils._get_utils_namespaces())(lat1,lon1,lat2,lon2,unit)) }} {% endmacro %} -{% macro default__haversine_distance(lat1,lon1,lat2,lon2,unit='km') -%} +{% macro default__haversine_distance(lat1,lon1,lat2,lon2,unit) -%} +{{log(unit, info=true)}} {# vanilla macro is in miles #} - {% set conversion = '' %} + {% set conversion_rate = '' %} {% if unit == 'km' %} {# we multiply miles result to get it in kms #} - {% set conversion = '* 1.60934' %} + {% set conversion_rate = '* 1.60934' %} {% endif %} 2 * 3961 * asin(sqrt((sin(radians(({{lat2}} - {{lat1}}) / 2))) ^ 2 + From 61ac149c9503817120fc43939330474d04f4cae9 Mon Sep 17 00:00:00 2001 From: Bastien Boutonnet Date: Sat, 6 Mar 2021 14:29:05 +0100 Subject: [PATCH 03/10] remove logging in macro --- macros/geo/haversine_distance.sql | 1 - 1 file changed, 1 deletion(-) diff --git a/macros/geo/haversine_distance.sql b/macros/geo/haversine_distance.sql index 8c1ed9d0..d6521901 100644 --- a/macros/geo/haversine_distance.sql +++ b/macros/geo/haversine_distance.sql @@ -11,7 +11,6 @@ The arguments should be float type. {% endmacro %} {% macro default__haversine_distance(lat1,lon1,lat2,lon2,unit) -%} -{{log(unit, info=true)}} {# vanilla macro is in miles #} {% set conversion_rate = '' %} {% if unit == 'km' %} From a711ff815b5e51f6f17aade41533d50796361fcd Mon Sep 17 00:00:00 2001 From: Bastien Boutonnet Date: Sat, 6 Mar 2021 14:29:39 +0100 Subject: [PATCH 04/10] fix typo in readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f2d88a49..6c90fe5c 100644 --- a/README.md +++ b/README.md @@ -119,7 +119,7 @@ Optionally takes a `unit` string parameter ('km' or 'mi') which defaults to mile **Usage:** ``` -{{ dbt_utils.haversine_distance(lat1=,lon1=,lat2=,lon2=, unit='mi') }} +{{ dbt_utils.haversine_distance(lat1=,lon1=,lat2=,lon2=, unit='mi') }} ``` --- ### Schema Tests From d3b435b4842e2d7cafb6e43d38dcf85a6c2ae4ce Mon Sep 17 00:00:00 2001 From: Bastien Boutonnet Date: Mon, 8 Mar 2021 19:49:31 +0100 Subject: [PATCH 05/10] add a unioned test case for haversine sin unit --- .../models/geo/test_haversine_distance_mi.sql | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/integration_tests/models/geo/test_haversine_distance_mi.sql b/integration_tests/models/geo/test_haversine_distance_mi.sql index 58378d14..a3c1c469 100644 --- a/integration_tests/models/geo/test_haversine_distance_mi.sql +++ b/integration_tests/models/geo/test_haversine_distance_mi.sql @@ -1,13 +1,13 @@ with data as ( select * from {{ ref('data_haversine_mi') }} -), +), final as ( select output as expected, cast( {{ dbt_utils.haversine_distance( - lat1='lat_1', + lat1='lat_1', lon1='lon_1', lat2='lat_2', lon2='lon_2', @@ -16,6 +16,22 @@ final as ( }} as numeric ) as actual from data + + union all + + select + output as expected, + cast( + {{ + dbt_utils.haversine_distance( + lat1='lat_1', + lon1='lon_1', + lat2='lat_2', + lon2='lon_2', + ) + }} as numeric + ) as actual + from data ) select expected, From 20b3fc4cc148fd1541894722c93ccf922bde35ff Mon Sep 17 00:00:00 2001 From: Bastien Boutonnet Date: Sun, 14 Mar 2021 10:37:19 +0100 Subject: [PATCH 06/10] add snowflake specific implementation --- macros/geo/haversine_distance.sql | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/macros/geo/haversine_distance.sql b/macros/geo/haversine_distance.sql index d6521901..eae366de 100644 --- a/macros/geo/haversine_distance.sql +++ b/macros/geo/haversine_distance.sql @@ -23,3 +23,17 @@ The arguments should be float type. (sin(radians(({{lon2}} - {{lon1}}) / 2))) ^ 2)) {{conversion_rate}} {%- endmacro %} + +{% macro snowflake__haversine_distance(lat1,lon1,lat2,lon2,unit) -%} +{# vanilla macro is in miles #} + {% set conversion_rate = '' %} +{% if unit == 'km' %} +{# we multiply miles result to get it in kms #} + {% set conversion_rate = '* 1.60934' %} +{% endif %} + + 2 * 3961 * asin(sqrt(pow((sin(radians(({{lat2}} - {{lat1}}) / 2))), 2) + + cos(radians({{lat1}})) * cos(radians({{lat2}})) * + pow((sin(radians(({{lon2}} - {{lon1}}) / 2))), 2))) {{conversion_rate}} + +{%- endmacro %} From 9026617e3f4ac76bb442969bc239d510ebfc898f Mon Sep 17 00:00:00 2001 From: Bastien Boutonnet Date: Sun, 14 Mar 2021 10:49:17 +0100 Subject: [PATCH 07/10] make implementation with pow() default --- macros/geo/haversine_distance.sql | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/macros/geo/haversine_distance.sql b/macros/geo/haversine_distance.sql index eae366de..ecd6756d 100644 --- a/macros/geo/haversine_distance.sql +++ b/macros/geo/haversine_distance.sql @@ -14,20 +14,6 @@ The arguments should be float type. {# vanilla macro is in miles #} {% set conversion_rate = '' %} {% if unit == 'km' %} -{# we multiply miles result to get it in kms #} - {% set conversion_rate = '* 1.60934' %} -{% endif %} - - 2 * 3961 * asin(sqrt((sin(radians(({{lat2}} - {{lat1}}) / 2))) ^ 2 + - cos(radians({{lat1}})) * cos(radians({{lat2}})) * - (sin(radians(({{lon2}} - {{lon1}}) / 2))) ^ 2)) {{conversion_rate}} - -{%- endmacro %} - -{% macro snowflake__haversine_distance(lat1,lon1,lat2,lon2,unit) -%} -{# vanilla macro is in miles #} - {% set conversion_rate = '' %} -{% if unit == 'km' %} {# we multiply miles result to get it in kms #} {% set conversion_rate = '* 1.60934' %} {% endif %} From 6ea2ff218c48162f4b369a5c1817817af8f8069f Mon Sep 17 00:00:00 2001 From: Bastien Boutonnet Date: Sun, 14 Mar 2021 16:56:32 +0100 Subject: [PATCH 08/10] make a big query adapted macro --- macros/geo/haversine_distance.sql | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/macros/geo/haversine_distance.sql b/macros/geo/haversine_distance.sql index ecd6756d..ebd69913 100644 --- a/macros/geo/haversine_distance.sql +++ b/macros/geo/haversine_distance.sql @@ -6,6 +6,10 @@ http://daynebatten.com/2015/09/latitude-longitude-distance-sql/ The arguments should be float type. #} +{% macro degrees_to_radians(degrees) -%} + acos(-1) * {{degrees}} / 180 +{%- endmacro %} + {% macro haversine_distance(lat1,lon1,lat2,lon2,unit='mi') -%} {{ return(adapter.dispatch('haversine_distance', packages = dbt_utils._get_utils_namespaces())(lat1,lon1,lat2,lon2,unit)) }} {% endmacro %} @@ -23,3 +27,24 @@ The arguments should be float type. pow((sin(radians(({{lon2}} - {{lon1}}) / 2))), 2))) {{conversion_rate}} {%- endmacro %} + + + +{% macro bigquery__haversine_distance(lat1,lon1,lat2,lon2,unit) -%} +{% set r_lat1 = dbt_utils.degrees_to_radians(lat1) %} +{% set r_lat2 = dbt_utils.degrees_to_radians(lat2) %} +{% set r_lon1 = dbt_utils.degrees_to_radians(lon1) %} +{% set r_lon2 = dbt_utils.degrees_to_radians(lon2) %} +{# vanilla macro is in miles #} + {% set conversion_rate = '' %} +{% if unit == 'km' %} +{# we multiply miles result to get it in kms #} + {% set conversion_rate = '* 1.60934' %} +{% endif %} + + 2 * 3961 * asin(sqrt(pow(sin(({{r_lat2}} - {{r_lat1}}) / 2), 2) + + cos({{r_lat1}}) * cos({{r_lat2}}) * + pow(sin(({{r_lon2}} - {{r_lon1}}) / 2), 2))) {{conversion_rate}} + +{%- endmacro %} + From 0152392b078ba9b3dbbb3fb31d42f21faa03f50e Mon Sep 17 00:00:00 2001 From: Bastien Boutonnet Date: Sun, 14 Mar 2021 21:40:33 +0100 Subject: [PATCH 09/10] rename radians converted vars to be more transparent --- macros/geo/haversine_distance.sql | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/macros/geo/haversine_distance.sql b/macros/geo/haversine_distance.sql index ebd69913..b7d22d2d 100644 --- a/macros/geo/haversine_distance.sql +++ b/macros/geo/haversine_distance.sql @@ -31,10 +31,10 @@ The arguments should be float type. {% macro bigquery__haversine_distance(lat1,lon1,lat2,lon2,unit) -%} -{% set r_lat1 = dbt_utils.degrees_to_radians(lat1) %} -{% set r_lat2 = dbt_utils.degrees_to_radians(lat2) %} -{% set r_lon1 = dbt_utils.degrees_to_radians(lon1) %} -{% set r_lon2 = dbt_utils.degrees_to_radians(lon2) %} +{% set radians_lat1 = dbt_utils.degrees_to_radians(lat1) %} +{% set radians_lat2 = dbt_utils.degrees_to_radians(lat2) %} +{% set radians_lon1 = dbt_utils.degrees_to_radians(lon1) %} +{% set radians_lon2 = dbt_utils.degrees_to_radians(lon2) %} {# vanilla macro is in miles #} {% set conversion_rate = '' %} {% if unit == 'km' %} @@ -42,9 +42,9 @@ The arguments should be float type. {% set conversion_rate = '* 1.60934' %} {% endif %} - 2 * 3961 * asin(sqrt(pow(sin(({{r_lat2}} - {{r_lat1}}) / 2), 2) + - cos({{r_lat1}}) * cos({{r_lat2}}) * - pow(sin(({{r_lon2}} - {{r_lon1}}) / 2), 2))) {{conversion_rate}} + 2 * 3961 * asin(sqrt(pow(sin(({{radians_lat2}} - {{radians_lat1}}) / 2), 2) + + cos({{radians_lat1}}) * cos({{radians_lat2}}) * + pow(sin(({{radians_lon2}} - {{radians_lon1}}) / 2), 2))) {{conversion_rate}} {%- endmacro %} From 0e151ddba2e6239dc166f7c54ed066aeffd099d3 Mon Sep 17 00:00:00 2001 From: Bastien Boutonnet Date: Sat, 17 Apr 2021 17:44:59 +0200 Subject: [PATCH 10/10] implement review feedback --- macros/geo/haversine_distance.sql | 39 ++++++++++++++++--------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/macros/geo/haversine_distance.sql b/macros/geo/haversine_distance.sql index b7d22d2d..38fb869e 100644 --- a/macros/geo/haversine_distance.sql +++ b/macros/geo/haversine_distance.sql @@ -10,41 +10,42 @@ The arguments should be float type. acos(-1) * {{degrees}} / 180 {%- endmacro %} -{% macro haversine_distance(lat1,lon1,lat2,lon2,unit='mi') -%} +{% macro haversine_distance(lat1, lon1, lat2, lon2, unit='mi') -%} {{ return(adapter.dispatch('haversine_distance', packages = dbt_utils._get_utils_namespaces())(lat1,lon1,lat2,lon2,unit)) }} {% endmacro %} -{% macro default__haversine_distance(lat1,lon1,lat2,lon2,unit) -%} -{# vanilla macro is in miles #} - {% set conversion_rate = '' %} -{% if unit == 'km' %} -{# we multiply miles result to get it in kms #} - {% set conversion_rate = '* 1.60934' %} +{% macro default__haversine_distance(lat1, lon1, lat2, lon2, unit='mi') -%} +{%- if unit == 'mi' %} + {% set conversion_rate = 1 %} +{% elif unit == 'km' %} + {% set conversion_rate = 1.60934 %} +{% else %} + {{ exceptions.raise_compiler_error("unit input must be one of 'mi' or 'km'. Got " ~ unit) }} {% endif %} - 2 * 3961 * asin(sqrt(pow((sin(radians(({{lat2}} - {{lat1}}) / 2))), 2) + + 2 * 3961 * asin(sqrt(pow((sin(radians(({{ lat2 }} - {{ lat1 }}) / 2))), 2) + cos(radians({{lat1}})) * cos(radians({{lat2}})) * - pow((sin(radians(({{lon2}} - {{lon1}}) / 2))), 2))) {{conversion_rate}} + pow((sin(radians(({{ lon2 }} - {{ lon1 }}) / 2))), 2))) * {{ conversion_rate }} {%- endmacro %} -{% macro bigquery__haversine_distance(lat1,lon1,lat2,lon2,unit) -%} +{% macro bigquery__haversine_distance(lat1, lon1, lat2, lon2, unit='mi') -%} {% set radians_lat1 = dbt_utils.degrees_to_radians(lat1) %} {% set radians_lat2 = dbt_utils.degrees_to_radians(lat2) %} {% set radians_lon1 = dbt_utils.degrees_to_radians(lon1) %} {% set radians_lon2 = dbt_utils.degrees_to_radians(lon2) %} -{# vanilla macro is in miles #} - {% set conversion_rate = '' %} -{% if unit == 'km' %} -{# we multiply miles result to get it in kms #} - {% set conversion_rate = '* 1.60934' %} +{%- if unit == 'mi' %} + {% set conversion_rate = 1 %} +{% elif unit == 'km' %} + {% set conversion_rate = 1.60934 %} +{% else %} + {{ exceptions.raise_compiler_error("unit input must be one of 'mi' or 'km'. Got " ~ unit) }} {% endif %} - - 2 * 3961 * asin(sqrt(pow(sin(({{radians_lat2}} - {{radians_lat1}}) / 2), 2) + - cos({{radians_lat1}}) * cos({{radians_lat2}}) * - pow(sin(({{radians_lon2}} - {{radians_lon1}}) / 2), 2))) {{conversion_rate}} + 2 * 3961 * asin(sqrt(pow(sin(({{ radians_lat2 }} - {{ radians_lat1 }}) / 2), 2) + + cos({{ radians_lat1 }}) * cos({{ radians_lat2 }}) * + pow(sin(({{ radians_lon2 }} - {{ radians_lon1 }}) / 2), 2))) * {{ conversion_rate }} {%- endmacro %}