diff --git a/README.md b/README.md
index fd665086..6c90fe5c 100644
--- a/README.md
+++ b/README.md
@@ -95,7 +95,7 @@ Notes:
 
 ### Date/Time
 #### date_spine ([source](macros/datetime/date_spine.sql))
-This macro returns the sql required to build a date spine. The spine will include the `start_date` (if it is aligned to the `datepart`), but it will not include the `end_date`. 
+This macro returns the sql required to build a date spine. The spine will include the `start_date` (if it is aligned to the `datepart`), but it will not include the `end_date`.
 
 **Usage:**
 
@@ -114,9 +114,12 @@ This macro returns the sql required to build a date spine. The spine will includ
 #### haversine_distance ([source](macros/geo/haversine_distance.sql))
 This macro calculates the [haversine distance](http://daynebatten.com/2015/09/latitude-longitude-distance-sql/) between a pair of x/y coordinates.
 
+Optionally takes a `unit` string parameter ('km' or 'mi') which defaults to miles (imperial system).
+
 **Usage:**
+
 ```
-{{ dbt_utils.haversine_distance(lat1=,lon1=,lat2=,lon2=) }}
+{{ dbt_utils.haversine_distance(lat1=,lon1=,lat2=,lon2=, unit='mi') }}
 ```
 ---
 ### Schema Tests
diff --git a/integration_tests/data/geo/data_haversine_km.csv b/integration_tests/data/geo/data_haversine_km.csv
new file mode 100644
index 00000000..636442f2
--- /dev/null
+++ b/integration_tests/data/geo/data_haversine_km.csv
@@ -0,0 +1,2 @@
+lat_1,lon_1,lat_2,lon_2,output
+48.864716,2.349014,52.379189,4.899431,430
diff --git a/integration_tests/data/geo/data_haversine_mi.csv b/integration_tests/data/geo/data_haversine_mi.csv
new file mode 100644
index 00000000..cc4757f7
--- /dev/null
+++ b/integration_tests/data/geo/data_haversine_mi.csv
@@ -0,0 +1,2 @@
+lat_1,lon_1,lat_2,lon_2,output
+48.864716,2.349014,52.379189,4.899431,267
diff --git a/integration_tests/dbt_project.yml b/integration_tests/dbt_project.yml
index 3a8c4feb..39493443 100644
--- a/integration_tests/dbt_project.yml
+++ b/integration_tests/dbt_project.yml
@@ -54,8 +54,8 @@ seeds:
     sql:
       data_events_20180103:
         +schema: events
-    
+
     schema_tests:
       data_test_sequential_timestamps:
         +column_types:
-          my_timestamp: timestamp
\ No newline at end of file
+          my_timestamp: timestamp
diff --git a/integration_tests/macros/tests.sql b/integration_tests/macros/tests.sql
index 9ca536e4..caeb5d6e 100644
--- a/integration_tests/macros/tests.sql
+++ b/integration_tests/macros/tests.sql
@@ -1,6 +1,5 @@
 
 {% macro test_assert_equal(model, actual, expected) %}
-
 select count(*) from {{ model }} where {{ actual }} != {{ expected }}
 
 {% endmacro %}
diff --git a/integration_tests/models/geo/schema.yml b/integration_tests/models/geo/schema.yml
new file mode 100644
index 00000000..50dd0241
--- /dev/null
+++ b/integration_tests/models/geo/schema.yml
@@ -0,0 +1,13 @@
+version: 2
+
+models:
+  - name: test_haversine_distance_km
+    tests:
+      - assert_equal:
+          actual: actual
+          expected: expected
+  - name: test_haversine_distance_mi
+    tests:
+      - assert_equal:
+          actual: actual
+          expected: expected
diff --git a/integration_tests/models/geo/test_haversine_distance_km.sql b/integration_tests/models/geo/test_haversine_distance_km.sql
new file mode 100644
index 00000000..a00993f0
--- /dev/null
+++ b/integration_tests/models/geo/test_haversine_distance_km.sql
@@ -0,0 +1,23 @@
+with data as (
+    select * from {{ ref('data_haversine_km') }}
+),
+final as (
+    select
+        output as expected,
+        cast(
+            {{
+                dbt_utils.haversine_distance(
+                    lat1='lat_1',
+                    lon1='lon_1',
+                    lat2='lat_2',
+                    lon2='lon_2',
+                    unit='km'
+                )
+            }} as numeric
+        ) as actual
+    from data
+)
+select
+    expected,
+    round(actual,0) as actual
+from final
diff --git a/integration_tests/models/geo/test_haversine_distance_mi.sql b/integration_tests/models/geo/test_haversine_distance_mi.sql
new file mode 100644
index 00000000..a3c1c469
--- /dev/null
+++ b/integration_tests/models/geo/test_haversine_distance_mi.sql
@@ -0,0 +1,39 @@
+with data as (
+    select * from {{ ref('data_haversine_mi') }}
+),
+final as (
+    select
+        output as expected,
+        cast(
+            {{
+                dbt_utils.haversine_distance(
+                    lat1='lat_1',
+                    lon1='lon_1',
+                    lat2='lat_2',
+                    lon2='lon_2',
+                    unit='mi'
+                )
+            }} as numeric
+        ) as actual
+    from data
+
+    union all
+
+    select
+        output as expected,
+        cast(
+            {{
+                dbt_utils.haversine_distance(
+                    lat1='lat_1',
+                    lon1='lon_1',
+                    lat2='lat_2',
+                    lon2='lon_2',
+                )
+            }} as numeric
+        ) as actual
+    from data
+)
+select
+    expected,
+    round(actual,0) as actual
+from final
diff --git a/macros/geo/haversine_distance.sql b/macros/geo/haversine_distance.sql
index ed6c8281..38fb869e 100644
--- a/macros/geo/haversine_distance.sql
+++ b/macros/geo/haversine_distance.sql
@@ -3,17 +3,49 @@ This calculates the distance between two sets of latitude and longitude.
 The formula is from the following blog post:
 http://daynebatten.com/2015/09/latitude-longitude-distance-sql/
 
-The arguments should be float type. 
+The arguments should be float type; `unit` is 'mi' (default) or 'km', and any other value raises a compiler error.
 #}
 
-{% macro haversine_distance(lat1,lon1,lat2,lon2) -%}
-    {{ return(adapter.dispatch('haversine_distance', packages = dbt_utils._get_utils_namespaces())(lat1,lon1,lat2,lon2)) }}
+{% macro degrees_to_radians(degrees) -%}
+    (acos(-1) * ({{ degrees }}) / 180)
+{%- endmacro %}
+
+{% macro haversine_distance(lat1, lon1, lat2, lon2, unit='mi') -%}
+    {{ return(adapter.dispatch('haversine_distance', packages = dbt_utils._get_utils_namespaces())(lat1,lon1,lat2,lon2,unit)) }}
 {% endmacro %}
 
-{% macro default__haversine_distance(lat1,lon1,lat2,lon2) -%}
+{% macro default__haversine_distance(lat1, lon1, lat2, lon2, unit='mi') -%}
+{%- if unit == 'mi' %}
+    {% set conversion_rate = 1 %}
+{% elif unit == 'km' %}
+    {% set conversion_rate = 1.60934 %}
+{% else %}
+    {{ exceptions.raise_compiler_error("unit input must be one of 'mi' or 'km'. Got " ~ unit) }}
+{% endif %}
 
-    2 * 3961 * asin(sqrt((sin(radians(({{lat2}} - {{lat1}}) / 2))) ^ 2 +
+    2 * 3961 * asin(sqrt(pow((sin(radians((({{ lat2 }}) - ({{ lat1 }})) / 2))), 2) +
     cos(radians({{lat1}})) *
     cos(radians({{lat2}})) *
-    (sin(radians(({{lon2}} - {{lon1}}) / 2))) ^ 2))
+    pow((sin(radians((({{ lon2 }}) - ({{ lon1 }})) / 2))), 2))) * {{ conversion_rate }}
+
+{%- endmacro %}
+
+
+
+{% macro bigquery__haversine_distance(lat1, lon1, lat2, lon2, unit='mi') -%}
+{% set radians_lat1 = dbt_utils.degrees_to_radians(lat1) %}
+{% set radians_lat2 = dbt_utils.degrees_to_radians(lat2) %}
+{% set radians_lon1 = dbt_utils.degrees_to_radians(lon1) %}
+{% set radians_lon2 = dbt_utils.degrees_to_radians(lon2) %}
+{%- if unit == 'mi' %}
+    {% set conversion_rate = 1 %}
+{% elif unit == 'km' %}
+    {% set conversion_rate = 1.60934 %}
+{% else %}
+    {{ exceptions.raise_compiler_error("unit input must be one of 'mi' or 'km'. Got " ~ unit) }}
+{% endif %}
+    2 * 3961 * asin(sqrt(pow(sin(({{ radians_lat2 }} - {{ radians_lat1 }}) / 2), 2) + cos({{ radians_lat1 }}) * cos({{ radians_lat2 }}) *
+    pow(sin(({{ radians_lon2 }} - {{ radians_lon1 }}) / 2), 2))) * {{ conversion_rate }}
+{%- endmacro %}
+