From dd3c94f5a7377c41e16ae5788391b488d56a9f16 Mon Sep 17 00:00:00 2001 From: James Logan Date: Thu, 9 Jan 2025 11:40:23 -0800 Subject: [PATCH 1/8] add not_empty test --- transform/models/_sources.yml | 12 ++++++++++++ transform/tests/generic/not_empty.sql | 16 ++++++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 transform/tests/generic/not_empty.sql diff --git a/transform/models/_sources.yml b/transform/models/_sources.yml index 0c34f480..43006d1a 100644 --- a/transform/models/_sources.yml +++ b/transform/models/_sources.yml @@ -289,6 +289,8 @@ sources: controller might have a station for the mainline as well as a station for an onramp. freshness: null + tests: + - not_empty columns: - name: meta description: Metadata from the data relay scripts. @@ -310,6 +312,8 @@ sources: description: | A log table showing updates to the controller config. This can be joined with the `controller_config` table to get a full history of controller metadata. + tests: + - not_empty columns: - name: meta description: Metadata from the data relay scripts. @@ -342,6 +346,8 @@ sources: Metadata for a single VDS station. Multiple stations may be connected to a single controller, and multiple detectors may be connected to a single station. freshness: null + tests: + - not_empty columns: - name: meta description: Metadata from the data relay scripts. @@ -363,6 +369,8 @@ sources: description: | A log table showing updates to the station config. This can be joined with the `station_config` table to get a full history of station metadata. + tests: + - not_empty columns: - name: meta description: Metadata from the data relay scripts. @@ -400,6 +408,8 @@ sources: Metadata for a single loop detector. This is the device that actually records flow, occupancy, and speed, and is typically installed in a single lane. Multiple detectors across a set of lanes constitute a station. + tests: + - not_empty freshness: null columns: - name: meta @@ -412,6 +422,8 @@ sources: description: | A log table showing updates to the detector config. This can be joined with the `detector_config` table to get a full history of detector metadata. + tests: + - not_empty columns: - name: meta description: Metadata from the data relay scripts. diff --git a/transform/tests/generic/not_empty.sql b/transform/tests/generic/not_empty.sql new file mode 100644 index 00000000..9f2030c3 --- /dev/null +++ b/transform/tests/generic/not_empty.sql @@ -0,0 +1,16 @@ +{% test not_empty(model) %} + +with validation as ( + select count(0) as num_rows + from {{ model }} +), + +validation_errors as ( + select num_rows + from validation + where num_rows = 0 +) + +select * from validation_errors + +{% endtest %} From bdb69e36b91ce5c1434fd83ebbe67b52bafb5f71 Mon Sep 17 00:00:00 2001 From: James Logan Date: Thu, 9 Jan 2025 12:34:06 -0800 Subject: [PATCH 2/8] add uniqueness tests to sources --- transform/models/_sources.yml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/transform/models/_sources.yml b/transform/models/_sources.yml index 43006d1a..fd0693bb 100644 --- a/transform/models/_sources.yml +++ b/transform/models/_sources.yml @@ -291,6 +291,8 @@ sources: freshness: null tests: - not_empty + - unique: + column_name: controller_id columns: - name: meta description: Metadata from the data relay scripts. @@ -314,6 +316,10 @@ sources: with the `controller_config` table to get a full history of controller metadata. tests: - not_empty + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - controller_id + - time_id columns: - name: meta description: Metadata from the data relay scripts. @@ -348,6 +354,8 @@ sources: freshness: null tests: - not_empty + - unique: + column_name: station_id columns: - name: meta description: Metadata from the data relay scripts. @@ -371,6 +379,10 @@ sources: with the `station_config` table to get a full history of station metadata. tests: - not_empty + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - station_id + - time_id columns: - name: meta description: Metadata from the data relay scripts. @@ -410,6 +422,8 @@ sources: Multiple detectors across a set of lanes constitute a station. tests: - not_empty + - unique: + column_name: detector_id freshness: null columns: - name: meta @@ -424,6 +438,10 @@ sources: with the `detector_config` table to get a full history of detector metadata. tests: - not_empty + - dbt_utils.unique_combination_of_columns: + combination_of_columns: + - detector_id + - time_id columns: - name: meta description: Metadata from the data relay scripts. From c7ea303c162670d504b7a4f42375f8c3de41370f Mon Sep 17 00:00:00 2001 From: pingpingxiu-DOT-ca-gov <149613649+pingpingxiu-DOT-ca-gov@users.noreply.github.com> Date: Mon, 21 Oct 2024 11:46:43 -0700 Subject: [PATCH 3/8] Update _sources.yml Per discussion with @ian-r-rose , loosen the monitor threshold on the Station config's freshness since it may occasionally lag behind more than 3 days. --- transform/models/_sources.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/transform/models/_sources.yml b/transform/models/_sources.yml index 0c34f480..c736845f 100644 --- a/transform/models/_sources.yml +++ b/transform/models/_sources.yml @@ -59,10 +59,10 @@ sources: depending on the measurement capabilities of the detectors. freshness: warn_after: - count: 2 + count: 14 period: day error_after: - count: 3 + count: 28 period: day loaded_at_field: SAMPLE_TIMESTAMP columns: From 103f24e44c64b9890995834aa33bb8bc5ae6916d Mon Sep 17 00:00:00 2001 From: pingpingxiu-DOT-ca-gov <149613649+pingpingxiu-DOT-ca-gov@users.noreply.github.com> Date: Mon, 21 Oct 2024 11:55:46 -0700 Subject: [PATCH 4/8] Update _sources.yml fix bug --- transform/models/_sources.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/transform/models/_sources.yml b/transform/models/_sources.yml index c736845f..0c34f480 100644 --- a/transform/models/_sources.yml +++ b/transform/models/_sources.yml @@ -59,10 +59,10 @@ sources: depending on the measurement capabilities of the detectors. freshness: warn_after: - count: 14 + count: 2 period: day error_after: - count: 28 + count: 3 period: day loaded_at_field: SAMPLE_TIMESTAMP columns: From 1d5553efdb05e7961368bdae55eb973ca8206efa Mon Sep 17 00:00:00 2001 From: pingpingxiu-DOT-ca-gov <149613649+pingpingxiu-DOT-ca-gov@users.noreply.github.com> Date: Mon, 21 Oct 2024 11:57:57 -0700 Subject: [PATCH 5/8] Update _sources.yml fix bug --- transform/models/_sources.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/transform/models/_sources.yml b/transform/models/_sources.yml index 0c34f480..f7535789 100644 --- a/transform/models/_sources.yml +++ b/transform/models/_sources.yml @@ -412,6 +412,13 @@ sources: description: | A log table showing updates to the detector config. This can be joined with the `detector_config` table to get a full history of detector metadata. + freshness: + warn_after: + count: 14 + period: day + error_after: + count: 28 + period: day columns: - name: meta description: Metadata from the data relay scripts. From 2ef2cca3bebc32aa2bbb4a894104be3b9c04e3a2 Mon Sep 17 00:00:00 2001 From: "pingping.xiu@dot.ca.gov" Date: Mon, 21 Oct 2024 19:07:36 +0000 Subject: [PATCH 6/8] Fix trailing --- transform/models/_sources.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/transform/models/_sources.yml b/transform/models/_sources.yml index f7535789..c61f3cd0 100644 --- a/transform/models/_sources.yml +++ b/transform/models/_sources.yml @@ -412,7 +412,7 @@ sources: description: | A log table showing updates to the detector config. This can be joined with the `detector_config` table to get a full history of detector metadata. - freshness: + freshness: warn_after: count: 14 period: day From 06f09202a8d8e15f53111bacac89598bbab9c225 Mon Sep 17 00:00:00 2001 From: Ian Rose Date: Fri, 10 Jan 2025 09:05:18 -0800 Subject: [PATCH 7/8] Adjust freshness check values based on rough update frequencies of these tables. These checks are not 100% reliable, as it is possible that no stations have updates in, e.g., a month, but if one of them trips it is suggestive that something is amiss. --- transform/models/_sources.yml | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/transform/models/_sources.yml b/transform/models/_sources.yml index c61f3cd0..8bec0724 100644 --- a/transform/models/_sources.yml +++ b/transform/models/_sources.yml @@ -169,14 +169,6 @@ sources: description: | Vehicle Detector Station (VDS) (aka Station) related data obtained via data relay from db96 within the Caltrans network. - freshness: - warn_after: - count: 2 - period: day - error_after: - count: 3 - period: day - loaded_at_field: time_id tables: - name: vds30sec description: | @@ -288,7 +280,6 @@ sources: stations hooked up to a single controller. For example, the same controller might have a station for the mainline as well as a station for an onramp. - freshness: null columns: - name: meta description: Metadata from the data relay scripts. @@ -310,6 +301,14 @@ sources: description: | A log table showing updates to the controller config. This can be joined with the `controller_config` table to get a full history of controller metadata. + freshness: + warn_after: + count: 30 + period: day + error_after: + count: 60 + period: day + loaded_at_field: time_id columns: - name: meta description: Metadata from the data relay scripts. @@ -341,7 +340,6 @@ sources: description: | Metadata for a single VDS station. Multiple stations may be connected to a single controller, and multiple detectors may be connected to a single station. - freshness: null columns: - name: meta description: Metadata from the data relay scripts. @@ -363,6 +361,14 @@ sources: description: | A log table showing updates to the station config. This can be joined with the `station_config` table to get a full history of station metadata. + freshness: + warn_after: + count: 15 + period: day + error_after: + count: 30 + period: day + loaded_at_field: time_id columns: - name: meta description: Metadata from the data relay scripts. @@ -400,7 +406,6 @@ sources: Metadata for a single loop detector. This is the device that actually records flow, occupancy, and speed, and is typically installed in a single lane. Multiple detectors across a set of lanes constitute a station. - freshness: null columns: - name: meta description: Metadata from the data relay scripts. @@ -414,11 +419,12 @@ sources: with the `detector_config` table to get a full history of detector metadata. freshness: warn_after: - count: 14 + count: 15 period: day error_after: - count: 28 + count: 30 period: day + loaded_at_field: time_id columns: - name: meta description: Metadata from the data relay scripts. From e87355f55daa601ef2f2f841207aa073f672a777 Mon Sep 17 00:00:00 2001 From: James Logan Date: Fri, 10 Jan 2025 09:35:11 -0800 Subject: [PATCH 8/8] update sources yml test syntax --- transform/models/_sources.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/transform/models/_sources.yml b/transform/models/_sources.yml index fd0693bb..2bd4b53e 100644 --- a/transform/models/_sources.yml +++ b/transform/models/_sources.yml @@ -289,7 +289,7 @@ sources: controller might have a station for the mainline as well as a station for an onramp. freshness: null - tests: + data_tests: - not_empty - unique: column_name: controller_id @@ -314,7 +314,7 @@ sources: description: | A log table showing updates to the controller config. This can be joined with the `controller_config` table to get a full history of controller metadata. - tests: + data_tests: - not_empty - dbt_utils.unique_combination_of_columns: combination_of_columns: @@ -352,7 +352,7 @@ sources: Metadata for a single VDS station. Multiple stations may be connected to a single controller, and multiple detectors may be connected to a single station. freshness: null - tests: + data_tests: - not_empty - unique: column_name: station_id @@ -377,7 +377,7 @@ sources: description: | A log table showing updates to the station config. This can be joined with the `station_config` table to get a full history of station metadata. - tests: + data_tests: - not_empty - dbt_utils.unique_combination_of_columns: combination_of_columns: @@ -420,7 +420,7 @@ sources: Metadata for a single loop detector. This is the device that actually records flow, occupancy, and speed, and is typically installed in a single lane. Multiple detectors across a set of lanes constitute a station. - tests: + data_tests: - not_empty - unique: column_name: detector_id @@ -436,7 +436,7 @@ sources: description: | A log table showing updates to the detector config. This can be joined with the `detector_config` table to get a full history of detector metadata. - tests: + data_tests: - not_empty - dbt_utils.unique_combination_of_columns: combination_of_columns: