Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dkw 718 revert entity id to be unique string and not UUID #77

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 6 additions & 11 deletions db/dot/1-schema.sql
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,7 @@ CREATE TABLE IF NOT EXISTS dot.entity_categories (

CREATE TABLE IF NOT EXISTS dot.configured_entities (
project_id VARCHAR(300) NOT NULL,
entity_id UUID,
entity_name VARCHAR(300),
entity_id VARCHAR(300),
entity_category VARCHAR(300),
entity_definition VARCHAR(4096),
date_added TIMESTAMP WITH TIME ZONE NOT NULL,
Expand All @@ -104,7 +103,7 @@ CREATE TABLE IF NOT EXISTS dot.configured_tests(
description VARCHAR(1000) NOT NULL,
impact VARCHAR(1000) NULL,
proposed_remediation VARCHAR(1000) NULL,
entity_id UUID NOT NULL references dot.configured_entities on update cascade,
entity_id VARCHAR(300) NOT NULL references dot.configured_entities on update cascade,
test_type VARCHAR(300) NOT NULL references dot.test_types on update cascade,
column_name VARCHAR(300) NULL,
column_description VARCHAR(1000) NULL,
Expand Down Expand Up @@ -140,7 +139,7 @@ CREATE TABLE IF NOT EXISTS dot.test_results(
test_result_id VARCHAR(300) NOT NULL,
run_id UUID,
test_id UUID references dot.configured_tests on update cascade,
entity_id UUID,
entity_id VARCHAR(300),
status TEXT,
view_name VARCHAR(300) NULL,
id_column_name TEXT,
Expand All @@ -157,7 +156,7 @@ CREATE TABLE IF NOT EXISTS dot.test_results(
CREATE TABLE IF NOT EXISTS dot.test_results_summary (
run_id UUID,
test_id UUID references dot.configured_tests on update cascade,
entity_id UUID references dot.configured_entities on update cascade,
entity_id VARCHAR(300) references dot.configured_entities on update cascade,
test_type VARCHAR(300) NOT NULL references dot.test_types on update cascade,
column_name VARCHAR(300) NULL,
test_parameters VARCHAR(1000) NULL,
Expand Down Expand Up @@ -253,8 +252,6 @@ AS $$
declare
KEY_STRING text;
BEGIN
KEY_STRING := new.entity_name || new.entity_category || new.entity_definition;
NEW.entity_id := uuid_generate_v3(uuid_ns_oid(), KEY_STRING);
new.date_added := NOW();
new.date_modified := NOW();
RETURN NEW;
Expand All @@ -267,11 +264,9 @@ LANGUAGE plpgsql
AS $$
declare
KEY_STRING text;
OLD_ENTITY_ID uuid;
NEW_ENTITY_ID uuid;
OLD_ENTITY_ID VARCHAR(300);
NEW_ENTITY_ID VARCHAR(300);
BEGIN
KEY_STRING := new.entity_name || new.entity_category || new.entity_definition;
NEW.entity_id := uuid_generate_v3(uuid_ns_oid(), KEY_STRING);
new.date_modified := NOW();
RETURN NEW;
END;
Expand Down
26 changes: 13 additions & 13 deletions db/dot/4-upload_sample_dot_data.sql
Original file line number Diff line number Diff line change
Expand Up @@ -6,66 +6,66 @@ INSERT INTO dot.entity_categories VALUES('ZAG', 'Zagreb airport flights');
INSERT INTO dot.entity_categories VALUES('ETH', 'Ethiopian Airlines');

-- configured entities - db views of the data we want to scan
INSERT INTO dot.configured_entities VALUES('ScanProject1','b05f1f9c-2176-46b0-8e8f-d6690f696b9c', 'all_flight_data', 'ALL', '{{ config(materialized=''view'') }}
INSERT INTO dot.configured_entities VALUES('ScanProject1', 'all_flight_data', 'ALL', '{{ config(materialized=''view'') }}
{% set schema = <schema> %}
select *
from {{ schema }}.flight_data ','2021-12-07 00:00:00+00','2021-12-07 00:00:00+00','Matt');

INSERT INTO dot.configured_entities VALUES('ScanProject1','b05f1f9c-2176-46b0-8e8f-d6690f696b9b', 'zagreb_flight_data', 'ZAG', '{{ config(materialized=''view'') }}
INSERT INTO dot.configured_entities VALUES('ScanProject1', 'zagreb_flight_data', 'ZAG', '{{ config(materialized=''view'') }}
{% set schema = <schema> %}
select *
from {{ schema }}.flight_data WHERE origin_airport=''Zagreb airport'' ','2021-12-07 00:00:00+00','2021-12-07 00:00:00+00','Matt');

INSERT INTO dot.configured_entities VALUES('ScanProject1','b05f1f9c-2176-46b0-8e8f-d6690f696b9b', 'ethiopia_airlines_data', 'ETH', '{{ config(materialized=''view'') }}
INSERT INTO dot.configured_entities VALUES('ScanProject1', 'ethiopia_airlines_data', 'ETH', '{{ config(materialized=''view'') }}
{% set schema = <schema> %}
select *
from {{ schema }}.flight_data WHERE airline=''Ethiopian Airlines'' ','2021-12-07 00:00:00+00','2021-12-07 00:00:00+00','Matt');

INSERT INTO dot.configured_entities VALUES('ScanProject1','b05f1f9c-2176-46b0-8e8f-d6690f696b9b', 'all_airports_data', 'ALL', '{{ config(materialized=''view'') }}
INSERT INTO dot.configured_entities VALUES('ScanProject1', 'all_airports_data', 'ALL', '{{ config(materialized=''view'') }}
{% set schema = <schema> %}
select *
from {{ schema }}.airport_data ','2021-12-07 00:00:00+00','2021-12-07 00:00:00+00','Matt');

INSERT INTO dot.configured_entities VALUES('ScanProject1','b05f1f9c-2176-46b0-8e8f-d6690f696b9c', 'airlines_data', 'ALL', '{{ config(materialized=''view'') }}
INSERT INTO dot.configured_entities VALUES('ScanProject1', 'airlines_data', 'ALL', '{{ config(materialized=''view'') }}
{% set schema = <schema> %}
select DISTINCT airline
from {{ schema }}.flight_data ','2021-12-07 00:00:00+00','2021-12-07 00:00:00+00','Matt');


-- Note these UUIDs get reset by the trigger
INSERT INTO dot.configured_tests VALUES(TRUE, 'ScanProject1', '549c0575-e64c-3605-85a9-70356a23c4d2', 'MISSING-1', 3,
'Origin airport is not null', '', '', 'ca4513fa-96e0-3a95-a1a8-7f0c127ea82a', 'not_null', 'origin_airport', '',
'Origin airport is not null', '', '', 'all_flight_data', 'not_null', 'origin_airport', '',
NULL, '2021-12-23 19:00:00.000 -0500', '2021-12-23 19:00:00.000 -0500', 'Matt');

INSERT INTO dot.configured_tests VALUES(TRUE, 'ScanProject1', '8aca2bee-9e95-3f8a-90e9-153714e05367', 'INCONSISTENT-1',
5, 'Price is not negative', '', '', 'ca4513fa-96e0-3a95-a1a8-7f0c127ea82a', 'not_negative_string_column', 'price', '',
5, 'Price is not negative', '', '', 'all_flight_data', 'not_negative_string_column', 'price', '',
'{"name": "price"}', '2021-12-23 19:00:00.000 -0500', '2021-12-23 19:00:00.000 -0500', 'Matt');

INSERT INTO dot.configured_tests VALUES(TRUE, 'ScanProject1', '52d7352e-56ee-3084-9c67-e5ab24afc3a3', 'DUPLICATE-1',
3, 'Airport not unique', '', '', '7b689796-afde-3930-87be-ed8b7c7a0474', 'unique', 'airport', '', NULL,
3, 'Airport not unique', '', '', 'all_airports_data', 'unique', 'airport', '', NULL,
'2021-12-23 19:00:00.000 -0500', '2021-12-23 19:00:00.000 -0500', 'Matt');

INSERT INTO dot.configured_tests VALUES(TRUE, 'ScanProject1', '935e6b61-b664-3eab-9d67-97c2c9c2bec0', 'INCONSISTENT-1',
3, 'Disallowed FP methods entered in form', '', '', 'ca4513fa-96e0-3a95-a1a8-7f0c127ea82a', 'accepted_values', 'stops',
3, 'Disallowed FP methods entered in form', '', '', 'all_flight_data', 'accepted_values', 'stops',
'', $${"values": [ "1", "2", "3", "Non-stop"]}$$, '2021-12-23 19:00:00.000 -0500', '2021-12-23 19:00:00.000 -0500', 'Matt');

INSERT INTO dot.configured_tests VALUES(TRUE, 'ScanProject1', '0cdc9702-91e0-3499-b6f0-4dec12ad0f08', 'ASSESS-1', 3,
'Flight with no airport record', '', '', 'ca4513fa-96e0-3a95-a1a8-7f0c127ea82a', 'relationships', 'origin_airport',
'Flight with no airport record', '', '', 'all_flight_data', 'relationships', 'origin_airport',
'', $${"name": "flight_with_no_airport", "to": "ref('dot_model__all_airports_data')", "field": "airport"}$$,
'2021-12-23 19:00:00.000 -0500', '2021-12-23 19:00:00.000 -0500', 'Matt');

INSERT INTO dot.configured_tests VALUES(TRUE, 'ScanProject1', '0cdc9702-91e0-3499-b6f0-4dec12ad0f18', 'BIAS-1', 6,
'Price outlier airlines', '', '', 'ca4513fa-96e0-3a95-a1a8-7f0c127ea82a', 'expect_similar_means_across_reporters',
'Price outlier airlines', '', '', 'all_flight_data', 'expect_similar_means_across_reporters',
'price', '', $${"key": "airline","quantity": "price","data_table": "dot_model__all_flight_data","id_column": "airline",
"target_table":"dot_model__airlines_data"}$$, '2022-01-19 20:00:00.000 -0500', '2022-01-19 20:00:00.000 -0500', 'Matt');

INSERT INTO dot.configured_tests VALUES(TRUE, 'ScanProject1', '36d33837-bd92-370a-963a-264a4d5b2bac', 'DUPLICATE-1',
6, 'Duplicate flight record', '', '', 'ca4513fa-96e0-3a95-a1a8-7f0c127ea82a', 'possible_duplicate_forms', '', '',
6, 'Duplicate flight record', '', '', 'all_flight_data', 'possible_duplicate_forms', '', '',
$${"table_specific_reported_date": "departure_time", "table_specific_patient_uuid": "airline", "table_specific_uuid":
"uuid", "table_specific_period": "day"}$$, '2021-12-23 19:00:00.000 -0500', '2022-03-21 19:00:00.000 -0500', 'Matt');

INSERT INTO dot.configured_tests VALUES(TRUE, 'ScanProject1', 'c4a3da8f-32f4-4e9b-b135-354de203ca90', 'TREAT-1',
5, 'Number of stops has a reasonible value', '', '', 'ca4513fa-96e0-3a95-a1a8-7f0c127ea82a', 'custom_sql', '', '',
5, 'Number of stops has a reasonible value', '', '', 'all_flight_data', 'custom_sql', '', '',
format('{%s: %s}',
to_json('query'::text),
to_json($query$
Expand Down
2 changes: 1 addition & 1 deletion docker/appsmith/DOT App V2.json

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
,run_id,test_id,entity_id,test_type,column_name,id_column_name,test_parameters,test_status,test_status_message,failed_tests_view,failed_tests_view_sql
test.dbt_model_1.accepted_values_dot_model__all_flight_data_stops__1__2__3__Non_stop.b734743116,4541476c-814e-43fe-ab38-786f36beecbc,dac4c545-f610-3dae-ad82-1ddf27dae144,ca4513fa-96e0-3a95-a1a8-7f0c127ea82a,accepted_values,stops,,"{'values': ['1', '2', '3', 'Non-stop']}",fail,"got 2 results, configured to fail if != 0",tr_dot_model__all_flight_data_accepted_values_stops," WITH all_values AS (
test.dbt_model_1.accepted_values_dot_model__all_flight_data_stops__1__2__3__Non_stop.b734743116,4541476c-814e-43fe-ab38-786f36beecbc,cad13f73-27b5-3427-be8f-4d213bba3b19,all_flight_data,accepted_values,stops,,"{'values': ['1', '2', '3', 'Non-stop']}",fail,"got 2 results, configured to fail if != 0",tr_dot_model__all_flight_data_accepted_values_stops," WITH all_values AS (
SELECT dot_model__all_flight_data.stops AS value_field,
count(*) AS n_records
FROM self_tests_public_tests.dot_model__all_flight_data
Expand All @@ -9,11 +9,12 @@ test.dbt_model_1.accepted_values_dot_model__all_flight_data_stops__1__2__3__Non_
all_values.n_records
FROM all_values
WHERE all_values.value_field::text <> ALL (ARRAY['1'::character varying::text, '2'::character varying::text, '3'::character varying::text, 'Non-stop'::character varying::text]);"
test.dbt_model_1.not_negative_string_column_dot_model__all_flight_data_price__price.322389c2ba,4541476c-814e-43fe-ab38-786f36beecbc,49aa2fd3-511c-3d84-a782-a5daf57f98da,ca4513fa-96e0-3a95-a1a8-7f0c127ea82a,not_negative_string_column,price,,{'name': 'price'},fail,"got 1 result, configured to fail if != 0",tr_dot_model__all_flight_data_price," SELECT array_agg(dot_model__all_flight_data.uuid) AS uuid_list
test.dbt_model_1.not_negative_string_column_dot_model__all_flight_data_price__price.322389c2ba,4541476c-814e-43fe-ab38-786f36beecbc,ed27037a-4054-3070-9d88-fdf9cd0231c8,all_flight_data,not_negative_string_column,price,,{'name': 'price'},fail,"got 1 result, configured to fail if != 0",tr_dot_model__all_flight_data_price," SELECT array_agg(dot_model__all_flight_data.uuid) AS uuid_list
FROM self_tests_public_tests.dot_model__all_flight_data
WHERE dot_model__all_flight_data.price::character varying::text ~~ '-%'::text
HAVING count(*) > 0;"
test.dbt_model_1.not_null_dot_model__all_flight_data_origin_airport.2196b664b6,4541476c-814e-43fe-ab38-786f36beecbc,983a5746-bea7-3072-9a80-2c1c6706ceed,ca4513fa-96e0-3a95-a1a8-7f0c127ea82a,not_null,origin_airport,,{},fail,"got 53 results, configured to fail if != 0",tr_dot_model__all_flight_data_not_null_origin_a," SELECT dot_model__all_flight_data.uuid,

test.dbt_model_1.not_null_dot_model__all_flight_data_origin_airport.2196b664b6,4541476c-814e-43fe-ab38-786f36beecbc,df44c2f4-65f8-3170-a03f-6035aaa45660,all_flight_data,not_null,origin_airport,,{},fail,"got 53 results, configured to fail if != 0",tr_dot_model__all_flight_data_not_null_origin_a," SELECT dot_model__all_flight_data.uuid,
dot_model__all_flight_data.departure_time,
dot_model__all_flight_data.airline,
dot_model__all_flight_data.origin_airport,
Expand All @@ -24,15 +25,17 @@ test.dbt_model_1.not_null_dot_model__all_flight_data_origin_airport.2196b664b6,4
dot_model__all_flight_data.price
FROM self_tests_public_tests.dot_model__all_flight_data
WHERE dot_model__all_flight_data.origin_airport IS NULL;"
test.dbt_model_1.relationships_dot_model__all_flight_data_origin_airport__airport__flight_with_no_airport__ref_dot_model__all_airports_data_.3a9f7e32d9,4541476c-814e-43fe-ab38-786f36beecbc,7aa26bda-57e1-39b8-a3e9-979a3d882577,ca4513fa-96e0-3a95-a1a8-7f0c127ea82a,relationships,origin_airport,,"{'to': ""ref('dot_model__all_airports_data')"", 'name': 'flight_with_no_airport', 'field': 'airport'}",fail,"got 1 result, configured to fail if != 0",tr_dot_model__all_flight_data_flight_with_no_a," SELECT array_agg(from_model.from_uuid) AS uuid_list

test.dbt_model_1.relationships_dot_model__all_flight_data_origin_airport__airport__flight_with_no_airport__ref_dot_model__all_airports_data_.3a9f7e32d9,4541476c-814e-43fe-ab38-786f36beecbc,2ba7f3e8-cd62-37ac-854f-01f704489130,all_flight_data,relationships,origin_airport,,"{'to': ""ref('dot_model__all_airports_data')"", 'name': 'flight_with_no_airport', 'field': 'airport'}",fail,"got 1 result, configured to fail if != 0",tr_dot_model__all_flight_data_flight_with_no_a," SELECT array_agg(from_model.from_uuid) AS uuid_list
FROM ( SELECT dot_model__all_flight_data.uuid AS from_uuid,
dot_model__all_flight_data.origin_airport AS from_column_id
FROM self_tests_public_tests.dot_model__all_flight_data) from_model
LEFT JOIN ( SELECT dot_model__all_airports_data.airport AS to_id
FROM self_tests_public_tests.dot_model__all_airports_data) to_model ON to_model.to_id::text = from_model.from_column_id::text
WHERE from_model.from_column_id IS NOT NULL AND to_model.to_id IS NULL
HAVING count(*) > 0;"
test.dbt_model_1.unique_dot_model__all_airports_data_airport.912f240fa1,4541476c-814e-43fe-ab38-786f36beecbc,aa1c361c-a9ba-350e-9959-e92a5654f7dc,7b689796-afde-3930-87be-ed8b7c7a0474,unique,airport,,{},fail,"got 2 results, configured to fail if != 0",tr_dot_model__all_airports_data_unique_airport," SELECT dot_model__all_airports_data.airport AS unique_field,

test.dbt_model_1.unique_dot_model__all_airports_data_airport.912f240fa1,4541476c-814e-43fe-ab38-786f36beecbc,942f4496-1202-3768-9cfe-96128bcd754c,all_airports_data,unique,airport,,{},fail,"got 2 results, configured to fail if != 0",tr_dot_model__all_airports_data_unique_airport," SELECT dot_model__all_airports_data.airport AS unique_field,
count(*) AS n_records
FROM self_tests_public_tests.dot_model__all_airports_data
WHERE dot_model__all_airports_data.airport IS NOT NULL
Expand Down
Loading