Skip to content

Commit

Permalink
Merge pull request #77 from datakind/DKW-718-revert-entity-id-to-be-u…
Browse files Browse the repository at this point in the history
…nique-string-and-not-uuid

DKW-718: Revert entity id to be a unique string and not a UUID
  • Loading branch information
dividor authored Feb 7, 2023
2 parents c955ebb + 5f2dd83 commit 566d3b2
Show file tree
Hide file tree
Showing 17 changed files with 661 additions and 769 deletions.
17 changes: 6 additions & 11 deletions db/dot/1-schema.sql
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,7 @@ CREATE TABLE IF NOT EXISTS dot.entity_categories (

CREATE TABLE IF NOT EXISTS dot.configured_entities (
project_id VARCHAR(300) NOT NULL,
entity_id UUID,
entity_name VARCHAR(300),
entity_id VARCHAR(300),
entity_category VARCHAR(300),
entity_definition VARCHAR(4096),
date_added TIMESTAMP WITH TIME ZONE NOT NULL,
Expand All @@ -104,7 +103,7 @@ CREATE TABLE IF NOT EXISTS dot.configured_tests(
description VARCHAR(1000) NOT NULL,
impact VARCHAR(1000) NULL,
proposed_remediation VARCHAR(1000) NULL,
entity_id UUID NOT NULL references dot.configured_entities on update cascade,
entity_id VARCHAR(300) NOT NULL references dot.configured_entities on update cascade,
test_type VARCHAR(300) NOT NULL references dot.test_types on update cascade,
column_name VARCHAR(300) NULL,
column_description VARCHAR(1000) NULL,
Expand Down Expand Up @@ -140,7 +139,7 @@ CREATE TABLE IF NOT EXISTS dot.test_results(
test_result_id VARCHAR(300) NOT NULL,
run_id UUID,
test_id UUID references dot.configured_tests on update cascade,
entity_id UUID,
entity_id VARCHAR(300),
status TEXT,
view_name VARCHAR(300) NULL,
id_column_name TEXT,
Expand All @@ -157,7 +156,7 @@ CREATE TABLE IF NOT EXISTS dot.test_results(
CREATE TABLE IF NOT EXISTS dot.test_results_summary (
run_id UUID,
test_id UUID references dot.configured_tests on update cascade,
entity_id UUID references dot.configured_entities on update cascade,
entity_id VARCHAR(300) references dot.configured_entities on update cascade,
test_type VARCHAR(300) NOT NULL references dot.test_types on update cascade,
column_name VARCHAR(300) NULL,
test_parameters VARCHAR(1000) NULL,
Expand Down Expand Up @@ -253,8 +252,6 @@ AS $$
declare
KEY_STRING text;
BEGIN
KEY_STRING := new.entity_name || new.entity_category || new.entity_definition;
NEW.entity_id := uuid_generate_v3(uuid_ns_oid(), KEY_STRING);
new.date_added := NOW();
new.date_modified := NOW();
RETURN NEW;
Expand All @@ -267,11 +264,9 @@ LANGUAGE plpgsql
AS $$
declare
KEY_STRING text;
OLD_ENTITY_ID uuid;
NEW_ENTITY_ID uuid;
OLD_ENTITY_ID VARCHAR(300);
NEW_ENTITY_ID VARCHAR(300);
BEGIN
KEY_STRING := new.entity_name || new.entity_category || new.entity_definition;
NEW.entity_id := uuid_generate_v3(uuid_ns_oid(), KEY_STRING);
new.date_modified := NOW();
RETURN NEW;
END;
Expand Down
26 changes: 13 additions & 13 deletions db/dot/4-upload_sample_dot_data.sql
Original file line number Diff line number Diff line change
Expand Up @@ -6,66 +6,66 @@ INSERT INTO dot.entity_categories VALUES('ZAG', 'Zagreb airport flights');
INSERT INTO dot.entity_categories VALUES('ETH', 'Ethiopian Airlines');

-- configured entities - db views of the data we want to scan
INSERT INTO dot.configured_entities VALUES('ScanProject1','b05f1f9c-2176-46b0-8e8f-d6690f696b9c', 'all_flight_data', 'ALL', '{{ config(materialized=''view'') }}
INSERT INTO dot.configured_entities VALUES('ScanProject1', 'all_flight_data', 'ALL', '{{ config(materialized=''view'') }}
{% set schema = <schema> %}
select *
from {{ schema }}.flight_data ','2021-12-07 00:00:00+00','2021-12-07 00:00:00+00','Matt');

INSERT INTO dot.configured_entities VALUES('ScanProject1','b05f1f9c-2176-46b0-8e8f-d6690f696b9b', 'zagreb_flight_data', 'ZAG', '{{ config(materialized=''view'') }}
INSERT INTO dot.configured_entities VALUES('ScanProject1', 'zagreb_flight_data', 'ZAG', '{{ config(materialized=''view'') }}
{% set schema = <schema> %}
select *
from {{ schema }}.flight_data WHERE origin_airport=''Zagreb airport'' ','2021-12-07 00:00:00+00','2021-12-07 00:00:00+00','Matt');

INSERT INTO dot.configured_entities VALUES('ScanProject1','b05f1f9c-2176-46b0-8e8f-d6690f696b9b', 'ethiopia_airlines_data', 'ETH', '{{ config(materialized=''view'') }}
INSERT INTO dot.configured_entities VALUES('ScanProject1', 'ethiopia_airlines_data', 'ETH', '{{ config(materialized=''view'') }}
{% set schema = <schema> %}
select *
from {{ schema }}.flight_data WHERE airline=''Ethiopian Airlines'' ','2021-12-07 00:00:00+00','2021-12-07 00:00:00+00','Matt');

INSERT INTO dot.configured_entities VALUES('ScanProject1','b05f1f9c-2176-46b0-8e8f-d6690f696b9b', 'all_airports_data', 'ALL', '{{ config(materialized=''view'') }}
INSERT INTO dot.configured_entities VALUES('ScanProject1', 'all_airports_data', 'ALL', '{{ config(materialized=''view'') }}
{% set schema = <schema> %}
select *
from {{ schema }}.airport_data ','2021-12-07 00:00:00+00','2021-12-07 00:00:00+00','Matt');

INSERT INTO dot.configured_entities VALUES('ScanProject1','b05f1f9c-2176-46b0-8e8f-d6690f696b9c', 'airlines_data', 'ALL', '{{ config(materialized=''view'') }}
INSERT INTO dot.configured_entities VALUES('ScanProject1', 'airlines_data', 'ALL', '{{ config(materialized=''view'') }}
{% set schema = <schema> %}
select DISTINCT airline
from {{ schema }}.flight_data ','2021-12-07 00:00:00+00','2021-12-07 00:00:00+00','Matt');


-- Note: the test_id UUIDs in the inserts below get reset by the trigger (entity_id is now a plain string, not a UUID)
INSERT INTO dot.configured_tests VALUES(TRUE, 'ScanProject1', '549c0575-e64c-3605-85a9-70356a23c4d2', 'MISSING-1', 3,
'Origin airport is not null', '', '', 'ca4513fa-96e0-3a95-a1a8-7f0c127ea82a', 'not_null', 'origin_airport', '',
'Origin airport is not null', '', '', 'all_flight_data', 'not_null', 'origin_airport', '',
NULL, '2021-12-23 19:00:00.000 -0500', '2021-12-23 19:00:00.000 -0500', 'Matt');

INSERT INTO dot.configured_tests VALUES(TRUE, 'ScanProject1', '8aca2bee-9e95-3f8a-90e9-153714e05367', 'INCONSISTENT-1',
5, 'Price is not negative', '', '', 'ca4513fa-96e0-3a95-a1a8-7f0c127ea82a', 'not_negative_string_column', 'price', '',
5, 'Price is not negative', '', '', 'all_flight_data', 'not_negative_string_column', 'price', '',
'{"name": "price"}', '2021-12-23 19:00:00.000 -0500', '2021-12-23 19:00:00.000 -0500', 'Matt');

INSERT INTO dot.configured_tests VALUES(TRUE, 'ScanProject1', '52d7352e-56ee-3084-9c67-e5ab24afc3a3', 'DUPLICATE-1',
3, 'Airport not unique', '', '', '7b689796-afde-3930-87be-ed8b7c7a0474', 'unique', 'airport', '', NULL,
3, 'Airport not unique', '', '', 'all_airports_data', 'unique', 'airport', '', NULL,
'2021-12-23 19:00:00.000 -0500', '2021-12-23 19:00:00.000 -0500', 'Matt');

INSERT INTO dot.configured_tests VALUES(TRUE, 'ScanProject1', '935e6b61-b664-3eab-9d67-97c2c9c2bec0', 'INCONSISTENT-1',
3, 'Disallowed FP methods entered in form', '', '', 'ca4513fa-96e0-3a95-a1a8-7f0c127ea82a', 'accepted_values', 'stops',
3, 'Disallowed FP methods entered in form', '', '', 'all_flight_data', 'accepted_values', 'stops',
'', $${"values": [ "1", "2", "3", "Non-stop"]}$$, '2021-12-23 19:00:00.000 -0500', '2021-12-23 19:00:00.000 -0500', 'Matt');

INSERT INTO dot.configured_tests VALUES(TRUE, 'ScanProject1', '0cdc9702-91e0-3499-b6f0-4dec12ad0f08', 'ASSESS-1', 3,
'Flight with no airport record', '', '', 'ca4513fa-96e0-3a95-a1a8-7f0c127ea82a', 'relationships', 'origin_airport',
'Flight with no airport record', '', '', 'all_flight_data', 'relationships', 'origin_airport',
'', $${"name": "flight_with_no_airport", "to": "ref('dot_model__all_airports_data')", "field": "airport"}$$,
'2021-12-23 19:00:00.000 -0500', '2021-12-23 19:00:00.000 -0500', 'Matt');

INSERT INTO dot.configured_tests VALUES(TRUE, 'ScanProject1', '0cdc9702-91e0-3499-b6f0-4dec12ad0f18', 'BIAS-1', 6,
'Price outlier airlines', '', '', 'ca4513fa-96e0-3a95-a1a8-7f0c127ea82a', 'expect_similar_means_across_reporters',
'Price outlier airlines', '', '', 'all_flight_data', 'expect_similar_means_across_reporters',
'price', '', $${"key": "airline","quantity": "price","data_table": "dot_model__all_flight_data","id_column": "airline",
"target_table":"dot_model__airlines_data"}$$, '2022-01-19 20:00:00.000 -0500', '2022-01-19 20:00:00.000 -0500', 'Matt');

INSERT INTO dot.configured_tests VALUES(TRUE, 'ScanProject1', '36d33837-bd92-370a-963a-264a4d5b2bac', 'DUPLICATE-1',
6, 'Duplicate flight record', '', '', 'ca4513fa-96e0-3a95-a1a8-7f0c127ea82a', 'possible_duplicate_forms', '', '',
6, 'Duplicate flight record', '', '', 'all_flight_data', 'possible_duplicate_forms', '', '',
$${"table_specific_reported_date": "departure_time", "table_specific_patient_uuid": "airline", "table_specific_uuid":
"uuid", "table_specific_period": "day"}$$, '2021-12-23 19:00:00.000 -0500', '2022-03-21 19:00:00.000 -0500', 'Matt');

INSERT INTO dot.configured_tests VALUES(TRUE, 'ScanProject1', 'c4a3da8f-32f4-4e9b-b135-354de203ca90', 'TREAT-1',
5, 'Number of stops has a reasonible value', '', '', 'ca4513fa-96e0-3a95-a1a8-7f0c127ea82a', 'custom_sql', '', '',
5, 'Number of stops has a reasonible value', '', '', 'all_flight_data', 'custom_sql', '', '',
format('{%s: %s}',
to_json('query'::text),
to_json($query$
Expand Down
2 changes: 1 addition & 1 deletion docker/appsmith/DOT App V2.json

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
,run_id,test_id,entity_id,test_type,column_name,id_column_name,test_parameters,test_status,test_status_message,failed_tests_view,failed_tests_view_sql
test.dbt_model_1.accepted_values_dot_model__all_flight_data_stops__1__2__3__Non_stop.b734743116,4541476c-814e-43fe-ab38-786f36beecbc,dac4c545-f610-3dae-ad82-1ddf27dae144,ca4513fa-96e0-3a95-a1a8-7f0c127ea82a,accepted_values,stops,,"{'values': ['1', '2', '3', 'Non-stop']}",fail,"got 2 results, configured to fail if != 0",tr_dot_model__all_flight_data_accepted_values_stops," WITH all_values AS (
test.dbt_model_1.accepted_values_dot_model__all_flight_data_stops__1__2__3__Non_stop.b734743116,4541476c-814e-43fe-ab38-786f36beecbc,cad13f73-27b5-3427-be8f-4d213bba3b19,all_flight_data,accepted_values,stops,,"{'values': ['1', '2', '3', 'Non-stop']}",fail,"got 2 results, configured to fail if != 0",tr_dot_model__all_flight_data_accepted_values_stops," WITH all_values AS (
SELECT dot_model__all_flight_data.stops AS value_field,
count(*) AS n_records
FROM self_tests_public_tests.dot_model__all_flight_data
Expand All @@ -9,11 +9,12 @@ test.dbt_model_1.accepted_values_dot_model__all_flight_data_stops__1__2__3__Non_
all_values.n_records
FROM all_values
WHERE all_values.value_field::text <> ALL (ARRAY['1'::character varying::text, '2'::character varying::text, '3'::character varying::text, 'Non-stop'::character varying::text]);"
test.dbt_model_1.not_negative_string_column_dot_model__all_flight_data_price__price.322389c2ba,4541476c-814e-43fe-ab38-786f36beecbc,49aa2fd3-511c-3d84-a782-a5daf57f98da,ca4513fa-96e0-3a95-a1a8-7f0c127ea82a,not_negative_string_column,price,,{'name': 'price'},fail,"got 1 result, configured to fail if != 0",tr_dot_model__all_flight_data_price," SELECT array_agg(dot_model__all_flight_data.uuid) AS uuid_list
test.dbt_model_1.not_negative_string_column_dot_model__all_flight_data_price__price.322389c2ba,4541476c-814e-43fe-ab38-786f36beecbc,ed27037a-4054-3070-9d88-fdf9cd0231c8,all_flight_data,not_negative_string_column,price,,{'name': 'price'},fail,"got 1 result, configured to fail if != 0",tr_dot_model__all_flight_data_price," SELECT array_agg(dot_model__all_flight_data.uuid) AS uuid_list
FROM self_tests_public_tests.dot_model__all_flight_data
WHERE dot_model__all_flight_data.price::character varying::text ~~ '-%'::text
HAVING count(*) > 0;"
test.dbt_model_1.not_null_dot_model__all_flight_data_origin_airport.2196b664b6,4541476c-814e-43fe-ab38-786f36beecbc,983a5746-bea7-3072-9a80-2c1c6706ceed,ca4513fa-96e0-3a95-a1a8-7f0c127ea82a,not_null,origin_airport,,{},fail,"got 53 results, configured to fail if != 0",tr_dot_model__all_flight_data_not_null_origin_a," SELECT dot_model__all_flight_data.uuid,

test.dbt_model_1.not_null_dot_model__all_flight_data_origin_airport.2196b664b6,4541476c-814e-43fe-ab38-786f36beecbc,df44c2f4-65f8-3170-a03f-6035aaa45660,all_flight_data,not_null,origin_airport,,{},fail,"got 53 results, configured to fail if != 0",tr_dot_model__all_flight_data_not_null_origin_a," SELECT dot_model__all_flight_data.uuid,
dot_model__all_flight_data.departure_time,
dot_model__all_flight_data.airline,
dot_model__all_flight_data.origin_airport,
Expand All @@ -24,15 +25,17 @@ test.dbt_model_1.not_null_dot_model__all_flight_data_origin_airport.2196b664b6,4
dot_model__all_flight_data.price
FROM self_tests_public_tests.dot_model__all_flight_data
WHERE dot_model__all_flight_data.origin_airport IS NULL;"
test.dbt_model_1.relationships_dot_model__all_flight_data_origin_airport__airport__flight_with_no_airport__ref_dot_model__all_airports_data_.3a9f7e32d9,4541476c-814e-43fe-ab38-786f36beecbc,7aa26bda-57e1-39b8-a3e9-979a3d882577,ca4513fa-96e0-3a95-a1a8-7f0c127ea82a,relationships,origin_airport,,"{'to': ""ref('dot_model__all_airports_data')"", 'name': 'flight_with_no_airport', 'field': 'airport'}",fail,"got 1 result, configured to fail if != 0",tr_dot_model__all_flight_data_flight_with_no_a," SELECT array_agg(from_model.from_uuid) AS uuid_list

test.dbt_model_1.relationships_dot_model__all_flight_data_origin_airport__airport__flight_with_no_airport__ref_dot_model__all_airports_data_.3a9f7e32d9,4541476c-814e-43fe-ab38-786f36beecbc,2ba7f3e8-cd62-37ac-854f-01f704489130,all_flight_data,relationships,origin_airport,,"{'to': ""ref('dot_model__all_airports_data')"", 'name': 'flight_with_no_airport', 'field': 'airport'}",fail,"got 1 result, configured to fail if != 0",tr_dot_model__all_flight_data_flight_with_no_a," SELECT array_agg(from_model.from_uuid) AS uuid_list
FROM ( SELECT dot_model__all_flight_data.uuid AS from_uuid,
dot_model__all_flight_data.origin_airport AS from_column_id
FROM self_tests_public_tests.dot_model__all_flight_data) from_model
LEFT JOIN ( SELECT dot_model__all_airports_data.airport AS to_id
FROM self_tests_public_tests.dot_model__all_airports_data) to_model ON to_model.to_id::text = from_model.from_column_id::text
WHERE from_model.from_column_id IS NOT NULL AND to_model.to_id IS NULL
HAVING count(*) > 0;"
test.dbt_model_1.unique_dot_model__all_airports_data_airport.912f240fa1,4541476c-814e-43fe-ab38-786f36beecbc,aa1c361c-a9ba-350e-9959-e92a5654f7dc,7b689796-afde-3930-87be-ed8b7c7a0474,unique,airport,,{},fail,"got 2 results, configured to fail if != 0",tr_dot_model__all_airports_data_unique_airport," SELECT dot_model__all_airports_data.airport AS unique_field,

test.dbt_model_1.unique_dot_model__all_airports_data_airport.912f240fa1,4541476c-814e-43fe-ab38-786f36beecbc,942f4496-1202-3768-9cfe-96128bcd754c,all_airports_data,unique,airport,,{},fail,"got 2 results, configured to fail if != 0",tr_dot_model__all_airports_data_unique_airport," SELECT dot_model__all_airports_data.airport AS unique_field,
count(*) AS n_records
FROM self_tests_public_tests.dot_model__all_airports_data
WHERE dot_model__all_airports_data.airport IS NOT NULL
Expand Down
Loading

0 comments on commit 566d3b2

Please sign in to comment.