From 86b7091214bcf23656db0b560936a3c2e4d8097e Mon Sep 17 00:00:00 2001
From: Thomas Yu
Date: Tue, 7 Nov 2023 22:49:24 -0800
Subject: [PATCH 1/4] Add created_on to user profile

---
Notes (ignored by git am):
  V2.5.0 adds created_on to userprofilesnapshot in the synapse_raw schema.
  V2.5.1 suspends the three tasks, redefines the snapshot COPY INTO and the
  USERPROFILE_LATEST MERGE so both carry created_on, then resumes the tasks
  in the reverse order. The select list and the UPDATE SET list are each
  missing one comma here; PATCH 4/4 adds them.

 .../V2.5.0__add_created_on_to_userprofile.sql |  2 +
 ...1__add_created_on_to_userprofile_tasks.sql | 78 +++++++++++++++++++
 2 files changed, 80 insertions(+)
 create mode 100644 synapse_data_warehouse/synapse_raw/tables/V2.5.0__add_created_on_to_userprofile.sql
 create mode 100644 synapse_data_warehouse/synapse_raw/tasks/V2.5.1__add_created_on_to_userprofile_tasks.sql

diff --git a/synapse_data_warehouse/synapse_raw/tables/V2.5.0__add_created_on_to_userprofile.sql b/synapse_data_warehouse/synapse_raw/tables/V2.5.0__add_created_on_to_userprofile.sql
new file mode 100644
index 00000000..6298a62d
--- /dev/null
+++ b/synapse_data_warehouse/synapse_raw/tables/V2.5.0__add_created_on_to_userprofile.sql
@@ -0,0 +1,2 @@
+USE SCHEMA {{database_name}}.synapse_raw; --noqa: JJ01,PRS,TMP
+ALTER TABLE userprofilesnapshot ADD COLUMN created_on TIMESTAMP;
diff --git a/synapse_data_warehouse/synapse_raw/tasks/V2.5.1__add_created_on_to_userprofile_tasks.sql b/synapse_data_warehouse/synapse_raw/tasks/V2.5.1__add_created_on_to_userprofile_tasks.sql
new file mode 100644
index 00000000..acd8f659
--- /dev/null
+++ b/synapse_data_warehouse/synapse_raw/tasks/V2.5.1__add_created_on_to_userprofile_tasks.sql
@@ -0,0 +1,78 @@
+use role accountadmin;
+use schema {{database_name}}.synapse_raw; --noqa: JJ01,PRS,TMP
+alter task refresh_synapse_warehouse_s3_stage_task suspend;
+alter task userprofilesnapshot_task suspend;
+alter task upsert_to_userprofile_latest_task suspend;
+alter task userprofilesnapshot_task MODIFY AS
+    copy into
+        userprofilesnapshot
+    from (
+        select
+            $1:change_type as change_type,
+            $1:change_timestamp as change_timestamp,
+            $1:change_user_id as change_user_id,
+            $1:snapshot_timestamp as snapshot_timestamp,
+            $1:id as id,
+            $1:user_name as user_name,
+            $1:first_name as first_name,
+            $1:last_name as last_name,
+            REGEXP_REPLACE($1:email, '.+\@', '*****@') as email,
+            $1:location as location,
+            $1:company as company,
+            $1:position as position,
+            NULLIF(
+                REGEXP_REPLACE(
+                    metadata$filename,
+                    '.*userprofilesnapshots\/snapshot_date\=(.*)\/.*', '\\1'
+                ),
+                '__HIVE_DEFAULT_PARTITION__'
+            ) as snapshot_date
+            $1:created_on as created_on
+        from
+            @{{stage_storage_integration}}_stage/userprofilesnapshots --noqa: TMP
+    )
+    pattern = '.*userprofilesnapshots/snapshot_date=.*/.*';
+
+alter task upsert_to_userprofile_latest_task modify as
+    MERGE INTO {{database_name}}.SYNAPSE.USERPROFILE_LATEST AS TARGET_TABLE --noqa: TMP
+    USING (
+        WITH RANKED_NODES AS (
+            SELECT
+                *,
+                "row_number"()
+                    OVER (
+                        PARTITION BY ID
+                        ORDER BY CHANGE_TIMESTAMP DESC, SNAPSHOT_TIMESTAMP DESC
+                    )
+                    AS N
+            FROM
+                USERPROFILESNAPSHOT_STREAM
+        )
+
+        SELECT * EXCLUDE N
+        FROM RANKED_NODES
+        WHERE N = 1
+    ) AS SOURCE_TABLE ON TARGET_TABLE.ID = SOURCE_TABLE.ID
+    WHEN MATCHED THEN
+        UPDATE SET
+            TARGET_TABLE.CHANGE_TYPE = SOURCE_TABLE.CHANGE_TYPE,
+            TARGET_TABLE.CHANGE_TIMESTAMP = SOURCE_TABLE.CHANGE_TIMESTAMP,
+            TARGET_TABLE.CHANGE_USER_ID = SOURCE_TABLE.CHANGE_USER_ID,
+            TARGET_TABLE.SNAPSHOT_TIMESTAMP = SOURCE_TABLE.SNAPSHOT_TIMESTAMP,
+            TARGET_TABLE.ID = SOURCE_TABLE.ID,
+            TARGET_TABLE.USER_NAME = SOURCE_TABLE.USER_NAME,
+            TARGET_TABLE.FIRST_NAME = SOURCE_TABLE.FIRST_NAME,
+            TARGET_TABLE.LAST_NAME = SOURCE_TABLE.LAST_NAME,
+            TARGET_TABLE.EMAIL = SOURCE_TABLE.EMAIL,
+            TARGET_TABLE.LOCATION = SOURCE_TABLE.LOCATION,
+            TARGET_TABLE.COMPANY = SOURCE_TABLE.COMPANY,
+            TARGET_TABLE.POSITION = SOURCE_TABLE.POSITION,
+            TARGET_TABLE.SNAPSHOT_DATE = SOURCE_TABLE.SNAPSHOT_DATE
+            TARGET_TABLE.CREATED_ON = SOURCE_TABLE.CREATED_ON
+    WHEN NOT MATCHED THEN
+        INSERT (CHANGE_TYPE, CHANGE_TIMESTAMP, CHANGE_USER_ID, SNAPSHOT_TIMESTAMP, ID, USER_NAME, FIRST_NAME, LAST_NAME, EMAIL, LOCATION, COMPANY, POSITION, SNAPSHOT_DATE, CREATED_ON)
+        VALUES (SOURCE_TABLE.CHANGE_TYPE, SOURCE_TABLE.CHANGE_TIMESTAMP, SOURCE_TABLE.CHANGE_USER_ID, SOURCE_TABLE.SNAPSHOT_TIMESTAMP, SOURCE_TABLE.ID, SOURCE_TABLE.USER_NAME, SOURCE_TABLE.FIRST_NAME, SOURCE_TABLE.LAST_NAME, SOURCE_TABLE.EMAIL, SOURCE_TABLE.LOCATION, SOURCE_TABLE.COMPANY, SOURCE_TABLE.POSITION, SOURCE_TABLE.SNAPSHOT_DATE, SOURCE_TABLE.CREATED_ON);
+
+alter task upsert_to_userprofile_latest_task resume;
+alter task userprofilesnapshot_task resume;
+alter task refresh_synapse_warehouse_s3_stage_task resume;

From 3730d5937d44c78cb8a2e23df992e2112678dbd1 Mon Sep 17 00:00:00 2001
From: Thomas Yu
Date: Tue, 7 Nov 2023 22:52:39 -0800
Subject: [PATCH 2/4] Add created on to user profile latest

---
Notes (ignored by git am):
  The merge task in PATCH 1/4 writes to {{database_name}}.SYNAPSE.USERPROFILE_LATEST,
  while userprofilesnapshot is the COPY INTO target resolved under synapse_raw.
  This migration therefore adds created_on to USERPROFILE_LATEST in the synapse
  schema, and synapse_raw/tables/V2.5.0 keeps altering userprofilesnapshot as
  introduced in PATCH 1/4 (the earlier revision of this patch had the two table
  names swapped between the files).
  The "index" blob-id line is omitted because the file content differs from the
  earlier revision; regenerate the series with git format-patch to restore it.

 .../tables/V2.5.2__add_created_on_to_userprofile_latest.sql | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 synapse_data_warehouse/synapse/tables/V2.5.2__add_created_on_to_userprofile_latest.sql

diff --git a/synapse_data_warehouse/synapse/tables/V2.5.2__add_created_on_to_userprofile_latest.sql b/synapse_data_warehouse/synapse/tables/V2.5.2__add_created_on_to_userprofile_latest.sql
new file mode 100644
--- /dev/null
+++ b/synapse_data_warehouse/synapse/tables/V2.5.2__add_created_on_to_userprofile_latest.sql
@@ -0,0 +1,2 @@
+USE SCHEMA {{database_name}}.synapse; --noqa: JJ01,PRS,TMP
+ALTER TABLE USERPROFILE_LATEST ADD COLUMN created_on TIMESTAMP;
\ No newline at end of file

From 63baf687f8f3f9a69e3582c353ad04d86509eee2 Mon Sep 17 00:00:00 2001
From: Thomas Yu
Date: Tue, 7 Nov 2023 22:53:46 -0800
Subject: [PATCH 3/4] Add new line

---
Notes (ignored by git am):
  Only adds the missing end-of-file newline to V2.5.2; the statement itself is
  unchanged. The "index" blob-id line is omitted for the same reason as in
  PATCH 2/4.

 .../tables/V2.5.2__add_created_on_to_userprofile_latest.sql | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse_data_warehouse/synapse/tables/V2.5.2__add_created_on_to_userprofile_latest.sql b/synapse_data_warehouse/synapse/tables/V2.5.2__add_created_on_to_userprofile_latest.sql
--- a/synapse_data_warehouse/synapse/tables/V2.5.2__add_created_on_to_userprofile_latest.sql
+++ b/synapse_data_warehouse/synapse/tables/V2.5.2__add_created_on_to_userprofile_latest.sql
@@ -1,2 +1,2 @@
 USE SCHEMA {{database_name}}.synapse; --noqa: JJ01,PRS,TMP
-ALTER TABLE USERPROFILE_LATEST ADD COLUMN created_on TIMESTAMP;
\ No newline at end of file
+ALTER TABLE USERPROFILE_LATEST ADD COLUMN created_on TIMESTAMP;

From 85f9be1ce2839525791f217183cd58f31121688e Mon Sep 17 00:00:00 2001
From: Thomas Yu
Date: Tue, 7 Nov 2023 22:55:14 -0800
Subject: [PATCH 4/4] Add comma

---
Notes (ignored by git am):
  Adds the separator comma before the new created_on entry in both the COPY
  INTO select list and the MERGE UPDATE SET list of V2.5.1.

 .../tasks/V2.5.1__add_created_on_to_userprofile_tasks.sql | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/synapse_data_warehouse/synapse_raw/tasks/V2.5.1__add_created_on_to_userprofile_tasks.sql b/synapse_data_warehouse/synapse_raw/tasks/V2.5.1__add_created_on_to_userprofile_tasks.sql
index acd8f659..972e8289 100644
--- a/synapse_data_warehouse/synapse_raw/tasks/V2.5.1__add_created_on_to_userprofile_tasks.sql
+++ b/synapse_data_warehouse/synapse_raw/tasks/V2.5.1__add_created_on_to_userprofile_tasks.sql
@@ -26,7 +26,7 @@ alter task userprofilesnapshot_task MODIFY AS
                     '.*userprofilesnapshots\/snapshot_date\=(.*)\/.*', '\\1'
                 ),
                 '__HIVE_DEFAULT_PARTITION__'
-            ) as snapshot_date
+            ) as snapshot_date,
             $1:created_on as created_on
         from
             @{{stage_storage_integration}}_stage/userprofilesnapshots --noqa: TMP
@@ -67,7 +67,7 @@ alter task upsert_to_userprofile_latest_task modify as
             TARGET_TABLE.LOCATION = SOURCE_TABLE.LOCATION,
             TARGET_TABLE.COMPANY = SOURCE_TABLE.COMPANY,
             TARGET_TABLE.POSITION = SOURCE_TABLE.POSITION,
-            TARGET_TABLE.SNAPSHOT_DATE = SOURCE_TABLE.SNAPSHOT_DATE
+            TARGET_TABLE.SNAPSHOT_DATE = SOURCE_TABLE.SNAPSHOT_DATE,
             TARGET_TABLE.CREATED_ON = SOURCE_TABLE.CREATED_ON
     WHEN NOT MATCHED THEN
         INSERT (CHANGE_TYPE, CHANGE_TIMESTAMP, CHANGE_USER_ID, SNAPSHOT_TIMESTAMP, ID, USER_NAME, FIRST_NAME, LAST_NAME, EMAIL, LOCATION, COMPANY, POSITION, SNAPSHOT_DATE, CREATED_ON)