Skip to content

Commit 22bc594

Browse files
authored
Merge pull request #100 from Sage-Bionetworks/snow-155-userprofile_latest-dynamic-table
[SNOW-155] Converting userprofile_latest table to dynamic table
2 parents 4888855 + 8ab919c commit 22bc594

6 files changed

+70
-0
lines changed
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,27 @@
-- Introduce the dynamic table.
--
-- USERPROFILE_LATEST is (re)created as a dynamic table over
-- SYNAPSE_RAW.USERPROFILESNAPSHOT with a target lag of one day, refreshed on
-- the compute_xsmall warehouse.
USE SCHEMA {{database_name}}.synapse; --noqa: JJ01,PRS,TMP

CREATE OR REPLACE DYNAMIC TABLE USERPROFILE_LATEST
    TARGET_LAG = '1 day'
    WAREHOUSE = compute_xsmall
AS
WITH dedup_userprofile AS (
    -- One row per ID: the row with the newest CHANGE_TIMESTAMP wins, ties are
    -- broken by the newest SNAPSHOT_TIMESTAMP. Only snapshots from the last
    -- 14 days are considered.
    SELECT *
    FROM {{database_name}}.SYNAPSE_RAW.USERPROFILESNAPSHOT --noqa: TMP
    WHERE
        SNAPSHOT_DATE >= CURRENT_TIMESTAMP - INTERVAL '14 days'
    QUALIFY
        ROW_NUMBER() OVER (
            PARTITION BY ID
            ORDER BY CHANGE_TIMESTAMP DESC, SNAPSHOT_TIMESTAMP DESC
        ) = 1
)

SELECT
    -- The four free-text columns are excluded here and re-added below so that
    -- empty strings are exposed as NULL.
    * EXCLUDE (LOCATION, COMPANY, POSITION, INDUSTRY),
    -- TODO: Need to revisit this section after the mixture of NULL and empty strings issue being resolved in https://sagebionetworks.jira.com/browse/SWC-7215
    NULLIF(LOCATION, '') AS LOCATION,
    NULLIF(COMPANY, '') AS COMPANY,
    NULLIF(POSITION, '') AS POSITION,
    NULLIF(INDUSTRY, '') AS INDUSTRY
FROM
    dedup_userprofile;
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,24 @@
-- Attach the table-level and column-level comments to the
-- USERPROFILE_LATEST dynamic table.
USE SCHEMA {{database_name}}.synapse; --noqa: JJ01,PRS,TMP

-- ---------------------------------------------------------------------------
-- Table comment
-- ---------------------------------------------------------------------------
COMMENT ON DYNAMIC TABLE USERPROFILE_LATEST
    IS 'This dynamic table contain the latest snapshot of user-profiles during the past 14 days. Snapshots are taken when user profiles are created or modified. Note: Snapshots are also taken periodically and independently of the changes. The snapshot_timestamp records when the snapshot was taken.';

-- ---------------------------------------------------------------------------
-- Column comments
-- ---------------------------------------------------------------------------
COMMENT ON COLUMN USERPROFILE_LATEST.CHANGE_TYPE
    IS 'The type of change that occurred to the user profile, e.g., CREATE, UPDATE (Snapshotting does not capture DELETE change).';
COMMENT ON COLUMN USERPROFILE_LATEST.CHANGE_TIMESTAMP
    IS 'The time when any change to the user profile was made (e.g. create or update).';
COMMENT ON COLUMN USERPROFILE_LATEST.CHANGE_USER_ID
    IS 'The unique identifier of the user who made the change to the user profile.';
COMMENT ON COLUMN USERPROFILE_LATEST.SNAPSHOT_TIMESTAMP
    IS 'The time when the snapshot was taken (It is usually after the change happened).';
COMMENT ON COLUMN USERPROFILE_LATEST.ID
    IS 'The unique identifier of the user.';
COMMENT ON COLUMN USERPROFILE_LATEST.USER_NAME
    IS 'The Synapse username.';
COMMENT ON COLUMN USERPROFILE_LATEST.FIRST_NAME
    IS 'The first name of the user.';
COMMENT ON COLUMN USERPROFILE_LATEST.LAST_NAME
    IS 'The last name of the user.';
COMMENT ON COLUMN USERPROFILE_LATEST.EMAIL
    IS 'The primary email of the user.';
COMMENT ON COLUMN USERPROFILE_LATEST.SNAPSHOT_DATE
    IS 'The data is partitioned for fast and cost effective queries. The snapshot_timestamp field is converted into a date and stored in the snapshot_date field for partitioning. The date should be used as a condition (WHERE CLAUSE) in the queries.';
COMMENT ON COLUMN USERPROFILE_LATEST.CREATED_ON
    IS 'The creation time of the user profile.';
COMMENT ON COLUMN USERPROFILE_LATEST.IS_TWO_FACTOR_AUTH_ENABLED
    IS 'Indicates if the user had two factor authentication enabled when the snapshot was captured.';
COMMENT ON COLUMN USERPROFILE_LATEST.TOS_AGREEMENTS
    IS 'Contains the list of all the term of service that the user agreed to, with their agreed on date and version.';
COMMENT ON COLUMN USERPROFILE_LATEST.LOCATION
    IS 'The location of the user.';
COMMENT ON COLUMN USERPROFILE_LATEST.COMPANY
    IS 'The company where the user works.';
COMMENT ON COLUMN USERPROFILE_LATEST.POSITION
    IS 'The position of the user in the company.';
COMMENT ON COLUMN USERPROFILE_LATEST.INDUSTRY
    IS 'The industry/discipline that this person is associated with.';
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,5 @@
-- Back up the original USERPROFILE_LATEST table.
USE SCHEMA {{database_name}}.synapse; --noqa: JJ01,PRS,TMP

-- USERPROFILE_LATEST_BACKUP is created (or replaced) as a clone of
-- USERPROFILE_LATEST and is kept for validation purposes.
CREATE OR REPLACE TABLE USERPROFILE_LATEST_BACKUP
    CLONE USERPROFILE_LATEST;
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
-- Drop the USERPROFILE_LATEST table from the synapse schema.
-- NOTE(review): presumably this must run only after the backup clone
-- (USERPROFILE_LATEST_BACKUP) has been taken -- confirm the migration order.
USE SCHEMA {{database_name}}.synapse; --noqa: JJ01,PRS,TMP

-- IF EXISTS keeps the script re-runnable: a second execution is a no-op
-- instead of failing because the table is already gone.
DROP TABLE IF EXISTS USERPROFILE_LATEST;
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
-- Drop the snapshot stream from the synapse_raw schema.
USE SCHEMA {{database_name}}.synapse_raw; --noqa: JJ01,PRS,TMP

-- IF EXISTS keeps the script re-runnable: a second execution is a no-op
-- instead of failing because the stream is already gone.
DROP STREAM IF EXISTS USERPROFILESNAPSHOT_STREAM;
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,8 @@
-- Drop the scheduled task that upserted into the latest table.
-- The root task is suspended first, the upserting task is dropped, and the
-- root task and its child tasks are then resumed.
USE SCHEMA {{database_name}}.synapse_raw; --noqa: JJ01,PRS,TMP

-- Suspend the root task.
ALTER TASK REFRESH_SYNAPSE_WAREHOUSE_S3_STAGE_TASK SUSPEND;

-- Drop the latest-table upserting task.
-- IF EXISTS prevents a re-run from aborting here, which would otherwise leave
-- the root task suspended because the resume step below would never execute.
DROP TASK IF EXISTS UPSERT_TO_USERPROFILE_LATEST_TASK;

-- Resume the root task and its child tasks.
SELECT SYSTEM$TASK_DEPENDENTS_ENABLE('REFRESH_SYNAPSE_WAREHOUSE_S3_STAGE_TASK');

0 commit comments

Comments
 (0)