Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

[SNOW-107] Add deduplication logic for filehandleassociation_latest #73

Merged
merged 16 commits into from
Sep 6, 2024
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
use schema {{database_name}}.synapse; --noqa: JJ01,PRS,TMP,CP01

-- filehandleassociation_latest: for every (filehandleid, associateid) pair,
-- keeps the snapshot row(s) carrying the most recent timestamp observed
-- within the trailing 14 days.
CREATE OR REPLACE DYNAMIC TABLE filehandleassociation_latest
TARGET_LAG = '7 days'
WAREHOUSE = compute_xsmall
AS
-- Newest snapshot timestamp per (filehandleid, associateid) inside the window.
WITH latest_unique_rows AS (
    SELECT
        filehandleid,
        associateid,
        MAX(timestamp) AS latest_timestamp
    FROM
        {{database_name}}.synapse_raw.filehandleassociationsnapshots --noqa: TMP
    WHERE
        timestamp >= CURRENT_TIMESTAMP - INTERVAL '14 DAYS'
    GROUP BY
        filehandleid,
        associateid
)
-- Join back to the raw snapshots to recover the full row for each newest
-- timestamp. The join predicates must reference the CTE by its declared
-- name (latest_unique_rows).
-- NOTE: rows that tie on all three join keys are all returned.
SELECT
    filehandleassociationsnapshots.*
FROM
    {{database_name}}.synapse_raw.filehandleassociationsnapshots --noqa: TMP
INNER JOIN
    latest_unique_rows
    ON
        filehandleassociationsnapshots.filehandleid = latest_unique_rows.filehandleid
        AND
        filehandleassociationsnapshots.associateid = latest_unique_rows.associateid
        AND
        filehandleassociationsnapshots.timestamp = latest_unique_rows.latest_timestamp;
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
use schema {{database_name}}.synapse; --noqa: JJ01,PRS,TMP,CP01

-- Drop any existing TEAM_LATEST table before the dynamic table is created.
DROP TABLE IF EXISTS TEAM_LATEST;

-- TEAM_LATEST: one row per ID from TEAMSNAPSHOTS, restricted to snapshots
-- whose SNAPSHOT_DATE falls within the trailing 30 days. For each ID the row
-- with the greatest CHANGE_TIMESTAMP is kept; SNAPSHOT_TIMESTAMP (descending)
-- breaks ties.
CREATE DYNAMIC TABLE IF NOT EXISTS TEAM_LATEST
TARGET_LAG = '1 day'
WAREHOUSE = compute_xsmall
AS
SELECT *
FROM {{database_name}}.SYNAPSE_RAW.TEAMSNAPSHOTS --noqa: TMP
WHERE
    SNAPSHOT_DATE >= CURRENT_TIMESTAMP - INTERVAL '30 DAYS'
-- QUALIFY filters on the window function directly, so no helper rank column
-- has to be projected and then excluded from the output.
QUALIFY
    ROW_NUMBER() OVER (
        PARTITION BY ID
        ORDER BY CHANGE_TIMESTAMP DESC, SNAPSHOT_TIMESTAMP DESC
    ) = 1;