Skip to content

Commit 68e43e3

Browse files
[SNOW-147] Filter out deleted ACLs from ACL_LATEST (#140)
* . * Delete synapse_data_warehouse/synapse/dynamic_tables/R__populate_acl_latest_table.sql * Update synapse_data_warehouse/synapse/dynamic_tables/V2.40.0__acl_latest_refactored.sql Co-authored-by: BryanFauble <17128019+BryanFauble@users.noreply.github.com> * filter out objects with no ACL * parse json after dedup * . * programmatically retrieve field names * bump up Version * comment --------- Co-authored-by: BryanFauble <17128019+BryanFauble@users.noreply.github.com>
1 parent 5557795 commit 68e43e3

File tree

2 files changed

+85
-55
lines changed

2 files changed

+85
-55
lines changed

synapse_data_warehouse/synapse/dynamic_tables/R__populate_acl_latest_table.sql

-55
This file was deleted.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
USE SCHEMA {{database_name}}.synapse; --noqa: JJ01,PRS,TMP

-- ACL_LATEST
-- Purpose: expose, for every OWNER_ID, the access-control entries found in that
-- owner's most recent non-empty ACL snapshot from the last 14 days.
-- Output: one row per distinct ACL entry of each owner, carrying the snapshot
-- metadata plus the entry's access type array (sorted) and principal ID.
CREATE OR REPLACE DYNAMIC TABLE ACL_LATEST
    TARGET_LAG = '1 day'
    WAREHOUSE = compute_xsmall
AS
-- 1. Deduplicate the snapshots table on OWNER_ID, keeping only the newest row
--    per owner among the last 14 days' worth of snapshots that have a
--    non-empty RESOURCE_ACCESS column.
WITH dedup_acl AS (
    SELECT
        CHANGE_TIMESTAMP,
        CHANGE_TYPE,
        CREATED_ON,
        OWNER_ID,
        OWNER_TYPE,
        SNAPSHOT_DATE,
        SNAPSHOT_TIMESTAMP,
        RESOURCE_ACCESS
    FROM {{ database_name }}.SYNAPSE_RAW.ACLSNAPSHOTS --noqa: TMP
    WHERE
        SNAPSHOT_DATE >= CURRENT_TIMESTAMP - INTERVAL '14 days'
        -- An empty RESOURCE_ACCESS means no ACL was captured for the owner_id.
        -- NOTE(review): WHERE is applied before QUALIFY, so an owner whose
        -- newest snapshot is empty falls back to its newest non-empty snapshot
        -- inside the window instead of disappearing -- confirm this is intended.
        AND RESOURCE_ACCESS != '[]'
    QUALIFY
        ROW_NUMBER() OVER (
            PARTITION BY OWNER_ID
            ORDER BY CHANGE_TIMESTAMP DESC, SNAPSHOT_TIMESTAMP DESC
        ) = 1
),

-- 2. Parse RESOURCE_ACCESS as JSON (only after the dedup, so just one row per
--    owner is parsed) and unpack each element of the resulting array. Each
--    element is a JSON object describing the privilege(s) assigned to a
--    particular principal.
dedup_acl_level1_unpack AS (
    SELECT
        dedup_acl.CHANGE_TIMESTAMP,
        dedup_acl.CHANGE_TYPE,
        dedup_acl.CREATED_ON,
        dedup_acl.OWNER_ID,
        dedup_acl.OWNER_TYPE,
        dedup_acl.SNAPSHOT_DATE,
        dedup_acl.SNAPSHOT_TIMESTAMP,
        flattened_resource_access.value AS ACL_ENTRY
    FROM
        dedup_acl,
        LATERAL FLATTEN(INPUT => PARSE_JSON(dedup_acl.RESOURCE_ACCESS)) AS flattened_resource_access
),

-- 3. Unpack each key-value pair of every ACL entry programmatically.
--    The fields are the access type and the principal ID, but they are named
--    differently from entry to entry, so they are matched by key name using
--    case-insensitive ILIKE with wildcards (%).
dedup_acl_level2_unpack AS (
    SELECT
        CHANGE_TIMESTAMP,
        CHANGE_TYPE,
        CREATED_ON,
        OWNER_ID,
        OWNER_TYPE,
        SNAPSHOT_DATE,
        SNAPSHOT_TIMESTAMP,
        -- Access type VARIANT, picked by key name and returned sorted.
        ARRAY_SORT(MAX(
            CASE
                WHEN acl_field.key ILIKE '%access%' THEN acl_field.value::variant
            END
        )) AS access_type,
        -- Principal ID NUMBER, picked by key name.
        MAX(
            CASE
                WHEN acl_field.key ILIKE '%principal%' THEN acl_field.value::number
            END
        ) AS principal_id
    FROM
        dedup_acl_level1_unpack,
        LATERAL FLATTEN(INPUT => ACL_ENTRY) AS acl_field
    GROUP BY
        CHANGE_TIMESTAMP,
        CHANGE_TYPE,
        CREATED_ON,
        OWNER_ID,
        OWNER_TYPE,
        SNAPSHOT_DATE,
        SNAPSHOT_TIMESTAMP,
        -- Grouping on the entry itself keeps each access type paired with the
        -- right principal ID (1 ACL entry per access type & principal ID pair).
        ACL_ENTRY
)

-- Explicit column list so the dynamic table's schema is spelled out here
-- rather than implied by the CTE above.
SELECT
    CHANGE_TIMESTAMP,
    CHANGE_TYPE,
    CREATED_ON,
    OWNER_ID,
    OWNER_TYPE,
    SNAPSHOT_DATE,
    SNAPSHOT_TIMESTAMP,
    access_type,
    principal_id
FROM dedup_acl_level2_unpack;

0 commit comments

Comments
 (0)