USE SCHEMA {{database_name}}.synapse; --noqa: JJ01,PRS,TMP

-- ACL_LATEST
-- One row per (OWNER_ID, ACL entry): for each OWNER_ID the most recent
-- non-empty ACL snapshot from the trailing 14-day window is taken, and its
-- RESOURCE_ACCESS JSON array is exploded into one row per entry, exposing the
-- entry's access types and principal ID as columns.
--
-- Output columns: CHANGE_TIMESTAMP, CHANGE_TYPE, CREATED_ON, OWNER_ID,
-- OWNER_TYPE, SNAPSHOT_DATE, SNAPSHOT_TIMESTAMP, ACCESS_TYPE, PRINCIPAL_ID.
CREATE OR REPLACE DYNAMIC TABLE ACL_LATEST
    TARGET_LAG = '1 day'
    WAREHOUSE = compute_xsmall
    AS
    -- 1. Deduplicate the snapshots table on OWNER_ID, keeping only snapshots
    --    from the last 14 days that have a non-empty RESOURCE_ACCESS column.
    WITH dedup_acl AS (
        SELECT
            CHANGE_TIMESTAMP,
            CHANGE_TYPE,
            CREATED_ON,
            OWNER_ID,
            OWNER_TYPE,
            SNAPSHOT_DATE,
            SNAPSHOT_TIMESTAMP,
            RESOURCE_ACCESS
        FROM {{ database_name }}.SYNAPSE_RAW.ACLSNAPSHOTS --noqa: TMP
        WHERE
            -- The cutoff is derived from CURRENT_TIMESTAMP, so the window
            -- slides forward with every refresh.
            SNAPSHOT_DATE >= CURRENT_TIMESTAMP - INTERVAL '14 days'
            -- An empty RESOURCE_ACCESS means no ACL was captured for the
            -- OWNER_ID. Rows where RESOURCE_ACCESS is NULL are filtered out
            -- as well, because the comparison does not evaluate to TRUE.
            AND RESOURCE_ACCESS != '[]'
        QUALIFY
            -- Keep the newest row per owner: latest change first, then latest
            -- snapshot. NOTE(review): rows tied on both timestamps are
            -- ordered arbitrarily by ROW_NUMBER.
            ROW_NUMBER() OVER (
                PARTITION BY OWNER_ID
                ORDER BY CHANGE_TIMESTAMP DESC, SNAPSHOT_TIMESTAMP DESC
            ) = 1
    ),

    -- 2. Unpack each element of the parsed RESOURCE_ACCESS array. Each element
    --    is a JSON object describing the privilege(s) assigned to a particular
    --    principal. RESOURCE_ACCESS itself is not needed past this step, so it
    --    is not carried forward.
    dedup_acl_level1_unpack AS (
        SELECT
            dedup_acl.CHANGE_TIMESTAMP,
            dedup_acl.CHANGE_TYPE,
            dedup_acl.CREATED_ON,
            dedup_acl.OWNER_ID,
            dedup_acl.OWNER_TYPE,
            dedup_acl.SNAPSHOT_DATE,
            dedup_acl.SNAPSHOT_TIMESTAMP,
            flattened_resource_access.value AS ACL_ENTRY
        FROM
            dedup_acl,
            LATERAL FLATTEN(INPUT => PARSE_JSON(dedup_acl.RESOURCE_ACCESS)) AS flattened_resource_access
    ),

    -- 3. Unpack the key-value pairs of each ACL entry. The two fields of
    --    interest (access type and principal ID) are named differently from
    --    entry to entry, so they are located by a case-insensitive ILIKE match
    --    on the key name rather than by a fixed path.
    dedup_acl_level2_unpack AS (
        SELECT
            acl_entries.CHANGE_TIMESTAMP,
            acl_entries.CHANGE_TYPE,
            acl_entries.CREATED_ON,
            acl_entries.OWNER_ID,
            acl_entries.OWNER_TYPE,
            acl_entries.SNAPSHOT_DATE,
            acl_entries.SNAPSHOT_TIMESTAMP,
            -- Access type value of the entry, whatever its field is called.
            -- FLATTEN's value column is already a VARIANT, so no cast is
            -- needed. ARRAY_SORT puts the elements in a canonical order.
            ARRAY_SORT(MAX(
                CASE
                    WHEN acl_field.key ILIKE '%access%' THEN acl_field.value
                END
            )) AS access_type,
            -- Principal ID of the entry, whatever its field is called.
            MAX(
                CASE
                    WHEN acl_field.key ILIKE '%principal%' THEN acl_field.value::number
                END
            ) AS principal_id
        FROM
            dedup_acl_level1_unpack AS acl_entries,
            LATERAL FLATTEN(INPUT => acl_entries.ACL_ENTRY) AS acl_field
        GROUP BY
            acl_entries.CHANGE_TIMESTAMP,
            acl_entries.CHANGE_TYPE,
            acl_entries.CREATED_ON,
            acl_entries.OWNER_ID,
            acl_entries.OWNER_TYPE,
            acl_entries.SNAPSHOT_DATE,
            acl_entries.SNAPSHOT_TIMESTAMP,
            -- Grouping on the entry itself keeps each access type paired with
            -- the right principal ID (1 ACL entry per access type & principal
            -- pair). Entries that are identical within one snapshot collapse
            -- into a single row.
            acl_entries.ACL_ENTRY
    )

    -- Explicit projection so the dynamic table's schema is spelled out here
    -- rather than implied by the CTE above.
    SELECT
        CHANGE_TIMESTAMP,
        CHANGE_TYPE,
        CREATED_ON,
        OWNER_ID,
        OWNER_TYPE,
        SNAPSHOT_DATE,
        SNAPSHOT_TIMESTAMP,
        access_type,
        principal_id
    FROM dedup_acl_level2_unpack;
-- 0 commit comments