Skip to content

Commit 4e31e38

Browse files
danlu1philerooski
authored andcommitted
[SNOW-172] Add filter to file_latest and add comments (#128)
* add filter to file_latest and add comments * Update V2.38.0__reintroduce_file_latest_dynamic_table.sql Add NOT IS_PREVIEW filter * Update V2.38.0__reintroduce_file_latest_dynamic_table.sql move not is_preview filter
1 parent 901df0e commit 4e31e38

File tree

2 files changed

+68
-0
lines changed

2 files changed

+68
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
-- Introduce the FILE_LATEST dynamic table.
--
-- FILE_LATEST keeps at most one row per file handle ID: the most recent
-- non-preview snapshot found in SYNAPSE_RAW.FILESNAPSHOTS within the trailing
-- 30-day SNAPSHOT_DATE window. File handles whose most recent snapshot is a
-- DELETE are left out entirely.
USE SCHEMA {{database_name}}.synapse; --noqa: JJ01,PRS,TMP
CREATE OR REPLACE DYNAMIC TABLE FILE_LATEST
    TARGET_LAG = '1 day'
    WAREHOUSE = compute_xsmall
    AS
    -- Newest snapshot per file handle. Columns are listed explicitly (rather
    -- than SELECT *) so the table definition does not depend on whatever
    -- columns the raw table happens to have.
    WITH dedup_filesnapshots AS (
        SELECT
            CHANGE_TYPE,
            CHANGE_TIMESTAMP,
            CHANGE_USER_ID,
            SNAPSHOT_TIMESTAMP,
            ID,
            CREATED_BY,
            CREATED_ON,
            MODIFIED_ON,
            CONCRETE_TYPE,
            CONTENT_MD5,
            CONTENT_TYPE,
            FILE_NAME,
            STORAGE_LOCATION_ID,
            CONTENT_SIZE,
            BUCKET,
            KEY,
            PREVIEW_ID,
            IS_PREVIEW,
            STATUS,
            SNAPSHOT_DATE
        FROM {{database_name}}.SYNAPSE_RAW.FILESNAPSHOTS --noqa: TMP
        WHERE
            -- Only consider snapshots from the trailing 30-day window.
            SNAPSHOT_DATE >= CURRENT_TIMESTAMP - INTERVAL '30 days'
            -- Ignore snapshots flagged as previews.
            -- NOTE(review): NOT IS_PREVIEW also discards rows where IS_PREVIEW
            -- is NULL; confirm that is intended.
            AND NOT IS_PREVIEW
        QUALIFY
            -- Rank snapshots of the same ID newest-first; SNAPSHOT_TIMESTAMP
            -- breaks ties between equal CHANGE_TIMESTAMP values.
            ROW_NUMBER() OVER (
                PARTITION BY ID
                ORDER BY CHANGE_TIMESTAMP DESC, SNAPSHOT_TIMESTAMP DESC
            ) = 1
    )
    SELECT
        CHANGE_TYPE,
        CHANGE_TIMESTAMP,
        CHANGE_USER_ID,
        SNAPSHOT_TIMESTAMP,
        ID,
        CREATED_BY,
        CREATED_ON,
        MODIFIED_ON,
        CONCRETE_TYPE,
        CONTENT_MD5,
        CONTENT_TYPE,
        FILE_NAME,
        STORAGE_LOCATION_ID,
        CONTENT_SIZE,
        BUCKET,
        KEY,
        PREVIEW_ID,
        IS_PREVIEW,
        STATUS,
        SNAPSHOT_DATE
    FROM
        dedup_filesnapshots
    WHERE
        -- Applied after deduplication on purpose: an ID whose newest snapshot
        -- is a DELETE is dropped instead of falling back to an older snapshot.
        CHANGE_TYPE != 'DELETE';
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
-- Add table and column comments to the FILE_LATEST dynamic table
2+
USE SCHEMA {{database_name}}.synapse; --noqa: JJ01,PRS,TMP

-- Description of the dynamic table itself
COMMENT ON DYNAMIC TABLE FILE_LATEST
    IS 'This dynamic table contains the latest snapshot of files during the past 30 days. Snapshots are taken when files are created or modified. Note: Snapshots are also taken periodically and independently of the changes. The snapshot_timestamp records when the snapshot was taken.';

-- Descriptions of change-tracking and snapshot columns
COMMENT ON COLUMN FILE_LATEST.CHANGE_TYPE
    IS 'The type of change that occurred on the file handle, e.g., CREATE, UPDATE, DELETE.';
COMMENT ON COLUMN FILE_LATEST.CHANGE_TIMESTAMP
    IS 'The time when the change (created/updated/deleted) on the file is pushed to the queue for snapshotting.';
COMMENT ON COLUMN FILE_LATEST.CHANGE_USER_ID
    IS 'The unique identifier of the user who made the change to the file.';
COMMENT ON COLUMN FILE_LATEST.SNAPSHOT_TIMESTAMP
    IS 'The time when the snapshot was taken (It is usually after the change happened).';

-- Descriptions of file handle identity and lifecycle columns
COMMENT ON COLUMN FILE_LATEST.ID
    IS 'The unique identifier of the file handle.';
COMMENT ON COLUMN FILE_LATEST.CREATED_BY
    IS 'The unique identifier of the user who created the file handle.';
COMMENT ON COLUMN FILE_LATEST.CREATED_ON
    IS 'The creation timestamp of the file handle.';
COMMENT ON COLUMN FILE_LATEST.MODIFIED_ON
    IS 'The most recent change time of the file handle.';
COMMENT ON COLUMN FILE_LATEST.CONCRETE_TYPE
    IS 'The type of the file handle. Allowed file handles are: S3FileHandle, ProxyFileHandle, ExternalFileHandle, ExternalObjectStoreFileHandle, GoogleCloudFileHandle.';

-- Descriptions of file content and storage columns
COMMENT ON COLUMN FILE_LATEST.CONTENT_MD5
    IS 'The md5 hash (using MD5 algorithm) of the file referenced by the file handle.';
COMMENT ON COLUMN FILE_LATEST.CONTENT_TYPE
    IS 'Metadata about the content of the file, e.g., application/json, application/zip, application/octet-stream.';
COMMENT ON COLUMN FILE_LATEST.FILE_NAME
    IS 'The name of the file referenced by the file handle.';
COMMENT ON COLUMN FILE_LATEST.STORAGE_LOCATION_ID
    IS 'The identifier of the environment, where the physical files are stored.';
COMMENT ON COLUMN FILE_LATEST.CONTENT_SIZE
    IS 'The size of the file referenced by the file handle.';
COMMENT ON COLUMN FILE_LATEST.BUCKET
    IS 'The bucket where the file is physically stored. Applicable for s3 and GCP, otherwise empty.';
COMMENT ON COLUMN FILE_LATEST.KEY
    IS 'The key name uniquely identifies the object (file) in the bucket.';

-- Descriptions of preview, status and partition columns
COMMENT ON COLUMN FILE_LATEST.PREVIEW_ID
    IS 'The identifier of the file handle that contains a preview of the file referenced by this file handle.';
COMMENT ON COLUMN FILE_LATEST.IS_PREVIEW
    IS 'If true, the file referenced by this file handle is a preview of another file.';
COMMENT ON COLUMN FILE_LATEST.STATUS
    IS 'The availability status of the file referenced by the file handle. AVAILABLE: accessible via Synapse; UNLINKED: not referenced by Synapse and therefore available for garbage collection; ARCHIVED: the file has been garbage collected.';
COMMENT ON COLUMN FILE_LATEST.SNAPSHOT_DATE
    IS 'The data is partitioned for fast and cost effective queries. The snapshot_timestamp field is converted into a date and stored in the snapshot_date field for partitioning. The date should be used as a condition (WHERE CLAUSE) in the queries.';

0 commit comments

Comments
 (0)