/**
 * SQLPage page "cak/inbox.sql", registered in navigation through `cakNav`
 * with the caption "Inbox" and sibling order 1.
 *
 * Emits the active page title, then a `table` component with `sort` and
 * `search` set to TRUE, followed by one row per record of the `inbox` view.
 * The `subject` column is declared as markdown: each value is built as a
 * markdown link to `/cak/email-detail.sql` carrying the row's
 * `extended_uniform_resource_id` in the `id` query parameter.
 *
 * NOTE(review): `align_right` names 'Column Count', but no column with that
 * name is selected below — looks like a leftover; confirm.
 * NOTE(review): `message_subject` is spliced into the markdown link text
 * unescaped; a subject containing `[` or `]` would presumably break the
 * link — verify how the renderer treats it.
 */
@cakNav({
  caption: "Inbox",
  description: ``,
  siblingOrder: 1,
})
"cak/inbox.sql"() {
  return this.SQL`
    ${this.activePageTitle()}

    SELECT 'table' AS component,
          'subject' AS markdown,
          'Column Count' as align_right,
          TRUE as sort,
          TRUE as search;

    SELECT extended_uniform_resource_id as "uniform resource id",
      "message_from" as "message from",
      '[' || message_subject || '](/cak/email-detail.sql?id=' || extended_uniform_resource_id || ')' AS "subject",
      strftime('%m/%d/%Y', message_date) as "message date"
    from inbox
    `;
}

/**
 * SQLPage page "cak/email-detail.sql": detail view for one email, selected by
 * the `$id` parameter.
 *
 * The shell decorator is given `breadcrumbsFromNavStmts: "no"` and the page
 * writes its own `breadcrumb` component instead: Home, "IMAP Email System",
 * "inbox", and finally the `message_subject` of the `inbox` row whose
 * `extended_uniform_resource_id` equals `$id`.
 *
 * Below the breadcrumb it emits a `table` component listing
 * `uniform_resource_id`, `anchor` and `anchor_text` from
 * `ur_transform_html_email_anchor` for the same `$id`.
 *
 * Both lookups cast each side of the comparison to TEXT before comparing.
 *
 * NOTE(review): as on the inbox page, `align_right` names 'Column Count',
 * which is not one of the selected columns — confirm.
 */
@spn.shell({ breadcrumbsFromNavStmts: "no" })
"cak/email-detail.sql"() {
  return this.SQL`
  select
    'breadcrumb' as component;
  select
      'Home' as title,
      '/'    as link;
  select
      'IMAP Email System' as title,
      '/cak/' as link;
  select
      'inbox' as title,
      '/cak/inbox.sql' as link;
  select
      "message_subject" as title from inbox where CAST(extended_uniform_resource_id AS TEXT)=CAST($id AS TEXT);

   SELECT 'table' AS component,
                'Column Count' as align_right,
                TRUE as sort,
                TRUE as search;
    SELECT
    uniform_resource_id,
    anchor as "News letter link",
    anchor_text as "link text"
    from
      ur_transform_html_email_anchor where CAST(uniform_resource_id AS TEXT)=CAST($id AS TEXT);`;
}
-- View `inbox`: one row per IMAP message that has a companion
-- uniform_resource row whose uri is the message resource's uri with '/html'
-- appended (presumably the HTML rendition of the message — confirm against
-- the ingestion layout).
--
-- Output columns:
--   base_uniform_resource_id      id of the message's own uniform_resource
--   message_from / message_subject / message_date
--                                 "from", "subject" and "date" of the message row
--   extended_uniform_resource_id  id of the companion '/html' uniform_resource
--   extended_uri                  uri of that companion resource
DROP VIEW IF EXISTS inbox;

CREATE VIEW inbox AS
SELECT
    msg.uniform_resource_id       AS base_uniform_resource_id,
    msg."from"                    AS message_from,
    msg."subject"                 AS message_subject,
    msg."date"                    AS message_date,
    html_part.uniform_resource_id AS extended_uniform_resource_id,
    html_part.uri                 AS extended_uri
FROM ur_ingest_session_imap_acct_folder_message AS msg
    -- the message's own resource row, needed only for its uri
    INNER JOIN uniform_resource AS base
        ON base.uniform_resource_id = msg.uniform_resource_id
    -- the companion resource stored at "<message uri>/html"
    INNER JOIN uniform_resource AS html_part
        ON html_part.uri = base.uri || '/html'
-- already implied by the join predicate above; retained unchanged
WHERE html_part.uri LIKE '%/html';
import { assertEquals, assertExists } from "jsr:@std/assert@1";
import { DB } from "https://deno.land/x/sqlite@v3.8/mod.ts";

/** Path of the RSSD SQLite database the tests run against (relative to cwd). */
const DEFAULT_RSSD_PATH = "./resource-surveillance.sqlite.db";

/**
 * Runs the shared "Check database" step, which asserts that the database file
 * exists on disk.
 *
 * @param t the enclosing test context
 * @returns `true` when the step passed; `false` when the file is missing, in
 *          which case the caller should stop instead of opening the database
 */
async function checkDatabaseStep(t: Deno.TestContext): Promise<boolean> {
  return await t.step("Check database", async () => {
    assertExists(
      await Deno.stat(DEFAULT_RSSD_PATH).catch(() => null),
      `❌ Error: ${DEFAULT_RSSD_PATH} does not exist`,
    );
  });
}

/**
 * Opens the database without creating it. The default open mode would
 * silently create an empty database when the file is missing, turning a
 * "database not found" problem into confusing "no such table" failures.
 */
function openExistingDb(): DB {
  return new DB(DEFAULT_RSSD_PATH, { mode: "write" });
}

/**
 * Counts entries in `sqlite_master` with the given type and name.
 *
 * @param db   open database handle
 * @param type schema object type to look for
 * @param name schema object name to look for
 * @returns the number of matching entries (0 when none)
 */
function countSchemaObjects(
  db: DB,
  type: "table" | "view",
  name: string,
): number {
  const rows = db.query<[number]>(
    "SELECT COUNT(*) FROM sqlite_master WHERE type = ? AND name = ?",
    [type, name],
  );
  return rows[0]?.[0] ?? 0;
}

/**
 * Executes DDL and, on failure, rethrows with a description of what was being
 * created so the failing step reports it. The original error is kept as
 * `cause` rather than being logged and swallowed.
 *
 * @param db   open database handle
 * @param what human-readable name of the object being created
 * @param ddl  one or more SQL statements to execute
 * @throws Error when the database rejects the DDL
 */
function runDdl(db: DB, what: string, ddl: string): void {
  try {
    db.execute(ddl);
  } catch (cause) {
    throw new Error(`Failed to create ${what}`, { cause });
  }
}

// Stateful objects: tables materialised in the database.
// Only the flattened anchor table is (re)created here; the two existence
// checks expect their tables to be present already (presumably created by the
// package's stateful SQL — confirm against the ingestion workflow).
Deno.test("Statefull tables check", async (t) => {
  if (!(await checkDatabaseStep(t))) return;

  const db = openExistingDb();
  try {
    await t.step("Flattened email anchor", () => {
      runDdl(
        db,
        "table ur_transform_html_flattened_email_anchor",
        `DROP TABLE IF EXISTS ur_transform_html_flattened_email_anchor;
        CREATE TABLE ur_transform_html_flattened_email_anchor AS
        SELECT
            uniform_resource_transform_id,
            uniform_resource_id,
            json_extract(json_each.value, '$.attributes.href') AS anchor,
            json_extract(json_each.value, '$.children[0]') AS anchor_text
        FROM
            uniform_resource_transform,
            json_each(content)`,
      );
      const result = db.query(
        `SELECT COUNT(*) AS count FROM ur_transform_html_flattened_email_anchor`,
      );
      assertEquals(result.length, 1);
    });

    // The assertions below are deliberately NOT wrapped in try/catch: a caught
    // AssertionError would make the step pass even when the table is missing.
    await t.step("Email anchor subscription filter existence check", () => {
      assertEquals(
        countSchemaObjects(
          db,
          "table",
          "ur_transform_html_email_anchor_subscription_filter",
        ),
        1,
        "Table does not exist.",
      );
    });

    await t.step("Email anchor existence check", () => {
      assertEquals(
        countSchemaObjects(db, "table", "ur_transform_html_email_anchor"),
        1,
        "Table does not exist.",
      );
    });
  } finally {
    // Release the handle even if something outside a step throws.
    db.close();
  }
});

// Stateless objects: views, which hold no data of their own.
Deno.test("Stateless tables check", async (t) => {
  if (!(await checkDatabaseStep(t))) return;

  const db = openExistingDb();
  try {
    await t.step("Inbox", () => {
      runDdl(
        db,
        "view inbox",
        `DROP VIEW IF EXISTS inbox;
        CREATE VIEW inbox AS
        SELECT
            ur_imap.uniform_resource_id AS base_uniform_resource_id,
            ur_imap."from" AS message_from,
            ur_imap."subject" AS message_subject,
            ur_imap."date" AS message_date,
            ur_extended.uniform_resource_id AS extended_uniform_resource_id,
            ur_extended.uri AS extended_uri
        FROM
            ur_ingest_session_imap_acct_folder_message ur_imap
        JOIN
            uniform_resource ur_base
            ON ur_base.uniform_resource_id = ur_imap.uniform_resource_id
        JOIN
            uniform_resource ur_extended
            ON ur_extended.uri = ur_base.uri || '/html'
        WHERE
            ur_extended.uri LIKE '%/html'`,
      );
      assertEquals(
        countSchemaObjects(db, "view", "inbox"),
        1,
        "View does not exist.",
      );
      // Selecting from the view proves its definition resolves against the
      // current schema, not merely that it was registered.
      const result = db.query(`SELECT COUNT(*) AS count FROM inbox`);
      assertEquals(result.length, 1);
    });
  } finally {
    db.close();
  }
});