Skip to content

Commit

Permalink
misc(scoring): clear score list cache on delete and create
Browse files Browse the repository at this point in the history
  • Loading branch information
luis-dk authored and aarthy-dk committed Feb 12, 2025
1 parent 529b784 commit bab63a1
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 36 deletions.
8 changes: 4 additions & 4 deletions testgen/common/models/scores.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import pandas as pd
from sqlalchemy import Boolean, Column, Enum, Float, ForeignKey, Integer, String, select, text
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import joinedload, relationship
from sqlalchemy.orm import relationship

from testgen.common import read_template_sql_file
from testgen.common.models import Base, Session, engine
Expand Down Expand Up @@ -41,18 +41,18 @@ class ScoreDefinition(Base):
"ScoreDefinitionResult",
cascade="all, delete-orphan",
order_by="ScoreDefinitionResult.category",
lazy='joined',
lazy="joined",
)
filters: Iterable["ScoreDefinitionFilter"] = relationship(
"ScoreDefinitionFilter",
cascade="all, delete-orphan",
lazy='joined',
lazy="joined",
)
breakdown: Iterable["ScoreDefinitionBreakdownItem"] = relationship(
"ScoreDefinitionBreakdownItem",
cascade="all, delete-orphan",
order_by="ScoreDefinitionBreakdownItem.impact.desc()",
lazy='joined',
lazy="joined",
)

@classmethod
Expand Down
102 changes: 72 additions & 30 deletions testgen/template/dbupgrade/0125_incremental_upgrade.sql
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,13 @@ FROM (
NULL AS column_name,
NULL AS dq_dimension,
NULL AS semantic_data_type,
100 * COALESCE(profiling_records.record_ct, test_records.record_ct, 0) * (1 - COALESCE(profiling_records.score, 1) * COALESCE(test_records.score, 1)) / parent.all_records AS impact,
100 * (
COALESCE(profiling_records.data_point_ct * (1 - profiling_records.score) / NULLIF(parent.profiling_data_points, 0), 0)
+ COALESCE(test_records.data_point_ct * (1 - test_records.score) / NULLIF(parent.test_data_points, 0), 0)
) AS impact,
(COALESCE(profiling_records.score, 1) * COALESCE(test_records.score, 1)) AS score,
(COALESCE(profiling_records.issue_ct, 0) + COALESCE(test_records.issue_ct, 0)) AS issue_ct,
row_number() OVER (PARTITION BY score_definitions.id ORDER BY 100 * COALESCE(profiling_records.record_ct, test_records.record_ct, 0) * (1 - COALESCE(profiling_records.score, 1) * COALESCE(test_records.score, 1)) / parent.all_records DESC)
row_number() OVER (PARTITION BY score_definitions.id ORDER BY 100 * (COALESCE(profiling_records.data_point_ct * (1 - profiling_records.score) / NULLIF(parent.profiling_data_points, 0), 0) + COALESCE(test_records.data_point_ct * (1 - test_records.score) / NULLIF(parent.test_data_points, 0), 0)) DESC)
FROM profiling_records
FULL OUTER JOIN test_records
ON (test_records.project_code = profiling_records.project_code AND test_records.table_groups_id = profiling_records.table_groups_id AND test_records.table_name = profiling_records.table_name)
Expand All @@ -103,7 +106,7 @@ profiling_records AS (
table_name,
column_name,
SUM(issue_ct) AS issue_ct,
SUM(record_ct) AS record_ct,
SUM(record_ct) AS data_point_ct,
SUM(record_ct * good_data_pct) / NULLIF(SUM(record_ct), 0) AS score
FROM v_dq_profile_scoring_latest_by_column
WHERE NULLIF(column_name, '') IS NOT NULL
Expand All @@ -117,7 +120,7 @@ test_records AS (
table_name,
column_name,
SUM(issue_ct) AS issue_ct,
SUM(dq_record_ct) AS record_ct,
SUM(dq_record_ct) AS data_point_ct,
SUM(dq_record_ct * good_data_pct) / NULLIF(SUM(dq_record_ct), 0) AS score
FROM v_dq_test_scoring_latest_by_column
WHERE NULLIF(column_name, '') IS NOT NULL
Expand Down Expand Up @@ -151,10 +154,13 @@ FROM (
COALESCE(profiling_records.column_name, test_records.column_name) AS column_name,
NULL AS dq_dimension,
NULL AS semantic_data_type,
100 * COALESCE(profiling_records.record_ct, test_records.record_ct, 0) * (1 - COALESCE(profiling_records.score, 1) * COALESCE(test_records.score, 1)) / parent.all_records AS impact,
100 * (
COALESCE(profiling_records.data_point_ct * (1 - profiling_records.score) / NULLIF(parent.profiling_data_points, 0), 0)
+ COALESCE(test_records.data_point_ct * (1 - test_records.score) / NULLIF(parent.test_data_points, 0), 0)
) AS impact,
(COALESCE(profiling_records.score, 1) * COALESCE(test_records.score, 1)) AS score,
(COALESCE(profiling_records.issue_ct, 0) + COALESCE(test_records.issue_ct, 0)) AS issue_ct,
row_number() OVER (PARTITION BY score_definitions.id ORDER BY 100 * COALESCE(profiling_records.record_ct, test_records.record_ct, 0) * (1 - COALESCE(profiling_records.score, 1) * COALESCE(test_records.score, 1)) / parent.all_records DESC)
row_number() OVER (PARTITION BY score_definitions.id ORDER BY 100 * (COALESCE(profiling_records.data_point_ct * (1 - profiling_records.score) / NULLIF(parent.profiling_data_points, 0), 0) + COALESCE(test_records.data_point_ct * (1 - test_records.score) / NULLIF(parent.test_data_points, 0), 0)) DESC)
FROM profiling_records
FULL OUTER JOIN test_records
ON (test_records.project_code = profiling_records.project_code AND test_records.table_groups_id = profiling_records.table_groups_id AND test_records.table_name = profiling_records.table_name AND test_records.column_name = profiling_records.column_name)
Expand All @@ -179,7 +185,7 @@ profiling_records AS (
table_groups_name,
dq_dimension,
SUM(issue_ct) AS issue_ct,
SUM(record_ct) AS record_ct,
SUM(record_ct) AS data_point_ct,
SUM(record_ct * good_data_pct) / NULLIF(SUM(record_ct), 0) AS score
FROM v_dq_profile_scoring_latest_by_dimension
WHERE NULLIF(dq_dimension, '') IS NOT NULL
Expand All @@ -191,7 +197,7 @@ test_records AS (
table_groups_name,
dq_dimension,
SUM(issue_ct) AS issue_ct,
SUM(dq_record_ct) AS record_ct,
SUM(dq_record_ct) AS data_point_ct,
SUM(dq_record_ct * good_data_pct) / NULLIF(SUM(dq_record_ct), 0) AS score
FROM v_dq_test_scoring_latest_by_dimension
WHERE NULLIF(dq_dimension, '') IS NOT NULL
Expand Down Expand Up @@ -225,10 +231,16 @@ FROM (
NULL AS column_name,
COALESCE(profiling_records.dq_dimension, test_records.dq_dimension) AS dq_dimension,
NULL AS semantic_data_type,
100 * COALESCE(profiling_records.record_ct, test_records.record_ct, 0) * (1 - COALESCE(profiling_records.score, 1) * COALESCE(test_records.score, 1)) / parent.all_records AS impact,
100 * (
COALESCE(profiling_records.data_point_ct * (1 - profiling_records.score) / NULLIF(parent.profiling_data_points, 0), 0)
+ COALESCE(test_records.data_point_ct * (1 - test_records.score) / NULLIF(parent.test_data_points, 0), 0)
) AS impact,
(COALESCE(profiling_records.score, 1) * COALESCE(test_records.score, 1)) AS score,
(COALESCE(profiling_records.issue_ct, 0) + COALESCE(test_records.issue_ct, 0)) AS issue_ct,
row_number() OVER (PARTITION BY score_definitions.id ORDER BY 100 * COALESCE(profiling_records.record_ct, test_records.record_ct, 0) * (1 - COALESCE(profiling_records.score, 1) * COALESCE(test_records.score, 1)) / parent.all_records DESC)
row_number() OVER (PARTITION BY score_definitions.id ORDER BY 100 * (
COALESCE(profiling_records.data_point_ct * (1 - profiling_records.score) / NULLIF(parent.profiling_data_points, 0), 0)
+ COALESCE(test_records.data_point_ct * (1 - test_records.score) / NULLIF(parent.test_data_points, 0), 0)
) DESC)
FROM profiling_records
FULL OUTER JOIN test_records
ON (
Expand Down Expand Up @@ -257,7 +269,7 @@ profiling_records AS (
table_groups_name,
semantic_data_type,
SUM(issue_ct) AS issue_ct,
SUM(record_ct) AS record_ct,
SUM(record_ct) AS data_point_ct,
SUM(record_ct * good_data_pct) / NULLIF(SUM(record_ct), 0) AS score
FROM v_dq_profile_scoring_latest_by_column
WHERE NULLIF(semantic_data_type, '') IS NOT NULL
Expand All @@ -269,7 +281,7 @@ test_records AS (
table_groups_name,
semantic_data_type,
SUM(issue_ct) AS issue_ct,
SUM(dq_record_ct) AS record_ct,
SUM(dq_record_ct) AS data_point_ct,
SUM(dq_record_ct * good_data_pct) / NULLIF(SUM(dq_record_ct), 0) AS score
FROM v_dq_test_scoring_latest_by_column
WHERE NULLIF(semantic_data_type, '') IS NOT NULL
Expand Down Expand Up @@ -303,10 +315,16 @@ FROM (
NULL AS column_name,
NULL AS dq_dimension,
COALESCE(profiling_records.semantic_data_type, test_records.semantic_data_type) AS semantic_data_type,
100 * COALESCE(profiling_records.record_ct, test_records.record_ct, 0) * (1 - COALESCE(profiling_records.score, 1) * COALESCE(test_records.score, 1)) / parent.all_records AS impact,
100 * (
COALESCE(profiling_records.data_point_ct * (1 - profiling_records.score) / NULLIF(parent.profiling_data_points, 0), 0)
+ COALESCE(test_records.data_point_ct * (1 - test_records.score) / NULLIF(parent.test_data_points, 0), 0)
) AS impact,
(COALESCE(profiling_records.score, 1) * COALESCE(test_records.score, 1)) AS score,
(COALESCE(profiling_records.issue_ct, 0) + COALESCE(test_records.issue_ct, 0)) AS issue_ct,
row_number() OVER (PARTITION BY score_definitions.id ORDER BY 100 * COALESCE(profiling_records.record_ct, test_records.record_ct, 0) * (1 - COALESCE(profiling_records.score, 1) * COALESCE(test_records.score, 1)) / parent.all_records DESC)
row_number() OVER (PARTITION BY score_definitions.id ORDER BY 100 * (
COALESCE(profiling_records.data_point_ct * (1 - profiling_records.score) / NULLIF(parent.profiling_data_points, 0), 0)
+ COALESCE(test_records.data_point_ct * (1 - test_records.score) / NULLIF(parent.test_data_points, 0), 0)
) DESC)
FROM profiling_records
FULL OUTER JOIN test_records
ON (
Expand Down Expand Up @@ -336,7 +354,7 @@ profiling_records AS (
table_groups_name,
table_name,
SUM(issue_ct) AS issue_ct,
SUM(record_ct) AS record_ct,
SUM(record_ct) AS data_point_ct,
SUM(record_ct * good_data_pct) / NULLIF(SUM(record_ct), 0) AS score
FROM v_dq_profile_scoring_latest_by_column
WHERE NULLIF(table_name, '') IS NOT NULL
Expand All @@ -350,7 +368,7 @@ test_records AS (
table_groups_name,
table_name,
SUM(issue_ct) AS issue_ct,
SUM(dq_record_ct) AS record_ct,
SUM(dq_record_ct) AS data_point_ct,
SUM(dq_record_ct * good_data_pct) / NULLIF(SUM(dq_record_ct), 0) AS score
FROM v_dq_test_scoring_latest_by_column
WHERE NULLIF(table_name, '') IS NOT NULL
Expand Down Expand Up @@ -385,10 +403,16 @@ FROM (
NULL AS column_name,
NULL AS dq_dimension,
NULL AS semantic_data_type,
100 * COALESCE(profiling_records.record_ct, test_records.record_ct, 0) * (1 - COALESCE(profiling_records.score, 1) * COALESCE(test_records.score, 1)) / parent.all_records AS impact,
100 * (
COALESCE(profiling_records.data_point_ct * (1 - profiling_records.score) / NULLIF(parent.profiling_data_points, 0), 0)
+ COALESCE(test_records.data_point_ct * (1 - test_records.score) / NULLIF(parent.test_data_points, 0), 0)
) AS impact,
(COALESCE(profiling_records.score, 1) * COALESCE(test_records.score, 1)) AS score,
(COALESCE(profiling_records.issue_ct, 0) + COALESCE(test_records.issue_ct, 0)) AS issue_ct,
row_number() OVER (PARTITION BY score_definitions.id ORDER BY 100 * COALESCE(profiling_records.record_ct, test_records.record_ct, 0) * (1 - COALESCE(profiling_records.score, 1) * COALESCE(test_records.score, 1)) / parent.all_records DESC)
row_number() OVER (PARTITION BY score_definitions.id ORDER BY 100 * (
COALESCE(profiling_records.data_point_ct * (1 - profiling_records.score) / NULLIF(parent.profiling_data_points, 0), 0)
+ COALESCE(test_records.data_point_ct * (1 - test_records.score) / NULLIF(parent.test_data_points, 0), 0)
) DESC)
FROM profiling_records
FULL OUTER JOIN test_records
ON (test_records.project_code = profiling_records.project_code AND test_records.table_groups_id = profiling_records.table_groups_id AND test_records.table_name = profiling_records.table_name)
Expand All @@ -415,7 +439,7 @@ profiling_records AS (
table_name,
column_name,
SUM(issue_ct) AS issue_ct,
SUM(record_ct) AS record_ct,
SUM(record_ct) AS data_point_ct,
SUM(record_ct * good_data_pct) / NULLIF(SUM(record_ct), 0) AS score
FROM v_dq_profile_scoring_latest_by_column
WHERE NULLIF(column_name, '') IS NOT NULL
Expand All @@ -430,7 +454,7 @@ test_records AS (
table_name,
column_name,
SUM(issue_ct) AS issue_ct,
SUM(dq_record_ct) AS record_ct,
SUM(dq_record_ct) AS data_point_ct,
SUM(dq_record_ct * good_data_pct) / NULLIF(SUM(dq_record_ct), 0) AS score
FROM v_dq_test_scoring_latest_by_column
WHERE NULLIF(column_name, '') IS NOT NULL
Expand Down Expand Up @@ -465,10 +489,16 @@ FROM (
COALESCE(profiling_records.column_name, test_records.column_name) AS column_name,
NULL AS dq_dimension,
NULL AS semantic_data_type,
100 * COALESCE(profiling_records.record_ct, test_records.record_ct, 0) * (1 - COALESCE(profiling_records.score, 1) * COALESCE(test_records.score, 1)) / parent.all_records AS impact,
100 * (
COALESCE(profiling_records.data_point_ct * (1 - profiling_records.score) / NULLIF(parent.profiling_data_points, 0), 0)
+ COALESCE(test_records.data_point_ct * (1 - test_records.score) / NULLIF(parent.test_data_points, 0), 0)
) AS impact,
(COALESCE(profiling_records.score, 1) * COALESCE(test_records.score, 1)) AS score,
(COALESCE(profiling_records.issue_ct, 0) + COALESCE(test_records.issue_ct, 0)) AS issue_ct,
row_number() OVER (PARTITION BY score_definitions.id ORDER BY 100 * COALESCE(profiling_records.record_ct, test_records.record_ct, 0) * (1 - COALESCE(profiling_records.score, 1) * COALESCE(test_records.score, 1)) / parent.all_records DESC)
row_number() OVER (PARTITION BY score_definitions.id ORDER BY 100 * (
COALESCE(profiling_records.data_point_ct * (1 - profiling_records.score) / NULLIF(parent.profiling_data_points, 0), 0)
+ COALESCE(test_records.data_point_ct * (1 - test_records.score) / NULLIF(parent.test_data_points, 0), 0)
) DESC)
FROM profiling_records
FULL OUTER JOIN test_records
ON (test_records.project_code = profiling_records.project_code AND test_records.table_groups_id = profiling_records.table_groups_id AND test_records.table_name = profiling_records.table_name AND test_records.column_name = profiling_records.column_name)
Expand All @@ -493,7 +523,7 @@ profiling_records AS (
table_groups_name,
dq_dimension,
SUM(issue_ct) AS issue_ct,
SUM(record_ct) AS record_ct,
SUM(record_ct) AS data_point_ct,
SUM(record_ct * good_data_pct) / NULLIF(SUM(record_ct), 0) AS score
FROM v_dq_profile_scoring_latest_by_dimension
WHERE NULLIF(dq_dimension, '') IS NOT NULL
Expand All @@ -506,7 +536,7 @@ test_records AS (
table_groups_name,
dq_dimension,
SUM(issue_ct) AS issue_ct,
SUM(dq_record_ct) AS record_ct,
SUM(dq_record_ct) AS data_point_ct,
SUM(dq_record_ct * good_data_pct) / NULLIF(SUM(dq_record_ct), 0) AS score
FROM v_dq_test_scoring_latest_by_dimension
WHERE NULLIF(dq_dimension, '') IS NOT NULL
Expand Down Expand Up @@ -541,10 +571,16 @@ FROM (
NULL AS column_name,
COALESCE(profiling_records.dq_dimension, test_records.dq_dimension) AS dq_dimension,
NULL AS semantic_data_type,
100 * COALESCE(profiling_records.record_ct, test_records.record_ct, 0) * (1 - COALESCE(profiling_records.score, 1) * COALESCE(test_records.score, 1)) / parent.all_records AS impact,
100 * (
COALESCE(profiling_records.data_point_ct * (1 - profiling_records.score) / NULLIF(parent.profiling_data_points, 0), 0)
+ COALESCE(test_records.data_point_ct * (1 - test_records.score) / NULLIF(parent.test_data_points, 0), 0)
) AS impact,
(COALESCE(profiling_records.score, 1) * COALESCE(test_records.score, 1)) AS score,
(COALESCE(profiling_records.issue_ct, 0) + COALESCE(test_records.issue_ct, 0)) AS issue_ct,
row_number() OVER (PARTITION BY score_definitions.id ORDER BY 100 * COALESCE(profiling_records.record_ct, test_records.record_ct, 0) * (1 - COALESCE(profiling_records.score, 1) * COALESCE(test_records.score, 1)) / parent.all_records DESC)
row_number() OVER (PARTITION BY score_definitions.id ORDER BY 100 * (
COALESCE(profiling_records.data_point_ct * (1 - profiling_records.score) / NULLIF(parent.profiling_data_points, 0), 0)
+ COALESCE(test_records.data_point_ct * (1 - test_records.score) / NULLIF(parent.test_data_points, 0), 0)
) DESC)
FROM profiling_records
FULL OUTER JOIN test_records
ON (
Expand Down Expand Up @@ -573,7 +609,7 @@ profiling_records AS (
table_groups_name,
semantic_data_type,
SUM(issue_ct) AS issue_ct,
SUM(record_ct) AS record_ct,
SUM(record_ct) AS data_point_ct,
SUM(record_ct * good_data_pct) / NULLIF(SUM(record_ct), 0) AS score
FROM v_dq_profile_scoring_latest_by_column
WHERE NULLIF(semantic_data_type, '') IS NOT NULL
Expand All @@ -586,7 +622,7 @@ test_records AS (
table_groups_name,
semantic_data_type,
SUM(issue_ct) AS issue_ct,
SUM(dq_record_ct) AS record_ct,
SUM(dq_record_ct) AS data_point_ct,
SUM(dq_record_ct * good_data_pct) / NULLIF(SUM(dq_record_ct), 0) AS score
FROM v_dq_test_scoring_latest_by_column
WHERE NULLIF(semantic_data_type, '') IS NOT NULL
Expand Down Expand Up @@ -621,10 +657,16 @@ FROM (
NULL AS column_name,
NULL AS dq_dimension,
COALESCE(profiling_records.semantic_data_type, test_records.semantic_data_type) AS semantic_data_type,
100 * COALESCE(profiling_records.record_ct, test_records.record_ct, 0) * (1 - COALESCE(profiling_records.score, 1) * COALESCE(test_records.score, 1)) / parent.all_records AS impact,
100 * (
COALESCE(profiling_records.data_point_ct * (1 - profiling_records.score) / NULLIF(parent.profiling_data_points, 0), 0)
+ COALESCE(test_records.data_point_ct * (1 - test_records.score) / NULLIF(parent.test_data_points, 0), 0)
) AS impact,
(COALESCE(profiling_records.score, 1) * COALESCE(test_records.score, 1)) AS score,
(COALESCE(profiling_records.issue_ct, 0) + COALESCE(test_records.issue_ct, 0)) AS issue_ct,
row_number() OVER (PARTITION BY score_definitions.id ORDER BY 100 * COALESCE(profiling_records.record_ct, test_records.record_ct, 0) * (1 - COALESCE(profiling_records.score, 1) * COALESCE(test_records.score, 1)) / parent.all_records DESC)
row_number() OVER (PARTITION BY score_definitions.id ORDER BY 100 * (
COALESCE(profiling_records.data_point_ct * (1 - profiling_records.score) / NULLIF(parent.profiling_data_points, 0), 0)
+ COALESCE(test_records.data_point_ct * (1 - test_records.score) / NULLIF(parent.test_data_points, 0), 0)
) DESC)
FROM profiling_records
FULL OUTER JOIN test_records
ON (
Expand Down
3 changes: 2 additions & 1 deletion testgen/ui/views/score_details.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from testgen.ui.navigation.page import Page
from testgen.ui.navigation.router import Router
from testgen.ui.pdf import hygiene_issue_report, test_result_report
from testgen.ui.queries.scoring_queries import get_score_card_issue_reports
from testgen.ui.queries.scoring_queries import get_all_score_cards, get_score_card_issue_reports
from testgen.ui.services import authentication_service
from testgen.ui.session import session, temp_value
from testgen.ui.views.dialogs.profiling_results_dialog import profiling_results_dialog
Expand Down Expand Up @@ -154,4 +154,5 @@ def delete_score_card(definition_id: str) -> None:

if delete_clicked():
score_definition.delete()
get_all_score_cards.clear()
Router().navigate("quality-dashboard")
Loading

0 comments on commit bab63a1

Please sign in to comment.