Skip to content

Commit

Permalink
feat: Use LowCardinality for smaller columns
Browse files Browse the repository at this point in the history
Generally, for columns with fewer than 10k unique values, this provides a noticeable performance benefit.
  • Loading branch information
bmtcril committed May 28, 2024
1 parent b043d49 commit c6ee178
Show file tree
Hide file tree
Showing 9 changed files with 18 additions and 17 deletions.
11 changes: 6 additions & 5 deletions models/base/xapi_events_all_parsed.sql
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

select
event_id as event_id,
JSON_VALUE(event::String, '$.verb.id') as verb_id,
toLowCardinality(JSON_VALUE(event::String, '$.verb.id')) as verb_id,
COALESCE(
NULLIF(JSON_VALUE(event::String, '$.actor.account.name'), ''),
NULLIF(JSON_VALUE(event::String, '$.actor.mbox'), ''),
Expand All @@ -23,7 +23,7 @@ select
-- If the contextActivities parent is a course, use that. It can be a "course"
-- type, or a "cmi.interaction" type for multiple question problem submissions.
-- Otherwise use the object id for the course id.
multiIf(
toLowCardinality(multiIf(
-- If the contextActivities parent is a course, use that
JSON_VALUE(
event::String, '$.context.contextActivities.parent[0].definition.type'
Expand All @@ -37,10 +37,11 @@ select
JSON_VALUE(event::String, '$.context.contextActivities.grouping[0].id'),
-- Otherwise use the object id
JSON_VALUE(event::String, '$.object.id')
) as course_id,
coalesce(
)) as course_id,
toLowCardinality(splitByString('/', course_id)[-1]) as course_key,
toLowCardinality(coalesce(
get_org_from_course_url(course_id), get_org_from_ccx_course_url(course_id), ''
) as org,
)) as org,
emission_time as emission_time,
event::String as event
from {{ source("xapi", "xapi_events_all") }}
2 changes: 1 addition & 1 deletion models/completion/completion_events.sql
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ select
CAST(emission_time, 'DateTime') as emission_time,
actor_id,
object_id,
splitByString('/', course_id)[-1] as course_key,
course_key,
org,
verb_id,
JSON_VALUE(
Expand Down
6 changes: 3 additions & 3 deletions models/enrollment/enrollment_events.sql
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@ select
cast(emission_time as DateTime) as emission_time,
actor_id,
object_id,
splitByString('/', course_id)[-1] as course_key,
course_key,
org,
verb_id,
JSON_VALUE(
toLowCardinality(JSON_VALUE(
event,
'$.object.definition.extensions."https://w3id.org/xapi/acrossx/extensions/type"'
) as enrollment_mode
)) as enrollment_mode
from {{ ref("xapi_events_all_parsed") }}
where
verb_id in (
Expand Down
2 changes: 1 addition & 1 deletion models/forum/forum_events.sql
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ select
event_id,
CAST(emission_time, 'DateTime') as emission_time,
org,
splitByString('/', course_id)[-1] as course_key,
course_key,
object_id,
actor_id,
verb_id
Expand Down
2 changes: 1 addition & 1 deletion models/grading/grading_events.sql
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ select
CAST(emission_time, 'DateTime') as emission_time,
actor_id,
object_id,
splitByString('/', course_id)[-1] as course_key,
course_key,
org,
verb_id,
JSONExtractFloat(event, 'result', 'score', 'scaled') as scaled_score
Expand Down
2 changes: 1 addition & 1 deletion models/navigation/navigation_events.sql
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ select
cast(emission_time as DateTime) as emission_time,
actor_id,
splitByString('/xblock/', object_id)[-1] as block_id,
splitByString('/', course_id)[-1] as course_key,
course_key,
org,
verb_id,
JSONExtractString(event, 'object', 'definition', 'type') as object_type,
Expand Down
4 changes: 2 additions & 2 deletions models/problems/problem_events.sql
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ select
cast(emission_time as DateTime) as emission_time,
actor_id,
object_id,
splitByString('/', course_id)[-1] as course_key,
course_key,
org,
verb_id,
JSON_VALUE(event, '$.result.response') as responses,
Expand All @@ -25,7 +25,7 @@ select
cast(JSON_VALUE(event, '$.result.success') as Bool),
false
) as success,
JSON_VALUE(event, '$.object.definition.interactionType') as interaction_type,
toLowCardinality(JSON_VALUE(event, '$.object.definition.interactionType')) as interaction_type,
if(
verb_id = 'https://w3id.org/xapi/acrossx/verbs/evaluated',
cast(
Expand Down
2 changes: 1 addition & 1 deletion models/video/video_playback_events.sql
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ select
CAST(emission_time, 'DateTime') as emission_time,
actor_id,
object_id,
splitByString('/', course_id)[-1] as course_key,
course_key,
org,
verb_id,
ceil(
Expand Down
4 changes: 2 additions & 2 deletions models/video/video_transcript_events.sql
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ select
event_id,
CAST(emission_time, 'DateTime') as emission_time,
org,
splitByString('/', course_id)[-1] as course_key,
course_key,
splitByString('/xblock/', object_id)[2] as video_id,
actor_id,
JSONExtractBool(
Expand All @@ -25,7 +25,7 @@ select
) as cc_enabled
from {{ ref("xapi_events_all_parsed") }}
where
verb_id in ('http://adlnet.gov/expapi/verbs/interacted')
verb_id = 'http://adlnet.gov/expapi/verbs/interacted'
and JSONHas(
event,
'result',
Expand Down

0 comments on commit c6ee178

Please sign in to comment.