From 9d57584e6cc8b79ca575df7a66e54dec47249c96 Mon Sep 17 00:00:00 2001
From: stevenleggdfe <51697598+stevenleggdfe@users.noreply.github.com>
Date: Mon, 17 Feb 2020 12:26:46 +0000
Subject: [PATCH 1/5] Repoint vacancy-feedback-metrics-by-month.sql

Now points to vacancy instead of vacancies
Excludes the unused month = null row the previous query was returning
---
 bigquery/vacancy-feedback-metrics-by-month.sql | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/bigquery/vacancy-feedback-metrics-by-month.sql b/bigquery/vacancy-feedback-metrics-by-month.sql
index 035074c5fb..f17303cc46 100644
--- a/bigquery/vacancy-feedback-metrics-by-month.sql
+++ b/bigquery/vacancy-feedback-metrics-by-month.sql
@@ -12,7 +12,8 @@ SELECT
     feedback_available) AS exclusive_hires_rate_upperbound
 FROM (
   SELECT
-    CAST(TIMESTAMP_TRUNC(publish_on,MONTH) AS DATE) AS month,
+    DATE_TRUNC(PARSE_DATE("%e %B %E4Y",
+        publish_on),MONTH) AS month,
     COUNT(*) AS vacancies_published,
     COUNTIF(hired_status IS NOT NULL
       AND listed_elsewhere IS NOT NULL) AS feedback_available,
@@ -28,12 +29,10 @@ FROM (
         "listed_free",
         "listed_dont_know")) AS exclusive_hires_upperbound
   FROM
-    `teacher-vacancy-service.production_dataset.vacancies`
-  WHERE
-    status NOT IN ("trashed",
-      "deleted",
-      "draft")
+    `teacher-vacancy-service.production_dataset.vacancy`
   GROUP BY
     month)
+WHERE
+  month IS NOT NULL
 ORDER BY
   month ASC

From 30d324ec3188ab69739dcb6c264de44f5fd79b14 Mon Sep 17 00:00:00 2001
From: stevenleggdfe <51697598+stevenleggdfe@users.noreply.github.com>
Date: Mon, 17 Feb 2020 13:58:40 +0000
Subject: [PATCH 2/5] Filter out vacancies which haven't been published

---
 bigquery/vacancy-feedback-metrics-by-month.sql | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/bigquery/vacancy-feedback-metrics-by-month.sql b/bigquery/vacancy-feedback-metrics-by-month.sql
index f17303cc46..eb246f2461 100644
--- a/bigquery/vacancy-feedback-metrics-by-month.sql
+++ b/bigquery/vacancy-feedback-metrics-by-month.sql
@@ -30,6 +30,10 @@ FROM (
         "listed_dont_know")) AS exclusive_hires_upperbound
   FROM
     `teacher-vacancy-service.production_dataset.vacancy`
+  WHERE
+    status NOT IN ("trashed",
+      "deleted",
+      "draft")
   GROUP BY
     month)
 WHERE

From 116c884ecf958280c415ec5ec07bcb34a7eba6b5 Mon Sep 17 00:00:00 2001
From: stevenleggdfe <51697598+stevenleggdfe@users.noreply.github.com>
Date: Mon, 17 Feb 2020 14:11:17 +0000
Subject: [PATCH 3/5] Repoint schools_joined_with_metrics.sql

From vacancies to vacancy
---
 bigquery/schools_joined_with_metrics.sql | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/bigquery/schools_joined_with_metrics.sql b/bigquery/schools_joined_with_metrics.sql
index 3ec639e3a3..aee6109414 100644
--- a/bigquery/schools_joined_with_metrics.sql
+++ b/bigquery/schools_joined_with_metrics.sql
@@ -14,17 +14,19 @@ WITH
     school.urn AS urn,
     COUNT(*) AS vacancies_published,
     #the total number of vacancies this school published over all time
-    COUNTIF(CAST(publish_on AS DATE) > DATE_SUB(CURRENT_DATE(), INTERVAL 1 YEAR)) AS vacancies_published_in_the_last_year,
-    COUNTIF(CAST(publish_on AS DATE) > DATE_SUB(CURRENT_DATE(), INTERVAL 3 MONTH)) AS vacancies_published_in_the_last_quarter,
+    COUNTIF(PARSE_DATE("%e %B %E4Y",publish_on) > DATE_SUB(CURRENT_DATE(), INTERVAL 1 YEAR)) AS vacancies_published_in_the_last_year,
+    COUNTIF(PARSE_DATE("%e %B %E4Y",publish_on) > DATE_SUB(CURRENT_DATE(), INTERVAL 3 MONTH)) AS vacancies_published_in_the_last_quarter,
     COUNTIF(status="published"
-      AND CAST(expiry_time AS DATE) > CURRENT_DATE()) AS vacancies_currently_live #count this as vacancies which have been published and have not yet expired
+      AND PARSE_DATE("%e %B %E4Y",expires_on) > CURRENT_DATE()) AS vacancies_currently_live #count this as vacancies which have been published and have not yet expired
   FROM
-    `teacher-vacancy-service.production_dataset.vacancies` AS vacancies
+    `teacher-vacancy-service.production_dataset.vacancy` AS vacancy
+  INNER JOIN `teacher-vacancy-service.production_dataset.school` AS school
+  ON vacancy.school_id=school.id
   WHERE
     status != "trashed" #exclude deleted vacancies from the counts above
     AND status != "draft" #exclude vacancies which have not (yet) been published from the counts above
   GROUP BY
-    vacancies.school.urn ),
+    school.urn),
   mat_metrics AS ( #make a table of academy trusts (MATs and SATs) with current values of trust related metrics for inclusion in main query later
   SELECT
     GIAS.Trusts__name_ AS trust_name,
@@ -134,7 +136,7 @@ ON
 LEFT JOIN
   school_vacancy_metrics
 ON
-  school_vacancy_metrics.urn=CAST(school.urn AS STRING)
+  school_vacancy_metrics.urn=school.urn
 LEFT JOIN
   `teacher-vacancy-service.production_dataset.STATIC_GIAS_manual_download` AS GIAS
 ON

From 6cbaf41478490ce6d272e40ec6d39ee2fc1de9a7 Mon Sep 17 00:00:00 2001
From: stevenleggdfe <51697598+stevenleggdfe@users.noreply.github.com>
Date: Mon, 17 Feb 2020 14:40:29 +0000
Subject: [PATCH 4/5] Repoint
 ga-append-latest-expired-vacancies-to-CALCULATED-vacancy-GA-event-counts.sql

The new vacancy table holds expiry_time as a day-first string, so it is
parsed with PARSE_TIMESTAMP wherever it is selected or compared.
ORDER BY also sorts on the parsed timestamp: sorting the raw string is
lexicographic and would place "9 January 2020 ..." above
"17 February 2020 ..." in a DESC sort.
---
 ...-to-CALCULATED-vacancy-GA-event-counts.sql | 22 +++++++++----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/bigquery/ga-append-latest-expired-vacancies-to-CALCULATED-vacancy-GA-event-counts.sql b/bigquery/ga-append-latest-expired-vacancies-to-CALCULATED-vacancy-GA-event-counts.sql
index 4ad4fa839f..e960e46308 100644
--- a/bigquery/ga-append-latest-expired-vacancies-to-CALCULATED-vacancy-GA-event-counts.sql
+++ b/bigquery/ga-append-latest-expired-vacancies-to-CALCULATED-vacancy-GA-event-counts.sql
@@ -1,7 +1,7 @@
 SELECT
-  vacancies.id, #unique vacancy ID from the Teaching Vacancies database
-  vacancies.slug, #human readable dash-separated string that is probably also a unique vacancy ID (but can't trust this)
-  vacancies.expiry_time,
+  vacancy.id, #unique vacancy ID from the Teaching Vacancies database
+  vacancy.slug, #human readable dash-separated string that is probably also a unique vacancy ID (but can't trust this)
+  PARSE_TIMESTAMP("%e %B %E4Y %R",vacancy.expiry_time) AS expiry_time,
   SUM( #series of SUMIF statements that turn various Google Analytics event configurations into counts of the total number of events that occurred on this vacancy's page
   IF
     (events.event_Action="vacancy_visited",
@@ -50,7 +50,7 @@ SELECT
       events.Unique_Events,
       0)) AS twitter_shares,
 FROM
-  `teacher-vacancy-service.production_dataset.vacancies` AS vacancies
+  `teacher-vacancy-service.production_dataset.vacancy` AS vacancy
 LEFT JOIN (
   SELECT
     SPLIT(SPLIT(Page_path_level_2,"/")[ #Convert the URL part from the Page_path_level_2 which comes in the form /slug into just the slug, which can be joined onto the slug field from the vacancies table in the database
@@ -64,13 +64,13 @@ LEFT JOIN (
   FROM
     `teacher-vacancy-service.production_dataset.GA_events_on_vacancies_page`) AS events
 ON
-  vacancies.slug=events.slug #matches the vacancy slug from our database with the vacancy slug from the part of the page URL recorded in Google Analytics - this is the critical part of this query
-WHERE vacancies.expiry_time < CURRENT_TIMESTAMP #only obtain vacancies which have expired
-AND vacancies.expiry_time > (SELECT MAX(expiry_time) FROM `teacher-vacancy-service.production_dataset.CALCULATED_vacancy_GA_event_counts`) #only select vacancies that expired since we last ran this query
+  vacancy.slug=events.slug #matches the vacancy slug from our database with the vacancy slug from the part of the page URL recorded in Google Analytics - this is the critical part of this query
+WHERE PARSE_TIMESTAMP("%e %B %E4Y %R",vacancy.expiry_time) < CURRENT_TIMESTAMP #only obtain vacancies which have expired
+AND PARSE_TIMESTAMP("%e %B %E4Y %R",vacancy.expiry_time) > (SELECT MAX(expiry_time) FROM `teacher-vacancy-service.production_dataset.CALCULATED_vacancy_GA_event_counts`) #only select vacancies that expired since we last ran this query
 AND status NOT IN ("trashed","draft")
 GROUP BY
-  vacancies.id,
-  vacancies.slug,
-  vacancies.expiry_time
+  vacancy.id,
+  vacancy.slug,
+  vacancy.expiry_time
 ORDER BY
-  vacancies.expiry_time DESC
+  PARSE_TIMESTAMP("%e %B %E4Y %R",vacancy.expiry_time) DESC #sort chronologically on the parsed timestamp; the raw column is a day-first string and would sort lexicographically

From 8db6d659cf47081adbeeb4f284280158edc15777 Mon Sep 17 00:00:00 2001
From: stevenleggdfe <51697598+stevenleggdfe@users.noreply.github.com>
Date: Mon, 17 Feb 2020 14:47:35 +0000
Subject: [PATCH 5/5] Repoint vacancies-published.sql

---
 bigquery/vacancies-published.sql | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/bigquery/vacancies-published.sql b/bigquery/vacancies-published.sql
index 9b00963f09..5b51dbf962 100644
--- a/bigquery/vacancies-published.sql
+++ b/bigquery/vacancies-published.sql
@@ -15,16 +15,16 @@ WITH
       DATE_ADD(year,INTERVAL 8 MONTH)) AS academic_year #converts the month into the corresponding academic year, storing this as the 1st September at the beginning of that academic year (the precise format doesn't matter; we just need a consistent way to represent the academic year so that the PARTITION BY above works)
   FROM (
     SELECT
-      CAST(TIMESTAMP_TRUNC(publish_on,MONTH) AS DATE) AS month, #use the first day of the month containing publish_on to represent the month (standard in data studio)
-      CAST(TIMESTAMP_TRUNC(publish_on,YEAR) AS DATE) AS year #use the first day of the year containing publish_on to represent the year (standard in data studio)
+      DATE_TRUNC(PARSE_DATE("%e %B %E4Y",publish_on),MONTH) AS month, #use the first day of the month containing publish_on to represent the month (standard in data studio)
+      DATE_TRUNC(PARSE_DATE("%e %B %E4Y",publish_on),YEAR) AS year #use the first day of the year containing publish_on to represent the year (standard in data studio)
     FROM
-      `teacher-vacancy-service.production_dataset.vacancies`
+      `teacher-vacancy-service.production_dataset.vacancy`
     WHERE
       status NOT IN ("trashed",
         "deleted",
         "draft") #excludes vacancies which were never published, or which were published and then subsequently deleted
       AND publish_on IS NOT NULL #also excludes vacancies which were never published (to be safe)
-      AND publish_on < CURRENT_TIMESTAMP() ) #excludes vacancies which have been published but are not yet visible on the site because their publication date is in the future
+      AND PARSE_DATE("%e %B %E4Y",publish_on) <= CURRENT_DATE() ) #excludes vacancies which have been published but are not yet visible on the site because their publication date is in the future
   GROUP BY
     month,
     year