Skip to content

Commit

Permalink
Merge pull request #6 from DFE-Digital/schema-task
Browse files Browse the repository at this point in the history
Emit JSON schema and BigQuery schema
  • Loading branch information
duncanjbrown authored Jun 20, 2022
2 parents 0256c93 + 4c7c404 commit f10c3da
Show file tree
Hide file tree
Showing 10 changed files with 229 additions and 11 deletions.
5 changes: 3 additions & 2 deletions .rubocop_todo.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# This configuration was generated by
# `rubocop --auto-gen-config`
# on 2022-05-20 13:06:05 UTC using RuboCop version 1.26.1.
# on 2022-06-16 10:08:54 UTC using RuboCop version 1.26.1.
# The point is for the user to remove these configuration records
# one by one as the offenses are removed from the code base.
# Note that changes in the inspected code, or installation of new
Expand All @@ -14,7 +14,7 @@ Naming/FileName:
Exclude:
- 'spec/dummy/config/initializers/dfe-analytics.rb'

# Offense count: 12
# Offense count: 13
# Configuration parameters: AllowedConstants.
Style/Documentation:
Exclude:
Expand All @@ -23,6 +23,7 @@ Style/Documentation:
- 'lib/dfe/analytics.rb'
- 'lib/dfe/analytics/entities.rb'
- 'lib/dfe/analytics/event.rb'
- 'lib/dfe/analytics/event_schema.rb'
- 'lib/dfe/analytics/load_entities.rb'
- 'lib/dfe/analytics/requests.rb'
- 'lib/dfe/analytics/send_events.rb'
Expand Down
9 changes: 3 additions & 6 deletions config/event-schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,7 @@
}
},
"request_referer": {
"anyOf": [
{"type": "string"},
{"type": "null"}
]
"type": "string"
},
"anonymised_user_agent_and_ip": {
"type": "string"
Expand All @@ -48,13 +45,13 @@
"type": "string"
},
"response_status": {
"type": "integer"
"type": "string"
},
"namespace": {
"type": "string"
},
"user_id": {
"type": "integer"
"type": "string"
},
"data": {
"type": "array",
Expand Down
1 change: 1 addition & 0 deletions lib/dfe/analytics.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

require 'request_store_rails'
require 'i18n'
require 'dfe/analytics/event_schema'
require 'dfe/analytics/fields'
require 'dfe/analytics/entities'
require 'dfe/analytics/event'
Expand Down
73 changes: 73 additions & 0 deletions lib/dfe/analytics/event_schema.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
require 'json'

module DfE
  module Analytics
    # Gives access to the dfe-analytics event JSON schema shipped in
    # config/event-schema.json and derives a BigQuery table schema from it.
    class EventSchema
      # Sub-fields attached to every RECORD column: each array property is
      # emitted as a list of key / repeated-value pairs.
      KEY_VALUE_FIELDS = [
        { 'mode' => 'REQUIRED', 'name' => 'key', 'type' => 'STRING' },
        { 'mode' => 'REPEATED', 'name' => 'value', 'type' => 'STRING' }
      ].freeze

      # Reads the event JSON schema from the installed dfe-analytics gem.
      #
      # @return [String] the raw contents of config/event-schema.json
      def self.as_json
        path = "#{Gem.loaded_specs['dfe-analytics'].gem_dir}/config/event-schema.json"
        File.read(path)
      end

      # Instance-level counterpart of {.as_json}, so that
      # `EventSchema.new.as_json` also returns the schema document instead of
      # falling through to a generic Object#as_json (or raising when none exists).
      #
      # @return [String] the raw contents of config/event-schema.json
      def as_json(*_args)
        self.class.as_json
      end

      # Converts the JSON schema's top-level properties into a BigQuery schema.
      #
      # @return [String] a JSON array with one entry per property, each holding
      #   'mode', 'name' and 'type' (plus 'fields' for RECORD columns)
      def self.as_bigquery_schema
        schema = JSON.parse(as_json)
        # Either key may be absent from a schema; treat that as "none".
        required_fields = Array(schema['required'])
        properties = schema['properties'] || {}

        columns = properties.map do |name, entry|
          type = resolve_bigquery_type(entry)

          column = {
            'mode' => resolve_bigquery_mode(name, entry, required_fields),
            'name' => name,
            'type' => type
          }
          column['fields'] = KEY_VALUE_FIELDS if type == 'RECORD'
          column
        end

        columns.to_json
      end

      # Picks the BigQuery mode for a property.
      #
      # Arrays are checked first: an array column has to be REPEATED even when
      # the property is also listed as required, otherwise it would be emitted
      # as a single REQUIRED record.
      #
      # @param json_schema_entry_name [String] property name
      # @param json_schema_entry [Hash] the property's JSON schema definition
      # @param required_fields [Array<String>, nil] names listed under 'required'
      # @return [String] 'REPEATED', 'REQUIRED' or 'NULLABLE'
      def self.resolve_bigquery_mode(json_schema_entry_name, json_schema_entry, required_fields)
        if json_schema_entry['type'] == 'array'
          'REPEATED'
        elsif Array(required_fields).include?(json_schema_entry_name)
          'REQUIRED'
        else
          'NULLABLE'
        end
      end

      # Maps a property's JSON schema type (and format) to a BigQuery type.
      #
      # @param json_schema_entry [Hash] the property's JSON schema definition
      # @return [String, nil] the BigQuery type, or nil when the JSON type is
      #   not one of the handled ones (e.g. an `anyOf` entry with no 'type')
      def self.resolve_bigquery_type(json_schema_entry)
        case json_schema_entry['type']
        when 'array'   then 'RECORD'
        when 'string'  then json_schema_entry['format'] == 'date-time' ? 'TIMESTAMP' : 'STRING'
        when 'integer' then 'INTEGER'
        when 'number'  then 'FLOAT'
        when 'boolean' then 'BOOLEAN'
        end
      end
    end
  end
end
13 changes: 13 additions & 0 deletions lib/dfe/analytics/tasks/schema.rake
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Rake tasks that print the dfe-analytics schemas to standard output.
namespace :dfe do
  namespace :analytics do
    # Prints whatever DfE::Analytics::EventSchema.as_json returns.
    desc 'Print out the dfe-analytics JSON schema'
    task(:schema) { puts DfE::Analytics::EventSchema.as_json }

    # Prints whatever DfE::Analytics::EventSchema.as_bigquery_schema returns.
    desc 'Print out the dfe-analytics BigQuery schema'
    task(:big_query_schema) { puts DfE::Analytics::EventSchema.as_bigquery_schema }
  end
end
4 changes: 2 additions & 2 deletions spec/dfe/analytics/entities_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def self.name
model.create

expect(DfE::Analytics::SendEvents).to have_received(:perform_later) do |payload|
schema = File.read('config/event-schema.json')
schema = DfE::Analytics::EventSchema.new.as_json
schema_validator = JSONSchemaValidator.new(schema, payload.first)

expect(schema_validator).to be_valid, schema_validator.failure_message
Expand Down Expand Up @@ -169,7 +169,7 @@ def self.name
entity.update(email_address: 'bar@baz.com')

expect(DfE::Analytics::SendEvents).to have_received(:perform_later).twice do |payload|
schema = File.read('config/event-schema.json')
schema = DfE::Analytics::EventSchema.new.as_json
schema_validator = JSONSchemaValidator.new(schema, payload.first)

expect(schema_validator).to be_valid, schema_validator.failure_message
Expand Down
23 changes: 23 additions & 0 deletions spec/dfe/analytics/event_schema_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Checks both schema outputs of DfE::Analytics::EventSchema against files on disk.
RSpec.describe DfE::Analytics::EventSchema do
  describe '.as_json' do
    subject(:output) { described_class.as_json }

    # The schema file as shipped inside the loaded dfe-analytics gem.
    let(:schema_on_disk) do
      File.read("#{Gem.loaded_specs['dfe-analytics'].gem_dir}/config/event-schema.json")
    end

    it 'returns the JSON schema as an object' do
      expect(output).to be_present
      expect(output).to eq schema_on_disk
    end
  end

  describe '.as_bigquery_schema' do
    subject(:output) { JSON.parse(described_class.as_bigquery_schema) }

    # Expected BigQuery schema fixture, compared ignoring entry order.
    let(:bq_schema_on_disk) { File.read('spec/examples/bigquery_schema.json') }

    it 'transforms the JSON schema into a BQ schema' do
      expect(output).to be_present
      expect(output).to match_array JSON.parse(bq_schema_on_disk)
    end
  end
end
2 changes: 1 addition & 1 deletion spec/dfe/analytics/load_entities_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
described_class.new(model_name: 'Candidate', sleep_time: 0).run

expect(DfE::Analytics::SendEvents).to have_received(:perform_later) do |payload|
schema = File.read('config/event-schema.json')
schema = DfE::Analytics::EventSchema.new.as_json
schema_validator = JSONSchemaValidator.new(schema, payload.first)

expect(schema_validator).to be_valid, schema_validator.failure_message
Expand Down
106 changes: 106 additions & 0 deletions spec/examples/bigquery_schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
[
{
"mode": "REQUIRED",
"name": "occurred_at",
"type": "TIMESTAMP"
},
{
"mode": "REQUIRED",
"name": "event_type",
"type": "STRING"
},
{
"mode": "REQUIRED",
"name": "environment",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "namespace",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "user_id",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "request_uuid",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "request_method",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "request_path",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "request_user_agent",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "request_referer",
"type": "STRING"
},
{
"fields": [
{
"mode": "REQUIRED",
"name": "key",
"type": "STRING"
},
{
"mode": "REPEATED",
"name": "value",
"type": "STRING"
}
],
"mode": "REPEATED",
"name": "request_query",
"type": "RECORD"
},
{
"mode": "NULLABLE",
"name": "response_content_type",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "response_status",
"type": "STRING"
},
{
"fields": [
{
"mode": "REQUIRED",
"name": "key",
"type": "STRING"
},
{
"mode": "REPEATED",
"name": "value",
"type": "STRING"
}
],
"mode": "REPEATED",
"name": "data",
"type": "RECORD"
},
{
"mode": "NULLABLE",
"name": "entity_table_name",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "anonymised_user_agent_and_ip",
"type": "STRING"
}
]
4 changes: 4 additions & 0 deletions spec/spec_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,8 @@ def name; end
end

include DfE::Analytics::Testing::Helpers

config.expect_with :rspec do |c|
c.max_formatted_output_length = nil
end
end

0 comments on commit f10c3da

Please sign in to comment.