Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Emit JSON schema and BigQuery schema #6

Merged
merged 5 commits into from
Jun 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .rubocop_todo.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# This configuration was generated by
# `rubocop --auto-gen-config`
# on 2022-05-20 13:06:05 UTC using RuboCop version 1.26.1.
# on 2022-06-16 10:08:54 UTC using RuboCop version 1.26.1.
# The point is for the user to remove these configuration records
# one by one as the offenses are removed from the code base.
# Note that changes in the inspected code, or installation of new
Expand All @@ -14,7 +14,7 @@ Naming/FileName:
Exclude:
- 'spec/dummy/config/initializers/dfe-analytics.rb'

# Offense count: 12
# Offense count: 13
# Configuration parameters: AllowedConstants.
Style/Documentation:
Exclude:
Expand All @@ -23,6 +23,7 @@ Style/Documentation:
- 'lib/dfe/analytics.rb'
- 'lib/dfe/analytics/entities.rb'
- 'lib/dfe/analytics/event.rb'
- 'lib/dfe/analytics/event_schema.rb'
- 'lib/dfe/analytics/load_entities.rb'
- 'lib/dfe/analytics/requests.rb'
- 'lib/dfe/analytics/send_events.rb'
Expand Down
9 changes: 3 additions & 6 deletions config/event-schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,7 @@
}
},
"request_referer": {
"anyOf": [
{"type": "string"},
{"type": "null"}
]
"type": "string"
},
"anonymised_user_agent_and_ip": {
"type": "string"
Expand All @@ -48,13 +45,13 @@
"type": "string"
},
"response_status": {
"type": "integer"
"type": "string"
},
"namespace": {
"type": "string"
},
"user_id": {
"type": "integer"
"type": "string"
},
"data": {
"type": "array",
Expand Down
1 change: 1 addition & 0 deletions lib/dfe/analytics.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

require 'request_store_rails'
require 'i18n'
require 'dfe/analytics/event_schema'
require 'dfe/analytics/fields'
require 'dfe/analytics/entities'
require 'dfe/analytics/event'
Expand Down
73 changes: 73 additions & 0 deletions lib/dfe/analytics/event_schema.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
module DfE
module Analytics
class EventSchema
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
class EventSchema
module EventSchema

Why is this a class and not a module?

# Returns the event JSON schema bundled with the dfe-analytics gem.
#
# The gem directory is taken from the RubyGems spec registered under
# 'dfe-analytics', and config/event-schema.json is read from inside it.
#
# @return [String] the unparsed contents of the schema file
def self.as_json
  gem_root = Gem.loaded_specs['dfe-analytics'].gem_dir
  File.read(File.join(gem_root, 'config/event-schema.json'))
end

# Converts the event JSON schema into a BigQuery table schema.
#
# Every entry under the JSON schema's "properties" becomes one BigQuery
# field with a 'mode', 'name' and 'type'. Fields whose resolved type is
# 'RECORD' additionally get a fixed pair of sub-fields: a required string
# 'key' and a repeated string 'value'.
#
# @return [String] a JSON-encoded array of BigQuery field definitions, in
#   the same order as the properties appear in the JSON schema
def self.as_bigquery_schema
  json_schema = JSON.parse(as_json)

  # Both keywords are optional in JSON Schema; fall back to empty
  # collections so a schema without them does not blow up on nil.
  required_fields = json_schema['required'] || []
  properties = json_schema['properties'] || {}

  bigquery_fields = properties.map do |name, definition|
    field_type = resolve_bigquery_type(definition)

    field = {
      'mode' => resolve_bigquery_mode(name, definition, required_fields),
      'name' => name,
      'type' => field_type
    }

    # 'fields' is appended last so the emitted key order stays
    # mode, name, type, fields.
    if field_type == 'RECORD'
      field['fields'] = [
        { 'mode' => 'REQUIRED', 'name' => 'key', 'type' => 'STRING' },
        { 'mode' => 'REPEATED', 'name' => 'value', 'type' => 'STRING' }
      ]
    end

    field
  end

  bigquery_fields.to_json
end

# Picks the BigQuery field mode for one JSON schema property.
#
# @param json_schema_entry_name [String] the property name
# @param json_schema_entry [Hash] the property's JSON schema definition
# @param required_fields [Array<String>, nil] names listed under the JSON
#   schema's "required" keyword; nil is treated as "none required"
# @return [String] 'REPEATED', 'REQUIRED' or 'NULLABLE'
def self.resolve_bigquery_mode(json_schema_entry_name, json_schema_entry, required_fields)
  # Arrays are checked first: BigQuery expresses a list only through
  # REPEATED, so marking a required array as REQUIRED would turn it into a
  # single record and lose the list semantics.
  if json_schema_entry['type'] == 'array'
    'REPEATED'
  elsif Array(required_fields).include?(json_schema_entry_name)
    'REQUIRED'
  else
    'NULLABLE'
  end
end

# Maps one JSON schema property definition to a BigQuery column type.
#
# Mapping, based on the definition's 'type' (and 'format' for strings):
#   array                       -> RECORD
#   string, format 'date-time'  -> TIMESTAMP
#   string (any other format)   -> STRING
#   integer                     -> INTEGER
#   number                      -> FLOAT
#   boolean                     -> BOOLEAN
#
# @param json_schema_entry [Hash] the property's JSON schema definition
# @return [String, nil] the BigQuery type name, or nil when the definition
#   has no 'type' or a type with no mapping here (e.g. 'object', or an
#   'anyOf' definition)
def self.resolve_bigquery_type(json_schema_entry)
  case json_schema_entry['type']
  when 'array'
    'RECORD'
  when 'string'
    json_schema_entry['format'] == 'date-time' ? 'TIMESTAMP' : 'STRING'
  when 'integer'
    'INTEGER'
  when 'number'
    'FLOAT'
  when 'boolean'
    'BOOLEAN'
  end
end
end
end
end
13 changes: 13 additions & 0 deletions lib/dfe/analytics/tasks/schema.rake
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Rake tasks that print the schemas produced by DfE::Analytics::EventSchema.
namespace :dfe do
  namespace :analytics do
    desc 'Print out the dfe-analytics JSON schema'
    task(:schema) { puts DfE::Analytics::EventSchema.as_json }

    desc 'Print out the dfe-analytics BigQuery schema'
    task(:big_query_schema) { puts DfE::Analytics::EventSchema.as_bigquery_schema }
  end
end
4 changes: 2 additions & 2 deletions spec/dfe/analytics/entities_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def self.name
model.create

expect(DfE::Analytics::SendEvents).to have_received(:perform_later) do |payload|
schema = File.read('config/event-schema.json')
schema = DfE::Analytics::EventSchema.new.as_json
schema_validator = JSONSchemaValidator.new(schema, payload.first)

expect(schema_validator).to be_valid, schema_validator.failure_message
Expand Down Expand Up @@ -169,7 +169,7 @@ def self.name
entity.update(email_address: 'bar@baz.com')

expect(DfE::Analytics::SendEvents).to have_received(:perform_later).twice do |payload|
schema = File.read('config/event-schema.json')
schema = DfE::Analytics::EventSchema.new.as_json
schema_validator = JSONSchemaValidator.new(schema, payload.first)

expect(schema_validator).to be_valid, schema_validator.failure_message
Expand Down
23 changes: 23 additions & 0 deletions spec/dfe/analytics/event_schema_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Specs for the two schema renderings exposed by DfE::Analytics::EventSchema.
RSpec.describe DfE::Analytics::EventSchema do
  describe '.as_json' do
    subject(:output) { described_class.as_json }

    let(:schema_on_disk) do
      File.read("#{Gem.loaded_specs['dfe-analytics'].gem_dir}/config/event-schema.json")
    end

    # The output is compared verbatim against the schema file in the gem.
    it 'returns the JSON schema as an object' do
      expect(output).to be_present
      expect(output).to eq schema_on_disk
    end
  end

  describe '.as_bigquery_schema' do
    subject(:output) { JSON.parse(described_class.as_bigquery_schema) }

    let(:bq_schema_on_disk) { File.read('spec/examples/bigquery_schema.json') }

    # Field order is not significant here, hence match_array.
    it 'transforms the JSON schema into a BQ schema' do
      expect(output).to be_present
      expect(output).to match_array JSON.parse(bq_schema_on_disk)
    end
  end
end
2 changes: 1 addition & 1 deletion spec/dfe/analytics/load_entities_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
described_class.new(model_name: 'Candidate', sleep_time: 0).run

expect(DfE::Analytics::SendEvents).to have_received(:perform_later) do |payload|
schema = File.read('config/event-schema.json')
schema = DfE::Analytics::EventSchema.new.as_json
schema_validator = JSONSchemaValidator.new(schema, payload.first)

expect(schema_validator).to be_valid, schema_validator.failure_message
Expand Down
106 changes: 106 additions & 0 deletions spec/examples/bigquery_schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
[
{
"mode": "REQUIRED",
"name": "occurred_at",
"type": "TIMESTAMP"
},
{
"mode": "REQUIRED",
"name": "event_type",
"type": "STRING"
},
{
"mode": "REQUIRED",
"name": "environment",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "namespace",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "user_id",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "request_uuid",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "request_method",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "request_path",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "request_user_agent",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "request_referer",
"type": "STRING"
},
{
"fields": [
{
"mode": "REQUIRED",
"name": "key",
"type": "STRING"
},
{
"mode": "REPEATED",
"name": "value",
"type": "STRING"
}
],
"mode": "REPEATED",
"name": "request_query",
"type": "RECORD"
},
{
"mode": "NULLABLE",
"name": "response_content_type",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "response_status",
"type": "STRING"
},
{
"fields": [
{
"mode": "REQUIRED",
"name": "key",
"type": "STRING"
},
{
"mode": "REPEATED",
"name": "value",
"type": "STRING"
}
],
"mode": "REPEATED",
"name": "data",
"type": "RECORD"
},
{
"mode": "NULLABLE",
"name": "entity_table_name",
"type": "STRING"
},
{
"mode": "NULLABLE",
"name": "anonymised_user_agent_and_ip",
"type": "STRING"
}
]
4 changes: 4 additions & 0 deletions spec/spec_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,8 @@ def name; end
end

include DfE::Analytics::Testing::Helpers

config.expect_with :rspec do |c|
c.max_formatted_output_length = nil
end
end