diff --git a/.rubocop_todo.yml b/.rubocop_todo.yml index 8e05fabd..e97ef1cc 100644 --- a/.rubocop_todo.yml +++ b/.rubocop_todo.yml @@ -1,6 +1,6 @@ # This configuration was generated by # `rubocop --auto-gen-config` -# on 2022-05-20 13:06:05 UTC using RuboCop version 1.26.1. +# on 2022-06-16 10:08:54 UTC using RuboCop version 1.26.1. # The point is for the user to remove these configuration records # one by one as the offenses are removed from the code base. # Note that changes in the inspected code, or installation of new @@ -14,7 +14,7 @@ Naming/FileName: Exclude: - 'spec/dummy/config/initializers/dfe-analytics.rb' -# Offense count: 12 +# Offense count: 13 # Configuration parameters: AllowedConstants. Style/Documentation: Exclude: @@ -23,6 +23,7 @@ Style/Documentation: - 'lib/dfe/analytics.rb' - 'lib/dfe/analytics/entities.rb' - 'lib/dfe/analytics/event.rb' + - 'lib/dfe/analytics/event_schema.rb' - 'lib/dfe/analytics/load_entities.rb' - 'lib/dfe/analytics/requests.rb' - 'lib/dfe/analytics/send_events.rb' diff --git a/config/event-schema.json b/config/event-schema.json index e5707986..065865f1 100644 --- a/config/event-schema.json +++ b/config/event-schema.json @@ -36,10 +36,7 @@ } }, "request_referer": { - "anyOf": [ - {"type": "string"}, - {"type": "null"} - ] + "type": "string" }, "anonymised_user_agent_and_ip": { "type": "string" @@ -48,13 +45,13 @@ "type": "string" }, "response_status": { - "type": "integer" + "type": "string" }, "namespace": { "type": "string" }, "user_id": { - "type": "integer" + "type": "string" }, "data": { "type": "array", diff --git a/lib/dfe/analytics.rb b/lib/dfe/analytics.rb index 518105cb..08415b0b 100644 --- a/lib/dfe/analytics.rb +++ b/lib/dfe/analytics.rb @@ -2,6 +2,7 @@ require 'request_store_rails' require 'i18n' +require 'dfe/analytics/event_schema' require 'dfe/analytics/fields' require 'dfe/analytics/entities' require 'dfe/analytics/event' diff --git a/lib/dfe/analytics/event_schema.rb b/lib/dfe/analytics/event_schema.rb new 
require 'json'

module DfE
  module Analytics
    # Exposes the dfe-analytics event schema in two forms: the JSON Schema
    # document read from the gem's config/event-schema.json, and a BigQuery
    # table schema derived from that document.
    class EventSchema
      # Sub-fields attached to every BigQuery RECORD column produced here:
      # a required string key with repeated string values.
      KEY_VALUE_FIELDS = [
        { 'mode' => 'REQUIRED', 'name' => 'key', 'type' => 'STRING' }.freeze,
        { 'mode' => 'REPEATED', 'name' => 'value', 'type' => 'STRING' }.freeze
      ].freeze

      # JSON Schema `type` => BigQuery column type. Strings carrying the
      # `date-time` format are handled separately (they become TIMESTAMP).
      BIGQUERY_TYPES = {
        'array' => 'RECORD',
        'string' => 'STRING',
        'integer' => 'INTEGER',
        'number' => 'FLOAT',
        'boolean' => 'BOOLEAN'
      }.freeze

      # Reads the event JSON Schema from the installed dfe-analytics gem.
      #
      # @return [String] the raw contents of config/event-schema.json
      def self.as_json
        gem_root = Gem.loaded_specs['dfe-analytics'].gem_dir
        File.read(File.join(gem_root, 'config', 'event-schema.json'))
      end

      # Converts the JSON Schema's top-level properties into a BigQuery
      # table schema, one column per property, in declaration order.
      #
      # @return [String] a JSON array of BigQuery field definitions
      # @raise [ArgumentError] if a property cannot be mapped to a BigQuery type
      def self.as_bigquery_schema
        schema = JSON.parse(as_json)
        # A schema without a `required` list simply has no REQUIRED columns.
        required_fields = Array(schema['required'])

        bigquery_fields = schema.fetch('properties', {}).map do |name, entry|
          type = resolve_bigquery_type(entry)
          field = {
            'mode' => resolve_bigquery_mode(name, entry, required_fields),
            'name' => name,
            'type' => type
          }
          field['fields'] = KEY_VALUE_FIELDS if type == 'RECORD'
          field
        end

        bigquery_fields.to_json
      end

      # Picks the BigQuery column mode for a property.
      #
      # Arrays are checked first: a repeated column must stay REPEATED even
      # when the property is also listed as required, otherwise it would be
      # emitted as a single REQUIRED record.
      #
      # @param json_schema_entry_name [String] the property name
      # @param json_schema_entry [Hash] the property's JSON Schema definition
      # @param required_fields [Array<String>, nil] names listed under `required`
      # @return [String] 'REPEATED', 'REQUIRED' or 'NULLABLE'
      def self.resolve_bigquery_mode(json_schema_entry_name, json_schema_entry, required_fields)
        return 'REPEATED' if json_schema_entry['type'] == 'array'
        return 'REQUIRED' if Array(required_fields).include?(json_schema_entry_name)

        'NULLABLE'
      end

      # Maps a property's JSON Schema type (and format) to a BigQuery type.
      #
      # @param json_schema_entry [Hash] the property's JSON Schema definition
      # @return [String] the BigQuery column type
      # @raise [ArgumentError] if the entry has no supported `type`; failing
      #   here avoids emitting a column whose type is null
      def self.resolve_bigquery_type(json_schema_entry)
        json_type = json_schema_entry['type']
        return 'TIMESTAMP' if json_type == 'string' && json_schema_entry['format'] == 'date-time'

        BIGQUERY_TYPES.fetch(json_type) do
          raise ArgumentError, "Cannot map JSON schema entry #{json_schema_entry.inspect} to a BigQuery type"
        end
      end

      # Instance-level access to the same schema document, so callers that
      # write `EventSchema.new.as_json` (as the gem's specs do) receive the
      # schema rather than a serialisation of an empty object.
      #
      # @param _options [Object] ignored; accepted for `as_json` call compatibility
      # @return [String] the raw contents of config/event-schema.json
      def as_json(_options = nil)
        self.class.as_json
      end
    end
  end
end
:analytics do + desc 'Print out the dfe-analytics JSON schema' + task :schema do + puts DfE::Analytics::EventSchema.as_json + end + + desc 'Print out the dfe-analytics BigQuery schema' + task :big_query_schema do + puts DfE::Analytics::EventSchema.as_bigquery_schema + end + end +end diff --git a/spec/dfe/analytics/entities_spec.rb b/spec/dfe/analytics/entities_spec.rb index c63ed900..f4fec11a 100644 --- a/spec/dfe/analytics/entities_spec.rb +++ b/spec/dfe/analytics/entities_spec.rb @@ -71,7 +71,7 @@ def self.name model.create expect(DfE::Analytics::SendEvents).to have_received(:perform_later) do |payload| - schema = File.read('config/event-schema.json') + schema = DfE::Analytics::EventSchema.new.as_json schema_validator = JSONSchemaValidator.new(schema, payload.first) expect(schema_validator).to be_valid, schema_validator.failure_message @@ -169,7 +169,7 @@ def self.name entity.update(email_address: 'bar@baz.com') expect(DfE::Analytics::SendEvents).to have_received(:perform_later).twice do |payload| - schema = File.read('config/event-schema.json') + schema = DfE::Analytics::EventSchema.new.as_json schema_validator = JSONSchemaValidator.new(schema, payload.first) expect(schema_validator).to be_valid, schema_validator.failure_message diff --git a/spec/dfe/analytics/event_schema_spec.rb b/spec/dfe/analytics/event_schema_spec.rb new file mode 100644 index 00000000..e17225ce --- /dev/null +++ b/spec/dfe/analytics/event_schema_spec.rb @@ -0,0 +1,23 @@ +RSpec.describe DfE::Analytics::EventSchema do + describe '.as_json' do + it 'returns the JSON schema as an object' do + schema_on_disk = File.read("#{Gem.loaded_specs['dfe-analytics'].gem_dir}/config/event-schema.json") + + output = described_class.as_json + + expect(output).to be_present + expect(output).to eq schema_on_disk + end + end + + describe '.as_bigquery_schema' do + it 'transforms the JSON schema into a BQ schema' do + bq_schema_on_disk = File.read('spec/examples/bigquery_schema.json') + + output = 
JSON.parse(described_class.as_bigquery_schema) + + expect(output).to be_present + expect(output).to match_array JSON.parse(bq_schema_on_disk) + end + end +end diff --git a/spec/dfe/analytics/load_entities_spec.rb b/spec/dfe/analytics/load_entities_spec.rb index bf314b9c..2cd85ec8 100644 --- a/spec/dfe/analytics/load_entities_spec.rb +++ b/spec/dfe/analytics/load_entities_spec.rb @@ -23,7 +23,7 @@ described_class.new(model_name: 'Candidate', sleep_time: 0).run expect(DfE::Analytics::SendEvents).to have_received(:perform_later) do |payload| - schema = File.read('config/event-schema.json') + schema = DfE::Analytics::EventSchema.new.as_json schema_validator = JSONSchemaValidator.new(schema, payload.first) expect(schema_validator).to be_valid, schema_validator.failure_message diff --git a/spec/examples/bigquery_schema.json b/spec/examples/bigquery_schema.json new file mode 100644 index 00000000..4a52fb83 --- /dev/null +++ b/spec/examples/bigquery_schema.json @@ -0,0 +1,106 @@ +[ + { + "mode": "REQUIRED", + "name": "occurred_at", + "type": "TIMESTAMP" + }, + { + "mode": "REQUIRED", + "name": "event_type", + "type": "STRING" + }, + { + "mode": "REQUIRED", + "name": "environment", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "namespace", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "user_id", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "request_uuid", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "request_method", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "request_path", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "request_user_agent", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "request_referer", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "REQUIRED", + "name": "key", + "type": "STRING" + }, + { + "mode": "REPEATED", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "request_query", + "type": "RECORD" + }, + { + "mode": 
"NULLABLE", + "name": "response_content_type", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "response_status", + "type": "STRING" + }, + { + "fields": [ + { + "mode": "REQUIRED", + "name": "key", + "type": "STRING" + }, + { + "mode": "REPEATED", + "name": "value", + "type": "STRING" + } + ], + "mode": "REPEATED", + "name": "data", + "type": "RECORD" + }, + { + "mode": "NULLABLE", + "name": "entity_table_name", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "anonymised_user_agent_and_ip", + "type": "STRING" + } +] diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 51082d4a..a751c934 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -36,4 +36,8 @@ def name; end end include DfE::Analytics::Testing::Helpers + + config.expect_with :rspec do |c| + c.max_formatted_output_length = nil + end end