diff --git a/Gemfile.lock b/Gemfile.lock index 821adcc9..6d7a75a8 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -3,6 +3,7 @@ PATH specs: dfe-analytics (1.12.5) google-cloud-bigquery (~> 1.38) + httparty (~> 0.21) request_store_rails (~> 2) GEM @@ -171,6 +172,9 @@ GEM os (>= 0.9, < 2.0) signet (>= 0.16, < 2.a) hashdiff (1.0.1) + httparty (0.21.0) + mini_mime (>= 1.0.0) + multi_xml (>= 0.5.2) httpclient (2.8.3) i18n (1.14.1) concurrent-ruby (~> 1.0) @@ -203,6 +207,7 @@ GEM mini_portile2 (2.8.2) minitest (5.18.1) multi_json (1.15.0) + multi_xml (0.6.0) net-imap (0.3.6) date net-protocol diff --git a/config/locales/en.yml b/config/locales/en.yml index 7a21416a..a0285ac8 100644 --- a/config/locales/en.yml +++ b/config/locales/en.yml @@ -60,7 +60,7 @@ en: entity_table_checks_enabled: description: | Whether to run entity table checksum job. - default: false + default: false rack_page_cached: description: | A proc which will be called with the rack env, and which should @@ -72,4 +72,29 @@ en: Schedule a maintenance window during which no events are streamed to BigQuery in the format of '22-01-2024 19:30..22-01-2024 20:30' (UTC). default: ENV['BIGQUERY_MAINTENANCE_WINDOW'] - + azure_federated_auth: + description: | + Whether to use azure workload identity federation for authentication + instead of the BigQuery API JSON Key. Note that this also will also + use a new version of the BigQuery streaming APIs. + default: false + azure_client_id: + description: | + Client Id of the app in azure + default: ENV['AZURE_CLIENT_ID'] + azure_token_path: + description: | + Path of token file for used for getting token from azure ad + default: ENV['AZURE_FEDERATED_TOKEN_FILE'] + azure_scope: + description: | + Azure audience scope + default: api://AzureADTokenExchange/.default + gcp_scope: + description: | + Google cloud scope + default: https://www.googleapis.com/auth/cloud-platform + google_cloud_credentials: + description: | + Google generated cloud credentials file + default: ENV['GOOGLE_CLOUD_CREDENTIALS'] diff --git a/dfe-analytics.gemspec b/dfe-analytics.gemspec index 4b2f0e9b..b49a4908 100644 --- a/dfe-analytics.gemspec +++ b/dfe-analytics.gemspec @@ -24,6 +24,7 @@ Gem::Specification.new do |spec| end spec.add_dependency 'google-cloud-bigquery', '~> 1.38' + spec.add_dependency 'httparty', '~> 0.21' spec.add_dependency 'request_store_rails', '~> 2' spec.add_development_dependency 'appraisal' diff --git a/lib/dfe/analytics.rb b/lib/dfe/analytics.rb index 552928c1..ab6eb15e 100644 --- a/lib/dfe/analytics.rb +++ b/lib/dfe/analytics.rb @@ -2,6 +2,8 @@ require 'request_store_rails' require 'i18n' +require 'httparty' +require 'google/cloud/bigquery' require 'dfe/analytics/event_schema' require 'dfe/analytics/fields' require 'dfe/analytics/entities' @@ -23,34 +25,14 @@ require 'dfe/analytics/middleware/request_identity' require 'dfe/analytics/middleware/send_cached_page_request_event' require 'dfe/analytics/railtie' +require 'dfe/analytics/big_query_api' +require 'dfe/analytics/big_query_legacy_api' +require 'dfe/analytics/azure_federated_auth' module DfE module Analytics class ConfigurationError < StandardError; end - def self.events_client - @events_client ||= begin - require 'google/cloud/bigquery' - - missing_config = %i[ - bigquery_project_id - bigquery_table_name - bigquery_dataset - bigquery_api_json_key - ].select { |val| config.send(val).nil? } - - raise(ConfigurationError, "DfE::Analytics: missing required config values: #{missing_config.join(', ')}") if missing_config.any? - - Google::Cloud::Bigquery.new( - project: config.bigquery_project_id, - credentials: JSON.parse(config.bigquery_api_json_key), - retries: config.bigquery_retries, - timeout: config.bigquery_timeout - ).dataset(config.bigquery_dataset, skip_lookup: true) - .table(config.bigquery_table_name, skip_lookup: true) - end - end - def self.config configurables = %i[ log_only @@ -69,6 +51,12 @@ def self.config entity_table_checks_enabled rack_page_cached bigquery_maintenance_window + azure_federated_auth + azure_client_id + azure_token_path + azure_scope + gcp_scope + google_cloud_credentials ] @config ||= Struct.new(*configurables).new @@ -93,6 +81,15 @@ def self.configure config.entity_table_checks_enabled ||= false config.rack_page_cached ||= proc { |_rack_env| false } config.bigquery_maintenance_window ||= ENV.fetch('BIGQUERY_MAINTENANCE_WINDOW', nil) + config.azure_federated_auth ||= false + + return unless config.azure_federated_auth + + config.azure_client_id ||= ENV.fetch('AZURE_CLIENT_ID', nil) + config.azure_token_path ||= ENV.fetch('AZURE_FEDERATED_TOKEN_FILE', nil) + config.google_cloud_credentials ||= JSON.parse(ENV.fetch('GOOGLE_CLOUD_CREDENTIALS', '{}')).deep_symbolize_keys + config.azure_scope ||= DfE::Analytics::AzureFederatedAuth::DEFAULT_AZURE_SCOPE + config.gcp_scope ||= DfE::Analytics::AzureFederatedAuth::DEFAULT_GCP_SCOPE end def self.initialize! diff --git a/lib/dfe/analytics/azure_federated_auth.rb b/lib/dfe/analytics/azure_federated_auth.rb new file mode 100644 index 00000000..3af42014 --- /dev/null +++ b/lib/dfe/analytics/azure_federated_auth.rb @@ -0,0 +1,96 @@ +# frozen_string_literal: true + +require 'googleauth' + +module DfE + module Analytics + # Azure client for workload identity federation with GCP using OAuth + class AzureFederatedAuth + DEFAULT_AZURE_SCOPE = 'api://AzureADTokenExchange/.default' + DEFAULT_GCP_SCOPE = 'https://www.googleapis.com/auth/cloud-platform' + ACCESS_TOKEN_EXPIRE_TIME_LEEWAY = 10.seconds + + def self.gcp_client_credentials + return @gcp_client_credentials if @gcp_client_credentials && !@gcp_client_credentials.expired? + + azure_token = azure_access_token + + azure_google_exchange_token = azure_google_exchange_access_token(azure_token) + + google_token, expire_time = google_access_token(azure_google_exchange_token) + + expire_time_with_leeway = expire_time.to_datetime - ACCESS_TOKEN_EXPIRE_TIME_LEEWAY + + @gcp_client_credentials = Google::Auth::UserRefreshCredentials.new(access_token: google_token, expires_at: expire_time_with_leeway) + end + + def self.azure_access_token + aad_token_request_body = { + grant_type: 'client_credentials', + client_id: DfE::Analytics.config.azure_client_id, + scope: DfE::Analytics.config.azure_scope, + client_assertion_type: 'urn:ietf:params:oauth:client-assertion-type:jwt-bearer', + client_assertion: File.read(DfE::Analytics.config.azure_token_path) + } + + azure_token_response = + HTTParty.get(DfE::Analytics.config.google_cloud_credentials[:credential_source][:url], body: aad_token_request_body) + + unless azure_token_response.success? + error_message = "Error calling azure token API: status: #{azure_token_response.code} body: #{azure_token_response.body}" + + Rails.logger.error error_message + + raise Error, error_message + end + + azure_token_response.parsed_response['access_token'] + end + + def self.azure_google_exchange_access_token(azure_token) + request_body = { + grant_type: 'urn:ietf:params:oauth:grant-type:token-exchange', + audience: DfE::Analytics.config.google_cloud_credentials[:audience], + scope: DfE::Analytics.config.gcp_scope, + requested_token_type: 'urn:ietf:params:oauth:token-type:access_token', + subject_token: azure_token, + subject_token_type: DfE::Analytics.config.google_cloud_credentials[:subject_token_type] + } + + exchange_token_response = HTTParty.post(DfE::Analytics.config.google_cloud_credentials[:token_url], body: request_body) + + unless exchange_token_response.success? + error_message = "Error calling google exchange token API: status: #{exchange_token_response.code} body: #{exchange_token_response.body}" + + Rails.logger.error error_message + + raise Error, error_message + end + + exchange_token_response.parsed_response['access_token'] + end + + def self.google_access_token(azure_google_exchange_token) + google_token_response = HTTParty.post( + DfE::Analytics.config.google_cloud_credentials[:service_account_impersonation_url], + headers: { 'Authorization' => "Bearer #{azure_google_exchange_token}" }, + body: { scope: DfE::Analytics.config.gcp_scope } + ) + + unless google_token_response.success? + error_message = "Error calling google token API: status: #{google_token_response.code} body: #{google_token_response.body}" + + Rails.logger.error error_message + + raise Error, error_message + end + + parsed_response = google_token_response.parsed_response + + [parsed_response['accessToken'], parsed_response['expireTime']] + end + + class Error < StandardError; end + end + end +end diff --git a/lib/dfe/analytics/big_query_api.rb b/lib/dfe/analytics/big_query_api.rb new file mode 100644 index 00000000..3ffba318 --- /dev/null +++ b/lib/dfe/analytics/big_query_api.rb @@ -0,0 +1,71 @@ +# frozen_string_literal: true + +module DfE + module Analytics + # For use with for workload identity federeation + class BigQueryApi + def self.events_client + @events_client ||= begin + missing_config = %i[ + bigquery_project_id + bigquery_table_name + bigquery_dataset + azure_client_id + azure_token_path + azure_scope + gcp_scope + google_cloud_credentials + ].select { |val| DfE::Analytics.config.send(val).blank? } + + raise(ConfigurationError, "DfE::Analytics: missing required config values: #{missing_config.join(', ')}") if missing_config.any? + + Google::Apis::BigqueryV2::BigqueryService.new + end + + @events_client.authorization = DfE::Analytics::AzureFederatedAuth.gcp_client_credentials + @events_client + end + + def self.insert(events) + rows = events.map { |event| { json: event } } + data_request = Google::Apis::BigqueryV2::InsertAllTableDataRequest.new(rows: rows, skip_invalid_rows: true) + options = Google::Apis::RequestOptions.new(retries: DfE::Analytics.config.bigquery_retries) + + response = + events_client.insert_all_table_data( + DfE::Analytics.config.bigquery_project_id, + DfE::Analytics.config.bigquery_dataset, + DfE::Analytics.config.bigquery_table_name, + data_request, + options: options + ) + + return unless response.insert_errors.present? + + event_count = events.length + error_message = error_message_for(response) + + Rails.logger.error(error_message) + + events.each.with_index(1) do |event, index| + Rails.logger.info("DfE::Analytics possible error processing event (#{index}/#{event_count}): #{event.inspect}") + end + + raise SendEventsError, error_message + end + + def self.error_message_for(response) + message = + response + .insert_errors + .map { |insert_error| "index: #{insert_error.index} error: #{insert_error.errors.map(&:message).join(' ')} insert_error: #{insert_error}" } + .compact.join("\n") + + "DfE::Analytics BigQuery API insert error for #{response.insert_errors.length} event(s):\n#{message}" + end + + class ConfigurationError < StandardError; end + class SendEventsError < StandardError; end + end + end +end diff --git a/lib/dfe/analytics/big_query_legacy_api.rb b/lib/dfe/analytics/big_query_legacy_api.rb new file mode 100644 index 00000000..568e7eda --- /dev/null +++ b/lib/dfe/analytics/big_query_legacy_api.rb @@ -0,0 +1,60 @@ +# frozen_string_literal: true + +module DfE + module Analytics + # For use with legacy authentication with fixed api key + class BigQueryLegacyApi + def self.events_client + @events_client ||= begin + # Check for missing config items - otherwise may get obscure api errors + missing_config = %i[ + bigquery_project_id + bigquery_table_name + bigquery_dataset + bigquery_api_json_key + ].select { |val| DfE::Analytics.config.send(val).blank? } + + raise(ConfigurationError, "DfE::Analytics: missing required config values: #{missing_config.join(', ')}") if missing_config.any? + + Google::Cloud::Bigquery.new( + project: DfE::Analytics.config.bigquery_project_id, + credentials: JSON.parse(DfE::Analytics.config.bigquery_api_json_key), + retries: DfE::Analytics.config.bigquery_retries, + timeout: DfE::Analytics.config.bigquery_timeout + ).dataset(DfE::Analytics.config.bigquery_dataset, skip_lookup: true) + .table(DfE::Analytics.config.bigquery_table_name, skip_lookup: true) + end + end + + def self.insert(events) + response = events_client.insert(events, ignore_unknown: true) + + return if response.success? + + event_count = events.length + error_message = error_message_for(response) + + Rails.logger.error(error_message) + + events.each.with_index(1) do |event, index| + Rails.logger.info("DfE::Analytics possible error processing event (#{index}/#{event_count}): #{event.inspect}") + end + + raise SendEventsError, error_message + end + + def self.error_message_for(response) + message = + response + .error_rows + .map { |error_row| "index: #{response.index_for(error_row)} error: #{response.errors_for(error_row)} error_row: #{error_row}" } + .compact.join("\n") + + "DfE::Analytics BigQuery API insert error for #{response.error_rows.length} event(s): response error count: #{response.error_count}\n#{message}" + end + + class ConfigurationError < StandardError; end + class SendEventsError < StandardError; end + end + end +end diff --git a/lib/dfe/analytics/send_events.rb b/lib/dfe/analytics/send_events.rb index 41eec582..c7d56def 100644 --- a/lib/dfe/analytics/send_events.rb +++ b/lib/dfe/analytics/send_events.rb @@ -35,35 +35,9 @@ def perform(events) .each { |event| Rails.logger.info("DfE::Analytics processing: #{event.inspect}") } end - response = DfE::Analytics.events_client.insert(events, ignore_unknown: true) - - unless response.success? - event_count = events.length - error_message = error_message_for(response) - - Rails.logger.error(error_message) - - events.each.with_index(1) do |event, index| - Rails.logger.info("DfE::Analytics possible error processing event (#{index}/#{event_count}): #{event.inspect}") - end - - raise SendEventsError, error_message - end + DfE::Analytics.config.azure_federated_auth ? DfE::Analytics::BigQueryApi.insert(events) : DfE::Analytics::BigQueryLegacyApi.insert(events) end end - - def error_message_for(resp) - message = - resp - .error_rows - .map { |row| "index: #{resp.index_for(row)} error: #{resp.errors_for(row)} error_row: #{row}" } - .compact.join("\n") - - "DfE::Analytics BigQuery API insert error for #{resp.error_rows.length} event(s): response error count: #{resp.error_count}\n#{message}" - end - end - - class SendEventsError < StandardError end end end diff --git a/lib/dfe/analytics/testing.rb b/lib/dfe/analytics/testing.rb index c185634f..671e3703 100644 --- a/lib/dfe/analytics/testing.rb +++ b/lib/dfe/analytics/testing.rb @@ -42,7 +42,7 @@ def switch_test_mode(test_mode) end end - class StubClient + class LegacyStubClient Response = Struct.new(:success?) def insert(*) @@ -50,6 +50,24 @@ def insert(*) end end + module LegacyTestOverrides + def events_client + if DfE::Analytics::Testing.fake? + LegacyStubClient.new + else + super + end + end + end + + class StubClient + Response = Struct.new(:insert_errors) + + def insert(*) + Response.new([]) + end + end + module TestOverrides def events_client if DfE::Analytics::Testing.fake? @@ -63,6 +81,7 @@ def events_client # Default to fake mode DfE::Analytics::Testing.fake! - DfE::Analytics.singleton_class.send :prepend, TestOverrides + DfE::Analytics::BigQueryLegacyApi.singleton_class.send :prepend, LegacyTestOverrides + DfE::Analytics::BigQueryApi.singleton_class.send :prepend, TestOverrides end end diff --git a/lib/dfe/analytics/testing/helpers.rb b/lib/dfe/analytics/testing/helpers.rb index 72ce32f2..4a1739e4 100644 --- a/lib/dfe/analytics/testing/helpers.rb +++ b/lib/dfe/analytics/testing/helpers.rb @@ -4,9 +4,17 @@ module DfE module Analytics module Testing module Helpers - def stub_bigquery_auth! + def stub_analytics_event_submission + if DfE::Analytics.config.azure_federated_auth + nil + else + stub_analytics_legacy_event_submission + end + end + + def stub_bigquery_legacy_auth! # will noop if called more than once - @stub_bigquery_auth ||= begin + @stub_bigquery_legacy_auth ||= begin DfE::Analytics.configure do |config| fake_bigquery_key = { 'type' => 'service_account', 'project_id' => 'abc', @@ -30,15 +38,23 @@ def stub_bigquery_auth! end end - def stub_analytics_event_submission - stub_bigquery_auth! + def stub_analytics_legacy_event_submission + stub_bigquery_legacy_auth! stub_request(:post, /bigquery.googleapis.com/) .to_return(status: 200, body: '{}', headers: { 'Content-Type' => 'application/json' }) end def stub_analytics_event_submission_with_insert_errors - stub_bigquery_auth! + if DfE::Analytics.config.azure_federated_auth + nil + else + stub_analytics_legacy_event_submission_with_insert_errors + end + end + + def stub_analytics_legacy_event_submission_with_insert_errors + stub_bigquery_legacy_auth! body = { insertErrors: [ @@ -58,6 +74,224 @@ def stub_analytics_event_submission_with_insert_errors .to_return(status: 200, body: body.to_json, headers: { 'Content-Type' => 'application/json' }) end + def stub_azure_access_token_request + # will noop if called more than once + @stub_azure_access_token_request ||= DfE::Analytics.configure do |config| + config.azure_client_id = 'fake_az_client_id_1234' + config.azure_scope = 'fake_az_scope' + config.azure_token_path = 'fake_az_token_path' + config.google_cloud_credentials = { + credential_source: { + url: 'https://login.microsoftonline.com/fake-az-token-id/oauth2/v2.0/token' + } + } + end + + request_body = + 'grant_type=client_credentials&client_id=fake_az_client_id_1234&scope=fake_az_scope&' \ + 'client_assertion_type=urn%3Aietf%3Aparams%3Aoauth%3Aclient-assertion-type%3Ajwt-bearer&' \ + 'client_assertion=fake_az_token' + + response_body = { + 'token_type' => 'Bearer', + 'expires_in' => 86_399, + 'ext_expires_in' => 86_399, + 'access_token' => 'fake_az_response_token' + }.to_json + + stub_request(:get, 'https://login.microsoftonline.com/fake-az-token-id/oauth2/v2.0/token') + .with( + body: request_body, + headers: { + 'Accept' => '*/*', + 'Accept-Encoding' => 'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', + 'User-Agent' => 'Ruby' + } + ) + .to_return( + status: 200, + body: response_body, + headers: { + 'content-type' => ['application/json; charset=utf-8'] + } + ) + end + + def stub_azure_access_token_request_with_auth_error + # will noop if called more than once + @stub_azure_access_token_request_with_auth_error ||= DfE::Analytics.configure do |config| + config.azure_client_id = 'fake_az_client_id_1234' + config.azure_scope = 'fake_az_scope' + config.azure_token_path = 'fake_az_token_path' + config.google_cloud_credentials = { + credential_source: { + url: 'https://login.microsoftonline.com/fake-az-token-id/oauth2/v2.0/token' + } + } + end + + error_response_body = { + 'error' => 'unsupported_grant_type', + 'error_description' => 'AADSTS70003: The app requested an unsupported grant type ...', + 'error_codes' => [70_003], + 'timestamp' => '2024-03-18 19:55:40Z', + 'trace_id' => '0e58a943-a980-6d7e-89ba-c9740c572100', + 'correlation_id' => '84f1c2d2-5288-4879-a038-429c31193c9c' + }.to_json + + stub_request(:get, 'https://login.microsoftonline.com/fake-az-token-id/oauth2/v2.0/token') + .to_return( + status: 400, + body: error_response_body, + headers: { + 'content-type' => ['application/json; charset=utf-8'] + } + ) + end + + def stub_azure_google_exchange_access_token_request + # will noop if called more than once + @stub_azure_google_exchange_access_token_request ||= DfE::Analytics.configure do |config| + config.gcp_scope = 'fake_gcp_scope' + config.azure_token_path = 'fake_az_token_path' + config.google_cloud_credentials = { + audience: 'fake_gcp_aud', + subject_token_type: 'fake_sub_token_type', + token_url: 'https://sts.googleapis.com/v1/token' + } + end + + request_body = 'grant_type=urn%3Aietf%3Aparams%3Aoauth%3Agrant-type%3Atoken-exchange' \ + '&audience=fake_gcp_aud&scope=fake_gcp_scope&requested_token_type=urn%3' \ + 'Aietf%3Aparams%3Aoauth%3Atoken-type%3Aaccess_token&subject_token=' \ + 'fake_az_response_token&subject_token_type=fake_sub_token_type' + + response_body = { + token_type: 'Bearer', + expires_in: 3599, + issued_token_type: 'urn:ietf:params:oauth:token-type:access_token', + access_token: 'fake_az_gcp_exchange_token_response' + }.to_json + + stub_request(:post, 'https://sts.googleapis.com/v1/token') + .with( + body: request_body, + headers: { + 'Accept' => '*/*', + 'Accept-Encoding' => 'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', + 'User-Agent' => 'Ruby' + } + ) + .to_return( + status: 200, + body: response_body, + headers: { + 'content-type' => ['application/json; charset=utf-8'] + } + ) + end + + def stub_azure_google_exchange_access_token_request_with_auth_error + # will noop if called more than once + @stub_azure_google_exchange_access_token_request_with_auth_error ||= DfE::Analytics.configure do |config| + config.gcp_scope = 'fake_gcp_scope' + config.azure_token_path = 'fake_az_token_path' + config.google_cloud_credentials = { + audience: 'fake_gcp_aud', + subject_token_type: 'fake_sub_token_type', + token_url: 'https://sts.googleapis.com/v1/token' + } + end + + error_response_body = { + error: 'invalid_grant', + error_description: 'Unable to parse the ID Token.' + }.to_json + + stub_request(:post, 'https://sts.googleapis.com/v1/token') + .to_return( + status: 400, + body: error_response_body, + headers: { + 'content-type' => ['application/json; charset=utf-8'] + } + ) + end + + def stub_google_access_token_request + # will noop if called more than once + @stub_google_access_token_request ||= DfE::Analytics.configure do |config| + config.gcp_scope = 'fake_gcp_scope' + config.azure_token_path = 'fake_az_token_path' + config.google_cloud_credentials = { + service_account_impersonation_url: 'https://iamcredentials.googleapis.com/v1/projects/-/serviceAccounts/cip-gcp-spike@my_project.iam.gserviceaccount.com:generateAccessToken' + } + end + + request_body = 'scope=fake_gcp_scope' + + response_body = { + expireTime: '2024-03-09T14:38:02Z', + accessToken: 'fake_google_response_token' + }.to_json + + stub_request( + :post, + 'https://iamcredentials.googleapis.com/v1/projects/-/serviceAccounts/cip-gcp-spike@my_project.iam.gserviceaccount.com:generateAccessToken' + ).with( + body: request_body, + headers: { + 'Accept' => '*/*', + 'Accept-Encoding' => 'gzip;q=1.0,deflate;q=0.6,identity;q=0.3', + 'User-Agent' => 'Ruby' + } + ).to_return( + status: 200, + body: response_body, + headers: { + 'content-type' => ['application/json; charset=utf-8'] + } + ) + end + + def stub_google_access_token_request_with_auth_error + # will noop if called more than once + @stub_google_access_token_request_with_auth_error ||= DfE::Analytics.configure do |config| + config.gcp_scope = 'fake_gcp_scope' + config.azure_token_path = 'fake_az_token_path' + config.google_cloud_credentials = { + service_account_impersonation_url: 'https://iamcredentials.googleapis.com/v1/projects/-/serviceAccounts/cip-gcp-spike@my_project.iam.gserviceaccount.com:generateAccessToken' + } + end + + error_response_body = { + error: { + code: 401, + message: 'Request had invalid authentication credentials. Expected OAuth 2 access token, login cookie or other valid authentication credential. See https://developers.google.com/identity/sign-in/web/devconsole-project.', + status: 'UNAUTHENTICATED', + details: [{ + '@type': 'type.googleapis.com/google.rpc.ErrorInfo', + reason: 'ACCESS_TOKEN_TYPE_UNSUPPORTED', + metadata: { + service: 'iamcredentials.googleapis.com', + method: 'google.iam.credentials.v1.IAMCredentials.GenerateAccessToken' + } + }] + } + }.to_json + + stub_request( + :post, + 'https://iamcredentials.googleapis.com/v1/projects/-/serviceAccounts/cip-gcp-spike@my_project.iam.gserviceaccount.com:generateAccessToken' + ).to_return( + status: 401, + body: error_response_body, + headers: { + 'content-type' => ['application/json; charset=utf-8'] + } + ) + end + def with_analytics_config(options) old_config = DfE::Analytics.config.dup DfE::Analytics.configure do |config| @@ -68,6 +302,13 @@ def with_analytics_config(options) ensure DfE::Analytics.instance_variable_set(:@config, old_config) end + + def test_dummy_config + config = DfE::Analytics.config.members.each_with_object({}) { |key, mem| mem[key] = 'dummy_value' } + config[:google_cloud_credentials] = '{ "dummy_value": 1 }' + config[:bigquery_api_json_key] = '{ "dummy_value": 1 }' + config + end end end end diff --git a/spec/dfe/analytics/azure_federated_auth_spec.rb b/spec/dfe/analytics/azure_federated_auth_spec.rb new file mode 100644 index 00000000..17d31062 --- /dev/null +++ b/spec/dfe/analytics/azure_federated_auth_spec.rb @@ -0,0 +1,167 @@ +# frozen_string_literal: true + +RSpec.describe DfE::Analytics::AzureFederatedAuth do + before(:each) do + allow(DfE::Analytics.config).to receive(:azure_federated_auth).and_return(true) + + DfE::Analytics::Testing.webmock! + end + + let(:azure_access_token) { 'fake_az_response_token' } + let(:azure_google_exchange_access_token) { 'fake_az_gcp_exchange_token_response' } + let(:google_access_token) { 'fake_google_response_token' } + let(:google_access_token_expire_time) { '2024-03-09T14:38:02Z' } + + describe '#azure_access_token' do + before do + allow(File).to receive(:read).with('fake_az_token_path').and_return('fake_az_token') + end + + context 'when azure access token endpoint returns OK response' do + it 'returns the access token' do + stub_azure_access_token_request + + expect(described_class.azure_access_token).to eq(azure_access_token) + end + end + + context 'when azure access token endpoint returns an error response' do + it 'raises the expected error' do + stub_azure_access_token_request_with_auth_error + + expected_err_msg = /Error calling azure token API: status: 400/ + + expect(Rails.logger).to receive(:error).with(expected_err_msg) + + expect { described_class.azure_access_token } + .to raise_error(DfE::Analytics::AzureFederatedAuth::Error, expected_err_msg) + end + end + end + + describe '#azure_google_exchange_access_token' do + context 'when google exchange access token endpoint returns OK response' do + it 'returns the access token' do + stub_azure_google_exchange_access_token_request + + expect(described_class.azure_google_exchange_access_token(azure_access_token)) + .to eq(azure_google_exchange_access_token) + end + end + + context 'when google exchange access token endpoint returns an error response' do + it 'raises the expected error' do + stub_azure_google_exchange_access_token_request_with_auth_error + + expected_err_msg = /Error calling google exchange token API: status: 400/ + + expect(Rails.logger).to receive(:error).with(expected_err_msg) + + expect { described_class.azure_google_exchange_access_token(azure_access_token) } + .to raise_error(DfE::Analytics::AzureFederatedAuth::Error, expected_err_msg) + end + end + end + + describe '#google_access_token' do + context 'when google access token endpoint returns OK response' do + it 'returns the access token' do + stub_google_access_token_request + + expect(described_class.google_access_token(azure_google_exchange_access_token)) + .to eq([google_access_token, google_access_token_expire_time]) + end + end + + context 'when google access token endpoint returns an error response' do + it 'raises the expected error' do + stub_google_access_token_request_with_auth_error + + expected_err_msg = /Error calling google token API: status: 401/ + + expect(Rails.logger).to receive(:error).with(expected_err_msg) + + expect { described_class.google_access_token(azure_google_exchange_access_token) } + .to raise_error(DfE::Analytics::AzureFederatedAuth::Error, expected_err_msg) + end + end + end + + describe '#gcp_client_credentials' do + let(:future_expire_time) { Time.now + 1.hour } + + before do + allow(described_class).to receive(:azure_access_token).and_return(azure_access_token) + + allow(described_class) + .to receive(:azure_google_exchange_access_token) + .with(azure_access_token).and_return(azure_google_exchange_access_token) + + allow(described_class) + .to receive(:google_access_token) + .with(azure_google_exchange_access_token).and_return([google_access_token, future_expire_time]) + end + + it 'returns the expected client credentials' do + expect(described_class.gcp_client_credentials).to be_an_instance_of(Google::Auth::UserRefreshCredentials) + expect(described_class.gcp_client_credentials.access_token).to eq(google_access_token) + expect(described_class.gcp_client_credentials.expires_at) + .to be_within(DfE::Analytics::AzureFederatedAuth::ACCESS_TOKEN_EXPIRE_TIME_LEEWAY).of(future_expire_time) + end + + context 'token expiry' do + context 'when expire time is in the future' do + it 'calls token APIs once only on mutiple calls to get access token' do + expect(described_class) + .to receive(:azure_access_token) + .and_return(azure_access_token) + .once + + expect(described_class) + .to receive(:azure_google_exchange_access_token) + .with(azure_access_token) + .and_return(azure_google_exchange_access_token) + .once + + expect(described_class) + .to receive(:google_access_token) + .with(azure_google_exchange_access_token) + .and_return([google_access_token, future_expire_time]) + .once + + 5.times do + expect(described_class.gcp_client_credentials.access_token).to eq(google_access_token) + end + end + end + + context 'when the token expires on every call' do + it 'calls token APIs everytime there is a call to get access token' do + expect(described_class) + .to receive(:azure_access_token) + .and_return(azure_access_token) + .exactly(5) + .times + + expect(described_class) + .to receive(:azure_google_exchange_access_token) + .with(azure_access_token) + .and_return(azure_google_exchange_access_token) + .exactly(5) + .times + + expect(described_class) + .to receive(:google_access_token) + .with(azure_google_exchange_access_token) + .and_return([google_access_token, Time.now]) + .exactly(5) + .times + + 5.times do + expect(described_class.gcp_client_credentials.access_token).to eq(google_access_token) + end + end + end + end + end +end diff --git a/spec/dfe/analytics/big_query_api_spec.rb b/spec/dfe/analytics/big_query_api_spec.rb new file mode 100644 index 00000000..3b8aba0b --- /dev/null +++ b/spec/dfe/analytics/big_query_api_spec.rb @@ -0,0 +1,101 @@ +# frozen_string_literal: true + +RSpec.describe DfE::Analytics::BigQueryApi do + let(:event) do + { + environment: 'test', + request_method: 'GET', + request_path: '/provider/applications', + namespace: 'provider_interface', + user_id: 3456 + } + end + + let(:events_client) { double(:events_client) } + let(:authorization) { double(:authorization) } + + before(:each) do + allow(DfE::Analytics.config).to receive(:azure_federated_auth).and_return(true) + + allow(Google::Apis::BigqueryV2::BigqueryService).to receive(:new).and_return(events_client) + allow(DfE::Analytics::AzureFederatedAuth).to receive(:gcp_client_credentials).and_return(authorization) + allow(events_client).to receive(:authorization=).and_return(authorization) + + DfE::Analytics::Testing.webmock! + end + + describe '#events_client' do + it 'raises a configuration error on missing config values' do + with_analytics_config(bigquery_project_id: nil) do + expect { described_class.events_client }.to raise_error(DfE::Analytics::BigQueryApi::ConfigurationError) + end + end + + context 'when authorization endpoint returns OK response' do + it 'calls the expected big query apis' do + with_analytics_config(test_dummy_config) do + expect(described_class.events_client).to eq(events_client) + end + end + end + end + + describe '#insert' do + subject(:insert) do + with_analytics_config(test_dummy_config) do + described_class.insert([event.as_json]) + end + end + + context 'when the request is successful' do + let(:response) { double(:response, insert_errors: []) } + + it 'does not log the request when event_debug disabled' do + allow(events_client).to receive(:insert_all_table_data).and_return(response) + expect(Rails.logger).not_to receive(:info) + + insert + end + + it 'calls the expected big query apis' do + expect(events_client).to receive(:insert_all_table_data) + .with( + test_dummy_config[:bigquery_project_id], + test_dummy_config[:bigquery_dataset], + test_dummy_config[:bigquery_table_name], + an_instance_of(Google::Apis::BigqueryV2::InsertAllTableDataRequest), + options: an_instance_of(Google::Apis::RequestOptions) + ) + .and_return(response) + + insert + end + end + + context 'when the request is not successful' do + let(:response) { double(:response, insert_errors: [insert_error]) } + let(:insert_error) { double(:insert_error, index: 0, errors: [error]) } + let(:error) { double(:error, message: 'An error.') } + + before { expect(events_client).to receive(:insert_all_table_data).and_return(response) } + + it 'raises an exception' do + expect { insert }.to raise_error(DfE::Analytics::BigQueryApi::SendEventsError, /An error./) + end + + it 'contains the insert errors' do + insert + rescue DfE::Analytics::BigQueryApi::SendEventsError => e + expect(e.message).to_not be_empty + end + + it 'logs the error message' do + expect(Rails.logger).to receive(:error).with(/DfE::Analytics BigQuery API insert error for 1 event\(s\):/) + + insert + rescue DfE::Analytics::BigQueryApi::SendEventsError + nil + end + end + end +end diff --git a/spec/dfe/analytics/big_query_legacy_api_spec.rb b/spec/dfe/analytics/big_query_legacy_api_spec.rb new file mode 100644 index 00000000..ece7d551 --- /dev/null +++ b/spec/dfe/analytics/big_query_legacy_api_spec.rb @@ -0,0 +1,91 @@ +# frozen_string_literal: true + +RSpec.describe DfE::Analytics::BigQueryLegacyApi do + let(:event) do + { + environment: 'test', + request_method: 'GET', + request_path: '/provider/applications', + namespace: 'provider_interface', + user_id: 3456 + } + end + + before(:each) do + allow(DfE::Analytics.config).to receive(:azure_federated_auth).and_return(false) + + DfE::Analytics::Testing.webmock! + end + + describe '#events_client' do + it 'raises a configuration error on missing config values' do + with_analytics_config(bigquery_project_id: nil) do + expect { described_class.events_client }.to raise_error(DfE::Analytics::BigQueryLegacyApi::ConfigurationError) + end + end + + context 'when authorization endpoint returns OK response' do + let(:events_client) { double(:events_client) } + + before do + allow(Google::Cloud::Bigquery) + .to receive_message_chain(:new, :dataset, :table) + .and_return(events_client) + end + + it 'calls the expected big query apis' do + with_analytics_config(test_dummy_config) do + expect(described_class.events_client).to eq(events_client) + end + end + end + end + + describe '#insert' do + subject(:insert) { described_class.insert([event.as_json]) } + + context 'when the request is successful' do + it 'does not log the request when event_debug disabled' do + stub_analytics_event_submission + + expect(Rails.logger).not_to receive(:info) + + insert + end + + it 'sends the events JSON to Bigquery' do + request = stub_analytics_event_submission + + insert + + expect(request.with do |req| + body = JSON.parse(req.body) + payload = body['rows'].first['json'] + expect(payload.except('occurred_at', 'request_uuid')).to match(a_hash_including(event.deep_stringify_keys)) + end).to have_been_made + end + end + + context 'when the request is not successful' do + before { stub_analytics_event_submission_with_insert_errors } + + it 'raises an exception' do + expect { insert }.to raise_error(DfE::Analytics::BigQueryLegacyApi::SendEventsError, /An error./) + end + + it 'contains the insert errors' do + insert + rescue DfE::Analytics::BigQueryLegacyApi::SendEventsError => e + expect(e.message).to_not be_empty + end + + it 'logs the error message' do + expect(Rails.logger).to receive(:error).with(/DfE::Analytics BigQuery API insert error for 1 event\(s\):/) + + insert + rescue DfE::Analytics::BigQueryLegacyApi::SendEventsError + nil + end + end + end +end diff --git a/spec/dfe/analytics/load_entities_spec.rb b/spec/dfe/analytics/load_entities_spec.rb index 1b01808a..96f63850 100644 --- a/spec/dfe/analytics/load_entities_spec.rb +++ b/spec/dfe/analytics/load_entities_spec.rb @@ -46,6 +46,8 @@ allow(Rails.logger).to receive(:info) DfE::Analytics.initialize! + + DfE::Analytics::Testing.fake! end around do |ex| diff --git a/spec/dfe/analytics/send_events_spec.rb b/spec/dfe/analytics/send_events_spec.rb index 5b450564..7fdb3920 100644 --- a/spec/dfe/analytics/send_events_spec.rb +++ b/spec/dfe/analytics/send_events_spec.rb @@ -3,97 +3,36 @@ RSpec.describe DfE::Analytics::SendEvents do include ActiveJob::TestHelper - describe '#perform' do - let(:event) do - { - environment: 'test', - request_method: 'GET', - request_path: '/provider/applications', - namespace: 'provider_interface', - user_id: 3456 - } - end - - context 'when the request is successful' do - it 'sends the events JSON to Bigquery' do - request = stub_analytics_event_submission - - DfE::Analytics::Testing.webmock! do - described_class.new.perform([event.as_json]) - - expect(request.with do |req| - body = JSON.parse(req.body) - payload = body['rows'].first['json'] - expect(payload.except('occurred_at', 'request_uuid')).to match(a_hash_including(event.deep_stringify_keys)) - end).to have_been_made - end - end - - it 'does not log the request when event_debug disabled' do - stub_analytics_event_submission - - expect(Rails.logger).not_to receive(:info) - - DfE::Analytics::Testing.webmock! do - described_class.new.perform([event.as_json]) - end - end - end - - context 'when the request is not successful' do - before { stub_analytics_event_submission_with_insert_errors } - - subject(:perform) do - DfE::Analytics::Testing.webmock! do - described_class.new.perform([event.as_json]) - end - end - - it 'raises an exception' do - expect { perform }.to raise_error(DfE::Analytics::SendEventsError, /An error./) - end - - it 'contains the insert errors' do - perform - rescue DfE::Analytics::SendEventsError => e - expect(e.message).to_not be_empty - end + let(:event) do + { + environment: 'test', + request_method: 'GET', + request_path: '/provider/applications', + namespace: 'provider_interface', + user_id: 3456 + } + end - it 'logs the error message' do - expect(Rails.logger).to receive(:error).with(/DfE::Analytics BigQuery API insert error for 1 event\(s\):/) + let(:events) { [event.as_json] } - perform - rescue DfE::Analytics::SendEventsError - nil - end - end + describe '#perform' do + subject(:perform) { described_class.new.perform(events) } context 'when "log_only" is set' do before do allow(DfE::Analytics).to receive(:log_only?).and_return true end - it 'does not go out to the network' do - request = stub_analytics_event_submission - - DfE::Analytics::Testing.webmock! do - described_class.new.perform([event.as_json]) - expect(request).not_to have_been_made - end + it 'does not go call bigquery apis' do + expect(DfE::Analytics::BigQueryLegacyApi).not_to receive(:insert).with(events) + perform end end describe 'logging events for event debug' do before do - stub_analytics_event_submission - allow(DfE::Analytics).to receive(:event_debug_filters).and_return(event_debug_filters) - end - - subject(:perform) do - DfE::Analytics::Testing.webmock! do - described_class.new.perform([event.as_json]) - end + allow(DfE::Analytics::BigQueryLegacyApi).to receive(:insert) end context 'when the event filter matches' do @@ -135,86 +74,92 @@ end end - context 'when using fake testing mode' do - it 'does not go out to the network' do - request = stub_analytics_event_submission + describe 'Federated auth bigquery api' do + it 'calls the expected federated auth api' do + allow(DfE::Analytics.config).to receive(:azure_federated_auth).and_return(true) - DfE::Analytics::Testing.fake! do - described_class.new.perform([event.as_json]) - expect(request).not_to have_been_made - end + expect(DfE::Analytics::BigQueryApi).to receive(:insert).with(events) + perform end end - describe 'retry behaviour' do - before do - # we don't want to define a permanent exception, just one for this test - stub_const('DummyException', Class.new(StandardError)) + describe 'Legacy bigquery api (default)' do + it 'calls the expected legacy api by default' do + expect(DfE::Analytics::BigQueryLegacyApi).to receive(:insert).with(events) + perform end + end + end + + describe 'retry behaviour' do + before do + # we don't want to define a permanent exception, just one for this test + stub_const('DummyException', Class.new(StandardError)) + end - it 'makes 5 attempts' do - allow(DfE::Analytics).to receive(:log_only?).and_raise(DummyException) + it 'makes 5 attempts' do + allow(DfE::Analytics).to receive(:log_only?).and_raise(DummyException) - assert_performed_jobs 5 do - described_class.perform_later([]) - rescue DummyException - # the final exception won’t be caught - end + assert_performed_jobs 5 do + described_class.perform_later([]) + rescue DummyException + # the final exception won’t be caught end end + end - describe 'maintenance window scheduling' do - let(:events) { [event] } - let(:maintenance_window_start) { Time.zone.parse('25-02-2024 08:00') } - let(:maintenance_window_end) { Time.zone.parse('25-02-2024 10:00') } - let(:current_time_within_window) { Time.zone.parse('25-02-2024 09:00') } + describe 'maintenance window scheduling' do + let(:maintenance_window_start) { Time.zone.parse('25-02-2024 08:00') } + let(:maintenance_window_end) { Time.zone.parse('25-02-2024 10:00') } + let(:current_time_within_window) { Time.zone.parse('25-02-2024 09:00') } - before do - allow(DfE::Analytics).to receive(:within_maintenance_window?).and_return(true) - allow(DfE::Analytics.config).to receive(:bigquery_maintenance_window).and_return('25-02-2024 08:00..25-02-2024 10:00') - Timecop.freeze(current_time_within_window) - end + subject(:send_events) { described_class.do(events) } - after do - Timecop.return - end + before do + allow(DfE::Analytics).to receive(:within_maintenance_window?).and_return(true) + allow(DfE::Analytics.config).to receive(:bigquery_maintenance_window).and_return('25-02-2024 08:00..25-02-2024 10:00') + Timecop.freeze(current_time_within_window) + end - context 'within the maintenance window' do - it 'does not enqueue the events for asynchronous execution' do - expect(DfE::Analytics::SendEvents).not_to receive(:perform_later).with(events) - DfE::Analytics::SendEvents.do(events) - end + after do + Timecop.return + end - it 'does not execute the events synchronously' do - expect(DfE::Analytics::SendEvents).not_to receive(:perform_now).with(events) - DfE::Analytics::SendEvents.do(events) - end + context 'within the maintenance window' do + it 'does not enqueue the events for asynchronous execution' do + expect(DfE::Analytics::SendEvents).not_to receive(:perform_later).with(events) + send_events + end + + it 'does not execute the events synchronously' do + expect(DfE::Analytics::SendEvents).not_to receive(:perform_now).with(events) + send_events + end - it 'schedules the events for after the maintenance window' do - elapsed_seconds = current_time_within_window - maintenance_window_start - expected_wait_until = maintenance_window_end + elapsed_seconds + it 'schedules the events for after the maintenance window' do + elapsed_seconds = current_time_within_window - maintenance_window_start + expected_wait_until = maintenance_window_end + elapsed_seconds - expect(DfE::Analytics::SendEvents).to receive(:set).with(wait_until: expected_wait_until).and_call_original - DfE::Analytics::SendEvents.do(events) - end + expect(DfE::Analytics::SendEvents).to receive(:set).with(wait_until: expected_wait_until).and_call_original + send_events end + end - context 'outside the mainenance window' do - before do - allow(DfE::Analytics).to receive(:within_maintenance_window?).and_return(false) - end + context 'outside the mainenance window' do + before do + allow(DfE::Analytics).to receive(:within_maintenance_window?).and_return(false) + end - it 'enqueues the events for asynchronous execution' do - allow(DfE::Analytics).to receive(:async?).and_return(true) - expect(DfE::Analytics::SendEvents).to receive(:perform_later).with(events) - DfE::Analytics::SendEvents.do(events) - end + it 'enqueues the events for asynchronous execution' do + allow(DfE::Analytics).to receive(:async?).and_return(true) + expect(DfE::Analytics::SendEvents).to receive(:perform_later).with(events) + send_events + end - it 'executes the events synchronously' do - allow(DfE::Analytics).to receive(:async?).and_return(false) - expect(DfE::Analytics::SendEvents).to receive(:perform_now).with(events) - DfE::Analytics::SendEvents.do(events) - end + it 'executes the events synchronously' do + allow(DfE::Analytics).to receive(:async?).and_return(false) + expect(DfE::Analytics::SendEvents).to receive(:perform_now).with(events) + send_events end end end diff --git a/spec/dfe/analytics_spec.rb b/spec/dfe/analytics_spec.rb index 86c92ac6..147c60da 100644 --- a/spec/dfe/analytics_spec.rb +++ b/spec/dfe/analytics_spec.rb @@ -134,14 +134,6 @@ end end - it 'raises a configuration error on missing config values' do - with_analytics_config(bigquery_project_id: nil) do - DfE::Analytics::Testing.webmock! do - expect { DfE::Analytics.events_client }.to raise_error(DfE::Analytics::ConfigurationError) - end - end - end - describe '#entities_for_analytics' do with_model :Candidate do table diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 642910bc..997028db 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -62,7 +62,9 @@ def name; end config.before do DfE::Analytics.instance_variable_set(:@entity_model_mapping, nil) - DfE::Analytics.instance_variable_set(:@events_client, nil) + DfE::Analytics::BigQueryLegacyApi.instance_variable_set(:@events_client, nil) + DfE::Analytics::BigQueryApi.instance_variable_set(:@events_client, nil) + DfE::Analytics::AzureFederatedAuth.instance_variable_set(:@gcp_client_credentials, nil) end config.expect_with :rspec do |c|