Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update log masking method #150

Merged
merged 14 commits into from
Jul 1, 2024
12 changes: 0 additions & 12 deletions lib/dfe/analytics.rb
Original file line number Diff line number Diff line change
Expand Up @@ -172,18 +172,6 @@ def self.event_debug_enabled?
event_debug_filters[:event_filters]&.any?
end

# Overwrites hidden-PII fields of +event+ with the literal '[HIDDEN]'.
#
# The list of fields is looked up in +hidden_pii+ under the symbolised
# +entity_table_name+; a table without an entry masks nothing. Only keys
# already present on +event+ are touched. The event is modified in place.
#
# @param event hash-like record (must respond to +key?+ and +[]=+)
# @param entity_table_name table name to look up; +nil+ skips masking entirely
# @return the same +event+ object that was passed in
def self.mask_hidden_data(event, entity_table_name)
  unless entity_table_name.nil?
    fields_to_mask = hidden_pii[entity_table_name.to_sym] || []

    fields_to_mask.each do |field_name|
      next unless event.key?(field_name)

      event[field_name] = '[HIDDEN]'
    end
  end

  event
end

# Reports the +async+ setting read from +config+.
def self.async?
config.async
end
Expand Down
26 changes: 11 additions & 15 deletions lib/dfe/analytics/event_matcher.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,11 @@ module DfE
module Analytics
# Match event against given filters
class EventMatcher
attr_reader :event, :filters, :mask_hidden_data

def initialize(event, filters = DfE::Analytics.event_debug_filters[:event_filters], mask_hidden_data: true)
raise 'Event filters must be set' if filters.nil?
attr_reader :event, :filters

def initialize(event, filters = DfE::Analytics.event_debug_filters[:event_filters])
@event = event.with_indifferent_access
@filters = filters.compact
@mask_hidden_data = mask_hidden_data

mask_hidden_data! if mask_hidden_data
end

def matched?
Expand All @@ -21,6 +16,8 @@ def matched?
private

def filter_matched?(filter, nested_fields = [])
return false if filter.nil? || filter.values.any?(&:nil?)

filter.all? do |field, filter_value|
fields = nested_fields + [field]

Expand All @@ -36,9 +33,14 @@ def filter_matched?(filter, nested_fields = [])
def field_matched?(filter_value, nested_fields)
event_value = event_value_for(nested_fields)

regexp = Regexp.new(filter_value)
return false if event_value.nil?

# Convert values to strings for comparison
filter_value_str = filter_value.to_s
event_value_str = event_value.to_s

regexp.match?(event_value)
regexp = Regexp.new(filter_value_str)
regexp.match?(event_value_str)
end

def event_value_for(nested_fields)
Expand All @@ -52,12 +54,6 @@ def event_value_for(nested_fields)
memo[field]
end
end

def mask_hidden_data!
return unless mask_hidden_data && @event[:data] && @event[:entity_table_name]

@event[:data] = DfE::Analytics.mask_hidden_data(@event[:data], @event[:entity_table_name])
end
end
end
end
37 changes: 30 additions & 7 deletions lib/dfe/analytics/send_events.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,23 +24,46 @@ def self.do(events)
end

def perform(events)
masked_events = events.map do |event|
DfE::Analytics.mask_hidden_data(event, event[:entity_table_name])
end

if DfE::Analytics.log_only?
# Use the Rails logger here as the job's logger is set to :warn by default
Rails.logger.info("DfE::Analytics: #{masked_events.inspect}")
events.each { |event| Rails.logger.info("DfE::Analytics: #{mask_hidden_data(event).inspect}") }
else
if DfE::Analytics.event_debug_enabled?
masked_events
events
.select { |event| DfE::Analytics::EventMatcher.new(event).matched? }
.each { |event| Rails.logger.info("DfE::Analytics processing: #{event.inspect}") }
.each { |event| Rails.logger.info("DfE::Analytics processing: #{mask_hidden_data(event).inspect}") }
end

DfE::Analytics.config.azure_federated_auth ? DfE::Analytics::BigQueryApi.insert(events) : DfE::Analytics::BigQueryLegacyApi.insert(events)
end
end

private

# Builds a log-safe version of +event+ with its hidden data masked.
#
# Events that carry no +hidden_data+ entry (under either a symbol or a
# string key) are returned unchanged, as the very same object. Otherwise a
# deep copy with indifferent access is masked and returned, so the event
# passed in by the caller is never mutated.
#
# @param event hash-like event (must respond to +key?+ and +deep_dup+)
# @return the original +event+, or a masked deep copy of it
def mask_hidden_data(event)
  # Check the original first: events with nothing to hide need no deep copy.
  return event unless event.key?(:hidden_data) || event.key?('hidden_data')

  mask_hidden_data_values(event.deep_dup.with_indifferent_access)
end

# Masks every entry listed under +event[:hidden_data]+.
#
# Each element of the array is handed to +mask_data+. When +hidden_data+ is
# not an Array nothing is changed.
#
# @param event hash-like event, read via +event[:hidden_data]+
# @return the same +event+ object that was passed in
def mask_hidden_data_values(event)
  entries = event[:hidden_data]
  return event unless entries.is_a?(Array)

  entries.each { |entry| mask_data(entry) }
  event
end

# Masks a single hidden-data entry in place.
#
# A present +:value+ is replaced with ['HIDDEN']. When +:key+ is itself a
# Hash, its present +:value+ is replaced the same way. Anything that is not
# a Hash is ignored.
#
# @param data one element of an event's hidden_data list
# @return ['HIDDEN'] when the nested key value was masked, otherwise nil
def mask_data(data)
  return unless data.is_a?(Hash)

  data[:value] = ['HIDDEN'] if data[:value].present?

  nested_key = data[:key]
  nested_key[:value] = ['HIDDEN'] if nested_key.is_a?(Hash) && nested_key[:value].present?
end
end
end
end
21 changes: 21 additions & 0 deletions spec/dfe/analytics/event_matcher_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -127,5 +127,26 @@
end
end
end

# Exercises field_matched? directly: a missing event value must not count as a match.
describe '.field_matched?' do
# NOTE(review): presumably overrides an outer `logging` definition that
# supplies the matcher's filters — confirm against the enclosing describe.
let(:logging) do
{
event_filters: [
{
event_type: 'update_entity',
entity_table_name: 'course_options',
data: {
key: 'course_id'
}
}
]
}
end

it 'returns false when event_value is nil' do
# Force the value lookup to come back nil regardless of the event under test.
allow(subject).to receive(:event_value_for).and_return(nil)
# Invoked through `send`; field_matched? is defined below `private` in EventMatcher.
expect(subject.send(:field_matched?, 'course_id', 'data')).to be false
end
end
end
end
32 changes: 18 additions & 14 deletions spec/dfe/analytics/send_events_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,17 @@

let(:events) { [event.as_json] }

let(:masked_event) do
let(:hidden_pii_event) do
{
'entity_table_name' => 'user_profiles',
'event_type' => 'update_entity',
'data' => [
{ 'key' => 'dob', 'value' => '[HIDDEN]' },
{ 'key' => 'first_name', 'value' => '[HIDDEN]' },
{ 'key' => 'email', 'value' => 'user@example.com' },
{ 'key' => 'phone_number', 'value' => '1234567890' }
],
'hidden_data' => [
{ 'key' => 'dob', 'value' => '20/06/1990' },
{ 'key' => 'first_name', 'value' => 'Sarah' }
]
}
end
Expand All @@ -44,28 +46,30 @@

it 'logs events with all sensitive data masked' do
expect(Rails.logger).to receive(:info) do |log_message|
expect(log_message).to include('"key"=>"dob", "value"=>"[HIDDEN]"')
expect(log_message).to include('"key"=>"first_name", "value"=>"[HIDDEN]"')
expect(log_message).to include('"key"=>"dob", "value"=>["HIDDEN"]')
expect(log_message).to include('"key"=>"first_name", "value"=>["HIDDEN"]')
expect(log_message).to include('"key"=>"email", "value"=>"user@example.com"')
expect(log_message).to include('"key"=>"phone_number", "value"=>"1234567890"')
end

described_class.new.perform([masked_event])
described_class.new.perform([hidden_pii_event])
end
end

describe 'Masking hidden_pii when event_debug_enabled?' do
subject(:perform) { described_class.new.perform(events) }

let(:masked_event) do
let(:hidden_pii_event) do
{
'entity_table_name' => 'user_profiles',
'event_type' => 'update_entity',
'data' => [
{ 'key' => 'dob', 'value' => '[HIDDEN]' },
{ 'key' => 'first_name', 'value' => '[HIDDEN]' },
{ 'key' => 'email', 'value' => 'user@example.com' },
{ 'key' => 'phone_number', 'value' => '1234567890' }
],
'hidden_data' => [
{ 'key' => 'dob', 'value' => '20/06/1990' },
{ 'key' => 'first_name', 'value' => 'Sarah' }
]
}
end
Expand All @@ -78,15 +82,15 @@
entity_table_name: 'user_profiles',
data: {
key: 'dob',
value: '[HIDDEN]'
value: '20/06/1990'
}
},
{
event_type: 'update_entity',
entity_table_name: 'user_profiles',
data: {
key: 'first_name',
value: '[HIDDEN]'
value: 'Sarah'
}
},
{
Expand Down Expand Up @@ -117,13 +121,13 @@

it 'masks sensitive data in the log output' do
expect(Rails.logger).to receive(:info) do |log_message|
expect(log_message).to include('"key"=>"dob", "value"=>"[HIDDEN]"')
expect(log_message).to include('"key"=>"first_name", "value"=>"[HIDDEN]"')
expect(log_message).to include('"key"=>"dob", "value"=>["HIDDEN"]')
expect(log_message).to include('"key"=>"first_name", "value"=>["HIDDEN"]')
expect(log_message).to include('"key"=>"email", "value"=>"user@example.com"')
expect(log_message).to include('"key"=>"phone_number", "value"=>"1234567890"')
end

described_class.new.perform([masked_event])
described_class.new.perform([hidden_pii_event])
end
end

Expand Down