From 5894bf6c392b2838dbf0639ba45f9c13cbf21abf Mon Sep 17 00:00:00 2001 From: Erica Porter Date: Fri, 5 Apr 2024 09:14:50 +0100 Subject: [PATCH] Add hidden_pii.yml (#121) * Add hidden_pii.yml * Address PR comments * Update Readme for hidden_pii.yml --- README.md | 17 +++++---- lib/dfe/analytics.rb | 4 +++ lib/dfe/analytics/fields.rb | 4 +++ .../dfe/analytics/install_generator.rb | 1 + spec/dfe/analytics/fields_spec.rb | 35 +++++++++++++++++-- spec/dummy/config/analytics_hidden_pii.yml | 2 ++ 6 files changed, 52 insertions(+), 11 deletions(-) create mode 100644 spec/dummy/config/analytics_hidden_pii.yml diff --git a/README.md b/README.md index ac52cdf2..97fc8a22 100644 --- a/README.md +++ b/README.md @@ -162,13 +162,13 @@ The `dfe:analytics:install` generator will also initialize some empty config fil | Filename | Purpose | |---------------------------------------|--------------------------------------------------------------------------------------------------------------------| -| `config/analytics.yml` | List all fields we will send to BigQuery | -| `config/analytics_pii.yml` | List all fields we will obfuscate before sending to BigQuery. This should be a subset of fields in `analytics.yml` | -| `config/analytics_blocklist.yml` | Autogenerated file to list all fields we will NOT send to BigQuery, to support the `analytics:check` task | -| `config/analytics_custom_events.yml` | Optional file including list of all custom event names - -**It is imperative that you perform a full check of those fields are being sent, and exclude those containing personally-identifiable information (PII) in `config/analytics_pii.yml`, in order to comply with the requirements of the [Data Protection Act 2018](https://www.gov.uk/data-protection), unless an exemption has been obtained.** +| `config/analytics.yml` | List all fields we will send to BigQuery | +| `config/analytics_pii.yml` | List all fields we will obfuscate before sending to BigQuery. 
This should be a subset of fields in `analytics.yml` | +| `config/analytics_hidden_pii.yml` | List all fields we will send separately to BigQuery where they will be hidden. This should be a subset of fields in `analytics.yml` | +| `config/analytics_blocklist.yml` | Autogenerated file to list all fields we will NOT send to BigQuery, to support the `analytics:check` task | +| `config/analytics_custom_events.yml` | Optional file including list of all custom event names | +**It is imperative that you perform a full check of the fields that are being sent, and list those containing personally-identifiable information (PII) in `config/analytics_hidden_pii.yml` so that they are sent separately and hidden, in order to comply with the requirements of the [Data Protection Act 2018](https://www.gov.uk/data-protection), unless an exemption has been obtained.** When you first install the gem, none of your fields will be listed in `analytics.yml`, so no data will be sent to BigQuery. To get started, generate a blocklist using this command: @@ -177,7 +177,7 @@ bundle exec rails dfe:analytics:regenerate_blocklist ``` Work through `analytics_blocklist.yml` to move entries into `analytics.yml` and -optionally also to `analytics_pii.yml`. +optionally also to `analytics_hidden_pii.yml`. 
When you boot your app, DfE::Analytics will raise an error if there are fields in your field configuration which are present in the database but @@ -389,8 +389,7 @@ See the list of existing event types below for what kinds of event types can be The different types of events that DfE Analytics send are: - `web_request` - sent after a controller action is performed using controller callbacks -- `create_entity` - sent after an object is created using model callbacks -- `update_entity` - sent after an object is updated using model callbacks +- `create_entity` - sent after an object is created using model callbacks - `delete_entity` - sent after an object is deleted using model callbacks - `import_entity` - sent for each object imported using the DfE Analytics import rake tasks diff --git a/lib/dfe/analytics.rb b/lib/dfe/analytics.rb index ab6eb15e..3db8c906 100644 --- a/lib/dfe/analytics.rb +++ b/lib/dfe/analytics.rb @@ -138,6 +138,10 @@ def self.allowlist_pii Rails.application.config_for(:analytics_pii) end + def self.hidden_pii + Rails.application.config_for(:analytics_hidden_pii) + end + def self.blocklist Rails.application.config_for(:analytics_blocklist) end diff --git a/lib/dfe/analytics/fields.rb b/lib/dfe/analytics/fields.rb index bf4dabea..e816d353 100644 --- a/lib/dfe/analytics/fields.rb +++ b/lib/dfe/analytics/fields.rb @@ -58,6 +58,10 @@ def self.allowlist DfE::Analytics.allowlist end + def self.hidden_pii + DfE::Analytics.hidden_pii + end + def self.database DfE::Analytics.all_entities_in_application .reduce({}) do |list, entity| diff --git a/lib/generators/dfe/analytics/install_generator.rb b/lib/generators/dfe/analytics/install_generator.rb index 7fa1b70a..2baca88e 100644 --- a/lib/generators/dfe/analytics/install_generator.rb +++ b/lib/generators/dfe/analytics/install_generator.rb @@ -12,6 +12,7 @@ def install create_file 'config/analytics.yml', { 'shared' => {} }.to_yaml create_file 'config/analytics_pii.yml', { 'shared' => {} }.to_yaml + create_file 
'config/analytics_hidden_pii.yml', { 'shared' => {} }.to_yaml create_file 'config/analytics_blocklist.yml', { 'shared' => {} }.to_yaml end diff --git a/spec/dfe/analytics/fields_spec.rb b/spec/dfe/analytics/fields_spec.rb index 545acbc8..a14f4676 100644 --- a/spec/dfe/analytics/fields_spec.rb +++ b/spec/dfe/analytics/fields_spec.rb @@ -5,15 +5,18 @@ t.string :email_address t.string :first_name t.string :last_name + t.string :dob end end - let(:existing_allowlist) { { Candidate.table_name.to_sym => ['email_address'] } } - let(:existing_blocklist) { { Candidate.table_name.to_sym => ['id'] } } + let(:existing_allowlist) { { Candidate.table_name.to_sym => %w[email_address] } } + let(:existing_blocklist) { { Candidate.table_name.to_sym => %w[id] } } + let(:hidden_pii) { { Candidate.table_name.to_sym => %w[dob] } } before do allow(DfE::Analytics).to receive(:allowlist).and_return(existing_allowlist) allow(DfE::Analytics).to receive(:blocklist).and_return(existing_blocklist) + allow(DfE::Analytics).to receive(:hidden_pii).and_return(hidden_pii) end describe '.allowlist' do @@ -111,5 +114,33 @@ end end end + + context 'handling of hidden PII fields' do + let(:existing_allowlist) { { Candidate.table_name.to_sym => %w[dob email_address id] } } + let(:hidden_pii) { { Candidate.table_name.to_sym => %w[dob] } } + let(:existing_blocklist) { { Candidate.table_name.to_sym => %w[first_name last_name] } } + + describe '.hidden_pii' do + it 'returns all the fields in the analytics_hidden_pii.yml file' do + expect(described_class.hidden_pii).to eq(hidden_pii) + end + end + + describe '.check!' do + context 'when hidden PII fields are improperly managed' do + let(:existing_allowlist) { { Candidate.table_name.to_sym => %w[id email_address] } } + + it 'raises an error about hidden PII fields not in allowlist' do + expect { described_class.check! 
}.to raise_error(DfE::Analytics::ConfigurationError, /New database field detected/) + end + end + + context 'when hidden PII fields are properly managed' do + it 'does not raise an error' do + expect { described_class.check! }.not_to raise_error + end + end + end + end end end diff --git a/spec/dummy/config/analytics_hidden_pii.yml b/spec/dummy/config/analytics_hidden_pii.yml new file mode 100644 index 00000000..3a42532a --- /dev/null +++ b/spec/dummy/config/analytics_hidden_pii.yml @@ -0,0 +1,2 @@ +--- +shared: {}