Skip to content

Commit

Permalink
Ensure order by column is in analytics.yml (#106)
Browse files Browse the repository at this point in the history
* Ensure order by column is in analytics.yml
* Restore ID check, remove ID order by check
* Update tests for new conditions
* Update test ids
  • Loading branch information
ericaporter authored Jan 8, 2024
1 parent 858c5b4 commit a757f60
Show file tree
Hide file tree
Showing 2 changed files with 88 additions and 13 deletions.
28 changes: 19 additions & 9 deletions lib/dfe/analytics/entity_table_check_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,11 @@ def perform
return unless supported_adapter_and_environment?

DfE::Analytics.entities_for_analytics.each do |entity|
columns = DfE::Analytics.allowlist[entity]
next unless id_column_exists_for_entity?(entity)
next unless order_column_exposed_for_entity?(entity, columns)

order_column = determine_order_column(entity)
order_column = determine_order_column(entity, columns)

entity_table_check_event = build_event_for(entity, order_column)
DfE::Analytics::SendEvents.perform_later([entity_table_check_event]) if entity_table_check_event.present?
Expand Down Expand Up @@ -76,6 +78,16 @@ def fetch_checksum_data(entity, checksum_calculated_at, order_column)
end
end

def determine_order_column(entity, columns)
if ActiveRecord::Base.connection.column_exists?(entity, :updated_at) && columns.include?('updated_at')
'UPDATED_AT'
elsif ActiveRecord::Base.connection.column_exists?(entity, :created_at) && columns.include?('created_at')
'CREATED_AT'
else
Rails.logger.info("DfE::Analytics: Entity checksum: Order column missing in #{entity}")
end
end

def id_column_exists_for_entity?(entity)
return true if ActiveRecord::Base.connection.column_exists?(entity, :id)

Expand All @@ -84,14 +96,12 @@ def id_column_exists_for_entity?(entity)
false
end

def determine_order_column(entity)
if ActiveRecord::Base.connection.column_exists?(entity, :updated_at)
'UPDATED_AT'
elsif ActiveRecord::Base.connection.column_exists?(entity, :created_at)
'CREATED_AT'
else
'ID'
end
def order_column_exposed_for_entity?(entity, columns)
return true if columns.include?('updated_at') || columns.include?('created_at')

Rails.logger.info("DfE::Analytics Processing entity: Order columns missing in analytics.yml for #{entity} - Skipping checks")

false
end

def fetch_postgresql_checksum_data(table_name_sanitized, checksum_calculated_at_sanitized, order_column)
Expand Down
73 changes: 69 additions & 4 deletions spec/dfe/analytics/entity_table_check_job_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,46 @@
end
end

with_model :Application do
table do |t|
t.string :type
t.datetime :created_at
end
end

with_model :Course do
table id: false do |t|
t.string :name
t.string :duration
t.datetime :updated_at
end

model do |m|
m.primary_key = nil
end
end

with_model :Institution do
table do |t|
t.string :name
t.string :address
end
end

before do
DfE::Analytics.config.entity_table_checks_enabled = true
allow(DfE::Analytics::SendEvents).to receive(:perform_later)
allow(DfE::Analytics).to receive(:allowlist).and_return({
Candidate.table_name.to_sym => %w[id]
Candidate.table_name.to_sym => %w[updated_at],
Application.table_name.to_sym => %w[type created_at],
Course.table_name.to_sym => %w[name duration],
Institution.table_name.to_sym => %w[name address]
})
allow(DfE::Analytics).to receive(:allowlist_pii).and_return({
Candidate.table_name.to_sym => %w[]
Candidate.table_name.to_sym => %w[],
Application.table_name.to_sym => %w[],
Course.table_name.to_sym => %w[],
Institution.table_name.to_sym => %w[]
})
allow(Rails.logger).to receive(:info)
allow(Time).to receive(:now).and_return(time_now)
Expand All @@ -43,8 +75,41 @@
expect(DfE::Analytics::SendEvents).not_to have_received(:perform_later)
end

it 'skips an entity if there is no id' do
expected_message = "DfE::Analytics: Entity checksum: ID column missing in #{Course.table_name} - Skipping checks"
described_class.new.perform
expect(Rails.logger).to have_received(:info).with(expected_message)
end

it 'orders by created_at if updated_at is missing' do
order_column = 'CREATED_AT'
[123, 124, 125].map { |id| Application.create(id: id) }
table_ids = Application.where('created_at < ?', checksum_calculated_at).order(created_at: :asc).pluck(:id)
checksum = Digest::MD5.hexdigest(table_ids.join)
described_class.new.perform

expect(DfE::Analytics::SendEvents).to have_received(:perform_later)
.with([a_hash_including({
'data' =>
[
{ 'key' => 'row_count', 'value' => [table_ids.size] },
{ 'key' => 'checksum', 'value' => [checksum] },
{ 'key' => 'checksum_calculated_at', 'value' => [checksum_calculated_at] },
{ 'key' => 'order_column', 'value' => [order_column] }
]
})])
end

it 'returns an error info message if updated_at and created_at are missing' do
expected_message = "DfE::Analytics Processing entity: Order columns missing in analytics.yml for #{Institution.table_name} - Skipping checks"

described_class.new.perform

expect(Rails.logger).to have_received(:info).with(expected_message)
end

it 'sends the entity_table_check event to BigQuery' do
[123, 124, 125].map { |id| Candidate.create(id: id) }
[130, 131, 132].map { |id| Candidate.create(id: id) }
table_ids = Candidate.where('updated_at < ?', checksum_calculated_at).order(updated_at: :asc).pluck(:id)
checksum = Digest::MD5.hexdigest(table_ids.join)
described_class.new.perform
Expand Down Expand Up @@ -99,7 +164,7 @@
end

it 'logs the entity name and row count' do
Candidate.create(id: 123)
Candidate.create(id: 129)
expected_message = "DfE::Analytics Processing entity: #{Candidate.table_name}: Row count: #{Candidate.count}"

described_class.new.perform
Expand Down

0 comments on commit a757f60

Please sign in to comment.