Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ensure order by column is in analytics.yml #106

Merged
merged 4 commits into from
Jan 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 19 additions & 9 deletions lib/dfe/analytics/entity_table_check_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,11 @@ def perform
return unless supported_adapter_and_environment?

DfE::Analytics.entities_for_analytics.each do |entity|
columns = DfE::Analytics.allowlist[entity]
next unless id_column_exists_for_entity?(entity)
next unless order_column_exposed_for_entity?(entity, columns)

order_column = determine_order_column(entity)
order_column = determine_order_column(entity, columns)

entity_table_check_event = build_event_for(entity, order_column)
DfE::Analytics::SendEvents.perform_later([entity_table_check_event]) if entity_table_check_event.present?
Expand Down Expand Up @@ -76,6 +78,16 @@ def fetch_checksum_data(entity, checksum_calculated_at, order_column)
end
end

def determine_order_column(entity, columns)
if ActiveRecord::Base.connection.column_exists?(entity, :updated_at) && columns.include?('updated_at')
'UPDATED_AT'
elsif ActiveRecord::Base.connection.column_exists?(entity, :created_at) && columns.include?('created_at')
'CREATED_AT'
else
Rails.logger.info("DfE::Analytics: Entity checksum: Order column missing in #{entity}")
end
end

def id_column_exists_for_entity?(entity)
return true if ActiveRecord::Base.connection.column_exists?(entity, :id)

Expand All @@ -84,14 +96,12 @@ def id_column_exists_for_entity?(entity)
false
end

def determine_order_column(entity)
if ActiveRecord::Base.connection.column_exists?(entity, :updated_at)
'UPDATED_AT'
elsif ActiveRecord::Base.connection.column_exists?(entity, :created_at)
'CREATED_AT'
else
'ID'
end
def order_column_exposed_for_entity?(entity, columns)
return true if columns.include?('updated_at') || columns.include?('created_at')

Rails.logger.info("DfE::Analytics Processing entity: Order columns missing in analytics.yml for #{entity} - Skipping checks")

false
end

def fetch_postgresql_checksum_data(table_name_sanitized, checksum_calculated_at_sanitized, order_column)
Expand Down
73 changes: 69 additions & 4 deletions spec/dfe/analytics/entity_table_check_job_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,46 @@
end
end

with_model :Application do
table do |t|
t.string :type
t.datetime :created_at
end
end

with_model :Course do
table id: false do |t|
t.string :name
t.string :duration
t.datetime :updated_at
end

model do |m|
m.primary_key = nil
end
end

with_model :Institution do
table do |t|
t.string :name
t.string :address
end
end

before do
DfE::Analytics.config.entity_table_checks_enabled = true
allow(DfE::Analytics::SendEvents).to receive(:perform_later)
allow(DfE::Analytics).to receive(:allowlist).and_return({
Candidate.table_name.to_sym => %w[id]
Candidate.table_name.to_sym => %w[updated_at],
Application.table_name.to_sym => %w[type created_at],
Course.table_name.to_sym => %w[name duration],
Institution.table_name.to_sym => %w[name address]
})
allow(DfE::Analytics).to receive(:allowlist_pii).and_return({
Candidate.table_name.to_sym => %w[]
Candidate.table_name.to_sym => %w[],
Application.table_name.to_sym => %w[],
Course.table_name.to_sym => %w[],
Institution.table_name.to_sym => %w[]
})
allow(Rails.logger).to receive(:info)
allow(Time).to receive(:now).and_return(time_now)
Expand All @@ -43,8 +75,41 @@
expect(DfE::Analytics::SendEvents).not_to have_received(:perform_later)
end

it 'skips an entity if there is no id' do
expected_message = "DfE::Analytics: Entity checksum: ID column missing in #{Course.table_name} - Skipping checks"
described_class.new.perform
expect(Rails.logger).to have_received(:info).with(expected_message)
end

it 'orders by created_at if updated_at is missing' do
order_column = 'CREATED_AT'
[123, 124, 125].map { |id| Application.create(id: id) }
table_ids = Application.where('created_at < ?', checksum_calculated_at).order(created_at: :asc).pluck(:id)
checksum = Digest::MD5.hexdigest(table_ids.join)
described_class.new.perform

expect(DfE::Analytics::SendEvents).to have_received(:perform_later)
.with([a_hash_including({
'data' =>
[
{ 'key' => 'row_count', 'value' => [table_ids.size] },
{ 'key' => 'checksum', 'value' => [checksum] },
{ 'key' => 'checksum_calculated_at', 'value' => [checksum_calculated_at] },
{ 'key' => 'order_column', 'value' => [order_column] }
]
})])
end

it 'returns an error info message if updated_at and created_at are missing' do
expected_message = "DfE::Analytics Processing entity: Order columns missing in analytics.yml for #{Institution.table_name} - Skipping checks"

described_class.new.perform

expect(Rails.logger).to have_received(:info).with(expected_message)
end

it 'sends the entity_table_check event to BigQuery' do
[123, 124, 125].map { |id| Candidate.create(id: id) }
[130, 131, 132].map { |id| Candidate.create(id: id) }
table_ids = Candidate.where('updated_at < ?', checksum_calculated_at).order(updated_at: :asc).pluck(:id)
checksum = Digest::MD5.hexdigest(table_ids.join)
described_class.new.perform
Expand Down Expand Up @@ -99,7 +164,7 @@
end

it 'logs the entity name and row count' do
Candidate.create(id: 123)
Candidate.create(id: 129)
expected_message = "DfE::Analytics Processing entity: #{Candidate.table_name}: Row count: #{Candidate.count}"

described_class.new.perform
Expand Down
Loading