Consolidate award reference and allow backward compatibility (#4332)
* #4331 Updated logic to handle discrepancies in award reference lengths

* #4331 Removed unused REGEX

* #4331 Updated validation on finding count to account for award reference version mismatch
sambodeme authored Oct 4, 2024
1 parent a8bf3cc commit 3ee04d7
Showing 5 changed files with 215 additions and 20 deletions.
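Context for the diffs below: older Federal Awards workbooks use 4-digit award references (e.g. AWARD-1234, 10 characters) while newer ones use 5-digit references (e.g. AWARD-01234, 11 characters), so the same award can be spelled two ways when the Federal Awards and Federal Awards Audit Findings workbooks come from different versions. A minimal sketch of the reconciliation idea, with invented values rather than the committed code:

# Zero-pad the digits of the shorter reference so the two spellings compare equal.
declared = "AWARD-01234"  # from a newer (5-digit) Federal Awards workbook
reported = "AWARD-1234"   # from an older (4-digit) Audit Findings workbook

padding = "0" * (len(declared) - len(reported))
prefix, digits = reported.split("-")
normalized = f"{prefix}-{padding}{digits}"
assert normalized == declared  # "AWARD-01234"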
65 changes: 64 additions & 1 deletion backend/audit/cross_validation/check_award_ref_declaration.py
@@ -22,19 +22,82 @@ def check_award_ref_declaration(sac_dict, *_args, **_kwargs):
    declared_award_refs = set()
    reported_award_refs = set()
    errors = []

    declared_award_ref_max_length = 0
    reported_award_ref_max_length = 0
    for award in federal_awards:
        award_ref = award.get("award_reference")
        if award_ref:
            declared_award_refs.add(award_ref)
            if len(award_ref) > declared_award_ref_max_length:
                declared_award_ref_max_length = len(award_ref)

    for finding in findings_uniform_guidance:
        award_ref = finding["program"]["award_reference"]
        if award_ref:
            reported_award_refs.add(award_ref)
            if len(award_ref) > reported_award_ref_max_length:
                reported_award_ref_max_length = len(award_ref)

    updated_declared_refs, updated_reported_refs = _normalize_award_ref_lengths(
        declared_award_ref_max_length,
        reported_award_ref_max_length,
        federal_awards,
        findings_uniform_guidance,
    )
    if updated_declared_refs:
        declared_award_refs = updated_declared_refs
    if updated_reported_refs:
        reported_award_refs = updated_reported_refs

    difference = reported_award_refs.difference(declared_award_refs)
    if difference:
        errors.append({"error": err_award_ref_not_declared(list(difference))})

    return errors


def _normalize_award_ref_lengths(
    declared_award_ref_max_length,
    reported_award_ref_max_length,
    federal_awards,
    findings_uniform_guidance,
):
    """
    Normalize the lengths of the award references in the Federal Awards and
    Federal Awards Audit Findings workbooks before validation.
    """
    reported_award_refs = set()
    declared_award_refs = set()
    if declared_award_ref_max_length > reported_award_ref_max_length:
        # This is unlikely to happen, but still a good check. It means
        # that the version of the Federal Awards workbook is newer than
        # the version of the Federal Awards Audit Findings workbook.
        diff = declared_award_ref_max_length - reported_award_ref_max_length
        padding = "0" * diff

        for finding in findings_uniform_guidance:
            award_ref = finding["program"]["award_reference"]
            if award_ref:
                award_ref = (
                    f"{award_ref.split('-')[0]}-{padding}{award_ref.split('-')[1]}"
                )
                reported_award_refs.add(award_ref)
    elif declared_award_ref_max_length < reported_award_ref_max_length:
        # This is more likely to happen. It means the version of
        # the Federal Awards Audit Findings workbook is newer than
        # the version of the Federal Awards workbook.
        diff = reported_award_ref_max_length - declared_award_ref_max_length
        padding = "0" * diff

        for award in federal_awards:
            award_ref = award.get("award_reference")
            if award_ref:
                award_ref = (
                    f"{award_ref.split('-')[0]}-{padding}{award_ref.split('-')[1]}"
                )
                declared_award_refs.add(award_ref)
    else:
        # If the lengths are the same, do nothing.
        pass

    return declared_award_refs, reported_award_refs
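To make the helper's contract concrete, here is how it behaves on hypothetical inputs (values invented for illustration): with a declared reference of length 11 and a reported reference of length 10, only the shorter, reported side is rebuilt, and the empty set returned for the other side tells the caller to keep its original set.

federal_awards = [{"award_reference": "AWARD-00123"}]
findings = [{"program": {"award_reference": "AWARD-0123"}}]

declared, reported = _normalize_award_ref_lengths(11, 10, federal_awards, findings)
# declared == set()            -> falsy, so the caller keeps its original set
# reported == {"AWARD-00123"}  -> padded to match the declared length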
130 changes: 115 additions & 15 deletions backend/audit/cross_validation/check_findings_count_consistency.py
@@ -26,37 +26,137 @@ def check_findings_count_consistency(sac_dict, *_args, **_kwargs):
    expected_award_refs_count = {}
    found_award_refs_count = defaultdict(int)
    errors = []

    if _should_skip_validation(data_source):
        return errors

    expected_award_refs_count, declared_award_ref_max_length = _get_federal_award_refs(
        federal_awards
    )
    found_award_refs_count, reported_award_ref_max_length = _get_findings_award_refs(
        findings_uniform_guidance, expected_award_refs_count
    )

    updated_expected_refs_count, updated_found_refs_count = (
        _normalize_award_ref_lengths(
            declared_award_ref_max_length,
            reported_award_ref_max_length,
            federal_awards,
            findings_uniform_guidance,
        )
    )

    if updated_expected_refs_count:
        expected_award_refs_count = updated_expected_refs_count

    if updated_found_refs_count:
        found_award_refs_count = updated_found_refs_count

    errors = _validate_findings(expected_award_refs_count, found_award_refs_count)

    return errors


def _should_skip_validation(data_source):
    # Skip this validation if it is a historical audit report with incorrect findings count
    return (
        data_source == settings.CENSUS_DATA_SOURCE
        and "check_findings_count_consistency"
        in InvalidRecord.fields["validations_to_skip"]
    )


def _get_federal_award_refs(federal_awards):
    declared_award_ref_max_length = 0
    expected_award_refs_count = {}

    for award in federal_awards:
        award_reference = award.get("award_reference")
        if award_reference:
            declared_award_ref_max_length = max(
                declared_award_ref_max_length, len(award_reference)
            )
            expected_award_refs_count[award_reference] = award["program"][
                "number_of_audit_findings"
            ]

    return expected_award_refs_count, declared_award_ref_max_length


def _get_findings_award_refs(findings_uniform_guidance, expected_award_refs_count):
    reported_award_ref_max_length = 0
    found_award_refs_count = defaultdict(int)

    for finding in findings_uniform_guidance:
        award_ref = finding["program"]["award_reference"]
        if award_ref:
            reported_award_ref_max_length = max(
                reported_award_ref_max_length, len(award_ref)
            )
            if award_ref in expected_award_refs_count:
                found_award_refs_count[award_ref] += 1

    return found_award_refs_count, reported_award_ref_max_length


def _validate_findings(expected_award_refs_count, found_award_refs_count):
    errors = []
    for award_ref, expected in expected_award_refs_count.items():
        counted = found_award_refs_count[award_ref]
        if counted != expected:
            errors.append(
                {"error": err_findings_count_inconsistent(expected, counted, award_ref)}
            )

    return errors


def _normalize_award_ref_lengths(
    declared_award_ref_max_length,
    reported_award_ref_max_length,
    federal_awards,
    findings_uniform_guidance,
):
    """
    Normalize the lengths of the award references in the Federal Awards and
    Federal Awards Audit Findings workbooks before validation.
    """
    expected_award_refs_count = {}
    found_award_refs_count = defaultdict(int)

    if declared_award_ref_max_length != reported_award_ref_max_length:
        # Determine the required padding based on the difference in lengths.
        diff = abs(reported_award_ref_max_length - declared_award_ref_max_length)
        padding = "0" * diff

        if declared_award_ref_max_length < reported_award_ref_max_length:
            # This means the version of the Federal Awards Audit Findings workbook
            # is newer than the version of the Federal Awards workbook.
            for award in federal_awards:
                award_reference = award.get("award_reference")
                if award_reference:
                    award_reference = _pad_award_ref(award_reference, padding)
                    expected_award_refs_count[award_reference] = award["program"][
                        "number_of_audit_findings"
                    ]
            for finding in findings_uniform_guidance:
                award_ref = finding["program"]["award_reference"]
                if award_ref in expected_award_refs_count:
                    found_award_refs_count[award_ref] += 1
        else:
            # This is unlikely to happen. It means the version of
            # the Federal Awards workbook is newer than
            # the version of the Federal Awards Audit Findings workbook.
            for finding in findings_uniform_guidance:
                award_ref = finding["program"]["award_reference"]
                if award_ref:
                    award_ref = _pad_award_ref(award_ref, padding)
                    if award_ref in expected_award_refs_count:
                        found_award_refs_count[award_ref] += 1
    else:
        # No normalization needed if the lengths match.
        pass

    return expected_award_refs_count, found_award_refs_count


def _pad_award_ref(award_ref, padding):
    return f"{award_ref.split('-')[0]}-{padding}{award_ref.split('-')[1]}"
20 changes: 20 additions & 0 deletions backend/audit/cross_validation/test_check_award_ref_declaration.py
@@ -22,6 +22,8 @@ def setUp(self):
        self.award3 = {
            "award_reference": f"AWARD-{generate_random_integer(self.AWARD_MIN * 3, self.AWARD_MAX * 3)}"
        }
        self.award_with_longer_ref = {"award_reference": "AWARD-00123"}
        self.award_with_shorter_ref = {"award_reference": "AWARD-0123"}

    def _make_federal_awards(self, award_refs) -> dict:
        return {
@@ -82,3 +84,21 @@ def test_errors_for_findings_with_undeclared_award_refs(self):
        self.assertEqual(len(errors), 1)
        expected_error = err_award_ref_not_declared([self.award2["award_reference"]])
        self.assertIn({"error": expected_error}, errors)

    def test_padding_when_declared_award_ref_max_length_greater(self):
        """Test case where declared award reference length is greater than reported award reference length."""
        sac = self._make_sac(
            [self.award_with_longer_ref], [self.award_with_shorter_ref]
        )
        errors = check_award_ref_declaration(sac_validation_shape(sac))
        # No errors expected
        self.assertEqual(errors, [])

    def test_padding_when_reported_award_ref_max_length_greater(self):
        """Test case where reported award reference length is greater than declared award reference length."""
        sac = self._make_sac(
            [self.award_with_shorter_ref], [self.award_with_longer_ref]
        )
        errors = check_award_ref_declaration(sac_validation_shape(sac))
        # No errors expected
        self.assertEqual(errors, [])
backend/audit/cross_validation/test_check_findings_count_consistency.py
@@ -29,10 +29,12 @@ def _make_federal_awards(self, findings_count) -> dict:
            }
        }

    def _make_findings_uniform_guidance(self, awards, mismatch, padding) -> dict:
        entries = []
        for award in awards["FederalAwards"]["federal_awards"]:
            award_reference = award["award_reference"]
            if padding:
                award_reference = f"{award_reference.split('-')[0]}-{padding}{award_reference.split('-')[1]}"
            count = award["program"]["number_of_audit_findings"]
            for _ in range(count + mismatch):
                entries.append({"program": {"award_reference": award_reference}})
@@ -48,11 +50,11 @@ def _make_findings_uniform_guidance(self, awards, mismatch) -> dict:

return {"FindingsUniformGuidance": findings}

def _make_sac(self, findings_count, mismatch=0) -> SingleAuditChecklist:
def _make_sac(self, findings_count, mismatch=0, padding="") -> SingleAuditChecklist:
sac = baker.make(SingleAuditChecklist)
sac.federal_awards = self._make_federal_awards(findings_count)
sac.findings_uniform_guidance = self._make_findings_uniform_guidance(
sac.federal_awards, mismatch
sac.federal_awards, mismatch, padding
)
return sac

@@ -101,3 +103,14 @@ def test_declared_findings_exceed_reported_count(self):
        self._test_findings_count_mismatch(
            generate_random_integer(2, 4), generate_random_integer(-2, -1)
        )

    def test_normalize_award_ref_lengths_with_padding(self):
        """
        Ensure that award reference normalization occurs when declared and reported
        award reference lengths differ. Leading zeros are added appropriately.
        """
        sac = self._make_sac(
            generate_random_integer(self.FINDINGS_MIN, self.FINDINGS_MAX), 0, "0"
        )
        errors = check_findings_count_consistency(sac_validation_shape(sac))
        self.assertEqual(errors, [])
@@ -13,7 +13,6 @@

# A version of this regex also exists in Base.libsonnet
AWARD_REFERENCES_REGEX = r"^AWARD-(?!0{4,5}$)[0-9]{4,5}$"
AWARD_REFERENCES_REGEX5 = r"^AWARD-(?!0{5}$)[0-9]{5}$"

AWARD_LEN_4_DIGITS = 10
AWARD_LEN_5_DIGITS = 11
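With AWARD_REFERENCES_REGEX5 gone, the surviving AWARD_REFERENCES_REGEX already accepts both the 4-digit and 5-digit forms while rejecting all-zero references, which is presumably why the 5-digit-only variant became unused. A quick illustration (assumes nothing beyond the regex shown above):

import re

AWARD_REFERENCES_REGEX = r"^AWARD-(?!0{4,5}$)[0-9]{4,5}$"

assert re.match(AWARD_REFERENCES_REGEX, "AWARD-1234")        # 4-digit form
assert re.match(AWARD_REFERENCES_REGEX, "AWARD-01234")       # 5-digit form
assert not re.match(AWARD_REFERENCES_REGEX, "AWARD-0000")    # all zeros rejected
assert not re.match(AWARD_REFERENCES_REGEX, "AWARD-123456")  # too many digits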
