Merge pull request #4353 from GSA-TTS/main
jadudm authored Oct 5, 2024
commit ce78e68 (2 parents: 3f61db7 + 3ee04d7)
Showing 5 changed files with 215 additions and 20 deletions.
backend/audit/cross_validation/check_award_ref_declaration.py (65 changes: 64 additions & 1 deletion)
@@ -22,19 +22,82 @@ def check_award_ref_declaration(sac_dict, *_args, **_kwargs):
declared_award_refs = set()
reported_award_refs = set()
errors = []

declared_award_ref_max_length = 0
reported_award_ref_max_length = 0
for award in federal_awards:
award_ref = award.get("award_reference")
if award_ref:
declared_award_refs.add(award_ref)
if len(award_ref) > declared_award_ref_max_length:
declared_award_ref_max_length = len(award_ref)

for finding in findings_uniform_guidance:
award_ref = finding["program"]["award_reference"]
if award_ref:
reported_award_refs.add(award_ref)
if len(award_ref) > reported_award_ref_max_length:
reported_award_ref_max_length = len(award_ref)

updated_declared_refs, updated_reported_refs = _normalize_award_ref_lengths(
declared_award_ref_max_length,
reported_award_ref_max_length,
federal_awards,
findings_uniform_guidance,
)
if updated_declared_refs:
declared_award_refs = updated_declared_refs
if updated_reported_refs:
reported_award_refs = updated_reported_refs

difference = reported_award_refs.difference(declared_award_refs)
if difference:
errors.append({"error": err_award_ref_not_declared(list(difference))})

return errors


def _normalize_award_ref_lengths(
declared_award_ref_max_length,
reported_award_ref_max_length,
federal_awards,
findings_uniform_guidance,
):
"""
Normalize the lengths of the award references in the Federal Awards and
Federal Awards Audit Findings workbooks before validation.
"""
reported_award_refs = set()
declared_award_refs = set()
if declared_award_ref_max_length > reported_award_ref_max_length:
# This is unlikely to happen, but still a good check. It means
# that the version of the Federal Awards workbook is newer than
# the version of the Federal Awards Audit Findings workbook.
diff = declared_award_ref_max_length - reported_award_ref_max_length
padding = "0" * diff

for finding in findings_uniform_guidance:
award_ref = finding["program"]["award_reference"]
if award_ref:
award_ref = (
f"{award_ref.split('-')[0]}-{padding}{award_ref.split('-')[1]}"
)
reported_award_refs.add(award_ref)
elif declared_award_ref_max_length < reported_award_ref_max_length:
# This is more likely to happen. It means the version of
# the Federal Awards Audit Findings workbook is newer than
# the version of the Federal Awards workbook.
diff = reported_award_ref_max_length - declared_award_ref_max_length
padding = "0" * diff

for award in federal_awards:
award_ref = award.get("award_reference")
if award_ref:
award_ref = (
f"{award_ref.split('-')[0]}-{padding}{award_ref.split('-')[1]}"
)
declared_award_refs.add(award_ref)
else:
# If the lengths are the same, do nothing.
pass

return declared_award_refs, reported_award_refs
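
The normalization above works by left-padding the numeric part of the shorter award references with zeros so that 4-digit and 5-digit references compare as equal strings. A minimal, self-contained sketch of that idea (illustrative only; the helper name and the use of str.zfill are assumptions, the committed code builds an explicit padding string from the length difference instead):

def pad_award_ref(award_ref, digit_width):
    # "AWARD-0123" becomes "AWARD-00123" when digit_width is 5
    prefix, digits = award_ref.split("-")
    return f"{prefix}-{digits.zfill(digit_width)}"

assert pad_award_ref("AWARD-0123", 5) == "AWARD-00123"
assert pad_award_ref("AWARD-00123", 5) == "AWARD-00123"
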
backend/audit/cross_validation/check_findings_count_consistency.py (130 changes: 115 additions & 15 deletions)
@@ -26,37 +26,137 @@ def check_findings_count_consistency(sac_dict, *_args, **_kwargs):
expected_award_refs_count = {}
found_award_refs_count = defaultdict(int)
errors = []
-    if (
+    if _should_skip_validation(data_source):
+        return errors

expected_award_refs_count, declared_award_ref_max_length = _get_federal_award_refs(
federal_awards
)
found_award_refs_count, reported_award_ref_max_length = _get_findings_award_refs(
findings_uniform_guidance, expected_award_refs_count
)

updated_expected_refs_count, updated_found_refs_count = (
_normalize_award_ref_lengths(
declared_award_ref_max_length,
reported_award_ref_max_length,
federal_awards,
findings_uniform_guidance,
)
)

if updated_expected_refs_count:
expected_award_refs_count = updated_expected_refs_count

if updated_found_refs_count:
found_award_refs_count = updated_found_refs_count

errors = _validate_findings(expected_award_refs_count, found_award_refs_count)

return errors


def _should_skip_validation(data_source):
# Skip this validation if it is an historical audit report with incorrect findings count
return (
data_source == settings.CENSUS_DATA_SOURCE
and "check_findings_count_consistency"
in InvalidRecord.fields["validations_to_skip"]
-    ):
-        # Skip this validation if it is an historical audit report with incorrect findings count
-        return errors
+    )


def _get_federal_award_refs(federal_awards):
declared_award_ref_max_length = 0
expected_award_refs_count = {}

for award in federal_awards:
-        award_reference = award.get("award_reference", None)
+        award_reference = award.get("award_reference")
if award_reference:
declared_award_ref_max_length = max(
declared_award_ref_max_length, len(award_reference)
)
expected_award_refs_count[award_reference] = award["program"][
"number_of_audit_findings"
]

return expected_award_refs_count, declared_award_ref_max_length


def _get_findings_award_refs(findings_uniform_guidance, expected_award_refs_count):
reported_award_ref_max_length = 0
found_award_refs_count = defaultdict(int)

for finding in findings_uniform_guidance:
award_ref = finding["program"]["award_reference"]
-        if award_ref in expected_award_refs_count:
-            found_award_refs_count[award_ref] += 1
+        if award_ref:
+            reported_award_ref_max_length = max(
+                reported_award_ref_max_length, len(award_ref)
+            )
+            if award_ref in expected_award_refs_count:
+                found_award_refs_count[award_ref] += 1

return found_award_refs_count, reported_award_ref_max_length


def _validate_findings(expected_award_refs_count, found_award_refs_count):
errors = []
for award_ref, expected in expected_award_refs_count.items():
counted = found_award_refs_count[award_ref]
if counted != expected:
errors.append(
-                {
-                    "error": err_findings_count_inconsistent(
-                        expected,
-                        counted,
-                        award_ref,
-                    )
-                }
+                {"error": err_findings_count_inconsistent(expected, counted, award_ref)}
)

return errors


def _normalize_award_ref_lengths(
declared_award_ref_max_length,
reported_award_ref_max_length,
federal_awards,
findings_uniform_guidance,
):
"""
Normalize the lengths of the award references in the Federal Awards and
Federal Awards Audit Findings workbooks before validation.
"""
expected_award_refs_count = {}
found_award_refs_count = defaultdict(int)

if declared_award_ref_max_length != reported_award_ref_max_length:
# Determine the required padding based on the difference in lengths.
diff = abs(reported_award_ref_max_length - declared_award_ref_max_length)
padding = "0" * diff

if declared_award_ref_max_length < reported_award_ref_max_length:
            # This means the version of the Federal Awards Audit Findings workbook
# is newer than the version of the Federal Awards workbook.
for award in federal_awards:
award_reference = award.get("award_reference")
if award_reference:
award_reference = _pad_award_ref(award_reference, padding)
expected_award_refs_count[award_reference] = award["program"][
"number_of_audit_findings"
]
for finding in findings_uniform_guidance:
award_ref = finding["program"]["award_reference"]
if award_ref in expected_award_refs_count:
found_award_refs_count[award_ref] += 1
else:
# This is unlikely to happen. It means the version of
# the Federal Awards workbook is newer than
# the version of the Federal Awards Audit Findings workbook.
for finding in findings_uniform_guidance:
award_ref = finding["program"]["award_reference"]
if award_ref:
award_ref = _pad_award_ref(award_ref, padding)
if award_ref in expected_award_refs_count:
found_award_refs_count[award_ref] += 1
else:
# No normalization needed if the lengths match
pass

return expected_award_refs_count, found_award_refs_count


def _pad_award_ref(award_ref, padding):
return f"{award_ref.split('-')[0]}-{padding}{award_ref.split('-')[1]}"
backend/audit/cross_validation/test_check_award_ref_declaration.py (20 changes: 20 additions & 0 deletions)
@@ -22,6 +22,8 @@ def setUp(self):
self.award3 = {
"award_reference": f"AWARD-{generate_random_integer(self.AWARD_MIN *3,self.AWARD_MAX *3)}"
}
self.award_with_longer_ref = {"award_reference": "AWARD-00123"}
self.award_with_shorter_ref = {"award_reference": "AWARD-0123"}

def _make_federal_awards(self, award_refs) -> dict:
return {
@@ -82,3 +84,21 @@ def test_errors_for_findings_with_undeclared_award_refs(self):
self.assertEqual(len(errors), 1)
expected_error = err_award_ref_not_declared([self.award2["award_reference"]])
self.assertIn({"error": expected_error}, errors)

def test_padding_when_declared_award_ref_max_length_greater(self):
"""Test case where declared award reference length is greater than reported award reference length."""
sac = self._make_sac(
[self.award_with_longer_ref], [self.award_with_shorter_ref]
)
errors = check_award_ref_declaration(sac_validation_shape(sac))
# No errors expected
self.assertEqual(errors, [])

def test_padding_when_reported_award_ref_max_length_greater(self):
"""Test case where reported award reference length is greater than declared award reference length."""
sac = self._make_sac(
[self.award_with_shorter_ref], [self.award_with_longer_ref]
)
errors = check_award_ref_declaration(sac_validation_shape(sac))
# No errors expected
self.assertEqual(errors, [])
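
Reduced to set arithmetic, the intent of the two padding tests is that equivalent references become identical after normalization, so only genuinely undeclared references remain in the difference that feeds err_award_ref_not_declared (illustrative values, using zfill in place of the padding helper):

declared = {"AWARD-00123"}
reported = {"AWARD-0123", "AWARD-9999"}

# Pad the numeric part of each reported reference to five digits.
reported_padded = {f"AWARD-{ref.split('-')[1].zfill(5)}" for ref in reported}

print(reported_padded - declared)  # {'AWARD-09999'}: still flagged as undeclared
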
@@ -29,10 +29,12 @@ def _make_federal_awards(self, findings_count) -> dict:
}
}

-    def _make_findings_uniform_guidance(self, awards, mismatch) -> dict:
+    def _make_findings_uniform_guidance(self, awards, mismatch, padding) -> dict:
entries = []
for award in awards["FederalAwards"]["federal_awards"]:
award_reference = award["award_reference"]
if padding:
award_reference = f"{award_reference.split('-')[0]}-{padding}{award_reference.split('-')[1]}"
count = award["program"]["number_of_audit_findings"]
for _ in range(count + mismatch):
entries.append({"program": {"award_reference": award_reference}})
@@ -48,11 +50,11 @@ def _make_findings_uniform_guidance(self, awards, mismatch) -> dict:

return {"FindingsUniformGuidance": findings}

-    def _make_sac(self, findings_count, mismatch=0) -> SingleAuditChecklist:
+    def _make_sac(self, findings_count, mismatch=0, padding="") -> SingleAuditChecklist:
sac = baker.make(SingleAuditChecklist)
sac.federal_awards = self._make_federal_awards(findings_count)
sac.findings_uniform_guidance = self._make_findings_uniform_guidance(
-            sac.federal_awards, mismatch
+            sac.federal_awards, mismatch, padding
)
return sac

@@ -101,3 +103,14 @@ def test_declared_findings_exceed_reported_count(self):
self._test_findings_count_mismatch(
generate_random_integer(2, 4), generate_random_integer(-2, -1)
)

def test_normalize_award_ref_lengths_with_padding(self):
"""
Ensure that award reference normalization occurs when declared and reported
award reference lengths differ. Leading zeros are added appropriately.
"""
sac = self._make_sac(
generate_random_integer(self.FINDINGS_MIN, self.FINDINGS_MAX), 0, "0"
)
errors = check_findings_count_consistency(sac_validation_shape(sac))
self.assertEqual(errors, [])
@@ -13,7 +13,6 @@

# A version of this regex also exists in Base.libsonnet
AWARD_REFERENCES_REGEX = r"^AWARD-(?!0{4,5}$)[0-9]{4,5}$"
AWARD_REFERENCES_REGEX5 = r"^AWARD-(?!0{5}$)[0-9]{5}$"

AWARD_LEN_4_DIGITS = 10
AWARD_LEN_5_DIGITS = 11
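
For context, AWARD_REFERENCES_REGEX accepts both 4-digit and 5-digit references while rejecting all-zero numbers; a quick illustrative check:

import re

AWARD_REFERENCES_REGEX = r"^AWARD-(?!0{4,5}$)[0-9]{4,5}$"

assert re.match(AWARD_REFERENCES_REGEX, "AWARD-0123")
assert re.match(AWARD_REFERENCES_REGEX, "AWARD-00123")
assert not re.match(AWARD_REFERENCES_REGEX, "AWARD-0000")  # all zeros rejected
assert not re.match(AWARD_REFERENCES_REGEX, "AWARD-123")   # too few digits
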