chore: Switch to ruff

open-contracting · Sep 5, 2024 · 8f05317 · 8f05317
1 parent d03ec37
commit 8f05317
Show file tree

Hide file tree

Showing 69 changed files with 436 additions and 464 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -2,26 +2,18 @@ ci:
   autoupdate_schedule: quarterly
   skip: [pip-compile]
 repos:
-  - repo: https://github.com/psf/black
-    rev: 24.4.2
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.5.0
     hooks:
-      - id: black
-  - repo: https://github.com/pycqa/flake8
-    rev: 7.1.0
-    hooks:
-      - id: flake8
-        additional_dependencies: [flake8-comprehensions]
-  - repo: https://github.com/pycqa/isort
-    rev: 5.13.2
-    hooks:
-      - id: isort
+      - id: ruff
+      - id: ruff-format
   - repo: https://github.com/astral-sh/uv-pre-commit
     rev: 0.4.4
     hooks:
       - id: pip-compile
         name: pip-compile requirements.in
-        args: [requirements.in, -o, requirements.txt, --no-strip-extras]
+        args: [requirements.in, -o, requirements.txt]
       - id: pip-compile
         name: pip-compile requirements_dev.in
-        args: [requirements_dev.in, -o, requirements_dev.txt, --no-strip-extras]
+        args: [requirements_dev.in, -o, requirements_dev.txt]
         files: ^requirements_dev\.(in|txt)$
diff --git a/contracting_process/field_level/definitions.py b/contracting_process/field_level/definitions.py
@@ -26,7 +26,7 @@ def _definitions(properties, path=None, refs=None):
         refs = ()
 
     for key, value in properties.items():
-        new_path = path + (key,)
+        new_path = (*path, key)
         dot_path = ".".join(new_path)
 
         if "object" in value["type"] and "properties" in value:
@@ -58,10 +58,7 @@ def _definitions(properties, path=None, refs=None):
                 if key == "description":
                     checks.append((document_description_length.calculate, document_description_length.name))
                 elif key == "documentType":
-                    if refs[1] == "Implementation":
-                        index = 1
-                    else:
-                        index = 0
+                    index = 1 if refs[1] == "Implementation" else 0
                     checks.append(
                         (
                             functools.partial(document_type.calculate_section, section=refs[index].lower()),
@@ -84,7 +81,7 @@ def _definitions(properties, path=None, refs=None):
             elif refs[-1] == "Tender":
                 if key == "numberOfTenderers":
                     checks.append((number.calculate, number.name))
-            elif refs[-1] == "Value":
+            elif refs[-1] == "Value":  # noqa: SIM102 # consistency
                 if key == "amount" and new_path[-3] in ("transactions", "unit"):
                     checks.append((number.calculate, number.name))
 

diff --git a/contracting_process/field_level/report_examples.py b/contracting_process/field_level/report_examples.py
@@ -118,7 +118,7 @@ def create(dataset_id):
             check_group["passed_examples"] = check_group["passed_examples"].sample
             check_group["failed_examples"] = check_group["failed_examples"].sample
 
-            for check_name, check in check_group["checks"].items():
+            for check in check_group["checks"].values():
                 check["passed_examples"] = check["passed_examples"].sample
                 check["failed_examples"] = check["failed_examples"].sample
 

diff --git a/contracting_process/processor.py b/contracting_process/processor.py
@@ -7,7 +7,7 @@
 from contracting_process.resource_level.definitions import definitions as resource_level_definitions
 from pelican.util import settings
 from pelican.util.getter import get_values
-from pelican.util.services import Json, get_cursor, state, update_items_state
+from pelican.util.services import Json, State, get_cursor, update_items_state
 from pelican.util.workers import is_step_required
 
 logger = logging.getLogger("pelican.contracting_process.processor")
@@ -26,11 +26,13 @@ def do_work(dataset_id, items):
             logger.error("data_item %s has no ocid", item_id)
             continue
         if do_field_level:
-            field_level_check_arglist.append(field_level_checks(data, item_id, dataset_id, do_field_quality))
+            field_level_check_arglist.append(
+                field_level_checks(data, item_id, dataset_id, do_field_quality=do_field_quality)
+            )
         if do_resource_level:
             resource_level_check_arglist.append(resource_level_checks(data, item_id, dataset_id))
 
-    update_items_state(dataset_id, (item_id for _, item_id in items), state.OK)
+    update_items_state(dataset_id, (item_id for _, item_id in items), State.OK)
 
     if do_field_level:
         save_field_level_checks(field_level_check_arglist)
@@ -49,7 +51,7 @@ def resource_level_checks(data, item_id, dataset_id):
     return (Json(result), item_id, dataset_id)
 
 
-def field_level_checks(data, item_id, dataset_id, do_field_quality=True):
+def field_level_checks(data, item_id, dataset_id, *, do_field_quality=True):
     logger.debug("Dataset %s: Item %s: Calculating field-level checks", dataset_id, item_id)
 
     result = {"meta": {"ocid": data["ocid"], "item_id": item_id}, "checks": {}}
@@ -92,7 +94,7 @@ def field_level_checks(data, item_id, dataset_id, do_field_quality=True):
                 else:
                     field_result["path"] = leaf
 
-                for check, check_name in coverage_checks:
+                for check, _ in coverage_checks:
                     check_result = check(item, leaf)
                     passed = check_result["result"]
                     field_result["coverage"]["check_results"].append(check_result)
@@ -101,7 +103,7 @@ def field_level_checks(data, item_id, dataset_id, do_field_quality=True):
                         break
                 else:  # field_result["coverage"]["overall_result"] is True
                     if do_field_quality:
-                        for check, check_name in checks:
+                        for check, _ in checks:
                             check_result = check(item, leaf)
                             passed = check_result["result"]
                             field_result["quality"]["check_results"].append(check_result)

diff --git a/contracting_process/resource_level/coherent/amendments_dates.py b/contracting_process/resource_level/coherent/amendments_dates.py
@@ -1,4 +1,6 @@
 """
+Coherence check for amendment dates.
+
 .. seealso::
 
    :func:`pelican.util.checks.coherent_dates_check

diff --git a/contracting_process/resource_level/coherent/awards_status.py b/contracting_process/resource_level/coherent/awards_status.py
@@ -1,6 +1,7 @@
 """
-If an award's ``status`` is inactive ('pending', 'cancelled', 'unsuccessful'), then no contract's ``awardID`` matches
-the award's ``id``.
+If an award's ``status`` is inactive, then no contract's ``awardID`` matches the award's ``id``.
+
+Inactive statuses are: 'pending', 'cancelled' and 'unsuccessful'.
 """
 
 from pelican.util.checks import complete_result_resource, get_empty_result_resource

diff --git a/contracting_process/resource_level/coherent/dates.py b/contracting_process/resource_level/coherent/dates.py
@@ -1,4 +1,6 @@
 """
+Coherence check for the contracting process timeline.
+
 .. seealso::
 
    :func:`pelican.util.checks.coherent_dates_check

diff --git a/contracting_process/resource_level/coherent/documents_dates.py b/contracting_process/resource_level/coherent/documents_dates.py
@@ -1,4 +1,6 @@
 """
+Coherence check for document dates.
+
 .. seealso::
 
    :func:`pelican.util.checks.coherent_dates_check

diff --git a/contracting_process/resource_level/coherent/milestones_dates.py b/contracting_process/resource_level/coherent/milestones_dates.py
@@ -1,4 +1,6 @@
 """
+Coherence check for milestone dates.
+
 .. seealso::
 
    :func:`pelican.util.checks.coherent_dates_check

diff --git a/contracting_process/resource_level/coherent/period.py b/contracting_process/resource_level/coherent/period.py
@@ -1,4 +1,6 @@
 """
+Coherence check for period objects.
+
 .. seealso::
 
    :func:`pelican.util.checks.coherent_dates_check

diff --git a/contracting_process/resource_level/coherent/procurement_method_vs_number_of_tenderers.py b/contracting_process/resource_level/coherent/procurement_method_vs_number_of_tenderers.py
@@ -25,6 +25,6 @@ def calculate(item):
 
     return complete_result_resource_pass_fail(
         result,
-        number_of_tenderers == 0 or number_of_tenderers == 1,
+        number_of_tenderers in {0, 1},
         {"numberOfTenderers": item["tender"]["numberOfTenderers"]},
     )
diff --git a/contracting_process/resource_level/coherent/release_date.py b/contracting_process/resource_level/coherent/release_date.py
@@ -1,4 +1,6 @@
 """
+Coherence check for release date.
+
 .. seealso::
 
    :func:`pelican.util.checks.coherent_dates_check

diff --git a/contracting_process/resource_level/coherent/tender_status.py b/contracting_process/resource_level/coherent/tender_status.py
@@ -1,6 +1,7 @@
 """
-If ``tender.status`` is incomplete ('planning', 'planned', 'active', 'cancelled', 'unsuccessful' or 'withdrawn'), then
-``awards`` and ``contracts`` are blank.
+If ``tender.status`` is incomplete, then ``awards`` and ``contracts`` are blank.
+
+Incomplete statuses are: 'planning', 'planned', 'active', 'cancelled', 'unsuccessful' and 'withdrawn'.
 """
 
 from pelican.util.checks import complete_result_resource_pass_fail, get_empty_result_resource

diff --git a/contracting_process/resource_level/consistent/contracts_implementation_transactions_value.py b/contracting_process/resource_level/consistent/contracts_implementation_transactions_value.py
@@ -1,6 +1,7 @@
 """
-For each contract, the sum of its transaction's values is less than or equal to the contract's value, after conversion
-to USD if necessary.
+For each contract, the sum of its transactions' values is less than or equal to the contract's value.
+
+Values are converted to USD if necessary.
 
 Since the test operates on all contract and transaction objects, the test silently ignores any missing or non-numeric
 amounts and any missing or unknown currencies. If currency conversion is necessary, but the release date is invalid,

diff --git a/contracting_process/resource_level/consistent/contracts_value.py b/contracting_process/resource_level/consistent/contracts_value.py
@@ -1,6 +1,7 @@
 """
-For each award, the sum of its contracts' values isn't less than 50%, or more than 150%, of the award's value, after
-conversion to USD if necessary.
+For each award, the sum of its contracts' values isn't less than 50%, or more than 150%, of the award's value.
+
+Values are converted to USD if necessary.
 
 Since the test operates on all award and contract values, the test silently ignores:
 

diff --git a/contracting_process/resource_level/consistent/parties_role.py b/contracting_process/resource_level/consistent/parties_role.py
@@ -1,5 +1,7 @@
 """
-For each role of each party, there is an organization reference. The roles to test are:
+For each role of each party, there is an organization reference.
+
+The roles to test are:
 
 -  procuringEntity
 -  tenderer

diff --git a/contracting_process/resource_level/consistent/period_duration_in_days.py b/contracting_process/resource_level/consistent/period_duration_in_days.py
@@ -1,6 +1,8 @@
 """
-For each period, ``durationInDays`` is equal to the difference between ``startDate`` and ``endDate``. If ``endDate`` is
-blank or unparsable, then ``durationInDays`` is equal to the difference between ``startDate`` and ``maxExtentDate``.
+For each period, ``durationInDays`` is equal to the difference between ``startDate`` and ``endDate``.
+
+If ``endDate`` is blank or unparsable, then, for each period, ``durationInDays`` is equal to the difference between
+``startDate`` and ``maxExtentDate``.
 
 Since the test operates on all period objects, the test silently ignores any dates that can't be parsed.
 """

diff --git a/contracting_process/resource_level/consistent/tender_value.py b/contracting_process/resource_level/consistent/tender_value.py
@@ -1,6 +1,7 @@
 """
-``planning.budget.amount`` isn't less than 50%, or more than 150%, of ``tender.value``, after conversion to USD if
-necessary.
+``planning.budget.amount`` isn't less than 50%, or more than 150%, of ``tender.value``.
+
+Values are converted to USD if necessary.
 
 The test is skipped if an amount is missing, zero or non-numeric, if a currency is missing or unknown, if the two
 amounts aren't both positive or both negative, or if currency conversion is necessary and the release date is invalid,

diff --git a/contracting_process/resource_level/reference/contract_in_awards.py b/contracting_process/resource_level/reference/contract_in_awards.py
@@ -32,27 +32,27 @@ def calculate(item):
         if not deep_has(value["value"], "awardID"):
             failed_paths.append({"path": path, "awardID": None, "reason": "contract has no awardID"})
         else:
-            awardID = value["value"]["awardID"]
-            if awardID not in id_counts:
+            award_id = value["value"]["awardID"]
+            if award_id not in id_counts:
                 if not ids:
-                    failed_paths.append({"path": path, "awardID": awardID, "reason": "no award has an id"})
-                elif str(awardID) in id_counts_str:
+                    failed_paths.append({"path": path, "awardID": award_id, "reason": "no award has an id"})
+                elif str(award_id) in id_counts_str:
                     failed_paths.append(
-                        {"path": path, "awardID": awardID, "reason": "id is not the same type as awardID"}
+                        {"path": path, "awardID": award_id, "reason": "id is not the same type as awardID"}
                     )
                 else:
-                    failed_paths.append({"path": path, "awardID": awardID, "reason": "no award matches the awardID"})
-            elif id_counts[awardID] > 1:
+                    failed_paths.append({"path": path, "awardID": award_id, "reason": "no award matches the awardID"})
+            elif id_counts[award_id] > 1:
                 failed_paths.append(
-                    {"path": path, "awardID": awardID, "reason": "multiple awards match the awardID"}  # (same type)
+                    {"path": path, "awardID": award_id, "reason": "multiple awards match the awardID"}  # (same type)
                 )
             # Multiple matches across different types are currently designed to pass. (This assumes users do not coerce
             # IDs to strings.) If we change this to a failure, uncomment the following lines.
             #
-            # elif id_counts_str[str(awardID)] > 1:
-            #     failed_paths.append(
-            #       {"path": path, "awardID": awardID, "reason": "multiple awards match the awardID (types differ)"}
-            #     )
+            # > elif id_counts_str[str(award_id)] > 1:
+            # >     failed_paths.append(
+            # >       {"path": path, "awardID": award_id, "reason": "multiple awards match the awardID (types differ)"}
+            # >     )
             else:
                 pass_count += 1
 

diff --git a/contracting_process/resource_level/reference/parties.py b/contracting_process/resource_level/reference/parties.py
@@ -43,10 +43,10 @@ def calculate_path(item, path):
             # Multiple matches across different types are currently designed to pass. (This assumes users do not coerce
             # IDs to strings.) If we change this to a failure, uncomment the following lines.
             #
-            # elif id_counts_str[str(ident)] > 1:
-            #     failed_paths.append(
-            #       {"path": path, "id": ident, "reason": "multiple parties match the referencing id (types differ)"}
-            #     )
+            # > elif id_counts_str[str(ident)] > 1:
+            # >     failed_paths.append(
+            # >       {"path": path, "id": ident, "reason": "multiple parties match the referencing id (types differ)"}
+            # >     )
             else:
                 pass_count += 1
 

diff --git a/dataset/consistent/related_process_title.py b/dataset/consistent/related_process_title.py
@@ -1,6 +1,7 @@
 """
-A related process object has the same value for its ``title`` field as the ``tender.title`` field of the compiled
-release it references. The related process fields are:
+The ``title`` of a related process is equal to the ``tender.title`` of the compiled release it references.
+
+The related process fields are:
 
 -  ``contracts.relatedProcesses``
 -  ``relatedProcesses``
@@ -84,7 +85,7 @@ def add_item(scope, item, item_id):
 def get_result(scope):
     result = get_empty_result_dataset(version)
 
-    for ref, example in scope["related_processes"].items():
+    for example in scope["related_processes"].values():
         if scope["ocids"][example["related_ocid"]]["found"]:
             scope = _add_example(scope, example)
 

diff --git a/dataset/distribution/buyer.py b/dataset/distribution/buyer.py
@@ -1,6 +1,8 @@
 """
-Fewer than 50% of all buyers are identified in only one compiled release. Failure indicates issues in buyer
-identification. Buyers are identified by ``buyer.identifier.scheme`` and ``buyer.identifier.id``.
+Fewer than 50% of all buyers are identified in only one compiled release.
+
+Failure indicates issues in buyer identification. Buyers are identified by ``buyer.identifier.scheme`` and
+``buyer.identifier.id``.
 
 The test is skipped if the ``buyer.identifier.scheme`` and ``buyer.identifier.id`` fields are both present in fewer
 than 1,000 compiled releases.

diff --git a/dataset/distribution/buyer_repetition.py b/dataset/distribution/buyer_repetition.py
@@ -1,6 +1,8 @@
 """
-The most common buyer is identified in 1% to 50% of compiled releases. Failure indicates issues in buyer identification
-or buyer over-representation. Buyers are identified by ``buyer.identifier.scheme`` and ``buyer.identifier.id``.
+The most common buyer is identified in 1% to 50% of compiled releases.
+
+Failure indicates issues in buyer identification or buyer over-representation. Buyers are identified by
+``buyer.identifier.scheme`` and ``buyer.identifier.id``.
 
 The test is skipped if the ``buyer.identifier.scheme`` and ``buyer.identifier.id`` fields are both present in fewer
 than 1,000 compiled releases.

diff --git a/dataset/distribution/code_distribution.py b/dataset/distribution/code_distribution.py
@@ -1,14 +1,17 @@
 """
-If ``test_values`` is set, then each test value occurs in between 0.1% and 99% of cases. Otherwise, no test is
-performed. The test is skipped if the ``paths`` are never present.
+If ``test_values`` is set, then each test value occurs in between 0.1% and 99% of cases.
+
+Otherwise, no test is performed.
+
+The test is skipped if the ``paths`` are never present.
 """
 
 from pelican.util.checks import ReservoirSampler, get_empty_result_dataset
 from pelican.util.getter import get_values
 
 
 class CodeDistribution:
-    def __init__(self, paths, test_values=[], limit=20):
+    def __init__(self, paths, test_values=(), limit=20):
         self.paths = paths
         self.test_values = set(test_values)
         self.limit = limit

diff --git a/dataset/distribution/value.py b/dataset/distribution/value.py
@@ -1,6 +1,7 @@
 """
-The total value of the top 1% of values is less than the total value of the remaining values. Failure indicates extreme
-outliers in the top 1%. All values are converted to USD as of the compiled release's ``date``.
+The total value of the top 1% of values is less than the total value of the remaining values.
+
+Failure indicates extreme outliers in the top 1%. Values are converted to USD as of the compiled release's ``date``.
 
 The test is skipped if fewer than 100 values are included. A value is excluded if an amount is missing, negative or
 non-numeric, if a currency is missing or unknown, or if currency conversion is necessary and the release date is

diff --git a/dataset/metadata_aggregator.py b/dataset/metadata_aggregator.py
@@ -38,6 +38,8 @@ def get_result(scope):
 
 def get_kingfisher_metadata(kingfisher_process_cursor, collection_id):
     """
+    Return metadata from Kingfisher Process.
+
     :param kingfisher_process_cursor: the cursor must be initialized with `cursor_factory=psycopg2.extras.DictCursor`
     :param collection_id: the ID of the compiled collection
     """
@@ -192,7 +194,7 @@ def get_kingfisher_metadata(kingfisher_process_cursor, collection_id):
         for repository_url in deep_get(row["data"], "extensions", list):
             try:
                 response = requests.get(repository_url, timeout=30)
-                if response.status_code != 200:
+                if response.status_code != requests.codes.ok:
                     continue
 
                 extension = response.json()