diff --git a/cachito/web/api_v1.py b/cachito/web/api_v1.py
index 3766252b1..5343c50e6 100644
--- a/cachito/web/api_v1.py
+++ b/cachito/web/api_v1.py
@@ -6,7 +6,7 @@
 from collections import OrderedDict
 from copy import deepcopy
 from datetime import date, datetime
-from typing import Any, Dict, List, Set, Union
+from typing import Any, Dict, List, Optional, Set, Union, cast
 
 import flask
 import kombu.exceptions
@@ -15,14 +15,14 @@
 from flask import stream_with_context
 from flask_login import current_user, login_required
 from opentelemetry import trace
-from sqlalchemy import and_, func
+from sqlalchemy import and_, desc, func, or_
 from sqlalchemy.orm import joinedload, load_only
 from werkzeug.exceptions import BadRequest, Forbidden, Gone, InternalServerError, NotFound
 
 from cachito.common.checksum import hash_file
 from cachito.common.packages_data import PackagesData
 from cachito.common.paths import RequestBundleDir
-from cachito.common.utils import b64encode
+from cachito.common.utils import b64encode, get_repo_name
 from cachito.errors import MessageBrokerError, NoWorkers, RequestErrorOrigin, ValidationError
 from cachito.web import db
 from cachito.web.content_manifest import BASE_ICM, BASE_SBOM
@@ -184,6 +184,75 @@ def get_request(request_id):
     return flask.jsonify(json)
 
 
+def get_latest_request() -> flask.Response:
+    """
+    Retrieve the latest request for a repo_name/ref and return as JSON.
+
+    :return: a Flask JSON response
+    :rtype: flask.Response
+    :raise NotFound: if the request is not found
+    """
+    # mypy: Connexion ensures that these cannot be None
+    repo_name = cast(str, flask.request.args.get("repo_name"))
+    ref = cast(str, flask.request.args.get("ref"))
+
+    request = _get_latest_request_by_repo_name_and_ref(repo_name, ref)
+
+    if not request:
+        raise NotFound
+
+    return flask.jsonify(request.to_json(verbose=False))
+
+
+def _get_latest_request_by_repo_name_and_ref(repo_name: str, ref: str) -> Optional[Request]:
+    """
+    Retrieve the latest request for a repo_name/ref.
+
+    The latest request will be the one with the highest id. Initially query the DB
+    for a request with:
+    - matching git ref
+    - repo URL that ends with either repo_name or repo_name.git
+    - the highest request_id
+
+    Failing the initial match on repo_name, fall back to iterating over the result set
+    in-order until a match is found (or not found). This is necessary because we don't
+    know the URL scheme/netloc from the repo_name.
+
+    :param str repo_name: the namespaced repository name
+    :param str ref: the git ref
+    :return: a Request object or None
+    :rtype: Request or None
+    """
+    # The .git ending *may* be present at the end of the Request.repo URL
+    repo_name_with_git = f"{repo_name}.git"
+
+    # repo_name is user input and may legitimately contain "_" (or "%"), which are LIKE
+    # wildcards; autoescape makes the suffix match treat them as literal characters.
+    query = (
+        Request.query.filter(Request.ref == ref)
+        .filter(
+            or_(
+                Request.repo.endswith(repo_name, autoescape=True),
+                Request.repo.endswith(repo_name_with_git, autoescape=True),
+            )
+        )
+        .order_by(desc(Request.id))
+    )
+
+    # Check the first result for a matching repo_name
+    first_request = query.first()
+    if first_request and get_repo_name(first_request.repo) == repo_name:
+        return first_request
+
+    # Fall back to iterating over the full result set, loading it in chunks,
+    # looking for a matching repo_name
+    for request in query.yield_per(10):
+        if get_repo_name(request.repo) == repo_name:
+            return request
+
+    return None
+
+
 def get_request_config_files(request_id):
     """
     Retrieve the configuration files associated with the given request.
diff --git a/cachito/web/static/api_v1.yaml b/cachito/web/static/api_v1.yaml
index 89372c548..ad01fb749 100644
--- a/cachito/web/static/api_v1.yaml
+++ b/cachito/web/static/api_v1.yaml
@@ -341,6 +341,52 @@ paths:
           application/json:
             schema:
               $ref: '#/components/schemas/RequestUpdate'
+  "/requests/latest":
+    get:
+      operationId: cachito.web.api_v1.get_latest_request
+      summary: Get the latest request for a repo/ref
+      description: Return the latest request for a specified repo_name and ref
+      parameters:
+        - name: repo_name
+          required: true
+          in: query
+          description: >
+            The namespaced repository name (namespace/name). The domain and protocol/scheme
+            must not be present in this param (e.g. https://github.com/my-org/my-repo translates
+            to my-org/my-repo).
+          schema:
+            type: string
+            minLength: 3
+            maxLength: 200
+            pattern: '^.*\/[\w.-]+$'
+            example: release-engineering/retrodep
+        - name: ref
+          required: true
+          in: query
+          description: The git reference
+          schema:
+            type: string
+            minLength: 40
+            maxLength: 40
+            pattern: '^[a-f0-9]{40}$'
+            example: bc9767a71ede6e0084ae4a9e01dcd8b81c30b741
+      responses:
+        "200":
+          description: The requested Cachito request
+          content:
+            application/json:
+              schema:
+                $ref: "#/components/schemas/Request"
+        "404":
+          description: The request wasn't found
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    example: The requested resource was not found
   "/requests/{request_id}/configuration-files":
     get:
       operationId: cachito.web.api_v1.get_request_config_files
diff --git a/tests/test_api_v1.py b/tests/test_api_v1.py
index 87a472906..deeea9c7b 100644
--- a/tests/test_api_v1.py
+++ b/tests/test_api_v1.py
@@ -545,6 +545,196 @@ def test_datetime_validator(client, date, is_valid, expected_status):
     assert rv.status_code == expected_status
 
 
+@pytest.fixture()
+def latest_requests_db(app, db, worker_auth_env):
+    """
+    Add requests to the db for testing the requests/latest endpoint.
+
+    The entries are inserted in list order; test_get_latest_request refers to them by
+    request id, which it expects to match the 1-based position in this list.
+    """
+    data = [
+        {
+            "repo": "https://github.com/org/foo.git",
+            "ref": "a50b93a32df1c9d700e3e80996845bc2e13be848",
+        },
+        {
+            "repo": "https://github.com/org/foo.git",
+            "ref": "b50b93a32df1c9d700e3e80996845bc2e13be848",
+        },
+        {
+            "repo": "https://github.com/org/foo.git",
+            "ref": "a50b93a32df1c9d700e3e80996845bc2e13be848",
+        },
+        {
+            "repo": "https://github.com/org/bar.git",
+            "ref": "a50b93a32df1c9d700e3e80996845bc2e13be848",
+        },
+        # Same namespaced repo name and ref hosted on two different forges
+        {
+            "repo": "https://otherforge.com/org/baz.git",
+            "ref": "c50b93a32df1c9d700e3e80996845bc2e13be848",
+        },
+        {
+            "repo": "https://github.com/org/baz.git",
+            "ref": "c50b93a32df1c9d700e3e80996845bc2e13be848",
+        },
+        # Same repo URL with and without the trailing .git
+        {
+            "repo": "https://github.com/org/spam.git",
+            "ref": "d50b93a32df1c9d700e3e80996845bc2e13be848",
+        },
+        {
+            "repo": "https://github.com/org/spam",
+            "ref": "d50b93a32df1c9d700e3e80996845bc2e13be848",
+        },
+        # "git/org/eggs" ends with "org/eggs" but is a different (longer) namespace
+        {
+            "repo": "https://github.com/org/eggs",
+            "ref": "e50b93a32df1c9d700e3e80996845bc2e13be848",
+        },
+        {
+            "repo": "https://github.com/git/org/eggs",
+            "ref": "e50b93a32df1c9d700e3e80996845bc2e13be848",
+        },
+        # SSH-style repo locations
+        {
+            "repo": "git@github.com:org/ham",
+            "ref": "f50b93a32df1c9d700e3e80996845bc2e13be848",
+        },
+        {
+            "repo": "git@github.com:org/ham",
+            "ref": "f50b93a32df1c9d700e3e80996845bc2e13be848",
+        },
+    ]
+
+    for item in data:
+        with app.test_request_context(environ_base=worker_auth_env):
+            request = Request.from_json(item)
+        db.session.add(request)
+    db.session.commit()
+
+
+@pytest.mark.parametrize(
+    "query_params, latest_request_id",
+    [
+        pytest.param(
+            {"repo_name": "org/foo", "ref": "a50b93a32df1c9d700e3e80996845bc2e13be848"},
+            3,
+            id="same_repo_same_ref",
+        ),
+        pytest.param(
+            {"repo_name": "org/foo", "ref": "b50b93a32df1c9d700e3e80996845bc2e13be848"},
+            2,
+            id="same_repo_different_ref",
+        ),
+        pytest.param(
+            {"repo_name": "org/bar", "ref": "a50b93a32df1c9d700e3e80996845bc2e13be848"},
+            4,
+            id="different_repo_same_ref",
+        ),
+        pytest.param(
+            {"repo_name": "org/baz", "ref": "c50b93a32df1c9d700e3e80996845bc2e13be848"},
+            6,
+            id="different_forge",
+        ),
+        pytest.param(
+            {"repo_name": "org/spam", "ref": "d50b93a32df1c9d700e3e80996845bc2e13be848"},
+            8,
+            id="with_or_without_dot_git",
+        ),
+        pytest.param(
+            {"repo_name": "org/eggs", "ref": "e50b93a32df1c9d700e3e80996845bc2e13be848"},
+            9,
+            id="extended_namespace",
+        ),
+        pytest.param(
+            {
+                "repo_name": "git@github.com:org/ham",
+                "ref": "f50b93a32df1c9d700e3e80996845bc2e13be848",
+            },
+            12,
+            id="ssh_repository_name",
+        ),
+    ],
+)
+def test_get_latest_request(client, latest_requests_db, query_params, latest_request_id):
+    """Verify that requests/latest returns the expected request id for each repo_name/ref."""
+    rv = client.get("/api/v1/requests/latest", query_string=query_params)
+    assert HTTPStatus.OK == rv.status_code
+    response = rv.json
+    assert latest_request_id == response["id"]
+
+
+@pytest.mark.parametrize(
+    "query_params",
+    [
+        pytest.param(
+            {"repo_name": "org/foo", "ref": "d50b93a32df1c9d700e3e80996845bc2e13be848"},
+            id="ref_not_found",
+        ),
+        pytest.param(
+            {"repo_name": "org/qux", "ref": "a50b93a32df1c9d700e3e80996845bc2e13be848"},
+            id="repo_not_found",
+        ),
+    ],
+)
+def test_get_latest_request_not_found(client, latest_requests_db, query_params):
+    rv = client.get("/api/v1/requests/latest", query_string=query_params)
+    assert HTTPStatus.NOT_FOUND == rv.status_code
+
+
+@pytest.mark.parametrize(
+    "query_params, error_str",
+    [
+        pytest.param(
+            {"repo_name": "org/repo", "ref": "c50b93a32df1c9d700e3e80996845bc2e13be84"},
+            "'c50b93a32df1c9d700e3e80996845bc2e13be84' is too short",
+            id="ref_too_short",
+        ),
+        pytest.param(
+            {"repo_name": "org/repo", "ref": "c50b93a32df1c9d700e3e80996845bc2e13be8489"},
+            "'c50b93a32df1c9d700e3e80996845bc2e13be8489' is too long",
+            id="ref_too_long",
+        ),
+        pytest.param(
+            {"repo_name": "org/repo", "ref": "c50b93a32df1c9d700e*e80996845bc2e13be848"},
+            "'c50b93a32df1c9d700e*e80996845bc2e13be848' does not match",
+            id="ref_invalid_character",
+        ),
+        pytest.param(
+            {
+                "repo_name": "org/" + ("repo" * 51),  # 208 chars, over the maxLength of 200
+                "ref": "c50b93a32df1c9d700e3e80996845bc2e13be848",
+            },
+            "reporepo' is too long",
+            id="repo_name_too_long",
+        ),
+        pytest.param(
+            {"repo_name": "git", "ref": "c50b93a32df1c9d700e3e80996845bc2e13be848"},
+            "'git' does not match",
+            id="invalid_repo_name_format",
+        ),
+        pytest.param(
+            {"repo_name": "org/*", "ref": "c50b93a32df1c9d700e3e80996845bc2e13be848"},
+            "'org/*' does not match ",
+            id="invalid_repo_name_character",
+        ),
+        pytest.param(
+            {"ref": "c50b93a32df1c9d700e3e80996845bc2e13be848"},
+            "Missing query parameter 'repo_name'",
+            id="missing_repo_name",
+        ),
+        pytest.param({"repo_name": "org/repo"}, "Missing query parameter 'ref'", id="missing_ref"),
+    ],
+)
+def test_get_latest_request_invalid_input(app, client, query_params, error_str):
+    rv = client.get("/api/v1/requests/latest", query_string=query_params)
+    assert rv.status_code == 400
+    response = rv.json
+    assert error_str in response["error"]
+
+
 def test_fetch_paginated_requests(
     app, auth_env, client, db, sample_deps_replace, sample_package, worker_auth_env, tmpdir
 ):