From 410547ee7a9169510321a161ca505758d3706ac6 Mon Sep 17 00:00:00 2001 From: taniya-das Date: Mon, 11 Mar 2024 16:09:31 +0100 Subject: [PATCH 01/49] pre-commit triggers pytest, no need to run pytest again --- .github/workflows/pytest-tests.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pytest-tests.yml b/.github/workflows/pytest-tests.yml index eb2d5d59..65e2d1ab 100644 --- a/.github/workflows/pytest-tests.yml +++ b/.github/workflows/pytest-tests.yml @@ -41,10 +41,10 @@ jobs: source venv/bin/activate pre-commit run --all - - name: Test with pytest - run: | - source venv/bin/activate - pytest ./src/tests/ + # - name: Test with pytest + # run: | + # source venv/bin/activate + # pytest ./src/tests/ From 63fb96d0e69c58f1a9a525102ca9bc41662b23a2 Mon Sep 17 00:00:00 2001 From: taniya-das Date: Mon, 11 Mar 2024 16:12:09 +0100 Subject: [PATCH 02/49] cleanup --- .github/workflows/pytest-tests.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/pytest-tests.yml b/.github/workflows/pytest-tests.yml index 65e2d1ab..868c2293 100644 --- a/.github/workflows/pytest-tests.yml +++ b/.github/workflows/pytest-tests.yml @@ -41,10 +41,7 @@ jobs: source venv/bin/activate pre-commit run --all - # - name: Test with pytest - # run: | - # source venv/bin/activate - # pytest ./src/tests/ + From 305720601068e6f63dfbfd107e69dcdb15dfecbd Mon Sep 17 00:00:00 2001 From: Jean Matias Date: Thu, 14 Mar 2024 09:32:39 +0100 Subject: [PATCH 03/49] chenge in user/permissions of scripts --- scripts/backup.sh | 0 scripts/mysql_dump.sh | 0 scripts/realm_export.sh | 0 3 files changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 scripts/backup.sh mode change 100644 => 100755 scripts/mysql_dump.sh mode change 100644 => 100755 scripts/realm_export.sh diff --git a/scripts/backup.sh b/scripts/backup.sh old mode 100644 new mode 100755 diff --git a/scripts/mysql_dump.sh b/scripts/mysql_dump.sh old mode 100644 new mode 100755 diff --git a/scripts/realm_export.sh b/scripts/realm_export.sh old mode 100644 new mode 100755 From 0977315d6561187a9c4c50dabe91c7f3bca7a24d Mon Sep 17 00:00:00 2001 From: Jean Matias Date: Thu, 14 Mar 2024 14:23:06 +0100 Subject: [PATCH 04/49] change realm_export to include users --- scripts/realm_export.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/realm_export.sh b/scripts/realm_export.sh index 58d3ea9a..bdb39799 100755 --- a/scripts/realm_export.sh +++ b/scripts/realm_export.sh @@ -9,7 +9,7 @@ source .env DATA_PATH=$(realpath "$DATA_PATH") LOCAL_BACKUP_PATH="$DATA_PATH"/keycloak_realm -docker exec -i keycloak /bin/bash -c "/opt/keycloak/bin/kc.sh export --file /tmp/aiod.json --realm aiod" +docker exec -i keycloak /bin/bash -c "/opt/keycloak/bin/kc.sh export --file /tmp/aiod.json --realm aiod --users realm_file" if [ ! -d "$LOCAL_BACKUP_PATH" ]; then mkdir "$LOCAL_BACKUP_PATH" From 1edf8bd552f4b671c6dea58c6895cfd147b3f095 Mon Sep 17 00:00:00 2001 From: mrorro Date: Fri, 15 Mar 2024 15:52:47 +0100 Subject: [PATCH 05/49] Minor typos in README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a2bf73de..bf6e16a4 100644 --- a/README.md +++ b/README.md @@ -77,7 +77,7 @@ Information on how to install Docker is found in [their documentation](https://d docker compose --profile examples up -d ``` -starts the MYSQL Server, the REST API, Keycloak for Identy and access management and Nginx for reverse proxing. \ +starts the MYSQL Server, the REST API, Keycloak for Identity and access management and Nginx for reverse proxying. \ Once started, you should be able to visit the REST API server at: http://localhost and Keycloak at http://localhost/aiod-auth \ To authenticate to the REST API swagger interface the predefined user is: user, and password: password \ To authenticate as admin to Keycloak the predefined user is: admin and password: password \ From 00fb5d9372a055dd8a2656a8bd59da807cb94b6b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 25 Mar 2024 11:23:14 +0000 Subject: [PATCH 06/49] Bump pre-commit from 3.6.0 to 3.7.0 Bumps [pre-commit](https://github.com/pre-commit/pre-commit) from 3.6.0 to 3.7.0. - [Release notes](https://github.com/pre-commit/pre-commit/releases) - [Changelog](https://github.com/pre-commit/pre-commit/blob/main/CHANGELOG.md) - [Commits](https://github.com/pre-commit/pre-commit/compare/v3.6.0...v3.7.0) --- updated-dependencies: - dependency-name: pre-commit dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 39339db2..abce2405 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,7 +47,7 @@ dev = [ "pytest-asyncio==0.23.2", "pytest-dotenv==0.5.2", "pytest-xdist==3.5.0", - "pre-commit==3.6.0", + "pre-commit==3.7.0", "responses==0.24.1", "freezegun==1.4.0", ] From e062d14b1474c3f4d915c1b3e146770258199ed3 Mon Sep 17 00:00:00 2001 From: Jean Matias Date: Mon, 8 Apr 2024 11:31:00 +0100 Subject: [PATCH 07/49] change permission of scripts --- scripts/mysql_restore.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 scripts/mysql_restore.sh diff --git a/scripts/mysql_restore.sh b/scripts/mysql_restore.sh old mode 100644 new mode 100755 From eb9f6446494716951dd7e8da8bae851f1f43b54a Mon Sep 17 00:00:00 2001 From: Jean Matias Date: Thu, 11 Apr 2024 17:31:04 +0100 Subject: [PATCH 08/49] hide fields for persons from drupal platform --- src/authentication.py | 34 ++++++++++++ src/routers/resource_router.py | 54 ++++++++++++------- src/routers/resource_routers/person_router.py | 20 +++++++ .../resource_routers/test_router_person.py | 32 +++++++++++ src/tests/testutils/default_instances.py | 8 +++ 5 files changed, 129 insertions(+), 19 deletions(-) diff --git a/src/authentication.py b/src/authentication.py index 0496dff4..d178d7d7 100644 --- a/src/authentication.py +++ b/src/authentication.py @@ -17,6 +17,7 @@ performs a separate authorization request. The only downside is the overhead of the additional keycloak requests - if that becomes prohibitive in the future, we should reevaluate this design. """ + import logging import os @@ -97,3 +98,36 @@ async def get_current_user(token=Security(oidc)) -> User: detail="Invalid authentication token", headers={"WWW-Authenticate": "Bearer"}, ) + + +async def get_current_user_without_exception(token=Security(oidc)) -> User | None: + + if not client_secret: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="This instance is not configured correctly. You'll need to set the env var " + "KEYCLOAK_CLIENT_SECRET (e.g. in src/.env). You need to obtain this secret " + "from a Keycloak Administrator of AIoD.", + ) + if not token: + return None + try: + token = token.replace("Bearer ", "") + + # query the authorization server to determine the active state of this token and to + # determine meta-information. + userinfo = keycloak_openid.introspect(token) + + if not userinfo.get("active", False): + logging.error("Invalid userinfo or inactive user.") + return None + return User( + name=userinfo["username"], roles=set(userinfo.get("realm_access", {}).get("roles", [])) + ) + except Exception as e: + logging.error(f"Error while checking the access token: '{e}'") + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Invalid authentication token", + headers={"WWW-Authenticate": "Bearer"}, + ) diff --git a/src/routers/resource_router.py b/src/routers/resource_router.py index 84b0b389..3481c1b7 100644 --- a/src/routers/resource_router.py +++ b/src/routers/resource_router.py @@ -14,7 +14,7 @@ from sqlmodel import SQLModel, Session, select, Field from starlette.responses import JSONResponse -from authentication import get_current_user, User +from authentication import get_current_user, User, get_current_user_without_exception from config import KEYCLOAK_CONFIG from converters.schema_converters.schema_converter import SchemaConverter from database.model.ai_resource.resource import AbstractAIResource @@ -204,7 +204,13 @@ def create(self, url_prefix: str) -> APIRouter: ) return router - def get_resources(self, schema: str, pagination: Pagination, platform: str | None = None): + def get_resources( + self, + schema: str, + pagination: Pagination, + user: User | None = None, + platform: str | None = None, + ): """Fetch all resources of this platform in given schema, using pagination""" _raise_error_on_invalid_schema(self._possible_schemas, schema) with DbSession() as session: @@ -214,24 +220,14 @@ def get_resources(self, schema: str, pagination: Pagination, platform: str | Non if schema != "aiod" else self.resource_class_read.from_orm ) - where_clause = and_( - is_(self.resource_class.date_deleted, None), - (self.resource_class.platform == platform) if platform is not None else True, - ) - query = ( - select(self.resource_class) - .where(where_clause) - .offset(pagination.offset) - .limit(pagination.limit) - ) - - return self._wrap_with_headers( - [convert_schema(resource) for resource in session.scalars(query).all()] - ) + resources = self._retrieve_resources(session, pagination, user, platform) + return self._wrap_with_headers([convert_schema(resource) for resource in resources]) except Exception as e: raise as_http_exception(e) - def get_resource(self, identifier: str, schema: str, platform: str | None = None): + def get_resource( + self, identifier: str, schema: str, user: User | None = None, platform: str | None = None + ): """ Get the resource identified by AIoD identifier (if platform is None) or by platform AND platform-identifier (if platform is not None), return in given schema. @@ -256,8 +252,11 @@ def get_resources_func(self): def get_resources( pagination: Pagination = Depends(), schema: self._possible_schemas_type = "aiod", # type:ignore + user: User | None = Depends(get_current_user_without_exception), ): - resources = self.get_resources(pagination=pagination, schema=schema, platform=None) + resources = self.get_resources( + pagination=pagination, schema=schema, user=user, platform=None + ) return resources return get_resources @@ -334,8 +333,11 @@ def get_resource_func(self): def get_resource( identifier: str, schema: self._possible_schemas_type = "aiod", # type: ignore + user: User | None = Depends(get_current_user_without_exception), ): - resource = self.get_resource(identifier=identifier, schema=schema, platform=None) + resource = self.get_resource( + identifier=identifier, schema=schema, user=user, platform=None + ) return self._wrap_with_headers(resource) return get_resource @@ -525,6 +527,20 @@ def _retrieve_resource(self, session, identifier, platform=None): raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"{name} {msg}") return resource + def _retrieve_resources(self, session, pagination, user=None, platform=None): + where_clause = and_( + is_(self.resource_class.date_deleted, None), + (self.resource_class.platform == platform) if platform is not None else True, + ) + query = ( + select(self.resource_class) + .where(where_clause) + .offset(pagination.offset) + .limit(pagination.limit) + ) + resources = session.scalars(query).all() + return resources + @property def _possible_schemas(self) -> list[str]: return ["aiod"] + list(self.schema_converters.keys()) diff --git a/src/routers/resource_routers/person_router.py b/src/routers/resource_routers/person_router.py index f77768cf..75b3e0d4 100644 --- a/src/routers/resource_routers/person_router.py +++ b/src/routers/resource_routers/person_router.py @@ -1,5 +1,6 @@ from database.model.agent.person import Person from routers.resource_router import ResourceRouter +from authentication import User class PersonRouter(ResourceRouter): @@ -18,3 +19,22 @@ def resource_name_plural(self) -> str: @property def resource_class(self) -> type[Person]: return Person + + @staticmethod + def _verify_user_roles(person, user: User | None): + if (person.platform == "drupal") and not ( + user and user.has_role("full_view_drupal_resources") + ): + person.name = "******" + person.given_name = "******" + person.surname = "******" + return person + + def _retrieve_resource(self, session, identifier, user=None, platform=None): + person = super()._retrieve_resource(session, identifier, platform) + return self._verify_user_roles(person, user) + + def _retrieve_resources(self, session, pagination, user=None, platform=None): + persons = super()._retrieve_resources(session, pagination, user, platform) + persons = [self._verify_user_roles(person, user) for person in persons] + return persons diff --git a/src/tests/routers/resource_routers/test_router_person.py b/src/tests/routers/resource_routers/test_router_person.py index 5bf048d8..7814d88f 100644 --- a/src/tests/routers/resource_routers/test_router_person.py +++ b/src/tests/routers/resource_routers/test_router_person.py @@ -6,6 +6,7 @@ from authentication import keycloak_openid from database.model.agent.contact import Contact from database.model.agent.person import Person +from database.model.platform.platform import Platform from database.session import DbSession @@ -48,3 +49,34 @@ def test_happy_path( assert response_json["price_per_hour_euro"] == 10.50 assert response_json["wants_to_be_contacted"] assert response_json["contact_details"] == 1 + + +def test_privacy( + client: TestClient, + mocked_privileged_token: Mock, + platform: Platform, + person: Person, + contact: Contact, +): + keycloak_openid.introspect = mocked_privileged_token + + with DbSession() as session: + platform.name = "drupal" + session.add(platform) + person.platform = "drupal" + person.platform_resource_identifier = "2" + person.given_name = "Joe" + person.surname = "Doe" + session.add(person) + session.add(contact) + session.commit() + + response = client.get("/persons/v1/") + assert response.status_code == 200, response.json() + + response_json = response.json() + + assert len(response_json) == 1 + assert response_json[0]["name"] == "******" + assert response_json[0]["given_name"] == "******" + assert response_json[0]["surname"] == "******" diff --git a/src/tests/testutils/default_instances.py b/src/tests/testutils/default_instances.py index c6f3f15d..6cb8a9cc 100644 --- a/src/tests/testutils/default_instances.py +++ b/src/tests/testutils/default_instances.py @@ -3,6 +3,7 @@ This way you have easy access to, for instance, an AIoDDataset filled with default values. """ + import copy import json @@ -16,6 +17,7 @@ from database.model.dataset.dataset import Dataset from database.model.knowledge_asset.publication import Publication from database.model.models_and_experiments.experiment import Experiment +from database.model.platform.platform import Platform from database.model.resource_read_and_create import resource_create from database.model.serializers import deserialize_resource_relationships from database.session import DbSession @@ -116,6 +118,12 @@ def experiment(body_asset) -> Experiment: return _create_class_with_body(Experiment, body) +@pytest.fixture +def platform() -> Platform: + body = {"name": "aiod"} + return _create_class_with_body(Platform, body) + + def _create_class_with_body(clz, body: dict): pydantic_class = resource_create(clz) res_create = pydantic_class(**body) From 73e047fac6bbcbb75a35d66f1ea8584d7c2131f8 Mon Sep 17 00:00:00 2001 From: Jean Matias Date: Fri, 12 Apr 2024 09:56:28 +0100 Subject: [PATCH 09/49] fix GET endpoint to evaluate user authentication --- src/routers/resource_router.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/routers/resource_router.py b/src/routers/resource_router.py index 3481c1b7..b845436d 100644 --- a/src/routers/resource_router.py +++ b/src/routers/resource_router.py @@ -235,7 +235,7 @@ def get_resource( _raise_error_on_invalid_schema(self._possible_schemas, schema) try: with DbSession() as session: - resource = self._retrieve_resource(session, identifier, platform=platform) + resource = self._retrieve_resource(session, identifier, user, platform=platform) if schema != "aiod": return self.schema_converters[schema].convert(session, resource) return self._wrap_with_headers(self.resource_class_read.from_orm(resource)) @@ -497,7 +497,7 @@ def delete_resource( return delete_resource - def _retrieve_resource(self, session, identifier, platform=None): + def _retrieve_resource(self, session, identifier, user=None, platform=None): if platform is None: query = select(self.resource_class).where(self.resource_class.identifier == identifier) else: From fbe4f72919a5a65dcaf6c9789dfab803ff42e876 Mon Sep 17 00:00:00 2001 From: Jean Matias Date: Fri, 12 Apr 2024 10:01:43 +0100 Subject: [PATCH 10/49] hide email information when user is not authenticated --- .../resource_routers/contact_router.py | 30 +++++++++++++++++++ .../resource_routers/test_router_contact.py | 6 ++-- 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/src/routers/resource_routers/contact_router.py b/src/routers/resource_routers/contact_router.py index 6a4dadb1..1bbd3405 100644 --- a/src/routers/resource_routers/contact_router.py +++ b/src/routers/resource_routers/contact_router.py @@ -1,8 +1,12 @@ +from authentication import User from database.model.agent.contact import Contact +from database.model.agent.email import Email from database.model.agent.organisation import Organisation from database.model.agent.person import Person from routers.resource_router import ResourceRouter +from sqlmodel import select + class ContactRouter(ResourceRouter): def __init__(self): @@ -30,3 +34,29 @@ def resource_name_plural(self) -> str: @property def resource_class(self) -> type[Contact]: return Contact + + @staticmethod + def _verify_user_roles( + session, + contact: Contact, + user: User | None, + ): + if not user: + email_mask = "******" + # This ensures that the API doesn't break in case the email mask exists in + # in the DB + email = session.exec(select(Email).where(Email.name == email_mask)).first() + if not email: + email = Email(name=email_mask) + session.add(email) + contact.email = [email] * len(contact.email) + return contact + + def _retrieve_resource(self, session, identifier, user=None, platform=None): + contact = super()._retrieve_resource(session, identifier, platform) + return self._verify_user_roles(session, contact, user) + + def _retrieve_resources(self, session, pagination, user=None, platform=None): + contacts = super()._retrieve_resources(session, pagination, user, platform) + contacts = [self._verify_user_roles(session, contact, user) for contact in contacts] + return contacts diff --git a/src/tests/routers/resource_routers/test_router_contact.py b/src/tests/routers/resource_routers/test_router_contact.py index d0d725c4..d1955825 100644 --- a/src/tests/routers/resource_routers/test_router_contact.py +++ b/src/tests/routers/resource_routers/test_router_contact.py @@ -27,7 +27,7 @@ def test_happy_path(client: TestClient, mocked_privileged_token: Mock, body_asse response = client.post("/contacts/v1", json=body, headers={"Authorization": "Fake token"}) assert response.status_code == 200, response.json() - response = client.get("/contacts/v1/1") + response = client.get("/contacts/v1/1", headers={"Authorization": "Fake token"}) assert response.status_code == 200, response.json() response_json = response.json() @@ -59,11 +59,11 @@ def test_post_duplicate_email( response = client.post("/contacts/v1", json=body2, headers={"Authorization": "Fake token"}) assert response.status_code == 200, response.json() - contact = client.get("/contacts/v1/2").json() + contact = client.get("/contacts/v1/2", headers={"Authorization": "Fake token"}).json() assert set(contact["email"]) == {"b@example.com", "c@example.com"} body3 = {"email": ["d@example.com", "b@example.com"]} client.put("/contacts/v1/1", json=body3, headers={"Authorization": "Fake token"}) - contact = client.get("/contacts/v1/2").json() + contact = client.get("/contacts/v1/2", headers={"Authorization": "Fake token"}).json() msg = "changing emails of contact 1 should not change emails of contact 2." assert set(contact["email"]) == {"b@example.com", "c@example.com"}, msg From f4779fa9de52e0e38c03cef57e1462ebe78c9c8e Mon Sep 17 00:00:00 2001 From: Jean Matias Date: Fri, 12 Apr 2024 11:17:10 +0100 Subject: [PATCH 11/49] rename functions --- src/authentication.py | 13 ++++++++++--- src/main.py | 5 +++-- src/routers/resource_router.py | 12 ++++++------ .../upload_router_huggingface.py | 4 ++-- .../uploader_routers/upload_router_zenodo.py | 4 ++-- src/tests/test_authentication.py | 17 +++++++++-------- 6 files changed, 32 insertions(+), 23 deletions(-) diff --git a/src/authentication.py b/src/authentication.py index d178d7d7..c33cd3d9 100644 --- a/src/authentication.py +++ b/src/authentication.py @@ -56,7 +56,7 @@ def has_any_role(self, *roles: str) -> bool: return bool(set(roles) & self.roles) -async def get_current_user(token=Security(oidc)) -> User: +async def get_current_user_or_raise_exception(token=Security(oidc)) -> User: """ Use this function in Depends() to force authentication. Check the roles of the user for authorization. @@ -100,8 +100,16 @@ async def get_current_user(token=Security(oidc)) -> User: ) -async def get_current_user_without_exception(token=Security(oidc)) -> User | None: +async def get_current_user(token=Security(oidc)) -> User | None: + """ + Use this function in Depends() to ask for authentication. + Check whether the user is authenticated or not. + Raises: + HTTPException with status 401 on any problem with the token + (we don't want to leak information), + and status 500 on any request if Keycloak is configured incorrectly. + """ if not client_secret: raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, @@ -113,7 +121,6 @@ async def get_current_user_without_exception(token=Security(oidc)) -> User | Non return None try: token = token.replace("Bearer ", "") - # query the authorization server to determine the active state of this token and to # determine meta-information. userinfo = keycloak_openid.introspect(token) diff --git a/src/main.py b/src/main.py index cd816744..4484fb50 100644 --- a/src/main.py +++ b/src/main.py @@ -4,6 +4,7 @@ Note: order matters for overloaded paths (https://fastapi.tiangolo.com/tutorial/path-params/#order-matters). """ + import argparse import pkg_resources @@ -12,7 +13,7 @@ from fastapi.responses import HTMLResponse from sqlmodel import select -from authentication import get_current_user, User +from authentication import get_current_user_or_raise_exception, User from config import KEYCLOAK_CONFIG from database.deletion.triggers import add_delete_triggers from database.model.concept.concept import AIoDConcept @@ -62,7 +63,7 @@ def home() -> str: """ @app.get(url_prefix + "/authorization_test") - def test_authorization(user: User = Depends(get_current_user)) -> User: + def test_authorization(user: User = Depends(get_current_user_or_raise_exception)) -> User: """ Returns the user, if authenticated correctly. """ diff --git a/src/routers/resource_router.py b/src/routers/resource_router.py index b845436d..0efc3ac9 100644 --- a/src/routers/resource_router.py +++ b/src/routers/resource_router.py @@ -14,7 +14,7 @@ from sqlmodel import SQLModel, Session, select, Field from starlette.responses import JSONResponse -from authentication import get_current_user, User, get_current_user_without_exception +from authentication import get_current_user_or_raise_exception, User, get_current_user from config import KEYCLOAK_CONFIG from converters.schema_converters.schema_converter import SchemaConverter from database.model.ai_resource.resource import AbstractAIResource @@ -252,7 +252,7 @@ def get_resources_func(self): def get_resources( pagination: Pagination = Depends(), schema: self._possible_schemas_type = "aiod", # type:ignore - user: User | None = Depends(get_current_user_without_exception), + user: User | None = Depends(get_current_user), ): resources = self.get_resources( pagination=pagination, schema=schema, user=user, platform=None @@ -333,7 +333,7 @@ def get_resource_func(self): def get_resource( identifier: str, schema: self._possible_schemas_type = "aiod", # type: ignore - user: User | None = Depends(get_current_user_without_exception), + user: User | None = Depends(get_current_user), ): resource = self.get_resource( identifier=identifier, schema=schema, user=user, platform=None @@ -379,7 +379,7 @@ def register_resource_func(self): def register_resource( resource_create: clz_create, # type: ignore - user: User = Depends(get_current_user), + user: User = Depends(get_current_user_or_raise_exception), ): if not user.has_any_role( KEYCLOAK_CONFIG.get("role"), @@ -423,7 +423,7 @@ def put_resource_func(self): def put_resource( identifier: int, resource_create_instance: clz_create, # type: ignore - user: User = Depends(get_current_user), + user: User = Depends(get_current_user_or_raise_exception), ): if not user.has_any_role( KEYCLOAK_CONFIG.get("role"), @@ -467,7 +467,7 @@ def delete_resource_func(self): def delete_resource( identifier: str, - user: User = Depends(get_current_user), + user: User = Depends(get_current_user_or_raise_exception), ): with DbSession() as session: if not user.has_any_role( diff --git a/src/routers/uploader_routers/upload_router_huggingface.py b/src/routers/uploader_routers/upload_router_huggingface.py index 1a847e44..70e4cb7c 100644 --- a/src/routers/uploader_routers/upload_router_huggingface.py +++ b/src/routers/uploader_routers/upload_router_huggingface.py @@ -1,7 +1,7 @@ from fastapi import APIRouter, Depends from fastapi import File, Query, UploadFile -from authentication import User, get_current_user +from authentication import User, get_current_user_or_raise_exception from routers.uploader_router import UploaderRouter from uploaders.hugging_face_uploader import HuggingfaceUploader @@ -24,7 +24,7 @@ def huggingface_upload( username: str = Query( ..., title="Huggingface username", description="The username of HuggingFace" ), - user: User = Depends(get_current_user), + user: User = Depends(get_current_user_or_raise_exception), ) -> int: """ Use this endpoint to upload a file (content) to Hugging Face using diff --git a/src/routers/uploader_routers/upload_router_zenodo.py b/src/routers/uploader_routers/upload_router_zenodo.py index 80278624..780fb656 100644 --- a/src/routers/uploader_routers/upload_router_zenodo.py +++ b/src/routers/uploader_routers/upload_router_zenodo.py @@ -3,7 +3,7 @@ from fastapi import APIRouter, Depends from fastapi import File, Query, UploadFile, Path -from authentication import User, get_current_user +from authentication import User, get_current_user_or_raise_exception from uploaders.zenodo_uploader import ZenodoUploader from routers.uploader_router import UploaderRouter @@ -32,7 +32,7 @@ def zenodo_upload( ), ] = False, token: str = Query(title="Zenodo Token", description="The access token of Zenodo"), - user: User = Depends(get_current_user), + user: User = Depends(get_current_user_or_raise_exception), ) -> int: """ Use this endpoint to upload a file (content) to Zenodo using diff --git a/src/tests/test_authentication.py b/src/tests/test_authentication.py index 00ccba37..a8de8cf9 100644 --- a/src/tests/test_authentication.py +++ b/src/tests/test_authentication.py @@ -1,4 +1,5 @@ -"""Unittests for the behaviour of get_current_user().""" +"""Unittests for the behaviour of get_current_user_or_raise_exception().""" + import inspect from unittest.mock import Mock @@ -8,14 +9,14 @@ from starlette import status -from authentication import get_current_user, keycloak_openid, User +from authentication import get_current_user_or_raise_exception, keycloak_openid, User from tests.testutils.mock_keycloak import MockedKeycloak, TestUserType @pytest.mark.asyncio async def test_happy_path(): with MockedKeycloak() as _: - user = await get_current_user(token="Bearer mocked") + user = await get_current_user_or_raise_exception(token="Bearer mocked") assert user.name == "user" assert set(user.roles) == {"offline_access", "uma_authorization", "default-roles-aiod"} @@ -23,7 +24,7 @@ async def test_happy_path(): @pytest.mark.asyncio async def test_happy_path_privileged(): with MockedKeycloak(type_=TestUserType.privileged) as _: - user = await get_current_user(token="Bearer mocked") + user = await get_current_user_or_raise_exception(token="Bearer mocked") assert user.name == "user" assert set(user.roles) == { "offline_access", @@ -40,7 +41,7 @@ def test_get_current_user_leaks_no_information(): our application if it is not necessary. Moreover, the User class is returned by the authorization_test endpoint. """ - assert inspect.signature(get_current_user).return_annotation == User + assert inspect.signature(get_current_user_or_raise_exception).return_annotation == User assert set(inspect.get_annotations(User)) == {"name", "roles"} @@ -48,7 +49,7 @@ def test_get_current_user_leaks_no_information(): async def test_inactive_user(): with MockedKeycloak(type_=TestUserType.inactive) as _: with pytest.raises(HTTPException) as exception_info: - await get_current_user(token="Bearer mocked") + await get_current_user_or_raise_exception(token="Bearer mocked") assert exception_info.value.status_code == status.HTTP_401_UNAUTHORIZED assert exception_info.value.detail == "Invalid authentication token" @@ -57,7 +58,7 @@ async def test_inactive_user(): @pytest.mark.asyncio async def test_unauthenticated(): with pytest.raises(HTTPException) as exception_info: - await get_current_user(token=None) + await get_current_user_or_raise_exception(token=None) assert exception_info.value.status_code == status.HTTP_401_UNAUTHORIZED assert ( exception_info.value.detail @@ -79,6 +80,6 @@ async def test_keycloak_error(): ) ) with pytest.raises(HTTPException) as exception_info: - await get_current_user(token="Bearer mocked") + await get_current_user_or_raise_exception(token="Bearer mocked") assert exception_info.value.status_code == status.HTTP_401_UNAUTHORIZED assert exception_info.value.detail == "Invalid authentication token" From b059c52506b1423a085aaaff937abc4353899c83 Mon Sep 17 00:00:00 2001 From: Jean Matias Date: Fri, 12 Apr 2024 15:10:59 +0100 Subject: [PATCH 12/49] add documentation --- src/routers/resource_router.py | 46 ++++++++++++++----- .../resource_routers/contact_router.py | 36 +++++++++++---- src/routers/resource_routers/person_router.py | 32 ++++++++++--- 3 files changed, 88 insertions(+), 26 deletions(-) diff --git a/src/routers/resource_router.py b/src/routers/resource_router.py index 0efc3ac9..ac562d35 100644 --- a/src/routers/resource_router.py +++ b/src/routers/resource_router.py @@ -2,8 +2,7 @@ import datetime import traceback from functools import partial -from typing import Literal, Union, Any, Annotated -from typing import TypeVar, Type +from typing import Annotated, Any, Literal, Sequence, Type, TypeVar, Union from wsgiref.handlers import format_date_time from fastapi import APIRouter, Depends, HTTPException, status, Query, Path @@ -14,7 +13,7 @@ from sqlmodel import SQLModel, Session, select, Field from starlette.responses import JSONResponse -from authentication import get_current_user_or_raise_exception, User, get_current_user +from authentication import User, get_current_user, get_current_user_or_raise_exception from config import KEYCLOAK_CONFIG from converters.schema_converters.schema_converter import SchemaConverter from database.model.ai_resource.resource import AbstractAIResource @@ -52,6 +51,7 @@ class Pagination(BaseModel): RESOURCE = TypeVar("RESOURCE", bound=AbstractAIResource) RESOURCE_CREATE = TypeVar("RESOURCE_CREATE", bound=SQLModel) RESOURCE_READ = TypeVar("RESOURCE_READ", bound=SQLModel) +RESOURCE_MODEL = TypeVar("RESOURCE_MODEL", bound=SQLModel) class ResourceRouter(abc.ABC): @@ -103,7 +103,7 @@ def resource_name_plural(self) -> str: @property @abc.abstractmethod - def resource_class(self): + def resource_class(self) -> type[RESOURCE_MODEL]: pass @property @@ -220,7 +220,7 @@ def get_resources( if schema != "aiod" else self.resource_class_read.from_orm ) - resources = self._retrieve_resources(session, pagination, user, platform) + resources: Any = self._retrieve_resources(session, pagination, user, platform) return self._wrap_with_headers([convert_schema(resource) for resource in resources]) except Exception as e: raise as_http_exception(e) @@ -235,7 +235,9 @@ def get_resource( _raise_error_on_invalid_schema(self._possible_schemas, schema) try: with DbSession() as session: - resource = self._retrieve_resource(session, identifier, user, platform=platform) + resource: Any = self._retrieve_resource( + session, identifier, user, platform=platform + ) if schema != "aiod": return self.schema_converters[schema].convert(session, resource) return self._wrap_with_headers(self.resource_class_read.from_orm(resource)) @@ -437,7 +439,7 @@ def put_resource( with DbSession() as session: try: - resource = self._retrieve_resource(session, identifier) + resource: Any = self._retrieve_resource(session, identifier) for attribute_name in resource.schema()["properties"]: if hasattr(resource_create_instance, attribute_name): new_value = getattr(resource_create_instance, attribute_name) @@ -481,7 +483,7 @@ def delete_resource( ) try: # Raise error if it does not exist - resource = self._retrieve_resource(session, identifier) + resource: Any = self._retrieve_resource(session, identifier) if ( hasattr(self.resource_class, "__deletion_config__") and not self.resource_class.__deletion_config__["soft_delete"] @@ -497,7 +499,18 @@ def delete_resource( return delete_resource - def _retrieve_resource(self, session, identifier, user=None, platform=None): + def _retrieve_resource( + self, + session: Session, + identifier: int | str, + user: User | None = None, + platform: str | None = None, + ) -> type[RESOURCE_MODEL]: + """ + Retrieve a resource from the database based on the provided identifier + and platform (if applicable). The user parameter is option so subclasses can + implement further verification on user access to the resource. + """ if platform is None: query = select(self.resource_class).where(self.resource_class.identifier == identifier) else: @@ -527,7 +540,18 @@ def _retrieve_resource(self, session, identifier, user=None, platform=None): raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"{name} {msg}") return resource - def _retrieve_resources(self, session, pagination, user=None, platform=None): + def _retrieve_resources( + self, + session: Session, + pagination: Pagination, + user: User | None = None, + platform: str | None = None, + ) -> Sequence[type[RESOURCE_MODEL]]: + """ + Retrieve a sequence of resources from the database based on the provided identifier + and platform (if applicable). The user parameter is option so subclasses can + implement further verification on user access to the resource. + """ where_clause = and_( is_(self.resource_class.date_deleted, None), (self.resource_class.platform == platform) if platform is not None else True, @@ -538,7 +562,7 @@ def _retrieve_resources(self, session, pagination, user=None, platform=None): .offset(pagination.offset) .limit(pagination.limit) ) - resources = session.scalars(query).all() + resources: Sequence = session.scalars(query).all() return resources @property diff --git a/src/routers/resource_routers/contact_router.py b/src/routers/resource_routers/contact_router.py index 1bbd3405..a97e4cee 100644 --- a/src/routers/resource_routers/contact_router.py +++ b/src/routers/resource_routers/contact_router.py @@ -1,11 +1,12 @@ +from typing import Sequence from authentication import User from database.model.agent.contact import Contact from database.model.agent.email import Email from database.model.agent.organisation import Organisation from database.model.agent.person import Person -from routers.resource_router import ResourceRouter +from routers.resource_router import Pagination, ResourceRouter -from sqlmodel import select +from sqlmodel import Session, select class ContactRouter(ResourceRouter): @@ -37,10 +38,13 @@ def resource_class(self) -> type[Contact]: @staticmethod def _verify_user_roles( - session, - contact: Contact, + session: Session, + contact: type[Contact], user: User | None, - ): + ) -> type[Contact]: + """ + Only authenticated users can see the contact email. + """ if not user: email_mask = "******" # This ensures that the API doesn't break in case the email mask exists in @@ -52,11 +56,25 @@ def _verify_user_roles( contact.email = [email] * len(contact.email) return contact - def _retrieve_resource(self, session, identifier, user=None, platform=None): - contact = super()._retrieve_resource(session, identifier, platform) + def _retrieve_resource( + self, + session: Session, + identifier: int | str, + user: User | None = None, + platform: str | None = None, + ) -> type[Contact]: + contact: type[Contact] = super()._retrieve_resource(session, identifier, user, platform) return self._verify_user_roles(session, contact, user) - def _retrieve_resources(self, session, pagination, user=None, platform=None): - contacts = super()._retrieve_resources(session, pagination, user, platform) + def _retrieve_resources( + self, + session: Session, + pagination: Pagination, + user: User | None = None, + platform: str | None = None, + ) -> Sequence[type[Contact]]: + contacts: Sequence[type[Contact]] = super()._retrieve_resources( + session, pagination, user, platform + ) contacts = [self._verify_user_roles(session, contact, user) for contact in contacts] return contacts diff --git a/src/routers/resource_routers/person_router.py b/src/routers/resource_routers/person_router.py index 75b3e0d4..916fd698 100644 --- a/src/routers/resource_routers/person_router.py +++ b/src/routers/resource_routers/person_router.py @@ -1,5 +1,7 @@ +from typing import Sequence +from sqlmodel import Session from database.model.agent.person import Person -from routers.resource_router import ResourceRouter +from routers.resource_router import Pagination, ResourceRouter from authentication import User @@ -21,7 +23,11 @@ def resource_class(self) -> type[Person]: return Person @staticmethod - def _verify_user_roles(person, user: User | None): + def _verify_user_roles(person: type[Person], user: User | None) -> type[Person]: + """ + Only users with role 'full_view_drupal_resources' can see sensitive + of a person from the old drupal platform. + """ if (person.platform == "drupal") and not ( user and user.has_role("full_view_drupal_resources") ): @@ -30,11 +36,25 @@ def _verify_user_roles(person, user: User | None): person.surname = "******" return person - def _retrieve_resource(self, session, identifier, user=None, platform=None): - person = super()._retrieve_resource(session, identifier, platform) + def _retrieve_resource( + self, + session: Session, + identifier: int | str, + user: User | None = None, + platform: str | None = None, + ) -> type[Person]: + person: type[Person] = super()._retrieve_resource(session, identifier, user, platform) return self._verify_user_roles(person, user) - def _retrieve_resources(self, session, pagination, user=None, platform=None): - persons = super()._retrieve_resources(session, pagination, user, platform) + def _retrieve_resources( + self, + session: Session, + pagination: Pagination, + user: User | None = None, + platform: str | None = None, + ) -> Sequence[type[Person]]: + persons: Sequence[type[Person]] = super()._retrieve_resources( + session, pagination, user, platform + ) persons = [self._verify_user_roles(person, user) for person in persons] return persons From b82af3b1ea5f2c1b28b869ba2523812b7bd342af Mon Sep 17 00:00:00 2001 From: Jean Matias Date: Fri, 12 Apr 2024 16:37:27 +0100 Subject: [PATCH 13/49] bug fix and tests included --- .../resource_routers/contact_router.py | 2 +- .../resource_routers/test_router_contact.py | 30 +++++++ .../resource_routers/test_router_person.py | 81 ++++++++++++------- src/tests/testutils/default_sqlalchemy.py | 11 +++ 4 files changed, 92 insertions(+), 32 deletions(-) diff --git a/src/routers/resource_routers/contact_router.py b/src/routers/resource_routers/contact_router.py index a97e4cee..608fbef2 100644 --- a/src/routers/resource_routers/contact_router.py +++ b/src/routers/resource_routers/contact_router.py @@ -53,7 +53,7 @@ def _verify_user_roles( if not email: email = Email(name=email_mask) session.add(email) - contact.email = [email] * len(contact.email) + contact.email = [email] return contact def _retrieve_resource( diff --git a/src/tests/routers/resource_routers/test_router_contact.py b/src/tests/routers/resource_routers/test_router_contact.py index d1955825..1723b068 100644 --- a/src/tests/routers/resource_routers/test_router_contact.py +++ b/src/tests/routers/resource_routers/test_router_contact.py @@ -4,6 +4,36 @@ from starlette.testclient import TestClient from authentication import keycloak_openid +from database.model.agent.contact import Contact +from database.model.agent.email import Email +from database.session import DbSession + + +def test_email_privacy( + client: TestClient, + mocked_privileged_token: Mock, + contact: Contact, +): + keycloak_openid.introspect = mocked_privileged_token + + with DbSession() as session: + email = Email(name="fake@email.com") + another_email = Email(name="fake2@email.com") + contact.email = [email, another_email] + session.add(contact) + session.commit() + + guest_response = client.get("/contacts/v1/") + assert guest_response.status_code == 200, guest_response.json() + guest_response_json = guest_response.json() + assert len(guest_response_json) == 1 + assert guest_response_json[0]["email"] == ["******"] + + response = client.get("/contacts/v1/", headers={"Authorization": "Fake token"}) + assert response.status_code == 200, response.json() + response_json = response.json() + assert len(response_json) == 1 + assert response_json[0]["email"] == ["fake@email.com", "fake2@email.com"] def test_happy_path(client: TestClient, mocked_privileged_token: Mock, body_asset: dict): diff --git a/src/tests/routers/resource_routers/test_router_person.py b/src/tests/routers/resource_routers/test_router_person.py index 7814d88f..07ea6baf 100644 --- a/src/tests/routers/resource_routers/test_router_person.py +++ b/src/tests/routers/resource_routers/test_router_person.py @@ -10,6 +10,56 @@ from database.session import DbSession +def test_drupal_privacy( + client: TestClient, + mocked_privileged_token: Mock, + mocked_drupal_token: Mock, + platform: Platform, + person: Person, + contact: Contact, +): + """Test to ensure that only authenticated users with "full_view_drupal_resources" role + can visualise fields such as name, given_name and surname of a person migrated from + the old drupal platform. + """ + + with DbSession() as session: + platform.name = "drupal" + session.add(platform) + person.platform = "drupal" + person.platform_resource_identifier = "2" + person.name = "Joe Doe" + person.given_name = "Joe" + person.surname = "Doe" + session.add(person) + session.add(contact) + session.commit() + + keycloak_openid.introspect = mocked_privileged_token + + response = client.get("/persons/v1/") + assert response.status_code == 200, response.json() + + response_json = response.json() + + assert len(response_json) == 1 + assert response_json[0]["name"] == "******" + assert response_json[0]["given_name"] == "******" + assert response_json[0]["surname"] == "******" + + keycloak_openid.introspect = mocked_drupal_token + + response = client.get("/persons/v1/", headers={"Authorization": "Fake token"}) + assert response.status_code == 200, response.json() + + response_json = response.json() + + assert len(response_json) == 1 + assert response_json[0]["name"] == "Joe Doe" + assert response_json[0]["given_name"] == "Joe" + assert response_json[0]["surname"] == "Doe" + + def test_happy_path( client: TestClient, mocked_privileged_token: Mock, @@ -49,34 +99,3 @@ def test_happy_path( assert response_json["price_per_hour_euro"] == 10.50 assert response_json["wants_to_be_contacted"] assert response_json["contact_details"] == 1 - - -def test_privacy( - client: TestClient, - mocked_privileged_token: Mock, - platform: Platform, - person: Person, - contact: Contact, -): - keycloak_openid.introspect = mocked_privileged_token - - with DbSession() as session: - platform.name = "drupal" - session.add(platform) - person.platform = "drupal" - person.platform_resource_identifier = "2" - person.given_name = "Joe" - person.surname = "Doe" - session.add(person) - session.add(contact) - session.commit() - - response = client.get("/persons/v1/") - assert response.status_code == 200, response.json() - - response_json = response.json() - - assert len(response_json) == 1 - assert response_json[0]["name"] == "******" - assert response_json[0]["given_name"] == "******" - assert response_json[0]["surname"] == "******" diff --git a/src/tests/testutils/default_sqlalchemy.py b/src/tests/testutils/default_sqlalchemy.py index c893bdc2..982e11a8 100644 --- a/src/tests/testutils/default_sqlalchemy.py +++ b/src/tests/testutils/default_sqlalchemy.py @@ -141,3 +141,14 @@ def mocked_token(request: SubRequest) -> Mock: def mocked_privileged_token() -> Mock: roles = ["offline_access", "uma_authorization", "default-roles-aiod", "edit_aiod_resources"] return Mock(return_value=_user_with_roles(*roles)) + + +@pytest.fixture() +def mocked_drupal_token() -> Mock: + roles = [ + "offline_access", + "uma_authorization", + "default-roles-aiod", + "full_view_drupal_resources", + ] + return Mock(return_value=_user_with_roles(*roles)) From f504c2b7ab05a770ddd340d4446d8b4db0ba0961 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Tue, 16 Apr 2024 16:50:17 +0200 Subject: [PATCH 14/49] Specify the references require identifiers of contact details --- src/database/model/agent/organisation.py | 3 ++- src/database/model/agent/person.py | 2 +- src/database/model/ai_resource/resource.py | 7 ++++--- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/database/model/agent/organisation.py b/src/database/model/agent/organisation.py index 5cefea81..60505e5f 100644 --- a/src/database/model/agent/organisation.py +++ b/src/database/model/agent/organisation.py @@ -50,7 +50,8 @@ class Organisation(OrganisationBase, Agent, table=True): # type: ignore [call-a class RelationshipConfig(Agent.RelationshipConfig): contact_details: int | None = OneToOne( - description="The contact details by which this organisation can be reached", + description="The identifier of the contact details by which this organisation " + "can be reached.", deserializer=FindByIdentifierDeserializer(Contact), _serializer=AttributeSerializer("identifier"), ) diff --git a/src/database/model/agent/person.py b/src/database/model/agent/person.py index 4408b70b..a8f783cc 100644 --- a/src/database/model/agent/person.py +++ b/src/database/model/agent/person.py @@ -58,7 +58,7 @@ class Person(PersonBase, Agent, table=True): # type: ignore [call-arg] class RelationshipConfig(Agent.RelationshipConfig): contact_details: int | None = OneToOne( - description="The contact details by which this person can be reached", + description="The identifier of the contact details by which this person can be reached", deserializer=FindByIdentifierDeserializer(Contact), _serializer=AttributeSerializer("identifier"), ) diff --git a/src/database/model/ai_resource/resource.py b/src/database/model/ai_resource/resource.py index fbfaf61d..5c04c757 100644 --- a/src/database/model/ai_resource/resource.py +++ b/src/database/model/ai_resource/resource.py @@ -180,14 +180,15 @@ class RelationshipConfig(AIoDConcept.RelationshipConfig): ) # TODO(jos): documentedIn - KnowledgeAsset. This should probably be defined on ResourceTable contact: list[int] = ManyToMany( - description="Contact information of persons/organisations that can be contacted about " - "this resource.", + description="The identifiers of the contact information of the persons and/or " + "organisations that can be contacted about this resource.", _serializer=AttributeSerializer("identifier"), deserializer=FindByIdentifierDeserializerList(Contact), default_factory_pydantic=list, ) creator: list[int] = ManyToMany( - description="Contact information of persons/organisations that created this resource.", + description="The identifiers of the contact information of the persons and/or " + "organisations that created this resource.", _serializer=AttributeSerializer("identifier"), deserializer=FindByIdentifierDeserializerList(Contact), default_factory_pydantic=list, From 34b146c066b944fcd7ddf398adf50a883b81fdcf Mon Sep 17 00:00:00 2001 From: Jean Matias Date: Thu, 18 Apr 2024 17:25:24 +0100 Subject: [PATCH 15/49] final adjustments and tests --- .../resource_routers/contact_router.py | 7 +- src/routers/resource_routers/person_router.py | 4 +- .../resource_routers/test_router_contact.py | 86 +++++++++++------ .../resource_routers/test_router_person.py | 96 +++++++++---------- 4 files changed, 113 insertions(+), 80 deletions(-) diff --git a/src/routers/resource_routers/contact_router.py b/src/routers/resource_routers/contact_router.py index 608fbef2..33a86b55 100644 --- a/src/routers/resource_routers/contact_router.py +++ b/src/routers/resource_routers/contact_router.py @@ -44,8 +44,12 @@ def _verify_user_roles( ) -> type[Contact]: """ Only authenticated users can see the contact email. + For the old drupal platform, only users with "full_view_drupal_resources" role + can view the contact emails. """ - if not user: + if not user or ( + (contact.platform == "drupal") and not user.has_role("full_view_drupal_resources") + ): email_mask = "******" # This ensures that the API doesn't break in case the email mask exists in # in the DB @@ -54,6 +58,7 @@ def _verify_user_roles( email = Email(name=email_mask) session.add(email) contact.email = [email] + return contact return contact def _retrieve_resource( diff --git a/src/routers/resource_routers/person_router.py b/src/routers/resource_routers/person_router.py index 916fd698..19c2e688 100644 --- a/src/routers/resource_routers/person_router.py +++ b/src/routers/resource_routers/person_router.py @@ -25,8 +25,8 @@ def resource_class(self) -> type[Person]: @staticmethod def _verify_user_roles(person: type[Person], user: User | None) -> type[Person]: """ - Only users with role 'full_view_drupal_resources' can see sensitive - of a person from the old drupal platform. + For the old drupal platform, only users with "full_view_drupal_resources" role can + see the person's sensitive information. """ if (person.platform == "drupal") and not ( user and user.has_role("full_view_drupal_resources") diff --git a/src/tests/routers/resource_routers/test_router_contact.py b/src/tests/routers/resource_routers/test_router_contact.py index 1723b068..f58533ab 100644 --- a/src/tests/routers/resource_routers/test_router_contact.py +++ b/src/tests/routers/resource_routers/test_router_contact.py @@ -6,36 +6,10 @@ from authentication import keycloak_openid from database.model.agent.contact import Contact from database.model.agent.email import Email +from database.model.platform.platform import Platform from database.session import DbSession -def test_email_privacy( - client: TestClient, - mocked_privileged_token: Mock, - contact: Contact, -): - keycloak_openid.introspect = mocked_privileged_token - - with DbSession() as session: - email = Email(name="fake@email.com") - another_email = Email(name="fake2@email.com") - contact.email = [email, another_email] - session.add(contact) - session.commit() - - guest_response = client.get("/contacts/v1/") - assert guest_response.status_code == 200, guest_response.json() - guest_response_json = guest_response.json() - assert len(guest_response_json) == 1 - assert guest_response_json[0]["email"] == ["******"] - - response = client.get("/contacts/v1/", headers={"Authorization": "Fake token"}) - assert response.status_code == 200, response.json() - response_json = response.json() - assert len(response_json) == 1 - assert response_json[0]["email"] == ["fake@email.com", "fake2@email.com"] - - def test_happy_path(client: TestClient, mocked_privileged_token: Mock, body_asset: dict): keycloak_openid.introspect = mocked_privileged_token client.post( @@ -108,3 +82,61 @@ def test_person_and_organisation_both_specified(client: TestClient, mocked_privi response = client.post("/contacts/v1", json=body, headers=headers) assert response.status_code == 400, response.json() assert response.json()["detail"] == "Person and organisation cannot be both filled." + + +def test_email_privacy( + client: TestClient, + mocked_privileged_token: Mock, + contact: Contact, +): + keycloak_openid.introspect = mocked_privileged_token + + with DbSession() as session: + email = Email(name="fake@email.com") + another_email = Email(name="fake2@email.com") + contact.email = [email, another_email] + session.add(contact) + session.commit() + + guest_response = client.get("/contacts/v1/1") + assert guest_response.status_code == 200, guest_response.json() + guest_response_json = guest_response.json() + assert guest_response_json["email"] == ["******"] + + response = client.get("/contacts/v1/1", headers={"Authorization": "Fake token"}) + assert response.status_code == 200, response.json() + response_json = response.json() + assert response_json["email"] == ["fake@email.com", "fake2@email.com"] + + +def test_email_privacy_for_drupal( + client: TestClient, + mocked_privileged_token: Mock, + mocked_drupal_token: Mock, + contact: Contact, + platform: Platform, +): + + with DbSession() as session: + platform.name = "drupal" + session.add(platform) + contact.platform = "drupal" + email = Email(name="fake@email.com") + another_email = Email(name="fake2@email.com") + contact.email = [email, another_email] + session.add(contact) + session.commit() + + keycloak_openid.introspect = mocked_privileged_token + + response = client.get("/contacts/v1/1", headers={"Authorization": "Fake token"}) + assert response.status_code == 200, response.json() + response_json = response.json() + assert response_json["email"] == ["******"] + + keycloak_openid.introspect = mocked_drupal_token + + response = client.get("/contacts/v1/1", headers={"Authorization": "Fake token"}) + assert response.status_code == 200, response.json() + response_json = response.json() + assert response_json["email"] == ["fake@email.com", "fake2@email.com"] diff --git a/src/tests/routers/resource_routers/test_router_person.py b/src/tests/routers/resource_routers/test_router_person.py index 07ea6baf..29716e32 100644 --- a/src/tests/routers/resource_routers/test_router_person.py +++ b/src/tests/routers/resource_routers/test_router_person.py @@ -10,56 +10,6 @@ from database.session import DbSession -def test_drupal_privacy( - client: TestClient, - mocked_privileged_token: Mock, - mocked_drupal_token: Mock, - platform: Platform, - person: Person, - contact: Contact, -): - """Test to ensure that only authenticated users with "full_view_drupal_resources" role - can visualise fields such as name, given_name and surname of a person migrated from - the old drupal platform. - """ - - with DbSession() as session: - platform.name = "drupal" - session.add(platform) - person.platform = "drupal" - person.platform_resource_identifier = "2" - person.name = "Joe Doe" - person.given_name = "Joe" - person.surname = "Doe" - session.add(person) - session.add(contact) - session.commit() - - keycloak_openid.introspect = mocked_privileged_token - - response = client.get("/persons/v1/") - assert response.status_code == 200, response.json() - - response_json = response.json() - - assert len(response_json) == 1 - assert response_json[0]["name"] == "******" - assert response_json[0]["given_name"] == "******" - assert response_json[0]["surname"] == "******" - - keycloak_openid.introspect = mocked_drupal_token - - response = client.get("/persons/v1/", headers={"Authorization": "Fake token"}) - assert response.status_code == 200, response.json() - - response_json = response.json() - - assert len(response_json) == 1 - assert response_json[0]["name"] == "Joe Doe" - assert response_json[0]["given_name"] == "Joe" - assert response_json[0]["surname"] == "Doe" - - def test_happy_path( client: TestClient, mocked_privileged_token: Mock, @@ -99,3 +49,49 @@ def test_happy_path( assert response_json["price_per_hour_euro"] == 10.50 assert response_json["wants_to_be_contacted"] assert response_json["contact_details"] == 1 + + +def test_privacy_for_drupal( + client: TestClient, + mocked_privileged_token: Mock, + mocked_drupal_token: Mock, + platform: Platform, + person: Person, + contact: Contact, +): + """Test to ensure that only authenticated users with "full_view_drupal_resources" role + can visualise fields such as name, given_name and surname of a person migrated from + the old drupal platform. + """ + + with DbSession() as session: + platform.name = "drupal" + session.add(platform) + person.platform = "drupal" + person.platform_resource_identifier = "2" + person.name = "Joe Doe" + person.given_name = "Joe" + person.surname = "Doe" + session.add(person) + session.add(contact) + session.commit() + + keycloak_openid.introspect = mocked_privileged_token + + response = client.get("/persons/v1/1") + assert response.status_code == 200, response.json() + + response_json = response.json() + assert response_json["name"] == "******" + assert response_json["given_name"] == "******" + assert response_json["surname"] == "******" + + keycloak_openid.introspect = mocked_drupal_token + + response = client.get("/persons/v1/1", headers={"Authorization": "Fake token"}) + assert response.status_code == 200, response.json() + + response_json = response.json() + assert response_json["name"] == "Joe Doe" + assert response_json["given_name"] == "Joe" + assert response_json["surname"] == "Doe" From 527d3f1ee9af8d8f1303fdfb855cdfae58ff7ce2 Mon Sep 17 00:00:00 2001 From: Jean Matias Date: Thu, 18 Apr 2024 19:02:38 +0100 Subject: [PATCH 16/49] clean up --- src/routers/resource_router.py | 164 ++++++++++-------- .../resource_routers/contact_router.py | 10 +- src/routers/resource_routers/person_router.py | 10 +- 3 files changed, 107 insertions(+), 77 deletions(-) diff --git a/src/routers/resource_router.py b/src/routers/resource_router.py index ac562d35..72278946 100644 --- a/src/routers/resource_router.py +++ b/src/routers/resource_router.py @@ -204,6 +204,35 @@ def create(self, url_prefix: str) -> APIRouter: ) return router + def create_resource(self, session: Session, resource_create_instance: SQLModel): + """Store a resource in the database""" + resource = self.resource_class.from_orm(resource_create_instance) + deserialize_resource_relationships( + session, self.resource_class, resource, resource_create_instance + ) + session.add(resource) + session.commit() + return resource + + def get_resource( + self, identifier: str, schema: str, user: User | None = None, platform: str | None = None + ): + """ + Get the resource identified by AIoD identifier (if platform is None) or by platform AND + platform-identifier (if platform is not None), return in given schema. + """ + _raise_error_on_invalid_schema(self._possible_schemas, schema) + try: + with DbSession() as session: + resource: Any = self._retrieve_resource_and_check_roles( + session, identifier, user, platform=platform + ) + if schema != "aiod": + return self.schema_converters[schema].convert(session, resource) + return self._wrap_with_headers(self.resource_class_read.from_orm(resource)) + except Exception as e: + raise as_http_exception(e) + def get_resources( self, schema: str, @@ -220,29 +249,31 @@ def get_resources( if schema != "aiod" else self.resource_class_read.from_orm ) - resources: Any = self._retrieve_resources(session, pagination, user, platform) + resources: Any = self._retrieve_resources_and_check_roles( + session, pagination, user, platform + ) return self._wrap_with_headers([convert_schema(resource) for resource in resources]) except Exception as e: raise as_http_exception(e) - def get_resource( - self, identifier: str, schema: str, user: User | None = None, platform: str | None = None - ): + def get_resource_func(self): """ - Get the resource identified by AIoD identifier (if platform is None) or by platform AND - platform-identifier (if platform is not None), return in given schema. + Return a function that can be used to retrieve a single resource. + This function returns a function (instead of being that function directly) because the + docstring and the variables are dynamic, and used in Swagger. """ - _raise_error_on_invalid_schema(self._possible_schemas, schema) - try: - with DbSession() as session: - resource: Any = self._retrieve_resource( - session, identifier, user, platform=platform - ) - if schema != "aiod": - return self.schema_converters[schema].convert(session, resource) - return self._wrap_with_headers(self.resource_class_read.from_orm(resource)) - except Exception as e: - raise as_http_exception(e) + + def get_resource( + identifier: str, + schema: self._possible_schemas_type = "aiod", # type: ignore + user: User | None = Depends(get_current_user), + ): + resource = self.get_resource( + identifier=identifier, schema=schema, user=user, platform=None + ) + return self._wrap_with_headers(resource) + + return get_resource def get_resources_func(self): """ @@ -302,14 +333,20 @@ def get_resource_count( return get_resource_count - def get_platform_resources_func(self): + def get_platform_resource_func(self): """ - Return a function that can be used to retrieve a list of resources for a platform. + Return a function that can be used to retrieve a single resource of a platform. This function returns a function (instead of being that function directly) because the docstring and the variables are dynamic, and used in Swagger. """ - def get_resources( + def get_resource( + identifier: Annotated[ + str, + Path( + description="The identifier under which the resource is known by the platform.", + ), + ], platform: Annotated[ str, Path( @@ -317,47 +354,20 @@ def get_resources( example="huggingface", ), ], - pagination: Annotated[Pagination, Depends(Pagination)], schema: self._possible_schemas_type = "aiod", # type:ignore ): - resources = self.get_resources(pagination=pagination, schema=schema, platform=platform) - return resources - - return get_resources - - def get_resource_func(self): - """ - Return a function that can be used to retrieve a single resource. - This function returns a function (instead of being that function directly) because the - docstring and the variables are dynamic, and used in Swagger. - """ - - def get_resource( - identifier: str, - schema: self._possible_schemas_type = "aiod", # type: ignore - user: User | None = Depends(get_current_user), - ): - resource = self.get_resource( - identifier=identifier, schema=schema, user=user, platform=None - ) - return self._wrap_with_headers(resource) + return self.get_resource(identifier=identifier, schema=schema, platform=platform) return get_resource - def get_platform_resource_func(self): + def get_platform_resources_func(self): """ - Return a function that can be used to retrieve a single resource of a platform. + Return a function that can be used to retrieve a list of resources for a platform. This function returns a function (instead of being that function directly) because the docstring and the variables are dynamic, and used in Swagger. """ - def get_resource( - identifier: Annotated[ - str, - Path( - description="The identifier under which the resource is known by the platform.", - ), - ], + def get_resources( platform: Annotated[ str, Path( @@ -365,11 +375,13 @@ def get_resource( example="huggingface", ), ], + pagination: Annotated[Pagination, Depends(Pagination)], schema: self._possible_schemas_type = "aiod", # type:ignore ): - return self.get_resource(identifier=identifier, schema=schema, platform=platform) + resources = self.get_resources(pagination=pagination, schema=schema, platform=platform) + return resources - return get_resource + return get_resources def register_resource_func(self): """ @@ -404,16 +416,6 @@ def register_resource( return register_resource - def create_resource(self, session: Session, resource_create_instance: SQLModel): - """Store a resource in the database""" - resource = self.resource_class.from_orm(resource_create_instance) - deserialize_resource_relationships( - session, self.resource_class, resource, resource_create_instance - ) - session.add(resource) - session.commit() - return resource - def put_resource_func(self): """ Return a function that can be used to update a resource. @@ -503,13 +505,11 @@ def _retrieve_resource( self, session: Session, identifier: int | str, - user: User | None = None, platform: str | None = None, ) -> type[RESOURCE_MODEL]: """ Retrieve a resource from the database based on the provided identifier - and platform (if applicable). The user parameter is option so subclasses can - implement further verification on user access to the resource. + and platform (if applicable). """ if platform is None: query = select(self.resource_class).where(self.resource_class.identifier == identifier) @@ -544,13 +544,11 @@ def _retrieve_resources( self, session: Session, pagination: Pagination, - user: User | None = None, platform: str | None = None, ) -> Sequence[type[RESOURCE_MODEL]]: """ Retrieve a sequence of resources from the database based on the provided identifier - and platform (if applicable). The user parameter is option so subclasses can - implement further verification on user access to the resource. + and platform (if applicable). """ where_clause = and_( is_(self.resource_class.date_deleted, None), @@ -565,6 +563,34 @@ def _retrieve_resources( resources: Sequence = session.scalars(query).all() return resources + def _retrieve_resource_and_check_roles( + self, + session: Session, + identifier: int | str, + user: User | None = None, + platform: str | None = None, + ) -> type[RESOURCE_MODEL]: + """ + Retrieve a resource from the database based on the provided identifier + and platform (if applicable). The user parameter can be used by subclasses to + implement further verification on user access to the resource. + """ + return self._retrieve_resource(session, identifier, platform) + + def _retrieve_resources_and_check_roles( + self, + session: Session, + pagination: Pagination, + user: User | None = None, + platform: str | None = None, + ) -> Sequence[type[RESOURCE_MODEL]]: + """ + Retrieve a sequence of resources from the database based on the provided identifier + and platform (if applicable). The user parameter can be used by subclasses to + implement further verification on user access to the resource. + """ + return self._retrieve_resources(session, pagination, platform) + @property def _possible_schemas(self) -> list[str]: return ["aiod"] + list(self.schema_converters.keys()) diff --git a/src/routers/resource_routers/contact_router.py b/src/routers/resource_routers/contact_router.py index 33a86b55..7a5e8a4a 100644 --- a/src/routers/resource_routers/contact_router.py +++ b/src/routers/resource_routers/contact_router.py @@ -61,24 +61,26 @@ def _verify_user_roles( return contact return contact - def _retrieve_resource( + def _retrieve_resource_and_check_roles( self, session: Session, identifier: int | str, user: User | None = None, platform: str | None = None, ) -> type[Contact]: - contact: type[Contact] = super()._retrieve_resource(session, identifier, user, platform) + contact: type[Contact] = super()._retrieve_resource_and_check_roles( + session, identifier, user, platform + ) return self._verify_user_roles(session, contact, user) - def _retrieve_resources( + def _retrieve_resources_and_check_roles( self, session: Session, pagination: Pagination, user: User | None = None, platform: str | None = None, ) -> Sequence[type[Contact]]: - contacts: Sequence[type[Contact]] = super()._retrieve_resources( + contacts: Sequence[type[Contact]] = super()._retrieve_resources_and_check_roles( session, pagination, user, platform ) contacts = [self._verify_user_roles(session, contact, user) for contact in contacts] diff --git a/src/routers/resource_routers/person_router.py b/src/routers/resource_routers/person_router.py index 19c2e688..0e9ab64d 100644 --- a/src/routers/resource_routers/person_router.py +++ b/src/routers/resource_routers/person_router.py @@ -36,24 +36,26 @@ def _verify_user_roles(person: type[Person], user: User | None) -> type[Person]: person.surname = "******" return person - def _retrieve_resource( + def _retrieve_resource_and_check_roles( self, session: Session, identifier: int | str, user: User | None = None, platform: str | None = None, ) -> type[Person]: - person: type[Person] = super()._retrieve_resource(session, identifier, user, platform) + person: type[Person] = super()._retrieve_resource_and_check_roles( + session, identifier, user, platform + ) return self._verify_user_roles(person, user) - def _retrieve_resources( + def _retrieve_resources_and_check_roles( self, session: Session, pagination: Pagination, user: User | None = None, platform: str | None = None, ) -> Sequence[type[Person]]: - persons: Sequence[type[Person]] = super()._retrieve_resources( + persons: Sequence[type[Person]] = super()._retrieve_resources_and_check_roles( session, pagination, user, platform ) persons = [self._verify_user_roles(person, user) for person in persons] From 83957954850c402521222de5544341d91aa8d7e9 Mon Sep 17 00:00:00 2001 From: Jean Matias Date: Fri, 19 Apr 2024 11:23:14 +0100 Subject: [PATCH 17/49] put methods in original order to ease review --- src/routers/resource_router.py | 124 ++++++++++++++++----------------- 1 file changed, 62 insertions(+), 62 deletions(-) diff --git a/src/routers/resource_router.py b/src/routers/resource_router.py index 72278946..a3289cf0 100644 --- a/src/routers/resource_router.py +++ b/src/routers/resource_router.py @@ -204,35 +204,6 @@ def create(self, url_prefix: str) -> APIRouter: ) return router - def create_resource(self, session: Session, resource_create_instance: SQLModel): - """Store a resource in the database""" - resource = self.resource_class.from_orm(resource_create_instance) - deserialize_resource_relationships( - session, self.resource_class, resource, resource_create_instance - ) - session.add(resource) - session.commit() - return resource - - def get_resource( - self, identifier: str, schema: str, user: User | None = None, platform: str | None = None - ): - """ - Get the resource identified by AIoD identifier (if platform is None) or by platform AND - platform-identifier (if platform is not None), return in given schema. - """ - _raise_error_on_invalid_schema(self._possible_schemas, schema) - try: - with DbSession() as session: - resource: Any = self._retrieve_resource_and_check_roles( - session, identifier, user, platform=platform - ) - if schema != "aiod": - return self.schema_converters[schema].convert(session, resource) - return self._wrap_with_headers(self.resource_class_read.from_orm(resource)) - except Exception as e: - raise as_http_exception(e) - def get_resources( self, schema: str, @@ -256,24 +227,24 @@ def get_resources( except Exception as e: raise as_http_exception(e) - def get_resource_func(self): + def get_resource( + self, identifier: str, schema: str, user: User | None = None, platform: str | None = None + ): """ - Return a function that can be used to retrieve a single resource. - This function returns a function (instead of being that function directly) because the - docstring and the variables are dynamic, and used in Swagger. + Get the resource identified by AIoD identifier (if platform is None) or by platform AND + platform-identifier (if platform is not None), return in given schema. """ - - def get_resource( - identifier: str, - schema: self._possible_schemas_type = "aiod", # type: ignore - user: User | None = Depends(get_current_user), - ): - resource = self.get_resource( - identifier=identifier, schema=schema, user=user, platform=None - ) - return self._wrap_with_headers(resource) - - return get_resource + _raise_error_on_invalid_schema(self._possible_schemas, schema) + try: + with DbSession() as session: + resource: Any = self._retrieve_resource_and_check_roles( + session, identifier, user, platform=platform + ) + if schema != "aiod": + return self.schema_converters[schema].convert(session, resource) + return self._wrap_with_headers(self.resource_class_read.from_orm(resource)) + except Exception as e: + raise as_http_exception(e) def get_resources_func(self): """ @@ -333,20 +304,14 @@ def get_resource_count( return get_resource_count - def get_platform_resource_func(self): + def get_platform_resources_func(self): """ - Return a function that can be used to retrieve a single resource of a platform. + Return a function that can be used to retrieve a list of resources for a platform. This function returns a function (instead of being that function directly) because the docstring and the variables are dynamic, and used in Swagger. """ - def get_resource( - identifier: Annotated[ - str, - Path( - description="The identifier under which the resource is known by the platform.", - ), - ], + def get_resources( platform: Annotated[ str, Path( @@ -354,20 +319,47 @@ def get_resource( example="huggingface", ), ], + pagination: Annotated[Pagination, Depends(Pagination)], schema: self._possible_schemas_type = "aiod", # type:ignore ): - return self.get_resource(identifier=identifier, schema=schema, platform=platform) + resources = self.get_resources(pagination=pagination, schema=schema, platform=platform) + return resources + + return get_resources + + def get_resource_func(self): + """ + Return a function that can be used to retrieve a single resource. + This function returns a function (instead of being that function directly) because the + docstring and the variables are dynamic, and used in Swagger. + """ + + def get_resource( + identifier: str, + schema: self._possible_schemas_type = "aiod", # type: ignore + user: User | None = Depends(get_current_user), + ): + resource = self.get_resource( + identifier=identifier, schema=schema, user=user, platform=None + ) + return self._wrap_with_headers(resource) return get_resource - def get_platform_resources_func(self): + def get_platform_resource_func(self): """ - Return a function that can be used to retrieve a list of resources for a platform. + Return a function that can be used to retrieve a single resource of a platform. This function returns a function (instead of being that function directly) because the docstring and the variables are dynamic, and used in Swagger. """ - def get_resources( + def get_resource( + identifier: Annotated[ + str, + Path( + description="The identifier under which the resource is known by the platform.", + ), + ], platform: Annotated[ str, Path( @@ -375,13 +367,11 @@ def get_resources( example="huggingface", ), ], - pagination: Annotated[Pagination, Depends(Pagination)], schema: self._possible_schemas_type = "aiod", # type:ignore ): - resources = self.get_resources(pagination=pagination, schema=schema, platform=platform) - return resources + return self.get_resource(identifier=identifier, schema=schema, platform=platform) - return get_resources + return get_resource def register_resource_func(self): """ @@ -416,6 +406,16 @@ def register_resource( return register_resource + def create_resource(self, session: Session, resource_create_instance: SQLModel): + """Store a resource in the database""" + resource = self.resource_class.from_orm(resource_create_instance) + deserialize_resource_relationships( + session, self.resource_class, resource, resource_create_instance + ) + session.add(resource) + session.commit() + return resource + def put_resource_func(self): """ Return a function that can be used to update a resource. From 1eb0150b8e21f643ceb1eb9b2d09c2fe00b90b45 Mon Sep 17 00:00:00 2001 From: Jean Matias Date: Mon, 29 Apr 2024 16:55:41 +0200 Subject: [PATCH 18/49] simplify get_user methods for raising or not raising exceptions --- src/authentication.py | 84 +++++++++---------- src/main.py | 4 +- src/routers/resource_router.py | 12 +-- .../upload_router_huggingface.py | 4 +- .../uploader_routers/upload_router_zenodo.py | 4 +- .../routers/generic/test_authentication.py | 4 +- src/tests/test_authentication.py | 25 +++--- 7 files changed, 69 insertions(+), 68 deletions(-) diff --git a/src/authentication.py b/src/authentication.py index c33cd3d9..0398ac5e 100644 --- a/src/authentication.py +++ b/src/authentication.py @@ -56,15 +56,15 @@ def has_any_role(self, *roles: str) -> bool: return bool(set(roles) & self.roles) -async def get_current_user_or_raise_exception(token=Security(oidc)) -> User: + +async def _get_user(token) -> User: """ - Use this function in Depends() to force authentication. Check the roles of the user for - authorization. + Check the roles of the user for authorization. Raises: - HTTPException with status 401 on missing token (unauthorized message), also status 401 on - any problem with the token (we don't want to leak information), status 500 on any - request if Keycloak is configured incorrectly. + NoTokenError on missing token (unauthorized message) and InvalidUserError on inactive user. + Also HTTPException with status 401 on any problem with the token (we don't want to leak information), + and status 500 on any request if Keycloak is configured incorrectly. """ if not client_secret: raise HTTPException( @@ -74,11 +74,7 @@ async def get_current_user_or_raise_exception(token=Security(oidc)) -> User: "from a Keycloak Administrator of AIoD.", ) if not token: - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail="This endpoint requires authorization. You need to be logged in.", - headers={"WWW-Authenticate": "Bearer"}, - ) + raise NoTokenError("No token found") try: token = token.replace("Bearer ", "") # query the authorization server to determine the active state of this token and to @@ -87,54 +83,56 @@ async def get_current_user_or_raise_exception(token=Security(oidc)) -> User: if not userinfo.get("active", False): logging.error("Invalid userinfo or inactive user.") - raise RuntimeError("Invalid userinfo or inactive user.") # caught below + raise InvalidUserError("Invalid userinfo or inactive user") # caught below return User( name=userinfo["username"], roles=set(userinfo.get("realm_access", {}).get("roles", [])) ) + except InvalidUserError: + raise except Exception as e: logging.error(f"Error while checking the access token: '{e}'") raise HTTPException( status_code=status.HTTP_401_UNAUTHORIZED, - detail="Invalid authentication token", + detail=f"Invalid authentication token", headers={"WWW-Authenticate": "Bearer"}, ) + - -async def get_current_user(token=Security(oidc)) -> User | None: +async def get_user_or_none(token=Security(oidc)) -> User | None: """ Use this function in Depends() to ask for authentication. - Check whether the user is authenticated or not. + This method should be only used to get the current user + without raising exception when the token is not found, + or the user is not active, or the userinfo is invalid. + """ + try: + return await _get_user(token) + except (NoTokenError, InvalidUserError): + return None + +async def get_user_or_raise(token=Security(oidc)) -> User: + """ + Use this function in Depends() to force authentication. Check the roles of the user for + authorization. Raises: - HTTPException with status 401 on any problem with the token - (we don't want to leak information), - and status 500 on any request if Keycloak is configured incorrectly. + HTTPException with status 401 on missing token (unauthorized message), or invalid user. + It also raises a HTTPException with status 401 on + any problem with the token (we don't want to leak information), + status 500 on any request if Keycloak is configured incorrectly. """ - if not client_secret: - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail="This instance is not configured correctly. You'll need to set the env var " - "KEYCLOAK_CLIENT_SECRET (e.g. in src/.env). You need to obtain this secret " - "from a Keycloak Administrator of AIoD.", - ) - if not token: - return None try: - token = token.replace("Bearer ", "") - # query the authorization server to determine the active state of this token and to - # determine meta-information. - userinfo = keycloak_openid.introspect(token) - - if not userinfo.get("active", False): - logging.error("Invalid userinfo or inactive user.") - return None - return User( - name=userinfo["username"], roles=set(userinfo.get("realm_access", {}).get("roles", [])) - ) - except Exception as e: - logging.error(f"Error while checking the access token: '{e}'") + return await _get_user(token) + except (InvalidUserError, NoTokenError) as err: raise HTTPException( status_code=status.HTTP_401_UNAUTHORIZED, - detail="Invalid authentication token", + detail= f"{err} - This endpoint requires authorization. You need to be logged in.", headers={"WWW-Authenticate": "Bearer"}, - ) + ) from err + + +class InvalidUserError(Exception): + """Raise an error on invalid userinfo or inactive user.""" + +class NoTokenError(Exception): + """Raise an error when no token is found.""" \ No newline at end of file diff --git a/src/main.py b/src/main.py index 4484fb50..44acb904 100644 --- a/src/main.py +++ b/src/main.py @@ -13,7 +13,7 @@ from fastapi.responses import HTMLResponse from sqlmodel import select -from authentication import get_current_user_or_raise_exception, User +from authentication import get_user_or_raise, User from config import KEYCLOAK_CONFIG from database.deletion.triggers import add_delete_triggers from database.model.concept.concept import AIoDConcept @@ -63,7 +63,7 @@ def home() -> str: """ @app.get(url_prefix + "/authorization_test") - def test_authorization(user: User = Depends(get_current_user_or_raise_exception)) -> User: + def test_authorization(user: User = Depends(get_user_or_raise)) -> User: """ Returns the user, if authenticated correctly. """ diff --git a/src/routers/resource_router.py b/src/routers/resource_router.py index a3289cf0..34b381b0 100644 --- a/src/routers/resource_router.py +++ b/src/routers/resource_router.py @@ -13,7 +13,7 @@ from sqlmodel import SQLModel, Session, select, Field from starlette.responses import JSONResponse -from authentication import User, get_current_user, get_current_user_or_raise_exception +from authentication import User, get_user_or_none, get_user_or_raise from config import KEYCLOAK_CONFIG from converters.schema_converters.schema_converter import SchemaConverter from database.model.ai_resource.resource import AbstractAIResource @@ -256,7 +256,7 @@ def get_resources_func(self): def get_resources( pagination: Pagination = Depends(), schema: self._possible_schemas_type = "aiod", # type:ignore - user: User | None = Depends(get_current_user), + user: User | None = Depends(get_user_or_none), ): resources = self.get_resources( pagination=pagination, schema=schema, user=user, platform=None @@ -337,7 +337,7 @@ def get_resource_func(self): def get_resource( identifier: str, schema: self._possible_schemas_type = "aiod", # type: ignore - user: User | None = Depends(get_current_user), + user: User | None = Depends(get_user_or_none), ): resource = self.get_resource( identifier=identifier, schema=schema, user=user, platform=None @@ -383,7 +383,7 @@ def register_resource_func(self): def register_resource( resource_create: clz_create, # type: ignore - user: User = Depends(get_current_user_or_raise_exception), + user: User = Depends(get_user_or_raise), ): if not user.has_any_role( KEYCLOAK_CONFIG.get("role"), @@ -427,7 +427,7 @@ def put_resource_func(self): def put_resource( identifier: int, resource_create_instance: clz_create, # type: ignore - user: User = Depends(get_current_user_or_raise_exception), + user: User = Depends(get_user_or_raise), ): if not user.has_any_role( KEYCLOAK_CONFIG.get("role"), @@ -471,7 +471,7 @@ def delete_resource_func(self): def delete_resource( identifier: str, - user: User = Depends(get_current_user_or_raise_exception), + user: User = Depends(get_user_or_raise), ): with DbSession() as session: if not user.has_any_role( diff --git a/src/routers/uploader_routers/upload_router_huggingface.py b/src/routers/uploader_routers/upload_router_huggingface.py index 70e4cb7c..a31c95ad 100644 --- a/src/routers/uploader_routers/upload_router_huggingface.py +++ b/src/routers/uploader_routers/upload_router_huggingface.py @@ -1,7 +1,7 @@ from fastapi import APIRouter, Depends from fastapi import File, Query, UploadFile -from authentication import User, get_current_user_or_raise_exception +from authentication import User, get_user_or_raise from routers.uploader_router import UploaderRouter from uploaders.hugging_face_uploader import HuggingfaceUploader @@ -24,7 +24,7 @@ def huggingface_upload( username: str = Query( ..., title="Huggingface username", description="The username of HuggingFace" ), - user: User = Depends(get_current_user_or_raise_exception), + user: User = Depends(get_user_or_raise), ) -> int: """ Use this endpoint to upload a file (content) to Hugging Face using diff --git a/src/routers/uploader_routers/upload_router_zenodo.py b/src/routers/uploader_routers/upload_router_zenodo.py index 780fb656..b96b5dc8 100644 --- a/src/routers/uploader_routers/upload_router_zenodo.py +++ b/src/routers/uploader_routers/upload_router_zenodo.py @@ -3,7 +3,7 @@ from fastapi import APIRouter, Depends from fastapi import File, Query, UploadFile, Path -from authentication import User, get_current_user_or_raise_exception +from authentication import User, get_user_or_raise from uploaders.zenodo_uploader import ZenodoUploader from routers.uploader_router import UploaderRouter @@ -32,7 +32,7 @@ def zenodo_upload( ), ] = False, token: str = Query(title="Zenodo Token", description="The access token of Zenodo"), - user: User = Depends(get_current_user_or_raise_exception), + user: User = Depends(get_user_or_raise), ) -> int: """ Use this endpoint to upload a file (content) to Zenodo using diff --git a/src/tests/routers/generic/test_authentication.py b/src/tests/routers/generic/test_authentication.py index 13f35649..1af60984 100644 --- a/src/tests/routers/generic/test_authentication.py +++ b/src/tests/routers/generic/test_authentication.py @@ -124,7 +124,7 @@ def test_post_unauthenticated(client_test_resource: TestClient): assert response.status_code == 401, response.json() response_json = response.json() assert ( - response_json["detail"] == "This endpoint requires authorization. You need to be logged in." + response_json["detail"] == "No token found - This endpoint requires authorization. You need to be logged in." ) @@ -173,5 +173,5 @@ def test_put_unauthenticated(client_test_resource: TestClient): assert response.status_code == 401, response.json() response_json = response.json() assert ( - response_json["detail"] == "This endpoint requires authorization. You need to be logged in." + response_json["detail"] == "No token found - This endpoint requires authorization. You need to be logged in." ) diff --git a/src/tests/test_authentication.py b/src/tests/test_authentication.py index a8de8cf9..799d91fd 100644 --- a/src/tests/test_authentication.py +++ b/src/tests/test_authentication.py @@ -1,4 +1,4 @@ -"""Unittests for the behaviour of get_current_user_or_raise_exception().""" +"""Unittests for the behaviour of get_user_or_raise().""" import inspect from unittest.mock import Mock @@ -9,14 +9,14 @@ from starlette import status -from authentication import get_current_user_or_raise_exception, keycloak_openid, User +from authentication import get_user_or_raise, keycloak_openid, User from tests.testutils.mock_keycloak import MockedKeycloak, TestUserType @pytest.mark.asyncio async def test_happy_path(): with MockedKeycloak() as _: - user = await get_current_user_or_raise_exception(token="Bearer mocked") + user = await get_user_or_raise(token="Bearer mocked") assert user.name == "user" assert set(user.roles) == {"offline_access", "uma_authorization", "default-roles-aiod"} @@ -24,7 +24,7 @@ async def test_happy_path(): @pytest.mark.asyncio async def test_happy_path_privileged(): with MockedKeycloak(type_=TestUserType.privileged) as _: - user = await get_current_user_or_raise_exception(token="Bearer mocked") + user = await get_user_or_raise(token="Bearer mocked") assert user.name == "user" assert set(user.roles) == { "offline_access", @@ -34,14 +34,14 @@ async def test_happy_path_privileged(): } -def test_get_current_user_leaks_no_information(): +def test_get_user_or_none_leaks_no_information(): """ Make sure an error is thrown if you change the fields on User. There may be good reasons to make a change, but please be very careful: we don't want to expose sensitive information to our application if it is not necessary. Moreover, the User class is returned by the authorization_test endpoint. """ - assert inspect.signature(get_current_user_or_raise_exception).return_annotation == User + assert inspect.signature(get_user_or_raise).return_annotation == User assert set(inspect.get_annotations(User)) == {"name", "roles"} @@ -49,20 +49,23 @@ def test_get_current_user_leaks_no_information(): async def test_inactive_user(): with MockedKeycloak(type_=TestUserType.inactive) as _: with pytest.raises(HTTPException) as exception_info: - await get_current_user_or_raise_exception(token="Bearer mocked") + await get_user_or_raise(token="Bearer mocked") assert exception_info.value.status_code == status.HTTP_401_UNAUTHORIZED - assert exception_info.value.detail == "Invalid authentication token" + assert exception_info.value.detail == ( + "Invalid userinfo or inactive user - " + "This endpoint requires authorization. You need to be logged in." + ) @pytest.mark.asyncio async def test_unauthenticated(): with pytest.raises(HTTPException) as exception_info: - await get_current_user_or_raise_exception(token=None) + await get_user_or_raise(token=None) assert exception_info.value.status_code == status.HTTP_401_UNAUTHORIZED assert ( exception_info.value.detail - == "This endpoint requires authorization. You need to be logged in." + == "No token found - This endpoint requires authorization. You need to be logged in." ) @@ -80,6 +83,6 @@ async def test_keycloak_error(): ) ) with pytest.raises(HTTPException) as exception_info: - await get_current_user_or_raise_exception(token="Bearer mocked") + await get_user_or_raise(token="Bearer mocked") assert exception_info.value.status_code == status.HTTP_401_UNAUTHORIZED assert exception_info.value.detail == "Invalid authentication token" From 187c426ce542e0242c6b0a28d4951477c28be79b Mon Sep 17 00:00:00 2001 From: Jean Matias Date: Mon, 29 Apr 2024 17:00:03 +0200 Subject: [PATCH 19/49] simplify get_user methods for raising or not raising exceptions --- src/authentication.py | 26 ++++++++++--------- .../routers/generic/test_authentication.py | 6 +++-- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/src/authentication.py b/src/authentication.py index 0398ac5e..105ec7f8 100644 --- a/src/authentication.py +++ b/src/authentication.py @@ -56,15 +56,15 @@ def has_any_role(self, *roles: str) -> bool: return bool(set(roles) & self.roles) - async def _get_user(token) -> User: """ Check the roles of the user for authorization. Raises: NoTokenError on missing token (unauthorized message) and InvalidUserError on inactive user. - Also HTTPException with status 401 on any problem with the token (we don't want to leak information), - and status 500 on any request if Keycloak is configured incorrectly. + Also HTTPException with status 401 on any problem with the token + (we don't want to leak information), and status 500 on any request + if Keycloak is configured incorrectly. """ if not client_secret: raise HTTPException( @@ -74,7 +74,7 @@ async def _get_user(token) -> User: "from a Keycloak Administrator of AIoD.", ) if not token: - raise NoTokenError("No token found") + raise NoTokenError("No token found") try: token = token.replace("Bearer ", "") # query the authorization server to determine the active state of this token and to @@ -88,21 +88,21 @@ async def _get_user(token) -> User: name=userinfo["username"], roles=set(userinfo.get("realm_access", {}).get("roles", [])) ) except InvalidUserError: - raise + raise except Exception as e: logging.error(f"Error while checking the access token: '{e}'") raise HTTPException( status_code=status.HTTP_401_UNAUTHORIZED, - detail=f"Invalid authentication token", + detail="Invalid authentication token", headers={"WWW-Authenticate": "Bearer"}, ) - + async def get_user_or_none(token=Security(oidc)) -> User | None: """ Use this function in Depends() to ask for authentication. - This method should be only used to get the current user - without raising exception when the token is not found, + This method should be only used to get the current user + without raising exception when the token is not found, or the user is not active, or the userinfo is invalid. """ try: @@ -110,6 +110,7 @@ async def get_user_or_none(token=Security(oidc)) -> User | None: except (NoTokenError, InvalidUserError): return None + async def get_user_or_raise(token=Security(oidc)) -> User: """ Use this function in Depends() to force authentication. Check the roles of the user for @@ -118,7 +119,7 @@ async def get_user_or_raise(token=Security(oidc)) -> User: Raises: HTTPException with status 401 on missing token (unauthorized message), or invalid user. It also raises a HTTPException with status 401 on - any problem with the token (we don't want to leak information), + any problem with the token (we don't want to leak information), status 500 on any request if Keycloak is configured incorrectly. """ try: @@ -126,7 +127,7 @@ async def get_user_or_raise(token=Security(oidc)) -> User: except (InvalidUserError, NoTokenError) as err: raise HTTPException( status_code=status.HTTP_401_UNAUTHORIZED, - detail= f"{err} - This endpoint requires authorization. You need to be logged in.", + detail=f"{err} - This endpoint requires authorization. You need to be logged in.", headers={"WWW-Authenticate": "Bearer"}, ) from err @@ -134,5 +135,6 @@ async def get_user_or_raise(token=Security(oidc)) -> User: class InvalidUserError(Exception): """Raise an error on invalid userinfo or inactive user.""" + class NoTokenError(Exception): - """Raise an error when no token is found.""" \ No newline at end of file + """Raise an error when no token is found.""" diff --git a/src/tests/routers/generic/test_authentication.py b/src/tests/routers/generic/test_authentication.py index 1af60984..3325993e 100644 --- a/src/tests/routers/generic/test_authentication.py +++ b/src/tests/routers/generic/test_authentication.py @@ -124,7 +124,8 @@ def test_post_unauthenticated(client_test_resource: TestClient): assert response.status_code == 401, response.json() response_json = response.json() assert ( - response_json["detail"] == "No token found - This endpoint requires authorization. You need to be logged in." + response_json["detail"] + == "No token found - This endpoint requires authorization. You need to be logged in." ) @@ -173,5 +174,6 @@ def test_put_unauthenticated(client_test_resource: TestClient): assert response.status_code == 401, response.json() response_json = response.json() assert ( - response_json["detail"] == "No token found - This endpoint requires authorization. You need to be logged in." + response_json["detail"] + == "No token found - This endpoint requires authorization. You need to be logged in." ) From 284a6af9915de26172d06c158fef01bc12c68c82 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Tue, 30 Apr 2024 11:10:25 +0200 Subject: [PATCH 20/49] Separate build and test, add publish to dockerhub --- .github/workflows/dockerized-tests.yml | 83 ++++++++++++++++++++++---- 1 file changed, 73 insertions(+), 10 deletions(-) diff --git a/.github/workflows/dockerized-tests.yml b/.github/workflows/dockerized-tests.yml index 7c4561a0..9f944179 100644 --- a/.github/workflows/dockerized-tests.yml +++ b/.github/workflows/dockerized-tests.yml @@ -1,4 +1,4 @@ -name: Built docker image and run tests +name: Docker - build, test, push # This workflow will built docker image and run tests inside the container. # This workflow is only executed if there is pull request with change in pyproject.toml dependencies, @@ -15,17 +15,80 @@ on: workflow_dispatch: jobs: - built: + build: runs-on: ubuntu-latest - permissions: - packages: write + steps: + - uses: actions/checkout@v4 + # We do not bother with setup-qemu-action since we don't care about emulation right now + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + - name: Build + uses: docker/build-push-action@v5 + with: + context: . + file: ./Dockerfile + tags: aiod/metadata_catalogue:latest + outputs: type=docker,dest=/tmp/aiod_mc_image.tar + # We store the image as an artifact so it can be used by the `test` step, + # and inspected manually if needed (download it through Github Actions UI) + - name: Store Image + uses: actions/upload-artifact@v4 + with: + name: aiod_mc_image + path: /tmp/aiod_mc_image.tar + test: + runs-on: ubuntu-latest + needs: [build] steps: - - uses: actions/checkout@v2 - - name: Build the docker image - run: docker build --tag aiod_metadata_catalogue:latest -f Dockerfile . - - - name: Run docker container and pytest tests + # We need to check out the repository, so that we have the `scripts` directory to mount. + - uses: actions/checkout@v4 + - name: Retrieve Image + uses: actions/download-artifact@v4 + with: + name: aiod_mc_image + path: /tmp + - name: Load Image + run: | + docker load --input /tmp/aiod_mc_image.tar + docker image ls -a + - name: Run pytest from docker run: | - docker run -v ./scripts:/scripts -e KEYCLOAK_CLIENT_SECRET="mocked_secret" --entrypoint "" aiod_metadata_catalogue sh -c "pip install \".[dev]\" && pytest tests -s" + docker run -v ./scripts:/scripts -e KEYCLOAK_CLIENT_SECRET="mocked_secret" --entrypoint "" aiod/metadata_catalogue sh -c "pip install \".[dev]\" && pytest tests -s" + publish: + # TODO: Make conditional on being a release or manual action + needs: [test] + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + # My hope is that by retrieving the image, it can use the cache. + # But I fear that cache needs to be archived/restored separately. + # I want to avoid re-building the docker image, but I do not see + # an option for that in build-push-action. Perhaps it should just + # be a shell `docker push` command instead. + - name: Retrieve Image + uses: actions/download-artifact@v4 + with: + name: aiod_mc_image + path: /tmp + - name: Load Image + run: | + docker load --input /tmp/aiod_mc_image.tar + docker image ls -a + - uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.AIOD_DOCKER_PAT }} + - name: Build + uses: docker/build-push-action@v5 + with: + push: true + context: . + file: ./Dockerfile + tags: | + aiod/metadata_catalogue:latest + aiod/metadata_catalogue:${{ github.sha }} + # peter-evans/dockerhub-description@v3 From d0326fd8e6fe3de5f48c573674a34d54f9ef1cc3 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Tue, 30 Apr 2024 15:05:48 +0200 Subject: [PATCH 21/49] Add automatic updating of docker repository descriptions --- .github/workflows/dockerized-tests.yml | 13 ++++++++++++- docker-description.md | 6 ++++++ 2 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 docker-description.md diff --git a/.github/workflows/dockerized-tests.yml b/.github/workflows/dockerized-tests.yml index 9f944179..0a4a3fc7 100644 --- a/.github/workflows/dockerized-tests.yml +++ b/.github/workflows/dockerized-tests.yml @@ -29,6 +29,8 @@ jobs: file: ./Dockerfile tags: aiod/metadata_catalogue:latest outputs: type=docker,dest=/tmp/aiod_mc_image.tar + cache-from: type=gha + cache-to: type=gha,mode=min # We store the image as an artifact so it can be used by the `test` step, # and inspected manually if needed (download it through Github Actions UI) - name: Store Image @@ -91,4 +93,13 @@ jobs: tags: | aiod/metadata_catalogue:latest aiod/metadata_catalogue:${{ github.sha }} - # peter-evans/dockerhub-description@v3 + cache-from: type=gha + cache-to: type=gha,mode=min + - name: Update repository description + uses: peter-evans/dockerhub-description@v4 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.AIOD_DOCKER_PAT }} + repository: aiod/metadata_catalogue + readme-filepath: ./docker-description.md + short-description: "Metadata catalogue REST API for AI on Demand." diff --git a/docker-description.md b/docker-description.md new file mode 100644 index 00000000..8190d619 --- /dev/null +++ b/docker-description.md @@ -0,0 +1,6 @@ +# AIOD Metadata Catalogue + +Image for AI on Demand's (AIOD) metadata catalogue REST API, developed on [Github](https://github.com/aiondemand/AIOD-rest-api/). +This image also requires _at least_ a database setup to function, but for all features authentication (through keycloak) and +search (through elastic search) also need to be configured. +Please refer to the documentation in the readme of the ["AIOD-rest-api"](https://github.com/aiondemand/AIOD-rest-api) repository. \ No newline at end of file From 85a328993d4297539bf4e47553fb219d53477c04 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Tue, 30 Apr 2024 15:39:57 +0200 Subject: [PATCH 22/49] Fetching the image serves no purpose, build cache is used. --- .github/workflows/dockerized-tests.yml | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/.github/workflows/dockerized-tests.yml b/.github/workflows/dockerized-tests.yml index 0a4a3fc7..15167181 100644 --- a/.github/workflows/dockerized-tests.yml +++ b/.github/workflows/dockerized-tests.yml @@ -66,20 +66,6 @@ jobs: - uses: actions/checkout@v4 - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - # My hope is that by retrieving the image, it can use the cache. - # But I fear that cache needs to be archived/restored separately. - # I want to avoid re-building the docker image, but I do not see - # an option for that in build-push-action. Perhaps it should just - # be a shell `docker push` command instead. - - name: Retrieve Image - uses: actions/download-artifact@v4 - with: - name: aiod_mc_image - path: /tmp - - name: Load Image - run: | - docker load --input /tmp/aiod_mc_image.tar - docker image ls -a - uses: docker/login-action@v3 with: username: ${{ secrets.DOCKERHUB_USERNAME }} From 1694b8078a4565a23f9759abf56f17d84fd6fa24 Mon Sep 17 00:00:00 2001 From: Jean Matias Date: Tue, 30 Apr 2024 15:41:07 +0200 Subject: [PATCH 23/49] implement post_process method --- src/routers/resource_router.py | 19 ++++++++- src/routers/resource_routers/person_router.py | 40 ++++++++----------- 2 files changed, 34 insertions(+), 25 deletions(-) diff --git a/src/routers/resource_router.py b/src/routers/resource_router.py index 34b381b0..625b0d0e 100644 --- a/src/routers/resource_router.py +++ b/src/routers/resource_router.py @@ -575,7 +575,9 @@ def _retrieve_resource_and_check_roles( and platform (if applicable). The user parameter can be used by subclasses to implement further verification on user access to the resource. """ - return self._retrieve_resource(session, identifier, platform) + resource: type[RESOURCE_MODEL] = self._retrieve_resource(session, identifier, platform) + [processed_resource] = self._post_process([resource], session, user) + return processed_resource def _retrieve_resources_and_check_roles( self, @@ -589,7 +591,20 @@ def _retrieve_resources_and_check_roles( and platform (if applicable). The user parameter can be used by subclasses to implement further verification on user access to the resource. """ - return self._retrieve_resources(session, pagination, platform) + resources: Sequence[type[RESOURCE_MODEL]] = self._retrieve_resources( + session, pagination, platform + ) + return self._post_process(resources, session, user) + + @staticmethod + def _post_process( + resources: Sequence[type[RESOURCE_MODEL]], session: Session, user: User | None + ) -> Sequence[type[RESOURCE_MODEL]]: + """ + Can be implemented in children to post process resources based on user roles + or something else. + """ + return resources @property def _possible_schemas(self) -> list[str]: diff --git a/src/routers/resource_routers/person_router.py b/src/routers/resource_routers/person_router.py index 0e9ab64d..c6a2b74a 100644 --- a/src/routers/resource_routers/person_router.py +++ b/src/routers/resource_routers/person_router.py @@ -1,7 +1,7 @@ from typing import Sequence from sqlmodel import Session from database.model.agent.person import Person -from routers.resource_router import Pagination, ResourceRouter +from routers.resource_router import ResourceRouter from authentication import User @@ -36,27 +36,21 @@ def _verify_user_roles(person: type[Person], user: User | None) -> type[Person]: person.surname = "******" return person - def _retrieve_resource_and_check_roles( - self, - session: Session, - identifier: int | str, - user: User | None = None, - platform: str | None = None, - ) -> type[Person]: - person: type[Person] = super()._retrieve_resource_and_check_roles( - session, identifier, user, platform - ) - return self._verify_user_roles(person, user) - - def _retrieve_resources_and_check_roles( - self, - session: Session, - pagination: Pagination, - user: User | None = None, - platform: str | None = None, + @staticmethod + def _post_process( + resources: Sequence[type[Person]], session: Session, user: User | None ) -> Sequence[type[Person]]: - persons: Sequence[type[Person]] = super()._retrieve_resources_and_check_roles( - session, pagination, user, platform - ) - persons = [self._verify_user_roles(person, user) for person in persons] + """ + For the old drupal platform, only users with "full_view_drupal_resources" role can + see the person's sensitive information. + """ + persons = [] + for person in resources: + if (person.platform == "drupal") and not ( + user and user.has_role("full_view_drupal_resources") + ): + person.name = "******" + person.given_name = "******" + person.surname = "******" + persons.append(person) return persons From d6fd353715c81b86e67a8a6687a2eb8f2b738248 Mon Sep 17 00:00:00 2001 From: Jean Matias Date: Tue, 30 Apr 2024 15:47:16 +0200 Subject: [PATCH 24/49] extended post_process method to contacts --- .../resource_routers/contact_router.py | 61 ++++++------------- src/routers/resource_routers/person_router.py | 14 ----- 2 files changed, 18 insertions(+), 57 deletions(-) diff --git a/src/routers/resource_routers/contact_router.py b/src/routers/resource_routers/contact_router.py index 7a5e8a4a..b230d3b9 100644 --- a/src/routers/resource_routers/contact_router.py +++ b/src/routers/resource_routers/contact_router.py @@ -4,7 +4,7 @@ from database.model.agent.email import Email from database.model.agent.organisation import Organisation from database.model.agent.person import Person -from routers.resource_router import Pagination, ResourceRouter +from routers.resource_router import ResourceRouter from sqlmodel import Session, select @@ -37,51 +37,26 @@ def resource_class(self) -> type[Contact]: return Contact @staticmethod - def _verify_user_roles( - session: Session, - contact: type[Contact], - user: User | None, - ) -> type[Contact]: + def _post_process( + resources: Sequence[type[Contact]], session: Session, user: User | None + ) -> Sequence[type[Contact]]: """ Only authenticated users can see the contact email. For the old drupal platform, only users with "full_view_drupal_resources" role can view the contact emails. """ - if not user or ( - (contact.platform == "drupal") and not user.has_role("full_view_drupal_resources") - ): - email_mask = "******" - # This ensures that the API doesn't break in case the email mask exists in - # in the DB - email = session.exec(select(Email).where(Email.name == email_mask)).first() - if not email: - email = Email(name=email_mask) - session.add(email) - contact.email = [email] - return contact - return contact - - def _retrieve_resource_and_check_roles( - self, - session: Session, - identifier: int | str, - user: User | None = None, - platform: str | None = None, - ) -> type[Contact]: - contact: type[Contact] = super()._retrieve_resource_and_check_roles( - session, identifier, user, platform - ) - return self._verify_user_roles(session, contact, user) - - def _retrieve_resources_and_check_roles( - self, - session: Session, - pagination: Pagination, - user: User | None = None, - platform: str | None = None, - ) -> Sequence[type[Contact]]: - contacts: Sequence[type[Contact]] = super()._retrieve_resources_and_check_roles( - session, pagination, user, platform - ) - contacts = [self._verify_user_roles(session, contact, user) for contact in contacts] + contacts = [] + for contact in resources: + if not user or ( + (contact.platform == "drupal") and not user.has_role("full_view_drupal_resources") + ): + email_mask = "******" + # This ensures that the API doesn't break in case the email mask exists in + # in the DB + email = session.exec(select(Email).where(Email.name == email_mask)).first() + if not email: + email = Email(name=email_mask) + session.add(email) + contact.email = [email] + contacts.append(contact) return contacts diff --git a/src/routers/resource_routers/person_router.py b/src/routers/resource_routers/person_router.py index c6a2b74a..e06d0e6f 100644 --- a/src/routers/resource_routers/person_router.py +++ b/src/routers/resource_routers/person_router.py @@ -22,20 +22,6 @@ def resource_name_plural(self) -> str: def resource_class(self) -> type[Person]: return Person - @staticmethod - def _verify_user_roles(person: type[Person], user: User | None) -> type[Person]: - """ - For the old drupal platform, only users with "full_view_drupal_resources" role can - see the person's sensitive information. - """ - if (person.platform == "drupal") and not ( - user and user.has_role("full_view_drupal_resources") - ): - person.name = "******" - person.given_name = "******" - person.surname = "******" - return person - @staticmethod def _post_process( resources: Sequence[type[Person]], session: Session, user: User | None From 431a3643709ede16ba14013c551ad88e7d80acae Mon Sep 17 00:00:00 2001 From: Jean Matias Date: Tue, 30 Apr 2024 16:01:07 +0200 Subject: [PATCH 25/49] rename post_process to mask_or_filter --- src/routers/resource_router.py | 6 +++--- src/routers/resource_routers/contact_router.py | 2 +- src/routers/resource_routers/person_router.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/routers/resource_router.py b/src/routers/resource_router.py index 625b0d0e..f8e12eee 100644 --- a/src/routers/resource_router.py +++ b/src/routers/resource_router.py @@ -576,7 +576,7 @@ def _retrieve_resource_and_check_roles( implement further verification on user access to the resource. """ resource: type[RESOURCE_MODEL] = self._retrieve_resource(session, identifier, platform) - [processed_resource] = self._post_process([resource], session, user) + [processed_resource] = self._mask_or_filter([resource], session, user) return processed_resource def _retrieve_resources_and_check_roles( @@ -594,10 +594,10 @@ def _retrieve_resources_and_check_roles( resources: Sequence[type[RESOURCE_MODEL]] = self._retrieve_resources( session, pagination, platform ) - return self._post_process(resources, session, user) + return self._mask_or_filter(resources, session, user) @staticmethod - def _post_process( + def _mask_or_filter( resources: Sequence[type[RESOURCE_MODEL]], session: Session, user: User | None ) -> Sequence[type[RESOURCE_MODEL]]: """ diff --git a/src/routers/resource_routers/contact_router.py b/src/routers/resource_routers/contact_router.py index b230d3b9..d5d48feb 100644 --- a/src/routers/resource_routers/contact_router.py +++ b/src/routers/resource_routers/contact_router.py @@ -37,7 +37,7 @@ def resource_class(self) -> type[Contact]: return Contact @staticmethod - def _post_process( + def _mask_or_filter( resources: Sequence[type[Contact]], session: Session, user: User | None ) -> Sequence[type[Contact]]: """ diff --git a/src/routers/resource_routers/person_router.py b/src/routers/resource_routers/person_router.py index e06d0e6f..bb172074 100644 --- a/src/routers/resource_routers/person_router.py +++ b/src/routers/resource_routers/person_router.py @@ -23,7 +23,7 @@ def resource_class(self) -> type[Person]: return Person @staticmethod - def _post_process( + def _mask_or_filter( resources: Sequence[type[Person]], session: Session, user: User | None ) -> Sequence[type[Person]]: """ From 9df93c30969dd0541e0978f26fecf7afdc988efa Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Tue, 30 Apr 2024 16:17:31 +0200 Subject: [PATCH 26/49] Towards changing text based on workflow trigger --- .github/workflows/dockerized-tests.yml | 52 ++++++++++++++++++++++---- 1 file changed, 44 insertions(+), 8 deletions(-) diff --git a/.github/workflows/dockerized-tests.yml b/.github/workflows/dockerized-tests.yml index 15167181..9a2f6adf 100644 --- a/.github/workflows/dockerized-tests.yml +++ b/.github/workflows/dockerized-tests.yml @@ -1,6 +1,6 @@ name: Docker - build, test, push -# This workflow will built docker image and run tests inside the container. +# This workflow will build docker image and run tests inside the container. # This workflow is only executed if there is pull request with change in pyproject.toml dependencies, # or in Dockerfile, or in docker workflow. @@ -10,9 +10,32 @@ on: - '.github/workflows/docker-image.yml' - 'pyproject.toml' - 'Dockerfile' + + push: + branches: + - 'develop' + + release: + types: [published] # allows to manually start a workflow run from the GitHub UI or using the GitHub API. workflow_dispatch: + inputs: + push-image: + description: "Push image to docker hub" + required: false + type: boolean + default: false + push-description: + description: "Update docker hub description" + required: false + type: boolean + default: false + tag: + description: "Tag for the docker image" + required: false + default: "workflow-dispatch" + jobs: build: @@ -27,11 +50,11 @@ jobs: with: context: . file: ./Dockerfile - tags: aiod/metadata_catalogue:latest + tags: aiod/metadata_catalogue:ci outputs: type=docker,dest=/tmp/aiod_mc_image.tar cache-from: type=gha cache-to: type=gha,mode=min - # We store the image as an artifact so it can be used by the `test` step, + # We store the image as an artifact, so it can be used by the `test` step # and inspected manually if needed (download it through Github Actions UI) - name: Store Image uses: actions/upload-artifact@v4 @@ -44,6 +67,7 @@ jobs: needs: [build] steps: # We need to check out the repository, so that we have the `scripts` directory to mount. + # This is required to run the backup script tests. - uses: actions/checkout@v4 - name: Retrieve Image uses: actions/download-artifact@v4 @@ -56,13 +80,23 @@ jobs: docker image ls -a - name: Run pytest from docker run: | - docker run -v ./scripts:/scripts -e KEYCLOAK_CLIENT_SECRET="mocked_secret" --entrypoint "" aiod/metadata_catalogue sh -c "pip install \".[dev]\" && pytest tests -s" + docker run -v ./scripts:/scripts -e KEYCLOAK_CLIENT_SECRET="mocked_secret" --entrypoint "" aiod/metadata_catalogue:ci sh -c "pip install \".[dev]\" && pytest tests -s" publish: - # TODO: Make conditional on being a release or manual action needs: [test] runs-on: ubuntu-latest + if: github.event_name != 'pull_request' steps: + # The correct tag depends on how this workflow was invoked, see also docker-description.md + - name: Set Develop Tag + if: github.ref == 'refs/heads/develop' + run: echo "IMAGE_TAGS=aiod/metadata_catalogue:develop" >> "$GITHUB_ENV" + - name: Set Release Tag + if: github.event_name == 'release' + run: echo "IMAGE_TAGS=aiod/metadata_catalogue:latest,aiod/metadata_catalogue:${{ github.event.release.tag_name }}" >> "$GITHUB_ENV" + - name: Set Dispatch Tag + if: github.event_name == 'workflow_dispatch' + run: echo "IMAGE_TAGS=aiod/metadata_catalogue:${{ inputs.tag }}" >> "$GITHUB_ENV" - uses: actions/checkout@v4 - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 @@ -70,18 +104,20 @@ jobs: with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.AIOD_DOCKER_PAT }} + - name: Echo tags + run: echo $IMAGE_TAGS - name: Build + if: (github.event_name != 'workflow_dispatch') || inputs.push-image uses: docker/build-push-action@v5 with: push: true context: . file: ./Dockerfile - tags: | - aiod/metadata_catalogue:latest - aiod/metadata_catalogue:${{ github.sha }} + tags: ${{ env.IMAGE_TAGS }} cache-from: type=gha cache-to: type=gha,mode=min - name: Update repository description + if: (github.event_name == 'release') || inputs.push-description uses: peter-evans/dockerhub-description@v4 with: username: ${{ secrets.DOCKERHUB_USERNAME }} From cefdef5fc06f34d9dccf4c19adfb6c978288b8fc Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Tue, 30 Apr 2024 17:20:39 +0200 Subject: [PATCH 27/49] Provide information on the different tags available --- docker-description.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/docker-description.md b/docker-description.md index 8190d619..70e09c0e 100644 --- a/docker-description.md +++ b/docker-description.md @@ -3,4 +3,11 @@ Image for AI on Demand's (AIOD) metadata catalogue REST API, developed on [Github](https://github.com/aiondemand/AIOD-rest-api/). This image also requires _at least_ a database setup to function, but for all features authentication (through keycloak) and search (through elastic search) also need to be configured. -Please refer to the documentation in the readme of the ["AIOD-rest-api"](https://github.com/aiondemand/AIOD-rest-api) repository. \ No newline at end of file +Please refer to the documentation in the readme of the ["AIOD-rest-api"](https://github.com/aiondemand/AIOD-rest-api) repository. + +The following tags are available: + + - `latest`: the latest official release + - `develop`: the head of the development branch + - `v*`, e.g. `v1.3.20240308`: that specific release + - a number of custom tags may be introduced for testing, but these are not intended for general use. From fce83be20ae5b4b1ede1dd5485f35418c0e45911 Mon Sep 17 00:00:00 2001 From: Jean Matias Date: Tue, 30 Apr 2024 17:35:26 +0200 Subject: [PATCH 28/49] fix mask_or_filter for contacts --- src/routers/resource_routers/contact_router.py | 15 +++------------ src/routers/resource_routers/person_router.py | 4 +--- 2 files changed, 4 insertions(+), 15 deletions(-) diff --git a/src/routers/resource_routers/contact_router.py b/src/routers/resource_routers/contact_router.py index d5d48feb..cc479919 100644 --- a/src/routers/resource_routers/contact_router.py +++ b/src/routers/resource_routers/contact_router.py @@ -6,7 +6,7 @@ from database.model.agent.person import Person from routers.resource_router import ResourceRouter -from sqlmodel import Session, select +from sqlmodel import Session class ContactRouter(ResourceRouter): @@ -45,18 +45,9 @@ def _mask_or_filter( For the old drupal platform, only users with "full_view_drupal_resources" role can view the contact emails. """ - contacts = [] for contact in resources: if not user or ( (contact.platform == "drupal") and not user.has_role("full_view_drupal_resources") ): - email_mask = "******" - # This ensures that the API doesn't break in case the email mask exists in - # in the DB - email = session.exec(select(Email).where(Email.name == email_mask)).first() - if not email: - email = Email(name=email_mask) - session.add(email) - contact.email = [email] - contacts.append(contact) - return contacts + contact.email = [Email(name="******")] + return resources diff --git a/src/routers/resource_routers/person_router.py b/src/routers/resource_routers/person_router.py index bb172074..f03a2766 100644 --- a/src/routers/resource_routers/person_router.py +++ b/src/routers/resource_routers/person_router.py @@ -30,7 +30,6 @@ def _mask_or_filter( For the old drupal platform, only users with "full_view_drupal_resources" role can see the person's sensitive information. """ - persons = [] for person in resources: if (person.platform == "drupal") and not ( user and user.has_role("full_view_drupal_resources") @@ -38,5 +37,4 @@ def _mask_or_filter( person.name = "******" person.given_name = "******" person.surname = "******" - persons.append(person) - return persons + return resources From 10adff6a0294f4a3074a5c0ffb66a550edc8a23e Mon Sep 17 00:00:00 2001 From: Jean Matias Date: Wed, 1 May 2024 11:20:18 +0200 Subject: [PATCH 29/49] fix autoflush --- src/database/session.py | 4 +-- src/routers/resource_router.py | 2 +- .../resource_routers/test_router_contact.py | 25 +++++++++++++------ 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/src/database/session.py b/src/database/session.py index 49ba51f0..5f1955c6 100644 --- a/src/database/session.py +++ b/src/database/session.py @@ -38,14 +38,14 @@ def db_url(including_db=True): @contextmanager -def DbSession() -> Session: +def DbSession(autoflush: bool = True) -> Session: """ Returning a SQLModel session bound to the (configured) database engine. Alternatively, we could have used FastAPI Depends, but that only works for FastAPI - while the synchronization, for instance, also needs a Session, but doesn't use FastAPI. """ - session = Session(EngineSingleton().engine) + session = Session(EngineSingleton().engine, autoflush=autoflush) try: yield session finally: diff --git a/src/routers/resource_router.py b/src/routers/resource_router.py index f8e12eee..5d6c0c57 100644 --- a/src/routers/resource_router.py +++ b/src/routers/resource_router.py @@ -213,7 +213,7 @@ def get_resources( ): """Fetch all resources of this platform in given schema, using pagination""" _raise_error_on_invalid_schema(self._possible_schemas, schema) - with DbSession() as session: + with DbSession(autoflush=False) as session: try: convert_schema = ( partial(self.schema_converters[schema].convert, session) diff --git a/src/tests/routers/resource_routers/test_router_contact.py b/src/tests/routers/resource_routers/test_router_contact.py index f58533ab..1ca1477f 100644 --- a/src/tests/routers/resource_routers/test_router_contact.py +++ b/src/tests/routers/resource_routers/test_router_contact.py @@ -1,4 +1,5 @@ import copy +import pytest from unittest.mock import Mock from starlette.testclient import TestClient @@ -8,6 +9,7 @@ from database.model.agent.email import Email from database.model.platform.platform import Platform from database.session import DbSession +from tests.testutils.default_instances import _create_class_with_body def test_happy_path(client: TestClient, mocked_privileged_token: Mock, body_asset: dict): @@ -84,29 +86,38 @@ def test_person_and_organisation_both_specified(client: TestClient, mocked_privi assert response.json()["detail"] == "Person and organisation cannot be both filled." +@pytest.fixture +def contact2(body_concept) -> Contact: + body = copy.copy(body_concept) + body["platform_resource_identifier"] = "fake:100" + body["email"] = ["fake@email.com", "fake2@email.com"] + return _create_class_with_body(Contact, body) + + def test_email_privacy( client: TestClient, mocked_privileged_token: Mock, contact: Contact, + contact2: Contact, ): keycloak_openid.introspect = mocked_privileged_token with DbSession() as session: - email = Email(name="fake@email.com") - another_email = Email(name="fake2@email.com") - contact.email = [email, another_email] session.add(contact) + session.add(contact2) session.commit() - guest_response = client.get("/contacts/v1/1") + guest_response = client.get("/contacts/v1") assert guest_response.status_code == 200, guest_response.json() guest_response_json = guest_response.json() - assert guest_response_json["email"] == ["******"] + assert len(guest_response_json) == 2, guest_response_json + for contact_json in guest_response_json: + assert contact_json["email"] == ["******"] - response = client.get("/contacts/v1/1", headers={"Authorization": "Fake token"}) + response = client.get("/contacts/v1/2", headers={"Authorization": "Fake token"}) assert response.status_code == 200, response.json() response_json = response.json() - assert response_json["email"] == ["fake@email.com", "fake2@email.com"] + assert set(response_json["email"]) == {"fake2@email.com", "fake@email.com"} def test_email_privacy_for_drupal( From f1ea8aefd41b61c442f96a40fb29b734bad6af8b Mon Sep 17 00:00:00 2001 From: Jean Matias Date: Wed, 1 May 2024 16:29:19 +0200 Subject: [PATCH 30/49] add further tests --- src/database/model/platform/platform_names.py | 1 + src/routers/resource_router.py | 12 ++- .../resource_routers/contact_router.py | 4 +- src/routers/resource_routers/person_router.py | 3 +- .../resource_routers/test_router_contact.py | 93 ++++++++++++++++--- .../resource_routers/test_router_person.py | 54 +++++++---- 6 files changed, 131 insertions(+), 36 deletions(-) diff --git a/src/database/model/platform/platform_names.py b/src/database/model/platform/platform_names.py index dadb61d4..eba71643 100644 --- a/src/database/model/platform/platform_names.py +++ b/src/database/model/platform/platform_names.py @@ -9,6 +9,7 @@ class PlatformName(str, enum.Enum): """ aiod = "aiod" + drupal = "drupal" example = "example" openml = "openml" huggingface = "huggingface" diff --git a/src/routers/resource_router.py b/src/routers/resource_router.py index 5d6c0c57..ffd85db9 100644 --- a/src/routers/resource_router.py +++ b/src/routers/resource_router.py @@ -236,7 +236,7 @@ def get_resource( """ _raise_error_on_invalid_schema(self._possible_schemas, schema) try: - with DbSession() as session: + with DbSession(autoflush=False) as session: resource: Any = self._retrieve_resource_and_check_roles( session, identifier, user, platform=platform ) @@ -321,8 +321,11 @@ def get_resources( ], pagination: Annotated[Pagination, Depends(Pagination)], schema: self._possible_schemas_type = "aiod", # type:ignore + user: User | None = Depends(get_user_or_none), ): - resources = self.get_resources(pagination=pagination, schema=schema, platform=platform) + resources = self.get_resources( + pagination=pagination, schema=schema, user=user, platform=platform + ) return resources return get_resources @@ -368,8 +371,11 @@ def get_resource( ), ], schema: self._possible_schemas_type = "aiod", # type:ignore + user: User | None = Depends(get_user_or_none), ): - return self.get_resource(identifier=identifier, schema=schema, platform=platform) + return self.get_resource( + identifier=identifier, schema=schema, user=user, platform=platform + ) return get_resource diff --git a/src/routers/resource_routers/contact_router.py b/src/routers/resource_routers/contact_router.py index cc479919..bb39cfd3 100644 --- a/src/routers/resource_routers/contact_router.py +++ b/src/routers/resource_routers/contact_router.py @@ -4,6 +4,7 @@ from database.model.agent.email import Email from database.model.agent.organisation import Organisation from database.model.agent.person import Person +from database.model.platform.platform_names import PlatformName from routers.resource_router import ResourceRouter from sqlmodel import Session @@ -47,7 +48,8 @@ def _mask_or_filter( """ for contact in resources: if not user or ( - (contact.platform == "drupal") and not user.has_role("full_view_drupal_resources") + (contact.platform == PlatformName.drupal) + and not user.has_role("full_view_drupal_resources") ): contact.email = [Email(name="******")] return resources diff --git a/src/routers/resource_routers/person_router.py b/src/routers/resource_routers/person_router.py index f03a2766..0f5e8e2c 100644 --- a/src/routers/resource_routers/person_router.py +++ b/src/routers/resource_routers/person_router.py @@ -1,6 +1,7 @@ from typing import Sequence from sqlmodel import Session from database.model.agent.person import Person +from database.model.platform.platform_names import PlatformName from routers.resource_router import ResourceRouter from authentication import User @@ -31,7 +32,7 @@ def _mask_or_filter( see the person's sensitive information. """ for person in resources: - if (person.platform == "drupal") and not ( + if (person.platform == PlatformName.drupal) and not ( user and user.has_role("full_view_drupal_resources") ): person.name = "******" diff --git a/src/tests/routers/resource_routers/test_router_contact.py b/src/tests/routers/resource_routers/test_router_contact.py index 1ca1477f..a97f1007 100644 --- a/src/tests/routers/resource_routers/test_router_contact.py +++ b/src/tests/routers/resource_routers/test_router_contact.py @@ -94,11 +94,24 @@ def contact2(body_concept) -> Contact: return _create_class_with_body(Contact, body) -def test_email_privacy( +@pytest.fixture( + params=[ + "/contacts/v1", + "/contacts/v1/1", + "/platforms/example/contacts/v1", + "/platforms/example/contacts/v1/fake:100", + ] +) +def endpoint_from_fixture1(request) -> str: + return request.param + + +def test_email_mask_for_not_authenticated_user( client: TestClient, mocked_privileged_token: Mock, contact: Contact, contact2: Contact, + endpoint_from_fixture1: str, ): keycloak_openid.introspect = mocked_privileged_token @@ -107,18 +120,67 @@ def test_email_privacy( session.add(contact2) session.commit() - guest_response = client.get("/contacts/v1") + guest_response = client.get(endpoint_from_fixture1) assert guest_response.status_code == 200, guest_response.json() - guest_response_json = guest_response.json() - assert len(guest_response_json) == 2, guest_response_json + guest_response_json = [guest_response.json()] + guest_response_json = ( + guest_response_json[0] if isinstance(guest_response_json[0], list) else guest_response_json + ) + assert len(guest_response_json) > 0, guest_response_json for contact_json in guest_response_json: assert contact_json["email"] == ["******"] - response = client.get("/contacts/v1/2", headers={"Authorization": "Fake token"}) + +def test_email_mask_for_authenticated_user( + client: TestClient, + mocked_privileged_token: Mock, + contact: Contact, + contact2: Contact, +): + keycloak_openid.introspect = mocked_privileged_token + headers = {"Authorization": "Fake token"} + + with DbSession() as session: + session.add(contact) + session.add(contact2) + session.commit() + + guest_response = client.get("/contacts/v1", headers=headers) + guest_response_json = guest_response.json() + assert guest_response.status_code == 200, guest_response_json + assert len(guest_response_json) == 2, guest_response_json + assert guest_response_json[0]["email"] == ["a@b.com"] + assert set(guest_response_json[1]["email"]) == {"fake2@email.com", "fake@email.com"} + + response = client.get("/contacts/v1/2", headers=headers) assert response.status_code == 200, response.json() response_json = response.json() assert set(response_json["email"]) == {"fake2@email.com", "fake@email.com"} + guest_response = client.get("/platforms/example/contacts/v1", headers=headers) + guest_response_json = guest_response.json() + assert guest_response.status_code == 200, guest_response_json + assert len(guest_response_json) == 2, guest_response_json + assert guest_response_json[0]["email"] == ["a@b.com"] + assert set(guest_response_json[1]["email"]) == {"fake2@email.com", "fake@email.com"} + + guest_response = client.get("/platforms/example/contacts/v1/fake:100", headers=headers) + guest_response_json = guest_response.json() + assert guest_response.status_code == 200, guest_response_json + assert set(guest_response_json["email"]) == {"fake2@email.com", "fake@email.com"} + + +@pytest.fixture( + params=[ + "/contacts/v1", + "/contacts/v1/1", + "/platforms/drupal/contacts/v1", + "/platforms/drupal/contacts/v1/fake:100", + ] +) +def endpoint_from_fixture2(request) -> str: + return request.param + def test_email_privacy_for_drupal( client: TestClient, @@ -126,12 +188,12 @@ def test_email_privacy_for_drupal( mocked_drupal_token: Mock, contact: Contact, platform: Platform, + endpoint_from_fixture2: str, ): with DbSession() as session: - platform.name = "drupal" - session.add(platform) contact.platform = "drupal" + contact.platform_resource_identifier = "fake:100" email = Email(name="fake@email.com") another_email = Email(name="fake2@email.com") contact.email = [email, another_email] @@ -139,15 +201,24 @@ def test_email_privacy_for_drupal( session.commit() keycloak_openid.introspect = mocked_privileged_token + headers = {"Authorization": "Fake token"} - response = client.get("/contacts/v1/1", headers={"Authorization": "Fake token"}) - assert response.status_code == 200, response.json() + response = client.get(endpoint_from_fixture2, headers=headers) response_json = response.json() + if isinstance(response_json, list): + response_json = response_json[0] + + assert response.status_code == 200, response_json + assert len(response_json) > 0, response_json assert response_json["email"] == ["******"] keycloak_openid.introspect = mocked_drupal_token - response = client.get("/contacts/v1/1", headers={"Authorization": "Fake token"}) - assert response.status_code == 200, response.json() + response = client.get(endpoint_from_fixture2, headers=headers) response_json = response.json() + if isinstance(response_json, list): + response_json = response_json[0] + + assert response.status_code == 200, response_json + assert len(response_json) > 0, response_json assert response_json["email"] == ["fake@email.com", "fake2@email.com"] diff --git a/src/tests/routers/resource_routers/test_router_person.py b/src/tests/routers/resource_routers/test_router_person.py index 29716e32..07f17a9b 100644 --- a/src/tests/routers/resource_routers/test_router_person.py +++ b/src/tests/routers/resource_routers/test_router_person.py @@ -1,6 +1,7 @@ import copy from unittest.mock import Mock +import pytest from starlette.testclient import TestClient from authentication import keycloak_openid @@ -51,6 +52,18 @@ def test_happy_path( assert response_json["contact_details"] == 1 +@pytest.fixture( + params=[ + "/persons/v1", + "/persons/v1/1", + "/platforms/drupal/persons/v1", + "/platforms/drupal/persons/v1/2", + ] +) +def endpoint(request) -> str: + return request.param + + def test_privacy_for_drupal( client: TestClient, mocked_privileged_token: Mock, @@ -58,6 +71,7 @@ def test_privacy_for_drupal( platform: Platform, person: Person, contact: Contact, + endpoint: str, ): """Test to ensure that only authenticated users with "full_view_drupal_resources" role can visualise fields such as name, given_name and surname of a person migrated from @@ -65,8 +79,6 @@ def test_privacy_for_drupal( """ with DbSession() as session: - platform.name = "drupal" - session.add(platform) person.platform = "drupal" person.platform_resource_identifier = "2" person.name = "Joe Doe" @@ -76,22 +88,24 @@ def test_privacy_for_drupal( session.add(contact) session.commit() - keycloak_openid.introspect = mocked_privileged_token - - response = client.get("/persons/v1/1") - assert response.status_code == 200, response.json() - - response_json = response.json() - assert response_json["name"] == "******" - assert response_json["given_name"] == "******" - assert response_json["surname"] == "******" - - keycloak_openid.introspect = mocked_drupal_token - - response = client.get("/persons/v1/1", headers={"Authorization": "Fake token"}) - assert response.status_code == 200, response.json() + headers = {"Authorization": "Fake token"} + keycloak_openid.introspect = mocked_privileged_token - response_json = response.json() - assert response_json["name"] == "Joe Doe" - assert response_json["given_name"] == "Joe" - assert response_json["surname"] == "Doe" + response = client.get(endpoint, headers=headers) + response_json = response.json() + response_json = [response_json] if isinstance(response_json, dict) else response_json + assert response.status_code == 200, response_json + for person_dict in response_json: + assert person_dict["name"] == "******" + assert person_dict["given_name"] == "******" + assert person_dict["surname"] == "******" + + keycloak_openid.introspect = mocked_drupal_token + response = client.get(endpoint, headers=headers) + response_json = response.json() + response_json = [response_json] if isinstance(response_json, dict) else response_json + assert response.status_code == 200, response_json + for person_dict in response_json: + assert person_dict["name"] == "Joe Doe" + assert person_dict["given_name"] == "Joe" + assert person_dict["surname"] == "Doe" From 4ad425ffe770edb011e91930e68149a81e4869e3 Mon Sep 17 00:00:00 2001 From: Jean Matias Date: Wed, 1 May 2024 17:23:03 +0200 Subject: [PATCH 31/49] rename method and increased time on test since it's been randomnly failing only when triggered by pre-commit --- src/routers/resource_router.py | 8 ++++---- .../test_router_dataset_generic_fields.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/routers/resource_router.py b/src/routers/resource_router.py index ffd85db9..47da64eb 100644 --- a/src/routers/resource_router.py +++ b/src/routers/resource_router.py @@ -220,7 +220,7 @@ def get_resources( if schema != "aiod" else self.resource_class_read.from_orm ) - resources: Any = self._retrieve_resources_and_check_roles( + resources: Any = self._retrieve_resources_and_post_process( session, pagination, user, platform ) return self._wrap_with_headers([convert_schema(resource) for resource in resources]) @@ -237,7 +237,7 @@ def get_resource( _raise_error_on_invalid_schema(self._possible_schemas, schema) try: with DbSession(autoflush=False) as session: - resource: Any = self._retrieve_resource_and_check_roles( + resource: Any = self._retrieve_resource_and_post_process( session, identifier, user, platform=platform ) if schema != "aiod": @@ -569,7 +569,7 @@ def _retrieve_resources( resources: Sequence = session.scalars(query).all() return resources - def _retrieve_resource_and_check_roles( + def _retrieve_resource_and_post_process( self, session: Session, identifier: int | str, @@ -585,7 +585,7 @@ def _retrieve_resource_and_check_roles( [processed_resource] = self._mask_or_filter([resource], session, user) return processed_resource - def _retrieve_resources_and_check_roles( + def _retrieve_resources_and_post_process( self, session: Session, pagination: Pagination, diff --git a/src/tests/routers/resource_routers/test_router_dataset_generic_fields.py b/src/tests/routers/resource_routers/test_router_dataset_generic_fields.py index fb354d2c..9f00cf0e 100644 --- a/src/tests/routers/resource_routers/test_router_dataset_generic_fields.py +++ b/src/tests/routers/resource_routers/test_router_dataset_generic_fields.py @@ -134,8 +134,8 @@ def test_happy_path( date_created = dateutil.parser.parse(response_json["aiod_entry"]["date_created"] + "Z") date_modified = dateutil.parser.parse(response_json["aiod_entry"]["date_modified"] + "Z") - assert 0 < (date_created - datetime_create_request).total_seconds() < 0.1 - assert 0 < (date_modified - datetime_update_request).total_seconds() < 0.1 + assert 0 < (date_created - datetime_create_request).total_seconds() < 0.2 + assert 0 < (date_modified - datetime_update_request).total_seconds() < 0.2 assert response_json["platform"] == "example" assert response_json["platform_resource_identifier"] == "2" From c86a3cb2c4992d876c865e65f6480b66839d59bf Mon Sep 17 00:00:00 2001 From: Pieter Gijsbers Date: Thu, 2 May 2024 16:14:32 +0200 Subject: [PATCH 32/49] Update docker-description.md Co-authored-by: Jean Matias --- docker-description.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docker-description.md b/docker-description.md index 70e09c0e..5d5676fd 100644 --- a/docker-description.md +++ b/docker-description.md @@ -1,9 +1,7 @@ # AIOD Metadata Catalogue Image for AI on Demand's (AIOD) metadata catalogue REST API, developed on [Github](https://github.com/aiondemand/AIOD-rest-api/). -This image also requires _at least_ a database setup to function, but for all features authentication (through keycloak) and -search (through elastic search) also need to be configured. -Please refer to the documentation in the readme of the ["AIOD-rest-api"](https://github.com/aiondemand/AIOD-rest-api) repository. +This image requires a properly configured database setup to function. Additionally, to have all features working, authentication (via Keycloak) and search (through Elasticsearch) must also be configured. Please refer to the documentation available in the README of the ["AIOD-rest-api"](https://github.com/aiondemand/AIOD-rest-api) repository. The following tags are available: From d3bda35a08e413cb33c7e0a6a5904dd28848dfad Mon Sep 17 00:00:00 2001 From: Jean Matias Date: Wed, 8 May 2024 12:37:52 +0200 Subject: [PATCH 33/49] Update src/tests/routers/resource_routers/test_router_contact.py Co-authored-by: Pieter Gijsbers --- src/tests/routers/resource_routers/test_router_contact.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/tests/routers/resource_routers/test_router_contact.py b/src/tests/routers/resource_routers/test_router_contact.py index a97f1007..a82fb542 100644 --- a/src/tests/routers/resource_routers/test_router_contact.py +++ b/src/tests/routers/resource_routers/test_router_contact.py @@ -122,10 +122,9 @@ def test_email_mask_for_not_authenticated_user( guest_response = client.get(endpoint_from_fixture1) assert guest_response.status_code == 200, guest_response.json() - guest_response_json = [guest_response.json()] - guest_response_json = ( - guest_response_json[0] if isinstance(guest_response_json[0], list) else guest_response_json - ) + guest_response_json = guest_response.json() + if not isinstance(guest_response_json, list): + guest_response_json = [guest_response_json] assert len(guest_response_json) > 0, guest_response_json for contact_json in guest_response_json: assert contact_json["email"] == ["******"] From bb208e1c71ba5726c7436d3bd7c5b80b8cbd194a Mon Sep 17 00:00:00 2001 From: Jean Matias Date: Wed, 8 May 2024 12:49:32 +0200 Subject: [PATCH 34/49] rename guest_reponse to response in test_router_contact --- .../resource_routers/test_router_contact.py | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/src/tests/routers/resource_routers/test_router_contact.py b/src/tests/routers/resource_routers/test_router_contact.py index a82fb542..34168386 100644 --- a/src/tests/routers/resource_routers/test_router_contact.py +++ b/src/tests/routers/resource_routers/test_router_contact.py @@ -144,29 +144,29 @@ def test_email_mask_for_authenticated_user( session.add(contact2) session.commit() - guest_response = client.get("/contacts/v1", headers=headers) - guest_response_json = guest_response.json() - assert guest_response.status_code == 200, guest_response_json - assert len(guest_response_json) == 2, guest_response_json - assert guest_response_json[0]["email"] == ["a@b.com"] - assert set(guest_response_json[1]["email"]) == {"fake2@email.com", "fake@email.com"} + response = client.get("/contacts/v1", headers=headers) + response_json = response.json() + assert response.status_code == 200, response_json + assert len(response_json) == 2, response_json + assert response_json[0]["email"] == ["a@b.com"] + assert set(response_json[1]["email"]) == {"fake2@email.com", "fake@email.com"} response = client.get("/contacts/v1/2", headers=headers) assert response.status_code == 200, response.json() response_json = response.json() assert set(response_json["email"]) == {"fake2@email.com", "fake@email.com"} - guest_response = client.get("/platforms/example/contacts/v1", headers=headers) - guest_response_json = guest_response.json() - assert guest_response.status_code == 200, guest_response_json - assert len(guest_response_json) == 2, guest_response_json - assert guest_response_json[0]["email"] == ["a@b.com"] - assert set(guest_response_json[1]["email"]) == {"fake2@email.com", "fake@email.com"} + response = client.get("/platforms/example/contacts/v1", headers=headers) + response_json = response.json() + assert response.status_code == 200, response_json + assert len(response_json) == 2, response_json + assert response_json[0]["email"] == ["a@b.com"] + assert set(response_json[1]["email"]) == {"fake2@email.com", "fake@email.com"} - guest_response = client.get("/platforms/example/contacts/v1/fake:100", headers=headers) - guest_response_json = guest_response.json() - assert guest_response.status_code == 200, guest_response_json - assert set(guest_response_json["email"]) == {"fake2@email.com", "fake@email.com"} + response = client.get("/platforms/example/contacts/v1/fake:100", headers=headers) + response_json = response.json() + assert response.status_code == 200, response_json + assert set(response_json["email"]) == {"fake2@email.com", "fake@email.com"} @pytest.fixture( From 6eebd015faf0afd7d9a73ef2870de0658de6d3a4 Mon Sep 17 00:00:00 2001 From: Jean Matias Date: Thu, 9 May 2024 11:00:12 +0200 Subject: [PATCH 35/49] rename drupal in all files --- src/database/model/platform/platform_names.py | 2 +- src/routers/resource_routers/contact_router.py | 6 +++--- src/routers/resource_routers/person_router.py | 8 ++++---- .../resource_routers/test_router_contact.py | 12 ++++++------ .../resource_routers/test_router_person.py | 16 ++++++++-------- src/tests/testutils/default_sqlalchemy.py | 4 ++-- 6 files changed, 24 insertions(+), 24 deletions(-) diff --git a/src/database/model/platform/platform_names.py b/src/database/model/platform/platform_names.py index eba71643..c43fbc6e 100644 --- a/src/database/model/platform/platform_names.py +++ b/src/database/model/platform/platform_names.py @@ -9,7 +9,7 @@ class PlatformName(str, enum.Enum): """ aiod = "aiod" - drupal = "drupal" + ai4europe_cms = "ai4europe_cms" example = "example" openml = "openml" huggingface = "huggingface" diff --git a/src/routers/resource_routers/contact_router.py b/src/routers/resource_routers/contact_router.py index bb39cfd3..e6a19b9c 100644 --- a/src/routers/resource_routers/contact_router.py +++ b/src/routers/resource_routers/contact_router.py @@ -43,13 +43,13 @@ def _mask_or_filter( ) -> Sequence[type[Contact]]: """ Only authenticated users can see the contact email. - For the old drupal platform, only users with "full_view_drupal_resources" role + For the old ai4europe_cms platform, only users with "full_view_ai4europe_cms_resources" role can view the contact emails. """ for contact in resources: if not user or ( - (contact.platform == PlatformName.drupal) - and not user.has_role("full_view_drupal_resources") + (contact.platform == PlatformName.ai4europe_cms) + and not user.has_role("full_view_ai4europe_cms_resources") ): contact.email = [Email(name="******")] return resources diff --git a/src/routers/resource_routers/person_router.py b/src/routers/resource_routers/person_router.py index 0f5e8e2c..e2d6b7a9 100644 --- a/src/routers/resource_routers/person_router.py +++ b/src/routers/resource_routers/person_router.py @@ -28,12 +28,12 @@ def _mask_or_filter( resources: Sequence[type[Person]], session: Session, user: User | None ) -> Sequence[type[Person]]: """ - For the old drupal platform, only users with "full_view_drupal_resources" role can - see the person's sensitive information. + For the old ai4europe_cms platform, only users with "full_view_ai4europe_cms_resources" + role can see the person's sensitive information. """ for person in resources: - if (person.platform == PlatformName.drupal) and not ( - user and user.has_role("full_view_drupal_resources") + if (person.platform == PlatformName.ai4europe_cms) and not ( + user and user.has_role("full_view_ai4europe_cms_resources") ): person.name = "******" person.given_name = "******" diff --git a/src/tests/routers/resource_routers/test_router_contact.py b/src/tests/routers/resource_routers/test_router_contact.py index 34168386..5931ab4d 100644 --- a/src/tests/routers/resource_routers/test_router_contact.py +++ b/src/tests/routers/resource_routers/test_router_contact.py @@ -173,25 +173,25 @@ def test_email_mask_for_authenticated_user( params=[ "/contacts/v1", "/contacts/v1/1", - "/platforms/drupal/contacts/v1", - "/platforms/drupal/contacts/v1/fake:100", + "/platforms/ai4europe_cms/contacts/v1", + "/platforms/ai4europe_cms/contacts/v1/fake:100", ] ) def endpoint_from_fixture2(request) -> str: return request.param -def test_email_privacy_for_drupal( +def test_email_privacy_for_ai4europe_cms( client: TestClient, mocked_privileged_token: Mock, - mocked_drupal_token: Mock, + mocked_ai4europe_cms_token: Mock, contact: Contact, platform: Platform, endpoint_from_fixture2: str, ): with DbSession() as session: - contact.platform = "drupal" + contact.platform = "ai4europe_cms" contact.platform_resource_identifier = "fake:100" email = Email(name="fake@email.com") another_email = Email(name="fake2@email.com") @@ -211,7 +211,7 @@ def test_email_privacy_for_drupal( assert len(response_json) > 0, response_json assert response_json["email"] == ["******"] - keycloak_openid.introspect = mocked_drupal_token + keycloak_openid.introspect = mocked_ai4europe_cms_token response = client.get(endpoint_from_fixture2, headers=headers) response_json = response.json() diff --git a/src/tests/routers/resource_routers/test_router_person.py b/src/tests/routers/resource_routers/test_router_person.py index 07f17a9b..5a810294 100644 --- a/src/tests/routers/resource_routers/test_router_person.py +++ b/src/tests/routers/resource_routers/test_router_person.py @@ -56,30 +56,30 @@ def test_happy_path( params=[ "/persons/v1", "/persons/v1/1", - "/platforms/drupal/persons/v1", - "/platforms/drupal/persons/v1/2", + "/platforms/ai4europe_cms/persons/v1", + "/platforms/ai4europe_cms/persons/v1/2", ] ) def endpoint(request) -> str: return request.param -def test_privacy_for_drupal( +def test_privacy_for_ai4europe_cms( client: TestClient, mocked_privileged_token: Mock, - mocked_drupal_token: Mock, + mocked_ai4europe_cms_token: Mock, platform: Platform, person: Person, contact: Contact, endpoint: str, ): - """Test to ensure that only authenticated users with "full_view_drupal_resources" role + """Test to ensure that only authenticated users with "full_view_ai4europe_cms_resources" role can visualise fields such as name, given_name and surname of a person migrated from - the old drupal platform. + the old ai4europe_cms platform. """ with DbSession() as session: - person.platform = "drupal" + person.platform = "ai4europe_cms" person.platform_resource_identifier = "2" person.name = "Joe Doe" person.given_name = "Joe" @@ -100,7 +100,7 @@ def test_privacy_for_drupal( assert person_dict["given_name"] == "******" assert person_dict["surname"] == "******" - keycloak_openid.introspect = mocked_drupal_token + keycloak_openid.introspect = mocked_ai4europe_cms_token response = client.get(endpoint, headers=headers) response_json = response.json() response_json = [response_json] if isinstance(response_json, dict) else response_json diff --git a/src/tests/testutils/default_sqlalchemy.py b/src/tests/testutils/default_sqlalchemy.py index 982e11a8..22f55f34 100644 --- a/src/tests/testutils/default_sqlalchemy.py +++ b/src/tests/testutils/default_sqlalchemy.py @@ -144,11 +144,11 @@ def mocked_privileged_token() -> Mock: @pytest.fixture() -def mocked_drupal_token() -> Mock: +def mocked_ai4europe_cms_token() -> Mock: roles = [ "offline_access", "uma_authorization", "default-roles-aiod", - "full_view_drupal_resources", + "full_view_ai4europe_cms_resources", ] return Mock(return_value=_user_with_roles(*roles)) From 6810c9c39b947b47c06fea5c0459b2321ed93f44 Mon Sep 17 00:00:00 2001 From: Jean Matias Date: Tue, 14 May 2024 14:08:55 +0100 Subject: [PATCH 36/49] change version tags --- authentication/Dockerfile | 4 ++-- docker-compose.yaml | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/authentication/Dockerfile b/authentication/Dockerfile index 471835b6..eb385036 100644 --- a/authentication/Dockerfile +++ b/authentication/Dockerfile @@ -1,4 +1,4 @@ -FROM quay.io/keycloak/keycloak:latest as builder +FROM quay.io/keycloak/keycloak:24.0.4 as builder # Enable health and metrics support ENV KC_HEALTH_ENABLED=true @@ -12,7 +12,7 @@ WORKDIR /opt/keycloak #RUN keytool -genkeypair -storepass password -storetype PKCS12 -keyalg RSA -keysize 2048 -dname "CN=server" -alias server -ext "SAN:c=DNS:localhost,IP:127.0.0.1" -keystore conf/server.keystore #RUN /opt/keycloak/bin/kc.sh build -FROM quay.io/keycloak/keycloak:latest +FROM quay.io/keycloak/keycloak:24.0.4 COPY --from=builder /opt/keycloak/ /opt/keycloak/ # change these values to point to a running postgres instance diff --git a/docker-compose.yaml b/docker-compose.yaml index de3ba673..33fbd8ab 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -121,7 +121,7 @@ services: condition: service_healthy sqlserver: - image: mysql + image: mysql:8.4.0 container_name: sqlserver env_file: .env environment: @@ -137,7 +137,7 @@ services: retries: 30 keycloak: - image: quay.io/keycloak/keycloak + image: quay.io/keycloak/keycloak:24.0.4 container_name: keycloak env_file: .env environment: @@ -157,7 +157,7 @@ services: --import-realm nginx: - image: nginx + image: nginx:1.25.5 container_name: nginx restart: unless-stopped volumes: From cd2926b10236af0d4bd9c17649eb86dc3f1cd52c Mon Sep 17 00:00:00 2001 From: Jean Matias Date: Tue, 14 May 2024 14:18:04 +0100 Subject: [PATCH 37/49] change mysql version tag --- docker-compose.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose.yaml b/docker-compose.yaml index 33fbd8ab..3b6e9c43 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -121,7 +121,7 @@ services: condition: service_healthy sqlserver: - image: mysql:8.4.0 + image: mysql:8.3.0 container_name: sqlserver env_file: .env environment: From 8f199b658c434059cc0096d45327d0fae6814431 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Mon, 27 May 2024 18:10:57 +0200 Subject: [PATCH 38/49] Platform added to elastic search --- src/setup/es_setup/definitions.py | 1 + src/setup/logstash_setup/templates/sql_init.py | 1 + src/setup/logstash_setup/templates/sql_sync.py | 1 + 3 files changed, 3 insertions(+) diff --git a/src/setup/es_setup/definitions.py b/src/setup/es_setup/definitions.py index d37a807c..7fa48416 100755 --- a/src/setup/es_setup/definitions.py +++ b/src/setup/es_setup/definitions.py @@ -4,6 +4,7 @@ "date_modified": {"type": "date"}, "identifier": {"type": "long"}, "name": {"type": "text", "fields": {"keyword": {"type": "keyword"}}}, + "platform": {"type": "text", "fields": {"keyword": {"type": "keyword"}}}, "description_plain": {"type": "text", "fields": {"keyword": {"type": "keyword"}}}, "description_html": {"type": "text", "fields": {"keyword": {"type": "keyword"}}}, } diff --git a/src/setup/logstash_setup/templates/sql_init.py b/src/setup/logstash_setup/templates/sql_init.py index 7c16dd72..0e594110 100755 --- a/src/setup/logstash_setup/templates/sql_init.py +++ b/src/setup/logstash_setup/templates/sql_init.py @@ -1,6 +1,7 @@ TEMPLATE_SQL_INIT = """SELECT {{entity_name}}.identifier, {{entity_name}}.name, + {{entity_name}}.platform, text.plain as 'description_plain', text.html as 'description_html', aiod_entry.date_modified{{extra_fields}} diff --git a/src/setup/logstash_setup/templates/sql_sync.py b/src/setup/logstash_setup/templates/sql_sync.py index d2e532e1..e62a0582 100755 --- a/src/setup/logstash_setup/templates/sql_sync.py +++ b/src/setup/logstash_setup/templates/sql_sync.py @@ -1,6 +1,7 @@ TEMPLATE_SQL_SYNC = """SELECT {{entity_name}}.identifier, {{entity_name}}.name, + {{entity_name}}.platform, text.plain as 'description_plain', text.html as 'description_html', aiod_entry.date_modified{{extra_fields}} From 913967cd10d4436a8d735cf5a1f0a8c39db106aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n?= Date: Wed, 24 Apr 2024 15:47:30 +0200 Subject: [PATCH 39/49] Platform added to search routers tests --- src/tests/resources/elasticsearch/dataset_search.json | 1 + src/tests/resources/elasticsearch/event_search.json | 1 + src/tests/resources/elasticsearch/experiment_search.json | 1 + src/tests/resources/elasticsearch/ml_model_search.json | 1 + src/tests/resources/elasticsearch/news_search.json | 1 + src/tests/resources/elasticsearch/organisation_search.json | 1 + src/tests/resources/elasticsearch/project_search.json | 1 + src/tests/resources/elasticsearch/publication_search.json | 1 + src/tests/resources/elasticsearch/service_search.json | 1 + src/tests/routers/search_routers/test_search_routers.py | 1 + 10 files changed, 10 insertions(+) diff --git a/src/tests/resources/elasticsearch/dataset_search.json b/src/tests/resources/elasticsearch/dataset_search.json index b797082d..e7c2012a 100644 --- a/src/tests/resources/elasticsearch/dataset_search.json +++ b/src/tests/resources/elasticsearch/dataset_search.json @@ -22,6 +22,7 @@ "identifier" : 1, "date_modified" : "2023-09-01T00:00:00.000Z", "name" : "A name.", + "platform" : "A platform.", "description_plain" : "A plain text description.", "issn" : "20493630", "type" : "dataset", diff --git a/src/tests/resources/elasticsearch/event_search.json b/src/tests/resources/elasticsearch/event_search.json index 13fd6135..add5a17f 100644 --- a/src/tests/resources/elasticsearch/event_search.json +++ b/src/tests/resources/elasticsearch/event_search.json @@ -22,6 +22,7 @@ "identifier" : 1, "date_modified" : "2023-09-01T00:00:00.000Z", "name" : "A name.", + "platform" : "A platform.", "description_plain" : "A plain text description.", "type" : "event", "description_html" : "An html description." diff --git a/src/tests/resources/elasticsearch/experiment_search.json b/src/tests/resources/elasticsearch/experiment_search.json index 5c4a815b..e2f4e657 100644 --- a/src/tests/resources/elasticsearch/experiment_search.json +++ b/src/tests/resources/elasticsearch/experiment_search.json @@ -22,6 +22,7 @@ "identifier" : 1, "date_modified" : "2023-09-01T00:00:00.000Z", "name" : "A name.", + "platform" : "A platform.", "description_plain" : "A plain text description.", "type" : "experiment", "description_html" : "An html description." diff --git a/src/tests/resources/elasticsearch/ml_model_search.json b/src/tests/resources/elasticsearch/ml_model_search.json index de60e1b0..9d28884d 100644 --- a/src/tests/resources/elasticsearch/ml_model_search.json +++ b/src/tests/resources/elasticsearch/ml_model_search.json @@ -22,6 +22,7 @@ "identifier" : 1, "date_modified" : "2023-09-01T00:00:00.000Z", "name" : "A name.", + "platform" : "A platform.", "description_plain" : "A plain text description.", "type" : "ml_model", "description_html" : "An html description." diff --git a/src/tests/resources/elasticsearch/news_search.json b/src/tests/resources/elasticsearch/news_search.json index d3d8f0ac..098ade52 100644 --- a/src/tests/resources/elasticsearch/news_search.json +++ b/src/tests/resources/elasticsearch/news_search.json @@ -23,6 +23,7 @@ "headline" : "A headline.", "date_modified" : "2023-09-01T00:00:00.000Z", "name" : "A name.", + "platform" : "A platform.", "description_plain" : "A plain text description.", "type" : "news", "description_html" : "An html description.", diff --git a/src/tests/resources/elasticsearch/organisation_search.json b/src/tests/resources/elasticsearch/organisation_search.json index a0540021..ebf21542 100644 --- a/src/tests/resources/elasticsearch/organisation_search.json +++ b/src/tests/resources/elasticsearch/organisation_search.json @@ -22,6 +22,7 @@ "identifier" : 1, "date_modified" : "2023-09-01T00:00:00.000Z", "name" : "A name.", + "platform" : "A platform.", "description_plain" : "A plain text description.", "type" : "organisation", "description_html" : "An html description.", diff --git a/src/tests/resources/elasticsearch/project_search.json b/src/tests/resources/elasticsearch/project_search.json index f16a0422..04166363 100644 --- a/src/tests/resources/elasticsearch/project_search.json +++ b/src/tests/resources/elasticsearch/project_search.json @@ -22,6 +22,7 @@ "identifier" : 1, "date_modified" : "2023-09-01T00:00:00.000Z", "name" : "A name.", + "platform" : "A platform.", "description_plain" : "A plain text description.", "type" : "project", "description_html" : "An html description." diff --git a/src/tests/resources/elasticsearch/publication_search.json b/src/tests/resources/elasticsearch/publication_search.json index 98b61157..56afa65f 100644 --- a/src/tests/resources/elasticsearch/publication_search.json +++ b/src/tests/resources/elasticsearch/publication_search.json @@ -22,6 +22,7 @@ "identifier" : 1, "date_modified" : "2023-09-01T00:00:00.000Z", "name" : "A name.", + "platform" : "A platform.", "description_plain" : "A plain text description.", "issn" : "20493630", "type" : "publication", diff --git a/src/tests/resources/elasticsearch/service_search.json b/src/tests/resources/elasticsearch/service_search.json index 796f57c0..50f119ee 100644 --- a/src/tests/resources/elasticsearch/service_search.json +++ b/src/tests/resources/elasticsearch/service_search.json @@ -23,6 +23,7 @@ "slogan" : "A slogan.", "date_modified" : "2023-09-01T00:00:00.000Z", "name" : "A name.", + "platform" : "A platform.", "description_plain" : "A plain text description.", "type" : "service", "description_html" : "An html description." diff --git a/src/tests/routers/search_routers/test_search_routers.py b/src/tests/routers/search_routers/test_search_routers.py index 39deadf3..810e72e3 100644 --- a/src/tests/routers/search_routers/test_search_routers.py +++ b/src/tests/routers/search_routers/test_search_routers.py @@ -24,6 +24,7 @@ def test_search_happy_path(client: TestClient, search_router): assert resource["identifier"] == 1 assert resource["name"] == "A name." + assert resource["platform"] == "A platform." assert resource["description"]["plain"] == "A plain text description." assert resource["description"]["html"] == "An html description." assert resource["aiod_entry"]["date_modified"] == "2023-09-01T00:00:00+00:00" From f9fbde5d275b2a043b436c3beea9ef57c278c6cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n?= Date: Thu, 25 Apr 2024 13:10:16 +0200 Subject: [PATCH 40/49] Entity educational_resources added to search functionality --- src/routers/search_routers/__init__.py | 2 ++ .../search_router_educational_resources.py | 20 +++++++++++++++++++ src/setup/logstash_setup/templates/sql_rm.py | 4 +++- .../logstash_setup/templates/sql_sync.py | 2 +- 4 files changed, 26 insertions(+), 2 deletions(-) create mode 100644 src/routers/search_routers/search_router_educational_resources.py diff --git a/src/routers/search_routers/__init__.py b/src/routers/search_routers/__init__.py index 7dcceb07..6399ca85 100644 --- a/src/routers/search_routers/__init__.py +++ b/src/routers/search_routers/__init__.py @@ -1,4 +1,5 @@ from .search_router_datasets import SearchRouterDatasets +from .search_router_educational_resources import SearchRouterEducationalResources from .search_router_events import SearchRouterEvents from .search_router_experiments import SearchRouterExperiments from .search_router_ml_models import SearchRouterMLModels @@ -11,6 +12,7 @@ router_list: list[SearchRouter] = [ SearchRouterDatasets(), + SearchRouterEducationalResources(), SearchRouterEvents(), SearchRouterExperiments(), SearchRouterMLModels(), diff --git a/src/routers/search_routers/search_router_educational_resources.py b/src/routers/search_routers/search_router_educational_resources.py new file mode 100644 index 00000000..517c2503 --- /dev/null +++ b/src/routers/search_routers/search_router_educational_resources.py @@ -0,0 +1,20 @@ +from database.model.educational_resource.educational_resource import EducationalResource +from routers.search_router import SearchRouter + + +class SearchRouterEducationalResources(SearchRouter[EducationalResource]): + @property + def es_index(self) -> str: + return "educational_resource" + + @property + def resource_name_plural(self) -> str: + return "educational_resources" + + @property + def resource_class(self): + return EducationalResource + + @property + def indexed_fields(self): + return {"name", "description_plain", "description_html"} diff --git a/src/setup/logstash_setup/templates/sql_rm.py b/src/setup/logstash_setup/templates/sql_rm.py index 335f9da3..7cfce100 100755 --- a/src/setup/logstash_setup/templates/sql_rm.py +++ b/src/setup/logstash_setup/templates/sql_rm.py @@ -1,4 +1,6 @@ -TEMPLATE_SQL_RM = """SELECT {{entity_name}}.identifier +TEMPLATE_SQL_RM = """SELECT + {{entity_name}}.identifier, + {{entity_name}}.date_deleted FROM aiod.{{entity_name}} WHERE aiod.{{entity_name}}.date_deleted IS NOT NULL AND aiod.{{entity_name}}.date_deleted > :sql_last_value diff --git a/src/setup/logstash_setup/templates/sql_sync.py b/src/setup/logstash_setup/templates/sql_sync.py index e62a0582..1475143c 100755 --- a/src/setup/logstash_setup/templates/sql_sync.py +++ b/src/setup/logstash_setup/templates/sql_sync.py @@ -4,7 +4,7 @@ {{entity_name}}.platform, text.plain as 'description_plain', text.html as 'description_html', - aiod_entry.date_modified{{extra_fields}} + aiod_entry.date_modified as 'date_modified'{{extra_fields}} FROM aiod.{{entity_name}} INNER JOIN aiod.aiod_entry ON aiod.{{entity_name}}.aiod_entry_identifier=aiod.aiod_entry.identifier LEFT JOIN aiod.text ON aiod.{{entity_name}}.description_identifier=aiod.text.identifier From 3a48024fed4c50600b085f90ff84bfec6cb4cc58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adri=C3=A1n?= Date: Thu, 25 Apr 2024 13:27:47 +0200 Subject: [PATCH 41/49] Entity educational_resources added to search router tests --- .../educational_resource_search.json | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 src/tests/resources/elasticsearch/educational_resource_search.json diff --git a/src/tests/resources/elasticsearch/educational_resource_search.json b/src/tests/resources/elasticsearch/educational_resource_search.json new file mode 100644 index 00000000..8e6cbf8e --- /dev/null +++ b/src/tests/resources/elasticsearch/educational_resource_search.json @@ -0,0 +1,36 @@ +{ + "took" : 1, + "timed_out" : false, + "_shards" : { + "total" : 1, + "successful" : 1, + "skipped" : 0, + "failed" : 0 + }, + "hits" : { + "total" : { + "value" : 1, + "relation" : "eq" + }, + "max_score" : null, + "hits" : [ + { + "_index" : "educational_resource", + "_id" : "educational_resource_1", + "_score" : null, + "_source" : { + "identifier" : 1, + "date_modified" : "2023-09-01T00:00:00.000Z", + "name" : "A name.", + "platform" : "A platform.", + "description_plain" : "A plain text description.", + "description_html" : "An html description.", + "type" : "educational_resource" + }, + "sort" : [ + 1 + ] + } + ] + } +} From 76e095d53873fc418b59d060df43924d46f6e560 Mon Sep 17 00:00:00 2001 From: taniya-das Date: Tue, 18 Jun 2024 14:47:47 +0200 Subject: [PATCH 42/49] bugfix huggingface validator --- .../validators/huggingface_validators.py | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/src/database/validators/huggingface_validators.py b/src/database/validators/huggingface_validators.py index 33203cfa..d37fb094 100644 --- a/src/database/validators/huggingface_validators.py +++ b/src/database/validators/huggingface_validators.py @@ -1,4 +1,5 @@ import re +from huggingface_hub.utils import validate_repo_id REPO_ID_ILLEGAL_CHARACTERS = re.compile(r"[^0-9a-zA-Z-_./]+") MSG_PREFIX = "The platform_resource_identifier for HuggingFace should be a valid repo_id. " @@ -18,19 +19,5 @@ def throw_error_on_invalid_identifier(platform_resource_identifier: str): https://huggingface.co/docs/huggingface_hub/package_reference/utilities#huggingface_hub.utils.validate_repo_id """ repo_id = platform_resource_identifier - if REPO_ID_ILLEGAL_CHARACTERS.search(repo_id): - msg = "A repo_id should only contain [a-zA-Z0-9] or ”-”, ”_”, ”.”" - raise ValueError(MSG_PREFIX + msg) - if not (1 < len(repo_id) < 96): - msg = "A repo_id should be between 1 and 96 characters." - raise ValueError(MSG_PREFIX + msg) - if repo_id.count("/") > 1: - msg = ( - "For new repositories, there should be a single forward slash in the repo_id (" - "namespace/repo_name). Legacy repositories are without a namespace. This repo_id has " - "too many forward slashes." - ) - raise ValueError(MSG_PREFIX + msg) - if ".." in repo_id: - msg = "A repo_id may not contain multiple consecutive dots." - raise ValueError(MSG_PREFIX + msg) + validate_repo_id(repo_id=repo_id) + \ No newline at end of file From ddf45eac3b68d78ce56ec721d22eea1d7f9a347d Mon Sep 17 00:00:00 2001 From: taniya-das Date: Tue, 18 Jun 2024 16:34:20 +0200 Subject: [PATCH 43/49] update tests as huggingface validator is updated --- .../resource_routers/test_router_dataset.py | 8 +++++--- .../huggingface/test_dataset_uploader.py | 20 +++++++++---------- .../validators/test_huggingface_validators.py | 19 +++++++++--------- 3 files changed, 24 insertions(+), 23 deletions(-) diff --git a/src/tests/routers/resource_routers/test_router_dataset.py b/src/tests/routers/resource_routers/test_router_dataset.py index c92ae95e..f299aea5 100644 --- a/src/tests/routers/resource_routers/test_router_dataset.py +++ b/src/tests/routers/resource_routers/test_router_dataset.py @@ -7,6 +7,7 @@ from authentication import keycloak_openid from database.model.agent.person import Person from database.session import DbSession +from huggingface_hub.utils._validators import HFValidationError def test_happy_path( @@ -59,14 +60,15 @@ def test_post_invalid_huggingface_identifier( ): keycloak_openid.userinfo = mocked_privileged_token - body = {"name": "name", "platform": "huggingface", "platform_resource_identifier": "a"} + body = {"name": "name", "platform": "huggingface", "platform_resource_identifier": ""} response = client.post("/datasets/v1", json=body, headers={"Authorization": "Fake token"}) assert response.status_code == status.HTTP_422_UNPROCESSABLE_ENTITY, response.json() assert ( response.json()["detail"][0]["msg"] - == "The platform_resource_identifier for HuggingFace should be a valid repo_id. A repo_id " - "should be between 1 and 96 characters." + == "Repo id must use alphanumeric chars or '-', '_', '.', '--' and '..' are" + " forbidden, '-' and '.' cannot start or end the name, max length is 96:" + f" '{body['platform_resource_identifier']}'." ) diff --git a/src/tests/uploader/huggingface/test_dataset_uploader.py b/src/tests/uploader/huggingface/test_dataset_uploader.py index fc86f36d..2a7e36fe 100644 --- a/src/tests/uploader/huggingface/test_dataset_uploader.py +++ b/src/tests/uploader/huggingface/test_dataset_uploader.py @@ -196,8 +196,8 @@ def test_wrong_platform(client: TestClient, mocked_privileged_token: Mock, datas status_code=status.HTTP_400_BAD_REQUEST, detail=( ERROR_MSG_PREFIX - + "The platform_resource_identifier for HuggingFace should be a valid repo_id. " - "A repo_id should only contain [a-zA-Z0-9] or ”-”, ”_”, ”.”" + + "Repo id must use alphanumeric chars or '-', '_', '.', '--' and '..' are forbidden, '-' and '.' cannot start or end the name, max length is 96: " + "'user/Test name with ?'." ), ), ), @@ -221,10 +221,8 @@ def test_wrong_platform(client: TestClient, mocked_privileged_token: Mock, datas status_code=status.HTTP_400_BAD_REQUEST, detail=( ERROR_MSG_PREFIX - + "The platform_resource_identifier for HuggingFace should be a valid repo_id. " - "For new repositories, there should be a single forward slash in the repo_id " - "(namespace/repo_name). Legacy repositories are without a namespace. This " - "repo_id has too many forward slashes." + + "Repo id must be in the form 'repo_name' or 'namespace/repo_name': 'user/data/set'. " + "Use `repo_type` argument if needed." ), ), ), @@ -235,8 +233,8 @@ def test_wrong_platform(client: TestClient, mocked_privileged_token: Mock, datas status_code=status.HTTP_400_BAD_REQUEST, detail=( ERROR_MSG_PREFIX - + "The namespace (the first part of the platform_resource_identifier) should be" - " equal to the username, but wrong-namespace != user." + + "The namespace (the first part of the platform_resource_identifier) should be " + "equal to the username, but wrong-namespace != user." ), ), ), @@ -247,8 +245,8 @@ def test_wrong_platform(client: TestClient, mocked_privileged_token: Mock, datas status_code=status.HTTP_400_BAD_REQUEST, detail=( ERROR_MSG_PREFIX - + "The platform_resource_identifier for HuggingFace should be a valid repo_id. " - "A repo_id should be between 1 and 96 characters." + +"Repo id must use alphanumeric chars or '-', '_', '.', '--' and '..' are forbidden, '-' and '.' cannot start or end the name, max length is 96: " + "'user/" + "a" * 200 + "'." ), ), ), @@ -261,6 +259,6 @@ def test_validate_repo_id(username: str, dataset_name: str, expected_error: HTTP else: with pytest.raises(type(expected_error)) as exception_info: huggingface_uploader._validate_repo_id(dataset_name, username) - + print(exception_info.value.detail) assert exception_info.value.status_code == expected_error.status_code assert exception_info.value.detail == expected_error.detail diff --git a/src/tests/validators/test_huggingface_validators.py b/src/tests/validators/test_huggingface_validators.py index 95b83ebd..ca309f0c 100644 --- a/src/tests/validators/test_huggingface_validators.py +++ b/src/tests/validators/test_huggingface_validators.py @@ -12,24 +12,24 @@ ( "user/data/set", ValueError( - "The platform_resource_identifier for HuggingFace should be a valid repo_id. For " - "new repositories, there should be a single forward slash in the repo_id " - "(namespace/repo_name). Legacy repositories are without a namespace. This repo_id " - "has too many forward slashes." + "Repo id must be in the form 'repo_name' or 'namespace/repo_name': " + "'user/data/set'. Use `repo_type` argument if needed." ), ), ( - "a", + "", ValueError( - "The platform_resource_identifier for HuggingFace should be a valid repo_id. A " - "repo_id should be between 1 and 96 characters." + "Repo id must use alphanumeric chars or '-', '_', '.', '--' and '..' are forbidden, '-' and '.' " + "cannot start or end the name, max length is 96: " + "''." ), ), ( "user/" + "a" * 200, ValueError( - "The platform_resource_identifier for HuggingFace should be a valid repo_id. A " - "repo_id should be between 1 and 96 characters." + "Repo id must use alphanumeric chars or '-', '_', '.', '--' and '..' are forbidden, '-' and '.' " + "cannot start or end the name, max length is 96: " + "'user/" + "a" * 200 + "'." ), ), ], @@ -40,4 +40,5 @@ def test_identifier(identifier: str, expected_error: ValueError | None): else: with pytest.raises(type(expected_error)) as exception_info: huggingface_validators.throw_error_on_invalid_identifier(identifier) + print(exception_info.value.args[0]) assert exception_info.value.args[0] == expected_error.args[0] From db3dd8a524fe6f1d0477287ce64cdf640261f332 Mon Sep 17 00:00:00 2001 From: taniya-das Date: Tue, 18 Jun 2024 16:42:34 +0200 Subject: [PATCH 44/49] formatting --- .../resource_routers/test_router_dataset.py | 5 ++--- .../huggingface/test_dataset_uploader.py | 16 +++++++++------- .../validators/test_huggingface_validators.py | 9 ++++----- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/tests/routers/resource_routers/test_router_dataset.py b/src/tests/routers/resource_routers/test_router_dataset.py index f299aea5..9c82f818 100644 --- a/src/tests/routers/resource_routers/test_router_dataset.py +++ b/src/tests/routers/resource_routers/test_router_dataset.py @@ -7,7 +7,6 @@ from authentication import keycloak_openid from database.model.agent.person import Person from database.session import DbSession -from huggingface_hub.utils._validators import HFValidationError def test_happy_path( @@ -67,8 +66,8 @@ def test_post_invalid_huggingface_identifier( assert ( response.json()["detail"][0]["msg"] == "Repo id must use alphanumeric chars or '-', '_', '.', '--' and '..' are" - " forbidden, '-' and '.' cannot start or end the name, max length is 96:" - f" '{body['platform_resource_identifier']}'." + " forbidden, '-' and '.' cannot start or end the name, max length is 96:" + f" '{body['platform_resource_identifier']}'." ) diff --git a/src/tests/uploader/huggingface/test_dataset_uploader.py b/src/tests/uploader/huggingface/test_dataset_uploader.py index 2a7e36fe..f1a923b2 100644 --- a/src/tests/uploader/huggingface/test_dataset_uploader.py +++ b/src/tests/uploader/huggingface/test_dataset_uploader.py @@ -196,7 +196,8 @@ def test_wrong_platform(client: TestClient, mocked_privileged_token: Mock, datas status_code=status.HTTP_400_BAD_REQUEST, detail=( ERROR_MSG_PREFIX - + "Repo id must use alphanumeric chars or '-', '_', '.', '--' and '..' are forbidden, '-' and '.' cannot start or end the name, max length is 96: " + + "Repo id must use alphanumeric chars or '-', '_', '.', '--' and '..' are " + "forbidden, '-' and '.' cannot start or end the name, max length is 96: " "'user/Test name with ?'." ), ), @@ -221,8 +222,8 @@ def test_wrong_platform(client: TestClient, mocked_privileged_token: Mock, datas status_code=status.HTTP_400_BAD_REQUEST, detail=( ERROR_MSG_PREFIX - + "Repo id must be in the form 'repo_name' or 'namespace/repo_name': 'user/data/set'. " - "Use `repo_type` argument if needed." + + "Repo id must be in the form 'repo_name' or 'namespace/repo_name': " + "'user/data/set'. Use `repo_type` argument if needed." ), ), ), @@ -233,8 +234,8 @@ def test_wrong_platform(client: TestClient, mocked_privileged_token: Mock, datas status_code=status.HTTP_400_BAD_REQUEST, detail=( ERROR_MSG_PREFIX - + "The namespace (the first part of the platform_resource_identifier) should be " - "equal to the username, but wrong-namespace != user." + + "The namespace (the first part of the platform_resource_identifier) " + "should be equal to the username, but wrong-namespace != user." ), ), ), @@ -245,7 +246,8 @@ def test_wrong_platform(client: TestClient, mocked_privileged_token: Mock, datas status_code=status.HTTP_400_BAD_REQUEST, detail=( ERROR_MSG_PREFIX - +"Repo id must use alphanumeric chars or '-', '_', '.', '--' and '..' are forbidden, '-' and '.' cannot start or end the name, max length is 96: " + + "Repo id must use alphanumeric chars or '-', '_', '.', '--' and '..' are " + "forbidden, '-' and '.' cannot start or end the name, max length is 96: " "'user/" + "a" * 200 + "'." ), ), @@ -259,6 +261,6 @@ def test_validate_repo_id(username: str, dataset_name: str, expected_error: HTTP else: with pytest.raises(type(expected_error)) as exception_info: huggingface_uploader._validate_repo_id(dataset_name, username) - print(exception_info.value.detail) + assert exception_info.value.status_code == expected_error.status_code assert exception_info.value.detail == expected_error.detail diff --git a/src/tests/validators/test_huggingface_validators.py b/src/tests/validators/test_huggingface_validators.py index ca309f0c..a6b60dba 100644 --- a/src/tests/validators/test_huggingface_validators.py +++ b/src/tests/validators/test_huggingface_validators.py @@ -19,16 +19,16 @@ ( "", ValueError( - "Repo id must use alphanumeric chars or '-', '_', '.', '--' and '..' are forbidden, '-' and '.' " - "cannot start or end the name, max length is 96: " + "Repo id must use alphanumeric chars or '-', '_', '.', '--' and '..' are " + "forbidden, '-' and '.' cannot start or end the name, max length is 96: " "''." ), ), ( "user/" + "a" * 200, ValueError( - "Repo id must use alphanumeric chars or '-', '_', '.', '--' and '..' are forbidden, '-' and '.' " - "cannot start or end the name, max length is 96: " + "Repo id must use alphanumeric chars or '-', '_', '.', '--' and '..' are " + "forbidden, '-' and '.' cannot start or end the name, max length is 96: " "'user/" + "a" * 200 + "'." ), ), @@ -40,5 +40,4 @@ def test_identifier(identifier: str, expected_error: ValueError | None): else: with pytest.raises(type(expected_error)) as exception_info: huggingface_validators.throw_error_on_invalid_identifier(identifier) - print(exception_info.value.args[0]) assert exception_info.value.args[0] == expected_error.args[0] From 116ccd25b2c8bc0cc077ea7e8b14faf5f4871144 Mon Sep 17 00:00:00 2001 From: taniya-das Date: Tue, 18 Jun 2024 16:46:46 +0200 Subject: [PATCH 45/49] code cleanup --- src/database/validators/huggingface_validators.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/database/validators/huggingface_validators.py b/src/database/validators/huggingface_validators.py index d37fb094..ed806a1e 100644 --- a/src/database/validators/huggingface_validators.py +++ b/src/database/validators/huggingface_validators.py @@ -1,10 +1,6 @@ import re from huggingface_hub.utils import validate_repo_id -REPO_ID_ILLEGAL_CHARACTERS = re.compile(r"[^0-9a-zA-Z-_./]+") -MSG_PREFIX = "The platform_resource_identifier for HuggingFace should be a valid repo_id. " - - def throw_error_on_invalid_identifier(platform_resource_identifier: str): """ Throw a ValueError on an invalid repository identifier. @@ -20,4 +16,3 @@ def throw_error_on_invalid_identifier(platform_resource_identifier: str): """ repo_id = platform_resource_identifier validate_repo_id(repo_id=repo_id) - \ No newline at end of file From 4e6adc82528dd54197a43a4e53983e69ae636b55 Mon Sep 17 00:00:00 2001 From: taniya-das Date: Tue, 18 Jun 2024 16:48:41 +0200 Subject: [PATCH 46/49] code cleanup --- src/database/validators/huggingface_validators.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/database/validators/huggingface_validators.py b/src/database/validators/huggingface_validators.py index ed806a1e..4ab2ff0b 100644 --- a/src/database/validators/huggingface_validators.py +++ b/src/database/validators/huggingface_validators.py @@ -1,4 +1,3 @@ -import re from huggingface_hub.utils import validate_repo_id def throw_error_on_invalid_identifier(platform_resource_identifier: str): From 9089195d908a562ebdcb62df9289df7a720235d2 Mon Sep 17 00:00:00 2001 From: taniya-das Date: Tue, 18 Jun 2024 16:55:18 +0200 Subject: [PATCH 47/49] formatting --- src/database/validators/huggingface_validators.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/database/validators/huggingface_validators.py b/src/database/validators/huggingface_validators.py index 4ab2ff0b..c7696d18 100644 --- a/src/database/validators/huggingface_validators.py +++ b/src/database/validators/huggingface_validators.py @@ -1,5 +1,6 @@ from huggingface_hub.utils import validate_repo_id + def throw_error_on_invalid_identifier(platform_resource_identifier: str): """ Throw a ValueError on an invalid repository identifier. From 5a6526db18decf54355e26f50ff104d93c173b31 Mon Sep 17 00:00:00 2001 From: Jean Matias Date: Wed, 19 Jun 2024 09:57:59 +0100 Subject: [PATCH 48/49] update docstring --- src/database/validators/huggingface_validators.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/database/validators/huggingface_validators.py b/src/database/validators/huggingface_validators.py index c7696d18..556c92ee 100644 --- a/src/database/validators/huggingface_validators.py +++ b/src/database/validators/huggingface_validators.py @@ -8,8 +8,8 @@ def throw_error_on_invalid_identifier(platform_resource_identifier: str): Valid repo_ids: Between 1 and 96 characters. Either “repo_name” or “namespace/repo_name” - [a-zA-Z0-9] or ”-”, ”_”, ”.” - ”—” and ”..” are forbidden + [a-zA-Z0-9] or ”-”, ”_”, ”.”. + The following sequences ”--” and ”..” are forbidden. Refer to: https://huggingface.co/docs/huggingface_hub/package_reference/utilities#huggingface_hub.utils.validate_repo_id From 8cd19b02bd521105b6285c6149b4949348337f1a Mon Sep 17 00:00:00 2001 From: Jean Matias Date: Thu, 20 Jun 2024 08:32:06 +0100 Subject: [PATCH 49/49] Change version tag - 1.3.20240619 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index abce2405..d1ca2ab8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "aiod_metadata_catalogue" description = "A Metadata Catalogue for AI on Demand " -version = "1.3.20240308" +version = "1.3.20240619" requires-python = ">=3.11" authors = [ { name = "Adrián Alcolea" },