From e63a74f1979185fb224bfb00804a411291da6de8 Mon Sep 17 00:00:00 2001 From: erosselli <67162025+erosselli@users.noreply.github.com> Date: Tue, 16 Jul 2024 09:39:49 -0300 Subject: [PATCH] PROD-1898 Provide API support for (optional) pagination of the `/system` endpoint and /datasets (#5071) --- CHANGELOG.md | 1 + .../api/api/v1/endpoints/generic_overrides.py | 64 ++++ src/fides/api/api/v1/endpoints/system.py | 53 ++- src/fides/api/app_setup.py | 32 ++ src/fides/api/schemas/filter_params.py | 14 + src/fides/api/util/filter_utils.py | 123 +++++++ src/fides/core/api.py | 25 +- tests/conftest.py | 54 ++- tests/ctl/core/test_api.py | 307 ++++++++++++++++++ tests/fixtures/application_fixtures.py | 10 +- .../v1/endpoints/test_dataset_endpoints.py | 145 ++++++++- tests/ops/util/test_filter_utils.py | 51 +++ 12 files changed, 859 insertions(+), 20 deletions(-) create mode 100644 src/fides/api/api/v1/endpoints/generic_overrides.py create mode 100644 src/fides/api/schemas/filter_params.py create mode 100644 src/fides/api/util/filter_utils.py create mode 100644 tests/ops/util/test_filter_utils.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 319a816cb96..1fada4a4446 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,7 @@ The types of changes are: ### Changed - Updated the sample dataset for the Amplitude integration [#5063](https://github.com/ethyca/fides/pull/5063) - Messaging page now shows a notice if you have properties without any templates [#5077](https://github.com/ethyca/fides/pull/5077) +- Endpoints for listing systems (GET /system) and datasets (GET /dataset) now support optional pagination [#5071](https://github.com/ethyca/fides/pull/5071) ### Developer Experience - Upgrade to React 18 and Chakra 2, including other dependencies [#5036](https://github.com/ethyca/fides/pull/5036) diff --git a/src/fides/api/api/v1/endpoints/generic_overrides.py b/src/fides/api/api/v1/endpoints/generic_overrides.py new file mode 100644 index 00000000000..b7701997a6e --- /dev/null +++ b/src/fides/api/api/v1/endpoints/generic_overrides.py @@ -0,0 +1,64 @@ +from typing import List, Optional, Union + +from fastapi import APIRouter, Depends, Query, Security +from fastapi_pagination import Page, Params +from fastapi_pagination.ext.async_sqlalchemy import paginate as async_paginate +from fideslang.models import Dataset +from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy.sql.expression import select + +from fides.api.db.crud import list_resource +from fides.api.db.ctl_session import get_async_db +from fides.api.oauth.utils import verify_oauth_client +from fides.api.schemas.filter_params import FilterParams +from fides.api.util.filter_utils import apply_filters_to_query +from fides.common.api.scope_registry import DATASET_READ +from fides.common.api.v1.urn_registry import V1_URL_PREFIX + +from fides.api.models.sql_models import ( # type: ignore[attr-defined] # isort: skip + Dataset as CtlDataset, +) + +# We create routers to override specific methods in those defined in generic.py +# when we need more custom implementations for only some of the methods in a router. + +dataset_router = APIRouter(tags=["Dataset"], prefix=V1_URL_PREFIX) + + +@dataset_router.get( + "/dataset", + dependencies=[Security(verify_oauth_client, scopes=[DATASET_READ])], + response_model=Union[Page[Dataset], List[Dataset]], + name="List datasets (optionally paginated)", +) +async def list_dataset_paginated( + db: AsyncSession = Depends(get_async_db), + size: Optional[int] = Query(None, ge=1, le=100), + page: Optional[int] = Query(None, ge=1), + search: Optional[str] = Query(None), + data_categories: Optional[List[str]] = Query(None), +) -> Union[Page[Dataset], List[Dataset]]: + """ + Get a list of all of the Datasets. + If any pagination parameters (size or page) are provided, then the response will be paginated + & provided filters (search, data_categories) will be applied. + Otherwise all Datasets will be returned (this may be a slow operation if there are many datasets, + so using the pagination parameters is recommended). + """ + if page or size: + query = select(CtlDataset) + filter_params = FilterParams(search=search, data_categories=data_categories) + filtered_query = apply_filters_to_query( + query=query, + search_model=CtlDataset, + taxonomy_model=CtlDataset, + filter_params=filter_params, + ) + pagination_params = Params(page=page or 1, size=size or 50) + return await async_paginate(db, filtered_query, pagination_params) + + return await list_resource(CtlDataset, db) + + +GENERIC_OVERRIDES_ROUTER = APIRouter() +GENERIC_OVERRIDES_ROUTER.include_router(dataset_router) diff --git a/src/fides/api/api/v1/endpoints/system.py b/src/fides/api/api/v1/endpoints/system.py index 982b4b21641..df219473484 100644 --- a/src/fides/api/api/v1/endpoints/system.py +++ b/src/fides/api/api/v1/endpoints/system.py @@ -1,14 +1,16 @@ -from typing import Dict, List, Optional +from typing import Dict, List, Optional, Union -from fastapi import Depends, HTTPException, Response, Security +from fastapi import Depends, HTTPException, Query, Response, Security from fastapi_pagination import Page, Params from fastapi_pagination.bases import AbstractPage +from fastapi_pagination.ext.async_sqlalchemy import paginate as async_paginate from fastapi_pagination.ext.sqlalchemy import paginate from fideslang.models import System as SystemSchema from fideslang.validation import FidesKey from loguru import logger from pydantic.types import conlist from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy.future import select from sqlalchemy.orm import Session from starlette import status from starlette.status import HTTP_200_OK, HTTP_204_NO_CONTENT, HTTP_404_NOT_FOUND @@ -30,7 +32,10 @@ ) from fides.api.models.connectionconfig import ConnectionConfig, ConnectionType from fides.api.models.fides_user import FidesUser -from fides.api.models.sql_models import System # type:ignore[attr-defined] +from fides.api.models.sql_models import ( # type:ignore[attr-defined] + PrivacyDeclaration, + System, +) from fides.api.oauth.system_manager_oauth_util import ( verify_oauth_client_for_system_from_fides_key, verify_oauth_client_for_system_from_request_body_cli, @@ -49,6 +54,7 @@ from fides.api.schemas.connection_configuration.saas_config_template_values import ( SaasConnectionTemplateValues, ) +from fides.api.schemas.filter_params import FilterParams from fides.api.schemas.system import BasicSystemResponse, SystemResponse from fides.api.util.api_router import APIRouter from fides.api.util.connection_util import ( @@ -58,6 +64,7 @@ patch_connection_configs, validate_secrets, ) +from fides.api.util.filter_utils import apply_filters_to_query from fides.common.api.scope_registry import ( CONNECTION_CREATE_OR_UPDATE, CONNECTION_DELETE, @@ -366,13 +373,47 @@ async def create( scopes=[SYSTEM_READ], ) ], - response_model=List[BasicSystemResponse], - name="List", + response_model=Union[List[BasicSystemResponse], Page[BasicSystemResponse]], + name="List systems (optionally paginated)", ) async def ls( # pylint: disable=invalid-name db: AsyncSession = Depends(get_async_db), + size: Optional[int] = Query(None, ge=1, le=100), + page: Optional[int] = Query(None, ge=1), + search: Optional[str] = None, + data_uses: Optional[List[FidesKey]] = Query(None), + data_categories: Optional[List[FidesKey]] = Query(None), + data_subjects: Optional[List[FidesKey]] = Query(None), ) -> List: - """Get a list of all of the resources of this type.""" + """Get a list of all of the Systems. + If any pagination parameters (size or page) are provided, then the response will be paginated + & provided filters (search, taxonomy fields) will be applied. + Otherwise all Systems will be returned (this may be a slow operation if there are many systems, + so using the pagination parameters is recommended). + """ + if size or page: + pagination_params = Params(page=page or 1, size=size or 50) + # Need to join with PrivacyDeclaration in order to be able to filter + # by data use, data category, and data subject + query = select(System).outerjoin( + PrivacyDeclaration, System.id == PrivacyDeclaration.system_id + ) + filter_params = FilterParams( + search=search, + data_uses=data_uses, + data_categories=data_categories, + data_subjects=data_subjects, + ) + filtered_query = apply_filters_to_query( + query=query, + filter_params=filter_params, + search_model=System, + taxonomy_model=PrivacyDeclaration, + ) + # Add a distinct so we only get one row per system + duplicates_removed = filtered_query.distinct(System.id) + return await async_paginate(db, duplicates_removed, pagination_params) + return await list_resource(System, db) diff --git a/src/fides/api/app_setup.py b/src/fides/api/app_setup.py index 0e6cf0d9fc6..ea80c5c2361 100644 --- a/src/fides/api/app_setup.py +++ b/src/fides/api/app_setup.py @@ -7,6 +7,7 @@ from typing import List from fastapi import APIRouter, FastAPI +from fastapi.routing import APIRoute from loguru import logger from redis.exceptions import RedisError, ResponseError from slowapi.errors import RateLimitExceeded # type: ignore @@ -18,6 +19,7 @@ from fides.api.api.v1 import CTL_ROUTER from fides.api.api.v1.api import api_router from fides.api.api.v1.endpoints.admin import ADMIN_ROUTER +from fides.api.api.v1.endpoints.generic_overrides import GENERIC_OVERRIDES_ROUTER from fides.api.api.v1.endpoints.health import HEALTH_ROUTER from fides.api.api.v1.exception_handlers import ExceptionHandlers from fides.api.common_exceptions import FunctionalityNotConfigured, RedisConnectionError @@ -57,6 +59,7 @@ ROUTERS = [CTL_ROUTER, api_router, DB_ROUTER] +OVERRIDING_ROUTERS = [GENERIC_OVERRIDES_ROUTER] def create_fides_app( @@ -80,6 +83,8 @@ def create_fides_app( for router in routers: fastapi_app.include_router(router) + override_generic_routers(OVERRIDING_ROUTERS, fastapi_app) + if security_env == "dev": # This removes auth requirements for specific endpoints fastapi_app.dependency_overrides[verify_oauth_client_prod] = get_root_client @@ -96,6 +101,33 @@ def create_fides_app( return fastapi_app +def override_generic_routers( + overriding_routers: List[APIRouter], base_router: FastAPI +) -> None: + """ + Remove generic routes in favor of their more specific implementations, if available. + """ + for i, existing_route in reversed(list(enumerate(base_router.routes))): + if not isinstance(existing_route, APIRoute): + continue + for new_router in overriding_routers: + for new_route in new_router.routes: + if not isinstance(new_route, APIRoute): # pragma: no cover + continue + if ( + existing_route.methods == new_route.methods + and existing_route.path == new_route.path + ): + logger.debug( + "Removing generic route: {} {}", + existing_route.methods, + existing_route.path, + ) + del base_router.routes[i] + for router in overriding_routers: + base_router.include_router(router) + + def log_startup() -> None: """Log application startup and other information.""" logger.info(f"Starting Fides - v{VERSION}") diff --git a/src/fides/api/schemas/filter_params.py b/src/fides/api/schemas/filter_params.py new file mode 100644 index 00000000000..ba1bd80940c --- /dev/null +++ b/src/fides/api/schemas/filter_params.py @@ -0,0 +1,14 @@ +from typing import List, Optional + +from pydantic import BaseModel + + +class FilterParams(BaseModel): + """ + Generic parameters for filtering queries. + """ + + search: Optional[str] = None + data_uses: Optional[List[str]] = None + data_categories: Optional[List[str]] = None + data_subjects: Optional[List[str]] = None diff --git a/src/fides/api/util/filter_utils.py b/src/fides/api/util/filter_utils.py new file mode 100644 index 00000000000..2da83ae1a49 --- /dev/null +++ b/src/fides/api/util/filter_utils.py @@ -0,0 +1,123 @@ +from typing import List, Optional, Type + +from sqlalchemy import and_, func, or_ +from sqlalchemy.sql.elements import BooleanClauseList +from sqlalchemy.sql.selectable import Select + +from fides.api.models.sql_models import FidesBase # type: ignore[attr-defined] +from fides.api.schemas.filter_params import FilterParams + + +class MissingTaxonomyField(ValueError): + pass + + +# FIXME: this code is basically the same as the one in filter_datamap_query +# in the fidesplus repo, but slightly more generic. Ideally we want to replace that with using this +# so we don't duplicate this logic in two different places +def apply_filters_to_query( + query: Select, + filter_params: FilterParams, + search_model: Type[FidesBase], # Model to search on + taxonomy_model: Optional[ + Type[FidesBase] + ], # Model that has the taxonomy fields to filter on +) -> Select: + """ + Function to filter a given query by given filter params. + The search term is used as a filter on the search_model name and fides_key, as well as its id. + Taxonomy filters are applied to the taxonomy_model if provided. + The search_model and taxonomy_model may be the same model, e.g if the lookup is on one table, + or may be different, e.g if the query is performing a join between two tables. + Returns the filtered query. + """ + + # Perform a text search on the search_model's name, fides_key and id + if filter_params.search: + query = query.where( + and_( + or_( + func.lower(search_model.name).like( + f"%{filter_params.search.lower()}%" + ), + search_model.fides_key == filter_params.search, + search_model.id == filter_params.search, + ) + ) + ) + + if not taxonomy_model: + return query + + # We match the name of the field in FilterParams to the name of the field in the taxonomy_model, + # which can be represented by either a single element field or a collection field + taxonomy_field_information = { + "data_categories": { + "single": "data_category", + "collection": "data_categories", + }, + "data_subjects": { + "single": "data_subject", + "collection": "data_subjects", + }, + "data_uses": { + "single": "data_use", + "collection": "data_uses", + }, + } + + # Filter the fields so we only use the ones that have been provided in the filter params + available_fields_info = { + field: field_info + for field, field_info in taxonomy_field_information.items() + if getattr(filter_params, field) + } + + taxonomy_filter_conditions: List[BooleanClauseList] = [] + + for field, field_info in available_fields_info.items(): + single_field_name = field_info["single"] + collection_field_name = field_info["collection"] + + # If the taxonomy_model doesn't have either a single or collection field matching this field + # we raise an error since it makes no sense to pass in the field as part of the filter params + if not hasattr(taxonomy_model, single_field_name) and not hasattr( + taxonomy_model, collection_field_name + ): + raise MissingTaxonomyField( + f"Model {taxonomy_model.__name__} does not have a {single_field_name} or {collection_field_name} field, but filter_params.{field} is not empty" + ) + + single_field_conditions = [] + collection_field_conditions = [] + + # For single fields, we match each element provided in the filter params field + # against the field in the taxonomy model using like, since model field is a single element + # e.g a single data category represented as a string + if hasattr(taxonomy_model, single_field_name): + single_field_conditions = [ + getattr(taxonomy_model, single_field_name).like(element + "%") + for element in getattr(filter_params, field) + ] + + # For collection fields, we match each element provided in the filter params field + # against the field in the taxonomy model using contains, since model field is + # a collection of elements, e.g a list of data categories + if hasattr(taxonomy_model, collection_field_name): + collection_field_conditions = [ + getattr(taxonomy_model, collection_field_name).contains([element]) + for element in getattr(filter_params, field) + ] + + # We join all conditions with an OR, so that we retrieve rows that match + # either in their single or collection fields + all_field_conditions = or_( + *single_field_conditions, *collection_field_conditions + ) + taxonomy_filter_conditions.append(all_field_conditions) + + # Finally, we filter the query for taxonomy_model instances that match all the conditions + if taxonomy_filter_conditions: + query = query.where(and_(*taxonomy_filter_conditions)) + + return query diff --git a/src/fides/core/api.py b/src/fides/core/api.py index dd69811e7b6..9ca53e2a452 100644 --- a/src/fides/core/api.py +++ b/src/fides/core/api.py @@ -1,6 +1,6 @@ """A wrapper to make calling the API consistent across fides.""" -from typing import Dict, List +from typing import Dict, List, Optional, Union import requests @@ -11,12 +11,26 @@ def generate_resource_url( url: str, resource_type: str = "", resource_id: str = "", + query_params: Optional[Union[Dict[str, str], Dict[str, List[str]]]] = None, ) -> str: """ Generate a resource's URL using a base url, the resource type, and [optionally] the resource's ID. """ - return f"{url}{API_PREFIX}/{resource_type}/{resource_id}" + base_url = f"{url}{API_PREFIX}/{resource_type}/{resource_id}" + + if not query_params: + return base_url + + processed_query_params = [] + for key, value in query_params.items(): + if isinstance(value, list): + processed_query_params.extend([f"{key}={v}" for v in value]) + else: + processed_query_params.append(f"{key}={value}") + + query_string = "&".join(processed_query_params) + return f"{url}{API_PREFIX}/{resource_type}/{resource_id}?{query_string}" def get( @@ -57,12 +71,15 @@ def delete( def ls( # pylint: disable=invalid-name - url: str, resource_type: str, headers: Dict[str, str] + url: str, + resource_type: str, + headers: Dict[str, str], + query_params: Optional[Dict[str, str]] = None, ) -> requests.Response: """ Get a list of all of the resources of a certain type. """ - resource_url = generate_resource_url(url, resource_type) + resource_url = generate_resource_url(url, resource_type, query_params=query_params) return requests.get(resource_url, headers=headers) diff --git a/tests/conftest.py b/tests/conftest.py index 9b8d3066d52..287504a3362 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1247,6 +1247,49 @@ def system(db: Session) -> System: return system +@pytest.fixture(scope="function") +def system_with_cleanup(db: Session) -> Generator[System, None, None]: + system = System.create( + db=db, + data={ + "fides_key": f"system_key-f{uuid4()}", + "name": f"system-{uuid4()}", + "description": "fixture-made-system", + "organization_fides_key": "default_organization", + "system_type": "Service", + }, + ) + + privacy_declaration = PrivacyDeclaration.create( + db=db, + data={ + "name": "Collect data for marketing", + "system_id": system.id, + "data_categories": ["user.device.cookie_id"], + "data_use": "marketing.advertising", + "data_subjects": ["customer"], + "dataset_references": None, + "egress": None, + "ingress": None, + }, + ) + + Cookies.create( + db=db, + data={ + "name": "test_cookie", + "path": "/", + "privacy_declaration_id": privacy_declaration.id, + "system_id": system.id, + }, + check_name=False, + ) + + db.refresh(system) + yield system + db.delete(system) + + @pytest.fixture(scope="function") def system_with_dataset_references(db: Session) -> System: ctl_dataset = CtlDataset.create_from_dataset_dict( @@ -1351,7 +1394,7 @@ def privacy_declaration_with_dataset_references(db: Session) -> System: @pytest.fixture(scope="function") -def system_multiple_decs(db: Session, system: System) -> System: +def system_multiple_decs(db: Session, system: System) -> Generator[System, None, None]: """ Add an additional PrivacyDeclaration onto the base System to test scenarios with multiple PrivacyDeclarations on a given system @@ -1371,15 +1414,15 @@ def system_multiple_decs(db: Session, system: System) -> System: ) db.refresh(system) - return system + yield system @pytest.fixture(scope="function") -def system_third_party_sharing(db: Session) -> System: +def system_third_party_sharing(db: Session) -> Generator[System, None, None]: system_third_party_sharing = System.create( db=db, data={ - "fides_key": f"system_key-f{uuid4()}", + "fides_key": f"system_third_party_sharing-f{uuid4()}", "name": f"system-{uuid4()}", "description": "fixture-made-system", "organization_fides_key": "default_organization", @@ -1401,7 +1444,8 @@ def system_third_party_sharing(db: Session) -> System: }, ) db.refresh(system_third_party_sharing) - return system_third_party_sharing + yield system_third_party_sharing + db.delete(system_third_party_sharing) @pytest.fixture(scope="function") diff --git a/tests/ctl/core/test_api.py b/tests/ctl/core/test_api.py index bf90581940e..17e5b3a9623 100644 --- a/tests/ctl/core/test_api.py +++ b/tests/ctl/core/test_api.py @@ -13,6 +13,7 @@ from fideslang.models import PrivacyDeclaration as PrivacyDeclarationSchema from fideslang.models import System as SystemSchema from pytest import MonkeyPatch +from sqlalchemy import text from sqlalchemy.orm import Session from starlette.status import ( HTTP_200_OK, @@ -1262,6 +1263,312 @@ def test_data_stewards_included_in_response( assert "last_name" in steward +@pytest.mark.unit +class TestSystemList: + @pytest.fixture(scope="function", autouse=True) + def remove_all_systems(self, db) -> None: + """Remove any systems (and privacy declarations) before test execution for clean state""" + for privacy_declaration in PrivacyDeclaration.all(db): + privacy_declaration.delete(db) + for system in System.all(db): + system.delete(db) + + def test_list_no_pagination(self, test_config, system_with_cleanup): + result = _api.ls( + url=test_config.cli.server_url, + headers=test_config.user.auth_header, + resource_type="system", + ) + + assert result.status_code == 200 + result_json = result.json() + + assert len(result_json) == 1 + assert result_json[0]["fides_key"] == system_with_cleanup.fides_key + + def test_list_with_pagination( + self, + test_config, + system_with_cleanup, + tcf_system, + ): + result = _api.ls( + url=test_config.cli.server_url, + headers=test_config.user.auth_header, + resource_type="system", + query_params={ + "page": 1, + "size": 5, + }, + ) + + assert result.status_code == 200 + result_json = result.json() + + assert result_json["page"] == 1 + assert result_json["size"] == 5 + assert result_json["total"] == 2 + assert len(result_json["items"]) == 2 + + sorted_items = sorted(result_json["items"], key=lambda x: x["fides_key"]) + assert sorted_items[0]["fides_key"] == system_with_cleanup.fides_key + assert sorted_items[1]["fides_key"] == tcf_system.fides_key + + def test_list_with_pagination_default_page( + self, + test_config, + system_with_cleanup, + tcf_system, + ): + # We don't pass in the page but we pass in the size, + # so we should get a paginated response with the default page number (1) + result = _api.ls( + url=test_config.cli.server_url, + headers=test_config.user.auth_header, + resource_type="system", + query_params={ + "size": 5, + }, + ) + + assert result.status_code == 200 + result_json = result.json() + + assert result_json["page"] == 1 + assert result_json["size"] == 5 + assert result_json["total"] == 2 + assert len(result_json["items"]) == 2 + + sorted_items = sorted(result_json["items"], key=lambda x: x["fides_key"]) + assert sorted_items[0]["fides_key"] == system_with_cleanup.fides_key + assert sorted_items[1]["fides_key"] == tcf_system.fides_key + + def test_list_with_pagination_default_size( + self, + test_config, + system_with_cleanup, + tcf_system, + ): + # We don't pass in the size but we pass in the page, + # so we should get a paginated response with the default size (50) + result = _api.ls( + url=test_config.cli.server_url, + headers=test_config.user.auth_header, + resource_type="system", + query_params={ + "page": 1, + }, + ) + + assert result.status_code == 200 + result_json = result.json() + + assert result_json["page"] == 1 + assert result_json["size"] == 50 + assert result_json["total"] == 2 + assert len(result_json["items"]) == 2 + + sorted_items = sorted(result_json["items"], key=lambda x: x["fides_key"]) + assert sorted_items[0]["fides_key"] == system_with_cleanup.fides_key + assert sorted_items[1]["fides_key"] == tcf_system.fides_key + + def test_list_with_pagination_and_search( + self, + test_config, + system_with_cleanup, + tcf_system, + system_third_party_sharing, + ): + result = _api.ls( + url=test_config.cli.server_url, + headers=test_config.user.auth_header, + resource_type="system", + query_params={"page": 1, "size": 5, "search": "tcf"}, + ) + + assert result.status_code == 200 + result_json = result.json() + assert result_json["total"] == 1 + assert len(result_json["items"]) == 1 + assert result_json["items"][0]["fides_key"] == tcf_system.fides_key + + def test_list_with_pagination_and_data_uses_filter( + self, + test_config, + system_multiple_decs, + tcf_system, + system_third_party_sharing, + ): + result = _api.ls( + url=test_config.cli.server_url, + headers=test_config.user.auth_header, + resource_type="system", + query_params={ + "page": 1, + "size": 5, + "data_uses": ["third_party_sharing"], + }, + ) + + assert result.status_code == 200 + result_json = result.json() + assert result_json["total"] == 2 + assert len(result_json["items"]) == 2 + + sorted_items = sorted(result_json["items"], key=lambda x: x["fides_key"]) + + assert sorted_items[0]["fides_key"] == system_multiple_decs.fides_key + assert sorted_items[1]["fides_key"] == system_third_party_sharing.fides_key + + def test_list_with_pagination_and_multiple_data_uses_filter( + self, + test_config, + system_multiple_decs, + tcf_system, + system_third_party_sharing, + ): + result = _api.ls( + url=test_config.cli.server_url, + headers=test_config.user.auth_header, + resource_type="system", + query_params={ + "page": 1, + "size": 5, + "data_uses": ["third_party_sharing", "essential.fraud_detection"], + }, + ) + + assert result.status_code == 200 + result_json = result.json() + assert result_json["total"] == 3 + assert len(result_json["items"]) == 3 + + sorted_items = sorted(result_json["items"], key=lambda x: x["fides_key"]) + assert sorted_items[0]["fides_key"] == system_multiple_decs.fides_key + assert sorted_items[1]["fides_key"] == system_third_party_sharing.fides_key + assert sorted_items[2]["fides_key"] == tcf_system.fides_key + + def test_list_with_pagination_and_data_categories_filter( + self, + test_config, + system_with_cleanup, + tcf_system, + system_third_party_sharing, + system_with_no_uses, + ): + result = _api.ls( + url=test_config.cli.server_url, + headers=test_config.user.auth_header, + resource_type="system", + query_params={ + "page": 1, + "size": 5, + "data_categories": ["user.device.cookie_id"], + }, + ) + + assert result.status_code == 200 + result_json = result.json() + assert result_json["total"] == 3 + assert len(result_json["items"]) == 3 + + sorted_items = sorted(result_json["items"], key=lambda x: x["fides_key"]) + assert sorted_items[0]["fides_key"] == system_with_cleanup.fides_key + assert sorted_items[1]["fides_key"] == system_third_party_sharing.fides_key + assert sorted_items[2]["fides_key"] == tcf_system.fides_key + + def test_list_with_pagination_and_data_subjects_filter( + self, + test_config, + system_with_cleanup, + tcf_system, + system_third_party_sharing, + system_with_no_uses, + ): + result = _api.ls( + url=test_config.cli.server_url, + headers=test_config.user.auth_header, + resource_type="system", + query_params={ + "page": 1, + "size": 5, + "data_subjects": ["customer"], + }, + ) + + assert result.status_code == 200 + result_json = result.json() + assert result_json["total"] == 3 + assert len(result_json["items"]) == 3 + + sorted_items = sorted(result_json["items"], key=lambda x: x["fides_key"]) + assert sorted_items[0]["fides_key"] == system_with_cleanup.fides_key + assert sorted_items[1]["fides_key"] == system_third_party_sharing.fides_key + assert sorted_items[2]["fides_key"] == tcf_system.fides_key + + def test_list_with_pagination_and_multiple_filters( + self, + test_config, + system_with_cleanup, + tcf_system, + system_third_party_sharing, + system_with_no_uses, + ): + result = _api.ls( + url=test_config.cli.server_url, + headers=test_config.user.auth_header, + resource_type="system", + query_params={ + "page": 1, + "size": 5, + # TCF System has different privacy declarations, that together have all these fields + "data_uses": ["essential.fraud_detection"], + "data_subjects": ["customer"], + "data_categories": ["user"], + }, + ) + + assert result.status_code == 200 + result_json = result.json() + assert result_json["total"] == 1 + assert len(result_json["items"]) == 1 + + assert result_json["items"][0]["fides_key"] == tcf_system.fides_key + + @pytest.mark.skip("Until we re-visit filter implementation") + def test_list_with_pagination_and_multiple_filters_2( + self, + test_config, + system_with_cleanup, + tcf_system, + system_third_party_sharing, + system_with_no_uses, + db, + ): + + db.que + result = _api.ls( + url=test_config.cli.server_url, + headers=test_config.user.auth_header, + resource_type="system", + query_params={ + "page": 1, + "size": 5, + # TCF system has a single privacy declaration with all these fields + "data_uses": ["essential.fraud_detection"], + "data_subjects": ["customer"], + "data_categories": ["user.device.cookie_id"], + }, + ) + + assert result.status_code == 200 + result_json = result.json() + assert result_json["total"] == 1 + assert len(result_json["items"]) == 1 + + assert result_json["items"][0]["fides_key"] == tcf_system.fides_key + + @pytest.mark.unit class TestSystemUpdate: updated_system_name = "Updated System Name" diff --git a/tests/fixtures/application_fixtures.py b/tests/fixtures/application_fixtures.py index 5a680db279c..00d2aea76fe 100644 --- a/tests/fixtures/application_fixtures.py +++ b/tests/fixtures/application_fixtures.py @@ -3090,7 +3090,7 @@ def allow_custom_privacy_request_fields_in_request_execution_disabled(): @pytest.fixture(scope="function") -def system_with_no_uses(db: Session) -> System: +def system_with_no_uses(db: Session) -> Generator[System, None, None]: system = System.create( db=db, data={ @@ -3101,11 +3101,12 @@ def system_with_no_uses(db: Session) -> System: "system_type": "Service", }, ) - return system + yield system + db.delete(system) @pytest.fixture(scope="function") -def tcf_system(db: Session) -> System: +def tcf_system(db: Session) -> Generator[System, None, None]: system = System.create( db=db, data={ @@ -3151,7 +3152,8 @@ def tcf_system(db: Session) -> System: ) db.refresh(system) - return system + yield system + db.delete(system) @pytest.fixture(scope="function") diff --git a/tests/ops/api/v1/endpoints/test_dataset_endpoints.py b/tests/ops/api/v1/endpoints/test_dataset_endpoints.py index 9c0ffa298f1..3cc2fd1775d 100644 --- a/tests/ops/api/v1/endpoints/test_dataset_endpoints.py +++ b/tests/ops/api/v1/endpoints/test_dataset_endpoints.py @@ -1,8 +1,9 @@ import json import uuid -from typing import Dict, List, Optional +from typing import Dict, Generator, List, Optional from unittest import mock from unittest.mock import Mock +from uuid import uuid4 import pydash import pytest @@ -16,6 +17,7 @@ from fides.api.models.connectionconfig import ConnectionConfig from fides.api.models.datasetconfig import DatasetConfig +from fides.api.models.sql_models import Dataset as CtlDataset from fides.common.api.scope_registry import ( CTL_DATASET_READ, DATASET_CREATE_OR_UPDATE, @@ -1967,3 +1969,144 @@ def test_delete_dataset( ) is None ) + + +class TestListDataset: + @pytest.fixture + def dataset_with_categories(self, db: Session) -> Generator[CtlDataset, None, None]: + dataset = CtlDataset.create_from_dataset_dict( + db, + { + "fides_key": f"dataset_key-f{uuid4()}", + "data_categories": ["user.contact.email", "user.contact.phone_number"], + "collections": [ + { + "name": "customer", + "fields": [ + { + "name": "email", + "data_categories": ["user.contact.email"], + }, + {"name": "first_name"}, + ], + } + ], + }, + ) + yield dataset + db.delete(dataset) + + def test_list_dataset_no_pagination( + self, + api_client: TestClient, + generate_auth_header, + ctl_dataset, + saas_ctl_dataset, + ): + auth_header = generate_auth_header(scopes=[DATASET_READ]) + response = api_client.get(f"{V1_URL_PREFIX}/dataset", headers=auth_header) + + assert response.status_code == 200 + + response_json = response.json() + assert len(response_json) == 2 + + sorted_items = sorted(response_json, key=lambda x: x["fides_key"]) + assert sorted_items[0]["fides_key"] == ctl_dataset.fides_key + assert sorted_items[1]["fides_key"] == saas_ctl_dataset.fides_key + + def test_list_dataset_with_pagination( + self, + api_client: TestClient, + generate_auth_header, + ctl_dataset, + saas_ctl_dataset, + ): + auth_header = generate_auth_header(scopes=[DATASET_READ]) + response = api_client.get( + f"{V1_URL_PREFIX}/dataset?page=1&size=5", headers=auth_header + ) + + assert response.status_code == 200 + response_json = response.json() + + assert response_json["total"] == 2 + sorted_items = sorted(response_json["items"], key=lambda x: x["fides_key"]) + + assert len(sorted_items) == 2 + assert sorted_items[0]["fides_key"] == ctl_dataset.fides_key + assert sorted_items[1]["fides_key"] == saas_ctl_dataset.fides_key + + def test_list_dataset_with_pagination_default_page( + self, + api_client: TestClient, + generate_auth_header, + ctl_dataset, + saas_ctl_dataset, + ): + auth_header = generate_auth_header(scopes=[DATASET_READ]) + # We don't pass in the page but we pass in the size, + # so we should get a paginated response with the default page number (1) + response = api_client.get( + f"{V1_URL_PREFIX}/dataset?size=5", headers=auth_header + ) + + assert response.status_code == 200 + response_json = response.json() + + assert response_json["total"] == 2 + assert response_json["page"] == 1 + + sorted_items = sorted(response_json["items"], key=lambda x: x["fides_key"]) + assert len(sorted_items) == 2 + assert sorted_items[0]["fides_key"] == ctl_dataset.fides_key + assert sorted_items[1]["fides_key"] == saas_ctl_dataset.fides_key + + def test_list_dataset_with_pagination_default_size( + self, + api_client: TestClient, + generate_auth_header, + ctl_dataset, + saas_ctl_dataset, + ): + auth_header = generate_auth_header(scopes=[DATASET_READ]) + # We don't pass in the size but we pass in the page, + # so we should get a paginated response with the default size (50) + response = api_client.get( + f"{V1_URL_PREFIX}/dataset?page=1", headers=auth_header + ) + + assert response.status_code == 200 + response_json = response.json() + + assert response_json["total"] == 2 + assert response_json["page"] == 1 + assert response_json["size"] == 50 + + sorted_items = sorted(response_json["items"], key=lambda x: x["fides_key"]) + assert len(sorted_items) == 2 + assert sorted_items[0]["fides_key"] == ctl_dataset.fides_key + assert sorted_items[1]["fides_key"] == saas_ctl_dataset.fides_key + + def test_list_dataset_with_pagination_and_filters( + self, + api_client: TestClient, + generate_auth_header, + ctl_dataset, + saas_ctl_dataset, + dataset_with_categories, + ): + auth_header = generate_auth_header(scopes=[DATASET_READ]) + response = api_client.get( + f"{V1_URL_PREFIX}/dataset?page=1&size=1&data_categories=user.contact.email", + headers=auth_header, + ) + + assert response.status_code == 200 + response_json = response.json() + + assert response_json["total"] == 1 + assert len(response_json["items"]) == 1 + assert ( + response_json["items"][0]["fides_key"] == dataset_with_categories.fides_key + ) diff --git a/tests/ops/util/test_filter_utils.py b/tests/ops/util/test_filter_utils.py new file mode 100644 index 00000000000..3880df756f8 --- /dev/null +++ b/tests/ops/util/test_filter_utils.py @@ -0,0 +1,51 @@ +import pytest +from sqlalchemy.orm import Session + +from fides.api.models.sql_models import System +from fides.api.schemas.filter_params import FilterParams +from fides.api.util.filter_utils import MissingTaxonomyField, apply_filters_to_query + + +class TestApplyFiltersToQuery: + """ + Most of the logic in apply_filters_to_query is already tested through + the tests of the endpoints that use it (e.g list systems, list datasets). + These tests focus on the edge cases that are not covered by the other test cases. + """ + + def test_does_not_apply_filters_if_no_taxonomy_model( + self, db: Session, system_with_cleanup, tcf_system + ): + query = db.query(System) + filter_params = FilterParams(data_categories=["user.device.cookie_id"]) + + filtered_query = apply_filters_to_query( + query=query, + filter_params=filter_params, + search_model=System, + taxonomy_model=None, + ) + + # tcf_system has the data_category "user.device.cookie_id", while system_with_cleanup doesn't + # since the taxonomy_model isn't provided, we expect the query to return all systems + query_results = db.execute(filtered_query) + assert len(query_results.all()) == 2 # Should have both systems + + def test_raises_exception_if_taxonomy_field_is_not_in_taxonomy_model( + self, db: Session, tcf_system + ): + with pytest.raises(MissingTaxonomyField) as exc: + query = db.query(System) + filter_params = FilterParams(data_categories=["user.device.cookie_id"]) + + apply_filters_to_query( + query=query, + filter_params=filter_params, + search_model=System, + taxonomy_model=System, + ) + + assert ( + str(exc.value) + == "Model System does not have a data_category or data_categories field, but filter_params.data_categories is not empty" + )