Skip to content

Commit

Permalink
PROD-1898 Provide API support for (optional) pagination of the `/syst…
Browse files Browse the repository at this point in the history
…em` endpoint and /datasets (#5071)
  • Loading branch information
erosselli committed Jul 16, 2024
1 parent 75b7dda commit e63a74f
Show file tree
Hide file tree
Showing 12 changed files with 859 additions and 20 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ The types of changes are:
### Changed
- Updated the sample dataset for the Amplitude integration [#5063](https://github.com/ethyca/fides/pull/5063)
- Messaging page now shows a notice if you have properties without any templates [#5077](https://github.com/ethyca/fides/pull/5077)
- Endpoints for listing systems (GET /system) and datasets (GET /dataset) now support optional pagination [#5071](https://github.com/ethyca/fides/pull/5071)

### Developer Experience
- Upgrade to React 18 and Chakra 2, including other dependencies [#5036](https://github.com/ethyca/fides/pull/5036)
Expand Down
64 changes: 64 additions & 0 deletions src/fides/api/api/v1/endpoints/generic_overrides.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
from typing import List, Optional, Union

from fastapi import APIRouter, Depends, Query, Security
from fastapi_pagination import Page, Params
from fastapi_pagination.ext.async_sqlalchemy import paginate as async_paginate
from fideslang.models import Dataset
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.sql.expression import select

from fides.api.db.crud import list_resource
from fides.api.db.ctl_session import get_async_db
from fides.api.oauth.utils import verify_oauth_client
from fides.api.schemas.filter_params import FilterParams
from fides.api.util.filter_utils import apply_filters_to_query
from fides.common.api.scope_registry import DATASET_READ
from fides.common.api.v1.urn_registry import V1_URL_PREFIX

from fides.api.models.sql_models import ( # type: ignore[attr-defined] # isort: skip
Dataset as CtlDataset,
)

# Routers defined here override individual endpoints of the routers defined in
# generic.py, for cases where only some of a router's methods need a more
# custom implementation.

# Router for the Dataset endpoint overrides; its routes are served under V1_URL_PREFIX.
dataset_router = APIRouter(tags=["Dataset"], prefix=V1_URL_PREFIX)


@dataset_router.get(
    "/dataset",
    dependencies=[Security(verify_oauth_client, scopes=[DATASET_READ])],
    response_model=Union[Page[Dataset], List[Dataset]],
    name="List datasets (optionally paginated)",
)
async def list_dataset_paginated(
    db: AsyncSession = Depends(get_async_db),
    size: Optional[int] = Query(None, ge=1, le=100),
    page: Optional[int] = Query(None, ge=1),
    search: Optional[str] = Query(None),
    data_categories: Optional[List[str]] = Query(None),
) -> Union[Page[Dataset], List[Dataset]]:
    """
    List Datasets, paginated only when the caller asks for it.

    When `page` and/or `size` is supplied, the `search` and `data_categories`
    filters are applied and a single page is returned; whichever pagination
    parameter is missing falls back to page 1 / size 50.

    When neither is supplied, every Dataset is returned and the filters are
    not applied. That can be slow when there are many datasets, so passing
    pagination parameters is recommended.
    """
    # Unpaginated path: no pagination parameter was given, return everything.
    if not (page or size):
        return await list_resource(CtlDataset, db)

    # CtlDataset is both the model searched on and the model holding the
    # taxonomy field (data_categories), so it is passed for both roles.
    filtered_query = apply_filters_to_query(
        query=select(CtlDataset),
        search_model=CtlDataset,
        taxonomy_model=CtlDataset,
        filter_params=FilterParams(search=search, data_categories=data_categories),
    )
    # NOTE(review): no ORDER BY is added here, so which rows land on which page
    # is left to the database's row order — confirm this is acceptable.
    return await async_paginate(
        db, filtered_query, Params(page=page or 1, size=size or 50)
    )


# Aggregate router bundling the override routers defined in this module
# (currently only dataset_router).
GENERIC_OVERRIDES_ROUTER = APIRouter()
GENERIC_OVERRIDES_ROUTER.include_router(dataset_router)
53 changes: 47 additions & 6 deletions src/fides/api/api/v1/endpoints/system.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
from typing import Dict, List, Optional
from typing import Dict, List, Optional, Union

from fastapi import Depends, HTTPException, Response, Security
from fastapi import Depends, HTTPException, Query, Response, Security
from fastapi_pagination import Page, Params
from fastapi_pagination.bases import AbstractPage
from fastapi_pagination.ext.async_sqlalchemy import paginate as async_paginate
from fastapi_pagination.ext.sqlalchemy import paginate
from fideslang.models import System as SystemSchema
from fideslang.validation import FidesKey
from loguru import logger
from pydantic.types import conlist
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select
from sqlalchemy.orm import Session
from starlette import status
from starlette.status import HTTP_200_OK, HTTP_204_NO_CONTENT, HTTP_404_NOT_FOUND
Expand All @@ -30,7 +32,10 @@
)
from fides.api.models.connectionconfig import ConnectionConfig, ConnectionType
from fides.api.models.fides_user import FidesUser
from fides.api.models.sql_models import System # type:ignore[attr-defined]
from fides.api.models.sql_models import ( # type:ignore[attr-defined]
PrivacyDeclaration,
System,
)
from fides.api.oauth.system_manager_oauth_util import (
verify_oauth_client_for_system_from_fides_key,
verify_oauth_client_for_system_from_request_body_cli,
Expand All @@ -49,6 +54,7 @@
from fides.api.schemas.connection_configuration.saas_config_template_values import (
SaasConnectionTemplateValues,
)
from fides.api.schemas.filter_params import FilterParams
from fides.api.schemas.system import BasicSystemResponse, SystemResponse
from fides.api.util.api_router import APIRouter
from fides.api.util.connection_util import (
Expand All @@ -58,6 +64,7 @@
patch_connection_configs,
validate_secrets,
)
from fides.api.util.filter_utils import apply_filters_to_query
from fides.common.api.scope_registry import (
CONNECTION_CREATE_OR_UPDATE,
CONNECTION_DELETE,
Expand Down Expand Up @@ -366,13 +373,47 @@ async def create(
scopes=[SYSTEM_READ],
)
],
response_model=List[BasicSystemResponse],
name="List",
response_model=Union[List[BasicSystemResponse], Page[BasicSystemResponse]],
name="List systems (optionally paginated)",
)
async def ls(  # pylint: disable=invalid-name
    db: AsyncSession = Depends(get_async_db),
    size: Optional[int] = Query(None, ge=1, le=100),
    page: Optional[int] = Query(None, ge=1),
    search: Optional[str] = None,
    data_uses: Optional[List[FidesKey]] = Query(None),
    data_categories: Optional[List[FidesKey]] = Query(None),
    data_subjects: Optional[List[FidesKey]] = Query(None),
) -> List:
    """Get a list of all of the resources of this type."""
    # NOTE(review): the one-line string above and the multi-line string below
    # both appear in this view; this looks like the old and new docstring of a
    # diff shown together — confirm which one is meant to remain.
    """Get a list of all of the Systems.
    If any pagination parameters (size or page) are provided, then the response will be paginated
    & provided filters (search, taxonomy fields) will be applied.
    Otherwise all Systems will be returned (this may be a slow operation if there are many systems,
    so using the pagination parameters is recommended).
    """
    # Supplying either pagination parameter selects the paginated, filterable
    # path; the one that is missing falls back to page 1 / size 50.
    if size or page:
        pagination_params = Params(page=page or 1, size=size or 50)
        # Need to join with PrivacyDeclaration in order to be able to filter
        # by data use, data category, and data subject
        query = select(System).outerjoin(
            PrivacyDeclaration, System.id == PrivacyDeclaration.system_id
        )
        filter_params = FilterParams(
            search=search,
            data_uses=data_uses,
            data_categories=data_categories,
            data_subjects=data_subjects,
        )
        # The text search targets System, while the taxonomy filters target the
        # joined PrivacyDeclaration rows.
        filtered_query = apply_filters_to_query(
            query=query,
            filter_params=filter_params,
            search_model=System,
            taxonomy_model=PrivacyDeclaration,
        )
        # Add a distinct so we only get one row per system
        duplicates_removed = filtered_query.distinct(System.id)
        return await async_paginate(db, duplicates_removed, pagination_params)

    # No pagination requested: every System is returned and none of the
    # filter parameters are applied on this path.
    return await list_resource(System, db)


Expand Down
32 changes: 32 additions & 0 deletions src/fides/api/app_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from typing import List

from fastapi import APIRouter, FastAPI
from fastapi.routing import APIRoute
from loguru import logger
from redis.exceptions import RedisError, ResponseError
from slowapi.errors import RateLimitExceeded # type: ignore
Expand All @@ -18,6 +19,7 @@
from fides.api.api.v1 import CTL_ROUTER
from fides.api.api.v1.api import api_router
from fides.api.api.v1.endpoints.admin import ADMIN_ROUTER
from fides.api.api.v1.endpoints.generic_overrides import GENERIC_OVERRIDES_ROUTER
from fides.api.api.v1.endpoints.health import HEALTH_ROUTER
from fides.api.api.v1.exception_handlers import ExceptionHandlers
from fides.api.common_exceptions import FunctionalityNotConfigured, RedisConnectionError
Expand Down Expand Up @@ -57,6 +59,7 @@


# Routers included on the application as-is.
ROUTERS = [CTL_ROUTER, api_router, DB_ROUTER]
# Routers whose routes replace matching generic routes; passed to
# override_generic_routers when the app is created.
OVERRIDING_ROUTERS = [GENERIC_OVERRIDES_ROUTER]


def create_fides_app(
Expand All @@ -80,6 +83,8 @@ def create_fides_app(
for router in routers:
fastapi_app.include_router(router)

override_generic_routers(OVERRIDING_ROUTERS, fastapi_app)

if security_env == "dev":
# This removes auth requirements for specific endpoints
fastapi_app.dependency_overrides[verify_oauth_client_prod] = get_root_client
Expand All @@ -96,6 +101,33 @@ def create_fides_app(
return fastapi_app


def override_generic_routers(
    overriding_routers: List[APIRouter], base_router: FastAPI
) -> None:
    """
    Remove generic routes in favor of their more specific implementations, if available.

    Every `APIRoute` already registered on `base_router` whose HTTP methods and
    path both equal those of an `APIRoute` in one of `overriding_routers` is
    removed from `base_router.routes`. Afterwards each overriding router is
    included on `base_router`, so the overriding implementation is the only
    route left for that method/path pair.

    Args:
        overriding_routers: routers carrying the replacement routes.
        base_router: the application whose route list is modified in place.
    """
    # Collect the replacement routes once instead of re-walking every
    # overriding router for each existing route.
    override_routes = [
        route
        for router in overriding_routers
        for route in router.routes
        if isinstance(route, APIRoute)
    ]

    # Walk the indices backwards so that deleting an entry never shifts the
    # position of a route that is still to be examined.
    for i, existing_route in reversed(list(enumerate(base_router.routes))):
        if not isinstance(existing_route, APIRoute):
            continue
        # Decide once per existing route. Deleting inside a loop over the
        # overriding routes would delete index `i` again whenever more than one
        # overriding route matches, removing an unrelated route (or raising
        # IndexError).
        is_overridden = any(
            existing_route.methods == new_route.methods
            and existing_route.path == new_route.path
            for new_route in override_routes
        )
        if is_overridden:
            logger.debug(
                "Removing generic route: {} {}",
                existing_route.methods,
                existing_route.path,
            )
            del base_router.routes[i]

    for router in overriding_routers:
        base_router.include_router(router)


def log_startup() -> None:
"""Log application startup and other information."""
logger.info(f"Starting Fides - v{VERSION}")
Expand Down
14 changes: 14 additions & 0 deletions src/fides/api/schemas/filter_params.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from typing import List, Optional

from pydantic import BaseModel


class FilterParams(BaseModel):
    """
    Generic parameters for filtering queries.

    Every field is optional and defaults to None.
    """

    # Free-text search term
    search: Optional[str] = None
    # Taxonomy values to filter on, one optional list per taxonomy field
    data_uses: Optional[List[str]] = None
    data_categories: Optional[List[str]] = None
    data_subjects: Optional[List[str]] = None
123 changes: 123 additions & 0 deletions src/fides/api/util/filter_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
from typing import List, Optional, Type

from sqlalchemy import and_, func, or_
from sqlalchemy.sql.elements import BooleanClauseList
from sqlalchemy.sql.selectable import Select

from fides.api.models.sql_models import FidesBase # type: ignore[attr-defined]
from fides.api.schemas.filter_params import FilterParams


class MissingTaxonomyField(ValueError):
    """
    Raised when a taxonomy filter is requested for a model that has neither the
    single-value nor the collection field for that taxonomy.
    """


# FIXME: this code is essentially the same as filter_datamap_query in the
# fidesplus repo, only slightly more generic. Ideally fidesplus should be changed
# to use this function, so the logic is not duplicated in two different places.
def apply_filters_to_query(
    query: Select,
    filter_params: FilterParams,
    search_model: Type[FidesBase],  # Model to search on
    taxonomy_model: Optional[
        Type[FidesBase]
    ],  # Model that has the taxonomy fields to filter on
) -> Select:
    """
    Function to filter a given query by given filter params.

    The search term is matched against the search_model: case-insensitively as a
    substring of its name, or exactly against its fides_key or its id.
    Taxonomy filters are applied to the taxonomy_model if provided.
    The search_model and taxonomy_model may be the same model, e.g if the lookup is
    on one table, or may be different, e.g if the query is performing a join
    between two tables.

    User-supplied values are matched literally: the LIKE wildcards `%` and `_`
    occurring in the search term or in a taxonomy value are escaped rather than
    being interpreted as wildcards.

    Args:
        query: the select statement to add the filters to.
        filter_params: the search term and taxonomy values to filter by.
        search_model: model whose name, fides_key and id are searched.
        taxonomy_model: model holding the taxonomy fields; when None, only the
            search filter is applied.

    Returns:
        The filtered query.

    Raises:
        MissingTaxonomyField: if a taxonomy filter is provided but the
            taxonomy_model has neither the matching single nor collection field.
    """

    # Perform a text search on the search_model's name, fides_key and id
    if filter_params.search:
        query = query.where(
            or_(
                # autoescape makes "%" and "_" in the search term match
                # literally instead of acting as LIKE wildcards
                func.lower(search_model.name).contains(
                    filter_params.search.lower(), autoescape=True
                ),
                search_model.fides_key == filter_params.search,
                search_model.id == filter_params.search,
            )
        )

    if taxonomy_model is None:
        return query

    # We match the name of the field in FilterParams to the name of the field in the taxonomy_model,
    # which can be represented by either a single element field or a collection field
    taxonomy_field_information = {
        "data_categories": {
            "single": "data_category",
            "collection": "data_categories",
        },
        "data_subjects": {
            "single": "data_subject",
            "collection": "data_subjects",
        },
        "data_uses": {
            "single": "data_use",
            "collection": "data_uses",
        },
    }

    taxonomy_filter_conditions: List[BooleanClauseList] = []

    for field, field_info in taxonomy_field_information.items():
        # Only use the fields that have been provided in the filter params
        requested_values = getattr(filter_params, field)
        if not requested_values:
            continue

        single_field_name = field_info["single"]
        collection_field_name = field_info["collection"]
        has_single_field = hasattr(taxonomy_model, single_field_name)
        has_collection_field = hasattr(taxonomy_model, collection_field_name)

        # If the taxonomy_model doesn't have either a single or collection field matching this field
        # we raise an error since it makes no sense to pass in the field as part of the filter params
        if not has_single_field and not has_collection_field:
            raise MissingTaxonomyField(
                f"Model {taxonomy_model.__name__} does not have a {single_field_name} or {collection_field_name} field, but filter_params.{field} is not empty"
            )

        single_field_conditions = []
        collection_field_conditions = []

        # For single fields, the model field is a single element (e.g a single
        # data category represented as a string), so each requested value is
        # matched as a prefix of it. autoescape keeps "_" and "%" inside the
        # value literal, so only the trailing wildcard is open-ended.
        if has_single_field:
            single_column = getattr(taxonomy_model, single_field_name)
            single_field_conditions = [
                single_column.startswith(element, autoescape=True)
                for element in requested_values
            ]

        # For collection fields, we match each element provided in the filter params field
        # against the field in the taxonomy model using contains, since model field is
        # a collection of elements, e.g a list of data categories
        if has_collection_field:
            collection_column = getattr(taxonomy_model, collection_field_name)
            collection_field_conditions = [
                collection_column.contains([element]) for element in requested_values
            ]

        # We join all conditions with an OR, so that we retrieve rows that match
        # either in their single or collection fields
        taxonomy_filter_conditions.append(
            or_(*single_field_conditions, *collection_field_conditions)
        )

    # Finally, we filter the query for taxonomy_model instances that match all the conditions
    if taxonomy_filter_conditions:
        query = query.where(and_(*taxonomy_filter_conditions))

    return query
Loading

0 comments on commit e63a74f

Please sign in to comment.