-
Notifications
You must be signed in to change notification settings - Fork 73
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
PROD-1898 Provide API support for (optional) pagination of the `/system` and `/datasets` endpoints (#5071)
- Loading branch information
Showing
12 changed files
with
859 additions
and
20 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
from typing import List, Optional, Union | ||
|
||
from fastapi import APIRouter, Depends, Query, Security | ||
from fastapi_pagination import Page, Params | ||
from fastapi_pagination.ext.async_sqlalchemy import paginate as async_paginate | ||
from fideslang.models import Dataset | ||
from sqlalchemy.ext.asyncio import AsyncSession | ||
from sqlalchemy.sql.expression import select | ||
|
||
from fides.api.db.crud import list_resource | ||
from fides.api.db.ctl_session import get_async_db | ||
from fides.api.oauth.utils import verify_oauth_client | ||
from fides.api.schemas.filter_params import FilterParams | ||
from fides.api.util.filter_utils import apply_filters_to_query | ||
from fides.common.api.scope_registry import DATASET_READ | ||
from fides.common.api.v1.urn_registry import V1_URL_PREFIX | ||
|
||
from fides.api.models.sql_models import ( # type: ignore[attr-defined] # isort: skip | ||
Dataset as CtlDataset, | ||
) | ||
|
||
# We create routers to override specific methods in those defined in generic.py
# when we need more custom implementations for only some of the methods in a router.

# Router for dataset endpoints, mounted under the v1 URL prefix.
dataset_router = APIRouter(tags=["Dataset"], prefix=V1_URL_PREFIX)
|
||
|
||
@dataset_router.get(
    "/dataset",
    dependencies=[Security(verify_oauth_client, scopes=[DATASET_READ])],
    response_model=Union[Page[Dataset], List[Dataset]],
    name="List datasets (optionally paginated)",
)
async def list_dataset_paginated(
    db: AsyncSession = Depends(get_async_db),
    size: Optional[int] = Query(None, ge=1, le=100),
    page: Optional[int] = Query(None, ge=1),
    search: Optional[str] = Query(None),
    data_categories: Optional[List[str]] = Query(None),
) -> Union[Page[Dataset], List[Dataset]]:
    """
    Get a list of all of the Datasets.

    If any pagination parameter (``size`` or ``page``) is provided, the response
    is paginated and the given filters (``search``, ``data_categories``) are
    applied. Otherwise every Dataset is returned unfiltered — this may be slow
    with many datasets, so pagination is recommended.
    """
    if not (page or size):
        # No pagination requested: return the full, unfiltered list.
        return await list_resource(CtlDataset, db)

    # Pagination requested: apply the provided filters before paginating.
    filtered_query = apply_filters_to_query(
        query=select(CtlDataset),
        search_model=CtlDataset,
        taxonomy_model=CtlDataset,
        filter_params=FilterParams(search=search, data_categories=data_categories),
    )
    # Fall back to sensible defaults for whichever parameter was omitted.
    return await async_paginate(
        db, filtered_query, Params(page=page or 1, size=size or 50)
    )
|
||
|
||
# Aggregate router exposing all custom overrides of the generic CRUD endpoints.
GENERIC_OVERRIDES_ROUTER = APIRouter()
GENERIC_OVERRIDES_ROUTER.include_router(dataset_router)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
from typing import List, Optional | ||
|
||
from pydantic import BaseModel | ||
|
||
|
||
class FilterParams(BaseModel):
    """
    Generic parameters for filtering queries.

    All fields are optional; a field left as ``None`` applies no filtering
    for that dimension.
    """

    # Free-text search term (matched against name / fides_key / id downstream).
    search: Optional[str] = None
    # Taxonomy filters: lists of fides keys to match against the model.
    data_uses: Optional[List[str]] = None
    data_categories: Optional[List[str]] = None
    data_subjects: Optional[List[str]] = None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
from typing import List, Optional, Type | ||
|
||
from sqlalchemy import and_, func, or_ | ||
from sqlalchemy.sql.elements import BooleanClauseList | ||
from sqlalchemy.sql.selectable import Select | ||
|
||
from fides.api.models.sql_models import FidesBase # type: ignore[attr-defined] | ||
from fides.api.schemas.filter_params import FilterParams | ||
|
||
|
||
class MissingTaxonomyField(ValueError):
    """Raised when filter params reference a taxonomy field the model lacks."""
|
||
|
||
# FIXME: this code is basically the same as the one in filter_datamap_query
# in the fidesplus repo, but slightly more generic. Ideally we want to replace that with using this
# so we don't duplicate this logic in two different places
def apply_filters_to_query(
    query: Select,
    filter_params: FilterParams,
    search_model: Type[FidesBase],  # Model to search on
    taxonomy_model: Optional[
        Type[FidesBase]
    ],  # Model that has the taxonomy fields to filter on
) -> Select:
    """
    Apply the given filter params to the given query and return the result.

    The search term filters on the search_model's name (case-insensitive
    substring match) as well as its fides_key and id (exact match).
    Taxonomy filters are applied to the taxonomy_model when one is provided.
    The search_model and taxonomy_model may be the same model (lookup on a
    single table) or different models (query joining two tables).
    """

    # Text search across the search_model's name, fides_key and id.
    if filter_params.search:
        search_term = filter_params.search
        query = query.where(
            and_(
                or_(
                    func.lower(search_model.name).like(f"%{search_term.lower()}%"),
                    search_model.fides_key == search_term,
                    search_model.id == search_term,
                )
            )
        )

    if not taxonomy_model:
        return query

    # Each FilterParams field maps to the attribute names the taxonomy model
    # may expose for it: a single-element field and/or a collection field.
    taxonomy_attribute_names = {
        "data_categories": {
            "single": "data_category",
            "collection": "data_categories",
        },
        "data_subjects": {
            "single": "data_subject",
            "collection": "data_subjects",
        },
        "data_uses": {
            "single": "data_use",
            "collection": "data_uses",
        },
    }

    taxonomy_filter_conditions: List[BooleanClauseList] = []

    for field, attr_names in taxonomy_attribute_names.items():
        requested_values = getattr(filter_params, field)
        if not requested_values:
            # This filter was not provided in the params; nothing to apply.
            continue

        single_attr = attr_names["single"]
        collection_attr = attr_names["collection"]
        has_single = hasattr(taxonomy_model, single_attr)
        has_collection = hasattr(taxonomy_model, collection_attr)

        # Supplying a filter the model cannot express is a caller error:
        # fail loudly rather than silently ignoring the filter.
        if not has_single and not has_collection:
            raise MissingTaxonomyField(
                f"Model {taxonomy_model.__name__} does not have a {single_attr} or {collection_attr} field, but filter_params.{field} is not empty"
            )

        field_conditions = []

        # Single-element fields (e.g. one data category stored as a string)
        # are prefix-matched with LIKE against each requested value.
        if has_single:
            field_conditions.extend(
                getattr(taxonomy_model, single_attr).like(value + "%")
                for value in requested_values
            )

        # Collection fields (e.g. a list of data categories) are matched with
        # a containment test against each requested value.
        if has_collection:
            field_conditions.extend(
                getattr(taxonomy_model, collection_attr).contains([value])
                for value in requested_values
            )

        # A row matches this field if either its single or its collection
        # attribute matches, hence the OR across all conditions.
        taxonomy_filter_conditions.append(or_(*field_conditions))

    # Rows must satisfy every provided taxonomy filter (AND across fields).
    if taxonomy_filter_conditions:
        query = query.where(and_(*taxonomy_filter_conditions))

    return query
Oops, something went wrong.