Skip to content

Commit

Permalink
[Issue #1467] Add order_by support for more fields (#1481)
Browse files Browse the repository at this point in the history
## Summary
Fixes #1467

### Time to review: __10 mins__

## Changes proposed
Added the ability to order_by several more fields

Updates to test structure, continuing a pattern from the parent PR

## Context for reviewers
Except for relevance ranking (which is tracked as its own separate ticket, should we implement it before moving to a search index), this should cover all of the sorting needs for search in the near term.

One callout is that sorting always shoves null values to the end now,
regardless of whether they are ascending or descending (by default,
Postgres considers null to be the largest value).

## Additional information
Did some local testing of performance with the queries we actually use,
and sorting by any of these fields has effectively the same performance
cost, and adding an index to the un-indexed ones didn't seem to change
that.

---------

Co-authored-by: nava-platform-bot <platform-admins@navapbc.com>
  • Loading branch information
chouinar and nava-platform-bot authored Mar 19, 2024
1 parent fb33b83 commit 4686472
Show file tree
Hide file tree
Showing 5 changed files with 340 additions and 65 deletions.
4 changes: 4 additions & 0 deletions api/openapi.generated.yml
Original file line number Diff line number Diff line change
Expand Up @@ -900,6 +900,10 @@ components:
enum:
- opportunity_id
- opportunity_number
- opportunity_title
- post_date
- close_date
- agency_code
description: The field to sort the response by
sort_direction:
description: Whether to sort the response ascending or descending
Expand Down
10 changes: 9 additions & 1 deletion api/src/api/opportunities_v0_1/opportunity_schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,15 @@ class OpportunitySearchRequestSchema(Schema):

pagination = fields.Nested(
generate_pagination_schema(
"OpportunityPaginationSchema", ["opportunity_id", "opportunity_number"]
"OpportunityPaginationSchema",
[
"opportunity_id",
"opportunity_number",
"opportunity_title",
"post_date",
"close_date",
"agency_code",
],
),
required=True,
)
141 changes: 111 additions & 30 deletions api/src/services/opportunities_v0_1/search_opportunities.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import logging
from typing import Sequence, Tuple
from typing import Any, Sequence, Tuple

from pydantic import BaseModel, Field
from sqlalchemy import Select, asc, desc, or_, select
from sqlalchemy.orm import noload, selectinload
from sqlalchemy import Select, asc, desc, nulls_last, or_, select
from sqlalchemy.orm import InstrumentedAttribute, noload, selectinload

import src.adapters.db as db
from src.db.models.opportunity_models import (
Expand Down Expand Up @@ -35,9 +35,19 @@ class SearchOpportunityParams(BaseModel):
filters: SearchOpportunityFilters | None = Field(default=None)


def _add_query_filters(
stmt: Select[tuple[Opportunity]], query: str | None
) -> Select[tuple[Opportunity]]:
def _join_stmt_to_current_summary(stmt: Select[tuple[Any]]) -> Select[tuple[Any]]:
    """Join a select statement to the current + opportunity summary tables.

    Several places (filtering and ordering) need to reference columns on
    OpportunitySummary, so this shared helper adds the two joins required
    for those where/order_by clauses to resolve.
    """
    # The explicit join condition links the current-summary row to its
    # corresponding opportunity summary record by summary ID.
    summary_join_condition = (
        CurrentOpportunitySummary.opportunity_summary_id
        == OpportunitySummary.opportunity_summary_id
    )
    stmt = stmt.join(CurrentOpportunitySummary)
    return stmt.join(OpportunitySummary, summary_join_condition)


def _add_query_filters(stmt: Select[tuple[Any]], query: str | None) -> Select[tuple[Any]]:
if query is None or len(query) == 0:
return stmt

Expand All @@ -47,20 +57,11 @@ def _add_query_filters(


def _add_filters(
stmt: Select[tuple[Opportunity]], filters: SearchOpportunityFilters | None
) -> Select[tuple[Opportunity]]:
stmt: Select[tuple[Any]], filters: SearchOpportunityFilters | None
) -> Select[tuple[Any]]:
if filters is None:
return stmt

# We need to add joins so that the where clauses
# can query against the tables that are relevant for these filters
# Current + Opportunity Summary are always needed so just add them here
stmt = stmt.join(CurrentOpportunitySummary).join(
OpportunitySummary,
CurrentOpportunitySummary.opportunity_summary_id
== OpportunitySummary.opportunity_summary_id,
)

if filters.opportunity_status is not None:
one_of_opportunity_statuses = filters.opportunity_status.get("one_of")

Expand Down Expand Up @@ -113,37 +114,117 @@ def _add_filters(
return stmt


def _add_order_by(
stmt: Select[tuple[Opportunity]], pagination: PaginationParams
) -> Select[tuple[Opportunity]]:
# This generates an order by command like:
#
# ORDER BY opportunity.opportunity_id DESC NULLS LAST

# This determines whether we use ascending or descending when building the query
sort_fn = asc if pagination.is_ascending else desc

match pagination.order_by:
case "opportunity_id":
field: InstrumentedAttribute = Opportunity.opportunity_id
case "opportunity_number":
field = Opportunity.opportunity_number
case "opportunity_title":
field = Opportunity.opportunity_title
case "post_date":
field = OpportunitySummary.post_date
# Need to add joins to the query stmt to order by field from opportunity summary
stmt = _join_stmt_to_current_summary(stmt)
case "close_date":
field = OpportunitySummary.close_date
# Need to add joins to the query stmt to order by field from opportunity summary
stmt = _join_stmt_to_current_summary(stmt)
case "agency_code":
field = Opportunity.agency
case _:
# If this exception happens, it means our API schema
# allows for values we don't have implemented. This
# means we can't determine how to sort / need to correct
# the mismatch.
msg = f"Unconfigured sort_by parameter {pagination.order_by} provided, cannot determine how to sort."
raise Exception(msg)

# Any values that are null will automatically be sorted to the end
return stmt.order_by(nulls_last(sort_fn(field)))


def search_opportunities(
db_session: db.Session, raw_search_params: dict
) -> Tuple[Sequence[Opportunity], PaginationInfo]:
search_params = SearchOpportunityParams.model_validate(raw_search_params)

sort_fn = asc if search_params.pagination.is_ascending else desc
stmt = (
select(Opportunity)
# TODO - when we want to sort by non-opportunity table fields we'll need to change this
.order_by(sort_fn(getattr(Opportunity, search_params.pagination.order_by))).where(
"""
We create an inner query which handles all of the filtering and returns
a set of opportunity IDs for the outer query to filter against. This query
ends up looking like (varying based on exact filters):
SELECT DISTINCT
opportunity.opportunity_id
FROM opportunity
JOIN current_opportunity_summary ON opportunity.opportunity_id = current_opportunity_summary.opportunity_id
JOIN opportunity_summary ON current_opportunity_summary.opportunity_summary_id = opportunity_summary.opportunity_summary_id
JOIN link_opportunity_summary_funding_instrument ON opportunity_summary.opportunity_summary_id = link_opportunity_summary_funding_instrument.opportunity_summary_id
JOIN link_opportunity_summary_funding_category ON opportunity_summary.opportunity_summary_id = link_opportunity_summary_funding_category.opportunity_summary_id
JOIN link_opportunity_summary_applicant_type ON opportunity_summary.opportunity_summary_id = link_opportunity_summary_applicant_type.opportunity_summary_id
WHERE
opportunity.is_draft IS FALSE
AND(EXISTS (
SELECT
1 FROM current_opportunity_summary
WHERE
opportunity.opportunity_id = current_opportunity_summary.opportunity_id))
AND link_opportunity_summary_funding_instrument.funding_instrument_id IN(1, 2, 3, 4))
"""
inner_stmt = (
select(Opportunity.opportunity_id).where(
Opportunity.is_draft.is_(False)
) # Only ever return non-drafts
# Filter anything without a current opportunity summary
.where(Opportunity.current_opportunity_summary != None) # noqa: E711
# Distinct the opportunity IDs returned so that the outer query
# has fewer results to query against
.distinct()
)

# Current + Opportunity Summary are always needed so just add them here
inner_stmt = _join_stmt_to_current_summary(inner_stmt)
inner_stmt = _add_query_filters(inner_stmt, search_params.query)
inner_stmt = _add_filters(inner_stmt, search_params.filters)

#
#
"""
The outer query handles sorting and filters against the inner query described above.
This ends up looking like (joins to current opportunity if ordering by other fields):
SELECT
opportunity.opportunity_id,
opportunity.opportunity_title,
-- and so on for the opportunity table fields
FROM opportunity
WHERE
opportunity.opportunity_id in ( /* the above subquery */ )
ORDER BY
opportunity.opportunity_id DESC NULLS LAST
LIMIT 25 OFFSET 100
"""
stmt = (
select(Opportunity).where(Opportunity.opportunity_id.in_(inner_stmt))
# selectinload makes it so all relationships are loaded and attached to the Opportunity
# records that we end up fetching. It emits a separate "select * from table where opportunity_id in (x, y ,z)"
# for each relationship. This is used instead of joinedload as it ends up more performant for complex models
# Note that we set all_opportunity_summaries to noload as we don't need it in this API, and it would make the queries load more than necessary
#
# See: https://docs.sqlalchemy.org/en/20/orm/queryguide/relationships.html#what-kind-of-loading-to-use
.options(selectinload("*"), noload(Opportunity.all_opportunity_summaries))
# Distinct is necessary as the joins may add duplicate rows when multiple one-to-many relationships match
# While SQLAlchemy will unique those rows for us, the SQL query still ends up with far less than the limit
# we specify as that is done outside of the DB.
# By having distinct, we do that ourselves in the query so that the limit we specify will be the actual amount
# of records returned (assuming there at least that number to return)
.distinct()
)

stmt = _add_query_filters(stmt, search_params.query)
stmt = _add_filters(stmt, search_params.filters)
stmt = _add_order_by(stmt, search_params.pagination)

paginator: Paginator[Opportunity] = Paginator(
Opportunity, stmt, db_session, page_size=search_params.pagination.page_size
Expand Down
65 changes: 65 additions & 0 deletions api/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import tests.src.db.models.factories as factories
from src.db import models
from src.db.models.lookup.sync_lookup_values import sync_lookup_values
from src.db.models.opportunity_models import Opportunity
from src.util.local import load_local_env_vars
from tests.lib import db_testing

Expand Down Expand Up @@ -190,3 +191,67 @@ def mock_s3_bucket_resource(mock_s3):
@pytest.fixture
def mock_s3_bucket(mock_s3_bucket_resource):
yield mock_s3_bucket_resource.name


####################
# Class-based testing
####################


class BaseTestClass:
    """Base class to derive test classes from for class-scoped fixtures.

    Deriving from this class gives a test class fixtures whose scope is
    broader than an individual test but narrower than the whole test
    session. This is useful for avoiding repeated setup across the tests
    in a class, which can make them clearer and faster.

    See: https://docs.pytest.org/en/7.1.x/how-to/fixtures.html#fixture-scopes

    For example:

        class TestExampleClass(BaseTestClass):
            @pytest.fixture(scope="class")
            def setup_data(db_session):
                # note that the db_session here would be the one created in this class
                # as it will pull from the class scope instead
                examples = ExampleFactory.create_batch(size=100)
    """

    @pytest.fixture(scope="class")
    def db_session(self, db_client, monkeypatch_class):
        # Shadows the ordinary db_session fixture for tests in this class,
        # giving them a single class-scoped session instead.
        with db_client.get_session() as session:
            yield session

    @pytest.fixture(scope="class")
    def enable_factory_create(self, monkeypatch_class, db_session):
        """Allow the create method of factories to be called.

        By default, create raises an exception to prevent accidental
        creation of database objects in tests that don't need persistence.
        This fixture permits create calls only for the current class of
        tests; each test that calls Factory.create should request it.
        """
        monkeypatch_class.setattr(factories, "_db_session", db_session)

    @pytest.fixture(scope="class")
    def truncate_opportunities(self, db_session):
        """Empty the opportunity table, deleting all related records.

        As this fixture is class-scoped, it runs only once per test class.
        """
        # Delete via the ORM (rather than raw SQL) so cascades to related
        # records are handled.
        for opportunity in db_session.query(Opportunity).all():
            db_session.delete(opportunity)

        # Force the deletes to the DB
        db_session.commit()
Loading

0 comments on commit 4686472

Please sign in to comment.