Skip to content

Commit

Permalink
[Issue #2729] OpenSearch should return an accurate count of total res…
Browse files Browse the repository at this point in the history
…ults (#2730)

## Summary
Fixes #2729

### Time to review: __3 mins__

## Changes proposed
Set `track_total_hits` to True when calling OpenSearch

## Context for reviewers
While this field says it has possible performance cost due to needing to
count all the records, we also request a count for various facet counts
anyways, so I imagine this won't matter at all.

## Additional information
https://opensearch.org/docs/latest/api-reference/search/

I loaded ~16k records into my local search index. Querying it with no
filters returns this pagination info now:
```json
 {
    "order_by": "opportunity_id",
    "page_offset": 1,
    "page_size": 25,
    "sort_direction": "ascending",
    "total_pages": 676,
    "total_records": 16884
  }
```
  • Loading branch information
chouinar authored Nov 7, 2024
1 parent e513579 commit fe2e37d
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 1 deletion.
12 changes: 12 additions & 0 deletions api/src/adapters/search/opensearch_query_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,8 @@ def __init__(self) -> None:

self.sort_values: list[dict[str, dict[str, str]]] = []

self._track_total_hits: bool = True

self.must: list[dict] = []
self.filters: list[dict] = []

Expand Down Expand Up @@ -133,6 +135,15 @@ def sort_by(self, sort_values: list[typing.Tuple[str, SortDirection]]) -> typing

return self

def track_total_hits(self, track_total_hits: bool) -> typing.Self:
"""
Whether or not to track the total number of hits in the response accurately.
By default OpenSearch will stop counting after 10k records are counted.
"""
self._track_total_hits = track_total_hits
return self

def simple_query(self, query: str, fields: list[str]) -> typing.Self:
"""
Adds a simple_query_string which queries against the provided fields.
Expand Down Expand Up @@ -238,6 +249,7 @@ def build(self) -> dict:
# Always include the scores in the response objects
# even if we're sorting by non-relevancy
"track_scores": True,
"track_total_hits": self._track_total_hits,
}

# Add sorting if any was provided
Expand Down
3 changes: 3 additions & 0 deletions api/src/services/opportunities_v1/search_opportunities.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,9 @@ def _add_aggregations(builder: search.SearchQueryBuilder) -> None:
def _get_search_request(params: SearchOpportunityParams) -> dict:
builder = search.SearchQueryBuilder()

# Make sure total hit count gets counted for more than 10k records
builder.track_total_hits(True)

# Pagination
builder.pagination(
page_size=params.pagination.page_size, page_number=params.pagination.page_offset
Expand Down
12 changes: 11 additions & 1 deletion api/tests/src/adapters/search/test_opensearch_query_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,12 @@ def seed_data(self, search_client, search_index):
def test_query_builder_empty(self, search_client, search_index):
builder = SearchQueryBuilder()

assert builder.build() == {"size": 25, "from": 0, "track_scores": True}
assert builder.build() == {
"size": 25,
"from": 0,
"track_scores": True,
"track_total_hits": True,
}

validate_valid_request(search_client, search_index, builder, FULL_DATA)

Expand Down Expand Up @@ -265,6 +270,7 @@ def test_query_builder_pagination_and_sorting(
"size": page_size,
"from": page_size * (page_number - 1),
"track_scores": True,
"track_total_hits": True,
"sort": expected_sort,
}

Expand Down Expand Up @@ -369,6 +375,7 @@ def test_query_builder_filter_terms(
"size": 25,
"from": 0,
"track_scores": True,
"track_total_hits": True,
"query": {"bool": {"filter": expected_terms}},
}

Expand Down Expand Up @@ -429,6 +436,7 @@ def test_query_builder_filter_date_range(
"size": 25,
"from": 0,
"track_scores": True,
"track_total_hits": True,
"query": {"bool": {"filter": [{"range": {"publication_date": expected_ranges}}]}},
}

Expand Down Expand Up @@ -474,6 +482,7 @@ def test_query_builder_filter_int_range(
"size": 25,
"from": 0,
"track_scores": True,
"track_total_hits": True,
"query": {"bool": {"filter": [{"range": {"page_count": expected_ranges}}]}},
}

Expand Down Expand Up @@ -633,6 +642,7 @@ def test_query_builder_simple_query_and_aggregations(
"size": 25,
"from": 0,
"track_scores": True,
"track_total_hits": True,
"query": {
"bool": {
"must": [
Expand Down

0 comments on commit fe2e37d

Please sign in to comment.