Skip to content

Commit

Permalink
Increase ES throttling rate in Ingestion Server (#3644)
Browse files Browse the repository at this point in the history
  • Loading branch information
krysal authored Jan 12, 2024
1 parent ab09a8a commit 3b3b907
Showing 1 changed file with 7 additions and 4 deletions.
11 changes: 7 additions & 4 deletions ingestion_server/ingestion_server/indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,11 @@
# Database settings read via `config`, with fallback defaults.
DATABASE_PASSWORD = config("DATABASE_PASSWORD", default="deploy")
DATABASE_NAME = config("DATABASE_NAME", default="openledger")

# Throttle passed as `requests_per_second` to the reindex request made in
# `create_and_populate_filtered_index`. Exposed as a setting (integer,
# default 20,000) so the rate can be adjusted without a code change.
# See https://www.elastic.co/guide/en/elasticsearch/reference/8.8/docs-reindex.html#docs-reindex-throttle
ES_FILTERED_INDEX_THROTTLING_RATE = config(
"ES_FILTERED_INDEX_THROTTLING_RATE", default=20_000, cast=int
)

# The number of database records to load in memory at once.
DB_BUFFER_SIZE = config("DB_BUFFER_SIZE", default=100000, cast=int)

Expand Down Expand Up @@ -486,7 +491,7 @@ def create_and_populate_filtered_index(
final alias used.
"""
# Allow relying on the model-name-based alias by
# not suppliying `origin_index_suffix`
# not supplying `origin_index_suffix`
source_index = (
f"{model_name}-{origin_index_suffix}" if origin_index_suffix else model_name
)
Expand Down Expand Up @@ -523,9 +528,7 @@ def create_and_populate_filtered_index(
},
slices="auto",
wait_for_completion=True,
# 10k derived from in-production testing
# See https://github.com/WordPress/openverse/issues/2963
requests_per_second=15_000,
requests_per_second=ES_FILTERED_INDEX_THROTTLING_RATE,
# Temporary workaround to allow the action to complete.
request_timeout=48 * 3600,
)
Expand Down

0 comments on commit 3b3b907

Please sign in to comment.