From 3b3b907bec5eff29fd4d03f979452e170716cf28 Mon Sep 17 00:00:00 2001 From: Krystle Salazar Date: Fri, 12 Jan 2024 19:44:12 -0400 Subject: [PATCH] Increase ES throttling rate in Ingestion Server (#3644) --- ingestion_server/ingestion_server/indexer.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/ingestion_server/ingestion_server/indexer.py b/ingestion_server/ingestion_server/indexer.py index cb6e035e3a9..34bed6a1498 100644 --- a/ingestion_server/ingestion_server/indexer.py +++ b/ingestion_server/ingestion_server/indexer.py @@ -46,6 +46,11 @@ DATABASE_PASSWORD = config("DATABASE_PASSWORD", default="deploy") DATABASE_NAME = config("DATABASE_NAME", default="openledger") +# See https://www.elastic.co/guide/en/elasticsearch/reference/8.8/docs-reindex.html#docs-reindex-throttle +ES_FILTERED_INDEX_THROTTLING_RATE = config( + "ES_FILTERED_INDEX_THROTTLING_RATE", default=20_000, cast=int +) + # The number of database records to load in memory at once. DB_BUFFER_SIZE = config("DB_BUFFER_SIZE", default=100000, cast=int) @@ -486,7 +491,7 @@ def create_and_populate_filtered_index( final alias used. """ # Allow relying on the model-name-based alias by - # not suppliying `origin_index_suffix` + # not supplying `origin_index_suffix` source_index = ( f"{model_name}-{origin_index_suffix}" if origin_index_suffix else model_name ) @@ -523,9 +528,7 @@ def create_and_populate_filtered_index( }, slices="auto", wait_for_completion=True, - # 10k derived from in-production testing - # See https://github.com/WordPress/openverse/issues/2963 - requests_per_second=15_000, + requests_per_second=ES_FILTERED_INDEX_THROTTLING_RATE, # Temporary workaround to allow the action to complete. request_timeout=48 * 3600, )