From 7131b3502ca05453102d9cd15d164d2089331367 Mon Sep 17 00:00:00 2001 From: Staci Mullins <63313398+stacimc@users.noreply.github.com> Date: Mon, 11 Dec 2023 17:37:47 -0800 Subject: [PATCH] Fix incorrect default production data refresh limit of 0 (#3515) * Only cap the number of records to index if a limit has been set * Use None instead of 0 to signify no record limit for clarity --- .../ingestion_server/distributed_reindex_scheduler.py | 8 +++++--- ingestion_server/ingestion_server/utils/config.py | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/ingestion_server/ingestion_server/distributed_reindex_scheduler.py b/ingestion_server/ingestion_server/distributed_reindex_scheduler.py index 54d4d0178fc..9bd3eb4919e 100644 --- a/ingestion_server/ingestion_server/distributed_reindex_scheduler.py +++ b/ingestion_server/ingestion_server/distributed_reindex_scheduler.py @@ -42,9 +42,11 @@ def _assign_work(db_conn, workers, model_name, table_name, target_index): cur.execute(est_records_query) estimated_records = cur.fetchone()[0] - records_per_worker = math.floor( - min(estimated_records, get_record_limit()) / len(workers) - ) + # If a record_limit has been set, cap the number of records to be indexed. + if record_limit := get_record_limit(): + estimated_records = min(estimated_records, record_limit) + + records_per_worker = math.floor(estimated_records / len(workers)) worker_url_template = "http://{}:8002" # Wait for the workers to start. diff --git a/ingestion_server/ingestion_server/utils/config.py b/ingestion_server/ingestion_server/utils/config.py index 45c2aaf583e..11b45e56883 100644 --- a/ingestion_server/ingestion_server/utils/config.py +++ b/ingestion_server/ingestion_server/utils/config.py @@ -14,6 +14,6 @@ def get_record_limit(): environment = config("ENVIRONMENT", default="local").lower() if environment in {"prod", "production"}: - return 0 + return None return 100_000