Skip to content

Commit

Permalink
more debugging
Browse files Browse the repository at this point in the history
  • Loading branch information
Richard Kuo (Danswer) committed Jan 6, 2025
1 parent 7f81947 commit 701d701
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 11 deletions.
26 changes: 15 additions & 11 deletions backend/onyx/background/celery/tasks/vespa/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from datetime import datetime
from datetime import timezone
from http import HTTPStatus
from typing import Any
from typing import cast

import httpx
Expand Down Expand Up @@ -766,7 +767,7 @@ def monitor_vespa_sync(self: Task, tenant_id: str | None) -> bool:

time_start = time.monotonic()

timings: dict[str, float] = {}
timings: dict[str, Any] = {}
timings["start"] = time_start

r = get_redis_client(tenant_id=tenant_id)
Expand Down Expand Up @@ -826,27 +827,33 @@ def monitor_vespa_sync(self: Task, tenant_id: str | None) -> bool:
f"permissions_upsert={n_permissions_upsert} "
)
timings["queues"] = time.monotonic() - phase_start
timings["queues_ttl"] = r.ttl(OnyxRedisLocks.MONITOR_VESPA_SYNC_BEAT_LOCK)

# scan and monitor activity to completion
phase_start = time.monotonic()
lock_beat.reacquire()
if r.exists(RedisConnectorCredentialPair.get_fence_key()):
monitor_connector_taskset(r)
timings["connector"] = time.monotonic() - phase_start
timings["connector_ttl"] = r.ttl(OnyxRedisLocks.MONITOR_VESPA_SYNC_BEAT_LOCK)

phase_start = time.monotonic()
for key_bytes in r.scan_iter(RedisConnectorDelete.FENCE_PREFIX + "*"):
lock_beat.reacquire()
monitor_connector_deletion_taskset(tenant_id, key_bytes, r)

timings["connector_deletion"] = time.monotonic() - phase_start
timings["connector_deletion_ttl"] = r.ttl(
OnyxRedisLocks.MONITOR_VESPA_SYNC_BEAT_LOCK
)

phase_start = time.monotonic()
for key_bytes in r.scan_iter(RedisDocumentSet.FENCE_PREFIX + "*"):
lock_beat.reacquire()
with get_session_with_tenant(tenant_id) as db_session:
monitor_document_set_taskset(tenant_id, key_bytes, r, db_session)
timings["document_set"] = time.monotonic() - phase_start
timings["documentset"] = time.monotonic() - phase_start
timings["documentset_ttl"] = r.ttl(OnyxRedisLocks.MONITOR_VESPA_SYNC_BEAT_LOCK)

phase_start = time.monotonic()
for key_bytes in r.scan_iter(RedisUserGroup.FENCE_PREFIX + "*"):
Expand All @@ -859,20 +866,23 @@ def monitor_vespa_sync(self: Task, tenant_id: str | None) -> bool:
with get_session_with_tenant(tenant_id) as db_session:
monitor_usergroup_taskset(tenant_id, key_bytes, r, db_session)
timings["usergroup"] = time.monotonic() - phase_start
timings["usergroup_ttl"] = r.ttl(OnyxRedisLocks.MONITOR_VESPA_SYNC_BEAT_LOCK)

phase_start = time.monotonic()
for key_bytes in r.scan_iter(RedisConnectorPrune.FENCE_PREFIX + "*"):
lock_beat.reacquire()
with get_session_with_tenant(tenant_id) as db_session:
monitor_ccpair_pruning_taskset(tenant_id, key_bytes, r, db_session)
timings["pruning"] = time.monotonic() - phase_start
timings["pruning_ttl"] = r.ttl(OnyxRedisLocks.MONITOR_VESPA_SYNC_BEAT_LOCK)

phase_start = time.monotonic()
for key_bytes in r.scan_iter(RedisConnectorIndex.FENCE_PREFIX + "*"):
lock_beat.reacquire()
with get_session_with_tenant(tenant_id) as db_session:
monitor_ccpair_indexing_taskset(tenant_id, key_bytes, r, db_session)
timings["indexing"] = time.monotonic() - phase_start
timings["indexing_ttl"] = r.ttl(OnyxRedisLocks.MONITOR_VESPA_SYNC_BEAT_LOCK)

phase_start = time.monotonic()
for key_bytes in r.scan_iter(RedisConnectorPermissionSync.FENCE_PREFIX + "*"):
Expand All @@ -881,6 +891,8 @@ def monitor_vespa_sync(self: Task, tenant_id: str | None) -> bool:
monitor_ccpair_permissions_taskset(tenant_id, key_bytes, r, db_session)

timings["permissions"] = time.monotonic() - phase_start
timings["permissions_ttl"] = r.ttl(OnyxRedisLocks.MONITOR_VESPA_SYNC_BEAT_LOCK)

except SoftTimeLimitExceeded:
task_logger.info(
"Soft time limit exceeded, task is being terminated gracefully."
Expand All @@ -889,18 +901,10 @@ def monitor_vespa_sync(self: Task, tenant_id: str | None) -> bool:
if lock_beat.owned():
lock_beat.release()
else:
t = timings
task_logger.error(
"monitor_vespa_sync - Lock not owned on completion: "
f"tenant={tenant_id} "
f"queues={t.get('queues')} "
f"connector={t.get('connector')} "
f"connector_deletion={t.get('connector_deletion')} "
f"document_set={t.get('document_set')} "
f"usergroup={t.get('usergroup')} "
f"pruning={t.get('pruning')} "
f"indexing={t.get('indexing')} "
f"permissions={t.get('permissions')}"
f"timings={timings}"
)
redis_lock_dump(lock_beat, r)

Expand Down
1 change: 1 addition & 0 deletions backend/onyx/redis/redis_pool.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ def __getattribute__(self, item: str) -> Any:
"hexists",
"hset",
"hdel",
"ttl",
] # Regular methods that need simple prefixing

if item == "scan_iter":
Expand Down

0 comments on commit 701d701

Please sign in to comment.