Skip to content

Commit

Permalink
update heartbeat checks in scheduler (#3896)
Browse files Browse the repository at this point in the history
* update heartbeat checks in scheduler

* scale heartbeat by number of workers
  • Loading branch information
quasiben authored Jun 18, 2020
1 parent acb0f08 commit 920af0f
Showing 1 changed file with 5 additions and 2 deletions.
7 changes: 5 additions & 2 deletions distributed/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -5303,7 +5303,9 @@ def _reevaluate_occupancy_worker(self, ws):
async def check_worker_ttl(self):
now = time()
for ws in self.workers.values():
if ws.last_seen < now - self.worker_ttl:
if (ws.last_seen < now - self.worker_ttl) and (
10 * heartbeat_interval(len(self.workers))
):
logger.warning(
"Worker failed to heartbeat within %s seconds. Closing: %s",
self.worker_ttl,
Expand Down Expand Up @@ -5571,7 +5573,8 @@ def heartbeat_interval(n):
elif n < 200:
return 2
else:
return 5
# no more than 200 heartbeats a second scaled by workers
return n / 200 + 1


class KilledWorker(Exception):
Expand Down

0 comments on commit 920af0f

Please sign in to comment.