From 787c9144ca80ed3ccf990152102d256c65bcb640 Mon Sep 17 00:00:00 2001 From: Ankush Menat Date: Tue, 29 Oct 2024 19:13:50 +0530 Subject: [PATCH 1/2] perf(gthread): Use request timeout / 2 for epoll timeout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gthread calls epoll_wait (and 2 other syscalls) every second because it specifies timeout to be 1 second. ``` λ sudo strace -p `pgrep -f "gunicorn: worker" | head -n1` strace: Process 30815 attached epoll_wait(7, [], 1, 666) = 0 getppid() = 30800 utimensat(6, NULL, [{tv_sec=3157, tv_nsec=198136276} /* 1970-01-01T06:22:37.198136276+0530 */, {tv_sec=3157, tv_nsec=198136276} /* 1970-01-01T06:22:37.198136276+0530 */], 0) = 0 epoll_wait(7, [], 1, 1000) = 0 getppid() = 30800 utimensat(6, NULL, [{tv_sec=3158, tv_nsec=204192934} /* 1970-01-01T06:22:38.204192934+0530 */, {tv_sec=3158, tv_nsec=204192934} /* 1970-01-01T06:22:38.204192934+0530 */], 0) = 0 epoll_wait(7, [], 1, 1000) = 0 getppid() = 30800 utimensat(6, NULL, [{tv_sec=3159, tv_nsec=210145196} /* 1970-01-01T06:22:39.210145196+0530 */, {tv_sec=3159, tv_nsec=210145196} /* 1970-01-01T06:22:39.210145196+0530 */], 0) = 0 epoll_wait(7, [], 1, 1000) = 0 getppid() = 30800 utimensat(6, NULL, [{tv_sec=3160, tv_nsec=215517372} /* 1970-01-01T06:22:40.215517372+0530 */, {tv_sec=3160, tv_nsec=215517372} /* 1970-01-01T06:22:40.215517372+0530 */], 0) = 0 epoll_wait(7, ^Cstrace: Process 30815 detached ``` Timing out every second wakes up the process and loads it on CPU even if there is nothing to service. This can be detrimental when you have total workers >> total cores and multi-tenant setup where certain tenants might be sitting idle. (but not "idle enough" because of 1s polling timeout) This can possibly keep a completed future in queue for a small while, but I don't see any obious problem with it except few bytes of extra memory usage. I could be wrong here. fixes https://github.com/benoitc/gunicorn/issues/3317 --- gunicorn/workers/gthread.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gunicorn/workers/gthread.py b/gunicorn/workers/gthread.py index 7a23228cd..db4e4d8fe 100644 --- a/gunicorn/workers/gthread.py +++ b/gunicorn/workers/gthread.py @@ -207,7 +207,7 @@ def run(self): # can we accept more connections? if self.nr_conns < self.worker_connections: # wait for an event - events = self.poller.select(1.0) + events = self.poller.select(self.timeout) for key, _ in events: callback = key.data callback(key.fileobj) From 974c6e5a7324a3b38b553587dfd20faaa2f86ce5 Mon Sep 17 00:00:00 2001 From: Ankush Menat Date: Wed, 30 Oct 2024 14:17:32 +0530 Subject: [PATCH 2/2] fix(gthread): Consider keepalived connections while blocking When worker goes idle, two problems can occur: 1. keepalived connections might stay open for request_timeout / 2. That can be significantly longer than keep-alive timeout. 2. When next request comes up after idle period, all keepalived sockets must be cleaned up first before we can start responding to requests. This will increase TTFB. After this change: 1. keepalived sockets are cleaned up with frequency of *at least* keepalive timeout. 2. TTFB is minimally affected. 3. Worst case: worker goes idle with keepalived connection that was almost hitting timeout . That connection will be kept alive for ~2x keepalive timeout. --- THANKS | 1 + docs/source/settings.rst | 3 +++ gunicorn/workers/gthread.py | 7 ++++++- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/THANKS b/THANKS index 9f4c6b6b9..a08cef7da 100644 --- a/THANKS +++ b/THANKS @@ -20,6 +20,7 @@ Andreas Stührk Andrew Burdo Andrew Svetlov Anil V +Ankush Menat Antoine Girard Anton Vlasenko Artur Kruchinin diff --git a/docs/source/settings.rst b/docs/source/settings.rst index e1e91fa76..511dc69fb 100644 --- a/docs/source/settings.rst +++ b/docs/source/settings.rst @@ -1793,3 +1793,6 @@ set this to a higher value. ``sync`` worker does not support persistent connections and will ignore this option. +.. note:: + When the worker becomes idle, some connections may remain open for + up to twice the specified keepalive value. diff --git a/gunicorn/workers/gthread.py b/gunicorn/workers/gthread.py index db4e4d8fe..57955c982 100644 --- a/gunicorn/workers/gthread.py +++ b/gunicorn/workers/gthread.py @@ -206,8 +206,13 @@ def run(self): # can we accept more connections? if self.nr_conns < self.worker_connections: + # Block for new events until worker times out or keepalive timeout. + select_timeout = self.timeout or 1.0 + if self._keep: + select_timeout = min(select_timeout, self.cfg.keepalive) + # wait for an event - events = self.poller.select(self.timeout) + events = self.poller.select(select_timeout) for key, _ in events: callback = key.data callback(key.fileobj)