From 3b9d914fc8ab29cb323bf603766259577ace817a Mon Sep 17 00:00:00 2001 From: Sviatoslav Sydorenko Date: Tue, 12 Mar 2024 19:16:24 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20Handle=20stray=20exceptions=20in?= =?UTF-8?q?=20worker=20threads?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch aims to prevent a situation when the working threads are killed by arbitrary exceptions that bubble up to their entry point layers that aren't handled properly or at all. This was causing DoS in many situations, including TLS errors and attempts to close the underlying sockets erroring out. Fixes #358 Ref #375 Resolves #365 --- cheroot/workers/threadpool.py | 71 ++++++++++++++++++++++++++++++++++- 1 file changed, 69 insertions(+), 2 deletions(-) diff --git a/cheroot/workers/threadpool.py b/cheroot/workers/threadpool.py index 3437d9bda9..de01cb123f 100644 --- a/cheroot/workers/threadpool.py +++ b/cheroot/workers/threadpool.py @@ -6,6 +6,7 @@ """ import collections +import logging import threading import time import socket @@ -118,7 +119,52 @@ def run(self): keep_conn_open = False try: keep_conn_open = conn.communicate() + except ConnectionError as connection_error: + keep_conn_open = False # Drop the connection cleanly + self.server.error_log( + 'Got a connection error while handling a ' + f'connection from {conn.remote_addr !s}:' + f'{conn.remote_port !s} ({connection_error !s})', + level=logging.INFO, + ) + continue + except (KeyboardInterrupt, SystemExit) as shutdown_request: + # Shutdown request + keep_conn_open = False # Drop the connection cleanly + self.server.error_log( + 'Got a server shutdown request while handling a ' + f'connection from {conn.remote_addr !s}:' + f'{conn.remote_port !s} ({shutdown_request !s})', + level=logging.DEBUG, + ) + raise SystemExit( + str(shutdown_request), + ) from shutdown_request + except Exception as unhandled_error: + # NOTE: Only a shutdown request should bubble up to the + # NOTE: external cleanup code. Otherwise, this thread dies. + # NOTE: If this were to happen, the threadpool would still + # NOTE: list a dead thread without knowing its state. And + # NOTE: the calling code would fail to schedule processing + # NOTE: of new requests. + self.server.error_log( + 'Unhandled error while processing an incoming ' + f'connection {unhandled_error !r}', + level=logging.ERROR, + traceback=True, + ) + continue # Prevent the thread from dying finally: + # NOTE: Any exceptions coming from within `finally` may + # NOTE: kill the thread, causing the threadpool to only + # NOTE: contain references to dead threads rendering the + # NOTE: server defunct, effectively meaning a DoS. + # NOTE: Ideally, things called here should process + # NOTE: everything recoverable internally. Any unhandled + # NOTE: errors will bubble up into the outer try/except + # NOTE: block. They will be treated as fatal and turned + # NOTE: into server shutdown requests and then reraised + # NOTE: unconditionally. if keep_conn_open: self.server.put_conn(conn) else: @@ -130,8 +176,29 @@ def run(self): self.work_time += time.time() - self.start_time self.start_time = None self.conn = None - except (KeyboardInterrupt, SystemExit) as ex: - self.server.interrupt = ex + except (KeyboardInterrupt, SystemExit) as interrupt_exc: + interrupt_cause = interrupt_exc.__cause__ or interrupt_exc + self.server.error_log( + f'Setting the server interrupt flag to {interrupt_cause !r}', + level=logging.DEBUG, + ) + self.server.interrupt = interrupt_cause + except BaseException as underlying_exc: # noqa: WPS424 + # NOTE: This is the last resort logging with the last dying breath + # NOTE: of the worker. It is only reachable when exceptions happen + # NOTE: in the `finally` branch of the internal try/except block. + self.server.error_log( + 'A fatal exception happened. Setting the server interrupt flag' + f'to {underlying_exc !r} and giving up.' + '\N{NEW LINE}\N{NEW LINE}' + 'Please, report this on the Cheroot tracker at ' + ', ' + 'providing a full reproducer with as much context and details as possible.', + level=logging.CRITICAL, + traceback=True, + ) + self.server.interrupt = underlying_exc + raise class ThreadPool: