From 9952110d6649cdc52d273a09229321bb4a1dc393 Mon Sep 17 00:00:00 2001 From: Ionite Date: Sat, 11 Mar 2023 19:53:45 -0500 Subject: [PATCH 01/14] Ensure mp Pool is not reused to avoid broken pipes from shutdown --- snekbox/utils/timed.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snekbox/utils/timed.py b/snekbox/utils/timed.py index 02388ff0..1221df0d 100644 --- a/snekbox/utils/timed.py +++ b/snekbox/utils/timed.py @@ -29,7 +29,7 @@ def timed( """ if kwds is None: kwds = {} - with multiprocessing.Pool(1) as pool: + with multiprocessing.Pool(1, maxtasksperchild=1) as pool: result = pool.apply_async(func, args, kwds) try: return result.get(timeout) From 3ea786fe9ae42e35f52a6d31a392f4966f62f1c2 Mon Sep 17 00:00:00 2001 From: Ionite Date: Sat, 11 Mar 2023 19:54:17 -0500 Subject: [PATCH 02/14] Reduce file parse timeout from 8 to 5 seconds --- snekbox/nsjail.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snekbox/nsjail.py b/snekbox/nsjail.py index f014850f..76154ae3 100644 --- a/snekbox/nsjail.py +++ b/snekbox/nsjail.py @@ -56,7 +56,7 @@ def __init__( memfs_home: str = "home", memfs_output: str = "home", files_limit: int | None = 100, - files_timeout: float | None = 8, + files_timeout: float | None = 5, files_pattern: str = "**/[!_]*", ): """ From 2abd6cdedb2c2d855eb31a0c6bab8c0a7a0413e3 Mon Sep 17 00:00:00 2001 From: Ionite Date: Sat, 11 Mar 2023 19:55:20 -0500 Subject: [PATCH 03/14] Handle recursion error cases from glob --- snekbox/nsjail.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/snekbox/nsjail.py b/snekbox/nsjail.py index 76154ae3..6f6a1551 100644 --- a/snekbox/nsjail.py +++ b/snekbox/nsjail.py @@ -277,6 +277,13 @@ def python3( timeout=self.files_timeout, ) log.info(f"Found {len(attachments)} files.") + except RecursionError: + log.info("Recursion error while parsing attachments") + return EvalResult( + args, + None, + "FileParsingError: Exceeded directory depth limit while parsing attachments", + ) except TimeoutError as e: log.info(f"Exceeded time limit while parsing attachments: {e}") return EvalResult( From 46dd83036e28dd2b0aa0179afafd9e3867bff37d Mon Sep 17 00:00:00 2001 From: Ionite Date: Sat, 11 Mar 2023 19:56:26 -0500 Subject: [PATCH 04/14] Add `timeout` to `MemFS.files` and `MemFS.files_list` for more cooperative cancellation --- snekbox/memfs.py | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/snekbox/memfs.py b/snekbox/memfs.py index ddea9a99..57ba69c1 100644 --- a/snekbox/memfs.py +++ b/snekbox/memfs.py @@ -1,7 +1,9 @@ """Memory filesystem for snekbox.""" from __future__ import annotations +import glob import logging +import time import warnings import weakref from collections.abc import Generator @@ -125,6 +127,7 @@ def files( limit: int, pattern: str = "**/*", exclude_files: dict[Path, float] | None = None, + timeout: float | None = None, ) -> Generator[FileAttachment, None, None]: """ Yields FileAttachments for files found in the output directory. @@ -135,12 +138,17 @@ def files( exclude_files: A dict of Paths and last modified times. Files will be excluded if their last modified time is equal to the provided value. + timeout: The maximum time for the file parsing. If exceeded, + a TimeoutError will be raised. """ - count = 0 - for file in self.output.rglob(pattern): - # Ignore hidden directories or files - if any(part.startswith(".") for part in file.parts): - log.info(f"Skipping hidden path {file!s}") + start_time = time.monotonic() + added = 0 + files = glob.iglob(pattern, root_dir=str(self.output), recursive=True, include_hidden=False) + for file in (Path(self.output, f) for f in files): + if timeout and (time.monotonic() - start_time) > timeout: + raise TimeoutError("File parsing timeout exceeded in MemFS.files") + + if not file.is_file(): continue if exclude_files and (orig_time := exclude_files.get(file)): @@ -150,14 +158,13 @@ def files( log.info(f"Skipping {file.name!r} as it has not been modified") continue - if count > limit: + if added > limit: log.info(f"Max attachments {limit} reached, skipping remaining files") break - if file.is_file(): - count += 1 - log.info(f"Found valid file for upload {file.name!r}") - yield FileAttachment.from_path(file, relative_to=self.output) + added += 1 + log.info(f"Found valid file for upload {file.name!r}") + yield FileAttachment.from_path(file, relative_to=self.output) def files_list( self, @@ -165,6 +172,7 @@ def files_list( pattern: str, exclude_files: dict[Path, float] | None = None, preload_dict: bool = False, + timeout: float | None = None, ) -> list[FileAttachment]: """ Return a sorted list of file paths within the output directory. @@ -176,15 +184,20 @@ def files_list( Files will be excluded if their last modified time is equal to the provided value. preload_dict: Whether to preload as_dict property data. + timeout: The maximum time for the file parsing. If exceeded, + a TimeoutError will be raised. Returns: List of FileAttachments sorted lexically by path name. """ + start_time = time.monotonic() res = sorted( self.files(limit=limit, pattern=pattern, exclude_files=exclude_files), key=lambda f: f.path, ) if preload_dict: for file in res: + if timeout and (time.monotonic() - start_time) > timeout: + raise TimeoutError("File parsing timeout exceeded in MemFS.files_list") # Loads the cached property as attribute _ = file.as_dict return res From 99d6b969587be7d276c1aa980d9304474e9d1494 Mon Sep 17 00:00:00 2001 From: Ionite Date: Sat, 11 Mar 2023 19:56:48 -0500 Subject: [PATCH 05/14] Provide files_timeout to `MemFS.files_list` call --- snekbox/nsjail.py | 1 + 1 file changed, 1 insertion(+) diff --git a/snekbox/nsjail.py b/snekbox/nsjail.py index 6f6a1551..8bbcf227 100644 --- a/snekbox/nsjail.py +++ b/snekbox/nsjail.py @@ -273,6 +273,7 @@ def python3( { "preload_dict": True, "exclude_files": files_written, + "timeout": self.files_timeout, }, timeout=self.files_timeout, ) From 6601b36940a9a64834d8368bac515706e26f4ef9 Mon Sep 17 00:00:00 2001 From: Ionite Date: Sat, 11 Mar 2023 20:02:27 -0500 Subject: [PATCH 06/14] Add unit test for deeply nested path file parsing Using example for reproducing issue #172 --- tests/test_nsjail.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tests/test_nsjail.py b/tests/test_nsjail.py index 456046b3..c701d3a2 100644 --- a/tests/test_nsjail.py +++ b/tests/test_nsjail.py @@ -227,6 +227,29 @@ def test_file_parsing_timeout(self): ) self.assertEqual(result.stderr, None) + def test_file_parsing_depth_limit(self): + code = dedent( + """ + import os + + x = "" + for _ in range(1000): + x += "a/" + os.mkdir(x) + + open(f"{x}test.txt", "w").write("test") + """ + ).strip() + + nsjail = NsJail(memfs_instance_size=32 * Size.MiB, files_timeout=5) + result = nsjail.python3(["-c", code]) + self.assertEqual(result.returncode, None) + self.assertEqual( + result.stdout, + "FileParsingError: Exceeded directory depth limit while parsing attachments", + ) + self.assertEqual(result.stderr, None) + def test_file_write_error(self): """Test errors during file write.""" result = self.nsjail.python3( From 7ca391715068d5bc7627265f83953b7cb3851b71 Mon Sep 17 00:00:00 2001 From: Ionite Date: Mon, 13 Mar 2023 14:13:37 -0400 Subject: [PATCH 07/14] Add SIGALRM based time limit --- snekbox/nsjail.py | 25 ++++++++++++++----------- snekbox/utils/timed.py | 30 ++++++++++++++++++++++++++++-- 2 files changed, 42 insertions(+), 13 deletions(-) diff --git a/snekbox/nsjail.py b/snekbox/nsjail.py index 8bbcf227..77cd3211 100644 --- a/snekbox/nsjail.py +++ b/snekbox/nsjail.py @@ -15,7 +15,7 @@ from snekbox.memfs import MemFS from snekbox.process import EvalResult from snekbox.snekio import FileAttachment -from snekbox.utils.timed import timed +from snekbox.utils.timed import time_limit __all__ = ("NsJail",) @@ -267,16 +267,14 @@ def python3( # Parse attachments with time limit try: - attachments = timed( - MemFS.files_list, - (fs, self.files_limit, self.files_pattern), - { - "preload_dict": True, - "exclude_files": files_written, - "timeout": self.files_timeout, - }, - timeout=self.files_timeout, - ) + with time_limit(self.files_timeout): + attachments = fs.files_list( + limit=self.files_limit, + pattern=self.files_pattern, + preload_dict=True, + exclude_files=files_written, + timeout=self.files_timeout, + ) log.info(f"Found {len(attachments)} files.") except RecursionError: log.info("Recursion error while parsing attachments") @@ -290,6 +288,11 @@ def python3( return EvalResult( args, None, "TimeoutError: Exceeded time limit while parsing attachments" ) + except Exception as e: + log.error(f"Unexpected {type(e).__name__} while parse attachments: {e}") + return EvalResult( + args, None, "FileParsingError: Unknown error while parsing attachments" + ) log_lines = nsj_log.read().decode("utf-8").splitlines() if not log_lines and returncode == 255: diff --git a/snekbox/utils/timed.py b/snekbox/utils/timed.py index 1221df0d..bac299d2 100644 --- a/snekbox/utils/timed.py +++ b/snekbox/utils/timed.py @@ -1,12 +1,14 @@ """Calling functions with time limits.""" import multiprocessing -from collections.abc import Callable, Iterable, Mapping +import signal +from collections.abc import Callable, Generator, Iterable, Mapping +from contextlib import contextmanager from typing import Any, TypeVar _T = TypeVar("_T") _V = TypeVar("_V") -__all__ = ("timed",) +__all__ = ("timed", "time_limit") def timed( @@ -35,3 +37,27 @@ def timed( return result.get(timeout) except multiprocessing.TimeoutError as e: raise TimeoutError(f"Call to {func.__name__} timed out after {timeout} seconds.") from e + + +@contextmanager +def time_limit(timeout: int | None = None) -> Generator[None, None, None]: + """ + Decorator to call a function with a time limit. Uses SIGALRM, requires a UNIX system. + + Args: + timeout: Timeout limit in seconds. + + Raises: + TimeoutError: If the function call takes longer than `timeout` seconds. + """ + + def signal_handler(signum, frame): + raise TimeoutError(f"time_limit call timed out after {timeout} seconds.") + + signal.signal(signal.SIGALRM, signal_handler) + signal.alarm(timeout) + + try: + yield + finally: + signal.alarm(0) From 0ef25529797d80ab8270123cc18b604c45bdf150 Mon Sep 17 00:00:00 2001 From: Ionite Date: Mon, 13 Mar 2023 14:18:56 -0400 Subject: [PATCH 08/14] Remove unused timed function --- snekbox/utils/timed.py | 37 ++++--------------------------------- 1 file changed, 4 insertions(+), 33 deletions(-) diff --git a/snekbox/utils/timed.py b/snekbox/utils/timed.py index bac299d2..756b2bcc 100644 --- a/snekbox/utils/timed.py +++ b/snekbox/utils/timed.py @@ -1,42 +1,13 @@ """Calling functions with time limits.""" -import multiprocessing import signal -from collections.abc import Callable, Generator, Iterable, Mapping +from collections.abc import Generator from contextlib import contextmanager -from typing import Any, TypeVar +from typing import TypeVar _T = TypeVar("_T") _V = TypeVar("_V") -__all__ = ("timed", "time_limit") - - -def timed( - func: Callable[[_T], _V], - args: Iterable = (), - kwds: Mapping[str, Any] | None = None, - timeout: float | None = None, -) -> _V: - """ - Call a function with a time limit. - - Args: - func: Function to call. - args: Arguments for function. - kwds: Keyword arguments for function. - timeout: Timeout limit in seconds. - - Raises: - TimeoutError: If the function call takes longer than `timeout` seconds. - """ - if kwds is None: - kwds = {} - with multiprocessing.Pool(1, maxtasksperchild=1) as pool: - result = pool.apply_async(func, args, kwds) - try: - return result.get(timeout) - except multiprocessing.TimeoutError as e: - raise TimeoutError(f"Call to {func.__name__} timed out after {timeout} seconds.") from e +__all__ = ("time_limit",) @contextmanager @@ -51,7 +22,7 @@ def time_limit(timeout: int | None = None) -> Generator[None, None, None]: TimeoutError: If the function call takes longer than `timeout` seconds. """ - def signal_handler(signum, frame): + def signal_handler(_signum, _frame): raise TimeoutError(f"time_limit call timed out after {timeout} seconds.") signal.signal(signal.SIGALRM, signal_handler) From fb01ba2d335dfd05af0a42d863eaea6ce2324ef6 Mon Sep 17 00:00:00 2001 From: Ionite Date: Wed, 15 Mar 2023 04:40:01 -0400 Subject: [PATCH 09/14] Update snekbox/utils/timed.py Co-authored-by: Mark <1515135+MarkKoz@users.noreply.github.com> --- snekbox/utils/timed.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snekbox/utils/timed.py b/snekbox/utils/timed.py index 756b2bcc..11d126c1 100644 --- a/snekbox/utils/timed.py +++ b/snekbox/utils/timed.py @@ -13,7 +13,7 @@ @contextmanager def time_limit(timeout: int | None = None) -> Generator[None, None, None]: """ - Decorator to call a function with a time limit. Uses SIGALRM, requires a UNIX system. + Decorator to call a function with a time limit. Args: timeout: Timeout limit in seconds. From 6b0dba59e1d7a168e97531e5bd16ca04e5ed615a Mon Sep 17 00:00:00 2001 From: Ionite Date: Thu, 16 Mar 2023 00:00:02 -0400 Subject: [PATCH 10/14] Use log.exception for catch all --- snekbox/nsjail.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snekbox/nsjail.py b/snekbox/nsjail.py index 77cd3211..b6616a48 100644 --- a/snekbox/nsjail.py +++ b/snekbox/nsjail.py @@ -289,7 +289,7 @@ def python3( args, None, "TimeoutError: Exceeded time limit while parsing attachments" ) except Exception as e: - log.error(f"Unexpected {type(e).__name__} while parse attachments: {e}") + log.exception(f"Unexpected {type(e).__name__} while parse attachments", exc_info=e) return EvalResult( args, None, "FileParsingError: Unknown error while parsing attachments" ) From e9242175ced6584084b6a4a1c77d0319b49e2cc0 Mon Sep 17 00:00:00 2001 From: Ionite Date: Thu, 16 Mar 2023 00:05:05 -0400 Subject: [PATCH 11/14] Rename `count` variable --- snekbox/memfs.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/snekbox/memfs.py b/snekbox/memfs.py index 57ba69c1..c3401464 100644 --- a/snekbox/memfs.py +++ b/snekbox/memfs.py @@ -142,7 +142,7 @@ def files( a TimeoutError will be raised. """ start_time = time.monotonic() - added = 0 + count = 0 files = glob.iglob(pattern, root_dir=str(self.output), recursive=True, include_hidden=False) for file in (Path(self.output, f) for f in files): if timeout and (time.monotonic() - start_time) > timeout: @@ -158,11 +158,11 @@ def files( log.info(f"Skipping {file.name!r} as it has not been modified") continue - if added > limit: + if count > limit: log.info(f"Max attachments {limit} reached, skipping remaining files") break - added += 1 + count += 1 log.info(f"Found valid file for upload {file.name!r}") yield FileAttachment.from_path(file, relative_to=self.output) From 64417197b58e3945f0b857c7fa648dcbe28254c4 Mon Sep 17 00:00:00 2001 From: Ionite Date: Thu, 16 Mar 2023 00:33:05 -0400 Subject: [PATCH 12/14] Include TimeoutError in raises docstring field --- snekbox/memfs.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/snekbox/memfs.py b/snekbox/memfs.py index c3401464..991766bd 100644 --- a/snekbox/memfs.py +++ b/snekbox/memfs.py @@ -138,8 +138,9 @@ def files( exclude_files: A dict of Paths and last modified times. Files will be excluded if their last modified time is equal to the provided value. - timeout: The maximum time for the file parsing. If exceeded, - a TimeoutError will be raised. + timeout: Maximum time in seconds for file parsing. + Raises: + TimeoutError: If file parsing exceeds timeout. """ start_time = time.monotonic() count = 0 @@ -184,10 +185,11 @@ def files_list( Files will be excluded if their last modified time is equal to the provided value. preload_dict: Whether to preload as_dict property data. - timeout: The maximum time for the file parsing. If exceeded, - a TimeoutError will be raised. + timeout: Maximum time in seconds for file parsing. Returns: List of FileAttachments sorted lexically by path name. + Raises: + TimeoutError: If file parsing exceeds timeout. """ start_time = time.monotonic() res = sorted( From 1af5bf0a92a5a65346460590054c1834ff7b7ccc Mon Sep 17 00:00:00 2001 From: Ionite Date: Thu, 16 Mar 2023 01:10:57 -0400 Subject: [PATCH 13/14] Add unit tests for time_limit --- tests/test_timed.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 tests/test_timed.py diff --git a/tests/test_timed.py b/tests/test_timed.py new file mode 100644 index 00000000..e46bd374 --- /dev/null +++ b/tests/test_timed.py @@ -0,0 +1,30 @@ +import math +import time +from unittest import TestCase + +from snekbox.utils.timed import time_limit + + +class TimedTests(TestCase): + def test_sleep(self): + """Test that a sleep can be interrupted.""" + _finished = False + start = time.perf_counter() + with self.assertRaises(TimeoutError): + with time_limit(1): + time.sleep(2) + _finished = True + end = time.perf_counter() + self.assertLess(end - start, 2) + self.assertFalse(_finished) + + def test_iter(self): + """Test that a long-running built-in function can be interrupted.""" + _result = 0 + start = time.perf_counter() + with self.assertRaises(TimeoutError): + with time_limit(1): + _result = math.factorial(2**30) + end = time.perf_counter() + self.assertEqual(_result, 0) + self.assertLess(end - start, 2) From de1327dd6a497d488ef500c23ae4684792b24c19 Mon Sep 17 00:00:00 2001 From: Ionite Date: Thu, 16 Mar 2023 01:17:59 -0400 Subject: [PATCH 14/14] Update type hint for `files_timeout` to be `int` to be compatible with `timed.time_limit` --- snekbox/nsjail.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snekbox/nsjail.py b/snekbox/nsjail.py index b6616a48..f64830ac 100644 --- a/snekbox/nsjail.py +++ b/snekbox/nsjail.py @@ -56,7 +56,7 @@ def __init__( memfs_home: str = "home", memfs_output: str = "home", files_limit: int | None = 100, - files_timeout: float | None = 5, + files_timeout: int | None = 5, files_pattern: str = "**/[!_]*", ): """