feat: Configure codejail and run safety check at startup

- Initialize codejail at startup, if `CODE_JAIL` is set - Run safety checks at startup, locking out the API if the checks fail If codejail isn't properly configured, it defaults to running code unsafely. To prevent this from affecting the service, we run a smoke test at startup to check if there's anything just *drastically* wrong. If this check does not pass, two things happen: - The healthcheck endpoint will never return a 200 OK - The code-exec endpoint will refuse with a 500 error Supporting changes: - Define an explicit AppConfig for the api subpackage so that we can hook into the `ready()` mechanism - Wrap `safe_exec` to prevent codejail eagerly setting `UNSAFE=True` at module load time. (Not clear why this doesn't affect edx-platform; maybe something to do with app vs. middleware load order.) Filed openedx/codejail#225 for possibly fixing this. - `safe_exec` wrapper also performs a deepcopy to allow callers to reason about the globals dict more easily. Other changes: - Clean up healthcheck docstring (mostly just trim it down) - Lint cleanup Part of edx/edx-arch-experiments#927
timmc-edx · Feb 6, 2025 · eca968a · eca968a
1 parent 9037139
commit eca968a
Show file tree

Hide file tree

Showing 8 changed files with 230 additions and 22 deletions.
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -14,6 +14,13 @@ Change Log
 Unreleased
 **********
 
+0.3.1 - 2025-02-06
+******************
+Changed
+=======
+* Codejail is now properly configured if ``CODE_JAIL`` Django settings are supplied
+* Service will refuse to execute code if basic smoke tests fail at startup. Also, healthcheck endpoint will remain unhealthy.
+
 0.3.0 - 2025-01-30
 ******************
 

diff --git a/codejail_service/__init__.py b/codejail_service/__init__.py
@@ -1,4 +1,4 @@
 """
 codejail-service module.
 """
-__version__ = '0.3.0'
+__version__ = '0.3.1'
diff --git a/codejail_service/apps/api/apps.py b/codejail_service/apps/api/apps.py
@@ -0,0 +1,36 @@
+"""
+Config for main API.
+"""
+
+import logging
+
+from codejail.django_integration_utils import apply_django_settings
+from django.apps import AppConfig
+from django.conf import settings
+
+from codejail_service.startup_check import run_startup_safety_check
+
+log = logging.getLogger(__name__)
+
+
+class CodejailApiConfig(AppConfig):
+    """
+    AppConfig for API views.
+
+    The only reason we need this to be an app is so that we can hook into the
+    ready() callback at startup. Any other mechanism would be fine too.
+    """
+    name = "codejail_service.apps.api"
+
+    def ready(self):
+        lib_settings = getattr(settings, 'CODE_JAIL', None)
+        if lib_settings is None:
+            # Should only happen when tests are being run
+            log.warning("Missing CODE_JAIL settings")
+        else:
+            # Codejail needs this at startup
+            apply_django_settings(settings.CODE_JAIL)
+
+        # Perform self-check and initialize status for healthcheck and
+        # code-exec views to consult.
+        run_startup_safety_check()
diff --git a/codejail_service/apps/api/v0/tests/test_views.py b/codejail_service/apps/api/v0/tests/test_views.py
@@ -5,11 +5,14 @@
 import json
 import textwrap
 from os import path
+from unittest.mock import patch
 
 import ddt
 from django.test import TestCase, override_settings
 from rest_framework.test import APIClient
 
+from codejail_service import startup_check
+
 
 @override_settings(
     ROOT_URLCONF='codejail_service.urls',
@@ -21,6 +24,9 @@ class TestExecService(TestCase):
 
     def setUp(self):
         super().setUp()
+        # We can't configure apparmor for regular unit tests, so just
+        # pretend startup was OK and the sandbox is working.
+        startup_check.STARTUP_SAFETY_CHECK_OK = True
         self.standard_params = {'code': 'retval = 3 + 4', 'globals_dict': {}}
 
     def _test_codejail_api(self, *, params=None, files=None, exp_status, exp_body):
@@ -51,6 +57,13 @@ def test_feature_disabled(self):
             exp_status=500, exp_body={'error': "Codejail service not enabled"},
         )
 
+    @patch('codejail_service.apps.api.v0.views.is_exec_safe', return_value=None)
+    def test_unhealthy(self, _mock_is_safe_exec):
+        """Code-exec prevented by failing startup checks."""
+        self._test_codejail_api(
+            exp_status=500, exp_body={'error': "Codejail service is not correctly configured"},
+        )
+
     def test_success(self):
         """Regular successful call."""
         self._test_codejail_api(

diff --git a/codejail_service/apps/api/v0/views.py b/codejail_service/apps/api/v0/views.py
@@ -4,16 +4,17 @@
 
 import json
 import logging
-from copy import deepcopy
 
-from codejail.safe_exec import SafeExecException, safe_exec
 from edx_toggles.toggles import SettingToggle
 from jsonschema.exceptions import best_match as json_error_best_match
 from jsonschema.validators import Draft202012Validator
 from rest_framework.decorators import api_view, parser_classes
 from rest_framework.parsers import FormParser, MultiPartParser
 from rest_framework.response import Response
 
+from codejail_service.codejail import safe_exec
+from codejail_service.startup_check import is_exec_safe
+
 log = logging.getLogger(__name__)
 
 # Schema for the JSON passed in the v0 API's 'payload' field.
@@ -93,6 +94,9 @@ def code_exec(request):
     if not CODEJAIL_ENABLED.is_enabled():
         return Response({'error': "Codejail service not enabled"}, status=500)
 
+    if not is_exec_safe():
+        return Response({'error': "Codejail service is not correctly configured"}, status=500)
+
     params_json = request.data['payload']
     params = json.loads(params_json)
 
@@ -116,17 +120,17 @@ def code_exec(request):
     if unsafely:
         return Response({'error': "Refusing codejail execution with unsafely=true"}, status=400)
 
-    output_globals_dict = deepcopy(input_globals_dict)  # Output dict will be mutated by safe_exec
     try:
-        safe_exec(
+        # This wrapped version of safe_exec doesn't mutate the globals dict
+        output_globals_dict = safe_exec(
             complete_code,
-            output_globals_dict,
+            input_globals_dict,
             python_path=python_path,
             extra_files=extra_files,
             limit_overrides_context=limit_overrides_context,
             slug=slug,
         )
-    except SafeExecException as e:
+    except BaseException as e:
         log.debug("Codejail execution failed for {slug=} with: {e}")
         return Response({'emsg': str(e)})
 

diff --git a/codejail_service/apps/core/views.py b/codejail_service/apps/core/views.py
@@ -4,28 +4,24 @@
 from django.http import JsonResponse
 from edx_django_utils.monitoring import ignore_transaction
 
-logger = logging.getLogger(__name__)
+from codejail_service.startup_check import is_exec_safe
 
+logger = logging.getLogger(__name__)
 
-def health(_):
-    """Allows a load balancer to verify this service is up.
 
-    Checks the status of the database connection on which this service relies.
+def health(_request):
+    """
+    Allows a load balancer to verify this service is up and healthy.
 
     Returns:
-        HttpResponse: 200 if the service is available, with JSON data indicating the health of each required service
-        HttpResponse: 503 if the service is unavailable, with JSON data indicating the health of each required service
-
-    Example:
-        >>> response = requests.get('https://codejail-service.edx.org/health')
-        >>> response.status_code
-        200
-        >>> response.content
-        '{"overall_status": "OK", "detailed_status": {"database_status": "OK", "lms_status": "OK"}}'
+        HttpResponse: 200 if the service is available
+        HttpResponse: 503 if the service is unavailable
     """
 
     # Ignores health check in performance monitoring so as to not artifically inflate our response time metrics
     ignore_transaction()
 
-    # Always "healthy", for now -- no state to look at. Revisit later.
-    return JsonResponse({'status': 'OK'})
+    if is_exec_safe():
+        return JsonResponse({'status': 'OK'}, status=200)
+    else:
+        return JsonResponse({'status': 'UNAVAILABLE'}, status=503)
diff --git a/codejail_service/codejail.py b/codejail_service/codejail.py
@@ -0,0 +1,28 @@
+"""
+Wrappers and utilities for codejail library.
+"""
+
+from copy import deepcopy
+
+
+def safe_exec(code, input_globals, **kwargs):
+    """
+    Call safe_exec and work around several of its problems.
+
+    Returns new globals dictionary that results from execution.
+    (Does not mutate input.)
+    """
+    # This needs to be a lazy import because as soon as codejail's
+    # safe_exec module loads, it immediately makes a decision about
+    # whether to run in always-unsafe mode.
+    #
+    # See https://github.com/openedx/codejail/issues/225 for maybe
+    # fixing this.
+
+    # pylint: disable=import-outside-toplevel
+    from codejail.safe_exec import safe_exec as real_safe_exec
+
+    # Prevent mutation of input
+    output_globals = deepcopy(input_globals)
+    real_safe_exec(code, output_globals, **kwargs)
+    return output_globals
diff --git a/codejail_service/startup_check.py b/codejail_service/startup_check.py
@@ -0,0 +1,124 @@
+"""
+State and accessors for a safety check that is run at startup.
+"""
+
+import logging
+
+from codejail_service.codejail import safe_exec
+
+log = logging.getLogger(__name__)
+
+# Results of the safety check that was performed at startup.
+#
+# Expected values:
+#
+# - None: The check has not yet been performed
+# - True: No issues were detected
+# - False: A fundamental safety or function issue was detected
+#
+# Any other value indicates that the safety check failed in an
+# unexpected way, perhaps raising an exception.
+#
+# The *only value* that indicates that it is safe to receive code-exec
+# calls is `True`.
+STARTUP_SAFETY_CHECK_OK = None
+
+
+def is_exec_safe():
+    """
+    Return True if and only if it is safe to accept code-exec calls.
+    """
+    return STARTUP_SAFETY_CHECK_OK is True
+
+
+def run_startup_safety_check():
+    """
+    Perform a sandboxing safety check.
+
+    Determines if the service is running with an acceptable configuration.
+    This is *not* a full test suite, just a basic check that codejail
+    is actually configured in sandboxing mode.
+
+    This just initializes state. Afterwards, is_exec_safe can be called.
+    """
+    global STARTUP_SAFETY_CHECK_OK
+
+    # App initialization can happen multiple times; just run checks once.
+    if STARTUP_SAFETY_CHECK_OK is not None:
+        return
+
+    tests = [
+        {
+            "name": "Basic code execution",
+            "fn": _test_basic_function,
+        },
+        {
+            "name": "Sandbox escape by disk access",
+            "fn": _test_escape_disk,
+        },
+        {
+            "name": "Sandbox escape by child process",
+            "fn": _test_escape_subprocess,
+        },
+    ]
+
+    any_failed = False
+    for test in tests:
+        try:
+            result = test['fn']()
+        except BaseException as e:
+            result = f"Uncaught exception from test: {e!r}"
+
+        if result is True:
+            log.info(f"Startup test {test['name']!r} passed")
+        else:
+            any_failed = True
+            log.error(f"Startup test {test['name']!r} failed with: {result!r}")
+
+    STARTUP_SAFETY_CHECK_OK = not any_failed
+
+
+def _test_basic_function():
+    """
+    Test for basic code execution (math).
+    """
+    globals_out = safe_exec("x = x + 1", {'x': 16})
+
+    if 'x' not in globals_out:
+        return "x not in returned globals"
+    if globals_out['x'] != 17:
+        return f"returned global x != 17 (was {globals_out['x']})"
+
+    return True
+
+
+def _test_escape_disk():
+    """
+    Test for sandbox escape by reading from files outside of sandbox.
+    """
+    try:
+        globals_out = safe_exec("import os; ret = os.listdir('/')", {})
+        return f"Expected error, but code ran successfully. Globals: {globals_out!r}"
+    except BaseException as e:
+        if "Permission denied" in repr(e):
+            return True
+        else:
+            return f"Expected permission error, but got: {e!r}"
+
+
+def _test_escape_subprocess():
+    """
+    Test for sandbox escape by creating a child process.
+    """
+    try:
+        globals_out = safe_exec(
+            "import subprocess;"
+            "ret = subprocess.check_output('echo $((6 * 7))', shell=True)",
+            {},
+        )
+        return f"Expected error, but code ran successfully. Globals: {globals_out!r}"
+    except BaseException as e:
+        if "Permission denied" in repr(e):
+            return True
+        else:
+            return f"Expected permission error, but got: {e!r}"