From 42fe182ee0eec3f7383c981e6d5880b09e5524a3 Mon Sep 17 00:00:00 2001
From: Tim McCormack <tmccormack@edx.org>
Date: Mon, 3 Feb 2025 21:31:55 +0000
Subject: [PATCH] feat: Configure codejail and run safety check at startup

- Initialize codejail at startup, if `CODE_JAIL` is set
- Run safety checks at startup, locking out the API if the checks fail

If codejail isn't properly configured, it defaults to running code
unsafely. To prevent this from affecting the service, we run a smoke test
at startup to check if there's anything just *drastically* wrong.

If this check does not pass, two things happen:

- The healthcheck endpoint will never return a 200 OK
- The code-exec endpoint will refuse with a 500 error

Supporting changes:

- Define an explicit AppConfig for the api subpackage so that we can hook
  into the `ready()` mechanism
- Wrap `safe_exec` to prevent codejail eagerly setting `UNSAFE=True`
  at module load time. (Not clear why this doesn't affect
  edx-platform; maybe something to do with app vs. middleware load
  order.) Filed https://github.com/openedx/codejail/issues/225 for
  possibly fixing this.
- `safe_exec` wrapper also provides deepcopy to allow callers to
  reason about the globals dict more easily.

Other changes:

- Clean up healthcheck docstring (mostly just trim it down)
- Lint cleanup

Part of https://github.com/edx/edx-arch-experiments/issues/927
---
 CHANGELOG.rst                                 |   7 +
 codejail_service/__init__.py                  |   2 +-
 codejail_service/apps/api/apps.py             |  36 +++++
 .../apps/api/v0/tests/test_views.py           |  13 ++
 codejail_service/apps/api/v0/views.py         |  16 ++-
 codejail_service/apps/core/views.py           |  26 ++--
 codejail_service/codejail.py                  |  28 ++++
 codejail_service/startup_check.py             | 124 ++++++++++++++++++
 8 files changed, 230 insertions(+), 22 deletions(-)
 create mode 100644 codejail_service/apps/api/apps.py
 create mode 100644 codejail_service/codejail.py
 create mode 100644 codejail_service/startup_check.py

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 26ecb19..f6420c4 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -14,6 +14,13 @@ Change Log
 Unreleased
 **********
 
+0.3.1 - 2025-02-06
+******************
+Changed
+=======
+* Codejail is now properly configured if ``CODE_JAIL`` Django settings are supplied
+* Service will refuse to execute code if basic smoke tests fail at startup. Also, healthcheck endpoint will remain unhealthy.
+
 0.3.0 - 2025-01-30
 ******************
 
diff --git a/codejail_service/__init__.py b/codejail_service/__init__.py
index 77e1738..7e0c591 100644
--- a/codejail_service/__init__.py
+++ b/codejail_service/__init__.py
@@ -1,4 +1,4 @@
 """
 codejail-service module.
 """
-__version__ = '0.3.0'
+__version__ = '0.3.1'
diff --git a/codejail_service/apps/api/apps.py b/codejail_service/apps/api/apps.py
new file mode 100644
index 0000000..ce36740
--- /dev/null
+++ b/codejail_service/apps/api/apps.py
@@ -0,0 +1,36 @@
+"""
+Config for main API.
+"""
+
+import logging
+
+from codejail.django_integration_utils import apply_django_settings
+from django.apps import AppConfig
+from django.conf import settings
+
+from codejail_service.startup_check import run_startup_safety_check
+
+log = logging.getLogger(__name__)
+
+
+class CodejailApiConfig(AppConfig):
+    """
+    AppConfig for API views.
+
+    The only reason we need this to be an app is so that we can hook into the
+    ready() callback at startup. Any other mechanism would be fine too.
+    """
+    name = "codejail_service.apps.api"
+
+    def ready(self):
+        lib_settings = getattr(settings, 'CODE_JAIL', None)
+        if lib_settings is None:
+            # Should only happen when tests are being run
+            log.warning("Missing CODE_JAIL settings")
+        else:
+            # Codejail needs this at startup
+            apply_django_settings(settings.CODE_JAIL)
+
+        # Perform self-check and initialize status for healthcheck and
+        # code-exec views to consult.
+        run_startup_safety_check()
diff --git a/codejail_service/apps/api/v0/tests/test_views.py b/codejail_service/apps/api/v0/tests/test_views.py
index 3be4dcd..7d2b1ac 100644
--- a/codejail_service/apps/api/v0/tests/test_views.py
+++ b/codejail_service/apps/api/v0/tests/test_views.py
@@ -5,11 +5,14 @@
 import json
 import textwrap
 from os import path
+from unittest.mock import patch
 
 import ddt
 from django.test import TestCase, override_settings
 from rest_framework.test import APIClient
 
+from codejail_service import startup_check
+
 
 @override_settings(
     ROOT_URLCONF='codejail_service.urls',
@@ -21,6 +24,9 @@ class TestExecService(TestCase):
 
     def setUp(self):
         super().setUp()
+        # We can't configure apparmor for regular unit tests, so just
+        # pretend startup was OK and the sandbox is working.
+        startup_check.STARTUP_SAFETY_CHECK_OK = True
         self.standard_params = {'code': 'retval = 3 + 4', 'globals_dict': {}}
 
     def _test_codejail_api(self, *, params=None, files=None, exp_status, exp_body):
@@ -51,6 +57,13 @@ def test_feature_disabled(self):
             exp_status=500, exp_body={'error': "Codejail service not enabled"},
         )
 
+    @patch('codejail_service.apps.api.v0.views.is_exec_safe', return_value=None)
+    def test_unhealthy(self, _mock_is_safe_exec):
+        """Code-exec prevented by failing startup checks."""
+        self._test_codejail_api(
+            exp_status=500, exp_body={'error': "Codejail service is not correctly configured"},
+        )
+
     def test_success(self):
         """Regular successful call."""
         self._test_codejail_api(
diff --git a/codejail_service/apps/api/v0/views.py b/codejail_service/apps/api/v0/views.py
index 1350b77..94874ee 100644
--- a/codejail_service/apps/api/v0/views.py
+++ b/codejail_service/apps/api/v0/views.py
@@ -4,9 +4,7 @@
 
 import json
 import logging
-from copy import deepcopy
 
-from codejail.safe_exec import SafeExecException, safe_exec
 from edx_toggles.toggles import SettingToggle
 from jsonschema.exceptions import best_match as json_error_best_match
 from jsonschema.validators import Draft202012Validator
@@ -14,6 +12,9 @@
 from rest_framework.parsers import FormParser, MultiPartParser
 from rest_framework.response import Response
 
+from codejail_service.codejail import safe_exec
+from codejail_service.startup_check import is_exec_safe
+
 log = logging.getLogger(__name__)
 
 # Schema for the JSON passed in the v0 API's 'payload' field.
@@ -93,6 +94,9 @@ def code_exec(request):
     if not CODEJAIL_ENABLED.is_enabled():
         return Response({'error': "Codejail service not enabled"}, status=500)
 
+    if not is_exec_safe():
+        return Response({'error': "Codejail service is not correctly configured"}, status=500)
+
     params_json = request.data['payload']
     params = json.loads(params_json)
 
@@ -116,17 +120,17 @@ def code_exec(request):
     if unsafely:
         return Response({'error': "Refusing codejail execution with unsafely=true"}, status=400)
 
-    output_globals_dict = deepcopy(input_globals_dict)  # Output dict will be mutated by safe_exec
     try:
-        safe_exec(
+        # This wrapped version of safe_exec doesn't mutate the globals dict
+        output_globals_dict = safe_exec(
             complete_code,
-            output_globals_dict,
+            input_globals_dict,
             python_path=python_path,
             extra_files=extra_files,
             limit_overrides_context=limit_overrides_context,
             slug=slug,
         )
-    except SafeExecException as e:
+    except BaseException as e:
         log.debug("Codejail execution failed for {slug=} with: {e}")
         return Response({'emsg': str(e)})
 
diff --git a/codejail_service/apps/core/views.py b/codejail_service/apps/core/views.py
index 25f418b..63b7f21 100644
--- a/codejail_service/apps/core/views.py
+++ b/codejail_service/apps/core/views.py
@@ -4,28 +4,24 @@
 from django.http import JsonResponse
 from edx_django_utils.monitoring import ignore_transaction
 
-logger = logging.getLogger(__name__)
+from codejail_service.startup_check import is_exec_safe
 
+logger = logging.getLogger(__name__)
 
-def health(_):
-    """Allows a load balancer to verify this service is up.
 
-    Checks the status of the database connection on which this service relies.
+def health(_request):
+    """
+    Allows a load balancer to verify this service is up and healthy.
 
     Returns:
-        HttpResponse: 200 if the service is available, with JSON data indicating the health of each required service
-        HttpResponse: 503 if the service is unavailable, with JSON data indicating the health of each required service
-
-    Example:
-        >>> response = requests.get('https://codejail-service.edx.org/health')
-        >>> response.status_code
-        200
-        >>> response.content
-        '{"overall_status": "OK", "detailed_status": {"database_status": "OK", "lms_status": "OK"}}'
+        HttpResponse: 200 if the service is available
+        HttpResponse: 503 if the service is unavailable
     """
 
     # Ignores health check in performance monitoring so as to not artifically inflate our response time metrics
     ignore_transaction()
 
-    # Always "healthy", for now -- no state to look at. Revisit later.
-    return JsonResponse({'status': 'OK'})
+    if is_exec_safe():
+        return JsonResponse({'status': 'OK'}, status=200)
+    else:
+        return JsonResponse({'status': 'UNAVAILABLE'}, status=503)
diff --git a/codejail_service/codejail.py b/codejail_service/codejail.py
new file mode 100644
index 0000000..fdf6ca2
--- /dev/null
+++ b/codejail_service/codejail.py
@@ -0,0 +1,28 @@
+"""
+Wrappers and utilities for codejail library.
+"""
+
+from copy import deepcopy
+
+
+def safe_exec(code, input_globals, **kwargs):
+    """
+    Call safe_exec and work around several of its problems.
+
+    Returns new globals dictionary that results from execution.
+    (Does not mutate input.)
+    """
+    # This needs to be a lazy import because as soon as codejail's
+    # safe_exec module loads, it immediately makes a decision about
+    # whether to run in always-unsafe mode.
+    #
+    # See https://github.com/openedx/codejail/issues/225 for maybe
+    # fixing this.
+
+    # pylint: disable=import-outside-toplevel
+    from codejail.safe_exec import safe_exec as real_safe_exec
+
+    # Prevent mutation of input
+    output_globals = deepcopy(input_globals)
+    real_safe_exec(code, output_globals, **kwargs)
+    return output_globals
diff --git a/codejail_service/startup_check.py b/codejail_service/startup_check.py
new file mode 100644
index 0000000..b98b701
--- /dev/null
+++ b/codejail_service/startup_check.py
@@ -0,0 +1,124 @@
+"""
+State and accessors for a safety check that is run at startup.
+"""
+
+import logging
+
+from codejail_service.codejail import safe_exec
+
+log = logging.getLogger(__name__)
+
+# Results of the safety check that was performed at startup.
+#
+# Expected values:
+#
+# - None: The check has not yet been performed
+# - True: No issues were detected
+# - False: A fundamental safety or function issue was detected
+#
+# Any other value indicates that the safety check failed in an
+# unexpected way, perhaps raising an exception.
+#
+# The *only value* that indicates that it is safe to receive code-exec
+# calls is `True`.
+STARTUP_SAFETY_CHECK_OK = None
+
+
+def is_exec_safe():
+    """
+    Return True if and only if it is safe to accept code-exec calls.
+    """
+    return STARTUP_SAFETY_CHECK_OK is True
+
+
+def run_startup_safety_check():
+    """
+    Perform a sandboxing safety check.
+
+    Determines if the service is running with an acceptable configuration.
+    This is *not* a full test suite, just a basic check that codejail
+    is actually configured in sandboxing mode.
+
+    This just initializes state. Afterwards, is_exec_safe can be called.
+    """
+    global STARTUP_SAFETY_CHECK_OK
+
+    # App initialization can happen multiple times; just run checks once.
+    if STARTUP_SAFETY_CHECK_OK is not None:
+        return
+
+    tests = [
+        {
+            "name": "Basic code execution",
+            "fn": _test_basic_function,
+        },
+        {
+            "name": "Sandbox escape by disk access",
+            "fn": _test_escape_disk,
+        },
+        {
+            "name": "Sandbox escape by child process",
+            "fn": _test_escape_subprocess,
+        },
+    ]
+
+    any_failed = False
+    for test in tests:
+        try:
+            result = test['fn']()
+        except BaseException as e:
+            result = f"Uncaught exception from test: {e!r}"
+
+        if result is True:
+            log.info(f"Startup test {test['name']!r} passed")
+        else:
+            any_failed = True
+            log.error(f"Startup test {test['name']!r} failed with: {result!r}")
+
+    STARTUP_SAFETY_CHECK_OK = not any_failed
+
+
+def _test_basic_function():
+    """
+    Test for basic code execution (math).
+    """
+    globals_out = safe_exec("x = x + 1", {'x': 16})
+
+    if 'x' not in globals_out:
+        return "x not in returned globals"
+    if globals_out['x'] != 17:
+        return f"returned global x != 17 (was {globals_out['x']})"
+
+    return True
+
+
+def _test_escape_disk():
+    """
+    Test for sandbox escape by reading from files outside of sandbox.
+    """
+    try:
+        globals_out = safe_exec("import os; ret = os.listdir('/')", {})
+        return f"Expected error, but code ran successfully. Globals: {globals_out!r}"
+    except BaseException as e:
+        if "Permission denied" in repr(e):
+            return True
+        else:
+            return f"Expected permission error, but got: {e!r}"
+
+
+def _test_escape_subprocess():
+    """
+    Test for sandbox escape by creating a child process.
+    """
+    try:
+        globals_out = safe_exec(
+            "import subprocess;"
+            "ret = subprocess.check_output('echo $((6 * 7))', shell=True)",
+            {},
+        )
+        return f"Expected error, but code ran successfully. Globals: {globals_out!r}"
+    except BaseException as e:
+        if "Permission denied" in repr(e):
+            return True
+        else:
+            return f"Expected permission error, but got: {e!r}"