feat!: require OPENEDX_LEARNING setting for Content storage

WARNING: This commit will break any file-backed Content entries you currently have. Though you will likely only have them if you've used the very recently created add_assets_to_component command. This commit switches us away from using the default_storage and requires the project to set an OPENEDX_LEARNING settings dictionary with a MEDIA key like: OPENEDX_LEARNING = { 'MEDIA': { "BACKEND": "django.core.files.storage.FileSystemStorage", "OPTIONS": { "location": "/openedx/media-private/openedx-learning", } } } If no such setting is present, Content operations that require file access will raise a django.core.exceptions.ImproperlyConfigured error. In addition to that, Content files will be stored in a "content" subdirectory of the specified media location. This is to allow us more flexibility as we start to use file storage for other things like the import/export process. We need to have a separate space for Learning Core media files because these files should NOT be directly accessible via the browser (as the MEDIA_ROOT is). This is because of access policies and the fact that the filenames will not be meaningful by themselves and must be translated by app logic. For details, please see: * docs/decisions/0015-serving-static-assets.rst * openedx_learning/apps/authoring/contents/models.py * openedx_learning/apps/authoring/components/models.py Hiding these files in a new location also required changes to the Django admin, which are included here. This commit also adds a little extra to the admin to make it easier to map Component assets to actual files on disk. Finally, this commit adds the following helpers to the Content model: * storage_path(): get the full OS path to the stored file, if available. * read_file(): return opened Django File object for the Content.
openedx · Sep 27, 2024 · e055898 · e055898
1 parent e0baf65
commit e055898
Show file tree

Hide file tree

Showing 4 changed files with 93 additions and 46 deletions.
diff --git a/openedx_learning/apps/authoring/components/admin.py b/openedx_learning/apps/authoring/components/admin.py
@@ -1,6 +1,8 @@
 """
 Django admin for components models
 """
+import base64
+
 from django.contrib import admin
 from django.template.defaultfilters import filesizeformat
 from django.urls import reverse
@@ -67,35 +69,29 @@ def get_queryset(self, request):
         )
 
     fields = [
-        "format_key",
+        "key",
         "format_size",
         "learner_downloadable",
         "rendered_data",
     ]
     readonly_fields = [
         "content",
-        "format_key",
+        "key",
         "format_size",
         "rendered_data",
     ]
     extra = 0
 
+    def has_file(self, cvc_obj):
+        return cvc_obj.content.has_file
+
     def rendered_data(self, cvc_obj):
         return content_preview(cvc_obj)
 
     @admin.display(description="Size")
     def format_size(self, cvc_obj):
         return filesizeformat(cvc_obj.content.size)
 
-    @admin.display(description="Key")
-    def format_key(self, cvc_obj):
-        return format_html(
-            '<a href="{}">{}</a>',
-            link_for_cvc(cvc_obj),
-            # reverse("admin:components_content_change", args=(cvc_obj.content_id,)),
-            cvc_obj.key,
-        )
-
 
 @admin.register(ComponentVersion)
 class ComponentVersionAdmin(ReadOnlyModelAdmin):
@@ -129,18 +125,6 @@ def get_queryset(self, request):
         )
 
 
-def link_for_cvc(cvc_obj: ComponentVersionContent) -> str:
-    """
-    Get the download URL for the given ComponentVersionContent instance
-    """
-    return "/media_server/component_asset/{}/{}/{}/{}".format(
-        cvc_obj.content.learning_package.key,
-        cvc_obj.component_version.component.key,
-        cvc_obj.component_version.version_num,
-        cvc_obj.key,
-    )
-
-
 def format_text_for_admin_display(text: str) -> SafeText:
     """
     Get the HTML to display the given plain text (preserving formatting)
@@ -158,9 +142,17 @@ def content_preview(cvc_obj: ComponentVersionContent) -> SafeText:
     content_obj = cvc_obj.content
 
     if content_obj.media_type.type == "image":
+        # This base64 encoding looks really goofy and is bad for performance,
+        # but image previews in the admin are extremely useful, and this lets us
+        # have them without creating a separate view in Learning Core. (Keep in
+        # mind that these assets are private, so they cannot be accessed via the
+        # MEDIA_URL like most Django uploaded assets.)
+        data = content_obj.read_file().read()
         return format_html(
-            '<img src="{}" style="max-width: 100%;" />',
-            content_obj.file_url(),
+            '<img src="data:{};base64, {}" style="max-width: 100%;" /><br><pre>{}</pre>',
+            content_obj.mime_type,
+            base64.encodebytes(data).decode('utf8'),
+            content_obj.storage_path(),
         )
 
     return format_text_for_admin_display(

diff --git a/openedx_learning/apps/authoring/contents/admin.py b/openedx_learning/apps/authoring/contents/admin.py
@@ -1,6 +1,8 @@
 """
 Django admin for contents models
 """
+import base64
+
 from django.contrib import admin
 from django.utils.html import format_html
 
@@ -16,7 +18,6 @@ class ContentAdmin(ReadOnlyModelAdmin):
     """
     list_display = [
         "hash_digest",
-        "file_link",
         "learning_package",
         "media_type",
         "size",
@@ -29,24 +30,39 @@ class ContentAdmin(ReadOnlyModelAdmin):
         "media_type",
         "size",
         "created",
-        "file_link",
-        "text_preview",
         "has_file",
+        "file_path",
+        "storage_path",
+        "text_preview",
+        "image_preview",
     ]
     list_filter = ("media_type", "learning_package")
     search_fields = ("hash_digest",)
 
-    def file_link(self, content: Content):
-        if not content.has_file:
-            return ""
+    def storage_path(self, content: Content):
+        return content.storage_path() if content.has_file else ""
 
-        return format_html(
-            '<a href="{}">Download</a>',
-            content.file_url(),
-        )
+    def file_path(self, content: Content):
+        return content.file_path() if content.has_file else ""
 
     def text_preview(self, content: Content):
         return format_html(
             '<pre style="white-space: pre-wrap;">\n{}\n</pre>',
             content.text,
         )
+
+    def image_preview(self, content: Content):
+        """
+        Return HTML for an image, if that is the underlying Content.
+
+        Otherwise, just return a blank string.
+        """
+        if content.media_type.type != "image":
+            return ""
+
+        data = content.read_file().read()
+        return format_html(
+            '<img src="data:{};base64, {}" style="max-width: 100%;" />',
+            content.mime_type,
+            base64.encodebytes(data).decode('utf8'),
+        )
diff --git a/openedx_learning/apps/authoring/contents/models.py b/openedx_learning/apps/authoring/contents/models.py
@@ -7,11 +7,13 @@
 
 from functools import cache, cached_property
 
-from django.core.exceptions import ValidationError
+from django.conf import settings
+from django.core.exceptions import ImproperlyConfigured, ValidationError
 from django.core.files.base import File
-from django.core.files.storage import Storage, default_storage
+from django.core.files.storage import Storage
 from django.core.validators import MaxValueValidator
 from django.db import models
+from django.utils.module_loading import import_string
 
 from ....lib.fields import MultiCollationTextField, case_insensitive_char_field, hash_field, manual_date_time_field
 from ....lib.managers import WithRelationsManager
@@ -28,13 +30,25 @@ def get_storage() -> Storage:
     """
     Return the Storage instance for our Content file persistence.
 
-    For right now, we're still only storing inline text and not static assets in
-    production, so just return the default_storage. We're also going through a
-    transition between Django 3.2 -> 4.2, where storage configuration has moved.
+    This will first search for an OPENEDX_LEARNING config dictionary and return
+    a Storage subclass based on that configuration.
 
-    Make this work properly as part of adding support for static assets.
+    If there is no value for the OPENEDX_LEARNING setting, we return the default
+    MEDIA storage class. TODO: Should we make it just error instead?
     """
-    return default_storage
+    config_dict = getattr(settings, 'OPENEDX_LEARNING', {})
+
+    if 'MEDIA' in config_dict:
+        storage_cls = import_string(config_dict['MEDIA']['BACKEND'])
+        options = config_dict['MEDIA'].get('OPTIONS', {})
+        return storage_cls(**options)
+
+    raise ImproperlyConfigured(
+        "Cannot access file storage: Missing the OPENEDX_LEARNING['MEDIA'] "
+        "setting, which should have a storage BACKEND and OPTIONS values for "
+        "a Storage subclass. These files should be stored in a location that "
+        "is NOT publicly accessible to browsers (so not in the MEDIA_ROOT)."
+    )
 
 
 class MediaType(models.Model):
@@ -284,17 +298,34 @@ def mime_type(self) -> str:
 
     def file_path(self):
         """
-        Path at which this content is stored (or would be stored).
+        Logical path at which this content is stored (or would be stored).
+
+        This path is relative to OPENEDX_LEARNING['MEDIA'] configured storage
+        root.
+        """
+        return f"content/{self.learning_package.uuid}/{self.hash_digest}"
+
+    def storage_path(self):
+        """
+        The full OS path for the underlying file for this Content.
+
+        This will not be supported by all Storage class types.
+        """
+        return get_storage().path(self.file_path())
 
-        This path is relative to configured storage root.
+    def read_file(self) -> File:
+        """
+        Get a File object that has been open for reading.
         """
-        return f"{self.learning_package.uuid}/{self.hash_digest}"
+        return get_storage().open(self.file_path(), 'rb')
 
     def write_file(self, file: File) -> None:
         """
         Write file contents to the file storage backend.
 
-        This function does nothing if the file already exists.
+        This function does nothing if the file already exists. Note that Content
+        is supposed to be immutable, so this should normally only be called once
+        for a given Content row.
         """
         storage = get_storage()
         file_path = self.file_path()

diff --git a/test_settings.py b/test_settings.py
@@ -69,3 +69,11 @@ def root(*args):
     'DEFAULT_PAGINATION_CLASS': 'edx_rest_framework_extensions.paginators.DefaultPagination',
     'PAGE_SIZE': 10,
 }
+
+######################## LEARNING CORE SETTINGS ########################
+
+OPENEDX_LEARNING = {
+    'MEDIA': {
+        'BACKEND': 'django.core.files.storage.InMemoryStorage'
+    }
+}