openedx · ormsbee · Oct 2, 2024 · Sep 27, 2024 · Sep 27, 2024
diff --git a/openedx_learning/__init__.py b/openedx_learning/__init__.py
@@ -1,4 +1,4 @@
 """
 Open edX Learning ("Learning Core").
 """
-__version__ = "0.13.1"
+__version__ = "0.14.0"
diff --git a/openedx_learning/apps/authoring/components/admin.py b/openedx_learning/apps/authoring/components/admin.py
@@ -1,6 +1,8 @@
 """
 Django admin for components models
 """
+import base64
+
 from django.contrib import admin
 from django.template.defaultfilters import filesizeformat
 from django.urls import reverse
@@ -67,35 +69,29 @@ def get_queryset(self, request):
         )
 
     fields = [
-        "format_key",
+        "key",
         "format_size",
         "learner_downloadable",
         "rendered_data",
     ]
     readonly_fields = [
         "content",
-        "format_key",
+        "key",
         "format_size",
         "rendered_data",
     ]
     extra = 0
 
+    def has_file(self, cvc_obj):
+        return cvc_obj.content.has_file
+
     def rendered_data(self, cvc_obj):
         return content_preview(cvc_obj)
 
     @admin.display(description="Size")
     def format_size(self, cvc_obj):
         return filesizeformat(cvc_obj.content.size)
 
-    @admin.display(description="Key")
-    def format_key(self, cvc_obj):
-        return format_html(
-            '<a href="{}">{}</a>',
-            link_for_cvc(cvc_obj),
-            # reverse("admin:components_content_change", args=(cvc_obj.content_id,)),
-            cvc_obj.key,
-        )
-
 
 @admin.register(ComponentVersion)
 class ComponentVersionAdmin(ReadOnlyModelAdmin):
@@ -129,18 +125,6 @@ def get_queryset(self, request):
         )
 
 
-def link_for_cvc(cvc_obj: ComponentVersionContent) -> str:
-    """
-    Get the download URL for the given ComponentVersionContent instance
-    """
-    return "/media_server/component_asset/{}/{}/{}/{}".format(
-        cvc_obj.content.learning_package.key,
-        cvc_obj.component_version.component.key,
-        cvc_obj.component_version.version_num,
-        cvc_obj.key,
-    )
-
-
 def format_text_for_admin_display(text: str) -> SafeText:
     """
     Get the HTML to display the given plain text (preserving formatting)
@@ -158,9 +142,17 @@ def content_preview(cvc_obj: ComponentVersionContent) -> SafeText:
     content_obj = cvc_obj.content
 
     if content_obj.media_type.type == "image":
+        # This base64 encoding looks really goofy and is bad for performance,
+        # but image previews in the admin are extremely useful, and this lets us
+        # have them without creating a separate view in Learning Core. (Keep in
+        # mind that these assets are private, so they cannot be accessed via the
+        # MEDIA_URL like most Django uploaded assets.)
+        data = content_obj.read_file().read()
         return format_html(
-            '<img src="{}" style="max-width: 100%;" />',
-            content_obj.file_url(),
+            '<img src="data:{};base64, {}" style="max-width: 100%;" /><br><pre>{}</pre>',
+            content_obj.mime_type,
+            base64.encodebytes(data).decode('utf8'),
+            content_obj.os_path(),
         )
 
     return format_text_for_admin_display(

diff --git a/openedx_learning/apps/authoring/components/api.py b/openedx_learning/apps/authoring/components/api.py
@@ -269,7 +269,15 @@ def get_component_by_uuid(uuid: UUID) -> Component:
 
 
 def get_component_version_by_uuid(uuid: UUID) -> ComponentVersion:
-    return ComponentVersion.objects.get(publishable_entity_version__uuid=uuid)
+    return (
+        ComponentVersion
+        .objects
+        .select_related(
+            "component",
+            "component__learning_package",
+        )
+        .get(publishable_entity_version__uuid=uuid)
+    )
 
 
 def component_exists_by_key(
@@ -395,7 +403,21 @@ def create_component_version_content(
 ) -> ComponentVersionContent:
     """
     Add a Content to the given ComponentVersion
+
+    We don't allow keys that would be absolute paths, e.g. ones that start with
+    '/'. Storing these causes headaches with building relative paths and because
+    of mismatches with things that expect a leading slash and those that don't.
+    So for safety and consistency, we strip off leading slashes and emit a
+    warning when we do.
     """
+    if key.startswith('/'):
+        logger.warning(
+            "Absolute paths are not supported: "
+            f"removed leading '/' from ComponentVersion {component_version_id} "
+            f"content key: {repr(key)} (content_id: {content_id})"
+        )
+        key = key.lstrip('/')
+
     cvrc, _created = ComponentVersionContent.objects.get_or_create(
         component_version_id=component_version_id,
         content_id=content_id,
@@ -510,7 +532,12 @@ def _error_header(error: AssetError) -> dict[str, str]:
 
     # Check: Does the ComponentVersion exist?
     try:
-        component_version = get_component_version_by_uuid(component_version_uuid)
+        component_version = (
+            ComponentVersion
+            .objects
+            .select_related("component", "component__learning_package")
+            .get(publishable_entity_version__uuid=component_version_uuid)
+        )
     except ComponentVersion.DoesNotExist:
         # No need to add headers here, because no ComponentVersion was found.
         logger.error(f"Asset Not Found: No ComponentVersion with UUID {component_version_uuid}")
@@ -567,10 +594,9 @@ def _error_header(error: AssetError) -> dict[str, str]:
     # At this point, we know that there is valid Content that we want to send.
     # This adds Content-level headers, like the hash/etag and content type.
     info_headers.update(contents_api.get_content_info_headers(content))
-    stored_file_path = content.file_path()
 
     # Recompute redirect headers (reminder: this should never be cached).
-    redirect_headers = contents_api.get_redirect_headers(stored_file_path, public)
+    redirect_headers = contents_api.get_redirect_headers(content.path, public)
     logger.info(
         "Asset redirect (uncached metadata): "
         f"{component_version_uuid}/{asset_path} -> {redirect_headers}"

diff --git a/openedx_learning/apps/authoring/contents/admin.py b/openedx_learning/apps/authoring/contents/admin.py
@@ -1,6 +1,8 @@
 """
 Django admin for contents models
 """
+import base64
+
 from django.contrib import admin
 from django.utils.html import format_html
 
@@ -16,7 +18,6 @@ class ContentAdmin(ReadOnlyModelAdmin):
     """
     list_display = [
         "hash_digest",
-        "file_link",
         "learning_package",
         "media_type",
         "size",
@@ -29,24 +30,42 @@ class ContentAdmin(ReadOnlyModelAdmin):
         "media_type",
         "size",
         "created",
-        "file_link",
-        "text_preview",
         "has_file",
+        "path",
+        "os_path",
+        "text_preview",
+        "image_preview",
     ]
     list_filter = ("media_type", "learning_package")
     search_fields = ("hash_digest",)
 
-    def file_link(self, content: Content):
-        if not content.has_file:
-            return ""
+    @admin.display(description="OS Path")
+    def os_path(self, content: Content):
+        return content.os_path() or ""
 
-        return format_html(
-            '<a href="{}">Download</a>',
-            content.file_url(),
-        )
+    def path(self, content: Content):
+        return content.path if content.has_file else ""
 
     def text_preview(self, content: Content):
+        if not content.text:
+            return ""
         return format_html(
             '<pre style="white-space: pre-wrap;">\n{}\n</pre>',
             content.text,
         )
+
+    def image_preview(self, content: Content):
+        """
+        Return HTML for an image, if that is the underlying Content.
+
+        Otherwise, just return a blank string.
+        """
+        if content.media_type.type != "image":
+            return ""
+
+        data = content.read_file().read()
+        return format_html(
+            '<img src="data:{};base64, {}" style="max-width: 100%;" />',
+            content.mime_type,
+            base64.encodebytes(data).decode('utf8'),
+        )
diff --git a/openedx_learning/apps/authoring/contents/models.py b/openedx_learning/apps/authoring/contents/models.py
@@ -7,11 +7,13 @@
 
 from functools import cache, cached_property
 
-from django.core.exceptions import ValidationError
+from django.conf import settings
+from django.core.exceptions import ImproperlyConfigured, ValidationError
 from django.core.files.base import File
-from django.core.files.storage import Storage, default_storage
+from django.core.files.storage import Storage
 from django.core.validators import MaxValueValidator
 from django.db import models
+from django.utils.module_loading import import_string
 
 from ....lib.fields import MultiCollationTextField, case_insensitive_char_field, hash_field, manual_date_time_field
 from ....lib.managers import WithRelationsManager
@@ -28,13 +30,25 @@ def get_storage() -> Storage:
     """
     Return the Storage instance for our Content file persistence.
 
-    For right now, we're still only storing inline text and not static assets in
-    production, so just return the default_storage. We're also going through a
-    transition between Django 3.2 -> 4.2, where storage configuration has moved.
+    This will first search for an OPENEDX_LEARNING config dictionary and return
+    a Storage subclass based on that configuration.
 
-    Make this work properly as part of adding support for static assets.
+    If there is no value for the OPENEDX_LEARNING setting, we return the default
+    MEDIA storage class. TODO: Should we make it just error instead?
     """
-    return default_storage
+    config_dict = getattr(settings, 'OPENEDX_LEARNING', {})
+
+    if 'MEDIA' in config_dict:
+        storage_cls = import_string(config_dict['MEDIA']['BACKEND'])
+        options = config_dict['MEDIA'].get('OPTIONS', {})
+        return storage_cls(**options)
+
+    raise ImproperlyConfigured(
+        "Cannot access file storage: Missing the OPENEDX_LEARNING['MEDIA'] "
+        "setting, which should have a storage BACKEND and OPTIONS values for "
+        "a Storage subclass. These files should be stored in a location that "
+        "is NOT publicly accessible to browsers (so not in the MEDIA_ROOT)."
+    )
 
 
 class MediaType(models.Model):
@@ -282,22 +296,53 @@ def mime_type(self) -> str:
         """
         return str(self.media_type)
 
-    def file_path(self):
+    @cached_property
+    def path(self):
+        """
+        Logical path at which this content is stored (or would be stored).
+
+        This path is relative to OPENEDX_LEARNING['MEDIA'] configured storage
+        root. This file may not exist because has_file=False, or because we
+        haven't written the file yet (this is the method we call when trying to
+        figure out where the file *should* go).
+        """
+        return f"content/{self.learning_package.uuid}/{self.hash_digest}"
+
+    def os_path(self):
+        """
+        The full OS path for the underlying file for this Content.
+
+        This will not be supported by all Storage class types.
+
+        This will return ``None`` if there is no backing file (has_file=False).
         """
-        Path at which this content is stored (or would be stored).
+        if self.has_file:
+            return get_storage().path(self.path)
+        return None
 
-        This path is relative to configured storage root.
+    def read_file(self) -> File:
         """
-        return f"{self.learning_package.uuid}/{self.hash_digest}"
+        Get a File object that has been open for reading.
+
+        We intentionally don't expose an `open()` call where callers can open
+        this file in write mode. Writing a Content file should happen at most
+        once, and the logic is not obvious (see ``write_file``).
+
+        At the end of the day, the caller can close the returned File and reopen
+        it in whatever mode they want, but we're trying to gently discourage
+        that kind of usage.
+        """
+        return get_storage().open(self.path, 'rb')
 
     def write_file(self, file: File) -> None:
         """
         Write file contents to the file storage backend.
 
-        This function does nothing if the file already exists.
+        This function does nothing if the file already exists. Note that Content
+        is supposed to be immutable, so this should normally only be called once
+        for a given Content row.
         """
         storage = get_storage()
-        file_path = self.file_path()
 
         # There are two reasons why a file might already exist even if the the
         # Content row is new:
@@ -314,15 +359,15 @@ def write_file(self, file: File) -> None:
         # 3. Similar to (2), but only part of the file was written before an
         # error occurred. This seems unlikely, but possible if the underlying
         # storage engine writes in chunks.
-        if storage.exists(file_path) and storage.size(file_path) == file.size:
+        if storage.exists(self.path) and storage.size(self.path) == file.size:
             return
-        storage.save(file_path, file)
+        storage.save(self.path, file)
 
     def file_url(self) -> str:
         """
         This will sometimes be a time-limited signed URL.
         """
-        return content_file_url(self.file_path())
+        return get_storage().url(self.path)
 
     def clean(self):
         """
@@ -361,7 +406,3 @@ class Meta:
         ]
         verbose_name = "Content"
         verbose_name_plural = "Contents"
-
-
-def content_file_url(file_path):
-    return get_storage().url(file_path)
diff --git a/test_settings.py b/test_settings.py
@@ -69,3 +69,14 @@ def root(*args):
     'DEFAULT_PAGINATION_CLASS': 'edx_rest_framework_extensions.paginators.DefaultPagination',
     'PAGE_SIZE': 10,
 }
+
+######################## LEARNING CORE SETTINGS ########################
+
+OPENEDX_LEARNING = {
+    'MEDIA': {
+        'BACKEND': 'django.core.files.storage.InMemoryStorage',
+        'OPTIONS': {
+            'location': MEDIA_ROOT + "_private"
+        }
+    }
+}