Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Asset handling updates (config, admin) #234

Merged
merged 2 commits into from
Oct 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion openedx_learning/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""
Open edX Learning ("Learning Core").
"""
__version__ = "0.13.1"
__version__ = "0.14.0"
42 changes: 17 additions & 25 deletions openedx_learning/apps/authoring/components/admin.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
"""
Django admin for components models
"""
import base64

from django.contrib import admin
from django.template.defaultfilters import filesizeformat
from django.urls import reverse
Expand Down Expand Up @@ -67,35 +69,29 @@ def get_queryset(self, request):
)

fields = [
"format_key",
"key",
"format_size",
"learner_downloadable",
"rendered_data",
]
readonly_fields = [
"content",
"format_key",
"key",
"format_size",
"rendered_data",
]
extra = 0

def has_file(self, cvc_obj):
return cvc_obj.content.has_file

def rendered_data(self, cvc_obj):
return content_preview(cvc_obj)

@admin.display(description="Size")
def format_size(self, cvc_obj):
return filesizeformat(cvc_obj.content.size)

@admin.display(description="Key")
def format_key(self, cvc_obj):
return format_html(
'<a href="{}">{}</a>',
link_for_cvc(cvc_obj),
# reverse("admin:components_content_change", args=(cvc_obj.content_id,)),
cvc_obj.key,
)


@admin.register(ComponentVersion)
class ComponentVersionAdmin(ReadOnlyModelAdmin):
Expand Down Expand Up @@ -129,18 +125,6 @@ def get_queryset(self, request):
)


def link_for_cvc(cvc_obj: ComponentVersionContent) -> str:
"""
Get the download URL for the given ComponentVersionContent instance
"""
return "/media_server/component_asset/{}/{}/{}/{}".format(
cvc_obj.content.learning_package.key,
cvc_obj.component_version.component.key,
cvc_obj.component_version.version_num,
cvc_obj.key,
)


def format_text_for_admin_display(text: str) -> SafeText:
"""
Get the HTML to display the given plain text (preserving formatting)
Expand All @@ -158,9 +142,17 @@ def content_preview(cvc_obj: ComponentVersionContent) -> SafeText:
content_obj = cvc_obj.content

if content_obj.media_type.type == "image":
# This base64 encoding looks really goofy and is bad for performance,
# but image previews in the admin are extremely useful, and this lets us
# have them without creating a separate view in Learning Core. (Keep in
# mind that these assets are private, so they cannot be accessed via the
# MEDIA_URL like most Django uploaded assets.)
data = content_obj.read_file().read()
return format_html(
'<img src="{}" style="max-width: 100%;" />',
content_obj.file_url(),
'<img src="data:{};base64, {}" style="max-width: 100%;" /><br><pre>{}</pre>',
content_obj.mime_type,
base64.encodebytes(data).decode('utf8'),
content_obj.os_path(),
)

return format_text_for_admin_display(
Expand Down
34 changes: 30 additions & 4 deletions openedx_learning/apps/authoring/components/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,15 @@ def get_component_by_uuid(uuid: UUID) -> Component:


def get_component_version_by_uuid(uuid: UUID) -> ComponentVersion:
return ComponentVersion.objects.get(publishable_entity_version__uuid=uuid)
return (
ComponentVersion
.objects
.select_related(
"component",
"component__learning_package",
)
.get(publishable_entity_version__uuid=uuid)
)


def component_exists_by_key(
Expand Down Expand Up @@ -395,7 +403,21 @@ def create_component_version_content(
) -> ComponentVersionContent:
"""
Add a Content to the given ComponentVersion

We don't allow keys that would be absolute paths, e.g. ones that start with
'/'. Storing these causes headaches with building relative paths and because
of mismatches with things that expect a leading slash and those that don't.
So for safety and consistency, we strip off leading slashes and emit a
warning when we do.
"""
if key.startswith('/'):
logger.warning(
"Absolute paths are not supported: "
f"removed leading '/' from ComponentVersion {component_version_id} "
f"content key: {repr(key)} (content_id: {content_id})"
)
key = key.lstrip('/')

cvrc, _created = ComponentVersionContent.objects.get_or_create(
component_version_id=component_version_id,
content_id=content_id,
Expand Down Expand Up @@ -510,7 +532,12 @@ def _error_header(error: AssetError) -> dict[str, str]:

# Check: Does the ComponentVersion exist?
try:
component_version = get_component_version_by_uuid(component_version_uuid)
component_version = (
ComponentVersion
.objects
.select_related("component", "component__learning_package")
.get(publishable_entity_version__uuid=component_version_uuid)
)
except ComponentVersion.DoesNotExist:
# No need to add headers here, because no ComponentVersion was found.
logger.error(f"Asset Not Found: No ComponentVersion with UUID {component_version_uuid}")
Expand Down Expand Up @@ -567,10 +594,9 @@ def _error_header(error: AssetError) -> dict[str, str]:
# At this point, we know that there is valid Content that we want to send.
# This adds Content-level headers, like the hash/etag and content type.
info_headers.update(contents_api.get_content_info_headers(content))
stored_file_path = content.file_path()

# Recompute redirect headers (reminder: this should never be cached).
redirect_headers = contents_api.get_redirect_headers(stored_file_path, public)
redirect_headers = contents_api.get_redirect_headers(content.path, public)
logger.info(
"Asset redirect (uncached metadata): "
f"{component_version_uuid}/{asset_path} -> {redirect_headers}"
Expand Down
39 changes: 29 additions & 10 deletions openedx_learning/apps/authoring/contents/admin.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
"""
Django admin for contents models
"""
import base64

from django.contrib import admin
from django.utils.html import format_html

Expand All @@ -16,7 +18,6 @@ class ContentAdmin(ReadOnlyModelAdmin):
"""
list_display = [
"hash_digest",
"file_link",
"learning_package",
"media_type",
"size",
Expand All @@ -29,24 +30,42 @@ class ContentAdmin(ReadOnlyModelAdmin):
"media_type",
"size",
"created",
"file_link",
"text_preview",
"has_file",
"path",
"os_path",
"text_preview",
"image_preview",
]
list_filter = ("media_type", "learning_package")
search_fields = ("hash_digest",)

def file_link(self, content: Content):
if not content.has_file:
return ""
@admin.display(description="OS Path")
def os_path(self, content: Content):
return content.os_path() or ""

return format_html(
'<a href="{}">Download</a>',
content.file_url(),
)
def path(self, content: Content):
return content.path if content.has_file else ""

def text_preview(self, content: Content):
if not content.text:
return ""
return format_html(
'<pre style="white-space: pre-wrap;">\n{}\n</pre>',
content.text,
)

def image_preview(self, content: Content):
"""
Return HTML for an image, if that is the underlying Content.

Otherwise, just return a blank string.
"""
if content.media_type.type != "image":
return ""

data = content.read_file().read()
return format_html(
'<img src="data:{};base64, {}" style="max-width: 100%;" />',
content.mime_type,
base64.encodebytes(data).decode('utf8'),
)
81 changes: 61 additions & 20 deletions openedx_learning/apps/authoring/contents/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,13 @@

from functools import cache, cached_property

from django.core.exceptions import ValidationError
from django.conf import settings
from django.core.exceptions import ImproperlyConfigured, ValidationError
from django.core.files.base import File
from django.core.files.storage import Storage, default_storage
from django.core.files.storage import Storage
from django.core.validators import MaxValueValidator
from django.db import models
from django.utils.module_loading import import_string

from ....lib.fields import MultiCollationTextField, case_insensitive_char_field, hash_field, manual_date_time_field
from ....lib.managers import WithRelationsManager
Expand All @@ -28,13 +30,25 @@ def get_storage() -> Storage:
"""
Return the Storage instance for our Content file persistence.

For right now, we're still only storing inline text and not static assets in
production, so just return the default_storage. We're also going through a
transition between Django 3.2 -> 4.2, where storage configuration has moved.
This will first search for an OPENEDX_LEARNING config dictionary and return
a Storage subclass based on that configuration.

Make this work properly as part of adding support for static assets.
If there is no value for the OPENEDX_LEARNING setting, we return the default
MEDIA storage class. TODO: Should we make it just error instead?
"""
return default_storage
config_dict = getattr(settings, 'OPENEDX_LEARNING', {})

if 'MEDIA' in config_dict:
storage_cls = import_string(config_dict['MEDIA']['BACKEND'])
options = config_dict['MEDIA'].get('OPTIONS', {})
return storage_cls(**options)

raise ImproperlyConfigured(
"Cannot access file storage: Missing the OPENEDX_LEARNING['MEDIA'] "
"setting, which should have a storage BACKEND and OPTIONS values for "
"a Storage subclass. These files should be stored in a location that "
"is NOT publicly accessible to browsers (so not in the MEDIA_ROOT)."
)


class MediaType(models.Model):
Expand Down Expand Up @@ -282,22 +296,53 @@ def mime_type(self) -> str:
"""
return str(self.media_type)

def file_path(self):
@cached_property
def path(self):
"""
Logical path at which this content is stored (or would be stored).

This path is relative to OPENEDX_LEARNING['MEDIA'] configured storage
root. This file may not exist because has_file=False, or because we
haven't written the file yet (this is the method we call when trying to
figure out where the file *should* go).
"""
return f"content/{self.learning_package.uuid}/{self.hash_digest}"

def os_path(self):
"""
The full OS path for the underlying file for this Content.

This will not be supported by all Storage class types.

This will return ``None`` if there is no backing file (has_file=False).
"""
Path at which this content is stored (or would be stored).
if self.has_file:
return get_storage().path(self.path)
return None

This path is relative to configured storage root.
def read_file(self) -> File:
"""
return f"{self.learning_package.uuid}/{self.hash_digest}"
Get a File object that has been open for reading.

We intentionally don't expose an `open()` call where callers can open
this file in write mode. Writing a Content file should happen at most
once, and the logic is not obvious (see ``write_file``).

At the end of the day, the caller can close the returned File and reopen
it in whatever mode they want, but we're trying to gently discourage
that kind of usage.
"""
return get_storage().open(self.path, 'rb')

def write_file(self, file: File) -> None:
"""
Write file contents to the file storage backend.

This function does nothing if the file already exists.
This function does nothing if the file already exists. Note that Content
is supposed to be immutable, so this should normally only be called once
for a given Content row.
"""
storage = get_storage()
file_path = self.file_path()

# There are two reasons why a file might already exist even if the the
# Content row is new:
Expand All @@ -314,15 +359,15 @@ def write_file(self, file: File) -> None:
# 3. Similar to (2), but only part of the file was written before an
# error occurred. This seems unlikely, but possible if the underlying
# storage engine writes in chunks.
if storage.exists(file_path) and storage.size(file_path) == file.size:
if storage.exists(self.path) and storage.size(self.path) == file.size:
return
storage.save(file_path, file)
storage.save(self.path, file)

def file_url(self) -> str:
"""
This will sometimes be a time-limited signed URL.
"""
return content_file_url(self.file_path())
return get_storage().url(self.path)

def clean(self):
"""
Expand Down Expand Up @@ -361,7 +406,3 @@ class Meta:
]
verbose_name = "Content"
verbose_name_plural = "Contents"


def content_file_url(file_path):
return get_storage().url(file_path)
11 changes: 11 additions & 0 deletions test_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,14 @@ def root(*args):
'DEFAULT_PAGINATION_CLASS': 'edx_rest_framework_extensions.paginators.DefaultPagination',
'PAGE_SIZE': 10,
}

######################## LEARNING CORE SETTINGS ########################

OPENEDX_LEARNING = {
'MEDIA': {
'BACKEND': 'django.core.files.storage.InMemoryStorage',
'OPTIONS': {
'location': MEDIA_ROOT + "_private"
}
}
}
Loading