From 4b93cbcf2f23ea8bbba1c22a228d248fa53f4b2a Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Fri, 8 Dec 2023 12:59:59 -0500 Subject: [PATCH 1/9] feat: add Collections --- openedx_learning/core/collections/__init__.py | 0 openedx_learning/core/collections/api.py | 112 ++++++++ openedx_learning/core/collections/apps.py | 24 ++ .../collections/migrations/0001_initial.py | 100 +++++++ .../core/collections/migrations/__init__.py | 0 openedx_learning/core/collections/models.py | 250 ++++++++++++++++++ openedx_learning/core/collections/readme.rst | 22 ++ test_settings.py | 7 +- .../core/collections/__init__.py | 0 .../core/collections/test_api.py | 76 ++++++ 10 files changed, 588 insertions(+), 3 deletions(-) create mode 100644 openedx_learning/core/collections/__init__.py create mode 100644 openedx_learning/core/collections/api.py create mode 100644 openedx_learning/core/collections/apps.py create mode 100644 openedx_learning/core/collections/migrations/0001_initial.py create mode 100644 openedx_learning/core/collections/migrations/__init__.py create mode 100644 openedx_learning/core/collections/models.py create mode 100644 openedx_learning/core/collections/readme.rst create mode 100644 tests/openedx_learning/core/collections/__init__.py create mode 100644 tests/openedx_learning/core/collections/test_api.py diff --git a/openedx_learning/core/collections/__init__.py b/openedx_learning/core/collections/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/openedx_learning/core/collections/api.py b/openedx_learning/core/collections/api.py new file mode 100644 index 00000000..1b223081 --- /dev/null +++ b/openedx_learning/core/collections/api.py @@ -0,0 +1,112 @@ +""" +API to manipulate Collections. + +This API sacrifices some power in order to try to keep things simpler. For +instance, ``add_to_collection`` and ``remove_from_collection`` each create a +new CollectionChangeSet when they're invoked. 
The data model supports having + +""" +from __future__ import annotations + +from datetime import datetime, timezone + +from django.db.models import F, QuerySet +from django.db.transaction import atomic + +from .models import ( + Collection, CollectionPublishableEntity, CollectionChangeSet, + AddToCollection, RemoveFromCollection, PublishEntity, +) + + +def create_collection( + learning_package_id: int, + key: str, + title: str, + pub_entities_qset: QuerySet | None = None, + created: datetime | None = None, + created_by: int | None = None, +) -> Collection: + """ + Create a Collection and populate with a QuerySet of PublishableEntity. + """ + if not created: + created = datetime.now(tz=timezone.utc) + + with atomic(): + collection = Collection( + learning_package_id=learning_package_id, + key=key, + title=title, + created=created, + created_by=created_by, + ) + collection.full_clean() + collection.save() + + # We always create a CollectionChangeSet for the initial Collection + # creation, even if we don't put anything into it. + change_set = CollectionChangeSet.objects.create( + collection=collection, + version_num=1, + ) + + if pub_entities_qset: + add_to_collection( + collection.id, pub_entities_qset, change_set=change_set + ) + + return collection + + +def get_collection(collection_id: int) -> Collection: + pass + + +def add_to_collection( + collection_id: int, + pub_entities_qset: QuerySet, + change_set: CollectionChangeSet | None = None, + created: datetime | None = None +)-> CollectionChangeSet: + if not change_set: + last_change_set = CollectionChangeSet.objects \ + .filter(collection_id=collection_id) \ + .order_by('-version_num') \ + .first() + if last_change_set: + next_version_num = last_change_set.version_num + 1 + else: + next_version_num = 1 + + change_set = CollectionChangeSet.objects.create( + collection_id=collection_id, + version_num=next_version_num, + ) + + # Add the joins so we can efficiently query what the published versions are. 
+ qset = pub_entities_qset.select_related('published', 'published__version') + adds = ( + AddToCollection( + change_set=change_set, + entity=pub_ent, + published_version=pub_ent.published.version, + ) + for pub_ent in qset.all() + ) + # bulk_create will cast ``adds`` into a list and fully evaluate it. + # This will likely be okay to start, and if Collections stay in the + # hundreds. When it gets bigger, we might want to switch to + # something fancier that only reads in chunks. + # + # We don't need to use atomic() here because bulk_create already works + # atomically. If we got fancier with processing these in chunks, we'd need + # to wrap it in an atomic() context. + AddToCollection.objects.bulk_create(adds) + + return change_set + + +def remove_from_collection(collection_id: int, pub_entities_qset: QuerySet) -> CollectionChangeSet: + pass + diff --git a/openedx_learning/core/collections/apps.py b/openedx_learning/core/collections/apps.py new file mode 100644 index 00000000..3d7e1038 --- /dev/null +++ b/openedx_learning/core/collections/apps.py @@ -0,0 +1,24 @@ +""" +Django metadata for the Collections Django application. +""" +from django.apps import AppConfig + + +class CollectionsConfig(AppConfig): + """ + Configuration for the Collections Django application. + """ + + name = "openedx_learning.core.collections" + verbose_name = "Learning Core: Collections" + default_auto_field = "django.db.models.BigAutoField" + label = "oel_collections" + + def ready(self): + """ + Register the ComponentCollection, ComponentCollectionVersion relation. 
+ """ + #from ..publishing.api import register_content_models # pylint: disable=import-outside-toplevel + #from .models import ComponentCollection, ComponentCollectionVersion # pylint: disable=import-outside-toplevel + + #register_content_models(ComponentCollection, ComponentCollectionVersion) diff --git a/openedx_learning/core/collections/migrations/0001_initial.py b/openedx_learning/core/collections/migrations/0001_initial.py new file mode 100644 index 00000000..390df47a --- /dev/null +++ b/openedx_learning/core/collections/migrations/0001_initial.py @@ -0,0 +1,100 @@ +# Generated by Django 3.2.23 on 2023-12-07 01:38 + +from django.conf import settings +import django.core.validators +from django.db import migrations, models +import django.db.models.deletion +import openedx_learning.lib.fields +import openedx_learning.lib.validators +import uuid + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + ('oel_publishing', '0002_alter_fk_on_delete'), + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] + + operations = [ + migrations.CreateModel( + name='Collection', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('uuid', models.UUIDField(default=uuid.uuid4, editable=False, unique=True, verbose_name='UUID')), + ('key', openedx_learning.lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_bin', 'sqlite': 'BINARY'}, max_length=500)), + ('title', openedx_learning.lib.fields.MultiCollationCharField(blank=True, db_collations={'mysql': 'utf8mb4_unicode_ci', 'sqlite': 'NOCASE'}, default='', max_length=500)), + ('created', models.DateTimeField(validators=[openedx_learning.lib.validators.validate_utc_datetime])), + ('created_by', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to=settings.AUTH_USER_MODEL)), + ('learning_package', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, 
to='oel_publishing.learningpackage')), + ], + ), + migrations.CreateModel( + name='CollectionChangeSet', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('version_num', models.PositiveBigIntegerField(validators=[django.core.validators.MinValueValidator(1)])), + ('created', models.DateTimeField(validators=[openedx_learning.lib.validators.validate_utc_datetime])), + ('collection', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='oel_collections.collection')), + ], + ), + migrations.CreateModel( + name='RemoveFromCollection', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('change_set', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='oel_collections.collectionchangeset')), + ('entity', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='oel_publishing.publishableentity')), + ], + ), + migrations.CreateModel( + name='PublishEntity', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('change_set', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='oel_collections.collectionchangeset')), + ('log_record', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='oel_publishing.publishlogrecord')), + ], + ), + migrations.CreateModel( + name='CollectionPublishableEntity', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('collection', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='publishable_entities', to='oel_collections.collection')), + ('entity', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='oel_publishing.publishableentity')), + ], + ), + migrations.CreateModel( + name='AddToCollection', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, 
serialize=False, verbose_name='ID')), + ('change_set', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='oel_collections.collectionchangeset')), + ('entity', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='oel_publishing.publishableentity')), + ('published_version', models.ForeignKey(null=True, on_delete=django.db.models.deletion.CASCADE, to='oel_publishing.publishableentityversion')), + ], + ), + migrations.AddConstraint( + model_name='removefromcollection', + constraint=models.UniqueConstraint(fields=('change_set', 'entity'), name='oel_collections_refc_uniq_cs_ent'), + ), + migrations.AddConstraint( + model_name='publishentity', + constraint=models.UniqueConstraint(fields=('change_set', 'log_record'), name='oel_collections_pe_uniq_cs_lr'), + ), + migrations.AddConstraint( + model_name='collectionpublishableentity', + constraint=models.UniqueConstraint(fields=('collection', 'entity'), name='oel_collections_cpe_uniq_col_ent'), + ), + migrations.AddConstraint( + model_name='collectionchangeset', + constraint=models.UniqueConstraint(fields=('collection', 'version_num'), name='oel_collections_ccs_uniq_col_vn'), + ), + migrations.AddConstraint( + model_name='collection', + constraint=models.UniqueConstraint(fields=('learning_package', 'key'), name='oel_collections_col_uniq_lp_key'), + ), + migrations.AddConstraint( + model_name='addtocollection', + constraint=models.UniqueConstraint(fields=('change_set', 'entity'), name='oel_collections_aetc_uniq_cs_ent'), + ), + ] diff --git a/openedx_learning/core/collections/migrations/__init__.py b/openedx_learning/core/collections/migrations/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/openedx_learning/core/collections/models.py b/openedx_learning/core/collections/models.py new file mode 100644 index 00000000..d6f4b058 --- /dev/null +++ b/openedx_learning/core/collections/models.py @@ -0,0 +1,250 @@ +""" +TLDR Guidelines: + +1. 
DO NOT modify these models to store full version snapshots. +2. DO NOT use these models to try to reconstruct historical versions of + Collections for fast querying. + +If you're trying to do either of these things, you probably want a new model or +app. For more details, read on. + +The goal of these models is to provide a lightweight method of organizing +PublishableEntities. The first use case for this is modeling the structure of a +v1 Content Library within a LearningPackage. This is what we'll use the +Collection model for. + +An important thing to note here is that Collections are *NOT* publishable +entities themselves. They have no "Draft" or "Published" versions. Collections +are never "published", though the things inside of them are. + +When a LibraryContentBlock makes use of a Content Library, it copies all of +the items it will use into the Course itself. It will also store a version +on the LibraryContentBlock–this is a MongoDB ObjectID in v1 and an integer in +v2 Libraries. Later on, the LibraryContentBlock will want to check back to see +if any updates have been made, using its version as a key. If a new version +exists, the course team has the option of re-copying data from the Library. + +ModuleStore based v1 Libraries and Blockstore-based v2 libraries both version +the entire library in a series of snapshots. This makes it difficult to have +very large libraries, which is an explicit goal for Modular Learning. In +Learning Core, we've moved to tracking the versions of individual Components to +address this issue. But that means we no longer have a single version indicator +for "has anything here changed"? + +We *could* have put that version in the ``publishing`` app's PublishLog, but +that would make it too broad. We want the ability to eventually collapse many v1 +Libraries into a single Learning Core backed v2 Library. 
If we tracked the +versioning in only a central location, then we'd have many false positives where +the version was bumped because something else in the Learning Package changed. +So instead, we're creating a new Collection model inside the LearningPackage to +track that concept. + +A critical takeaway is that we don't have to store snapshots of every version of +a Collection, because that data has been copied over by the LibraryContentBlock. +We only need to store the current state of the Collection, and increment the +version numbers when changes happen. This will allow the LibraryContentBlock to +check in and re-copy over the latest version if the course team desires. + +That's why these models only store the current state of a Collection. Unlike the +``components`` app, ``collections`` does not store fully materialized snapshots +of past versions. This is done intentionally in order to save space and reduce +the cost of writes. Collections may grow to be very large, and we don't want to +be writing N rows with every version, where N is the number of +PublishableEntities in a Collection. + +These models do store changesets, where the number of rows grows in proportion +to the number of things that are actually changing (instead of copying over +everything on every version). This is intended to make it easier to figure out +what changed between two given versions of a Collection. A LibraryContentBlock +in a course will have stored the version number of the last time it copied data +from the Collection, and we can eventually surface this data to the user. + +While it's possible to reconstruct past versions of Collections based off of +this changeset data, it's going to be a very slow process to do so, and it is +strongly discouraged. 
+""" +from __future__ import annotations + +from django.conf import settings +from django.core.validators import MinValueValidator +from django.db import models + +from ..publishing.models import ( + LearningPackage, + PublishableEntity, + PublishableEntityVersion, + PublishLogRecord, +) + +from openedx_learning.lib.fields import ( + case_insensitive_char_field, + immutable_uuid_field, + key_field, + manual_date_time_field, +) + + +class Collection(models.Model): + """ + A Collection is a tracked grouping of PublishableEntities. + """ + uuid = immutable_uuid_field() + learning_package = models.ForeignKey(LearningPackage, on_delete=models.CASCADE) + key = key_field() + + title = case_insensitive_char_field(max_length=500, blank=True, default="") + created = manual_date_time_field() + created_by = models.ForeignKey( + settings.AUTH_USER_MODEL, + on_delete=models.SET_NULL, + null=True, + blank=True, + ) + + class Meta: + constraints = [ + # The version_num must be unique for any given Collection. + models.UniqueConstraint( + fields=[ + "learning_package", + "key", + ], + name="oel_collections_col_uniq_lp_key", + ) + ] + + +class CollectionPublishableEntity(models.Model): + """ + Collection -> PublishableEntity association. + """ + collection = models.ForeignKey( + Collection, + on_delete=models.CASCADE, + related_name="publishable_entities" + ) + entity = models.ForeignKey( + PublishableEntity, + on_delete=models.CASCADE, + ) + + class Meta: + constraints = [ + # Prevent race conditions from making multiple rows associating the + # same Collection to the same Entity. + models.UniqueConstraint( + fields=[ + "collection", + "entity", + ], + name="oel_collections_cpe_uniq_col_ent", + ) + ] + + +class CollectionChangeSet(models.Model): + """ + Represents an atomic set of changes to a Collection. + + There are currently three ways a Collection can change: + + 1. PublishableEntities are added (AddToCollection) + 2. PublishableEntities are removed (RemoveFromCollection) + 3. 
The published version of a PublishableEntity changes (PublishLogRecord) + """ + collection = models.ForeignKey(Collection, on_delete=models.CASCADE) + version_num = models.PositiveBigIntegerField( + null=False, + validators=[MinValueValidator(1)], + ) + created = manual_date_time_field() + + class Meta: + constraints = [ + # The version_num must be unique for any given Collection. + models.UniqueConstraint( + fields=[ + "collection", + "version_num", + ], + name="oel_collections_ccs_uniq_col_vn", + ) + ] + + +class AddToCollection(models.Model): + """ + A record for when a PublishableEntity is added to a Collection. + + We also record the published version of the PublishableEntity at the time + it's added to the Collection. This will make it easier to reconstruct the + state of a given version of a Collection if it's necessary to do so. + + Note that something may be removed from a Collection and then re-added at a + later time. + """ + change_set = models.ForeignKey(CollectionChangeSet, on_delete=models.CASCADE) + entity = models.ForeignKey(PublishableEntity, on_delete=models.CASCADE) + + # We want to capture the published version of the entity at the time it's + # added to the Collection. This may be null for entities that have not yet + # been published. + published_version = models.ForeignKey( + PublishableEntityVersion, on_delete=models.CASCADE, null=True, + ) + + class Meta: + constraints = [ + # We can't add the same Entity more than once in the same ChangeSet. + models.UniqueConstraint( + fields=[ + "change_set", + "entity", + ], + name="oel_collections_aetc_uniq_cs_ent", + ) + ] + + +class RemoveFromCollection(models.Model): + """ + A record for when a PublishableEntity is removed from a Collection. + + Note that something may be removed from a Collection, re-added at a later + time, and then removed again. 
+ """ + change_set = models.ForeignKey(CollectionChangeSet, on_delete=models.CASCADE) + entity = models.ForeignKey(PublishableEntity, on_delete=models.CASCADE) + + class Meta: + constraints = [ + # We can't add the same Entity more than once in the same ChangeSet. + models.UniqueConstraint( + fields=[ + "change_set", + "entity", + ], + name="oel_collections_refc_uniq_cs_ent", + ) + ] + + +class PublishEntity(models.Model): + """ + A record for when the published version of a PublishableEntity changes. + """ + change_set = models.ForeignKey(CollectionChangeSet, on_delete=models.CASCADE) + log_record = models.ForeignKey(PublishLogRecord, on_delete=models.CASCADE) + + class Meta: + constraints = [ + # The same PublishLogRecord shouldn't show up multiple times for the + # same ChangeSet. + models.UniqueConstraint( + fields=[ + "change_set", + "log_record", + ], + name="oel_collections_pe_uniq_cs_lr", + ) + ] diff --git a/openedx_learning/core/collections/readme.rst b/openedx_learning/core/collections/readme.rst new file mode 100644 index 00000000..94a3cf29 --- /dev/null +++ b/openedx_learning/core/collections/readme.rst @@ -0,0 +1,22 @@ +Collections App +=============== + +The ``collections`` app ... + +Motivation +---------- + + +Intended Use Cases +------------------ + +* + + + +Architecture Guidelines +----------------------- + +Things to remember: + +* Collections may grow very large. 
"""
Tests of the Collection app's python API
"""
from datetime import datetime, timezone
from uuid import UUID

from django.core.exceptions import ValidationError

from openedx_learning.core.collections import api as collections_api
from openedx_learning.core.publishing import api as publishing_api
from openedx_learning.core.publishing.models import PublishableEntity

from openedx_learning.lib.test_utils import TestCase


class CollectionsTestCase(TestCase):
    """
    Test creating Collections.
    """
    @classmethod
    def setUpTestData(cls) -> None:
        super().setUpTestData()
        cls.created = datetime(2023, 12, 7, 18, 23, 50, tzinfo=timezone.utc)
        cls.package = publishing_api.create_learning_package(
            "collections_test_learning_pkg_key",
            "Collections Testing LearningPackage 🔥",
            created=cls.created,
        )

        # Make and Publish one PublishableEntity
        cls.published_entity = publishing_api.create_publishable_entity(
            cls.package.id,
            "my_entity_published_example",
            cls.created,
            created_by=None,
        )
        cls.pe_version = publishing_api.create_publishable_entity_version(
            entity_id=cls.published_entity.id,
            version_num=1,
            title="An Entity that we'll Publish 🌴",
            created=cls.created,
            created_by=None,
        )
        publishing_api.publish_all_drafts(
            cls.package.id,
            message="Publish from CollectionsTestCase.setUpTestData",
            published_at=cls.created,
        )

        # Leave another PublishableEntity in Draft.
        cls.draft_entity = publishing_api.create_publishable_entity(
            cls.package.id,
            "my_entity_draft_example",
            cls.created,
            created_by=None,
        )
        cls.de_version = publishing_api.create_publishable_entity_version(
            entity_id=cls.draft_entity.id,
            version_num=1,
            title="An Entity that we'll keep in Draft 🌴",
            created=cls.created,
            created_by=None,
        )

    def test_bootstrap_only_published(self) -> None:
        """
        Normal flow with no errors.
        """
        collection = collections_api.create_collection(
            self.package.id,
            key="test_bootstrap_only_published_collection",
            title="Test Bootstrap 🦃 Only Published Collection",
            pub_entities_qset=PublishableEntity.objects.filter(
                id=self.published_entity.id
            ),
        )
        # The original test made no assertions at all; verify the Collection
        # round-trips the values we passed in.
        self.assertEqual(collection.key, "test_bootstrap_only_published_collection")
        self.assertEqual(collection.title, "Test Bootstrap 🦃 Only Published Collection")
        self.assertEqual(collection.learning_package_id, self.package.id)
        self.assertEqual(collection.created, self.created)
def create_collection(
    learning_package_id: int,
    key: str,
    title: str,
    pub_entities_qset: QuerySet | None = None,
    created: datetime | None = None,
    created_by: int | None = None,
) -> Collection:
    """
    Create a Collection, optionally populated from a PublishableEntity QuerySet.
    """
    if not created:
        created = datetime.now(tz=timezone.utc)

    with atomic():
        collection = Collection(
            learning_package_id=learning_package_id,
            key=key,
            title=title,
            created=created,
            created_by=created_by,
        )
        collection.full_clean()
        collection.save()

        # add_to_collection is what creates our initial CollectionChangeSet, so
        # we always call it, even if we're just creating an empty Collection.
        if pub_entities_qset is None:
            # Bug fix: the empty-queryset default must be an empty queryset of
            # *PublishableEntity* (the thing being added), not the PublishEntity
            # change-record model — and ``.none`` must actually be *called*;
            # the original assigned the unbound method itself, which would
            # blow up when add_to_collection tried to iterate it.
            from ..publishing.models import PublishableEntity  # local: only needed here
            pub_entities_qset = PublishableEntity.objects.none()

        add_to_collection(collection.id, pub_entities_qset, created=created)

    return collection


def add_to_collection(
    collection_id: int,
    pub_entities_qset: QuerySet,
    created: datetime | None = None,
) -> CollectionChangeSet:
    """
    Add a QuerySet of PublishableEntities to a Collection.

    Always creates a new CollectionChangeSet with the next available version
    number to record the additions, and returns it.
    """
    last_change_set = (
        CollectionChangeSet.objects
        .filter(collection_id=collection_id)
        .order_by('-version_num')
        .first()
    )
    next_version_num = last_change_set.version_num + 1 if last_change_set else 1

    change_set = CollectionChangeSet.objects.create(
        collection_id=collection_id,
        version_num=next_version_num,
        created=created,
    )

    # Add the joins so we can efficiently query what the published versions are.
    qset = pub_entities_qset.select_related('published', 'published__version')
    adds = (
        AddToCollection(
            change_set=change_set,
            entity=pub_ent,
            # NULL published_version for entities that were never published.
            published_version=(
                pub_ent.published.version if hasattr(pub_ent, 'published') else None
            ),
        )
        for pub_ent in qset.all()
    )
    # bulk_create fully evaluates ``adds``; fine while Collections stay in the
    # hundreds, and bulk_create is already atomic on its own.
    AddToCollection.objects.bulk_create(adds)

    return change_set
qset = pub_entities_qset.select_related('published', 'published__version') From 566f56bbbc385ade098f0d23a1c98dc48654a1c7 Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Mon, 11 Dec 2023 22:59:23 -0500 Subject: [PATCH 3/9] fix: add the collection-publishable-entities relationships --- openedx_learning/core/collections/admin.py | 67 +++++++++++++++++++ openedx_learning/core/collections/api.py | 59 +++++++++++----- .../migrations/0002_auto_20231211_1813.py | 25 +++++++ openedx_learning/core/collections/models.py | 19 +++++- openedx_learning/core/publishing/api.py | 2 +- openedx_learning/core/publishing/models.py | 6 +- .../core/collections/test_api.py | 10 ++- 7 files changed, 164 insertions(+), 24 deletions(-) create mode 100644 openedx_learning/core/collections/admin.py create mode 100644 openedx_learning/core/collections/migrations/0002_auto_20231211_1813.py diff --git a/openedx_learning/core/collections/admin.py b/openedx_learning/core/collections/admin.py new file mode 100644 index 00000000..ee756657 --- /dev/null +++ b/openedx_learning/core/collections/admin.py @@ -0,0 +1,67 @@ +""" +Django admin for Collections. + +This is extremely bare-bones at the moment, and basically gives you just enough +information to let you know whether it's working or not. 
+""" +from django.contrib import admin + +from openedx_learning.lib.admin_utils import ReadOnlyModelAdmin + +from ..publishing.models import PublishableEntity +from .models import ( + AddToCollection, + Collection, + CollectionChangeSet, + CollectionPublishableEntity, + PublishEntity, + RemoveFromCollection +) + + +class CollectionChangeSetTabularInline(admin.TabularInline): + model = CollectionChangeSet + fields = ["version_num", "created"] + readonly_fields = ["version_num", "created"] + + +class PublishableEntityInline(admin.TabularInline): + model = Collection.publishable_entities.through + + +@admin.register(Collection) +class CollectionAdmin(ReadOnlyModelAdmin): + """ + Read-only admin for LearningPackage model + """ + fields = ["learning_package", "key", "title", "uuid", "created", "created_by"] + readonly_fields = ["learning_package", "key", "title", "uuid", "created", "created_by"] + list_display = ["learning_package", "key", "title", "uuid", "created", "created_by"] + search_fields = ["key", "title", "uuid"] + list_filter = ["learning_package"] + + inlines = [ + CollectionChangeSetTabularInline, + PublishableEntityInline, + ] + + +class AddToCollectionTabularInline(admin.TabularInline): + model = AddToCollection + + +class RemoveFromCollectionTabularInline(admin.TabularInline): + model = RemoveFromCollection + + +class PublishEntityTabularInline(admin.TabularInline): + model = PublishEntity + + +@admin.register(CollectionChangeSet) +class CollectionChangeSetAdmin(ReadOnlyModelAdmin): + inlines = [ + AddToCollectionTabularInline, + RemoveFromCollectionTabularInline, + PublishEntityTabularInline, + ] diff --git a/openedx_learning/core/collections/api.py b/openedx_learning/core/collections/api.py index a28cd2e6..337f0d2b 100644 --- a/openedx_learning/core/collections/api.py +++ b/openedx_learning/core/collections/api.py @@ -3,7 +3,10 @@ This API sacrifices some power in order to try to keep things simpler. 
For instance, ``add_to_collection`` and ``remove_from_collection`` each create a -new CollectionChangeSet when they're invoked. The data model supports having +new CollectionChangeSet when they're invoked. The data model supports doing +multiple operations at the same time–in theory you could publish some entities, +add some entities, and delete them in the same operation. But then we could +bring """ from __future__ import annotations @@ -55,7 +58,10 @@ def create_collection( def get_collection(collection_id: int) -> Collection: - pass + """ + Get a Collection by ID. + """ + return Collection.objects.get(id=collection_id) def add_to_collection( @@ -80,23 +86,40 @@ def add_to_collection( # Add the joins so we can efficiently query what the published versions are. qset = pub_entities_qset.select_related('published', 'published__version') - adds = ( - AddToCollection( - change_set=change_set, - entity=pub_ent, - published_version=pub_ent.published.version, + + # We're going to build our relationship models into big lists and then use + # bulk_create on them in order to reduce the number of queries required for + # this as the size of Collections grow. This should be reasonable for up to + # hundreds of PublishableEntities, but we may have to look into more complex + # chunking and async processing if we go beyond that. + change_set_adds = [] + collection_pub_entities = [] + for pub_ent in qset.all(): + if hasattr(pub_ent, 'published'): + published_version = pub_ent.published + else: + published_version = None + + # These will be associated with the ChangeSet for history tracking. + change_set_adds.append( + AddToCollection( + change_set=change_set, + entity=pub_ent, + published_version=published_version, + ) ) - for pub_ent in qset.all() - ) - # bulk_create will cast ``adds`` into a list and fully evaluate it. - # This will likely be okay to start, and if Collections stay in the - # hundreds. 
When it gets bigger, we might want to switch to - # something fancier that only reads in chunks. - # - # We don't need to use atomic() here because bulk_create already works - # atomically. If we got fancier with processing these in chunks, we'd need - # to wrap it in an atomic() context. - AddToCollection.objects.bulk_create(adds) + + # These are the direct Collection <-> PublishableEntity M2M mappings + collection_pub_entities.append( + CollectionPublishableEntity( + collection_id=collection_id, + entity_id=pub_ent.id, + ) + ) + + with atomic(): + AddToCollection.objects.bulk_create(change_set_adds) + CollectionPublishableEntity.objects.bulk_create(collection_pub_entities) return change_set diff --git a/openedx_learning/core/collections/migrations/0002_auto_20231211_1813.py b/openedx_learning/core/collections/migrations/0002_auto_20231211_1813.py new file mode 100644 index 00000000..be7570c7 --- /dev/null +++ b/openedx_learning/core/collections/migrations/0002_auto_20231211_1813.py @@ -0,0 +1,25 @@ +# Generated by Django 3.2.23 on 2023-12-11 18:13 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('oel_publishing', '0002_alter_fk_on_delete'), + ('oel_collections', '0001_initial'), + ] + + operations = [ + migrations.AddField( + model_name='collection', + name='publishable_entities', + field=models.ManyToManyField(related_name='collection', through='oel_collections.CollectionPublishableEntity', to='oel_publishing.PublishableEntity'), + ), + migrations.AlterField( + model_name='collectionpublishableentity', + name='collection', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='oel_collections.collection'), + ), + ] diff --git a/openedx_learning/core/collections/models.py b/openedx_learning/core/collections/models.py index d6f4b058..fc10129a 100644 --- a/openedx_learning/core/collections/models.py +++ b/openedx_learning/core/collections/models.py 
@@ -101,6 +101,14 @@ class Collection(models.Model): blank=True, ) + publishable_entities: models.ManyToManyField[ + PublishableEntity, CollectionPublishableEntity + ] = models.ManyToManyField( + PublishableEntity, + through="CollectionPublishableEntity", + related_name="collections", + ) + class Meta: constraints = [ # The version_num must be unique for any given Collection. @@ -113,6 +121,9 @@ class Meta: ) ] + def __str__(self): + return f"Collection {self.key} ({self.uuid})" + class CollectionPublishableEntity(models.Model): """ @@ -121,7 +132,6 @@ class CollectionPublishableEntity(models.Model): collection = models.ForeignKey( Collection, on_delete=models.CASCADE, - related_name="publishable_entities" ) entity = models.ForeignKey( PublishableEntity, @@ -152,7 +162,9 @@ class CollectionChangeSet(models.Model): 2. PublishableEntities are removed (RemoveFromCollection) 3. The published version of a PublishableEntity changes (PublishLogRecord) """ - collection = models.ForeignKey(Collection, on_delete=models.CASCADE) + collection = models.ForeignKey( + Collection, on_delete=models.CASCADE, related_name="change_sets" + ) version_num = models.PositiveBigIntegerField( null=False, validators=[MinValueValidator(1)], @@ -205,6 +217,9 @@ class Meta: ) ] + def __str__(self): + return f"Add {self.entity_id} in changeset {self.change_set_id}" + class RemoveFromCollection(models.Model): """ diff --git a/openedx_learning/core/publishing/api.py b/openedx_learning/core/publishing/api.py index ab4099ad..21a6a1c5 100644 --- a/openedx_learning/core/publishing/api.py +++ b/openedx_learning/core/publishing/api.py @@ -112,7 +112,7 @@ def publish_all_drafts( message="", published_at: datetime | None = None, published_by: int | None = None -): +) -> None: """ Publish everything that is a Draft and is not already published. 
""" diff --git a/openedx_learning/core/publishing/models.py b/openedx_learning/core/publishing/models.py index 1eb6803c..a2f482e0 100644 --- a/openedx_learning/core/publishing/models.py +++ b/openedx_learning/core/publishing/models.py @@ -140,7 +140,11 @@ class PublishableEntity(models.Model): """ uuid = immutable_uuid_field() - learning_package = models.ForeignKey(LearningPackage, on_delete=models.CASCADE) + learning_package = models.ForeignKey( + LearningPackage, + on_delete=models.CASCADE, + related_name="publishable_entities", + ) key = key_field() created = manual_date_time_field() created_by = models.ForeignKey( diff --git a/tests/openedx_learning/core/collections/test_api.py b/tests/openedx_learning/core/collections/test_api.py index fb2c3380..0b5e7564 100644 --- a/tests/openedx_learning/core/collections/test_api.py +++ b/tests/openedx_learning/core/collections/test_api.py @@ -62,7 +62,7 @@ def setUpTestData(cls) -> None: created_by=None, ) - def test_bootstrap_only_published(self) -> None: # Note: we must specify '-> None' to opt in to type checking + def test_bootstrap_only_published(self) -> None: """ Normal flow with no errors. 
""" @@ -72,5 +72,11 @@ def test_bootstrap_only_published(self) -> None: # Note: we must specify '-> No title="Test Bootstrap 🦃 Only Published Collection", pub_entities_qset=PublishableEntity.objects.filter( id=self.published_entity.id - ) + ), + created=self.created, ) + print(list(collection.change_sets.all())) + + entities = list(collection.publishable_entities.all()) + print(entities) + assert len(entities) == 1 From 60c47a67c77f8565576375e307bb82edc4756a8e Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Mon, 11 Dec 2023 23:32:57 -0500 Subject: [PATCH 4/9] fix: transactions are more useful when you group all the right statements inside --- openedx_learning/core/collections/api.py | 72 ++++++++++++------------ 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/openedx_learning/core/collections/api.py b/openedx_learning/core/collections/api.py index 337f0d2b..659afcfb 100644 --- a/openedx_learning/core/collections/api.py +++ b/openedx_learning/core/collections/api.py @@ -78,46 +78,46 @@ def add_to_collection( else: next_version_num = 1 - change_set = CollectionChangeSet.objects.create( - collection_id=collection_id, - version_num=next_version_num, - created=created, - ) - - # Add the joins so we can efficiently query what the published versions are. - qset = pub_entities_qset.select_related('published', 'published__version') - - # We're going to build our relationship models into big lists and then use - # bulk_create on them in order to reduce the number of queries required for - # this as the size of Collections grow. This should be reasonable for up to - # hundreds of PublishableEntities, but we may have to look into more complex - # chunking and async processing if we go beyond that. - change_set_adds = [] - collection_pub_entities = [] - for pub_ent in qset.all(): - if hasattr(pub_ent, 'published'): - published_version = pub_ent.published - else: - published_version = None - - # These will be associated with the ChangeSet for history tracking. 
- change_set_adds.append( - AddToCollection( - change_set=change_set, - entity=pub_ent, - published_version=published_version, - ) + with atomic(): + change_set = CollectionChangeSet.objects.create( + collection_id=collection_id, + version_num=next_version_num, + created=created, ) - # These are the direct Collection <-> PublishableEntity M2M mappings - collection_pub_entities.append( - CollectionPublishableEntity( - collection_id=collection_id, - entity_id=pub_ent.id, + # Add the joins so we can efficiently query what the published versions are. + qset = pub_entities_qset.select_related('published', 'published__version') + + # We're going to build our relationship models into big lists and then use + # bulk_create on them in order to reduce the number of queries required for + # this as the size of Collections grow. This should be reasonable for up to + # hundreds of PublishableEntities, but we may have to look into more complex + # chunking and async processing if we go beyond that. + change_set_adds = [] + collection_pub_entities = [] + for pub_ent in qset.all(): + if hasattr(pub_ent, 'published'): + published_version = pub_ent.published + else: + published_version = None + + # These will be associated with the ChangeSet for history tracking. 
+ change_set_adds.append( + AddToCollection( + change_set=change_set, + entity=pub_ent, + published_version=published_version, + ) + ) + + # These are the direct Collection <-> PublishableEntity M2M mappings + collection_pub_entities.append( + CollectionPublishableEntity( + collection_id=collection_id, + entity_id=pub_ent.id, + ) ) - ) - with atomic(): AddToCollection.objects.bulk_create(change_set_adds) CollectionPublishableEntity.objects.bulk_create(collection_pub_entities) From ead38d831fc4642d6b78a1406535d277eac63d41 Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Tue, 12 Dec 2023 11:23:53 -0500 Subject: [PATCH 5/9] fix: wrong relation --- openedx_learning/core/collections/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openedx_learning/core/collections/api.py b/openedx_learning/core/collections/api.py index 659afcfb..97f2f889 100644 --- a/openedx_learning/core/collections/api.py +++ b/openedx_learning/core/collections/api.py @@ -97,7 +97,7 @@ def add_to_collection( collection_pub_entities = [] for pub_ent in qset.all(): if hasattr(pub_ent, 'published'): - published_version = pub_ent.published + published_version = pub_ent.published.version else: published_version = None From 3e96db44686ab4d3f5e26a0d086080a6a5b7505b Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Thu, 14 Dec 2023 15:55:34 -0500 Subject: [PATCH 6/9] fix: show kyle --- openedx_learning/core/collections/api.py | 1 + openedx_learning/core/collections/apps.py | 9 +++-- openedx_learning/core/collections/handlers.py | 10 ++++++ openedx_learning/core/publishing/api.py | 9 +++++ openedx_learning/core/publishing/signals.py | 36 +++++++++++++++++++ 5 files changed, 62 insertions(+), 3 deletions(-) create mode 100644 openedx_learning/core/collections/handlers.py create mode 100644 openedx_learning/core/publishing/signals.py diff --git a/openedx_learning/core/collections/api.py b/openedx_learning/core/collections/api.py index 97f2f889..7812649c 100644 --- 
a/openedx_learning/core/collections/api.py +++ b/openedx_learning/core/collections/api.py @@ -20,6 +20,7 @@ Collection, CollectionPublishableEntity, CollectionChangeSet, AddToCollection, RemoveFromCollection, PublishEntity, ) +from ..publishing.signals import PUBLISHED_PRE_COMMIT def create_collection( diff --git a/openedx_learning/core/collections/apps.py b/openedx_learning/core/collections/apps.py index 3d7e1038..34399c98 100644 --- a/openedx_learning/core/collections/apps.py +++ b/openedx_learning/core/collections/apps.py @@ -18,7 +18,10 @@ def ready(self): """ Register the ComponentCollection, ComponentCollectionVersion relation. """ - #from ..publishing.api import register_content_models # pylint: disable=import-outside-toplevel - #from .models import ComponentCollection, ComponentCollectionVersion # pylint: disable=import-outside-toplevel + from ..publishing.signals import PUBLISHED_PRE_COMMIT + from . import handlers - #register_content_models(ComponentCollection, ComponentCollectionVersion) + PUBLISHED_PRE_COMMIT.connect( + handlers.update_collections_from_publish, + dispatch_uid="oel__collections__update_collections_from_publish", + ) diff --git a/openedx_learning/core/collections/handlers.py b/openedx_learning/core/collections/handlers.py new file mode 100644 index 00000000..f7b12823 --- /dev/null +++ b/openedx_learning/core/collections/handlers.py @@ -0,0 +1,10 @@ +""" +Signal handlers for Collections. + +This is to catch updates when things are published. 
+""" + +def update_collections_from_publish(sender, publish_log=None, **kwargs): + collections_to_update = publish_log + print(publish_log) + diff --git a/openedx_learning/core/publishing/api.py b/openedx_learning/core/publishing/api.py index 21a6a1c5..2f7cec08 100644 --- a/openedx_learning/core/publishing/api.py +++ b/openedx_learning/core/publishing/api.py @@ -22,6 +22,7 @@ PublishLog, PublishLogRecord, ) +from .signals import PUBLISHED_PRE_COMMIT def create_learning_package( @@ -177,6 +178,14 @@ def publish_from_drafts( }, ) + # We are intentionally using ``send`` instead of ``send_robust`` here, + # because we want to allow listeners to throw an exception and rollback + # the publish transaction if necessary. If you replace this with more + # sophisticated error catching and reporting later, please remember that + # exceptions should generally be caught outside of the atomic() block: + # https://docs.djangoproject.com/en/4.2/topics/db/transactions/#controlling-transactions-explicitly + PUBLISHED_PRE_COMMIT.send(PublishLogRecord, publish_log=publish_log) + return publish_log diff --git a/openedx_learning/core/publishing/signals.py b/openedx_learning/core/publishing/signals.py new file mode 100644 index 00000000..d76f01b8 --- /dev/null +++ b/openedx_learning/core/publishing/signals.py @@ -0,0 +1,36 @@ +""" +Publishing related, process-internal signals. +""" +from django.dispatch import Signal + + +# The PUBLISHED_PRE_COMMIT is sent: +# +# * AFTER a set of PublishableEntity models has been published–i.e. its entries +# in the publishing.models.Published model have been updated to new versions +# and a PublishLog entry has been created with associated PublishLogRecords. +# * BEFORE those publishing changes are committed to the database. +# +# This is the signal that you catch if you need to take actions when content is +# published, and failing those actions should cancel/rollback the publish. 
One +# case in which you might want to do this is if you have data models that need +# to track and add supplemental data to every PublishLog entry. A transient +# failure that occurs during this process might introduce data inconsistencies +# that we want to avoid. It's better to fail the entire request and force the +# system (or user) to try again. +# +# Do NOT try to catch this signal to launch a celery task. It is sent before +# the publishing model additions have been committed to the database, so they +# will not be accessible from another process. It may look like it's working +# because your celery processes are running in-process during development, or +# because delays in celery process launch allow the original request to commit +# before the celery task actually tries to run its query. But this kind of usage +# will cause issues in production environments at some point. +# +# Signal handlers should be simple and fast. Handlers should not do external web +# service calls, or anything else that is prone to unpredictable latency. 
+# +# providing_args=[ +# 'publish_log', # instance of saved PublishLog +# ] +PUBLISHED_PRE_COMMIT = Signal() From 5d91d96ab3be975dcd25d33a6e4ddf18eb125b87 Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Fri, 5 Jan 2024 10:38:48 -0500 Subject: [PATCH 7/9] feat: collections app --- openedx_learning/core/collections/admin.py | 20 ++-- openedx_learning/core/collections/api.py | 111 ++++++++++++------ openedx_learning/core/collections/handlers.py | 27 ++++- .../collections/migrations/0001_initial.py | 71 ++++++----- .../migrations/0002_auto_20231211_1813.py | 25 ---- openedx_learning/core/collections/models.py | 50 +++++--- openedx_learning/core/publishing/models.py | 6 +- .../core/collections/test_api.py | 3 - 8 files changed, 188 insertions(+), 125 deletions(-) delete mode 100644 openedx_learning/core/collections/migrations/0002_auto_20231211_1813.py diff --git a/openedx_learning/core/collections/admin.py b/openedx_learning/core/collections/admin.py index ee756657..40b7f7b3 100644 --- a/openedx_learning/core/collections/admin.py +++ b/openedx_learning/core/collections/admin.py @@ -10,23 +10,23 @@ from ..publishing.models import PublishableEntity from .models import ( - AddToCollection, + AddEntity, Collection, - CollectionChangeSet, + ChangeSet, CollectionPublishableEntity, - PublishEntity, - RemoveFromCollection + UpdateEntities, + RemoveEntity ) class CollectionChangeSetTabularInline(admin.TabularInline): - model = CollectionChangeSet + model = ChangeSet fields = ["version_num", "created"] readonly_fields = ["version_num", "created"] class PublishableEntityInline(admin.TabularInline): - model = Collection.publishable_entities.through + model = Collection.entities.through @admin.register(Collection) @@ -47,18 +47,18 @@ class CollectionAdmin(ReadOnlyModelAdmin): class AddToCollectionTabularInline(admin.TabularInline): - model = AddToCollection + model = AddEntity class RemoveFromCollectionTabularInline(admin.TabularInline): - model = RemoveFromCollection + model = 
+    pub_entities_qset: QuerySet = PublishableEntity.objects.none(),  # default to empty qset
@@ -43,57 +36,89 @@ def create_collection( key=key, title=title, created=created, - created_by=created_by, + created_by_id=created_by_id, ) collection.full_clean() collection.save() # add_to_collection is what creates our initial CollectionChangeSet, so # we always call it, even if we're just creating an empty Collection. - if pub_entities_qset is None: - pub_entities_qset = PublishEntity.objects.none - add_to_collection(collection.id, pub_entities_qset, created=created) return collection - def get_collection(collection_id: int) -> Collection: """ Get a Collection by ID. """ return Collection.objects.get(id=collection_id) +def get_collections_matching_entities(entity_ids_qs: QuerySet) -> QuerySet: + """ + Get a QuerySet of Collections that have any of these PublishableEntities. + """ + return Collection.objects.filter(publishable_entities__in=entity_ids_qs).distinct() + +def get_last_change_set(collection_id: int) -> ChangeSet | None: + """ + Get the most recent ChangeSet for this Collection. + + This may return None if there is no matching ChangeSet (i.e. this is a newly + created Collection). 
+ """ + return ChangeSet.objects \ + .filter(collection_id=collection_id) \ + .order_by('-version_num') \ + .first() + +def get_next_version_num(collection_id: int) -> int: + last_change_set = get_last_change_set(collection_id=collection_id) + return last_change_set.version_num + 1 if last_change_set else 1 + + +def update_collection_with_publish_log(collection_id: int, publish_log) -> ChangeSet: + change_set = create_next_change_set(collection_id, publish_log.published_at) + UpdateEntities.objects.create(change_set=change_set, publish_log=publish_log) + return change_set + + +def create_next_change_set(collection_id: int, created: datetime | None) -> ChangeSet: + return ChangeSet.objects.create( + collection_id=collection_id, + version_num=get_next_version_num(collection_id), + created=created, + ) + +def create_update_entities(): + pass + + def add_to_collection( collection_id: int, pub_entities_qset: QuerySet, created: datetime | None = None -)-> CollectionChangeSet: - last_change_set = CollectionChangeSet.objects \ - .filter(collection_id=collection_id) \ - .order_by('-version_num') \ - .first() - if last_change_set: - next_version_num = last_change_set.version_num + 1 - else: - next_version_num = 1 - +)-> ChangeSet: + """ + Add a QuerySet of PublishableEntities to a Collection. + """ + next_version_num = get_next_version_num(collection_id) with atomic(): - change_set = CollectionChangeSet.objects.create( + change_set = ChangeSet.objects.create( collection_id=collection_id, version_num=next_version_num, created=created, ) - # Add the joins so we can efficiently query what the published versions are. + # Add the joins so we can efficiently query the published versions. qset = pub_entities_qset.select_related('published', 'published__version') - # We're going to build our relationship models into big lists and then use - # bulk_create on them in order to reduce the number of queries required for - # this as the size of Collections grow. 
This should be reasonable for up to - # hundreds of PublishableEntities, but we may have to look into more complex - # chunking and async processing if we go beyond that. + # We're going to build our relationship models into big lists and then + # use bulk_create on them in order to reduce the number of queries + # required for this as the size of Collections grow. This should be + # reasonable for up to hundreds of PublishableEntities, but we may have + # to look into more complex chunking and async processing if we go + # beyond that. change_set_adds = [] collection_pub_entities = [] for pub_ent in qset.all(): @@ -104,7 +129,7 @@ def add_to_collection( # These will be associated with the ChangeSet for history tracking. change_set_adds.append( - AddToCollection( + AddEntity( change_set=change_set, entity=pub_ent, published_version=published_version, @@ -119,12 +144,22 @@ def add_to_collection( ) ) - AddToCollection.objects.bulk_create(change_set_adds) + AddEntity.objects.bulk_create(change_set_adds) CollectionPublishableEntity.objects.bulk_create(collection_pub_entities) return change_set -def remove_from_collection(collection_id: int, pub_entities_qset: QuerySet) -> CollectionChangeSet: - pass +def remove_from_collection( + collection_id: int, + pub_entities_qset: QuerySet, + created: datetime | None = None +) -> ChangeSet: + next_version_num = get_next_version_num(collection_id) + with atomic(): + change_set = ChangeSet.objects.create( + collection_id=collection_id, + version_num=next_version_num, + created=created, + ) diff --git a/openedx_learning/core/collections/handlers.py b/openedx_learning/core/collections/handlers.py index f7b12823..f56d7262 100644 --- a/openedx_learning/core/collections/handlers.py +++ b/openedx_learning/core/collections/handlers.py @@ -1,10 +1,29 @@ """ Signal handlers for Collections. -This is to catch updates when things are published. +This is to catch updates when things are published. 
The reason that we use +signals to do this kind of updating is because the ``publishing`` app exists at +a lower layer than the ``collections`` app, i.e. ``publishing`` should not know +that ``collections`` exists. If ``publishing`` updated Collections directly, it +would introduce a circular dependency. """ +from django.db.transaction import atomic + +from .api import ( + get_collections_matching_entities, + update_collection_with_publish_log, +) -def update_collections_from_publish(sender, publish_log=None, **kwargs): - collections_to_update = publish_log - print(publish_log) +def update_collections_from_publish(sender, publish_log=None, **kwargs): + """ + Update all Collections affected by the publish described by publish_log. + """ + # Find all Collections that had at least one PublishableEntity that was + # published in this PublishLog. + affected_collections = get_collections_matching_entities( + publish_log.records.values('entity__id') + ) + with atomic(): + for collection in affected_collections: + update_collection_with_publish_log(collection.id, publish_log) diff --git a/openedx_learning/core/collections/migrations/0001_initial.py b/openedx_learning/core/collections/migrations/0001_initial.py index 390df47a..cde0b8c5 100644 --- a/openedx_learning/core/collections/migrations/0001_initial.py +++ b/openedx_learning/core/collections/migrations/0001_initial.py @@ -1,4 +1,4 @@ -# Generated by Django 3.2.23 on 2023-12-07 01:38 +# Generated by Django 3.2.23 on 2024-01-05 15:21 from django.conf import settings import django.core.validators @@ -14,87 +14,100 @@ class Migration(migrations.Migration): initial = True dependencies = [ - ('oel_publishing', '0002_alter_fk_on_delete'), migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ('oel_publishing', '0003_auto_20240105_1521'), ] operations = [ migrations.CreateModel( - name='Collection', + name='ChangeSet', fields=[ ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, 
verbose_name='ID')), - ('uuid', models.UUIDField(default=uuid.uuid4, editable=False, unique=True, verbose_name='UUID')), - ('key', openedx_learning.lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_bin', 'sqlite': 'BINARY'}, max_length=500)), - ('title', openedx_learning.lib.fields.MultiCollationCharField(blank=True, db_collations={'mysql': 'utf8mb4_unicode_ci', 'sqlite': 'NOCASE'}, default='', max_length=500)), + ('version_num', models.PositiveBigIntegerField(validators=[django.core.validators.MinValueValidator(1)])), ('created', models.DateTimeField(validators=[openedx_learning.lib.validators.validate_utc_datetime])), - ('created_by', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to=settings.AUTH_USER_MODEL)), - ('learning_package', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='oel_publishing.learningpackage')), ], ), migrations.CreateModel( - name='CollectionChangeSet', + name='Collection', fields=[ ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('version_num', models.PositiveBigIntegerField(validators=[django.core.validators.MinValueValidator(1)])), + ('uuid', models.UUIDField(default=uuid.uuid4, editable=False, unique=True, verbose_name='UUID')), + ('key', openedx_learning.lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_bin', 'sqlite': 'BINARY'}, max_length=500)), + ('title', openedx_learning.lib.fields.MultiCollationCharField(blank=True, db_collations={'mysql': 'utf8mb4_unicode_ci', 'sqlite': 'NOCASE'}, default='', max_length=500)), ('created', models.DateTimeField(validators=[openedx_learning.lib.validators.validate_utc_datetime])), - ('collection', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='oel_collections.collection')), + ('created_by', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to=settings.AUTH_USER_MODEL)), ], ), 
migrations.CreateModel( - name='RemoveFromCollection', + name='UpdateEntities', fields=[ ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('change_set', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='oel_collections.collectionchangeset')), - ('entity', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='oel_publishing.publishableentity')), + ('change_set', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='oel_collections.changeset')), + ('publish_log', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='oel_publishing.publishlog')), ], ), migrations.CreateModel( - name='PublishEntity', + name='RemoveEntity', fields=[ ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('change_set', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='oel_collections.collectionchangeset')), - ('log_record', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='oel_publishing.publishlogrecord')), + ('change_set', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='oel_collections.changeset')), + ('entity', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='oel_publishing.publishableentity')), ], ), migrations.CreateModel( name='CollectionPublishableEntity', fields=[ ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('collection', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='publishable_entities', to='oel_collections.collection')), + ('collection', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='oel_collections.collection')), ('entity', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='oel_publishing.publishableentity')), ], ), + migrations.AddField( + model_name='collection', + name='entities', + 
field=models.ManyToManyField(related_name='collections', through='oel_collections.CollectionPublishableEntity', to='oel_publishing.PublishableEntity'), + ), + migrations.AddField( + model_name='collection', + name='learning_package', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='oel_publishing.learningpackage'), + ), + migrations.AddField( + model_name='changeset', + name='collection', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='change_sets', to='oel_collections.collection'), + ), migrations.CreateModel( - name='AddToCollection', + name='AddEntity', fields=[ ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('change_set', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='oel_collections.collectionchangeset')), + ('change_set', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='oel_collections.changeset')), ('entity', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='oel_publishing.publishableentity')), ('published_version', models.ForeignKey(null=True, on_delete=django.db.models.deletion.CASCADE, to='oel_publishing.publishableentityversion')), ], ), migrations.AddConstraint( - model_name='removefromcollection', - constraint=models.UniqueConstraint(fields=('change_set', 'entity'), name='oel_collections_refc_uniq_cs_ent'), + model_name='updateentities', + constraint=models.UniqueConstraint(fields=('change_set', 'publish_log'), name='oel_collections_pe_uniq_cs_pl'), ), migrations.AddConstraint( - model_name='publishentity', - constraint=models.UniqueConstraint(fields=('change_set', 'log_record'), name='oel_collections_pe_uniq_cs_lr'), + model_name='removeentity', + constraint=models.UniqueConstraint(fields=('change_set', 'entity'), name='oel_collections_refc_uniq_cs_ent'), ), migrations.AddConstraint( model_name='collectionpublishableentity', 
constraint=models.UniqueConstraint(fields=('collection', 'entity'), name='oel_collections_cpe_uniq_col_ent'), ), - migrations.AddConstraint( - model_name='collectionchangeset', - constraint=models.UniqueConstraint(fields=('collection', 'version_num'), name='oel_collections_ccs_uniq_col_vn'), - ), migrations.AddConstraint( model_name='collection', constraint=models.UniqueConstraint(fields=('learning_package', 'key'), name='oel_collections_col_uniq_lp_key'), ), migrations.AddConstraint( - model_name='addtocollection', + model_name='changeset', + constraint=models.UniqueConstraint(fields=('collection', 'version_num'), name='oel_collections_ccs_uniq_col_vn'), + ), + migrations.AddConstraint( + model_name='addentity', constraint=models.UniqueConstraint(fields=('change_set', 'entity'), name='oel_collections_aetc_uniq_cs_ent'), ), ] diff --git a/openedx_learning/core/collections/migrations/0002_auto_20231211_1813.py b/openedx_learning/core/collections/migrations/0002_auto_20231211_1813.py deleted file mode 100644 index be7570c7..00000000 --- a/openedx_learning/core/collections/migrations/0002_auto_20231211_1813.py +++ /dev/null @@ -1,25 +0,0 @@ -# Generated by Django 3.2.23 on 2023-12-11 18:13 - -from django.db import migrations, models -import django.db.models.deletion - - -class Migration(migrations.Migration): - - dependencies = [ - ('oel_publishing', '0002_alter_fk_on_delete'), - ('oel_collections', '0001_initial'), - ] - - operations = [ - migrations.AddField( - model_name='collection', - name='publishable_entities', - field=models.ManyToManyField(related_name='collection', through='oel_collections.CollectionPublishableEntity', to='oel_publishing.PublishableEntity'), - ), - migrations.AlterField( - model_name='collectionpublishableentity', - name='collection', - field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='oel_collections.collection'), - ), - ] diff --git a/openedx_learning/core/collections/models.py 
b/openedx_learning/core/collections/models.py index fc10129a..30a52889 100644 --- a/openedx_learning/core/collections/models.py +++ b/openedx_learning/core/collections/models.py @@ -73,7 +73,7 @@ LearningPackage, PublishableEntity, PublishableEntityVersion, - PublishLogRecord, + PublishLog, ) from openedx_learning.lib.fields import ( @@ -101,9 +101,7 @@ class Collection(models.Model): blank=True, ) - publishable_entities: models.ManyToManyField[ - PublishableEntity, CollectionPublishableEntity - ] = models.ManyToManyField( + entities = models.ManyToManyField( PublishableEntity, through="CollectionPublishableEntity", related_name="collections", @@ -152,7 +150,7 @@ class Meta: ] -class CollectionChangeSet(models.Model): +class ChangeSet(models.Model): """ Represents an atomic set of changes to a Collection. @@ -161,6 +159,9 @@ class CollectionChangeSet(models.Model): 1. PublishableEntities are added (AddToCollection) 2. PublishableEntities are removed (RemoveFromCollection) 3. The published version of a PublishableEntity changes (PublishLogRecord) + + TODO: Does this need a reverse index on collection -version_num, since we're + so often reaching for the most recent? """ collection = models.ForeignKey( Collection, on_delete=models.CASCADE, related_name="change_sets" @@ -184,7 +185,7 @@ class Meta: ] -class AddToCollection(models.Model): +class AddEntity(models.Model): """ A record for when a PublishableEntity is added to a Collection. @@ -195,7 +196,7 @@ class AddToCollection(models.Model): Note that something may be removed from a Collection and then re-added at a later time. 
""" - change_set = models.ForeignKey(CollectionChangeSet, on_delete=models.CASCADE) + change_set = models.ForeignKey(ChangeSet, on_delete=models.CASCADE) entity = models.ForeignKey(PublishableEntity, on_delete=models.CASCADE) # We want to capture the published version of the entity at the time it's @@ -221,14 +222,14 @@ def __str__(self): return f"Add {self.entity_id} in changeset {self.change_set_id}" -class RemoveFromCollection(models.Model): +class RemoveEntity(models.Model): """ A record for when a PublishableEntity is removed from a Collection. Note that something may be removed from a Collection, re-added at a later time, and then removed again. """ - change_set = models.ForeignKey(CollectionChangeSet, on_delete=models.CASCADE) + change_set = models.ForeignKey(ChangeSet, on_delete=models.CASCADE) entity = models.ForeignKey(PublishableEntity, on_delete=models.CASCADE) class Meta: @@ -244,12 +245,31 @@ class Meta: ] -class PublishEntity(models.Model): +class UpdateEntities(models.Model): """ - A record for when the published version of a PublishableEntity changes. + A record for when the published version of PublishableEntites changes. + + We store a reference to the PublishLog where the publishes happen instead of + storing each PublishLogRecord because many PublishableEntities may get + published at the same time, and they may exist in many Collections. That + would mean that we'd have to create (Collections X PublishableEntities) rows + worth of UpdateEntities for the Collections and PublishableEntities that + were affected. By tying it to the PublishLog, we at least reduce that to the + number of Collections affected. + + If you need to find out which things were published, you can query for the + intersection of the PublishableEntities from the PublishLogRecords tied to + the PublishLog and the PublishableEntities in the Collection. 
This isn't + completely accurate, since it would not return results for any + PublishableEntities that were removed from the Collection between the time + that the publish happened and you did the query. But this is not a query + pattern that we're optimizing for. It's technically possible to still + extract this information by replaying all the RemoveEntity entries between + the UpdateEntities and the time the query is being done, but that's not an + expected use case. """ - change_set = models.ForeignKey(CollectionChangeSet, on_delete=models.CASCADE) - log_record = models.ForeignKey(PublishLogRecord, on_delete=models.CASCADE) + change_set = models.ForeignKey(ChangeSet, on_delete=models.CASCADE) + publish_log = models.ForeignKey(PublishLog, on_delete=models.CASCADE) class Meta: constraints = [ @@ -258,8 +278,8 @@ class Meta: models.UniqueConstraint( fields=[ "change_set", - "log_record", + "publish_log", ], - name="oel_collections_pe_uniq_cs_lr", + name="oel_collections_pe_uniq_cs_pl", ) ] diff --git a/openedx_learning/core/publishing/models.py b/openedx_learning/core/publishing/models.py index a2f482e0..936854d4 100644 --- a/openedx_learning/core/publishing/models.py +++ b/openedx_learning/core/publishing/models.py @@ -366,7 +366,11 @@ class PublishLogRecord(models.Model): and ``new_version`` field values. 
""" - publish_log = models.ForeignKey(PublishLog, on_delete=models.CASCADE) + publish_log = models.ForeignKey( + PublishLog, + on_delete=models.CASCADE, + related_name="records", + ) entity = models.ForeignKey(PublishableEntity, on_delete=models.RESTRICT) old_version = models.ForeignKey( PublishableEntityVersion, diff --git a/tests/openedx_learning/core/collections/test_api.py b/tests/openedx_learning/core/collections/test_api.py index 0b5e7564..66ba8416 100644 --- a/tests/openedx_learning/core/collections/test_api.py +++ b/tests/openedx_learning/core/collections/test_api.py @@ -75,8 +75,5 @@ def test_bootstrap_only_published(self) -> None: ), created=self.created, ) - print(list(collection.change_sets.all())) - entities = list(collection.publishable_entities.all()) - print(entities) assert len(entities) == 1 From ef1a8abedcaa800a1c38ba69c848bb3adf477b11 Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Fri, 5 Jan 2024 10:46:02 -0500 Subject: [PATCH 8/9] fix: migrations for collections --- .../collections/migrations/0001_initial.py | 4 ++-- .../0003_configure_related_names.py | 24 +++++++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) create mode 100644 openedx_learning/core/publishing/migrations/0003_configure_related_names.py diff --git a/openedx_learning/core/collections/migrations/0001_initial.py b/openedx_learning/core/collections/migrations/0001_initial.py index cde0b8c5..18a624e9 100644 --- a/openedx_learning/core/collections/migrations/0001_initial.py +++ b/openedx_learning/core/collections/migrations/0001_initial.py @@ -1,4 +1,4 @@ -# Generated by Django 3.2.23 on 2024-01-05 15:21 +# Generated by Django 3.2.23 on 2024-01-05 15:42 from django.conf import settings import django.core.validators @@ -14,8 +14,8 @@ class Migration(migrations.Migration): initial = True dependencies = [ + ('oel_publishing', '0003_configure_related_names'), migrations.swappable_dependency(settings.AUTH_USER_MODEL), - ('oel_publishing', '0003_auto_20240105_1521'), ] 
operations = [ diff --git a/openedx_learning/core/publishing/migrations/0003_configure_related_names.py b/openedx_learning/core/publishing/migrations/0003_configure_related_names.py new file mode 100644 index 00000000..95217c89 --- /dev/null +++ b/openedx_learning/core/publishing/migrations/0003_configure_related_names.py @@ -0,0 +1,24 @@ +# Generated by Django 3.2.23 on 2024-01-05 15:39 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('oel_publishing', '0002_alter_fk_on_delete'), + ] + + operations = [ + migrations.AlterField( + model_name='publishableentity', + name='learning_package', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='publishable_entities', to='oel_publishing.learningpackage'), + ), + migrations.AlterField( + model_name='publishlogrecord', + name='publish_log', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='records', to='oel_publishing.publishlog'), + ), + ] From e27f8151e8c262d337e9a9eabd5fc1b8c9d217f5 Mon Sep 17 00:00:00 2001 From: David Ormsbee Date: Sun, 7 Jan 2024 10:27:52 -0500 Subject: [PATCH 9/9] fix: linter fixes --- openedx_learning/core/collections/admin.py | 2 -- openedx_learning/core/collections/api.py | 7 ++++--- tests/openedx_learning/core/collections/test_api.py | 2 -- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/openedx_learning/core/collections/admin.py b/openedx_learning/core/collections/admin.py index 40b7f7b3..e2762490 100644 --- a/openedx_learning/core/collections/admin.py +++ b/openedx_learning/core/collections/admin.py @@ -8,12 +8,10 @@ from openedx_learning.lib.admin_utils import ReadOnlyModelAdmin -from ..publishing.models import PublishableEntity from .models import ( AddEntity, Collection, ChangeSet, - CollectionPublishableEntity, UpdateEntities, RemoveEntity ) diff --git a/openedx_learning/core/collections/api.py 
b/openedx_learning/core/collections/api.py index b607464c..85635eec 100644 --- a/openedx_learning/core/collections/api.py +++ b/openedx_learning/core/collections/api.py @@ -5,15 +5,14 @@ from datetime import datetime, timezone -from django.db.models import F, QuerySet +from django.db.models import QuerySet from django.db.transaction import atomic from ..publishing.models import PublishableEntity from .models import ( Collection, CollectionPublishableEntity, ChangeSet, - AddEntity, RemoveEntity, UpdateEntities, + AddEntity, UpdateEntities, ) -from ..publishing.signals import PUBLISHED_PRE_COMMIT def create_collection( @@ -163,3 +162,5 @@ def remove_from_collection( version_num=next_version_num, created=created, ) + + return change_set diff --git a/tests/openedx_learning/core/collections/test_api.py b/tests/openedx_learning/core/collections/test_api.py index 66ba8416..c2a018b3 100644 --- a/tests/openedx_learning/core/collections/test_api.py +++ b/tests/openedx_learning/core/collections/test_api.py @@ -2,9 +2,7 @@ Tests of the Collection app's python API """ from datetime import datetime, timezone -from uuid import UUID -from django.core.exceptions import ValidationError from openedx_learning.core.collections import api as collections_api from openedx_learning.core.publishing import api as publishing_api