-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
EREGCSC-2767 Fix grouping post-save hook (#1440)
* fix: grouping algorithm behavior in a multigroup or removal scenario * on group remove, use related_resources set to compute all affected resources & groups. - previously relied only on current groups assigned to the resource, was inaccurate. * fix group_parent field update behavior * fix unusual bug where queryset is emptied halfway through the algorithm * provide more comprehensive unit tests to ensure all behaviors work, including: - adding one or more group(s) - remove one or more group(s) - remove a group and assign a new group in the same save operation - ensure group parents are computed properly for all operations * linting * test: add test for modifying group directly * fix: one-time data migration for resources to assign group params
- Loading branch information
Showing
3 changed files
with
337 additions
and
83 deletions.
There are no files selected for viewing
58 changes: 58 additions & 0 deletions
58
solution/backend/resources/migrations/0006_recompute_groups.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
from django.db import migrations, models | ||
from django.db.models import Value, Q | ||
from django.contrib.postgres.aggregates import ArrayAgg | ||
|
||
|
||
TIMEOUT_MINUTES = 10 | ||
|
||
|
||
def generate_related(apps, schema_editor): | ||
schema_editor.execute(f"SET LOCAL statement_timeout TO {TIMEOUT_MINUTES * 60000};") | ||
AbstractResource = apps.get_model("resources", "AbstractResource") | ||
ResourceGroup = apps.get_model("resources", "ResourceGroup") | ||
|
||
for resource in AbstractResource.objects.all(): | ||
new_groups = resource.resource_groups.all() | ||
q = Q(pk__in=new_groups) | Q(resources__in=resource.related_resources.all()) | ||
all_groups = list(ResourceGroup.objects.filter(q).distinct().values_list("pk", flat=True)) | ||
all_resources = list(AbstractResource.objects.filter(resource_groups__in=all_groups).distinct().values_list("pk", flat=True)) | ||
|
||
# Set the parent of each affected group (most recent by date at the top of each group's hierarchy) | ||
AbstractResource.objects.filter(pk__in=all_resources).update(group_parent=False) | ||
for group in ResourceGroup.objects.filter(pk__in=all_groups): | ||
group.resources.filter(pk=group.resources.order_by("-date").first().pk).update(group_parent=True) | ||
|
||
# Compute the related resources, citations, categories, and subjects | ||
# Except for related_resources, these lists are inclusive of the resource we are processing | ||
if new_groups: | ||
related_resources = AbstractResource.objects.filter(resource_groups__in=new_groups) | ||
related_aggregates = related_resources.aggregate( | ||
all_citations=ArrayAgg("cfr_citations", distinct=True, filter=Q(cfr_citations__isnull=False), default=Value([])), | ||
all_categories=ArrayAgg("category", distinct=True, filter=Q(category__isnull=False), default=Value([])), | ||
all_subjects=ArrayAgg("subjects", distinct=True, filter=Q(subjects__isnull=False), default=Value([])), | ||
) | ||
related_resources = related_resources.exclude(pk=resource.pk) # Exclude the current resource for related_resources | ||
|
||
# Set related_X fields for this resource | ||
resource.related_resources.set(related_resources) | ||
resource.related_citations.set(related_aggregates["all_citations"]) | ||
resource.related_categories.set(related_aggregates["all_categories"]) | ||
resource.related_subjects.set(related_aggregates["all_subjects"]) | ||
else: | ||
# Set related_X to contain only the X objects in the individual resource. | ||
# We must do this for filtering purposes when `group_resources=true` on resource endpoints. | ||
resource.related_resources.clear() | ||
resource.related_citations.set(resource.cfr_citations.all()) | ||
resource.related_categories.set([resource.category] if resource.category else []) | ||
resource.related_subjects.set(resource.subjects.all()) | ||
|
||
|
||
class Migration(migrations.Migration): | ||
|
||
dependencies = [ | ||
('resources', '0005_reset_extract_url'), | ||
] | ||
|
||
operations = [ | ||
migrations.RunPython(generate_related, reverse_code=migrations.RunPython.noop), | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.