Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/wagtail 3 upgrade #44

Merged
merged 12 commits into from
Sep 7, 2022
Merged
8 changes: 4 additions & 4 deletions constraints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ django-appconf==1.0.5
django-compressor==4.0
django-filter==21.1
django-libsass==0.9
django-modelcluster==5.3
django-modelcluster==6.0
django-storages==1.12.3
django-taggit==1.5.1
django-taggit==2.1.0
django-treebeard==4.5.1
djangorestframework==3.13.1
docutils==0.15.2
Expand Down Expand Up @@ -58,9 +58,9 @@ tqdm==4.64.0
typed-environment-configuration==0.1.4
typepy==0.2.5
urllib3==1.26.9
wagtail==2.15.5
wagtail==3.0.1
wagtail-markdown==0.8.0
wagtail-orderable==1.0.3
wagtail-orderable==1.0.4
webencodings==0.5.1
Willow==1.4.1
xlrd==2.0.1
Expand Down
88 changes: 44 additions & 44 deletions ietf/bibliography/management/commands/fix_BMI_page_links.py
Original file line number Diff line number Diff line change
@@ -1,104 +1,104 @@
from django.core.management.base import BaseCommand, CommandError

import re

from tqdm import tqdm

from bs4 import BeautifulSoup
from django.core.management.base import BaseCommand, CommandError
from tqdm import tqdm
from wagtail.models import Page

from wagtail.core.models import Page

from ietf.standard.models import StandardPage
from ietf.blog.models import BlogPage
from ietf.iesg_statement.models import IESGStatementPage

from ietf.snippets.models import RFC, Charter, WorkingGroup
from ietf.standard.models import StandardPage


def change_links(page):
    """Rewrite bare <a> tags on a BibliographyMixin page into snippet links.

    Scans every prepared content field named in ``page.CONTENT_FIELD_MAP`` for
    anchor tags whose text looks like an RFC reference (e.g. "[RFC 3514]") or a
    working-group mention (e.g. "(tls) Working Group"), and annotates matching
    tags with the ``data-app`` / ``data-linktype`` / ``data-id`` attributes that
    the bibliography machinery expects. Tags that already carry an ``href`` are
    left alone. The page is saved once at the end with ``update_fields`` set to
    all prepared + source fields so BibliographyMixin.save re-links everything.

    Returns:
        set[str]: RFC numbers that were referenced but have no RFC snippet.
    """
    unfound_rfcs = set()

    # Anchor text like "RFC 3514", optionally bracketed: "[ RFC 3514 ]".
    rfc_pattern = re.compile(r"^ *\[? *RFC *(\d{4}) *\]? *$")

    # "(acronym)\xa0Working Group" (non-breaking space) or "acronym Working Group".
    group_pattern1 = re.compile(r"\((\w+)\)\xa0[Ww]orking [Gg]roup$")
    group_pattern2 = re.compile(r" *(\w+) +[Ww]orking [Gg]roup$")

    for fieldname in page.CONTENT_FIELD_MAP.keys():
        field = getattr(page, fieldname)
        # NOTE(review): StreamValue.stream_data was deprecated/removed around
        # Wagtail 3 in favour of raw_data — confirm this still works post-upgrade.
        for item in field.stream_data:
            if not item["type"] in ("paragraph", "raw_html"):
                continue
            soup = BeautifulSoup(item["value"], "html.parser")
            for tag in soup.find_all("a", string=rfc_pattern):
                if "href" in tag.attrs:
                    # Already a real link; leave it untouched.
                    continue
                rfc_number = rfc_pattern.match(tag.string)[1]
                rfc = RFC.objects.filter(rfc=rfc_number).first()
                if not rfc:
                    unfound_rfcs.add(rfc_number)
                    continue
                tag["data-app"] = "snippets"
                tag["data-linktype"] = "rfc"
                tag["data-id"] = str(rfc.pk)
            for pattern in (group_pattern1, group_pattern2):
                for tag in soup.find_all("a", string=pattern):
                    if "href" in tag.attrs:
                        continue
                    if "linktype" in tag.attrs and tag["linktype"] != "charter":
                        # Only charter-type rich-text links are rewritten here.
                        continue
                    if not pattern.search(tag.string):
                        print("Search failure", tag.string, pattern)
                        print(page.url_path)
                        continue
                    acronym = pattern.search(tag.string)[1].lower()
                    # Prefer the group's charter snippet; fall back to the group itself.
                    charter = Charter.objects.filter(
                        working_group__acronym=acronym
                    ).first()
                    if charter:
                        tag["data-app"] = "snippets"
                        tag["data-linktype"] = "charter"
                        tag["data-id"] = str(charter.pk)
                    else:
                        group = WorkingGroup.objects.filter(acronym=acronym).first()
                        if group:
                            tag["data-app"] = "snippets"
                            tag["data-linktype"] = "workinggroup"
                            tag["data-id"] = str(group.pk)
                        else:
                            print("Nothing found in ", str(tag))
                            print("Acronym was", acronym)
                            continue
            item["value"] = str(soup)

    # Save every prepared AND source field together — BibliographyMixin.save
    # requires all-or-none updates of the mapped fields.
    all_the_fields = list(page.CONTENT_FIELD_MAP.keys())
    all_the_fields.extend(list(page.CONTENT_FIELD_MAP.values()))
    page.save(update_fields=all_the_fields)

    return unfound_rfcs


class Command(BaseCommand):
    """Management command that rewrites <a> tags on BibliographyMixin pages.

    With explicit ``url_paths`` arguments, only those pages are processed;
    otherwise every StandardPage, BlogPage and IESGStatementPage is fixed up.
    """

    help = "Replace <a> tag parameters on pages using BibliographyMixin"

    def add_arguments(self, parser):
        # Zero or more Wagtail url_path values, e.g. /home/blog/some-post/.
        parser.add_argument("url_paths", nargs="*", type=str)

    def handle(self, *args, **options):
        unfound_rfcs = set()

        if options["url_paths"]:
            for url_path in options["url_paths"]:
                page = Page.objects.filter(url_path=url_path).first()
                if not page:
                    # BUGFIX: the error was constructed but never raised, so a
                    # bad path previously fell through to page.specific on None.
                    raise CommandError("Page with path " + url_path + " not found")
                unfound_rfcs.update(change_links(page.specific))
        else:
            print("Standard Pages:")
            for page in tqdm(StandardPage.objects.all()):
                unfound_rfcs.update(change_links(page))
            print("Blog Pages:")
            for page in tqdm(BlogPage.objects.all()):
                unfound_rfcs.update(change_links(page))
            print("IESGStatement Pages:")
            for page in tqdm(IESGStatementPage.objects.all()):
                unfound_rfcs.update(change_links(page))
        if unfound_rfcs:
            print("Unfound RFCs", unfound_rfcs)
126 changes: 67 additions & 59 deletions ietf/bibliography/models.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
from bs4 import BeautifulSoup, NavigableString

from django.template import TemplateDoesNotExist
from django.template.loader import get_template
from django.apps import apps
from django.contrib.contenttypes.fields import GenericForeignKey
from django.contrib.contenttypes.models import ContentType
from django.db import models
from django.apps import apps
from django.core.exceptions import ObjectDoesNotExist

from wagtail.core.models import Page
from django.db import models
from django.template import TemplateDoesNotExist
from django.template.loader import get_template
from wagtail.models import Page

from ietf.utils import OrderedSet

Expand All @@ -26,7 +24,7 @@ class BibliographyItem(models.Model):
)
page = models.ForeignKey(
Page,
related_name='bibliography_items',
related_name="bibliography_items",
help_text="The page that this item links to.",
on_delete=models.CASCADE,
)
Expand All @@ -41,23 +39,22 @@ class BibliographyItem(models.Model):
max_length=127,
help_text='The "value" with which this item was created, eg. "3514" in [[rfc:3514]].',
)
content_long_title = models.CharField(
max_length=127,
blank=True
)
content_long_title = models.CharField(max_length=127, blank=True)
content_title = models.CharField(
max_length=127,
help_text='The link title for this item, eg. "RFC 7168" for [[rfc:7168]].',
)
content_type = models.ForeignKey(
ContentType,
blank=True, null=True,
blank=True,
null=True,
on_delete=models.CASCADE,
)
object_id = models.PositiveIntegerField(
blank=True, null=True,
blank=True,
null=True,
)
content_object = GenericForeignKey('content_type', 'object_id')
content_object = GenericForeignKey("content_type", "object_id")

def render_title(self):
if not self.content_object:
Expand All @@ -73,10 +70,10 @@ def render_uri(self):

@property
def link(self):
    """Build the in-page anchor for this bibliography item.

    Returns a BeautifulSoup <a> tag pointing at the ``#bibliography<N>``
    footnote target, carrying the ``bibliography-reference`` class and a
    ``data-ordering`` attribute, with the item's title as link text.
    """
    soup = BeautifulSoup("", "html5lib")
    link = soup.new_tag("a", href="#bibliography" + str(self.ordering))
    link["class"] = "bibliography-reference"
    link["data-ordering"] = str(self.ordering)
    link.insert(0, NavigableString(self.content_title))
    return link

Expand All @@ -91,24 +88,21 @@ def render(self, request=None):
else:
try:
template = get_template(
'bibliography/item_{}.html'.format(self.content_key)
"bibliography/item_{}.html".format(self.content_key)
)
except TemplateDoesNotExist:
template = None
BibliographyItem.TEMPLATE_CACHE[self.content_key] = template

if template:
return template.render({
'object': self.content_object,
'item': self
}, request=request)
return template.render(
{"object": self.content_object, "item": self}, request=request
)
else:
return str(object)

def __str__(self):
    """Human-readable form, e.g. "Bibliography Item #3: RFC 7168"."""
    return "Bibliography Item #{}: {}".format(self.ordering, self.content_object)


class BibliographyMixin(models.Model):
Expand All @@ -125,59 +119,70 @@ def save(self, *args, **kwargs):
# Don't update prepared content fields if none of the source fields are being updated (e.g. when saving a draft)
# NB - We have to update all prepared and source fields or none, as there's no way of determining which field a
# given BibliographyItem appears in.
update_fields = kwargs.get('update_fields')
update_fields = kwargs.get("update_fields")
recreate_bibliography_items = True

if update_fields is not None:
source_fields_being_updated = [source_field in update_fields for source_field in self.CONTENT_FIELD_MAP.values()]
prepared_fields_being_updated = [prepared_field in update_fields for prepared_field in self.CONTENT_FIELD_MAP.keys()]
source_fields_being_updated = [
source_field in update_fields
for source_field in self.CONTENT_FIELD_MAP.values()
]
prepared_fields_being_updated = [
prepared_field in update_fields
for prepared_field in self.CONTENT_FIELD_MAP.keys()
]

if any(source_fields_being_updated) or any(prepared_fields_being_updated):
if not all(source_fields_being_updated) or not all(prepared_fields_being_updated):
raise ValueError('Either all prepared content fields must be updated or none')
if not all(source_fields_being_updated) or not all(
prepared_fields_being_updated
):
raise ValueError(
"Either all prepared content fields must be updated or none"
)
else:
recreate_bibliography_items = False

if recreate_bibliography_items:
self.bibliography_items.all().delete()

all_content = "".join([
str(getattr(self, content_field)) or '' for content_field
in self.CONTENT_FIELD_MAP.keys()
])
all_soup = BeautifulSoup(all_content, 'html.parser')
all_content = "".join(
[
str(getattr(self, content_field)) or ""
for content_field in self.CONTENT_FIELD_MAP.keys()
]
)
all_soup = BeautifulSoup(all_content, "html.parser")
subsoups = {
prepared_content_field: BeautifulSoup(
str(getattr(self, content_field)) or '', 'html.parser'
) for content_field, prepared_content_field in
self.CONTENT_FIELD_MAP.items()
str(getattr(self, content_field)) or "", "html.parser"
)
for content_field, prepared_content_field in self.CONTENT_FIELD_MAP.items()
}
tags = OrderedSet(all_soup.find_all('a', attrs={'data-app': True}))
tags = OrderedSet(all_soup.find_all("a", attrs={"data-app": True}))

for tag in tags:
app = tag['data-app']
model = tag['data-linktype']
obj_id = tag['data-id']
app = tag["data-app"]
model = tag["data-linktype"]
obj_id = tag["data-id"]

try:
obj = apps.get_model(
app_label=app,
model_name=model
).objects.get(pk=obj_id)
obj = apps.get_model(app_label=app, model_name=model).objects.get(
pk=obj_id
)
try:
long_title = obj.long_title
except AttributeError:
long_title = ""
object_details = {
'content_object': obj,
'content_long_title': long_title,
'content_title': obj.__str__()
"content_object": obj,
"content_long_title": long_title,
"content_title": obj.__str__(),
}
except ObjectDoesNotExist:
object_details = {
'content_object': None,
'content_long_title': "",
'content_title': '(removed)'
"content_object": None,
"content_long_title": "",
"content_title": "(removed)",
}
item = BibliographyItem.objects.create(
page=self,
Expand All @@ -187,11 +192,14 @@ def save(self, *args, **kwargs):
**object_details
)
for soup in subsoups.values():
for t in soup.find_all('a', attrs={
'data-app': app,
'data-linktype': model,
'data-id': obj_id
}):
for t in soup.find_all(
"a",
attrs={
"data-app": app,
"data-linktype": model,
"data-id": obj_id,
},
):
t.replaceWith(item.link)

for prepared_content_field, prepared_soup in subsoups.items():
Expand Down
Loading