Skip to content

Commit

Permalink
Parse bbfile dataset in Blackboard Ultra bodies
Browse files Browse the repository at this point in the history
Some files can contain link metadata embedded in HTML dataset (`data-*`) attributes. This metadata is needed to give local files accurate file names. Moreover, the inner text of links inside body descriptions is now assigned manually when it is empty.
  • Loading branch information
sanjacob authored Jan 30, 2025
1 parent 84826c0 commit eb1f0b5
Showing 1 changed file with 22 additions and 1 deletion.
23 changes: 22 additions & 1 deletion blackboard_sync/content/webdav.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
# MA 02110-1301, USA.


import json
import mimetypes
from pathlib import Path

from bs4 import Tag
from requests import Response
Expand Down Expand Up @@ -55,21 +56,41 @@ def __init__(self, body: str, base_url: str,
def _find_replace(self, soup: BeautifulSoup,
                  tag: str, attr: str, base_url: str) -> list[Link]:
    """Collect the links held by every ``tag`` element and localise them.

    For each ``tag`` element in ``soup`` that has a non-empty ``attr``
    value, a ``Link`` is recorded whose ``href`` is that value and whose
    ``text`` is the file name chosen for it.  The soup is modified in
    place: empty anchors receive the file name as inner text, and
    attributes pointing below ``base_url`` are replaced by the file name.

    Args:
        soup: Parsed document to scan and rewrite in place.
        tag: Name of the elements to look for (e.g. ``"a"``).
        attr: Attribute that carries the URI (e.g. ``"href"``).
        base_url: Prefix identifying URIs that are rewritten to the
            local file name.

    Returns:
        One ``Link`` per element that had a non-empty ``attr`` value,
        in document order.
    """
    links = []
    is_link = tag == "a"

    for el in soup.find_all(tag):
        # Add link for later download
        uri = el.get(attr)

        if not uri:
            continue

        # Prefer the name stored in the JSON dataset of anchors.  The
        # helper is a method, so it has to be reached through ``self``.
        filename = self._parse_bbfile(el) if is_link else None
        # Otherwise fall back to the url-decoded last path segment
        filename = filename or unquote(uri.split('/')[-1])
        links.append(Link(href=uri, text=filename))

        # Assign inner text if missing; only anchors are meant to hold
        # text, other elements (e.g. images) must stay childless.
        if is_link and not el.string:
            el.append(filename)

        # Replace for local instance
        if uri.startswith(base_url):
            el[attr] = filename
    return links

def _parse_bbfile(self, el: str) -> str | None:
json_data = el.get("data-bbfile")
filename = None

if json_data:
try:
bbfile = json.loads(json_data)
except json.JSONDecodeError:
pass
else:
filename = bbfile.get("linkName")
return filename

@property
def links(self) -> List[Link]:
    """Expose the list kept in ``self._links`` as a read-only property."""
    return self._links
Expand Down

0 comments on commit eb1f0b5

Please sign in to comment.