Skip to content

Commit

Permalink
add download sciebo folders
Browse files Browse the repository at this point in the history
  • Loading branch information
Nils committed Jun 12, 2024
1 parent a80208a commit 1997cc4
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 12 deletions.
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ install_requires =
yt-dlp>=2021.12.27
pdfkit>=0.6.0
tqdm>=4.0.0
lxml>=5.0.0

[options.extras_require]
keyring =
Expand Down
109 changes: 97 additions & 12 deletions syncmymoodle/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,7 @@ def __init__(
self.type = type
self.parent = parent
self.children: List[Node] = []
self.additional_info = (
additional_info # Currently only used for course_id in opencast
)
self.additional_info = additional_info # Currently only used for course_id in opencast and auth header in sciebo
self.is_downloaded = (
is_downloaded # Can also be used to exclude files from being downloaded
)
Expand Down Expand Up @@ -829,6 +827,8 @@ def download_file(self, node):
else:
resume_size = 0
header = dict()
if node.type.lower() == "sciebo file":
header = {**header, **node.additional_info}

with closing(
self.session.get(node.url, headers=header, stream=True)
Expand Down Expand Up @@ -1050,19 +1050,104 @@ def scanForLinks(

# https://rwth-aachen.sciebo.de/s/XXX
if self.config.get("used_modules", {}).get("url", {}).get("sciebo", {}):
sciebo_links = re.findall(
"https://rwth-aachen.sciebo.de/s/[a-zA-Z0-9-]+", text
sciebo_links = list(
set(re.findall("https://rwth-aachen.sciebo.de/s/[a-zA-Z0-9-]+", text))
)
for vid in sciebo_links:
response = self.session.get(vid)
sciebo_url = "https://rwth-aachen.sciebo.de"
webdav_location = "/public.php/webdav/"
for link in sciebo_links:
logging.info(f"Found Sciebo Link: {link}")

# get the download page
response = self.session.get(link)

# parse html code
soup = bs(response.text, features="html.parser")
url = soup.find("input", {"name": "downloadURL"})
filename = soup.find("input", {"name": "filename"})
if url and filename:
parent_node.add_child(
filename["value"], url["value"], "Sciebo file", url=url["value"]

# get the requesttoken
requestToken = soup.head["data-requesttoken"]
logger.info(f"RequestToken: {requestToken}")

# print the property value of the input tag with the name sharingToken
sharingToken = soup.find("input", {"name": "sharingToken"})["value"]
logger.info(f"SharingToken: {sharingToken}")

# get baseauthentication secret
baseAuthSecret = base64.b64encode(
(sharingToken + ":null").encode()
).decode()
logger.info(f"BaseAuthSecret: {baseAuthSecret}")

# get auth header
auth_header = {
"Authorization": "Basic " + baseAuthSecret,
"requesttoken": requestToken,
}

parent_node = parent_node.add_child(
f"sciebo-{sharingToken}", None, "Sciebo Folder"
)

# recursive function to get all files in the sciebo folder
def get_sciebo_files(
    href: str, parent_node: Node, sharingToken: str, auth_header: dict
):
    """Recursively mirror a public Sciebo share into the node tree.

    Lists the WebDAV collection at ``href`` with a PROPFIND request and
    adds one child node to ``parent_node`` per entry: a "Sciebo Folder"
    node (which is then descended into) for hrefs ending in "/", and a
    "Sciebo File" node for everything else.

    Args:
        href: Server-relative WebDAV path of the collection to list; it
            is appended to ``sciebo_url`` to build the request URL.
        parent_node: Node that receives the entries found at ``href``.
        sharingToken: Share token, used only to build the fallback
            display name ``sciebo-<token>`` for an entry named "webdav".
        auth_header: Headers sent with the PROPFIND request and stored
            on every file node as ``additional_info``.
    """
    # Imported locally so this block does not rely on the module's
    # top-level import list.
    from urllib.parse import unquote

    # Request only the direct children; deeper levels are handled by the
    # recursive call below, so an unbounded listing would add duplicates.
    listing = self.session.request(
        "PROPFIND",
        sciebo_url + href,
        headers={**auth_header, "Depth": "1"},
    )

    # parse the multistatus response
    soup = bs(listing.text, features="xml")

    for entry in soup.find_all("d:response"):
        href_tag = entry.find("d:href")
        if href_tag is None:
            # Entry without a path: nothing to name or download.
            continue
        new_href = href_tag.text

        # The listing includes the requested collection itself.
        if new_href == href:
            logger.info(
                f"Skipping {new_href} because it is the current folder"
            )
            continue

        logger.info(f"response: {new_href}")

        # Collections end in "/", so their name is the second-to-last
        # path segment; files use the last segment.
        is_folder = new_href.endswith("/")
        segments = new_href.split("/")
        raw_name = segments[-2] if is_folder else segments[-1]

        # WebDAV hrefs are URL-encoded; decode the name so that entries
        # containing spaces or non-ASCII characters keep their real name.
        # The href itself stays encoded because it is reused as a URL.
        displayname = unquote(raw_name)
        if displayname == "webdav":
            displayname = f"sciebo-{sharingToken}"

        if is_folder:
            # create a node for the folder and descend into it
            folder_node = parent_node.add_child(
                displayname, None, "Sciebo Folder"
            )
            get_sciebo_files(
                new_href, folder_node, sharingToken, auth_header
            )
        else:
            # create a node for the file; the auth header travels with
            # the node so the later download can authenticate
            parent_node.add_child(
                displayname,
                None,
                "Sciebo File",
                url=sciebo_url + new_href,
                additional_info=auth_header,
            )

get_sciebo_files(
webdav_location, parent_node, sharingToken, auth_header
)


def main():
parser = ArgumentParser(
Expand Down

0 comments on commit 1997cc4

Please sign in to comment.