From 1997cc4b32b21b35a6cb92d345bad6678867681b Mon Sep 17 00:00:00 2001
From: Nils <nils@heesemann.eu>
Date: Thu, 13 Jun 2024 00:12:07 +0200
Subject: [PATCH] Add support for downloading Sciebo folders

---
 setup.cfg                |   1 +
 syncmymoodle/__main__.py | 109 ++++++++++++++++++++++++++++++++++-----
 2 files changed, 98 insertions(+), 12 deletions(-)

diff --git a/setup.cfg b/setup.cfg
index 3d085d7..d4b6f99 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -24,6 +24,7 @@ install_requires =
     yt-dlp>=2021.12.27
     pdfkit>=0.6.0
     tqdm>=4.0.0
+    lxml>=5.0.0
 
 [options.extras_require]
 keyring =
diff --git a/syncmymoodle/__main__.py b/syncmymoodle/__main__.py
index 0a9ff2a..8e5b695 100755
--- a/syncmymoodle/__main__.py
+++ b/syncmymoodle/__main__.py
@@ -55,9 +55,7 @@ def __init__(
         self.type = type
         self.parent = parent
         self.children: List[Node] = []
-        self.additional_info = (
-            additional_info  # Currently only used for course_id in opencast
-        )
+        self.additional_info = additional_info  # Currently only used for course_id in opencast and auth header in sciebo
         self.is_downloaded = (
             is_downloaded  # Can also be used to exclude files from being downloaded
         )
@@ -829,6 +827,8 @@ def download_file(self, node):
         else:
             resume_size = 0
             header = dict()
+        if node.type.lower() == "sciebo file":
+            header = {**header, **node.additional_info}
 
         with closing(
             self.session.get(node.url, headers=header, stream=True)
@@ -1050,19 +1050,104 @@ def scanForLinks(
 
         # https://rwth-aachen.sciebo.de/s/XXX
         if self.config.get("used_modules", {}).get("url", {}).get("sciebo", {}):
-            sciebo_links = re.findall(
-                "https://rwth-aachen.sciebo.de/s/[a-zA-Z0-9-]+", text
+            sciebo_links = list(
+                set(re.findall("https://rwth-aachen.sciebo.de/s/[a-zA-Z0-9-]+", text))
             )
-            for vid in sciebo_links:
-                response = self.session.get(vid)
+            sciebo_url = "https://rwth-aachen.sciebo.de"
+            webdav_location = "/public.php/webdav/"
+            for link in sciebo_links:
+                logger.info(f"Found Sciebo Link: {link}")
+
+                # The public share page embeds the tokens needed for WebDAV access.
+                response = self.session.get(link)
+
+                # parse html code
                 soup = bs(response.text, features="html.parser")
-                url = soup.find("input", {"name": "downloadURL"})
-                filename = soup.find("input", {"name": "filename"})
-                if url and filename:
-                    parent_node.add_child(
-                        filename["value"], url["value"], "Sciebo file", url=url["value"]
+
+                # Skip pages without the expected tokens instead of raising a
+                # TypeError/KeyError on a missing element or attribute.
+                token_input = soup.find("input", {"name": "sharingToken"})
+                if soup.head is None or token_input is None:
+                    logger.warning(f"Skipping {link}: not a Sciebo share page")
+                    continue
+                requestToken = soup.head.get("data-requesttoken")
+                sharingToken = token_input.get("value")
+                if not requestToken or not sharingToken:
+                    logger.warning(f"Skipping {link}: Sciebo share tokens missing")
+                    continue
+
+                # HTTP Basic credentials: share token as user, literal "null" as
+                # password. The secrets are deliberately not written to the log.
+                baseAuthSecret = base64.b64encode(
+                    (sharingToken + ":null").encode()
+                ).decode()
+                auth_header = {
+                    "Authorization": "Basic " + baseAuthSecret,
+                    "requesttoken": requestToken,
+                }
+
+                # Use a separate name: rebinding parent_node here would nest every
+                # following share found in the text inside this share's folder.
+                share_root = parent_node.add_child(
+                    f"sciebo-{sharingToken}", None, "Sciebo Folder"
+                )
+
+                def get_sciebo_files(
+                    href: str, parent_node: Node, sharingToken: str, auth_header: dict
+                ):
+                    """Recursively mirror a shared WebDAV folder into the node tree.
+
+                    Every listed entry becomes a child node; folders are descended into.
+                    """
+                    from urllib.parse import unquote
+
+                    listing = self.session.request(
+                        "PROPFIND", sciebo_url + href, headers=auth_header
                     )
 
+                    # parse the response
+                    soup = bs(listing.text, features="xml")
+
+                    for entry in soup.find_all("d:response"):
+                        new_href = entry.find("d:href").text
+
+                        # The listing also contains the requested folder itself.
+                        if new_href == href:
+                            logger.info(
+                                f"Skipping {new_href} because it is the current folder"
+                            )
+                            continue
+
+                        logger.info(f"response: {new_href}")
+                        # Folder hrefs end in "/"; their name precedes that slash.
+                        is_folder = new_href.endswith("/")
+                        segment = new_href.split("/")[-2 if is_folder else -1]
+                        # hrefs are percent-encoded, so decode the name (e.g. "%20").
+                        displayname = unquote(segment)
+                        if displayname == "webdav":
+                            displayname = f"sciebo-{sharingToken}"
+
+                        if is_folder:
+                            folder_node = parent_node.add_child(
+                                displayname, None, "Sciebo Folder"
+                            )
+                            get_sciebo_files(
+                                new_href, folder_node, sharingToken, auth_header
+                            )
+                        else:
+                            # The auth header is stored on the node for the download.
+                            parent_node.add_child(
+                                displayname,
+                                None,
+                                "Sciebo File",
+                                url=sciebo_url + new_href,
+                                additional_info=auth_header,
+                            )
+
+                get_sciebo_files(
+                    webdav_location, share_root, sharingToken, auth_header
+                )
+
 
 def main():
     parser = ArgumentParser(