add download sciebo folders

Romern · Jun 12, 2024 · 1997cc4 · 1997cc4
1 parent a80208a
commit 1997cc4
Show file tree

Hide file tree

Showing 2 changed files with 98 additions and 12 deletions.
diff --git a/setup.cfg b/setup.cfg
@@ -24,6 +24,7 @@ install_requires =
     yt-dlp>=2021.12.27
     pdfkit>=0.6.0
     tqdm>=4.0.0
+    lxml>=5.0.0
 
 [options.extras_require]
 keyring =

diff --git a/syncmymoodle/__main__.py b/syncmymoodle/__main__.py
@@ -55,9 +55,7 @@ def __init__(
         self.type = type
         self.parent = parent
         self.children: List[Node] = []
-        self.additional_info = (
-            additional_info  # Currently only used for course_id in opencast
-        )
+        self.additional_info = additional_info  # Currently only used for course_id in opencast and auth header in sciebo
         self.is_downloaded = (
             is_downloaded  # Can also be used to exclude files from being downloaded
         )
@@ -829,6 +827,8 @@ def download_file(self, node):
         else:
             resume_size = 0
             header = dict()
+        if node.type.lower() == "sciebo file":
+            header = {**header, **node.additional_info}
 
         with closing(
             self.session.get(node.url, headers=header, stream=True)
@@ -1050,19 +1050,104 @@ def scanForLinks(
 
         # https://rwth-aachen.sciebo.de/s/XXX
         if self.config.get("used_modules", {}).get("url", {}).get("sciebo", {}):
-            sciebo_links = re.findall(
-                "https://rwth-aachen.sciebo.de/s/[a-zA-Z0-9-]+", text
+            sciebo_links = list(
+                set(re.findall("https://rwth-aachen.sciebo.de/s/[a-zA-Z0-9-]+", text))
             )
-            for vid in sciebo_links:
-                response = self.session.get(vid)
+            sciebo_url = "https://rwth-aachen.sciebo.de"
+            webdav_location = "/public.php/webdav/"
+            for link in sciebo_links:
+                logging.info(f"Found Sciebo Link: {link}")
+
+                # get the download page
+                response = self.session.get(link)
+
+                # parse html code
                 soup = bs(response.text, features="html.parser")
-                url = soup.find("input", {"name": "downloadURL"})
-                filename = soup.find("input", {"name": "filename"})
-                if url and filename:
-                    parent_node.add_child(
-                        filename["value"], url["value"], "Sciebo file", url=url["value"]
+
+                # get the requesttoken
+                requestToken = soup.head["data-requesttoken"]
+                logger.info(f"RequestToken: {requestToken}")
+
+                # read the value attribute of the input tag named sharingToken
+                sharingToken = soup.find("input", {"name": "sharingToken"})["value"]
+                logger.info(f"SharingToken: {sharingToken}")
+
+                # build the HTTP Basic auth credential: base64 of "<sharingToken>:null"
+                baseAuthSecret = base64.b64encode(
+                    (sharingToken + ":null").encode()
+                ).decode()
+                logger.info(f"BaseAuthSecret: {baseAuthSecret}")
+
+                # headers for the WebDAV requests: Basic auth plus the request token
+                auth_header = {
+                    "Authorization": "Basic " + baseAuthSecret,
+                    "requesttoken": requestToken,
+                }
+
+                parent_node = parent_node.add_child(
+                    f"sciebo-{sharingToken}", None, "Sciebo Folder"
+                )
+
+                # recursive function to get all files in the sciebo folder
+                def get_sciebo_files(
+                    href: str, parent_node: Node, sharingToken: str, auth_header: dict
+                ):
+
+                    # request the URL with the PROPFIND method and the header
+                    response = self.session.request(
+                        "PROPFIND", sciebo_url + href, headers=auth_header
                     )
 
+                    # parse the response
+                    soup = bs(response.text, features="xml")
+
+                    for response in soup.find_all("d:response"):
+                        # get the href of the response
+                        new_href = response.find("d:href").text
+
+                        if new_href == href:
+                            logger.info(
+                                f"Skipping {new_href} because it is the current folder"
+                            )
+                            continue
+
+                        logger.info(f"response: {response.find('d:href').text}")
+                        # derive the display name from the last path segment of the href
+                        displayname = (
+                            new_href.split("/")[-2]
+                            if new_href.endswith("/")
+                            else new_href.split("/")[-1]
+                        )
+                        displayname = (
+                            f"sciebo-{sharingToken}"
+                            if displayname == "webdav"
+                            else displayname
+                        )
+
+                        # check if the response is a folder
+                        if new_href.endswith("/"):
+                            # create a new node for the folder
+                            folder_node = parent_node.add_child(
+                                displayname, None, "Sciebo Folder"
+                            )
+                            # recursive call to get all files in the folder
+                            get_sciebo_files(
+                                new_href, folder_node, sharingToken, auth_header
+                            )
+                        else:
+                            # create a new node for the file
+                            parent_node.add_child(
+                                displayname,
+                                None,
+                                "Sciebo File",
+                                url=sciebo_url + new_href,
+                                additional_info=auth_header,
+                            )
+
+                get_sciebo_files(
+                    webdav_location, parent_node, sharingToken, auth_header
+                )
+
 
 def main():
     parser = ArgumentParser(