jupyterhub · RohanBhattaraiNP · Jan 20, 2025 · Jan 21, 2025 · Jan 21, 2025 · Jan 27, 2025
diff --git a/repo2docker/contentproviders/zenodo.py b/repo2docker/contentproviders/zenodo.py
@@ -14,9 +14,6 @@ class Zenodo(DoiProvider):
 
     def __init__(self):
         super().__init__()
-        # We need the hostname (url where records are), api url (for metadata),
-        # filepath (path to files in metadata), filename (path to filename in
-        # metadata), download (path to file download URL), and type (path to item type in metadata)
         self.hosts = [
             {
                 "hostname": [
@@ -30,31 +27,34 @@ def __init__(self):
                 "filename": "key",
                 "download": "links.content",
                 "type": "metadata.upload_type",
+                "is_caltech": False,
             },
             {
                 "hostname": [
                     "https://zenodo.org/record/",
-                    "http://zenodo.org/record/",
                     "https://zenodo.org/records/",
+                    "http://zenodo.org/record/",
                 ],
                 "api": "https://zenodo.org/api/records/",
                 "files": "links.files",
                 "filepath": "entries",
                 "filename": "key",
                 "download": "links.content",
                 "type": "metadata.upload_type",
+                "is_caltech": False,
             },
             {
                 "hostname": [
                     "https://data.caltech.edu/records/",
                     "http://data.caltech.edu/records/",
                 ],
-                "api": "https://data.caltech.edu/api/record/",
-                "files": "",
-                "filepath": "metadata.electronic_location_and_access",
-                "filename": "electronic_name.0",
-                "download": "uniform_resource_identifier",
-                "type": "metadata.resourceType.resourceTypeGeneral",
+                "api": "https://data.caltech.edu/api/records/",
+                "files": "links.files",
+                "filepath": "entries",
+                "filename": "key",
+                "download": "links.content",
+                "type": "metadata.upload_type",
+                "is_caltech": True,
             },
         ]
 
@@ -93,6 +93,65 @@ def fetch(self, spec, output_dir, yield_output=False):
         for file_ref in files:
             yield from self.fetch_file(file_ref, host, output_dir, unzip=only_one_file)
 
+    def fetch_file(self, file_ref, host, output_dir, unzip=True):
+        """Download one file of a record into ``output_dir``.
+
+        This is a generator: it yields human-readable progress messages and
+        performs the download / extraction as a side effect.
+
+        Args:
+            file_ref: Metadata entry describing the file; the file name is read
+                from it via ``host["filename"]`` and, for non-Caltech hosts,
+                the download URL via ``host["download"]``.
+            host: Host configuration dict (same keys as ``self.hosts`` entries).
+            output_dir: Directory the file is written to (created if missing).
+            unzip: If true and the file name ends in ``.zip``, unpack the
+                archive into ``output_dir`` and delete the archive.
+
+        Raises:
+            ValueError: If the file name from the metadata would resolve to a
+                location outside ``output_dir``.
+        """
+        from urllib.parse import quote
+
+        filename = deep_get(file_ref, host["filename"])
+        if host["is_caltech"]:
+            # Caltech Data: build the direct download URL from the record id.
+            # The file name is percent-encoded ("/" kept) so that spaces,
+            # "#" or "?" in a name cannot corrupt the URL.
+            download_url = (
+                f"https://data.caltech.edu/records/{self.record_id}/files/"
+                f"{quote(filename)}"
+            )
+        else:
+            # Other hosts: the download URL is read from the file metadata.
+            download_url = deep_get(file_ref, host["download"])
+
+        # The file name comes from remote metadata: refuse absolute paths and
+        # ".." components that would write outside of output_dir.
+        output_path = path.join(output_dir, filename)
+        root = path.abspath(output_dir)
+        if not path.abspath(output_path).startswith(path.join(root, "")):
+            raise ValueError(f"Refusing to write outside {output_dir}: {filename}")
+
+        # Creates output_dir itself plus any sub-directory named in filename.
+        makedirs(path.dirname(path.abspath(output_path)), exist_ok=True)
+
+        yield f"Downloading {filename} to {output_path}\n"
+
+        # NOTE: the whole body is buffered in memory (``.content``), not streamed.
+        response = self.urlopen(download_url)
+        # Fail loudly on HTTP errors rather than saving an error page as data.
+        # Assumes a requests-style response (it already exposes ``.content``).
+        response.raise_for_status()
+
+        with open(output_path, "wb") as fp:
+            fp.write(response.content)
+
+        if unzip and filename.endswith(".zip"):
+            yield f"Extracting {filename} to {output_dir}\n"
+            shutil.unpack_archive(output_path, output_dir)
+            os.remove(output_path)
+
     @property
     def content_id(self):
         """The Zenodo record ID as the content of a record is immutable"""