From ee3684bd1f0f2bd91d97dffcf304ff850e16fb5a Mon Sep 17 00:00:00 2001 From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com> Date: Mon, 20 Jan 2025 14:59:07 -0800 Subject: [PATCH 1/6] Update zenodo.py --- repo2docker/contentproviders/zenodo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/repo2docker/contentproviders/zenodo.py b/repo2docker/contentproviders/zenodo.py index 6982c3a7..c7b28835 100644 --- a/repo2docker/contentproviders/zenodo.py +++ b/repo2docker/contentproviders/zenodo.py @@ -49,7 +49,7 @@ def __init__(self): "https://data.caltech.edu/records/", "http://data.caltech.edu/records/", ], - "api": "https://data.caltech.edu/api/record/", + "api": "https://data.caltech.edu/api/records/", "files": "", "filepath": "metadata.electronic_location_and_access", "filename": "electronic_name.0", From d502018aa57a34ddeddc31197236c7d3fbd63723 Mon Sep 17 00:00:00 2001 From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com> Date: Tue, 21 Jan 2025 02:07:08 -0800 Subject: [PATCH 2/6] Update zenodo.py --- repo2docker/contentproviders/zenodo.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/repo2docker/contentproviders/zenodo.py b/repo2docker/contentproviders/zenodo.py index c7b28835..37955899 100644 --- a/repo2docker/contentproviders/zenodo.py +++ b/repo2docker/contentproviders/zenodo.py @@ -50,11 +50,11 @@ def __init__(self): "http://data.caltech.edu/records/", ], "api": "https://data.caltech.edu/api/records/", - "files": "", - "filepath": "metadata.electronic_location_and_access", - "filename": "electronic_name.0", - "download": "uniform_resource_identifier", - "type": "metadata.resourceType.resourceTypeGeneral", + "files": "links.files", + "filepath": "entries", + "filename": "key", + "download": "links.content", + "type": "metadata.upload_type" }, ] From 64829ee72473c8be18d47c1783ecf55efa60420c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 21 Jan 2025 19:53:56 +0000 Subject: [PATCH 3/6] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- repo2docker/contentproviders/zenodo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/repo2docker/contentproviders/zenodo.py b/repo2docker/contentproviders/zenodo.py index 37955899..30973775 100644 --- a/repo2docker/contentproviders/zenodo.py +++ b/repo2docker/contentproviders/zenodo.py @@ -54,7 +54,7 @@ def __init__(self): "filepath": "entries", "filename": "key", "download": "links.content", - "type": "metadata.upload_type" + "type": "metadata.upload_type", }, ] From f5ddfe785bf215d8e51e934de953daaf1b82f732 Mon Sep 17 00:00:00 2001 From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com> Date: Sun, 26 Jan 2025 16:04:01 -0800 Subject: [PATCH 4/6] Update zenodo.py --- repo2docker/contentproviders/zenodo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/repo2docker/contentproviders/zenodo.py b/repo2docker/contentproviders/zenodo.py index 30973775..fb526b26 100644 --- a/repo2docker/contentproviders/zenodo.py +++ b/repo2docker/contentproviders/zenodo.py @@ -34,8 +34,8 @@ def __init__(self): { "hostname": [ "https://zenodo.org/record/", - "http://zenodo.org/record/", "https://zenodo.org/records/", + "http://zenodo.org/record/", ], "api": "https://zenodo.org/api/records/", "files": "links.files", From bf03d82ab1a039debd98659d6cf71bdf647adf34 Mon Sep 17 00:00:00 2001 From: RohanBhattaraiNP <152933030+RohanBhattaraiNP@users.noreply.github.com> Date: Fri, 31 Jan 2025 04:44:10 -0800 Subject: [PATCH 5/6] Update zenodo.py --- repo2docker/contentproviders/zenodo.py | 35 +++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/repo2docker/contentproviders/zenodo.py b/repo2docker/contentproviders/zenodo.py index fb526b26..f2e8694b 100644 --- a/repo2docker/contentproviders/zenodo.py +++ b/repo2docker/contentproviders/zenodo.py @@ -14,9 +14,6 @@ class Zenodo(DoiProvider): def __init__(self): super().__init__() - # We need the hostname (url where records are), api url (for metadata), - # filepath (path to files in metadata), filename (path to filename in - # metadata), download (path to file download URL), and type (path to item type in metadata) self.hosts = [ { "hostname": [ @@ -30,6 +27,7 @@ def __init__(self): "filename": "key", "download": "links.content", "type": "metadata.upload_type", + "is_caltech": False }, { "hostname": [ @@ -43,6 +41,7 @@ def __init__(self): "filename": "key", "download": "links.content", "type": "metadata.upload_type", + "is_caltech": False }, { "hostname": [ @@ -55,6 +54,7 @@ def __init__(self): "filename": "key", "download": "links.content", "type": "metadata.upload_type", + "is_caltech": True }, ] @@ -93,6 +93,35 @@ def fetch(self, spec, output_dir, yield_output=False): for file_ref in files: yield from self.fetch_file(file_ref, host, output_dir, unzip=only_one_file) + def fetch_file(self, file_ref, host, output_dir, unzip=True): + """Fetch and save a file from Zenodo.""" + filename = deep_get(file_ref, host["filename"]) + if host["is_caltech"]: + # Construct the direct download URL for Caltech Data + download_url = f"https://data.caltech.edu/records/{self.record_id}/files/{filename}" + else: + # Use the standard Zenodo download URL structure + download_url = deep_get(file_ref, host["download"]) + + # Create output directory + makedirs(output_dir, exist_ok=True) + + output_path = path.join(output_dir, filename) + yield f"Downloading {filename} to {output_path}\n" + + # Get file using a streaming approach + response = self.urlopen(download_url) + content = response.content # Get the binary content + + # Write the content to file + with open(output_path, "wb") as fp: + fp.write(content) + + if unzip and filename.endswith(".zip"): + yield f"Extracting {filename} to {output_dir}\n" + shutil.unpack_archive(output_path, output_dir) + os.remove(output_path) + @property def content_id(self): """The Zenodo record ID as the content of a record is immutable""" From f5b0aa51fa20e88091fa3967b51701bf745c11bb Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 31 Jan 2025 12:46:16 +0000 Subject: [PATCH 6/6] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- repo2docker/contentproviders/zenodo.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/repo2docker/contentproviders/zenodo.py b/repo2docker/contentproviders/zenodo.py index f2e8694b..d6f04669 100644 --- a/repo2docker/contentproviders/zenodo.py +++ b/repo2docker/contentproviders/zenodo.py @@ -27,7 +27,7 @@ def __init__(self): "filename": "key", "download": "links.content", "type": "metadata.upload_type", - "is_caltech": False + "is_caltech": False, }, { "hostname": [ @@ -41,7 +41,7 @@ def __init__(self): "filename": "key", "download": "links.content", "type": "metadata.upload_type", - "is_caltech": False + "is_caltech": False, }, { "hostname": [ @@ -54,7 +54,7 @@ def __init__(self): "filename": "key", "download": "links.content", "type": "metadata.upload_type", - "is_caltech": True + "is_caltech": True, }, ] @@ -98,7 +98,9 @@ def fetch_file(self, file_ref, host, output_dir, unzip=True): filename = deep_get(file_ref, host["filename"]) if host["is_caltech"]: # Construct the direct download URL for Caltech Data - download_url = f"https://data.caltech.edu/records/{self.record_id}/files/{filename}" + download_url = ( + f"https://data.caltech.edu/records/{self.record_id}/files/{filename}" + ) else: # Use the standard Zenodo download URL structure download_url = deep_get(file_ref, host["download"]) @@ -112,7 +114,7 @@ def fetch_file(self, file_ref, host, output_dir, unzip=True): # Get file using a streaming approach response = self.urlopen(download_url) content = response.content # Get the binary content - + # Write the content to file with open(output_path, "wb") as fp: fp.write(content)