From b7d462d982ac3c590208bb2a7e2493fd888a0689 Mon Sep 17 00:00:00 2001 From: katielink Date: Mon, 1 May 2023 17:24:20 +0100 Subject: [PATCH 01/26] Add huggingface_hub as an optional dependency --- docs/requirements.txt | 1 + docs/source/installation.md | 8 ++++---- monai/bundle/scripts.py | 1 + requirements-dev.txt | 1 + setup.cfg | 3 +++ 5 files changed, 10 insertions(+), 4 deletions(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 9369548c67..eabf66afed 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -37,3 +37,4 @@ optuna opencv-python-headless onnx>=1.13.0 onnxruntime; python_version <= '3.10' +huggingface_hub diff --git a/docs/source/installation.md b/docs/source/installation.md index c3e7297da6..3a8f2b07d9 100644 --- a/docs/source/installation.md +++ b/docs/source/installation.md @@ -10,8 +10,8 @@ - [Uninstall the packages](#uninstall-the-packages) - [From conda-forge](#from-conda-forge) - [From GitHub](#from-github) - - [Option 1 (as a part of your system-wide module)](#option-1-as-a-part-of-your-system-wide-module) - - [Option 2 (editable installation)](#option-2-editable-installation) + - [Option 1 (as a part of your system-wide module):](#option-1-as-a-part-of-your-system-wide-module) + - [Option 2 (editable installation):](#option-2-editable-installation) - [Validating the install](#validating-the-install) - [MONAI version string](#monai-version-string) - [From DockerHub](#from-dockerhub) @@ -254,10 +254,10 @@ Since MONAI v0.2.0, the extras syntax such as `pip install 'monai[nibabel]'` is - The options are ``` -[nibabel, skimage, pillow, tensorboard, gdown, ignite, torchvision, itk, tqdm, lmdb, psutil, cucim, openslide, pandas, einops, transformers, mlflow, clearml, matplotlib, tensorboardX, tifffile, imagecodecs, pyyaml, fire, jsonschema, ninja, pynrrd, pydicom, h5py, nni, optuna, onnx, onnxruntime] +[nibabel, skimage, pillow, tensorboard, gdown, ignite, torchvision, itk, tqdm, lmdb, psutil, cucim, openslide, pandas, einops, transformers, mlflow, clearml, matplotlib, tensorboardX, tifffile, imagecodecs, pyyaml, fire, jsonschema, ninja, pynrrd, pydicom, h5py, nni, optuna, onnx, onnxruntime, huggingface_hub] ``` which correspond to `nibabel`, `scikit-image`, `pillow`, `tensorboard`, -`gdown`, `pytorch-ignite`, `torchvision`, `itk`, `tqdm`, `lmdb`, `psutil`, `cucim`, `openslide-python`, `pandas`, `einops`, `transformers`, `mlflow`, `clearml`, `matplotlib`, `tensorboardX`, `tifffile`, `imagecodecs`, `pyyaml`, `fire`, `jsonschema`, `ninja`, `pynrrd`, `pydicom`, `h5py`, `nni`, `optuna`, `onnx`, `onnxruntime`, respectively. +`gdown`, `pytorch-ignite`, `torchvision`, `itk`, `tqdm`, `lmdb`, `psutil`, `cucim`, `openslide-python`, `pandas`, `einops`, `transformers`, `mlflow`, `clearml`, `matplotlib`, `tensorboardX`, `tifffile`, `imagecodecs`, `pyyaml`, `fire`, `jsonschema`, `ninja`, `pynrrd`, `pydicom`, `h5py`, `nni`, `optuna`, `onnx`, `onnxruntime`, `huggingface_hub`, respectively. - `pip install 'monai[all]'` installs all the optional dependencies. diff --git a/monai/bundle/scripts.py b/monai/bundle/scripts.py index b3e1174b3f..7cabc0cc2b 100644 --- a/monai/bundle/scripts.py +++ b/monai/bundle/scripts.py @@ -57,6 +57,7 @@ Checkpoint, has_ignite = optional_import("ignite.handlers", IgniteInfo.OPT_IMPORT_VERSION, min_version, "Checkpoint") requests_get, has_requests = optional_import("requests", name="get") onnx, _ = optional_import("onnx") +huggingface_hub, _ = optional_import("huggingface_hub") logger = get_logger(module_name=__name__) diff --git a/requirements-dev.txt b/requirements-dev.txt index f02aa48ba9..3f2973d005 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -51,3 +51,4 @@ git+https://github.com/Project-MONAI/MetricsReloaded@monai-support#egg=MetricsRe onnx>=1.13.0 onnxruntime; python_version <= '3.10' typeguard<3 # https://github.com/microsoft/nni/issues/5457 +huggingface_hub diff --git a/setup.cfg b/setup.cfg index 0670b11eec..029ea08513 100644 --- a/setup.cfg +++ b/setup.cfg @@ -79,6 +79,7 @@ all = optuna onnx>=1.13.0 onnxruntime; python_version <= '3.10' + huggingface_hub nibabel = nibabel ninja = @@ -145,6 +146,8 @@ onnx = # # workaround https://github.com/Project-MONAI/MONAI/issues/5882 # MetricsReloaded = # MetricsReloaded @ git+https://github.com/Project-MONAI/MetricsReloaded@monai-support#egg=MetricsReloaded +huggingface_hub = + huggingface_hub [flake8] select = B,C,E,F,N,P,T4,W,B9 From be3e6784eb877577010678108510369c207fdc40 Mon Sep 17 00:00:00 2001 From: katielink Date: Mon, 1 May 2023 18:32:08 +0100 Subject: [PATCH 02/26] Add download from huggingface_hub functionality --- monai/bundle/scripts.py | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/monai/bundle/scripts.py b/monai/bundle/scripts.py index 7cabc0cc2b..9379532465 100644 --- a/monai/bundle/scripts.py +++ b/monai/bundle/scripts.py @@ -19,7 +19,7 @@ from collections.abc import Mapping, Sequence from pathlib import Path from pydoc import locate -from shutil import copyfile +from shutil import copyfile, copytree, rmtree from textwrap import dedent from typing import Any, Callable @@ -193,6 +193,15 @@ def _download_from_ngc( extractall(filepath=filepath, output_dir=extract_path, has_base=True) +def _download_from_huggingface_hub(repo: str, download_path: str, filename: str) -> None: + if len(repo.split("/")) != 2: + raise ValueError("if source is `hf_hub`, repo should be in the form `repo_owner/repo_name`") + snapshot_folder = huggingface_hub.snapshot_download(repo_id=repo, cache_dir=download_path) + download_dir = os.path.join(download_path, filename) + copytree(snapshot_folder, download_dir, dirs_exist_ok=True) + rmtree(snapshot_folder) + + def _get_latest_bundle_version(source: str, name: str, repo: str) -> dict[str, list[str] | str] | Any | None: if source == "ngc": name = _add_ngc_prefix(name) @@ -248,6 +257,9 @@ def download( # Execute this module as a CLI entry, and download bundle from ngc with latest version: python -m monai.bundle download --name --source "ngc" --bundle_dir "./" + # Execute this module as a CLI entry, and download bundle from Hugging Face Hub: + python -m monai.bundle download --name "bundle_name" --source "huggingface_hub" --repo "repo_owner/repo_name" + # Execute this module as a CLI entry, and download bundle via URL: python -m monai.bundle download --name --url @@ -271,9 +283,10 @@ def download( Default is `bundle` subfolder under `torch.hub.get_dir()`. source: storage location name. This argument is used when `url` is `None`. In default, the value is achieved from the environment variable BUNDLE_DOWNLOAD_SRC, and - it should be "ngc" or "github". - repo: repo name. This argument is used when `url` is `None` and `source` is "github". - If used, it should be in the form of "repo_owner/repo_name/release_tag". + it should be "ngc", "github", or "huggingface_hub". + repo: repo name. This argument is used when `url` is `None` and `source` is "github" or "huggingface_hub". + If `source` is "github", it should be in the form of "repo_owner/repo_name/release_tag". + If `source` is "huggingface_hub", it should be in the form of "repo_owner/repo_name". url: url to download the data. If not `None`, data will be downloaded directly and `source` will not be checked. If `name` is `None`, filename is determined by `monai.apps.utils._basename(url)`. @@ -333,9 +346,17 @@ def download( remove_prefix=remove_prefix_, progress=progress_, ) + elif source_ == "huggingface_hub": + if name_ is None: + raise ValueError(f"To download from source: 'huggingface_hub', `name` must be provided, got {name_}.") + _download_from_huggingface_hub( + repo=repo_, + download_path=bundle_dir_, + filename=name_ + ) else: raise NotImplementedError( - f"Currently only download from `url`, source 'github' or 'ngc' are implemented, got source: {source_}." + f"Currently only download from `url`, source 'github', 'ngc', or 'huggingface_hub' are implemented, got source: {source_}." ) From bd61de88f404031a9c4cb59c1a61b66c9493aa78 Mon Sep 17 00:00:00 2001 From: katielink Date: Mon, 1 May 2023 17:24:20 +0100 Subject: [PATCH 03/26] Add huggingface_hub as an optional dependency Signed-off-by: katielink --- docs/requirements.txt | 1 + docs/source/installation.md | 8 ++++---- monai/bundle/scripts.py | 1 + requirements-dev.txt | 1 + setup.cfg | 3 +++ 5 files changed, 10 insertions(+), 4 deletions(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 9369548c67..eabf66afed 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -37,3 +37,4 @@ optuna opencv-python-headless onnx>=1.13.0 onnxruntime; python_version <= '3.10' +huggingface_hub diff --git a/docs/source/installation.md b/docs/source/installation.md index c3e7297da6..3a8f2b07d9 100644 --- a/docs/source/installation.md +++ b/docs/source/installation.md @@ -10,8 +10,8 @@ - [Uninstall the packages](#uninstall-the-packages) - [From conda-forge](#from-conda-forge) - [From GitHub](#from-github) - - [Option 1 (as a part of your system-wide module)](#option-1-as-a-part-of-your-system-wide-module) - - [Option 2 (editable installation)](#option-2-editable-installation) + - [Option 1 (as a part of your system-wide module):](#option-1-as-a-part-of-your-system-wide-module) + - [Option 2 (editable installation):](#option-2-editable-installation) - [Validating the install](#validating-the-install) - [MONAI version string](#monai-version-string) - [From DockerHub](#from-dockerhub) @@ -254,10 +254,10 @@ Since MONAI v0.2.0, the extras syntax such as `pip install 'monai[nibabel]'` is - The options are ``` -[nibabel, skimage, pillow, tensorboard, gdown, ignite, torchvision, itk, tqdm, lmdb, psutil, cucim, openslide, pandas, einops, transformers, mlflow, clearml, matplotlib, tensorboardX, tifffile, imagecodecs, pyyaml, fire, jsonschema, ninja, pynrrd, pydicom, h5py, nni, optuna, onnx, onnxruntime] +[nibabel, skimage, pillow, tensorboard, gdown, ignite, torchvision, itk, tqdm, lmdb, psutil, cucim, openslide, pandas, einops, transformers, mlflow, clearml, matplotlib, tensorboardX, tifffile, imagecodecs, pyyaml, fire, jsonschema, ninja, pynrrd, pydicom, h5py, nni, optuna, onnx, onnxruntime, huggingface_hub] ``` which correspond to `nibabel`, `scikit-image`, `pillow`, `tensorboard`, -`gdown`, `pytorch-ignite`, `torchvision`, `itk`, `tqdm`, `lmdb`, `psutil`, `cucim`, `openslide-python`, `pandas`, `einops`, `transformers`, `mlflow`, `clearml`, `matplotlib`, `tensorboardX`, `tifffile`, `imagecodecs`, `pyyaml`, `fire`, `jsonschema`, `ninja`, `pynrrd`, `pydicom`, `h5py`, `nni`, `optuna`, `onnx`, `onnxruntime`, respectively. +`gdown`, `pytorch-ignite`, `torchvision`, `itk`, `tqdm`, `lmdb`, `psutil`, `cucim`, `openslide-python`, `pandas`, `einops`, `transformers`, `mlflow`, `clearml`, `matplotlib`, `tensorboardX`, `tifffile`, `imagecodecs`, `pyyaml`, `fire`, `jsonschema`, `ninja`, `pynrrd`, `pydicom`, `h5py`, `nni`, `optuna`, `onnx`, `onnxruntime`, `huggingface_hub`, respectively. - `pip install 'monai[all]'` installs all the optional dependencies. diff --git a/monai/bundle/scripts.py b/monai/bundle/scripts.py index b3e1174b3f..7cabc0cc2b 100644 --- a/monai/bundle/scripts.py +++ b/monai/bundle/scripts.py @@ -57,6 +57,7 @@ Checkpoint, has_ignite = optional_import("ignite.handlers", IgniteInfo.OPT_IMPORT_VERSION, min_version, "Checkpoint") requests_get, has_requests = optional_import("requests", name="get") onnx, _ = optional_import("onnx") +huggingface_hub, _ = optional_import("huggingface_hub") logger = get_logger(module_name=__name__) diff --git a/requirements-dev.txt b/requirements-dev.txt index f02aa48ba9..3f2973d005 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -51,3 +51,4 @@ git+https://github.com/Project-MONAI/MetricsReloaded@monai-support#egg=MetricsRe onnx>=1.13.0 onnxruntime; python_version <= '3.10' typeguard<3 # https://github.com/microsoft/nni/issues/5457 +huggingface_hub diff --git a/setup.cfg b/setup.cfg index 0670b11eec..029ea08513 100644 --- a/setup.cfg +++ b/setup.cfg @@ -79,6 +79,7 @@ all = optuna onnx>=1.13.0 onnxruntime; python_version <= '3.10' + huggingface_hub nibabel = nibabel ninja = @@ -145,6 +146,8 @@ onnx = # # workaround https://github.com/Project-MONAI/MONAI/issues/5882 # MetricsReloaded = # MetricsReloaded @ git+https://github.com/Project-MONAI/MetricsReloaded@monai-support#egg=MetricsReloaded +huggingface_hub = + huggingface_hub [flake8] select = B,C,E,F,N,P,T4,W,B9 From d23aacd11b10dcaebb6f2cf35c10fe70b6253d0c Mon Sep 17 00:00:00 2001 From: katielink Date: Mon, 1 May 2023 18:32:08 +0100 Subject: [PATCH 04/26] Add download from huggingface_hub functionality Signed-off-by: katielink --- monai/bundle/scripts.py | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/monai/bundle/scripts.py b/monai/bundle/scripts.py index 7cabc0cc2b..9379532465 100644 --- a/monai/bundle/scripts.py +++ b/monai/bundle/scripts.py @@ -19,7 +19,7 @@ from collections.abc import Mapping, Sequence from pathlib import Path from pydoc import locate -from shutil import copyfile +from shutil import copyfile, copytree, rmtree from textwrap import dedent from typing import Any, Callable @@ -193,6 +193,15 @@ def _download_from_ngc( extractall(filepath=filepath, output_dir=extract_path, has_base=True) +def _download_from_huggingface_hub(repo: str, download_path: str, filename: str) -> None: + if len(repo.split("/")) != 2: + raise ValueError("if source is `hf_hub`, repo should be in the form `repo_owner/repo_name`") + snapshot_folder = huggingface_hub.snapshot_download(repo_id=repo, cache_dir=download_path) + download_dir = os.path.join(download_path, filename) + copytree(snapshot_folder, download_dir, dirs_exist_ok=True) + rmtree(snapshot_folder) + + def _get_latest_bundle_version(source: str, name: str, repo: str) -> dict[str, list[str] | str] | Any | None: if source == "ngc": name = _add_ngc_prefix(name) @@ -248,6 +257,9 @@ def download( # Execute this module as a CLI entry, and download bundle from ngc with latest version: python -m monai.bundle download --name --source "ngc" --bundle_dir "./" + # Execute this module as a CLI entry, and download bundle from Hugging Face Hub: + python -m monai.bundle download --name "bundle_name" --source "huggingface_hub" --repo "repo_owner/repo_name" + # Execute this module as a CLI entry, and download bundle via URL: python -m monai.bundle download --name --url @@ -271,9 +283,10 @@ def download( Default is `bundle` subfolder under `torch.hub.get_dir()`. source: storage location name. This argument is used when `url` is `None`. In default, the value is achieved from the environment variable BUNDLE_DOWNLOAD_SRC, and - it should be "ngc" or "github". - repo: repo name. This argument is used when `url` is `None` and `source` is "github". - If used, it should be in the form of "repo_owner/repo_name/release_tag". + it should be "ngc", "github", or "huggingface_hub". + repo: repo name. This argument is used when `url` is `None` and `source` is "github" or "huggingface_hub". + If `source` is "github", it should be in the form of "repo_owner/repo_name/release_tag". + If `source` is "huggingface_hub", it should be in the form of "repo_owner/repo_name". url: url to download the data. If not `None`, data will be downloaded directly and `source` will not be checked. If `name` is `None`, filename is determined by `monai.apps.utils._basename(url)`. @@ -333,9 +346,17 @@ def download( remove_prefix=remove_prefix_, progress=progress_, ) + elif source_ == "huggingface_hub": + if name_ is None: + raise ValueError(f"To download from source: 'huggingface_hub', `name` must be provided, got {name_}.") + _download_from_huggingface_hub( + repo=repo_, + download_path=bundle_dir_, + filename=name_ + ) else: raise NotImplementedError( - f"Currently only download from `url`, source 'github' or 'ngc' are implemented, got source: {source_}." + f"Currently only download from `url`, source 'github', 'ngc', or 'huggingface_hub' are implemented, got source: {source_}." ) From 165d65941a2e17a1fdef6bc2d21845cfc10d8e0f Mon Sep 17 00:00:00 2001 From: katielink Date: Tue, 2 May 2023 18:05:55 +0100 Subject: [PATCH 05/26] Refactored downloading bundle Signed-off-by: katielink --- monai/bundle/scripts.py | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/monai/bundle/scripts.py b/monai/bundle/scripts.py index 9379532465..09b4c6cf62 100644 --- a/monai/bundle/scripts.py +++ b/monai/bundle/scripts.py @@ -19,7 +19,7 @@ from collections.abc import Mapping, Sequence from pathlib import Path from pydoc import locate -from shutil import copyfile, copytree, rmtree +from shutil import copyfile, copytree from textwrap import dedent from typing import Any, Callable @@ -193,13 +193,11 @@ def _download_from_ngc( extractall(filepath=filepath, output_dir=extract_path, has_base=True) -def _download_from_huggingface_hub(repo: str, download_path: str, filename: str) -> None: +def _download_from_huggingface_hub(repo: str, download_path: str, filename: str, version: str) -> None: if len(repo.split("/")) != 2: - raise ValueError("if source is `hf_hub`, repo should be in the form `repo_owner/repo_name`") - snapshot_folder = huggingface_hub.snapshot_download(repo_id=repo, cache_dir=download_path) - download_dir = os.path.join(download_path, filename) - copytree(snapshot_folder, download_dir, dirs_exist_ok=True) - rmtree(snapshot_folder) + raise ValueError("if source is `huggingface_hub`, repo should be in the form `repo_owner/repo_name`") + extract_path = os.path.join(download_path, filename) + huggingface_hub.snapshot_download(repo_id=repo, revision=version, local_dir=extract_path, local_dir_use_symlinks="auto") def _get_latest_bundle_version(source: str, name: str, repo: str) -> dict[str, list[str] | str] | Any | None: @@ -213,6 +211,10 @@ def _get_latest_bundle_version(source: str, name: str, repo: str) -> dict[str, l elif source == "github": repo_owner, repo_name, tag_name = repo.split("/") return get_bundle_versions(name, repo=f"{repo_owner}/{repo_name}", tag=tag_name)["latest_version"] + elif source == "huggingface_hub": + git_revisions = huggingface_hub.list_repo_refs(repo_id=f"{repo}/{name}", repo_type="model") + #TODO: implement this + return None else: raise ValueError(f"To get the latest bundle version, source should be 'github' or 'ngc', got {source}.") @@ -278,7 +280,7 @@ def download( "monai_brats_mri_segmentation" in ngc: https://catalog.ngc.nvidia.com/models?filters=&orderBy=scoreDESC&query=monai. version: version name of the target bundle to download, like: "0.1.0". If `None`, will download - the latest version. + the latest version. If `source` is "huggingface_hub", this argument is a Git revision id. bundle_dir: target directory to store the downloaded data. Default is `bundle` subfolder under `torch.hub.get_dir()`. source: storage location name. This argument is used when `url` is `None`. @@ -352,7 +354,8 @@ def download( _download_from_huggingface_hub( repo=repo_, download_path=bundle_dir_, - filename=name_ + filename=name_, + version=version_, ) else: raise NotImplementedError( @@ -387,7 +390,7 @@ def load( "monai_brats_mri_segmentation" in ngc: https://catalog.ngc.nvidia.com/models?filters=&orderBy=scoreDESC&query=monai. version: version name of the target bundle to download, like: "0.1.0". If `None`, will download - the latest version. + the latest version. If `source` is "huggingface_hub", this argument is a Git revision id. model_file: the relative path of the model weights or TorchScript module within bundle. If `None`, "models/model.pt" or "models/model.ts" will be used. load_ts_module: a flag to specify if loading the TorchScript module. @@ -396,9 +399,10 @@ def load( source: storage location name. This argument is used when `model_file` is not existing locally and need to be downloaded first. In default, the value is achieved from the environment variable BUNDLE_DOWNLOAD_SRC, and - it should be "ngc" or "github". - repo: repo name. This argument is used when `url` is `None` and `source` is "github". - If used, it should be in the form of "repo_owner/repo_name/release_tag". + it should be "ngc", "github", or "huggingface_hub". + repo: repo name. This argument is used when `url` is `None` and `source` is "github" or "huggingface_hub". + If `source` is "github", it should be in the form of "repo_owner/repo_name/release_tag". + If `source` is "huggingface_hub", it should be in the form of "repo_owner/repo_name". remove_prefix: This argument is used when `source` is "ngc". Currently, all ngc bundles have the ``monai_`` prefix, which is not existing in their model zoo contrasts. In order to maintain the consistency between these two sources, remove prefix is necessary. From e11e6a1d8ae15f01303064377fd1ef7c3ed8691c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 2 May 2023 17:08:05 +0000 Subject: [PATCH 06/26] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- monai/bundle/scripts.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/monai/bundle/scripts.py b/monai/bundle/scripts.py index 09b4c6cf62..366862c878 100644 --- a/monai/bundle/scripts.py +++ b/monai/bundle/scripts.py @@ -19,7 +19,7 @@ from collections.abc import Mapping, Sequence from pathlib import Path from pydoc import locate -from shutil import copyfile, copytree +from shutil import copyfile from textwrap import dedent from typing import Any, Callable @@ -212,7 +212,7 @@ def _get_latest_bundle_version(source: str, name: str, repo: str) -> dict[str, l repo_owner, repo_name, tag_name = repo.split("/") return get_bundle_versions(name, repo=f"{repo_owner}/{repo_name}", tag=tag_name)["latest_version"] elif source == "huggingface_hub": - git_revisions = huggingface_hub.list_repo_refs(repo_id=f"{repo}/{name}", repo_type="model") + huggingface_hub.list_repo_refs(repo_id=f"{repo}/{name}", repo_type="model") #TODO: implement this return None else: From 44b09aca8b0ad9b6135060b737b8c4b513fcdbf5 Mon Sep 17 00:00:00 2001 From: katielink Date: Wed, 21 Jun 2023 14:07:51 -0400 Subject: [PATCH 07/26] Add initial push functionality --- monai/bundle/scripts.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/monai/bundle/scripts.py b/monai/bundle/scripts.py index 09b4c6cf62..f8c3ffa742 100644 --- a/monai/bundle/scripts.py +++ b/monai/bundle/scripts.py @@ -1515,3 +1515,16 @@ def init_bundle( copyfile(str(ckpt_file), str(models_dir / "model.pt")) elif network is not None: save_state(network, str(models_dir / "model.pt")) + + +def push_to_hf_hub(bundle_dir: str, repo_name: str) -> None: + """ + Push the current bundle to the Hugging Face Hub. + + Args: + bundle_dir: path to the bundle directory to push + repo_name: name of the repo to create or push to + """ + hf_api = huggingface_hub.HfApi() + repo_id = hf_api.create_repo(name=repo_name, exist_ok=True) + return hf_api.upload_folder(path=bundle_dir, repo_id=repo_id) From de050334293463a5defd7475440882feaf93bb14 Mon Sep 17 00:00:00 2001 From: katielink Date: Wed, 21 Jun 2023 14:15:31 -0400 Subject: [PATCH 08/26] Fix docstring Signed-off-by: katielink DCO Remediation Commit for katielink I, katielink , hereby add my Signed-off-by to this commit: b7d462d982ac3c590208bb2a7e2493fd888a0689 I, katielink , hereby add my Signed-off-by to this commit: be3e6784eb877577010678108510369c207fdc40 Signed-off-by: katielink --- monai/bundle/scripts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monai/bundle/scripts.py b/monai/bundle/scripts.py index 913c563f8e..984003891e 100644 --- a/monai/bundle/scripts.py +++ b/monai/bundle/scripts.py @@ -1523,7 +1523,7 @@ def push_to_hf_hub(bundle_dir: str, repo_name: str) -> None: Args: bundle_dir: path to the bundle directory to push - repo_name: name of the repo to create or push to + repo_name: name of the repo to create or push to the HF Hub """ hf_api = huggingface_hub.HfApi() repo_id = hf_api.create_repo(name=repo_name, exist_ok=True) From 12fc6c2e91d715e8abb2d35b8121e05092aed3da Mon Sep 17 00:00:00 2001 From: katielink Date: Sun, 16 Jul 2023 21:27:24 -0400 Subject: [PATCH 09/26] Style + naming updates Signed-off-by: katielink --- monai/bundle/__init__.py | 1 + monai/bundle/scripts.py | 18 +++++++----------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/monai/bundle/__init__.py b/monai/bundle/__init__.py index e8ea9d62b0..e69fc80d89 100644 --- a/monai/bundle/__init__.py +++ b/monai/bundle/__init__.py @@ -24,6 +24,7 @@ init_bundle, load, onnx_export, + push_to_hf_hub, run, run_workflow, trt_export, diff --git a/monai/bundle/scripts.py b/monai/bundle/scripts.py index 51cd044907..30c6aa7a87 100644 --- a/monai/bundle/scripts.py +++ b/monai/bundle/scripts.py @@ -193,11 +193,11 @@ def _download_from_ngc( extractall(filepath=filepath, output_dir=extract_path, has_base=True) -def _download_from_huggingface_hub(repo: str, download_path: str, filename: str, version: str) -> None: - if len(repo.split("/")) != 2: +def _download_from_huggingface_hub(repo_id: str, download_path: str, filename: str, version: str) -> None: + if len(repo_id.split("/")) != 2: raise ValueError("if source is `huggingface_hub`, repo should be in the form `repo_owner/repo_name`") extract_path = os.path.join(download_path, filename) - huggingface_hub.snapshot_download(repo_id=repo, revision=version, local_dir=extract_path, local_dir_use_symlinks="auto") + huggingface_hub.snapshot_download(repo_id=repo_id, revision=version, local_dir=extract_path) def _get_latest_bundle_version(source: str, name: str, repo: str) -> dict[str, list[str] | str] | Any | None: @@ -212,7 +212,7 @@ def _get_latest_bundle_version(source: str, name: str, repo: str) -> dict[str, l repo_owner, repo_name, tag_name = repo.split("/") return get_bundle_versions(name, repo=f"{repo_owner}/{repo_name}", tag=tag_name)["latest_version"] elif source == "huggingface_hub": - huggingface_hub.list_repo_refs(repo_id=f"{repo}/{name}", repo_type="model") + huggingface_hub.list_repo_refs(repo_id=repo, repo_type="model") #TODO: implement this return None else: @@ -351,12 +351,7 @@ def download( elif source_ == "huggingface_hub": if name_ is None: raise ValueError(f"To download from source: 'huggingface_hub', `name` must be provided, got {name_}.") - _download_from_huggingface_hub( - repo=repo_, - download_path=bundle_dir_, - filename=name_, - version=version_, - ) + _download_from_huggingface_hub(repo=repo_, download_path=bundle_dir_, filename=name_, version=version_) else: raise NotImplementedError( f"Currently only download from `url`, source 'github', 'ngc', or 'huggingface_hub' are implemented, got source: {source_}." @@ -1538,5 +1533,6 @@ def push_to_hf_hub(bundle_dir: str, repo_name: str) -> None: repo_name: name of the repo to create or push to the HF Hub """ hf_api = huggingface_hub.HfApi() - repo_id = hf_api.create_repo(name=repo_name, exist_ok=True) + repo_url = hf_api.create_repo(name=repo_name, exist_ok=True) + repo_id = repo_url.repo_id return hf_api.upload_folder(path=bundle_dir, repo_id=repo_id) From 393e0d0b16a5c4754bcb36f3ec88ac1c86141a4e Mon Sep 17 00:00:00 2001 From: katielink Date: Sun, 16 Jul 2023 23:02:57 -0400 Subject: [PATCH 10/26] Add exception for repo format for hf hub Signed-off-by: katielink --- monai/bundle/scripts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monai/bundle/scripts.py b/monai/bundle/scripts.py index 30c6aa7a87..82b488d303 100644 --- a/monai/bundle/scripts.py +++ b/monai/bundle/scripts.py @@ -321,7 +321,7 @@ def download( bundle_dir_ = _process_bundle_dir(bundle_dir_) if repo_ is None: repo_ = "Project-MONAI/model-zoo/hosting_storage_v1" - if len(repo_.split("/")) != 3: + if len(repo_.split("/")) != 3 and source_ != "huggingface_hub": raise ValueError("repo should be in the form of `repo_owner/repo_name/release_tag`.") if url_ is not None: From 96ef680869afcb54540dd4e248fb3aa6066ea6b7 Mon Sep 17 00:00:00 2001 From: katielink Date: Mon, 14 Aug 2023 20:52:13 -0400 Subject: [PATCH 11/26] Refactor integration for better repo/bundle versioning Signed-off-by: katielink --- monai/bundle/scripts.py | 91 ++++++++++++++++++++++++++--------- tests/test_bundle_download.py | 33 +++++++++++++ 2 files changed, 101 insertions(+), 23 deletions(-) diff --git a/monai/bundle/scripts.py b/monai/bundle/scripts.py index 82b488d303..9f0dc5c2e0 100644 --- a/monai/bundle/scripts.py +++ b/monai/bundle/scripts.py @@ -193,13 +193,6 @@ def _download_from_ngc( extractall(filepath=filepath, output_dir=extract_path, has_base=True) -def _download_from_huggingface_hub(repo_id: str, download_path: str, filename: str, version: str) -> None: - if len(repo_id.split("/")) != 2: - raise ValueError("if source is `huggingface_hub`, repo should be in the form `repo_owner/repo_name`") - extract_path = os.path.join(download_path, filename) - huggingface_hub.snapshot_download(repo_id=repo_id, revision=version, local_dir=extract_path) - - def _get_latest_bundle_version(source: str, name: str, repo: str) -> dict[str, list[str] | str] | Any | None: if source == "ngc": name = _add_ngc_prefix(name) @@ -212,9 +205,13 @@ def _get_latest_bundle_version(source: str, name: str, repo: str) -> dict[str, l repo_owner, repo_name, tag_name = repo.split("/") return get_bundle_versions(name, repo=f"{repo_owner}/{repo_name}", tag=tag_name)["latest_version"] elif source == "huggingface_hub": - huggingface_hub.list_repo_refs(repo_id=repo, repo_type="model") - #TODO: implement this - return None + refs = huggingface_hub.list_repo_refs(repo_id=repo) + if len(refs.tags) > 0: + all_versions = [t.name for t in refs.tags] # git tags, not to be confused with `tag` + latest_version = ['latest_version' if 'latest_version' in all_versions else all_versions[-1]][0] + else: + latest_version = [b.name for b in refs.branches][0] # use the branch that was last updated + return latest_version else: raise ValueError(f"To get the latest bundle version, source should be 'github' or 'ngc', got {source}.") @@ -280,7 +277,7 @@ def download( "monai_brats_mri_segmentation" in ngc: https://catalog.ngc.nvidia.com/models?filters=&orderBy=scoreDESC&query=monai. version: version name of the target bundle to download, like: "0.1.0". If `None`, will download - the latest version. If `source` is "huggingface_hub", this argument is a Git revision id. + the latest version (or the last commit to the `main` branch in the case of Hugging Face Hub). bundle_dir: target directory to store the downloaded data. Default is `bundle` subfolder under `torch.hub.get_dir()`. source: storage location name. This argument is used when `url` is `None`. @@ -323,7 +320,8 @@ def download( repo_ = "Project-MONAI/model-zoo/hosting_storage_v1" if len(repo_.split("/")) != 3 and source_ != "huggingface_hub": raise ValueError("repo should be in the form of `repo_owner/repo_name/release_tag`.") - + elif len(repo_.split("/")) != 2 and source_ == "huggingface_hub": + raise ValueError("Hugging Face Hub repo should be in the form of `repo_owner/repo_name`") if url_ is not None: if name_ is not None: filepath = bundle_dir_ / f"{name_}.zip" @@ -349,9 +347,8 @@ def download( progress=progress_, ) elif source_ == "huggingface_hub": - if name_ is None: - raise ValueError(f"To download from source: 'huggingface_hub', `name` must be provided, got {name_}.") - _download_from_huggingface_hub(repo=repo_, download_path=bundle_dir_, filename=name_, version=version_) + extract_path = os.path.join(bundle_dir_, name_) + huggingface_hub.snapshot_download(repo_id=repo_, revision=version_, local_dir=extract_path) else: raise NotImplementedError( f"Currently only download from `url`, source 'github', 'ngc', or 'huggingface_hub' are implemented, got source: {source_}." @@ -1524,15 +1521,63 @@ def init_bundle( save_state(network, str(models_dir / "model.pt")) -def push_to_hf_hub(bundle_dir: str, repo_name: str) -> None: +def push_to_hf_hub( + repo: str, + bundle_name: str, + bundle_dir: str, + token: str | None = None, + private: bool | None = True, + branch_name: str | None = None, + tag_name: str | None = None, + **upload_folder_kwargs: Any, + ) -> str: """ - Push the current bundle to the Hugging Face Hub. + Push a MONAI bundle to the Hugging Face Hub. Args: - bundle_dir: path to the bundle directory to push - repo_name: name of the repo to create or push to the HF Hub + repo: namespace (user or organization) and a repo name separated by a /, e.g. `hf_username/bundle_name` + bundle_name: name of the bundle directory to push. + bundle_dir: path to the bundle directory. + token: Hugging Face authentication token. Default is `None` (will default to the stored token). + private: Private visibility of the repository on Hugging Face. Default is `True`. + branch_name: Name of branch. If branch does not exist, it will be created. Default is `None`. + tag_name: Name of tag. Default is `None`. + upload_folder_kwargs: Keyword arguments to pass to `HfApi.upload_folder`. + + Returns: + repo_url: URL of the Hugging Face repo """ - hf_api = huggingface_hub.HfApi() - repo_url = hf_api.create_repo(name=repo_name, exist_ok=True) - repo_id = repo_url.repo_id - return hf_api.upload_folder(path=bundle_dir, repo_id=repo_id) + # Connect to API and create repo + hf_api = huggingface_hub.HfApi(token=token) + hf_api.create_repo(repo_id=repo, private=private, exist_ok=True) + + # Create model card in bundle directory + new_modelcard_path = os.path.join(bundle_dir, bundle_name, "README.md") + modelcard_path = os.path.join(bundle_dir, bundle_name, "docs", "README.md") + if os.path.exists(modelcard_path): + # Copy README from old path if it exists + copyfile(modelcard_path, new_modelcard_path) + + # Create branch if branch_name is specified + if branch_name is not None: + huggingface_hub.create_branch( + repo_id=repo, + branch=branch_name, + exist_ok=True) + + # Upload bundle folder to repo + repo_url = hf_api.upload_folder( + repo_id=repo, + folder_path=os.path.join(bundle_dir, bundle_name), + revision=branch_name, + **upload_folder_kwargs) + + # Create tag if specified + if tag_name is not None: + hf_api.create_tag( + repo_id = repo, + tag = tag_name, + revision = branch_name, # if None, will default to `main` branch + exist_ok = True) + + return repo_url diff --git a/tests/test_bundle_download.py b/tests/test_bundle_download.py index 52aa515111..ab791988d7 100644 --- a/tests/test_bundle_download.py +++ b/tests/test_bundle_download.py @@ -15,6 +15,7 @@ import os import tempfile import unittest +from unittest.case import skipUnless import torch from parameterized import parameterized @@ -22,6 +23,7 @@ import monai.networks.nets as nets from monai.apps import check_hash from monai.bundle import ConfigParser, load +from monai.utils import optional_import from tests.utils import ( SkipIfBeforePyTorchVersion, assert_allclose, @@ -30,6 +32,8 @@ skip_if_quick, ) +_, has_huggingface_hub = optional_import("huggingface_hub") + TEST_CASE_1 = ["test_bundle", None] TEST_CASE_2 = ["test_bundle", "0.1.1"] @@ -58,6 +62,12 @@ "model.ts", ] +TEST_CASE_6 = [ + ["model.pt", "model.ts", "network.json", "test_output.pt", "test_input.pt"], + "test_bundle", + "monai-test/test_bundle", +] + class TestDownload(unittest.TestCase): @parameterized.expand([TEST_CASE_1, TEST_CASE_2]) @@ -178,6 +188,29 @@ def test_load_ts_module(self, bundle_files, bundle_name, version, repo, device, # test extra_file_dict self.assertTrue("network.json" in extra_file_dict.keys()) + @parameterized.expand([TEST_CASE_6]) + @skip_if_quick + @skipUnless(has_huggingface_hub, "Requires `huggingface_hub`.") + def test_hf_hub_download_bundle(self, bundle_files, bundle_name, repo): + with skip_if_downloading_fails(): + with tempfile.TemporaryDirectory() as tempdir: + cmd = [ + "coverage", + "run", + "-m", + "monai.bundle", + "download", + "--name", + bundle_name, + "--source", + "huggingface_hub", + ] + cmd += ["--bundle_dir", tempdir, "--repo", repo, "--progress", "False"] + command_line_tests(cmd) + for file in bundle_files: + file_path = os.path.join(tempdir, bundle_name, file) + self.assertTrue(os.path.exists(file_path)) + if __name__ == "__main__": unittest.main() From 697fbf1ca013be0b53d42ccbb92582137356eaa4 Mon Sep 17 00:00:00 2001 From: katielink Date: Tue, 15 Aug 2023 19:19:17 -0400 Subject: [PATCH 12/26] Remove create_branch in push_to_hub flow for simplicity Signed-off-by: katielink --- monai/bundle/scripts.py | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/monai/bundle/scripts.py b/monai/bundle/scripts.py index 9f0dc5c2e0..41c0de7d03 100644 --- a/monai/bundle/scripts.py +++ b/monai/bundle/scripts.py @@ -1527,8 +1527,8 @@ def push_to_hf_hub( bundle_dir: str, token: str | None = None, private: bool | None = True, - branch_name: str | None = None, - tag_name: str | None = None, + version_name: str | None = None, + tag_as_latest_version: bool | None = False, **upload_folder_kwargs: Any, ) -> str: """ @@ -1557,27 +1557,24 @@ def push_to_hf_hub( if os.path.exists(modelcard_path): # Copy README from old path if it exists copyfile(modelcard_path, new_modelcard_path) - - # Create branch if branch_name is specified - if branch_name is not None: - huggingface_hub.create_branch( - repo_id=repo, - branch=branch_name, - exist_ok=True) # Upload bundle folder to repo repo_url = hf_api.upload_folder( repo_id=repo, folder_path=os.path.join(bundle_dir, bundle_name), - revision=branch_name, **upload_folder_kwargs) - # Create tag if specified - if tag_name is not None: - hf_api.create_tag( - repo_id = repo, - tag = tag_name, - revision = branch_name, # if None, will default to `main` branch - exist_ok = True) + # Create version tag if specified + if version_name is not None: + hf_api.create_tag( + repo_id = repo, + tag = version_name, + exist_ok = True) + + if tag_as_latest_version: + hf_api.create_tag( + repo_id = repo, + tag = "latest_version", + exist_ok = True) return repo_url From 77ba1e105d7028ba5852f25b2d459801a46366ac Mon Sep 17 00:00:00 2001 From: katielink Date: Tue, 15 Aug 2023 19:38:09 -0400 Subject: [PATCH 13/26] Update docstring for push_to_huggingface_hub Signed-off-by: katielink --- monai/bundle/scripts.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/monai/bundle/scripts.py b/monai/bundle/scripts.py index 41c0de7d03..192d7ba790 100644 --- a/monai/bundle/scripts.py +++ b/monai/bundle/scripts.py @@ -1540,8 +1540,9 @@ def push_to_hf_hub( bundle_dir: path to the bundle directory. token: Hugging Face authentication token. Default is `None` (will default to the stored token). private: Private visibility of the repository on Hugging Face. Default is `True`. - branch_name: Name of branch. If branch does not exist, it will be created. Default is `None`. - tag_name: Name of tag. Default is `None`. + version_name: Name of the version tag to create. Default is `None` (no version tag is created). + tag_as_latest_version: Whether to tag the commit as `latest_version`. + This version will downloaded by default when using `bundle.download()`. Default is `False`. upload_folder_kwargs: Keyword arguments to pass to `HfApi.upload_folder`. Returns: @@ -1571,6 +1572,7 @@ def push_to_hf_hub( tag = version_name, exist_ok = True) + # Optionally tag as `latest_version` if tag_as_latest_version: hf_api.create_tag( repo_id = repo, From d210e41cb89096b686a58eb5ccb15fd72e0c661f Mon Sep 17 00:00:00 2001 From: katielink Date: Tue, 15 Aug 2023 22:07:07 -0400 Subject: [PATCH 14/26] Formatting and minor updates Signed-off-by: katielink --- monai/bundle/scripts.py | 61 +++++++++++++++++++++------------ tests/test_bundle_download.py | 64 +++++++++++++++++------------------ 2 files changed, 71 insertions(+), 54 deletions(-) diff --git a/monai/bundle/scripts.py b/monai/bundle/scripts.py index 192d7ba790..4d35c2a312 100644 --- a/monai/bundle/scripts.py +++ b/monai/bundle/scripts.py @@ -208,7 +208,7 @@ def _get_latest_bundle_version(source: str, name: str, repo: str) -> dict[str, l refs = huggingface_hub.list_repo_refs(repo_id=repo) if len(refs.tags) > 0: all_versions = [t.name for t in refs.tags] # git tags, not to be confused with `tag` - latest_version = ['latest_version' if 'latest_version' in all_versions else all_versions[-1]][0] + latest_version = ["latest_version" if "latest_version" in all_versions else all_versions[-1]][0] else: latest_version = [b.name for b in refs.branches][0] # use the branch that was last updated return latest_version @@ -1521,16 +1521,41 @@ def init_bundle( save_state(network, str(models_dir / "model.pt")) +def _add_model_card_metadata(new_modelcard_path): + # Extract license from LICENSE file + license_path = os.path.join(os.path.dirname(new_modelcard_path), 'LICENSE') + if os.path.exists(license_path): + with open(license_path, 'r') as file: + content = file.read() + if 'Apache License' in content and 'Version 2.0' in content: + license_name = 'apache-2.0' + elif 'MIT License' in content: + license_name = 'mit' + else: + license_name = 'unknown' + # Add relevant tags + tags = '- monai\n- medical\nlibrary_name: monai\n' + # Create tag section + tag_content = f"---\ntags:\n{tags}license: {license_name}\n---" + + # Update model card + with open(new_modelcard_path, 'r') as file: + content = file.read() + new_content = tag_content + '\n' + content + with open(new_modelcard_path, 'w') as file: + file.write(new_content) + + def push_to_hf_hub( repo: str, - bundle_name: str, + name: str, bundle_dir: str, token: str | None = None, private: bool | None = True, - version_name: str | None = None, + version: str | None = None, tag_as_latest_version: bool | None = False, **upload_folder_kwargs: Any, - ) -> str: +) -> str: """ Push a MONAI bundle to the Hugging Face Hub. @@ -1541,7 +1566,7 @@ def push_to_hf_hub( token: Hugging Face authentication token. Default is `None` (will default to the stored token). private: Private visibility of the repository on Hugging Face. Default is `True`. version_name: Name of the version tag to create. Default is `None` (no version tag is created). - tag_as_latest_version: Whether to tag the commit as `latest_version`. + tag_as_latest_version: Whether to tag the commit as `latest_version`. This version will downloaded by default when using `bundle.download()`. Default is `False`. upload_folder_kwargs: Keyword arguments to pass to `HfApi.upload_folder`. @@ -1553,30 +1578,22 @@ def push_to_hf_hub( hf_api.create_repo(repo_id=repo, private=private, exist_ok=True) # Create model card in bundle directory - new_modelcard_path = os.path.join(bundle_dir, bundle_name, "README.md") - modelcard_path = os.path.join(bundle_dir, bundle_name, "docs", "README.md") + new_modelcard_path = os.path.join(bundle_dir, name, "README.md") + modelcard_path = os.path.join(bundle_dir, name, "docs", "README.md") if os.path.exists(modelcard_path): # Copy README from old path if it exists copyfile(modelcard_path, new_modelcard_path) - + _add_model_card_metadata(new_modelcard_path) + # Upload bundle folder to repo - repo_url = hf_api.upload_folder( - repo_id=repo, - folder_path=os.path.join(bundle_dir, bundle_name), - **upload_folder_kwargs) + repo_url = hf_api.upload_folder(repo_id=repo, folder_path=os.path.join(bundle_dir, name), **upload_folder_kwargs) # Create version tag if specified - if version_name is not None: - hf_api.create_tag( - repo_id = repo, - tag = version_name, - exist_ok = True) - + if version is not None: + hf_api.create_tag(repo_id=repo, tag=version, exist_ok=True) + # Optionally tag as `latest_version` if tag_as_latest_version: - hf_api.create_tag( - repo_id = repo, - tag = "latest_version", - exist_ok = True) + hf_api.create_tag(repo_id=repo, tag="latest_version", exist_ok=True) return repo_url diff --git a/tests/test_bundle_download.py b/tests/test_bundle_download.py index ab791988d7..8aacb29508 100644 --- a/tests/test_bundle_download.py +++ b/tests/test_bundle_download.py @@ -46,6 +46,12 @@ ] TEST_CASE_4 = [ + ["model.pt", "model.ts", "network.json", "test_output.pt", "test_input.pt"], + "test_bundle", + "monai-test/test_bundle", +] + +TEST_CASE_5 = [ ["model.pt", "model.ts", "network.json", "test_output.pt", "test_input.pt"], "test_bundle", "Project-MONAI/MONAI-extra-test-data/0.8.1", @@ -53,7 +59,7 @@ "model.pt", ] -TEST_CASE_5 = [ +TEST_CASE_6 = [ ["test_output.pt", "test_input.pt"], "test_bundle", "0.1.1", @@ -62,12 +68,6 @@ "model.ts", ] -TEST_CASE_6 = [ - ["model.pt", "model.ts", "network.json", "test_output.pt", "test_input.pt"], - "test_bundle", - "monai-test/test_bundle", -] - class TestDownload(unittest.TestCase): @parameterized.expand([TEST_CASE_1, TEST_CASE_2]) @@ -109,9 +109,32 @@ def test_url_download_bundle(self, bundle_files, bundle_name, url, hash_val): if file == "network.json": self.assertTrue(check_hash(filepath=file_path, val=hash_val)) + @parameterized.expand([TEST_CASE_4]) + @skip_if_quick + @skipUnless(has_huggingface_hub, "Requires `huggingface_hub`.") + def test_hf_hub_download_bundle(self, bundle_files, bundle_name, repo): + with skip_if_downloading_fails(): + with tempfile.TemporaryDirectory() as tempdir: + cmd = [ + "coverage", + "run", + "-m", + "monai.bundle", + "download", + "--name", + bundle_name, + "--source", + "huggingface_hub", + ] + cmd += ["--bundle_dir", tempdir, "--repo", repo, "--progress", "False"] + command_line_tests(cmd) + for file in bundle_files: + file_path = os.path.join(tempdir, bundle_name, file) + self.assertTrue(os.path.exists(file_path)) + class TestLoad(unittest.TestCase): - @parameterized.expand([TEST_CASE_4]) + @parameterized.expand([TEST_CASE_5]) @skip_if_quick def test_load_weights(self, bundle_files, bundle_name, repo, device, model_file): with skip_if_downloading_fails(): @@ -158,7 +181,7 @@ def test_load_weights(self, bundle_files, bundle_name, repo, device, model_file) output_2 = model_2.forward(input_tensor) assert_allclose(output_2, expected_output, atol=1e-4, rtol=1e-4, type_test=False) - @parameterized.expand([TEST_CASE_5]) + @parameterized.expand([TEST_CASE_6]) @skip_if_quick @SkipIfBeforePyTorchVersion((1, 7, 1)) def test_load_ts_module(self, bundle_files, bundle_name, version, repo, device, model_file): @@ -188,29 +211,6 @@ def test_load_ts_module(self, bundle_files, bundle_name, version, repo, device, # test extra_file_dict self.assertTrue("network.json" in extra_file_dict.keys()) - @parameterized.expand([TEST_CASE_6]) - @skip_if_quick - @skipUnless(has_huggingface_hub, "Requires `huggingface_hub`.") - def test_hf_hub_download_bundle(self, bundle_files, bundle_name, repo): - with skip_if_downloading_fails(): - with tempfile.TemporaryDirectory() as tempdir: - cmd = [ - "coverage", - "run", - "-m", - "monai.bundle", - "download", - "--name", - bundle_name, - "--source", - "huggingface_hub", - ] - cmd += ["--bundle_dir", tempdir, "--repo", repo, "--progress", "False"] - command_line_tests(cmd) - for file in bundle_files: - file_path = os.path.join(tempdir, bundle_name, file) - self.assertTrue(os.path.exists(file_path)) - if __name__ == "__main__": unittest.main() From 995057394ba070b492434fc85234296945b8c215 Mon Sep 17 00:00:00 2001 From: katielink Date: Tue, 15 Aug 2023 22:21:16 -0400 Subject: [PATCH 15/26] Add push_to_hf_hub usage example and function to docs Signed-off-by: katielink --- docs/source/bundle.rst | 1 + monai/bundle/scripts.py | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/docs/source/bundle.rst b/docs/source/bundle.rst index 94ca8813f4..0940c005ba 100644 --- a/docs/source/bundle.rst +++ b/docs/source/bundle.rst @@ -48,3 +48,4 @@ Model Bundle .. autofunction:: verify_metadata .. autofunction:: verify_net_in_out .. autofunction:: init_bundle +.. autofunction:: push_to_huggingface_hub diff --git a/monai/bundle/scripts.py b/monai/bundle/scripts.py index 4d35c2a312..dca98e6412 100644 --- a/monai/bundle/scripts.py +++ b/monai/bundle/scripts.py @@ -1559,6 +1559,13 @@ def push_to_hf_hub( """ Push a MONAI bundle to the Hugging Face Hub. + Typical usage examples: + + .. code-block:: bash + + python -m monai.bundle push_to_hf_hub --repo --name \ + --bundle_dir --version ... + Args: repo: namespace (user or organization) and a repo name separated by a /, e.g. `hf_username/bundle_name` bundle_name: name of the bundle directory to push. From f80734c8073cbc76876be05463b80746194bc5d7 Mon Sep 17 00:00:00 2001 From: katielink Date: Tue, 15 Aug 2023 22:28:27 -0400 Subject: [PATCH 16/26] Formatting Signed-off-by: katielink --- monai/bundle/scripts.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/monai/bundle/scripts.py b/monai/bundle/scripts.py index dca98e6412..21bbf8121a 100644 --- a/monai/bundle/scripts.py +++ b/monai/bundle/scripts.py @@ -1523,26 +1523,26 @@ def init_bundle( def _add_model_card_metadata(new_modelcard_path): # Extract license from LICENSE file - license_path = os.path.join(os.path.dirname(new_modelcard_path), 'LICENSE') + license_path = os.path.join(os.path.dirname(new_modelcard_path), "LICENSE") if os.path.exists(license_path): - with open(license_path, 'r') as file: + with open(license_path, "r") as file: content = file.read() - if 'Apache License' in content and 'Version 2.0' in content: - license_name = 'apache-2.0' - elif 'MIT License' in content: - license_name = 'mit' + if "Apache License" in content and "Version 2.0" in content: + license_name = "apache-2.0" + elif "MIT License" in content: + license_name = "mit" else: - license_name = 'unknown' + license_name = "unknown" # Add relevant tags - tags = '- monai\n- medical\nlibrary_name: monai\n' + tags = "- monai\n- medical\nlibrary_name: monai\n" # Create tag section tag_content = f"---\ntags:\n{tags}license: {license_name}\n---" # Update model card - with open(new_modelcard_path, 'r') as file: + with open(new_modelcard_path, "r") as file: content = file.read() - new_content = tag_content + '\n' + content - with open(new_modelcard_path, 'w') as file: + new_content = tag_content + "\n" + content + with open(new_modelcard_path, "w") as file: file.write(new_content) From 9698039fcaa9adc067a2815e10a656d9aa5f39b4 Mon Sep 17 00:00:00 2001 From: katielink Date: Tue, 15 Aug 2023 23:10:51 -0400 Subject: [PATCH 17/26] Fix bug Signed-off-by: katielink --- monai/bundle/scripts.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/monai/bundle/scripts.py b/monai/bundle/scripts.py index 21bbf8121a..4193f41150 100644 --- a/monai/bundle/scripts.py +++ b/monai/bundle/scripts.py @@ -1523,6 +1523,7 @@ def init_bundle( def _add_model_card_metadata(new_modelcard_path): # Extract license from LICENSE file + license_name = "unknown" license_path = os.path.join(os.path.dirname(new_modelcard_path), "LICENSE") if os.path.exists(license_path): with open(license_path, "r") as file: @@ -1531,8 +1532,6 @@ def _add_model_card_metadata(new_modelcard_path): license_name = "apache-2.0" elif "MIT License" in content: license_name = "mit" - else: - license_name = "unknown" # Add relevant tags tags = "- monai\n- medical\nlibrary_name: monai\n" # Create tag section From aac46791400b9435951c9cbfb5fdfb2556de00fe Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 16 Aug 2023 13:01:16 +0000 Subject: [PATCH 18/26] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- monai/bundle/scripts.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/monai/bundle/scripts.py b/monai/bundle/scripts.py index de3837f3b3..81a278ee8b 100644 --- a/monai/bundle/scripts.py +++ b/monai/bundle/scripts.py @@ -1564,7 +1564,7 @@ def _add_model_card_metadata(new_modelcard_path): license_name = "unknown" license_path = os.path.join(os.path.dirname(new_modelcard_path), "LICENSE") if os.path.exists(license_path): - with open(license_path, "r") as file: + with open(license_path) as file: content = file.read() if "Apache License" in content and "Version 2.0" in content: license_name = "apache-2.0" @@ -1576,7 +1576,7 @@ def _add_model_card_metadata(new_modelcard_path): tag_content = f"---\ntags:\n{tags}license: {license_name}\n---" # Update model card - with open(new_modelcard_path, "r") as file: + with open(new_modelcard_path) as file: content = file.read() new_content = tag_content + "\n" + content with open(new_modelcard_path, "w") as file: From eff08bc835ffea746cc35447e2abf5bdb4126359 Mon Sep 17 00:00:00 2001 From: katielink Date: Wed, 18 Oct 2023 19:07:31 -0400 Subject: [PATCH 19/26] Add push to hub test Signed-off-by: katielink --- tests/test_bundle_push_to_hf_hub.py | 42 +++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 tests/test_bundle_push_to_hf_hub.py diff --git a/tests/test_bundle_push_to_hf_hub.py b/tests/test_bundle_push_to_hf_hub.py new file mode 100644 index 0000000000..e661f05fe7 --- /dev/null +++ b/tests/test_bundle_push_to_hf_hub.py @@ -0,0 +1,42 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import os +import tempfile +import unittest +from unittest.case import skipUnless +from unittest.mock import patch + +from parameterized import parameterized + +from monai.bundle import push_to_hf_hub +from monai.utils import optional_import +from tests.utils import skip_if_quick + +huggingface_hub, has_huggingface_hub = optional_import("huggingface_hub") + +TEST_CASE_1 = ["monai-test/test_bundle_push", "test_bundle"] + + +class TestPushToHuggingFaceHub(unittest.TestCase): + @parameterized.expand([TEST_CASE_1]) + @skip_if_quick + @skipUnless(has_huggingface_hub, "Requires `huggingface_hub` package.") + @patch.object(huggingface_hub.HfApi, "create_repo") + @patch.object(huggingface_hub.HfApi, "upload_folder") + @patch.object(huggingface_hub.HfApi, "create_tag") + def test_push_to_huggingface_hub(self, repo, bundle_name, test_createrepo, test_uploadfolder, test_createtag): + test_uploadfolder.return_value = "https://hf.co/repo/test" + with tempfile.TemporaryDirectory() as tempdir: + repo_url = push_to_hf_hub(repo, bundle_name, tempdir) + self.assertEqual("https://hf.co/repo/test", repo_url) From 724269c1260f6adad4403f2c0c1f0b6ef7c563ce Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 18 Oct 2023 23:34:53 +0000 Subject: [PATCH 20/26] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- monai/bundle/scripts.py | 1 - tests/test_bundle_push_to_hf_hub.py | 1 - 2 files changed, 2 deletions(-) diff --git a/monai/bundle/scripts.py b/monai/bundle/scripts.py index 57c32203cc..8ae5f1a789 100644 --- a/monai/bundle/scripts.py +++ b/monai/bundle/scripts.py @@ -1794,4 +1794,3 @@ def download_large_files(bundle_path: str | None = None, large_file_name: str | lf_data["filepath"] = os.path.join(bundle_path, lf_data["path"]) lf_data.pop("path") download_url(**lf_data) - diff --git a/tests/test_bundle_push_to_hf_hub.py b/tests/test_bundle_push_to_hf_hub.py index e661f05fe7..375c5d81e8 100644 --- a/tests/test_bundle_push_to_hf_hub.py +++ b/tests/test_bundle_push_to_hf_hub.py @@ -11,7 +11,6 @@ from __future__ import annotations -import os import tempfile import unittest from unittest.case import skipUnless From 1570cb91b599066917d0edc8cddbea0a8c757d26 Mon Sep 17 00:00:00 2001 From: katielink Date: Wed, 18 Oct 2023 19:57:22 -0400 Subject: [PATCH 21/26] Fix lint errors Signed-off-by: katielink --- monai/bundle/scripts.py | 1 + tests/test_bundle_download.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/monai/bundle/scripts.py b/monai/bundle/scripts.py index 8ae5f1a789..efa9e1fef8 100644 --- a/monai/bundle/scripts.py +++ b/monai/bundle/scripts.py @@ -1614,6 +1614,7 @@ def init_bundle( elif network is not None: save_state(network, str(models_dir / "model.pt")) + def _add_model_card_metadata(new_modelcard_path): # Extract license from LICENSE file license_name = "unknown" diff --git a/tests/test_bundle_download.py b/tests/test_bundle_download.py index eadc884178..fa96c6f28d 100644 --- a/tests/test_bundle_download.py +++ b/tests/test_bundle_download.py @@ -23,8 +23,8 @@ import monai.networks.nets as nets from monai.apps import check_hash -from monai.utils import optional_import from monai.bundle import ConfigParser, create_workflow, load +from monai.utils import optional_import from tests.utils import ( SkipIfBeforePyTorchVersion, assert_allclose, From 6a848be9989414edcc493523db8e10e8f9595940 Mon Sep 17 00:00:00 2001 From: katielink Date: Wed, 18 Oct 2023 19:59:46 -0400 Subject: [PATCH 22/26] Add test to exclude_cases Signed-off-by: katielink --- tests/min_tests.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/min_tests.py b/tests/min_tests.py index c2b7536282..8128bb7b84 100644 --- a/tests/min_tests.py +++ b/tests/min_tests.py @@ -37,6 +37,7 @@ def run_testsuit(): "test_auto3dseg", "test_bundle_onnx_export", "test_bundle_trt_export", + "test_bundle_push_to_hf_hub", "test_cachedataset", "test_cachedataset_parallel", "test_cachedataset_persistent_workers", From 654b1d8eb925243fe54416e394401c5f28384456 Mon Sep 17 00:00:00 2001 From: katielink Date: Wed, 18 Oct 2023 20:22:49 -0400 Subject: [PATCH 23/26] Fix mypy error Signed-off-by: katielink --- monai/bundle/scripts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/monai/bundle/scripts.py b/monai/bundle/scripts.py index efa9e1fef8..20a491e493 100644 --- a/monai/bundle/scripts.py +++ b/monai/bundle/scripts.py @@ -1648,7 +1648,7 @@ def push_to_hf_hub( version: str | None = None, tag_as_latest_version: bool | None = False, **upload_folder_kwargs: Any, -) -> str: +) -> Any: """ Push a MONAI bundle to the Hugging Face Hub. From 492bb5977552b956d379ff73f386ec5e32021fd0 Mon Sep 17 00:00:00 2001 From: katielink Date: Wed, 18 Oct 2023 20:25:16 -0400 Subject: [PATCH 24/26] Fix docs error Signed-off-by: katielink --- docs/source/bundle.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/bundle.rst b/docs/source/bundle.rst index 1659b5186b..4e3a32b6fe 100644 --- a/docs/source/bundle.rst +++ b/docs/source/bundle.rst @@ -48,5 +48,5 @@ Model Bundle .. autofunction:: verify_metadata .. autofunction:: verify_net_in_out .. autofunction:: init_bundle -.. autofunction:: push_to_huggingface_hub +.. autofunction:: push_to_hf_hub .. autofunction:: update_kwargs From 6646ac37b25df497d5046bea035421baed0850c9 Mon Sep 17 00:00:00 2001 From: Katie Link Date: Thu, 19 Oct 2023 09:22:05 -0400 Subject: [PATCH 25/26] Update requirements-dev.txt Co-authored-by: Wenqi Li <831580+wyli@users.noreply.github.com> Signed-off-by: Katie Link --- requirements-dev.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index 5020953e1a..38715b8449 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -53,7 +53,6 @@ onnx>=1.13.0 onnxruntime; python_version <= '3.10' typeguard<3 # https://github.com/microsoft/nni/issues/5457 filelock!=3.12.0 # https://github.com/microsoft/nni/issues/5523 -huggingface_hub zarr lpips==0.1.4 nvidia-ml-py From e63cfdaa18b6ecdb7e45ba87e23ad39787f6be72 Mon Sep 17 00:00:00 2001 From: Katie Link Date: Thu, 19 Oct 2023 09:22:14 -0400 Subject: [PATCH 26/26] Update setup.cfg Co-authored-by: Wenqi Li <831580+wyli@users.noreply.github.com> Signed-off-by: Katie Link --- setup.cfg | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index f3b5646102..d6c9b4f190 100644 --- a/setup.cfg +++ b/setup.cfg @@ -80,7 +80,6 @@ all = optuna onnx>=1.13.0 onnxruntime; python_version <= '3.10' - huggingface_hub zarr lpips==0.1.4 nvidia-ml-py