From fa0da2ca8d18eda25d3c88941a77ee66818da6fc Mon Sep 17 00:00:00 2001
From: amandarichardsonn <30413257+amandarichardsonn@users.noreply.github.com>
Date: Thu, 28 Mar 2024 15:30:23 -0700
Subject: [PATCH] Promote Build Device Option to Enum (#527)

Replacing instances of ["CPU","GPU"] string literals with a `Device` Enum
(e.g. `device == "gpu"` becomes `device == Device.GPU`).

[ reviewed by @MattToast ]
[ committed by @amandarichardsonn ]
---
 doc/changelog.rst                   |  3 ++
 smartsim/_core/_cli/build.py        | 33 +++++++--------
 smartsim/_core/_cli/validate.py     | 33 ++++++++-------
 smartsim/_core/_install/builder.py  | 65 ++++++++++++++++-------------
 smartsim/entity/dbobject.py         | 19 +++++----
 smartsim/entity/ensemble.py         |  7 ++--
 smartsim/entity/model.py            |  7 ++--
 tests/backends/test_cli_mini_exp.py |  3 +-
 tests/install/test_builder.py       | 20 ++++++---
 9 files changed, 105 insertions(+), 85 deletions(-)

diff --git a/doc/changelog.rst b/doc/changelog.rst
index 065b6623d..2cebc120e 100644
--- a/doc/changelog.rst
+++ b/doc/changelog.rst
@@ -18,6 +18,7 @@ To be released at some future point in time

 Description

+- Promote device options to an Enum
 - Update telemetry monitor, add telemetry collectors
 - Add method to specify node features for a Slurm job
 - Colo Orchestrator setup now blocks application start until setup finished
@@ -33,6 +34,7 @@ Description

 Detailed Notes

+- Promote devices to a dedicated Enum type throughout the SmartSim code base.
 - Update the telemetry monitor to enable retrieval of metrics on a scheduled
   interval. Switch basic experiment tracking telemetry to default to on. Add
   database metric collectors. Improve telemetry monitor logging. Create
@@ -70,6 +72,7 @@ Detailed Notes
 - Remove previously deprecated behavior present in test suite on machines with
   Slurm and Open MPI. (SmartSim-PR520_)

+.. _SmartSim-PR498: https://github.com/CrayLabs/SmartSim/pull/498
 .. _SmartSim-PR460: https://github.com/CrayLabs/SmartSim/pull/460
 .. _SmartSim-PR512: https://github.com/CrayLabs/SmartSim/pull/512
 .. _SmartSim-PR529: https://github.com/CrayLabs/SmartSim/pull/529
diff --git a/smartsim/_core/_cli/build.py b/smartsim/_core/_cli/build.py
index b2ff61a99..08a1a6138 100644
--- a/smartsim/_core/_cli/build.py
+++ b/smartsim/_core/_cli/build.py
@@ -43,7 +43,7 @@
     VersionConflictError,
     Versioner,
 )
-from smartsim._core._install.builder import BuildError
+from smartsim._core._install.builder import BuildError, Device
 from smartsim._core.config import CONFIG
 from smartsim._core.utils.helpers import installed_redisai_backends
 from smartsim.error import SSConfigError
@@ -54,8 +54,6 @@
 # NOTE: all smartsim modules need full paths as the smart cli
 #       may be installed into a different directory.
-
-_TDeviceStr = t.Literal["cpu", "gpu"]
 _TPinningStr = t.Literal["==", "!=", ">=", ">", "<=", "<", "~="]
@@ -134,7 +132,7 @@ def build_database(
 def build_redis_ai(
     build_env: BuildEnv,
     versions: Versioner,
-    device: _TDeviceStr,
+    device: Device,
     use_torch: bool = True,
     use_tf: bool = True,
     use_onnx: bool = False,
@@ -143,7 +141,7 @@ def build_redis_ai(
     verbose: bool = False,
 ) -> None:
     # make sure user isn't trying to do something silly on MacOS
-    if build_env.PLATFORM == "darwin" and device == "gpu":
+    if build_env.PLATFORM == "darwin" and device == Device.GPU:
         raise BuildError("SmartSim does not support GPU on MacOS")

     # decide which runtimes to build
@@ -154,7 +152,7 @@ def build_redis_ai(
         ["ONNX", versions.ONNX, color_bool(use_onnx)],
     ]
     print(tabulate(backends_table, tablefmt="fancy_outline"), end="\n\n")
-    print(f"Building for GPU support: {color_bool(device == 'gpu')}\n")
+    print(f"Building for GPU support: {color_bool(device == Device.GPU)}\n")

     if not check_backends_install():
         sys.exit(1)
@@ -195,7 +193,7 @@ def build_redis_ai(
     else:
         # get the build environment, update with CUDNN env vars
         # if present and building for GPU, otherwise warn the user
-        if device == "gpu":
+        if device == Device.GPU:
             gpu_env = build_env.get_cudnn_env()
             cudnn_env_vars = [
                 "CUDNN_LIBRARY",
@@ -226,18 +224,16 @@ def build_redis_ai(
     logger.info("ML Backends and RedisAI build complete!")


-def check_py_torch_version(versions: Versioner, device_in: _TDeviceStr = "cpu") -> None:
+def check_py_torch_version(versions: Versioner, device: Device = Device.CPU) -> None:
     """Check Python environment for Torch installation"""
-
-    device = device_in.lower()
     if BuildEnv.is_macos():
-        if device == "gpu":
+        if device == Device.GPU:
             raise BuildError("SmartSim does not support GPU on MacOS")
         device_suffix = ""
     else:  # linux
-        if device == "cpu":
+        if device == Device.CPU:
             device_suffix = versions.TORCH_CPU_SUFFIX
-        elif device == "gpu":
+        elif device == Device.GPU:
             device_suffix = versions.TORCH_CUDA_SUFFIX
         else:
             raise BuildError("Unrecognized device requested")
@@ -261,7 +257,9 @@ def check_py_torch_version(versions: Versioner, device_in: _TDeviceStr = "cpu")
             "Torch version not found in python environment. "
" "Attempting to install via `pip`" ) - wheel_device = device if device == "cpu" else device_suffix.replace("+", "") + wheel_device = ( + device.value if device == Device.CPU else device_suffix.replace("+", "") + ) pip( "install", "--extra-index-url", @@ -363,8 +361,7 @@ def execute( ) -> int: verbose = args.v keydb = args.keydb - device: _TDeviceStr = args.device - + device = Device(args.device.lower()) # torch and tf build by default pt = not args.no_pt # pylint: disable=invalid-name tf = not args.no_tf # pylint: disable=invalid-name @@ -453,8 +450,8 @@ def configure_parser(parser: argparse.ArgumentParser) -> None: parser.add_argument( "--device", type=str.lower, - default="cpu", - choices=["cpu", "gpu"], + default=Device.CPU.value, + choices=[device.value for device in Device], help="Device to build ML runtimes for", ) parser.add_argument( diff --git a/smartsim/_core/_cli/validate.py b/smartsim/_core/_cli/validate.py index 0606a7735..8c4cb3c8c 100644 --- a/smartsim/_core/_cli/validate.py +++ b/smartsim/_core/_cli/validate.py @@ -40,6 +40,7 @@ from smartsim import Experiment from smartsim._core._cli.utils import SMART_LOGGER_FORMAT +from smartsim._core._install.builder import Device from smartsim._core.utils.helpers import installed_redisai_backends from smartsim.log import get_logger @@ -61,9 +62,6 @@ _TemporaryDirectory = tempfile.TemporaryDirectory -_TCapitalDeviceStr = t.Literal["CPU", "GPU"] - - class _VerificationTempDir(_TemporaryDirectory): """A Temporary directory to be used as a context manager that will only clean itself up if no error is raised within its context @@ -88,7 +86,7 @@ def execute( simple experiment """ backends = installed_redisai_backends() - device: _TCapitalDeviceStr = args.device.upper() + device: Device = Device(args.device) try: with contextlib.ExitStack() as ctx: temp_dir = ctx.enter_context(_VerificationTempDir(dir=os.getcwd())) @@ -98,7 +96,7 @@ def execute( "SR_LOG_FILE", os.path.join(temp_dir, "smartredis.log") ), } - if device == "GPU": + if device == Device.GPU: validate_env["CUDA_VISIBLE_DEVICES"] = "0" ctx.enter_context(_env_vars_set_to(validate_env)) test_install( @@ -136,8 +134,8 @@ def configure_parser(parser: argparse.ArgumentParser) -> None: parser.add_argument( "--device", type=str.lower, - default="cpu", - choices=["cpu", "gpu"], + default=Device.CPU.value, + choices=[device.value for device in Device], help="Device to test the ML backends against", ) @@ -145,7 +143,7 @@ def configure_parser(parser: argparse.ArgumentParser) -> None: def test_install( location: str, port: t.Optional[int], - device: _TCapitalDeviceStr, + device: Device, with_tf: bool, with_pt: bool, with_onnx: bool, @@ -214,7 +212,7 @@ def _find_free_port() -> int: return int(port) -def _test_tf_install(client: Client, tmp_dir: str, device: _TCapitalDeviceStr) -> None: +def _test_tf_install(client: Client, tmp_dir: str, device: Device) -> None: recv_conn, send_conn = mp.Pipe(duplex=False) # Build the model in a subproc so that keras does not hog the gpu proc = mp.Process(target=_build_tf_frozen_model, args=(send_conn, tmp_dir)) @@ -236,7 +234,12 @@ def _test_tf_install(client: Client, tmp_dir: str, device: _TCapitalDeviceStr) - ) from e client.set_model_from_file( - "keras-fcn", model_path, "TF", device=device, inputs=inputs, outputs=outputs + "keras-fcn", + model_path, + "TF", + device=device.value.upper(), + inputs=inputs, + outputs=outputs, ) client.put_tensor("keras-input", np.random.rand(1, 28, 28).astype(np.float32)) client.run_model("keras-fcn", inputs=["keras-input"], 
@@ -264,7 +267,7 @@ def _build_tf_frozen_model(conn: "Connection", tmp_dir: str) -> None:
     conn.send((model_path, inputs, outputs))


-def _test_torch_install(client: Client, device: _TCapitalDeviceStr) -> None:
+def _test_torch_install(client: Client, device: Device) -> None:
     import torch
     from torch import nn

@@ -276,7 +279,7 @@ def __init__(self) -> None:
         def forward(self, x: torch.Tensor) -> torch.Tensor:
             return self.conv(x)

-    if device == "GPU":
+    if device == Device.GPU:
         device_ = torch.device("cuda")
     else:
         device_ = torch.device("cpu")
@@ -292,13 +295,13 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
     torch.jit.save(traced, buffer)  # type: ignore[no-untyped-call]
     model = buffer.getvalue()

-    client.set_model("torch-nn", model, backend="TORCH", device=device)
+    client.set_model("torch-nn", model, backend="TORCH", device=device.value.upper())
     client.put_tensor("torch-in", torch.rand(1, 1, 3, 3).numpy())
     client.run_model("torch-nn", inputs=["torch-in"], outputs=["torch-out"])
     client.get_tensor("torch-out")


-def _test_onnx_install(client: Client, device: _TCapitalDeviceStr) -> None:
+def _test_onnx_install(client: Client, device: Device) -> None:
     from skl2onnx import to_onnx
     from sklearn.cluster import KMeans

@@ -311,7 +314,7 @@ def _test_onnx_install(client: Client, device: _TCapitalDeviceStr) -> None:
     sample = np.arange(20, dtype=np.float32).reshape(10, 2)
     client.put_tensor("onnx-input", sample)

-    client.set_model("onnx-kmeans", model, "ONNX", device=device)
+    client.set_model("onnx-kmeans", model, "ONNX", device=device.value.upper())
     client.run_model(
         "onnx-kmeans", inputs=["onnx-input"], outputs=["onnx-labels", "onnx-transform"]
     )
diff --git a/smartsim/_core/_install/builder.py b/smartsim/_core/_install/builder.py
index c098cfd01..47f12d044 100644
--- a/smartsim/_core/_install/builder.py
+++ b/smartsim/_core/_install/builder.py
@@ -53,7 +53,7 @@
 # TODO: check cmake version and use system if possible to avoid conflicts

 TRedisAIBackendStr = t.Literal["tensorflow", "torch", "onnxruntime", "tflite"]
-TDeviceStr = t.Literal["cpu", "gpu"]
+

 _T = t.TypeVar("_T")
 _U = t.TypeVar("_U")
@@ -96,6 +96,11 @@ def from_str(cls, string: str, /) -> "Architecture":
         raise BuildError(f"Unrecognized or unsupported architecture: {string}")


+class Device(enum.Enum):
+    CPU = "cpu"
+    GPU = "gpu"
+
+
 class OperatingSystem(enum.Enum):
     LINUX = ("linux", "linux2")
     DARWIN = ("darwin",)
@@ -173,7 +178,7 @@ def is_built(self) -> bool:
         raise NotImplementedError

     def build_from_git(
-        self, git_url: str, branch: str, device: TDeviceStr = "cpu"
+        self, git_url: str, branch: str, device: Device = Device.CPU
     ) -> None:
         raise NotImplementedError

@@ -274,7 +279,7 @@ def is_built(self) -> bool:
         return redis_files.issubset(bin_files) or keydb_files.issubset(bin_files)

     def build_from_git(
-        self, git_url: str, branch: str, device: TDeviceStr = "cpu"
+        self, git_url: str, branch: str, device: Device = Device.CPU
     ) -> None:
         """Build Redis from git
         :param git_url: url from which to retrieve Redis
@@ -480,7 +485,7 @@ def build_onnx(self) -> bool:
     def fetch_onnx(self) -> bool:
         return self.build_onnx

-    def get_deps_dir_path_for(self, device: TDeviceStr) -> Path:
+    def get_deps_dir_path_for(self, device: Device) -> Path:
         def fail_to_format(reason: str) -> BuildError:  # pragma: no cover
             return BuildError(f"Failed to format RedisAI dependency path: {reason}")

@@ -497,10 +502,10 @@ def fail_to_format(reason: str) -> BuildError:  # pragma: no cover
             arch = "arm64v8"
         else:  # pragma: no cover
             raise fail_to_format(f"Unknown architecture: {architecture}")
-        return self.rai_build_path / f"deps/{os_}-{arch}-{device}"
+        return self.rai_build_path / f"deps/{os_}-{arch}-{device.value}"

     def _get_deps_to_fetch_for(
-        self, device: TDeviceStr
+        self, device: Device
     ) -> t.Tuple[_RAIBuildDependency, ...]:
         os_, arch = self._platform
         # TODO: It would be nice if the backend version numbers were declared
@@ -521,14 +526,14 @@ def _get_deps_to_fetch_for(

         return tuple(fetchable_deps)

-    def symlink_libtf(self, device: str) -> None:
+    def symlink_libtf(self, device: Device) -> None:
         """Add symbolic link to available libtensorflow in RedisAI deps.

         :param device: cpu or gpu
         :type device: Device
         """
         rai_deps_path = sorted(
-            self.rai_build_path.glob(os.path.join("deps", f"*{device}*"))
+            self.rai_build_path.glob(os.path.join("deps", f"*{device.value}*"))
         )
         if not rai_deps_path:
             raise FileNotFoundError("Could not find RedisAI 'deps' directory")
@@ -577,7 +582,7 @@ def symlink_libtf(self, device: str) -> None:
                     os.symlink(src_file, dst_file)

     def build_from_git(
-        self, git_url: str, branch: str, device: TDeviceStr = "cpu"
+        self, git_url: str, branch: str, device: Device = Device.CPU
     ) -> None:
         """Build RedisAI from git

@@ -616,14 +621,14 @@ def build_from_git(
         self.run_command(clone_cmd, out=subprocess.DEVNULL, cwd=self.build_dir)
         self._fetch_deps_for(device)

-        if self.libtf_dir and device:
+        if self.libtf_dir and device.value:
             self.symlink_libtf(device)

         build_cmd = self._rai_build_env_prefix(
             with_pt=self.build_torch,
             with_tf=self.build_tf,
             with_ort=self.build_onnx,
-            extra_env={"GPU": "1" if device == "gpu" else "0"},
+            extra_env={"GPU": "1" if device == Device.GPU else "0"},
         )

         if self.torch_dir:
@@ -674,7 +679,7 @@ def _rai_build_env_prefix(
             *(f"{key}={val}" for key, val in extra_env.items()),
         ]

-    def _fetch_deps_for(self, device: TDeviceStr) -> None:
+    def _fetch_deps_for(self, device: Device) -> None:
         if not self.rai_build_path.is_dir():
             raise BuildError("RedisAI build directory not found")

@@ -693,13 +698,13 @@ def _fetch_deps_for(self, device: TDeviceStr) -> None:
             f"found {len(unique_placed_paths)}"
         )

-    def _install_backends(self, device: str) -> None:
+    def _install_backends(self, device: Device) -> None:
         """Move backend libraries to smartsim/_core/lib/
         :param device: cpu or gpu
         :type device: Device
         """
         self.rai_install_path = self.rai_build_path.joinpath(
-            f"install-{device}"
+            f"install-{device.value}"
         ).resolve()
         rai_lib = self.rai_install_path / "redisai.so"
         rai_backends = self.rai_install_path / "backends"
@@ -833,7 +838,7 @@ def _extract_download(
 @dataclass(frozen=True)
 class _PTArchive(_WebZip, _RAIBuildDependency):
     architecture: Architecture
-    device: TDeviceStr
+    device: Device
     version: str

     @staticmethod
@@ -865,10 +870,10 @@ def supported_platforms() -> t.Sequence[t.Tuple[OperatingSystem, Architecture]]:

     @property
     def url(self) -> str:
-        if self.device == "gpu":
+        if self.device == Device.GPU:
             pt_build = "cu117"
         else:
-            pt_build = "cpu"
+            pt_build = Device.CPU.value
         # pylint: disable-next=line-too-long
         libtorch_archive = (
             f"libtorch-cxx11-abi-shared-without-deps-{self.version}%2B{pt_build}.zip"
         )
         root_url = "https://download.pytorch.org/libtorch"
         return f"{root_url}/{pt_build}/{libtorch_archive}"
@@ -887,10 +892,10 @@ def supported_platforms() -> t.Sequence[t.Tuple[OperatingSystem, Architecture]]:

     @property
     def url(self) -> str:
-        if self.device == "gpu":
+        if self.device == Device.GPU:
             raise BuildError("RedisAI does not currently support GPU on Mac OSX")
         if self.architecture == Architecture.X64:
-            pt_build = "cpu"
+            pt_build = Device.CPU.value
             libtorch_archive = f"libtorch-macos-{self.version}.zip"
f"libtorch-macos-{self.version}.zip" root_url = "https://download.pytorch.org/libtorch" return f"{root_url}/{pt_build}/{libtorch_archive}" @@ -902,7 +907,7 @@ def url(self) -> str: ) return f"{root_url}/{libtorch_archive}" - raise BuildError("Unsupported architecture for Pytorch: {self.architecture}") + raise BuildError(f"Unsupported architecture for Pytorch: {self.architecture}") def _choose_pt_variant( @@ -921,7 +926,7 @@ def _choose_pt_variant( class _TFArchive(_WebTGZ, _RAIBuildDependency): os_: OperatingSystem architecture: Architecture - device: TDeviceStr + device: Device version: str @staticmethod @@ -937,7 +942,7 @@ def url(self) -> str: tf_arch = "x86_64" else: raise BuildError( - "Unexpected Architecture for TF Archive: {self.architecture}" + f"Unexpected Architecture for TF Archive: {self.architecture}" ) if self.os_ == OperatingSystem.LINUX: @@ -945,14 +950,14 @@ def url(self) -> str: tf_device = self.device elif self.os_ == OperatingSystem.DARWIN: tf_os = "darwin" - if self.device == "gpu": + if self.device == Device.GPU: raise BuildError("RedisAI does not currently support GPU on Macos") - tf_device = "cpu" + tf_device = Device.CPU else: - raise BuildError("Unexpected OS for TF Archive: {self.os_}") + raise BuildError(f"Unexpected OS for TF Archive: {self.os_}") return ( "https://storage.googleapis.com/tensorflow/libtensorflow/" - f"libtensorflow-{tf_device}-{tf_os}-{tf_arch}-{self.version}.tar.gz" + f"libtensorflow-{tf_device.value}-{tf_os}-{tf_arch}-{self.version}.tar.gz" ) @property @@ -970,7 +975,7 @@ def __place_for_rai__(self, target: t.Union[str, "os.PathLike[str]"]) -> Path: @dataclass(frozen=True) class _ORTArchive(_WebTGZ, _RAIBuildDependency): os_: OperatingSystem - device: TDeviceStr + device: Device version: str @staticmethod @@ -989,15 +994,15 @@ def url(self) -> str: if self.os_ == OperatingSystem.LINUX: ort_os = "linux" ort_arch = "x64" - ort_build = "-gpu" if self.device == "gpu" else "" + ort_build = "-gpu" if self.device == Device.GPU else "" elif self.os_ == OperatingSystem.DARWIN: ort_os = "osx" ort_arch = "x86_64" ort_build = "" - if self.device == "gpu": + if self.device == Device.GPU: raise BuildError("RedisAI does not currently support GPU on Macos") else: - raise BuildError("Unexpected OS for TF Archive: {self.os_}") + raise BuildError(f"Unexpected OS for TF Archive: {self.os_}") ort_archive = f"onnxruntime-{ort_os}-{ort_arch}{ort_build}-{self.version}.tgz" return f"{ort_url_base}/{ort_archive}" diff --git a/smartsim/entity/dbobject.py b/smartsim/entity/dbobject.py index 0a495f066..ff18da1cd 100644 --- a/smartsim/entity/dbobject.py +++ b/smartsim/entity/dbobject.py @@ -27,6 +27,7 @@ import typing as t from pathlib import Path +from .._core._install.builder import Device from .._core.utils import init_default from ..error import SSUnsupportedError @@ -46,7 +47,7 @@ def __init__( name: str, func: t.Optional[_DBObjectFuncT], file_path: t.Optional[str], - device: t.Literal["CPU", "GPU"], + device: str, devices_per_node: int, first_device: int, ) -> None: @@ -103,9 +104,9 @@ def _check_filepath(file: str) -> Path: return file_path @staticmethod - def _check_device(device: t.Literal["CPU", "GPU"]) -> str: - device = t.cast(t.Literal["CPU", "GPU"], device.upper()) - if not device.startswith("CPU") and not device.startswith("GPU"): + def _check_device(device: str) -> str: + valid_devices = [Device.CPU.value, Device.GPU.value] + if not any(device.lower().startswith(dev) for dev in valid_devices): raise ValueError("Device argument must start with either CPU 
         return device

@@ -130,16 +131,16 @@ def _enumerate_devices(self) -> t.List[str]:

     @staticmethod
     def _check_devices(
-        device: t.Literal["CPU", "GPU"],
+        device: str,
         devices_per_node: int,
         first_device: int,
     ) -> None:
-        if device == "CPU" and devices_per_node > 1:
+        if device.lower() == Device.CPU.value and devices_per_node > 1:
             raise SSUnsupportedError(
                 "Cannot set devices_per_node>1 if CPU is specified under devices"
             )

-        if device == "CPU" and first_device > 0:
+        if device.lower() == Device.CPU.value and first_device > 0:
             raise SSUnsupportedError(
                 "Cannot set first_device>0 if CPU is specified under devices"
             )
@@ -160,7 +161,7 @@ def __init__(
         name: str,
         script: t.Optional[str] = None,
         script_path: t.Optional[str] = None,
-        device: t.Literal["CPU", "GPU"] = "CPU",
+        device: str = Device.CPU.value.upper(),
         devices_per_node: int = 1,
         first_device: int = 0,
     ):
@@ -222,7 +223,7 @@ def __init__(
         backend: str,
         model: t.Optional[bytes] = None,
         model_file: t.Optional[str] = None,
-        device: t.Literal["CPU", "GPU"] = "CPU",
+        device: str = Device.CPU.value.upper(),
         devices_per_node: int = 1,
         first_device: int = 0,
         batch_size: int = 0,
diff --git a/smartsim/entity/ensemble.py b/smartsim/entity/ensemble.py
index e9aea5767..c04681149 100644
--- a/smartsim/entity/ensemble.py
+++ b/smartsim/entity/ensemble.py
@@ -30,6 +30,7 @@

 from tabulate import tabulate

+from .._core._install.builder import Device
 from .._core.utils.helpers import init_default
 from ..error import (
     EntityExistsError,
@@ -356,7 +357,7 @@ def add_ml_model(
         backend: str,
         model: t.Optional[bytes] = None,
         model_path: t.Optional[str] = None,
-        device: t.Literal["CPU", "GPU"] = "CPU",
+        device: str = Device.CPU.value.upper(),
         devices_per_node: int = 1,
         first_device: int = 0,
         batch_size: int = 0,
@@ -440,7 +441,7 @@ def add_script(
         name: str,
         script: t.Optional[str] = None,
         script_path: t.Optional[str] = None,
-        device: t.Literal["CPU", "GPU"] = "CPU",
+        device: str = Device.CPU.value.upper(),
         devices_per_node: int = 1,
         first_device: int = 0,
     ) -> None:
@@ -500,7 +501,7 @@ def add_function(
         self,
         name: str,
         function: t.Optional[str] = None,
-        device: t.Literal["CPU", "GPU"] = "CPU",
+        device: str = Device.CPU.value.upper(),
         devices_per_node: int = 1,
         first_device: int = 0,
     ) -> None:
diff --git a/smartsim/entity/model.py b/smartsim/entity/model.py
index 3c51cea85..4a2d9b5f5 100644
--- a/smartsim/entity/model.py
+++ b/smartsim/entity/model.py
@@ -33,6 +33,7 @@
 import warnings
 from os import path as osp

+from .._core._install.builder import Device
 from .._core.utils.helpers import cat_arg_and_value, init_default
 from ..error import EntityExistsError, SSUnsupportedError
 from ..log import get_logger
@@ -482,7 +483,7 @@ def add_ml_model(
         backend: str,
         model: t.Optional[bytes] = None,
         model_path: t.Optional[str] = None,
-        device: t.Literal["CPU", "GPU"] = "CPU",
+        device: str = Device.CPU.value.upper(),
         devices_per_node: int = 1,
         first_device: int = 0,
         batch_size: int = 0,
@@ -554,7 +555,7 @@ def add_script(
         name: str,
         script: t.Optional[str] = None,
         script_path: t.Optional[str] = None,
-        device: t.Literal["CPU", "GPU"] = "CPU",
+        device: str = Device.CPU.value.upper(),
         devices_per_node: int = 1,
         first_device: int = 0,
     ) -> None:
@@ -606,7 +607,7 @@ def add_function(
         self,
         name: str,
         function: t.Optional[str] = None,
-        device: t.Literal["CPU", "GPU"] = "CPU",
+        device: str = Device.CPU.value.upper(),
         devices_per_node: int = 1,
         first_device: int = 0,
     ) -> None:
diff --git a/tests/backends/test_cli_mini_exp.py b/tests/backends/test_cli_mini_exp.py
index f02f44270..f7563fc96 100644
--- a/tests/backends/test_cli_mini_exp.py
+++ b/tests/backends/test_cli_mini_exp.py
@@ -31,6 +31,7 @@
 import smartredis

 import smartsim._core._cli.validate
+import smartsim._core._install.builder as build
 from smartsim._core.utils.helpers import installed_redisai_backends

 sklearn_available = True
@@ -75,7 +76,7 @@ def _mock_make_managed_local_orc(*a, **kw):
         location=test_dir,
         port=db_port,
         # Always test on CPU, heads don't always have GPU
-        device="CPU",
+        device=build.Device.CPU,
         # Test the backends the dev has installed
         with_tf="tensorflow" in backends,
         with_pt="torch" in backends,
diff --git a/tests/install/test_builder.py b/tests/install/test_builder.py
index 5e6c8e597..c69a083d1 100644
--- a/tests/install/test_builder.py
+++ b/tests/install/test_builder.py
@@ -41,7 +41,9 @@

 RAI_VERSIONS = RedisAIVersion("1.2.7")

-for_each_device = pytest.mark.parametrize("device", ["cpu", "gpu"])
+for_each_device = pytest.mark.parametrize(
+    "device", [build.Device.CPU, build.Device.GPU]
+)

 _toggle_build_optional_backend = lambda backend: pytest.mark.parametrize(
     f"build_{backend}",
@@ -163,7 +165,7 @@ def test_rai_builder_will_add_dep_if_backend_requested_wo_duplicates(
     rai_builder = build.RedisAIBuilder(
         build_tf=build_tf, build_torch=build_pt, build_onnx=build_ort
     )
-    requested_backends = rai_builder._get_deps_to_fetch_for(device)
+    requested_backends = rai_builder._get_deps_to_fetch_for(build.Device(device))
     assert dlpack_dep_presence(requested_backends)
     assert tf_dep_presence(build_tf, requested_backends)
     assert pt_dep_presence(build_pt, requested_backends)
@@ -212,7 +214,7 @@ def test_rai_builder_raises_if_it_fetches_an_unexpected_number_of_ml_deps(
         build.BuildError,
         match=r"Expected to place \d+ dependencies, but only found \d+",
     ):
-        rai_builder._fetch_deps_for("cpu")
+        rai_builder._fetch_deps_for(build.Device.CPU)


 def test_threaded_map():
@@ -251,18 +253,24 @@ def test_PTArchiveMacOSX_url():
     arch = build.Architecture.X64
     pt_version = RAI_VERSIONS.torch

-    pt_linux_cpu = build._PTArchiveLinux(build.Architecture.X64, "cpu", pt_version)
+    pt_linux_cpu = build._PTArchiveLinux(
+        build.Architecture.X64, build.Device.CPU, pt_version
+    )
     x64_prefix = "https://download.pytorch.org/libtorch/"
     assert x64_prefix in pt_linux_cpu.url

-    pt_macosx_cpu = build._PTArchiveMacOSX(build.Architecture.ARM64, "cpu", pt_version)
+    pt_macosx_cpu = build._PTArchiveMacOSX(
+        build.Architecture.ARM64, build.Device.CPU, pt_version
+    )
     arm64_prefix = "https://github.com/CrayLabs/ml_lib_builder/releases/download/"
     assert arm64_prefix in pt_macosx_cpu.url


 def test_PTArchiveMacOSX_gpu_error():
     with pytest.raises(build.BuildError, match="support GPU on Mac OSX"):
-        build._PTArchiveMacOSX(build.Architecture.ARM64, "gpu", RAI_VERSIONS.torch).url
+        build._PTArchiveMacOSX(
+            build.Architecture.ARM64, build.Device.GPU, RAI_VERSIONS.torch
+        ).url


 def test_valid_platforms():
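
For readers skimming the change, a minimal, self-contained sketch (not part of
the patch itself) of the `Device` pattern the diff introduces, mirroring the
enum added to smartsim/_core/_install/builder.py:

    import enum


    class Device(enum.Enum):
        """Mirrors the enum added in smartsim/_core/_install/builder.py."""

        CPU = "cpu"
        GPU = "gpu"


    # Lowercase CLI strings (argparse applies type=str.lower) map onto members:
    assert Device("gpu") is Device.GPU

    # .value recovers the lowercase string used in paths, URLs, and suffixes:
    assert f"install-{Device.CPU.value}" == "install-cpu"

    # Backends that still expect upper-case device strings get:
    assert Device.GPU.value.upper() == "GPU"

    # argparse choices are derived from the enum rather than hard-coded lists:
    assert [device.value for device in Device] == ["cpu", "gpu"]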