From fa0da2ca8d18eda25d3c88941a77ee66818da6fc Mon Sep 17 00:00:00 2001
From: amandarichardsonn <30413257+amandarichardsonn@users.noreply.github.com>
Date: Thu, 28 Mar 2024 15:30:23 -0700
Subject: [PATCH] Promote Build Device Option to Enum (#527)

Replacing instances of ["CPU","GPU"] string literals with a `Device` Enum
(e.g. `device == "gpu"` becomes `device == Device.GPU`).

[ reviewed by @MattToast ]
[ committed by @amandarichardsonn ]
---
 doc/changelog.rst                   |  3 ++
 smartsim/_core/_cli/build.py        | 33 +++++++--------
 smartsim/_core/_cli/validate.py     | 33 ++++++++-------
 smartsim/_core/_install/builder.py  | 65 ++++++++++++++++-------------
 smartsim/entity/dbobject.py         | 19 +++++----
 smartsim/entity/ensemble.py         |  7 ++--
 smartsim/entity/model.py            |  7 ++--
 tests/backends/test_cli_mini_exp.py |  3 +-
 tests/install/test_builder.py       | 20 ++++++---
 9 files changed, 105 insertions(+), 85 deletions(-)

diff --git a/doc/changelog.rst b/doc/changelog.rst
index 065b6623d..2cebc120e 100644
--- a/doc/changelog.rst
+++ b/doc/changelog.rst
@@ -18,6 +18,7 @@ To be released at some future point in time

 Description

+- Promote device options to an Enum
 - Update telemetry monitor, add telemetry collectors
 - Add method to specify node features for a Slurm job
 - Colo Orchestrator setup now blocks application start until setup finished
@@ -33,6 +34,7 @@ Description

 Detailed Notes

+- Promote devices to a dedicated Enum type throughout the SmartSim code base.
 - Update the telemetry monitor to enable retrieval of metrics on a scheduled
   interval. Switch basic experiment tracking telemetry to default to on. Add
   database metric collectors. Improve telemetry monitor logging. Create
@@ -70,6 +72,7 @@ Detailed Notes
 - Remove previously deprecated behavior present in test suite on machines with
   Slurm and Open MPI. (SmartSim-PR520_)

+.. _SmartSim-PR498: https://github.com/CrayLabs/SmartSim/pull/498
 .. _SmartSim-PR460: https://github.com/CrayLabs/SmartSim/pull/460
 .. _SmartSim-PR512: https://github.com/CrayLabs/SmartSim/pull/512
 .. _SmartSim-PR529: https://github.com/CrayLabs/SmartSim/pull/529
diff --git a/smartsim/_core/_cli/build.py b/smartsim/_core/_cli/build.py
index b2ff61a99..08a1a6138 100644
--- a/smartsim/_core/_cli/build.py
+++ b/smartsim/_core/_cli/build.py
@@ -43,7 +43,7 @@
     VersionConflictError,
     Versioner,
 )
-from smartsim._core._install.builder import BuildError
+from smartsim._core._install.builder import BuildError, Device
 from smartsim._core.config import CONFIG
 from smartsim._core.utils.helpers import installed_redisai_backends
 from smartsim.error import SSConfigError
@@ -54,8 +54,6 @@
 # NOTE: all smartsim modules need full paths as the smart cli
 #       may be installed into a different directory.
-
-_TDeviceStr = t.Literal["cpu", "gpu"]
 _TPinningStr = t.Literal["==", "!=", ">=", ">", "<=", "<", "~="]
@@ -134,7 +132,7 @@ def build_database(
 def build_redis_ai(
     build_env: BuildEnv,
     versions: Versioner,
-    device: _TDeviceStr,
+    device: Device,
     use_torch: bool = True,
     use_tf: bool = True,
     use_onnx: bool = False,
@@ -143,7 +141,7 @@ def build_redis_ai(
     verbose: bool = False,
 ) -> None:
     # make sure user isn't trying to do something silly on MacOS
-    if build_env.PLATFORM == "darwin" and device == "gpu":
+    if build_env.PLATFORM == "darwin" and device == Device.GPU:
         raise BuildError("SmartSim does not support GPU on MacOS")

     # decide which runtimes to build
@@ -154,7 +152,7 @@ def build_redis_ai(
         ["ONNX", versions.ONNX, color_bool(use_onnx)],
     ]
     print(tabulate(backends_table, tablefmt="fancy_outline"), end="\n\n")
-    print(f"Building for GPU support: {color_bool(device == 'gpu')}\n")
+    print(f"Building for GPU support: {color_bool(device == Device.GPU)}\n")

     if not check_backends_install():
         sys.exit(1)
@@ -195,7 +193,7 @@ def build_redis_ai(
     else:
         # get the build environment, update with CUDNN env vars
         # if present and building for GPU, otherwise warn the user
-        if device == "gpu":
+        if device == Device.GPU:
             gpu_env = build_env.get_cudnn_env()
             cudnn_env_vars = [
                 "CUDNN_LIBRARY",
@@ -226,18 +224,16 @@ def build_redis_ai(
     logger.info("ML Backends and RedisAI build complete!")


-def check_py_torch_version(versions: Versioner, device_in: _TDeviceStr = "cpu") -> None:
+def check_py_torch_version(versions: Versioner, device: Device = Device.CPU) -> None:
     """Check Python environment for Torch installation"""
-
-    device = device_in.lower()
     if BuildEnv.is_macos():
-        if device == "gpu":
+        if device == Device.GPU:
             raise BuildError("SmartSim does not support GPU on MacOS")
         device_suffix = ""
     else:  # linux
-        if device == "cpu":
+        if device == Device.CPU:
             device_suffix = versions.TORCH_CPU_SUFFIX
-        elif device == "gpu":
+        elif device == Device.GPU:
             device_suffix = versions.TORCH_CUDA_SUFFIX
         else:
             raise BuildError("Unrecognized device requested")
@@ -261,7 +257,9 @@ def check_py_torch_version(versions: Versioner, device_in: _TDeviceStr = "cpu")
             "Torch version not found in python environment. "
" "Attempting to install via `pip`" ) - wheel_device = device if device == "cpu" else device_suffix.replace("+", "") + wheel_device = ( + device.value if device == Device.CPU else device_suffix.replace("+", "") + ) pip( "install", "--extra-index-url", @@ -363,8 +361,7 @@ def execute( ) -> int: verbose = args.v keydb = args.keydb - device: _TDeviceStr = args.device - + device = Device(args.device.lower()) # torch and tf build by default pt = not args.no_pt # pylint: disable=invalid-name tf = not args.no_tf # pylint: disable=invalid-name @@ -453,8 +450,8 @@ def configure_parser(parser: argparse.ArgumentParser) -> None: parser.add_argument( "--device", type=str.lower, - default="cpu", - choices=["cpu", "gpu"], + default=Device.CPU.value, + choices=[device.value for device in Device], help="Device to build ML runtimes for", ) parser.add_argument( diff --git a/smartsim/_core/_cli/validate.py b/smartsim/_core/_cli/validate.py index 0606a7735..8c4cb3c8c 100644 --- a/smartsim/_core/_cli/validate.py +++ b/smartsim/_core/_cli/validate.py @@ -40,6 +40,7 @@ from smartsim import Experiment from smartsim._core._cli.utils import SMART_LOGGER_FORMAT +from smartsim._core._install.builder import Device from smartsim._core.utils.helpers import installed_redisai_backends from smartsim.log import get_logger @@ -61,9 +62,6 @@ _TemporaryDirectory = tempfile.TemporaryDirectory -_TCapitalDeviceStr = t.Literal["CPU", "GPU"] - - class _VerificationTempDir(_TemporaryDirectory): """A Temporary directory to be used as a context manager that will only clean itself up if no error is raised within its context @@ -88,7 +86,7 @@ def execute( simple experiment """ backends = installed_redisai_backends() - device: _TCapitalDeviceStr = args.device.upper() + device: Device = Device(args.device) try: with contextlib.ExitStack() as ctx: temp_dir = ctx.enter_context(_VerificationTempDir(dir=os.getcwd())) @@ -98,7 +96,7 @@ def execute( "SR_LOG_FILE", os.path.join(temp_dir, "smartredis.log") ), } - if device == "GPU": + if device == Device.GPU: validate_env["CUDA_VISIBLE_DEVICES"] = "0" ctx.enter_context(_env_vars_set_to(validate_env)) test_install( @@ -136,8 +134,8 @@ def configure_parser(parser: argparse.ArgumentParser) -> None: parser.add_argument( "--device", type=str.lower, - default="cpu", - choices=["cpu", "gpu"], + default=Device.CPU.value, + choices=[device.value for device in Device], help="Device to test the ML backends against", ) @@ -145,7 +143,7 @@ def configure_parser(parser: argparse.ArgumentParser) -> None: def test_install( location: str, port: t.Optional[int], - device: _TCapitalDeviceStr, + device: Device, with_tf: bool, with_pt: bool, with_onnx: bool, @@ -214,7 +212,7 @@ def _find_free_port() -> int: return int(port) -def _test_tf_install(client: Client, tmp_dir: str, device: _TCapitalDeviceStr) -> None: +def _test_tf_install(client: Client, tmp_dir: str, device: Device) -> None: recv_conn, send_conn = mp.Pipe(duplex=False) # Build the model in a subproc so that keras does not hog the gpu proc = mp.Process(target=_build_tf_frozen_model, args=(send_conn, tmp_dir)) @@ -236,7 +234,12 @@ def _test_tf_install(client: Client, tmp_dir: str, device: _TCapitalDeviceStr) - ) from e client.set_model_from_file( - "keras-fcn", model_path, "TF", device=device, inputs=inputs, outputs=outputs + "keras-fcn", + model_path, + "TF", + device=device.value.upper(), + inputs=inputs, + outputs=outputs, ) client.put_tensor("keras-input", np.random.rand(1, 28, 28).astype(np.float32)) client.run_model("keras-fcn", inputs=["keras-input"], 
@@ -264,7 +267,7 @@ def _build_tf_frozen_model(conn: "Connection", tmp_dir: str) -> None:
     conn.send((model_path, inputs, outputs))


-def _test_torch_install(client: Client, device: _TCapitalDeviceStr) -> None:
+def _test_torch_install(client: Client, device: Device) -> None:
     import torch
     from torch import nn

@@ -276,7 +279,7 @@ def __init__(self) -> None:
         def forward(self, x: torch.Tensor) -> torch.Tensor:
             return self.conv(x)

-    if device == "GPU":
+    if device == Device.GPU:
         device_ = torch.device("cuda")
     else:
         device_ = torch.device("cpu")
@@ -292,13 +295,13 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
     torch.jit.save(traced, buffer)  # type: ignore[no-untyped-call]
     model = buffer.getvalue()

-    client.set_model("torch-nn", model, backend="TORCH", device=device)
+    client.set_model("torch-nn", model, backend="TORCH", device=device.value.upper())
     client.put_tensor("torch-in", torch.rand(1, 1, 3, 3).numpy())
     client.run_model("torch-nn", inputs=["torch-in"], outputs=["torch-out"])
     client.get_tensor("torch-out")


-def _test_onnx_install(client: Client, device: _TCapitalDeviceStr) -> None:
+def _test_onnx_install(client: Client, device: Device) -> None:
     from skl2onnx import to_onnx
     from sklearn.cluster import KMeans

@@ -311,7 +314,7 @@ def _test_onnx_install(client: Client, device: _TCapitalDeviceStr) -> None:
     sample = np.arange(20, dtype=np.float32).reshape(10, 2)
     client.put_tensor("onnx-input", sample)

-    client.set_model("onnx-kmeans", model, "ONNX", device=device)
+    client.set_model("onnx-kmeans", model, "ONNX", device=device.value.upper())
     client.run_model(
         "onnx-kmeans", inputs=["onnx-input"], outputs=["onnx-labels", "onnx-transform"]
     )
diff --git a/smartsim/_core/_install/builder.py b/smartsim/_core/_install/builder.py
index c098cfd01..47f12d044 100644
--- a/smartsim/_core/_install/builder.py
+++ b/smartsim/_core/_install/builder.py
@@ -53,7 +53,7 @@
 # TODO: check cmake version and use system if possible to avoid conflicts

 TRedisAIBackendStr = t.Literal["tensorflow", "torch", "onnxruntime", "tflite"]
-TDeviceStr = t.Literal["cpu", "gpu"]
+

 _T = t.TypeVar("_T")
 _U = t.TypeVar("_U")
@@ -96,6 +96,11 @@ def from_str(cls, string: str, /) -> "Architecture":
         raise BuildError(f"Unrecognized or unsupported architecture: {string}")


+class Device(enum.Enum):
+    CPU = "cpu"
+    GPU = "gpu"
+
+
 class OperatingSystem(enum.Enum):
     LINUX = ("linux", "linux2")
     DARWIN = ("darwin",)
@@ -173,7 +178,7 @@ def is_built(self) -> bool:
         raise NotImplementedError

     def build_from_git(
-        self, git_url: str, branch: str, device: TDeviceStr = "cpu"
+        self, git_url: str, branch: str, device: Device = Device.CPU
     ) -> None:
         raise NotImplementedError

@@ -274,7 +279,7 @@ def is_built(self) -> bool:
         return redis_files.issubset(bin_files) or keydb_files.issubset(bin_files)

     def build_from_git(
-        self, git_url: str, branch: str, device: TDeviceStr = "cpu"
+        self, git_url: str, branch: str, device: Device = Device.CPU
     ) -> None:
         """Build Redis from git
         :param git_url: url from which to retrieve Redis
@@ -480,7 +485,7 @@ def build_onnx(self) -> bool:
     def fetch_onnx(self) -> bool:
         return self.build_onnx

-    def get_deps_dir_path_for(self, device: TDeviceStr) -> Path:
+    def get_deps_dir_path_for(self, device: Device) -> Path:
         def fail_to_format(reason: str) -> BuildError:  # pragma: no cover
             return BuildError(f"Failed to format RedisAI dependency path: {reason}")

@@ -497,10 +502,10 @@ def fail_to_format(reason: str) -> BuildError:  # pragma: no cover
             arch = "arm64v8"
         else:  # pragma: no cover
             raise fail_to_format(f"Unknown architecture: {architecture}")
-        return self.rai_build_path / f"deps/{os_}-{arch}-{device}"
+        return self.rai_build_path / f"deps/{os_}-{arch}-{device.value}"

     def _get_deps_to_fetch_for(
-        self, device: TDeviceStr
+        self, device: Device
     ) -> t.Tuple[_RAIBuildDependency, ...]:
         os_, arch = self._platform
         # TODO: It would be nice if the backend version numbers were declared
@@ -521,14 +526,14 @@ def _get_deps_to_fetch_for(

         return tuple(fetchable_deps)

-    def symlink_libtf(self, device: str) -> None:
+    def symlink_libtf(self, device: Device) -> None:
         """Add symbolic link to available libtensorflow in RedisAI deps.

         :param device: cpu or gpu
         :type device: Device
         """
         rai_deps_path = sorted(
-            self.rai_build_path.glob(os.path.join("deps", f"*{device}*"))
+            self.rai_build_path.glob(os.path.join("deps", f"*{device.value}*"))
         )
         if not rai_deps_path:
             raise FileNotFoundError("Could not find RedisAI 'deps' directory")
@@ -577,7 +582,7 @@ def symlink_libtf(self, device: str) -> None:
                     os.symlink(src_file, dst_file)

     def build_from_git(
-        self, git_url: str, branch: str, device: TDeviceStr = "cpu"
+        self, git_url: str, branch: str, device: Device = Device.CPU
     ) -> None:
         """Build RedisAI from git

@@ -616,14 +621,14 @@ def build_from_git(
         self.run_command(clone_cmd, out=subprocess.DEVNULL, cwd=self.build_dir)
         self._fetch_deps_for(device)

-        if self.libtf_dir and device:
+        if self.libtf_dir and device.value:
             self.symlink_libtf(device)

         build_cmd = self._rai_build_env_prefix(
             with_pt=self.build_torch,
             with_tf=self.build_tf,
             with_ort=self.build_onnx,
-            extra_env={"GPU": "1" if device == "gpu" else "0"},
+            extra_env={"GPU": "1" if device == Device.GPU else "0"},
         )

         if self.torch_dir:
@@ -674,7 +679,7 @@ def _rai_build_env_prefix(
             *(f"{key}={val}" for key, val in extra_env.items()),
         ]

-    def _fetch_deps_for(self, device: TDeviceStr) -> None:
+    def _fetch_deps_for(self, device: Device) -> None:
         if not self.rai_build_path.is_dir():
             raise BuildError("RedisAI build directory not found")

@@ -693,13 +698,13 @@ def _fetch_deps_for(self, device: TDeviceStr) -> None:
             f"found {len(unique_placed_paths)}"
         )

-    def _install_backends(self, device: str) -> None:
+    def _install_backends(self, device: Device) -> None:
         """Move backend libraries to smartsim/_core/lib/
         :param device: cpu or gpu
         :type device: Device
         """
         self.rai_install_path = self.rai_build_path.joinpath(
-            f"install-{device}"
+            f"install-{device.value}"
         ).resolve()
         rai_lib = self.rai_install_path / "redisai.so"
         rai_backends = self.rai_install_path / "backends"
@@ -833,7 +838,7 @@ def _extract_download(
 @dataclass(frozen=True)
 class _PTArchive(_WebZip, _RAIBuildDependency):
     architecture: Architecture
-    device: TDeviceStr
+    device: Device
     version: str

     @staticmethod
@@ -865,10 +870,10 @@ def supported_platforms() -> t.Sequence[t.Tuple[OperatingSystem, Architecture]]:

     @property
     def url(self) -> str:
-        if self.device == "gpu":
+        if self.device == Device.GPU:
             pt_build = "cu117"
         else:
-            pt_build = "cpu"
+            pt_build = Device.CPU.value
         # pylint: disable-next=line-too-long
         libtorch_archive = (
             f"libtorch-cxx11-abi-shared-without-deps-{self.version}%2B{pt_build}.zip"
         )
         root_url = "https://download.pytorch.org/libtorch"
         return f"{root_url}/{pt_build}/{libtorch_archive}"
@@ -887,10 +892,10 @@ def supported_platforms() -> t.Sequence[t.Tuple[OperatingSystem, Architecture]]:

     @property
     def url(self) -> str:
-        if self.device == "gpu":
+        if self.device == Device.GPU:
             raise BuildError("RedisAI does not currently support GPU on Mac OSX")
         if self.architecture == Architecture.X64:
-            pt_build = "cpu"
+            pt_build = Device.CPU.value
             libtorch_archive = f"libtorch-macos-{self.version}.zip"
f"libtorch-macos-{self.version}.zip" root_url = "https://download.pytorch.org/libtorch" return f"{root_url}/{pt_build}/{libtorch_archive}" @@ -902,7 +907,7 @@ def url(self) -> str: ) return f"{root_url}/{libtorch_archive}" - raise BuildError("Unsupported architecture for Pytorch: {self.architecture}") + raise BuildError(f"Unsupported architecture for Pytorch: {self.architecture}") def _choose_pt_variant( @@ -921,7 +926,7 @@ def _choose_pt_variant( class _TFArchive(_WebTGZ, _RAIBuildDependency): os_: OperatingSystem architecture: Architecture - device: TDeviceStr + device: Device version: str @staticmethod @@ -937,7 +942,7 @@ def url(self) -> str: tf_arch = "x86_64" else: raise BuildError( - "Unexpected Architecture for TF Archive: {self.architecture}" + f"Unexpected Architecture for TF Archive: {self.architecture}" ) if self.os_ == OperatingSystem.LINUX: @@ -945,14 +950,14 @@ def url(self) -> str: tf_device = self.device elif self.os_ == OperatingSystem.DARWIN: tf_os = "darwin" - if self.device == "gpu": + if self.device == Device.GPU: raise BuildError("RedisAI does not currently support GPU on Macos") - tf_device = "cpu" + tf_device = Device.CPU else: - raise BuildError("Unexpected OS for TF Archive: {self.os_}") + raise BuildError(f"Unexpected OS for TF Archive: {self.os_}") return ( "https://storage.googleapis.com/tensorflow/libtensorflow/" - f"libtensorflow-{tf_device}-{tf_os}-{tf_arch}-{self.version}.tar.gz" + f"libtensorflow-{tf_device.value}-{tf_os}-{tf_arch}-{self.version}.tar.gz" ) @property @@ -970,7 +975,7 @@ def __place_for_rai__(self, target: t.Union[str, "os.PathLike[str]"]) -> Path: @dataclass(frozen=True) class _ORTArchive(_WebTGZ, _RAIBuildDependency): os_: OperatingSystem - device: TDeviceStr + device: Device version: str @staticmethod @@ -989,15 +994,15 @@ def url(self) -> str: if self.os_ == OperatingSystem.LINUX: ort_os = "linux" ort_arch = "x64" - ort_build = "-gpu" if self.device == "gpu" else "" + ort_build = "-gpu" if self.device == Device.GPU else "" elif self.os_ == OperatingSystem.DARWIN: ort_os = "osx" ort_arch = "x86_64" ort_build = "" - if self.device == "gpu": + if self.device == Device.GPU: raise BuildError("RedisAI does not currently support GPU on Macos") else: - raise BuildError("Unexpected OS for TF Archive: {self.os_}") + raise BuildError(f"Unexpected OS for TF Archive: {self.os_}") ort_archive = f"onnxruntime-{ort_os}-{ort_arch}{ort_build}-{self.version}.tgz" return f"{ort_url_base}/{ort_archive}" diff --git a/smartsim/entity/dbobject.py b/smartsim/entity/dbobject.py index 0a495f066..ff18da1cd 100644 --- a/smartsim/entity/dbobject.py +++ b/smartsim/entity/dbobject.py @@ -27,6 +27,7 @@ import typing as t from pathlib import Path +from .._core._install.builder import Device from .._core.utils import init_default from ..error import SSUnsupportedError @@ -46,7 +47,7 @@ def __init__( name: str, func: t.Optional[_DBObjectFuncT], file_path: t.Optional[str], - device: t.Literal["CPU", "GPU"], + device: str, devices_per_node: int, first_device: int, ) -> None: @@ -103,9 +104,9 @@ def _check_filepath(file: str) -> Path: return file_path @staticmethod - def _check_device(device: t.Literal["CPU", "GPU"]) -> str: - device = t.cast(t.Literal["CPU", "GPU"], device.upper()) - if not device.startswith("CPU") and not device.startswith("GPU"): + def _check_device(device: str) -> str: + valid_devices = [Device.CPU.value, Device.GPU.value] + if not any(device.lower().startswith(dev) for dev in valid_devices): raise ValueError("Device argument must start with either CPU 
         return device

@@ -130,16 +131,16 @@ def _enumerate_devices(self) -> t.List[str]:

     @staticmethod
     def _check_devices(
-        device: t.Literal["CPU", "GPU"],
+        device: str,
         devices_per_node: int,
         first_device: int,
     ) -> None:
-        if device == "CPU" and devices_per_node > 1:
+        if device.lower() == Device.CPU.value and devices_per_node > 1:
             raise SSUnsupportedError(
                 "Cannot set devices_per_node>1 if CPU is specified under devices"
             )

-        if device == "CPU" and first_device > 0:
+        if device.lower() == Device.CPU.value and first_device > 0:
             raise SSUnsupportedError(
                 "Cannot set first_device>0 if CPU is specified under devices"
             )
@@ -160,7 +161,7 @@ def __init__(
         name: str,
         script: t.Optional[str] = None,
         script_path: t.Optional[str] = None,
-        device: t.Literal["CPU", "GPU"] = "CPU",
+        device: str = Device.CPU.value.upper(),
         devices_per_node: int = 1,
         first_device: int = 0,
     ):
@@ -222,7 +223,7 @@ def __init__(
         backend: str,
         model: t.Optional[bytes] = None,
         model_file: t.Optional[str] = None,
-        device: t.Literal["CPU", "GPU"] = "CPU",
+        device: str = Device.CPU.value.upper(),
         devices_per_node: int = 1,
         first_device: int = 0,
         batch_size: int = 0,
diff --git a/smartsim/entity/ensemble.py b/smartsim/entity/ensemble.py
index e9aea5767..c04681149 100644
--- a/smartsim/entity/ensemble.py
+++ b/smartsim/entity/ensemble.py
@@ -30,6 +30,7 @@

 from tabulate import tabulate

+from .._core._install.builder import Device
 from .._core.utils.helpers import init_default
 from ..error import (
     EntityExistsError,
@@ -356,7 +357,7 @@ def add_ml_model(
         backend: str,
         model: t.Optional[bytes] = None,
         model_path: t.Optional[str] = None,
-        device: t.Literal["CPU", "GPU"] = "CPU",
+        device: str = Device.CPU.value.upper(),
         devices_per_node: int = 1,
         first_device: int = 0,
         batch_size: int = 0,
@@ -440,7 +441,7 @@ def add_script(
         name: str,
         script: t.Optional[str] = None,
         script_path: t.Optional[str] = None,
-        device: t.Literal["CPU", "GPU"] = "CPU",
+        device: str = Device.CPU.value.upper(),
         devices_per_node: int = 1,
         first_device: int = 0,
     ) -> None:
@@ -500,7 +501,7 @@ def add_function(
         self,
         name: str,
         function: t.Optional[str] = None,
-        device: t.Literal["CPU", "GPU"] = "CPU",
+        device: str = Device.CPU.value.upper(),
         devices_per_node: int = 1,
         first_device: int = 0,
     ) -> None:
diff --git a/smartsim/entity/model.py b/smartsim/entity/model.py
index 3c51cea85..4a2d9b5f5 100644
--- a/smartsim/entity/model.py
+++ b/smartsim/entity/model.py
@@ -33,6 +33,7 @@
 import warnings
 from os import path as osp

+from .._core._install.builder import Device
 from .._core.utils.helpers import cat_arg_and_value, init_default
 from ..error import EntityExistsError, SSUnsupportedError
 from ..log import get_logger
@@ -482,7 +483,7 @@ def add_ml_model(
         backend: str,
         model: t.Optional[bytes] = None,
         model_path: t.Optional[str] = None,
-        device: t.Literal["CPU", "GPU"] = "CPU",
+        device: str = Device.CPU.value.upper(),
         devices_per_node: int = 1,
         first_device: int = 0,
         batch_size: int = 0,
@@ -554,7 +555,7 @@ def add_script(
         name: str,
         script: t.Optional[str] = None,
         script_path: t.Optional[str] = None,
-        device: t.Literal["CPU", "GPU"] = "CPU",
+        device: str = Device.CPU.value.upper(),
         devices_per_node: int = 1,
         first_device: int = 0,
     ) -> None:
@@ -606,7 +607,7 @@ def add_function(
         self,
         name: str,
         function: t.Optional[str] = None,
-        device: t.Literal["CPU", "GPU"] = "CPU",
+        device: str = Device.CPU.value.upper(),
         devices_per_node: int = 1,
         first_device: int = 0,
     ) -> None:
diff --git a/tests/backends/test_cli_mini_exp.py b/tests/backends/test_cli_mini_exp.py
index f02f44270..f7563fc96 100644
--- a/tests/backends/test_cli_mini_exp.py
+++ b/tests/backends/test_cli_mini_exp.py
@@ -31,6 +31,7 @@
 import smartredis

 import smartsim._core._cli.validate
+import smartsim._core._install.builder as build
 from smartsim._core.utils.helpers import installed_redisai_backends

 sklearn_available = True
@@ -75,7 +76,7 @@ def _mock_make_managed_local_orc(*a, **kw):
         location=test_dir,
         port=db_port,
         # Always test on CPU, heads don't always have GPU
-        device="CPU",
+        device=build.Device.CPU,
         # Test the backends the dev has installed
         with_tf="tensorflow" in backends,
         with_pt="torch" in backends,
diff --git a/tests/install/test_builder.py b/tests/install/test_builder.py
index 5e6c8e597..c69a083d1 100644
--- a/tests/install/test_builder.py
+++ b/tests/install/test_builder.py
@@ -41,7 +41,9 @@

 RAI_VERSIONS = RedisAIVersion("1.2.7")

-for_each_device = pytest.mark.parametrize("device", ["cpu", "gpu"])
+for_each_device = pytest.mark.parametrize(
+    "device", [build.Device.CPU, build.Device.GPU]
+)

 _toggle_build_optional_backend = lambda backend: pytest.mark.parametrize(
     f"build_{backend}",
@@ -163,7 +165,7 @@ def test_rai_builder_will_add_dep_if_backend_requested_wo_duplicates(
     rai_builder = build.RedisAIBuilder(
         build_tf=build_tf, build_torch=build_pt, build_onnx=build_ort
     )
-    requested_backends = rai_builder._get_deps_to_fetch_for(device)
+    requested_backends = rai_builder._get_deps_to_fetch_for(build.Device(device))
     assert dlpack_dep_presence(requested_backends)
     assert tf_dep_presence(build_tf, requested_backends)
     assert pt_dep_presence(build_pt, requested_backends)
@@ -212,7 +214,7 @@ def test_rai_builder_raises_if_it_fetches_an_unexpected_number_of_ml_deps(
         build.BuildError,
         match=r"Expected to place \d+ dependencies, but only found \d+",
     ):
-        rai_builder._fetch_deps_for("cpu")
+        rai_builder._fetch_deps_for(build.Device.CPU)


 def test_threaded_map():
@@ -251,18 +253,24 @@ def test_PTArchiveMacOSX_url():
     arch = build.Architecture.X64
     pt_version = RAI_VERSIONS.torch

-    pt_linux_cpu = build._PTArchiveLinux(build.Architecture.X64, "cpu", pt_version)
+    pt_linux_cpu = build._PTArchiveLinux(
+        build.Architecture.X64, build.Device.CPU, pt_version
+    )
     x64_prefix = "https://download.pytorch.org/libtorch/"
     assert x64_prefix in pt_linux_cpu.url

-    pt_macosx_cpu = build._PTArchiveMacOSX(build.Architecture.ARM64, "cpu", pt_version)
+    pt_macosx_cpu = build._PTArchiveMacOSX(
+        build.Architecture.ARM64, build.Device.CPU, pt_version
+    )
     arm64_prefix = "https://github.com/CrayLabs/ml_lib_builder/releases/download/"
     assert arm64_prefix in pt_macosx_cpu.url


 def test_PTArchiveMacOSX_gpu_error():
     with pytest.raises(build.BuildError, match="support GPU on Mac OSX"):
-        build._PTArchiveMacOSX(build.Architecture.ARM64, "gpu", RAI_VERSIONS.torch).url
+        build._PTArchiveMacOSX(
+            build.Architecture.ARM64, build.Device.GPU, RAI_VERSIONS.torch
+        ).url


 def test_valid_platforms():
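
For readers skimming the change, a minimal, self-contained sketch (not part of
the patch itself) of the `Device` pattern the diff introduces, mirroring the
enum added to smartsim/_core/_install/builder.py:

    import enum


    class Device(enum.Enum):
        """Mirrors the enum added in smartsim/_core/_install/builder.py."""

        CPU = "cpu"
        GPU = "gpu"


    # Lowercase CLI strings (argparse applies type=str.lower) map onto members:
    assert Device("gpu") is Device.GPU

    # .value recovers the lowercase string used in paths, URLs, and suffixes:
    assert f"install-{Device.CPU.value}" == "install-cpu"

    # Backends that still expect upper-case device strings get:
    assert Device.GPU.value.upper() == "GPU"

    # argparse choices are derived from the enum rather than hard-coded lists:
    assert [device.value for device in Device] == ["cpu", "gpu"]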