CrayLabs · mellis13 · Oct 5, 2022 · Oct 4, 2022 · Oct 4, 2022 · Oct 4, 2022
diff --git a/doc/changelog.rst b/doc/changelog.rst
@@ -11,6 +11,26 @@ Jump to :ref:`SmartRedis Changelog <changelog>`
 SmartSim
 ========
 
+Development branch
+------------------
+
+To be released at some future date
+
+Note
+
+This section details changes made in the development branch that have not yet been applied to a released version of the SmartSim library.
+
+Description
+
+- Fix bug in colocated database entrypoint when loading PyTorch models
+
+Detailed Notes
+
+- Fix bug in colocated database entrypoint where the model ``inputs`` and ``outputs`` variables were left uninitialized when no inputs or outputs were specified.  This bug affects PyTorch models being loaded into the database. (PR237_)
+
+.. _PR237: https://github.com/CrayLabs/SmartSim/pull/237
+
+
 0.4.1
 -----
 

diff --git a/smartsim/_core/entrypoints/colocated.py b/smartsim/_core/entrypoints/colocated.py
@@ -81,6 +81,9 @@ def launch_db_model(client: Client, db_model: List[str]):
     parser.add_argument("--min_batch_timeout", type=int, default=None)
     args = parser.parse_args(db_model)
 
+    inputs = None
+    outputs = None
+
     if args.inputs:
         inputs = list(args.inputs)
     if args.outputs:

diff --git a/tests/backends/test_dbmodel.py b/tests/backends/test_dbmodel.py
@@ -7,18 +7,27 @@
 from smartsim._core.utils import installed_redisai_backends
 from smartsim.error.errors import SSUnsupportedError
 
-should_run = True
+should_run_tf = True
+should_run_pt = True
 
+# Check if TensorFlow is available for tests
 try:
     import tensorflow.keras as keras
     from tensorflow.keras.layers import Conv2D, Input
 except ImportError:
-    should_run = False
+    should_run_tf = False
 
-should_run &= "tensorflow" in installed_redisai_backends()
+should_run_tf &= "tensorflow" in installed_redisai_backends()
 
-if not should_run:
-    pytest.skip("Test needs TF to run", allow_module_level=True)
+# Check if PyTorch is available for tests
+try:
+    import torch
+    import torch.nn as nn
+    import torch.nn.functional as F
+except ImportError:
+    should_run_pt = False
+
+should_run_pt &= "torch" in installed_redisai_backends()
 
 class Net(keras.Model):
     def __init__(self):
@@ -56,15 +65,50 @@ def create_tf_cnn():
 
     return serialize_model(model)
 
-
-def test_db_model(fileutils, wlmutils):
-    """Test DB Models on remote DB"""
-
-    exp_name = "test-db-model"
+# Simple MNIST CNN in PyTorch (guarded: if defining it fails, e.g. torch was not imported, PyTorch tests are disabled)
+try:
+    class PyTorchNet(nn.Module):
+        def __init__(self):
+            super(PyTorchNet, self).__init__()
+            self.conv1 = nn.Conv2d(1, 32, 3, 1)
+            self.conv2 = nn.Conv2d(32, 64, 3, 1)
+            self.dropout1 = nn.Dropout(0.25)
+            self.dropout2 = nn.Dropout(0.5)
+            self.fc1 = nn.Linear(9216, 128)
+            self.fc2 = nn.Linear(128, 10)
+
+        def forward(self, x):
+            x = self.conv1(x)
+            x = F.relu(x)
+            x = self.conv2(x)
+            x = F.relu(x)
+            x = F.max_pool2d(x, 2)
+            x = self.dropout1(x)
+            x = torch.flatten(x, 1)
+            x = self.fc1(x)
+            x = F.relu(x)
+            x = self.dropout2(x)
+            x = self.fc2(x)
+            output = F.log_softmax(x, dim=1)
+            return output
+except Exception:
+    should_run_pt = False
+
+def save_torch_cnn(path, file_name):
+    n = PyTorchNet()
+    example_forward_input = torch.rand(1, 1, 28, 28)
+    module = torch.jit.trace(n, example_forward_input)
+    torch.jit.save(module, path+"/"+file_name)
+
+@pytest.mark.skipif(not should_run_tf, reason="Test needs TF to run")
+def test_tf_db_model(fileutils, wlmutils):
+    """Test TensorFlow DB Models on remote DB"""
+
+    exp_name = "test-tf-db-model"
 
     # get test setup
     test_dir = fileutils.make_test_dir()
-    sr_test_script = fileutils.get_test_conf_path("run_dbmodel_smartredis.py")
+    sr_test_script = fileutils.get_test_conf_path("run_tf_dbmodel_smartredis.py")
 
     exp = Experiment(exp_name, exp_path=test_dir, launcher="local")
     # create colocated model
@@ -110,14 +154,58 @@ def test_db_model(fileutils, wlmutils):
     assert all([stat == status.STATUS_COMPLETED for stat in statuses])
 
 
+@pytest.mark.skipif(not should_run_pt, reason="Test needs PyTorch to run")
+def test_pt_db_model(fileutils, wlmutils):
+    """Test PyTorch DB Models on remote DB"""
+
+    exp_name = "test-pt-db-model"
+
+    # get test setup
+    test_dir = fileutils.make_test_dir()
+    sr_test_script = fileutils.get_test_conf_path("run_pt_dbmodel_smartredis.py")
+
+    exp = Experiment(exp_name, exp_path=test_dir, launcher="local")
+    # create colocated model
+    run_settings = exp.create_run_settings(exe=sys.executable, exe_args=sr_test_script)
+
+    smartsim_model = exp.create_model("smartsim_model", run_settings)
+    smartsim_model.set_path(test_dir)
+
+    db = exp.create_database(port=wlmutils.get_test_port(), interface="lo")
+    exp.generate(db)
+
+    save_torch_cnn(test_dir, "model1.pt")
+    model_path = test_dir + "/model1.pt"
+
+    smartsim_model.add_ml_model(
+        "cnn",
+        "TORCH",
+        model_path=model_path,
+        device="CPU",
+        tag="test",
+    )
+
+    for db_model in smartsim_model._db_models:
+        print(db_model)
+
+    # Assert we have added the model
+    assert len(smartsim_model._db_models) == 1
+
+    exp.start(db, smartsim_model, block=True)
+    statuses = exp.get_status(smartsim_model)
+    exp.stop(db)
+    assert all([stat == status.STATUS_COMPLETED for stat in statuses])
+
+
+@pytest.mark.skipif(not should_run_tf, reason="Test needs TF to run")
 def test_db_model_ensemble(fileutils, wlmutils):
     """Test DBModels on remote DB, with an ensemble"""
 
     exp_name = "test-db-model-ensemble"
 
     # get test setup
     test_dir = fileutils.make_test_dir()
-    sr_test_script = fileutils.get_test_conf_path("run_dbmodel_smartredis.py")
+    sr_test_script = fileutils.get_test_conf_path("run_tf_dbmodel_smartredis.py")
 
     exp = Experiment(exp_name, exp_path=test_dir, launcher="local")
     # create colocated model
@@ -174,15 +262,15 @@ def test_db_model_ensemble(fileutils, wlmutils):
     assert all([stat == status.STATUS_COMPLETED for stat in statuses])
 
 
-def test_colocated_db_model(fileutils, wlmutils):
-    """Test DB Models on colocated DB"""
+def test_colocated_db_model_tf(fileutils, wlmutils):
+    """Test DB Models on colocated DB (TensorFlow backend)"""
 
-    exp_name = "test-colocated-db-model"
+    exp_name = "test-colocated-db-model-tf"
     exp = Experiment(exp_name, launcher="local")
 
     # get test setup
     test_dir = fileutils.make_test_dir()
-    sr_test_script = fileutils.get_test_conf_path("run_dbmodel_smartredis.py")
+    sr_test_script = fileutils.get_test_conf_path("run_tf_dbmodel_smartredis.py")
 
     # create colocated model
     colo_settings = exp.create_run_settings(exe=sys.executable, exe_args=sr_test_script)
@@ -215,8 +303,39 @@ def test_colocated_db_model(fileutils, wlmutils):
     statuses = exp.get_status(colo_model)
     assert all([stat == status.STATUS_COMPLETED for stat in statuses])
 
+@pytest.mark.skipif(not should_run_pt, reason="Test needs PyTorch to run")
+def test_colocated_db_model_pytorch(fileutils, wlmutils):
+    """Test DB Models on colocated DB (PyTorch backend)"""
+
+    exp_name = "test-colocated-db-model-pytorch"
+    exp = Experiment(exp_name, launcher="local")
+
+    # get test setup
+    test_dir = fileutils.make_test_dir()
+    sr_test_script = fileutils.get_test_conf_path("run_pt_dbmodel_smartredis.py")
+
+    # create colocated model
+    colo_settings = exp.create_run_settings(exe=sys.executable, exe_args=sr_test_script)
+
+    colo_model = exp.create_model("colocated_model", colo_settings)
+    colo_model.set_path(test_dir)
+    colo_model.colocate_db(
+        port=wlmutils.get_test_port(), db_cpus=1, limit_app_cpus=False, debug=True, ifname="lo"
+    )
+
+    save_torch_cnn(test_dir, "model1.pt")
+    model_file = test_dir + "/model1.pt"
+    colo_model.add_ml_model("cnn", "TORCH", model_path=model_file, device="CPU")
+
+    # Assert we have added the model
+    assert len(colo_model._db_models) == 1
+
+    exp.start(colo_model, block=True)
+    statuses = exp.get_status(colo_model)
+    assert all([stat == status.STATUS_COMPLETED for stat in statuses])
+
 
-@pytest.mark.skipif(not should_run, reason="Test needs TF to run")
+@pytest.mark.skipif(not should_run_tf, reason="Test needs TF to run")
 def test_colocated_db_model_ensemble(fileutils, wlmutils):
     """Test DBModel on colocated ensembles, first colocating DB,
     then adding DBModel.
@@ -227,7 +346,7 @@ def test_colocated_db_model_ensemble(fileutils, wlmutils):
     # get test setup
     test_dir = fileutils.make_test_dir()
     exp = Experiment(exp_name, launcher="local", exp_path=test_dir)
-    sr_test_script = fileutils.get_test_conf_path("run_dbmodel_smartredis.py")
+    sr_test_script = fileutils.get_test_conf_path("run_tf_dbmodel_smartredis.py")
 
     # create colocated model
     colo_settings = exp.create_run_settings(exe=sys.executable, exe_args=sr_test_script)
@@ -294,7 +413,7 @@ def test_colocated_db_model_ensemble(fileutils, wlmutils):
     assert all([stat == status.STATUS_COMPLETED for stat in statuses])
 
 
-@pytest.mark.skipif(not should_run, reason="Test needs TF to run")
+@pytest.mark.skipif(not should_run_tf, reason="Test needs TF to run")
 def test_colocated_db_model_ensemble_reordered(fileutils, wlmutils):
     """Test DBModel on colocated ensembles, first adding the DBModel to the
     ensemble, then colocating DB.
@@ -305,7 +424,7 @@ def test_colocated_db_model_ensemble_reordered(fileutils, wlmutils):
     # get test setup
     test_dir = fileutils.make_test_dir()
     exp = Experiment(exp_name, launcher="local", exp_path=test_dir)
-    sr_test_script = fileutils.get_test_conf_path("run_dbmodel_smartredis.py")
+    sr_test_script = fileutils.get_test_conf_path("run_tf_dbmodel_smartredis.py")
 
     # create colocated model
     colo_settings = exp.create_run_settings(exe=sys.executable, exe_args=sr_test_script)
@@ -363,7 +482,7 @@ def test_colocated_db_model_ensemble_reordered(fileutils, wlmutils):
     assert all([stat == status.STATUS_COMPLETED for stat in statuses])
 
 
-@pytest.mark.skipif(not should_run, reason="Test needs TF to run")
+@pytest.mark.skipif(not should_run_tf, reason="Test needs TF to run")
 def test_colocated_db_model_errors(fileutils, wlmutils):
     """Test error when colocated db model has no file."""
 
@@ -372,7 +491,7 @@ def test_colocated_db_model_errors(fileutils, wlmutils):
 
     # get test setup
     test_dir = fileutils.make_test_dir()
-    sr_test_script = fileutils.get_test_conf_path("run_dbmodel_smartredis.py")
+    sr_test_script = fileutils.get_test_conf_path("run_tf_dbmodel_smartredis.py")
 
     # create colocated model
     colo_settings = exp.create_run_settings(exe=sys.executable, exe_args=sr_test_script)

diff --git a/tests/test_configs/run_pt_dbmodel_smartredis.py b/tests/test_configs/run_pt_dbmodel_smartredis.py
@@ -0,0 +1,20 @@
+import numpy as np
+from smartredis import Client
+
+def main():
+    # Address should be set as we are launching through
+    # SmartSim.
+    client = Client(cluster=False)
+
+    array = np.ones((1, 1, 28, 28)).astype(np.single)
+    client.put_tensor("test_array", array)
+    assert client.poll_model("cnn", 500, 30)
+    client.run_model("cnn", ["test_array"], ["test_output"])
+    returned = client.get_tensor("test_output")
+
+    assert returned.shape == (1, 10)
+
+    print(f"Test worked!")
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/test_configs/run_dbmodel_smartredis.py → ...test_configs/run_tf_dbmodel_smartredis.py b/tests/test_configs/run_dbmodel_smartredis.py → ...test_configs/run_tf_dbmodel_smartredis.py