nv-morpheus · rapids-bot · Aug 23, 2023 · Jul 17, 2023 · Jul 17, 2023 · Jul 17, 2023
@@ -333,7 +333,8 @@ def _split_batches(x: MultiInferenceMessage, max_batch_size: int) -> typing.List
         return out_resp
 
     @staticmethod
-    def _convert_response(x: typing.Tuple[typing.List[MultiInferenceMessage], typing.List[TensorMemory]]):
+    def _convert_response(
+            x: typing.Tuple[typing.List[MultiInferenceMessage], typing.List[TensorMemory]]) -> MultiResponseMessage:
 
         # Convert a MultiInferenceMessage into a MultiResponseMessage
         in_message = x[0]

@@ -20,7 +20,9 @@
 import time
 import types
 import typing
+from unittest import mock
 
+import numpy as np
 import pytest
 
 from morpheus.io.deserializers import read_file_to_df
@@ -161,3 +163,13 @@ def import_or_skip(modname: str,
         if fail_missing:
             raise ImportError(e) from e
         raise
+
+
+def mk_async_infer(inf_results: np.ndarray) -> typing.Callable:
+    mock_infer_result = mock.MagicMock()
+    mock_infer_result.as_numpy.side_effect = inf_results
+
+    def async_infer(callback=None, **_):
+        callback(mock_infer_result, None)
+
+    return async_infer
@@ -24,11 +24,11 @@
 
 import cudf as cdf  # rename to avoid clash with property method
 
+from _utils import TEST_DIRS
+from _utils import assert_results
 from morpheus.io.deserializers import read_file_to_df
 from morpheus.utils import compare_df
 from morpheus.utils.type_aliases import DataFrameType
-from utils import TEST_DIRS
-from utils import assert_results
 
 
 class DatasetManager:

@@ -16,7 +16,7 @@
 import typing
 
 from morpheus.messages import MultiInferenceMessage
-from morpheus.messages import TensorMemory
+from morpheus.messages.memory.tensor_memory import TensorMemory
 from morpheus.stages.inference import inference_stage
 
 

@@ -19,6 +19,7 @@
 
 import pytest
 
+from _utils import TEST_DIRS
 from morpheus.config import Config
 from morpheus.config import ConfigAutoEncoder
 from morpheus.config import ConfigFIL
@@ -42,7 +43,6 @@
 from morpheus.stages.preprocess.train_ae_stage import TrainAEStage
 from morpheus.utils.file_utils import load_labels_file
 from morpheus.utils.logger import configure_logging
-from utils import TEST_DIRS
 
 E2E_CONFIG_FILE = os.path.join(TEST_DIRS.morpheus_root, "tests/benchmarks/e2e_test_configs.json")
 with open(E2E_CONFIG_FILE, 'r', encoding='UTF-8') as f:

@@ -620,7 +620,7 @@ def _camouflage_is_running():
         Whether or not we are using Camouflage or an actual Triton server
     """
 
-    from utils import TEST_DIRS
+    from _utils import TEST_DIRS
 
     logger = logging.getLogger(f"morpheus.{__name__}")
 
@@ -869,7 +869,7 @@ def test something(dataset: DatasetManager):
 
     Users who don't want to parameterize over the DataFrame should use the `dataset_pandas` or `dataset_cudf` fixtures.
     """
-    from utils import dataset_manager
+    from _utils import dataset_manager
     yield dataset_manager.DatasetManager(df_type=df_type)
 
 
@@ -894,7 +894,7 @@ def test_something(dataset_pandas: DatasetManager):
         expected_df = expected_df.rename(columns=dict(zip(expected_df.columns, class_labels)))
     ```
     """
-    from utils import dataset_manager
+    from _utils import dataset_manager
     yield dataset_manager.DatasetManager(df_type='pandas')
 
 
@@ -908,7 +908,7 @@ def test_something(dataset_cudf: DatasetManager):
         cdf = dataset_cudf["filter_probs.csv"]
         pdf = dataset_cudf.pandas["filter_probs.csv"]
     """
-    from utils import dataset_manager
+    from _utils import dataset_manager
     yield dataset_manager.DatasetManager(df_type='cudf')
 
 

@@ -22,15 +22,13 @@
 import pytest
 import torch
 
+from _utils import TEST_DIRS
+from _utils.dataset_manager import DatasetManager
 from morpheus.config import AEFeatureScalar
 from morpheus.models.dfencoder import ae_module
 from morpheus.models.dfencoder import autoencoder
 from morpheus.models.dfencoder import scalers
 from morpheus.models.dfencoder.dataframe import EncoderDataFrame
-from utils import TEST_DIRS
-from utils.dataset_manager import DatasetManager
-
-# pylint: disable=redefined-outer-name
 
 # Only pandas and Python is supported
 pytestmark = [pytest.mark.use_pandas, pytest.mark.use_python]
@@ -57,8 +55,8 @@
 NUMERIC_COLS = ['eventID', 'ae_anomaly_score']
 
 
-@pytest.fixture(scope="function")
-def train_ae():
+@pytest.fixture(name="train_ae", scope="function")
+def train_ae_fixture():
     """
     Construct an AutoEncoder instance with the same values used by `train_ae_stage`
     """
@@ -78,8 +76,8 @@ def train_ae():
     )
 
 
-@pytest.fixture(scope="function")
-def train_df(dataset_pandas: DatasetManager) -> typing.Iterator[pd.DataFrame]:
+@pytest.fixture(name="train_df", scope="function")
+def train_df_fixture(dataset_pandas: DatasetManager) -> typing.Iterator[pd.DataFrame]:
     yield dataset_pandas[os.path.join(TEST_DIRS.validation_data_dir, "dfp-cloudtrail-role-g-validation-data-input.csv")]
 
 
@@ -108,59 +106,59 @@ def test_ohe():
     assert torch.equal(results, expected.to("cuda", copy=True)), f"{results} != {expected}"
 
 
-def test_compute_embedding_size():
-    for (inp, expected) in [(0, 0), (5, 4), (20, 9), (40000, 600)]:
-        assert ae_module._compute_embedding_size(inp) == expected
+@pytest.mark.parametrize("num_cats,expected", [(0, 0), (5, 4), (20, 9), (40000, 600)])
+def test_compute_embedding_size(num_cats: int, expected: int):
+    assert ae_module._compute_embedding_size(num_cats) == expected
 
 
 def test_complete_layer_constructor():
-    layer = ae_module.CompleteLayer(4, 5)
-    assert len(layer.layers) == 1
-    assert isinstance(layer.layers[0], torch.nn.Linear)
-    assert layer.layers[0].in_features == 4
-    assert layer.layers[0].out_features == 5
-
-    layer = ae_module.CompleteLayer(4, 5, activation='tanh')
-    assert len(layer.layers) == 2
-    assert layer.layers[1] is torch.tanh
-
-    layer = ae_module.CompleteLayer(4, 5, dropout=0.2)
-    assert len(layer.layers) == 2
-    assert isinstance(layer.layers[1], torch.nn.Dropout)
-    assert layer.layers[1].p == 0.2
-
-    layer = ae_module.CompleteLayer(6, 11, activation='sigmoid', dropout=0.3)
-    assert len(layer.layers) == 3
-    assert isinstance(layer.layers[0], torch.nn.Linear)
-    assert layer.layers[0].in_features == 6
-    assert layer.layers[0].out_features == 11
-    assert layer.layers[1] is torch.sigmoid
-    assert isinstance(layer.layers[2], torch.nn.Dropout)
-    assert layer.layers[2].p == 0.3
+    complete_layer = ae_module.CompleteLayer(4, 5)
+    assert len(complete_layer.layers) == 1
+    assert isinstance(complete_layer.layers[0], torch.nn.Linear)
+    assert complete_layer.layers[0].in_features == 4
+    assert complete_layer.layers[0].out_features == 5
+
+    complete_layer = ae_module.CompleteLayer(4, 5, activation='tanh')
+    assert len(complete_layer.layers) == 2
+    assert complete_layer.layers[1] is torch.tanh
+
+    complete_layer = ae_module.CompleteLayer(4, 5, dropout=0.2)
+    assert len(complete_layer.layers) == 2
+    assert isinstance(complete_layer.layers[1], torch.nn.Dropout)
+    assert complete_layer.layers[1].p == 0.2
+
+    complete_layer = ae_module.CompleteLayer(6, 11, activation='sigmoid', dropout=0.3)
+    assert len(complete_layer.layers) == 3
+    assert isinstance(complete_layer.layers[0], torch.nn.Linear)
+    assert complete_layer.layers[0].in_features == 6
+    assert complete_layer.layers[0].out_features == 11
+    assert complete_layer.layers[1] is torch.sigmoid
+    assert isinstance(complete_layer.layers[2], torch.nn.Dropout)
+    assert complete_layer.layers[2].p == 0.3
 
 
 def test_complete_layer_interpret_activation():
-    layer = ae_module.CompleteLayer(4, 5)
-    assert layer.interpret_activation('elu') is torch.nn.functional.elu
+    complete_layer = ae_module.CompleteLayer(4, 5)
+    assert complete_layer.interpret_activation('elu') is torch.nn.functional.elu
 
     # Test for bad activation, this really does raise the base Exception class.
     with pytest.raises(Exception):
-        layer.interpret_activation()
+        complete_layer.interpret_activation()
 
     with pytest.raises(Exception):
-        layer.interpret_activation("does_not_exist")
+        complete_layer.interpret_activation("does_not_exist")
 
-    layer = ae_module.CompleteLayer(6, 11, activation='sigmoid')
-    assert layer.interpret_activation() is torch.sigmoid
+    complete_layer = ae_module.CompleteLayer(6, 11, activation='sigmoid')
+    assert complete_layer.interpret_activation() is torch.sigmoid
 
 
 @pytest.mark.usefixtures("manual_seed")
 def test_complete_layer_forward():
     # Setting dropout probability to 0. The results of dropout are deterministic, but are only
     # consistent when run on the same GPU.
-    layer = ae_module.CompleteLayer(3, 5, activation='tanh', dropout=0)
+    complete_layer = ae_module.CompleteLayer(3, 5, activation='tanh', dropout=0)
     tensor = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]], dtype=torch.float32)
-    results = layer.forward(tensor)
+    results = complete_layer.forward(tensor)
     expected = torch.tensor([[0.7223, 0.7902, 0.9647, 0.5613, 0.9163], [0.9971, 0.9897, 0.9988, 0.8317, 0.9992],
                              [1.0000, 0.9995, 1.0000, 0.9417, 1.0000], [1.0000, 1.0000, 1.0000, 0.9806, 1.0000]],
                             dtype=torch.float32)

@@ -25,11 +25,11 @@
 import numpy as np
 import pytest
 
+from _utils import TEST_DIRS
 from morpheus.models.dfencoder.autoencoder import AutoEncoder
 from morpheus.models.dfencoder.dataloader import DatasetFromPath
 from morpheus.models.dfencoder.dataloader import DFEncoderDataLoader
 from morpheus.models.dfencoder.multiprocessing import start_processes
-from utils import TEST_DIRS
 
 # import torch
 

@@ -20,8 +20,8 @@
 import pandas as pd
 import pytest
 
+from _utils import TEST_DIRS
 from morpheus.models.dfencoder.autoencoder import AutoEncoder
-from utils import TEST_DIRS
 
 INFERENCE_START_DATE = "2022-11-01"
 VALIDATION_SET_SIZE = 3000  # around 1/10 of the train set
@@ -130,7 +130,7 @@ def test_dfencoder_e2e():
     # Assert the consistency of output rows and columns
     assert len(inf_res) == len(inference_df)
     assert sorted(inf_res.columns) == sorted(
-        [ft + col_suffix for ft in FEATURE_COLUMNS
+        [feature + col_suffix for feature in FEATURE_COLUMNS
          for col_suffix in ["", "_pred", "_loss", "_z_loss"]] + ["max_abs_z", "mean_abs_z", "z_loss_scaler_type"])
     # make sure the user baseline is modeled well enough so the minimum and median z scores
     # from inference are in range

@@ -23,14 +23,14 @@
 
 import cudf
 
+from _utils import TEST_DIRS
+from _utils.dataset_manager import DatasetManager
 from morpheus.common import TypeId
 from morpheus.config import Config
 from morpheus.config import PipelineModes
 from morpheus.messages import MessageMeta
 from morpheus.messages import MultiInferenceFILMessage
 from morpheus.messages import MultiMessage
-from utils import TEST_DIRS
-from utils.dataset_manager import DatasetManager
 
 
 def check_inf_message(msg: MultiInferenceFILMessage,

@@ -22,11 +22,11 @@
 
 import cudf
 
+from _utils import TEST_DIRS
 from morpheus.config import Config
 from morpheus.messages import MessageMeta
 from morpheus.messages import MultiMessage
 from morpheus.pipeline.single_port_stage import SinglePortStage
-from utils import TEST_DIRS
 
 
 def _check_pass_thru(config: Config,

@@ -19,11 +19,11 @@
 
 import pytest
 
+from _utils import TEST_DIRS
+from _utils.dataset_manager import DatasetManager
 from morpheus.config import Config
 from morpheus.config import PipelineModes
 from morpheus.messages import MessageMeta
-from utils import TEST_DIRS
-from utils.dataset_manager import DatasetManager
 
 EXPECTED_NEW_COLS = ['to_count', 'bcc_count', 'cc_count', 'total_recipients', 'data']
 

@@ -15,16 +15,15 @@
 
 import os
 import sys
+import typing
 from unittest import mock
 
 import pytest
 
+from _utils import TEST_DIRS
+from _utils import import_or_skip
+from _utils.dataset_manager import DatasetManager
 from morpheus.config import Config
-from utils import TEST_DIRS
-from utils import import_or_skip
-from utils.dataset_manager import DatasetManager
-
-# pylint: disable=redefined-outer-name
 
 SKIP_REASON = (
     "Tests for the digital_fingerprinting production example requires a number of packages not installed in the "
@@ -55,14 +54,14 @@ def mlflow(fail_missing: bool):
     yield import_or_skip("mlflow", reason=SKIP_REASON, fail_missing=fail_missing)
 
 
-@pytest.fixture(scope='session')
-def ae_feature_cols():
-    with open(os.path.join(TEST_DIRS.data_dir, 'columns_ae_cloudtrail.txt'), encoding='UTF-8') as fh:
+@pytest.fixture(name='ae_feature_cols', scope='session')
+def ae_feature_cols_fixture():
+    with open(os.path.join(TEST_DIRS.data_dir, 'columns_ae_cloudtrail.txt'), encoding='utf-8') as fh:
         yield [x.strip() for x in fh.readlines()]
 
 
-@pytest.fixture
-def config(config_no_cpp: Config, ae_feature_cols: list[str]):
+@pytest.fixture(name="config")
+def config_fixture(config_no_cpp: Config, ae_feature_cols: typing.List[str]):
     """
     The digital_fingerprinting production example utilizes the Auto Encoder config, and requires C++ execution disabled.
     """
@@ -73,8 +72,8 @@ def config(config_no_cpp: Config, ae_feature_cols: list[str]):
     yield config
 
 
-@pytest.fixture
-def example_dir():
+@pytest.fixture(name="example_dir")
+def example_dir_fixture():
     yield os.path.join(TEST_DIRS.examples_dir, 'digital_fingerprinting/production/morpheus')
 
 
@@ -83,15 +82,14 @@ def example_dir():
 # For this reason we need to ensure that the digital_fingerprinting/production/morpheus dir is in sys.path
 @pytest.fixture(autouse=True)
 def dfp_prod_in_sys_path(
-        request: pytest.FixtureRequest,  # pylint: disable=unused-argument
         restore_sys_path: list[str],  # pylint: disable=unused-argument
         reset_plugins: None,  # pylint: disable=unused-argument
         example_dir: str):
     sys.path.append(example_dir)
 
 
-@pytest.fixture
-def dfp_message_meta(config: Config, dataset_pandas: DatasetManager):
+@pytest.fixture(name="dfp_message_meta")
+def dfp_message_meta_fixture(config, dataset_pandas: DatasetManager):
     import pandas as pd
     from dfp.messages.multi_dfp_message import DFPMessageMeta