Skip to content

Commit

Permalink
Fix CLI parsing of C++ enum values (#676)
Browse files Browse the repository at this point in the history
C++ enums exposed to Python via pybind11 are not subclasses of `enum.Enum` causing them to not be parsed properly by the CLI.

Hot-patching this into 23.01

fixes #675

Authors:
  - David Gardner (https://github.com/dagardner-nv)
  - Michael Demoret (https://github.com/mdemoret-nv)

Approvers:
  - Michael Demoret (https://github.com/mdemoret-nv)

URL: #676
  • Loading branch information
dagardner-nv authored Feb 13, 2023
1 parent 87c6a77 commit 31c92b3
Show file tree
Hide file tree
Showing 9 changed files with 197 additions and 45 deletions.
20 changes: 10 additions & 10 deletions morpheus.code-workspace
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@
"name": "Python: Run Pipeline (NLP)",
"type": "python",
"request": "launch",
"program": "${workspaceFolder}/morpheus/cli.py",
"program": "${workspaceFolder}/morpheus/cli/run.py",
"console": "integratedTerminal",
"justMyCode": false,
"subProcess": true,
Expand Down Expand Up @@ -118,7 +118,7 @@
"name": "Python: Run Pipeline (FIL)",
"type": "python",
"request": "launch",
"program": "${workspaceFolder}/morpheus/cli.py",
"program": "${workspaceFolder}/morpheus/cli/run.py",
"console": "integratedTerminal",
"justMyCode": false,
"subProcess": true,
Expand Down Expand Up @@ -174,7 +174,7 @@
"name": "Python: Run Pipeline (AE)",
"type": "python",
"request": "launch",
"program": "${workspaceFolder}/morpheus/cli.py",
"program": "${workspaceFolder}/morpheus/cli/run.py",
"console": "integratedTerminal",
"justMyCode": false,
"subProcess": true,
Expand Down Expand Up @@ -233,7 +233,7 @@
"name": "Python: Run Pipeline (NLP-Phishing)",
"type": "python",
"request": "launch",
"program": "${workspaceFolder}/morpheus/cli.py",
"program": "${workspaceFolder}/morpheus/cli/run.py",
"console": "integratedTerminal",
"justMyCode": false,
"subProcess": true,
Expand Down Expand Up @@ -330,7 +330,7 @@
"justMyCode": false
},
{
"name": "Debug SRF from Python (Morpheus-NLP)",
"name": "Debug MRC from Python (Morpheus-NLP)",
"type": "cppdbg",
"request": "launch",
"program": "python",
Expand Down Expand Up @@ -415,7 +415,7 @@
],
"symbolLoadInfo": {
"loadAll": false,
"exceptionList": "libsrf*.so;cudf_helpers.*;executor.*;morpheus.*;node.*;options.*;pipeline.*;segment.*;subscriber.*;stages.*;messages.*;common*.so"
"exceptionList": "libmrc*.so;cudf_helpers.*;executor.*;morpheus.*;node.*;options.*;pipeline.*;segment.*;subscriber.*;stages.*;messages.*;common*.so"
},
"miDebuggerPath": "gdb",
"sourceFileMap": {
Expand All @@ -426,7 +426,7 @@
},
},
{
"name": "Debug SRF from Python (Morpheus-FIL)",
"name": "Debug MRC from Python (Morpheus-FIL)",
"type": "cppdbg",
"request": "launch",
"program": "python",
Expand Down Expand Up @@ -508,7 +508,7 @@
],
"symbolLoadInfo": {
"loadAll": false,
"exceptionList": "libsrf*.so;cudf_helpers.*;executor.*;morpheus.*;node.*;options.*;pipeline.*;segment.*;subscriber.*;stages.*;messages.*;common*.so"
"exceptionList": "libmrc*.so;cudf_helpers.*;executor.*;morpheus.*;node.*;options.*;pipeline.*;segment.*;subscriber.*;stages.*;messages.*;common*.so"
},
"miDebuggerPath": "gdb",
"sourceFileMap": {
Expand All @@ -519,7 +519,7 @@
},
},
{
"name": "Debug SRF from Python (Morpheus-AE)",
"name": "Debug MRC from Python (Morpheus-AE)",
"type": "cppdbg",
"request": "launch",
"program": "python",
Expand Down Expand Up @@ -599,7 +599,7 @@
],
"symbolLoadInfo": {
"loadAll": false,
"exceptionList": "libsrf*.so;cudf_helpers.*;executor.*;morpheus.*;node.*;options.*;pipeline.*;segment.*;subscriber.*;stages.*;messages.*;common*.so"
"exceptionList": "libmrc*.so;cudf_helpers.*;executor.*;morpheus.*;node.*;options.*;pipeline.*;segment.*;subscriber.*;stages.*;messages.*;common*.so"
},
"miDebuggerPath": "gdb",
"sourceFileMap": {
Expand Down
6 changes: 3 additions & 3 deletions morpheus/cli/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from morpheus.cli.stage_registry import LazyStageInfo
from morpheus.cli.utils import MorpheusRelativePath
from morpheus.cli.utils import get_config_from_ctx
from morpheus.cli.utils import get_enum_values
from morpheus.cli.utils import get_enum_keys
from morpheus.cli.utils import get_log_levels
from morpheus.cli.utils import get_pipeline_from_ctx
from morpheus.cli.utils import load_labels_file
Expand Down Expand Up @@ -450,8 +450,8 @@ def pipeline_fil(ctx: click.Context, **kwargs):
help=("Specifying this value will filter all incoming data to only use rows with matching User IDs. "
"Which column is used for the User ID is specified by `userid_column_name`"))
@click.option('--feature_scaler',
type=click.Choice(get_enum_values(AEFeatureScalar), case_sensitive=False),
default=AEFeatureScalar.STANDARD.value,
type=click.Choice(get_enum_keys(AEFeatureScalar), case_sensitive=False),
default=AEFeatureScalar.STANDARD.name,
callback=functools.partial(parse_enum, enum_class=AEFeatureScalar, case_sensitive=False),
help=("Autoencoder feature scaler"))
@click.option('--use_generic_model',
Expand Down
18 changes: 14 additions & 4 deletions morpheus/cli/register_stage.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
import pathlib
import re
import typing
from enum import Enum

import click
import numpydoc.docscrape
Expand All @@ -29,8 +28,9 @@
from morpheus.cli.stage_registry import LazyStageInfo
from morpheus.cli.stage_registry import StageInfo
from morpheus.cli.utils import get_config_from_ctx
from morpheus.cli.utils import get_enum_values
from morpheus.cli.utils import get_enum_keys
from morpheus.cli.utils import get_pipeline_from_ctx
from morpheus.cli.utils import is_enum
from morpheus.cli.utils import parse_enum
from morpheus.cli.utils import prepare_command
from morpheus.config import Config
Expand Down Expand Up @@ -161,6 +161,15 @@ def has_matching_kwargs(function, input_dict: dict) -> bool:
return len([True for input_name in list(input_dict.keys()) if input_name in fn_sig.parameters]) > 0


def _convert_enum_default(options_kwargs: dict, annotation, use_value: bool = False):
"""
Display the default value of an enum argument as a string not an enum instance
"""
default_val = options_kwargs.get('default')
if (isinstance(default_val, annotation)):
options_kwargs['default'] = default_val.name


def set_options_param_type(options_kwargs: dict, annotation, doc_type: str):

doc_type_kwargs = get_doc_kwargs(doc_type)
Expand All @@ -177,10 +186,11 @@ def set_options_param_type(options_kwargs: dict, annotation, doc_type: str):
# For paths, use the Path option and apply any kwargs
options_kwargs["type"] = partial_pop_kwargs(click.Path, doc_type_kwargs)()

elif (issubtype(annotation, Enum)):
elif (is_enum(annotation)):
case_sensitive = doc_type_kwargs.get('case_sensitive', True)
options_kwargs["type"] = partial_pop_kwargs(click.Choice, doc_type_kwargs)(get_enum_values(annotation))
options_kwargs["type"] = partial_pop_kwargs(click.Choice, doc_type_kwargs)(get_enum_keys(annotation))

_convert_enum_default(options_kwargs, annotation, use_value=True)
options_kwargs["callback"] = functools.partial(parse_enum, enum_class=annotation, case_sensitive=case_sensitive)

elif (issubtype(annotation, int) and not issubtype(annotation, bool)):
Expand Down
44 changes: 23 additions & 21 deletions morpheus/cli/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,15 @@ def _without_empty_args(passed_args):
return {k: v for k, v in passed_args.items() if v is not None}


def is_pybind_enum(cls: typing.Any):
"""
Determines if the given `cls` is an enum.
C++ enums exposed via pybind11 do not inherit from Enum, but do expose the `__members__` convention.
https://docs.python.org/3.8/library/enum.html?highlight=__members__#iteration
"""
return 'pybind11' in type(cls).__name__ and hasattr(cls, '__members__')


def without_empty_args(f):
"""
Removes keyword arguments that have a None value
Expand Down Expand Up @@ -153,43 +162,36 @@ def parse_log_level(ctx, param, value):
return x


def get_enum_map(enum_class: typing.Type):

assert issubclass(enum_class, Enum), "Must pass a class that derives from Enum"
def is_enum(enum_class: typing.Type):
return issubclass(enum_class, Enum) or is_pybind_enum(enum_class)

enum_map = {x.name: x.value for x in enum_class}

return enum_map
def get_enum_members(enum_class: typing.Type):

assert is_enum(enum_class), \
"Must pass a class that derives from Enum or a C++ enum exposed to Python"

def get_enum_inv_map(enum_class: typing.Type):
return dict(enum_class.__members__)

assert issubclass(enum_class, Enum), "Must pass a class that derives from Enum"

enum_map = {x.value: x.name for x in enum_class}
def get_enum_keys(enum_class: typing.Type):

return enum_map
enum_map = get_enum_members(enum_class)


def get_enum_values(enum_class: typing.Type):

enum_map = get_enum_map(enum_class)

return list(enum_map.values())
return list(enum_map.keys())


def parse_enum(_: click.Context, _2: click.Parameter, value: str, enum_class: typing.Type, case_sensitive=True):

enum_map = get_enum_inv_map(enum_class)
enum_map = get_enum_members(enum_class)

if case_sensitive:
enum_key = enum_map[value]
else:
if not case_sensitive:
# Make the keys lowercase
enum_map = {key.lower(): value for key, value in enum_map.items()}
enum_key = enum_map[value.lower()]
value = value.lower()

result = enum_map[value]

result = enum_class[enum_key]
return result


Expand Down
4 changes: 2 additions & 2 deletions morpheus/stages/input/file_source_stage.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,9 @@ class FileSourceStage(SingleOutputSource):
iterative : boolean, default = False, is_flag = True
Iterative mode will emit dataframes one at a time. Otherwise a list of dataframes is emitted. Iterative mode is
good for interleaving source stages.
file_type : `morpheus._lib.common.FileTypes`, default = 'auto'
file_type : `morpheus._lib.common.FileTypes`, optional, case_sensitive = False
Indicates what type of file to read. Specifying 'auto' will determine the file type from the extension.
Supported extensions: 'json', 'csv'
Supported extensions: 'csv', 'json' and 'jsonlines'
repeat : int, default = 1, min = 1
Repeats the input dataset multiple times. Useful to extend small datasets for debugging.
filter_null : bool, default = True
Expand Down
4 changes: 2 additions & 2 deletions morpheus/stages/input/kafka_source_stage.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ class KafkaSourceStage(SingleOutputSource):
disable_pre_filtering : bool, default = False
Enabling this option will skip pre-filtering of json messages. This is only useful when inputs are known to be
valid json.
auto_offset_reset : `AutoOffsetReset`, default = AutoOffsetReset.LATEST, case_sensitive = False
auto_offset_reset : `AutoOffsetReset`, case_sensitive = False
Sets the value for the configuration option 'auto.offset.reset'. See the kafka documentation for more
information on the effects of each value."
stop_after: int, default = 0
Expand All @@ -85,7 +85,7 @@ def __init__(self,
poll_interval: str = "10millis",
disable_commit: bool = False,
disable_pre_filtering: bool = False,
auto_offset_reset: AutoOffsetReset = "latest",
auto_offset_reset: AutoOffsetReset = AutoOffsetReset.LATEST,
stop_after: int = 0,
async_commits: bool = True):
super().__init__(c)
Expand Down
5 changes: 3 additions & 2 deletions morpheus/stages/output/write_to_file_stage.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,9 @@ class WriteToFileStage(SinglePortStage):
Name of the file to which the messages will be written.
overwrite : boolean, default = False, is_flag = True
Overwrite file if exists. Will generate an error otherwise.
file_type : `morpheus._lib.common.FileTypes`, optional
File type of output (FileTypes.JSON, FileTypes.CSV, FileTypes.Auto), by default FileTypes.Auto.
file_type : `morpheus._lib.common.FileTypes`, optional, case_sensitive = False
Indicates what type of file to write. Specifying 'auto' will determine the file type from the extension.
Supported extensions: 'csv', 'json' and 'jsonlines'
include_index_col : bool, default = True
Write out the index as a column, by default True.
"""
Expand Down
2 changes: 1 addition & 1 deletion morpheus/stages/postprocess/filter_detections_stage.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ class FilterDetectionsStage(SinglePortStage):
Threshold to classify, default is 0.5.
copy : bool
Whether or not to perform a copy.
filter_source : `from morpheus._lib.common.FilterSource`, default = 'auto'
filter_source : `from morpheus._lib.common.FilterSource`, case_sensitive = False
Indicate if we are operating on is an output tensor or a field in the DataFrame.
Choosing `Auto` will default to `TENSOR` when the incoming message contains output tensorts and `DATAFRAME`
otherwise.
Expand Down
Loading

0 comments on commit 31c92b3

Please sign in to comment.