Create tests for examples with custom stages (#885)
This PR creates at least one test for each example containing custom stages. It currently covers only those examples that do not require additional packages.
Part of #849.

* Moves the BERT vocabulary files to the `morpheus/data` dir, so they no longer need to be fetched from LFS and are available to unit tests.
* Fixes type hints and removes a redundant method in `examples/log_parsing/inference.py`.
* Removes redundant copies of the `bert-base-cased-hash.txt` and `bert-base-uncased-hash.txt` files, replacing them with symlinks to the files in the `morpheus/data` dir. Fixes #850.
* Explicitly sets `encoding='UTF-8'` in `examples/log_parsing/postprocessing.py` as a workaround for issue #859 (a sketch of the failure mode follows the `postprocessing.py` diff below).
* Adds `py::kw_only` to the Python bindings for `TensorMemory` and its subclasses to ensure parity with the Python implementations (first sketch after this list).
* Sets `repr=False` for the `tensors` field of `TensorMemory`, avoiding a bug when printing caused by the fact that the value is assigned to `self._tensors` (second sketch after this list).
* Seeds CuPy's random number generator in the `manual_seed` method (third sketch after this list).
* Fixes usage of the `reload_modules` fixture; requesting a reload of multiple modules should be done with `@pytest.mark.reload_modules([mod1, mod2])`, not by calling `reload_modules` twice (fourth sketch after this list).
* New test data in `tests/tests_data/log_parsing` is based on the first 5 rows of `models/datasets/validation-data/log-parsing-validation-data-input.csv`.
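A minimal sketch of the keyword-only parity the `py::kw_only` change enforces, assuming the `TensorMemory` interface as of this PR (the tensor name and shapes are illustrative):

```python
# Sketch only: shows the calling convention py::kw_only enforces on the C++
# bindings so they match the Python TensorMemory implementation.
import cupy as cp

from morpheus.messages import TensorMemory

tensors = {"input_ids": cp.zeros((5, 256), dtype=cp.int32)}

mem = TensorMemory(count=5, tensors=tensors)  # OK: keyword arguments
# TensorMemory(5, tensors)  # TypeError once the bindings are kw_only, matching Python
```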
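Why `repr=False` matters, as a standalone illustration rather than the Morpheus implementation: the dataclass declares a `tensors` field, but the real value lives on `self._tensors`, so excluding the field from the generated `__repr__` sidesteps the mismatch:

```python
import dataclasses

@dataclasses.dataclass
class MemoryLike:  # illustrative stand-in for TensorMemory
    count: int
    # repr=False keeps dataclasses' generated __repr__ from rendering this
    # field; in the real class the value is stored on self._tensors behind a
    # property, which is what made printing misbehave.
    tensors: dict = dataclasses.field(default=None, repr=False)

print(MemoryLike(count=5, tensors={}))  # -> MemoryLike(count=5)
```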
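The RNG seeding change, sketched as a `manual_seed`-style helper; the non-CuPy seeds are assumptions about what such a helper typically covers, while the `cp.random.seed` line is the call this PR adds:

```python
import random

import cupy as cp
import numpy as np

def manual_seed(seed: int):
    """Seed every RNG the tests may touch so runs are reproducible."""
    random.seed(seed)
    np.random.seed(seed)
    cp.random.seed(seed)  # the CuPy seeding added by this PR
```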
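And the corrected `reload_modules` usage: a single marker carrying both modules rather than two separate reload requests (the module names and the `usefixtures` pairing are illustrative of how the fixture is typically requested):

```python
import pytest

# Illustrative modules; substitute whatever a given test needs reloaded.
from examples.log_parsing import inference
from examples.log_parsing import postprocessing

@pytest.mark.reload_modules([inference, postprocessing])  # one marker, both modules
@pytest.mark.usefixtures("reload_modules")
def test_log_parsing_pipeline():
    ...
```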

Authors:
  - David Gardner (https://github.com/dagardner-nv)

Approvers:
  - Michael Demoret (https://github.com/mdemoret-nv)

URL: #885
dagardner-nv authored Apr 28, 2023
1 parent 339a71f commit 18fcbce
Showing 41 changed files with 31,280 additions and 246,568 deletions.
**`ci/scripts/github/build.sh`** — 2 additions, 1 deletion

```diff
@@ -46,7 +46,8 @@ sccache --show-stats
 rapids-logger "Archiving results"
 tar cfj "${WORKSPACE_TMP}/wheel.tar.bz" build/dist
 
-MORPHEUS_LIBS=($(find ${MORPHEUS_ROOT}/build/morpheus/_lib -name "*.so" -exec realpath --relative-to ${MORPHEUS_ROOT} {} \;))
+MORPHEUS_LIBS=($(find ${MORPHEUS_ROOT}/build/morpheus/_lib -name "*.so" -exec realpath --relative-to ${MORPHEUS_ROOT} {} \;) \
+               $(find ${MORPHEUS_ROOT}/examples -name "*.so" -exec realpath --relative-to ${MORPHEUS_ROOT} {} \;))
 tar cfj "${WORKSPACE_TMP}/morhpeus_libs.tar.bz" "${MORPHEUS_LIBS[@]}"
 
 CPP_TESTS=($(find ${MORPHEUS_ROOT}/build/morpheus/_lib/tests -name "*.x" -exec realpath --relative-to ${MORPHEUS_ROOT} {} \;))
```
**`examples/log_parsing/README.md`** — 2 additions, 2 deletions

```diff
@@ -63,7 +63,7 @@ python run.py \
     --num_threads 1 \
     --input_file ${MORPHEUS_ROOT}/models/datasets/validation-data/log-parsing-validation-data-input.csv \
     --output_file ./log-parsing-output.jsonlines \
-    --model_vocab_hash_file=${MORPHEUS_ROOT}/models/training-tuning-scripts/sid-models/resources/bert-base-cased-hash.txt \
+    --model_vocab_hash_file=${MORPHEUS_ROOT}/morpheus/data/bert-base-cased-hash.txt \
     --model_vocab_file=${MORPHEUS_ROOT}/models/training-tuning-scripts/sid-models/resources/bert-base-cased-vocab.txt \
     --model_seq_length=256 \
     --model_name log-parsing-onnx \
@@ -114,7 +114,7 @@ morpheus --log_level INFO \
   pipeline-nlp \
   from-file --filename ./models/datasets/validation-data/log-parsing-validation-data-input.csv \
   deserialize \
-  preprocess --vocab_hash_file ./models/training-tuning-scripts/sid-models/resources/bert-base-cased-hash.txt --stride 64 --column=raw \
+  preprocess --vocab_hash_file ${MORPHEUS_ROOT}/morpheus/data/bert-base-cased-hash.txt --stride 64 --column=raw \
   monitor --description "Preprocessing rate" \
   inf-logparsing --model_name log-parsing-onnx --server_url localhost:8001 --force_convert_inputs=True \
   monitor --description "Inference rate" --unit inf \
```
**`examples/log_parsing/inference.py`** — 6 additions, 30 deletions

```diff
@@ -30,12 +30,10 @@
 from morpheus.cli.register_stage import register_stage
 from morpheus.config import Config
 from morpheus.config import PipelineModes
-from morpheus.messages import InferenceMemory
 from morpheus.messages import MultiInferenceMessage
 from morpheus.pipeline.stream_pair import StreamPair
 from morpheus.stages.inference.inference_stage import InferenceStage
 from morpheus.stages.inference.inference_stage import InferenceWorker
-from morpheus.stages.inference.triton_inference_stage import InputWrapper
 from morpheus.stages.inference.triton_inference_stage import _TritonInferenceWorker
 from morpheus.utils.producer_consumer_queue import ProducerConsumerQueue
 
@@ -97,7 +95,7 @@ def default_inout_mapping(cls) -> typing.Dict[str, str]:
         # Some models use different names for the same thing. Set that here but allow user customization
         return {"attention_mask": "input_mask"}
 
-    def build_output_message(self, x: MultiInferenceMessage) -> MultiResponseLogParsingMessage:
+    def build_output_message(self, x: MultiInferenceMessage) -> MultiPostprocLogParsingMessage:
 
         memory = PostprocMemoryLogParsing(
             count=x.count,
@@ -111,7 +109,7 @@ def build_output_message(self, x: MultiInferenceMessage) -> MultiResponseLogParsingMessage:
             mess_offset=x.mess_offset,
             mess_count=x.mess_count,
             memory=memory,
-            offset=x.offset,
+            offset=0,
             count=x.count)
         return output_message
 
@@ -131,25 +129,6 @@ def _build_response(self, batch: MultiInferenceMessage,
 
         return mem
 
-    def _infer_callback(self,
-                        cb: typing.Callable[[ResponseMemoryLogParsing], None],
-                        m: InputWrapper,
-                        b: MultiInferenceMessage,
-                        result: tritonclient.InferResult,
-                        error: tritonclient.InferenceServerException):
-
-        # If its an error, return that here
-        if (error is not None):
-            raise error
-
-        # Build response
-        response_mem = self._build_response(b, result)
-
-        # Call the callback with the memory
-        cb(response_mem)
-
-        self._mem_pool.return_obj(m)
-
 
 @register_stage("inf-logparsing", modes=[PipelineModes.NLP])
 class LogParsingInferenceStage(InferenceStage):
@@ -261,7 +240,9 @@ def set_output_fut(resp: ResponseMemoryLogParsing, b, f: mrc.Future):
         return stream, out_type
 
     @staticmethod
-    def _convert_one_response(memory: InferenceMemory, inf: MultiInferenceMessage, res: ResponseMemoryLogParsing):
+    def _convert_one_response(memory: PostprocMemoryLogParsing,
+                              inf: MultiInferenceMessage,
+                              res: ResponseMemoryLogParsing):
 
         memory.input_ids[inf.offset:inf.count + inf.offset, :] = inf.input_ids
         memory.seq_ids[inf.offset:inf.count + inf.offset, :] = inf.seq_ids
@@ -280,12 +261,7 @@ def _convert_one_response(memory: InferenceMemory, inf: MultiInferenceMessage, res: ResponseMemoryLogParsing):
             memory.confidences[idx, :] = cp.maximum(memory.confidences[idx, :], res.confidences[i, :])
             memory.labels[idx, :] = cp.maximum(memory.labels[idx, :], res.labels[i, :])
 
-        return MultiPostprocLogParsingMessage(meta=inf.meta,
-                                              mess_offset=inf.mess_offset,
-                                              mess_count=inf.mess_count,
-                                              memory=memory,
-                                              offset=inf.offset,
-                                              count=inf.count)
+        return MultiPostprocLogParsingMessage.from_message(inf, memory=memory, offset=inf.offset, count=inf.mess_count)
 
     def _get_inference_worker(self, inf_queue: ProducerConsumerQueue) -> InferenceWorker:
```
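The `from_message` call in the final hunk above replaces a six-argument constructor; below is a toy mirror of its copy-with-overrides semantics, not the Morpheus implementation:

```python
import dataclasses

@dataclasses.dataclass
class Msg:  # stand-in for the Multi*Message classes in this example
    meta: object
    mess_offset: int
    mess_count: int
    offset: int = 0
    count: int = 0

    @classmethod
    def from_message(cls, src: "Msg", **overrides):
        # Copy every field from the source message, then apply overrides, so
        # only the fields that differ need to be spelled out at the call site.
        fields = {f.name: getattr(src, f.name) for f in dataclasses.fields(src)}
        fields.update(overrides)
        return cls(**fields)

src = Msg(meta="df", mess_offset=0, mess_count=5, count=5)
out = Msg.from_message(src, offset=0, count=src.mess_count)  # meta et al. copied
```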
**`examples/log_parsing/postprocessing.py`** — 5 additions, 2 deletions

```diff
@@ -54,11 +54,14 @@ def __init__(self, c: Config, vocab_path: pathlib.Path, model_config_path: pathlib.Path):
         self._model_config_path = model_config_path
 
         self._vocab_lookup = {}
-        with open(vocab_path) as f:
+
+        # Explicitly setting the encoding, we know we have unicode chars in this file and we need to avoid issue:
+        # https://github.com/nv-morpheus/Morpheus/issues/859
+        with open(vocab_path, encoding='UTF-8') as f:
             for index, line in enumerate(f):
                 self._vocab_lookup[index] = line.split()[0]
 
-        with open(model_config_path) as f:
+        with open(model_config_path, encoding='UTF-8') as f:
             config = json.load(f)
 
         self._label_map = {int(k): v for k, v in config["id2label"].items()}
```