From cdfdcfc15c8951a20fe3d978e79db6b84f59f403 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Fri, 20 Jan 2023 11:22:27 -0800 Subject: [PATCH 01/85] Expose constructors for TensorMemory, InferenceMemory and ResponseMemory to python --- .../messages/memory/inference_memory.hpp | 15 ++++++- .../messages/memory/response_memory.hpp | 22 +++++++--- .../messages/memory/tensor_memory.hpp | 43 +++++++++++++++++-- .../src/messages/memory/inference_memory.cpp | 6 +++ .../src/messages/memory/response_memory.cpp | 14 ++++-- .../src/messages/memory/tensor_memory.cpp | 34 +++++++++++++-- morpheus/_lib/src/python_modules/messages.cpp | 16 +++++-- 7 files changed, 130 insertions(+), 20 deletions(-) diff --git a/morpheus/_lib/include/morpheus/messages/memory/inference_memory.hpp b/morpheus/_lib/include/morpheus/messages/memory/inference_memory.hpp index c6562f30a6..c5c41978a8 100644 --- a/morpheus/_lib/include/morpheus/messages/memory/inference_memory.hpp +++ b/morpheus/_lib/include/morpheus/messages/memory/inference_memory.hpp @@ -19,7 +19,10 @@ #include "morpheus/messages/memory/tensor_memory.hpp" -#include +#include // for object + +#include // for size_t +#include #include namespace morpheus { @@ -68,6 +71,16 @@ class InferenceMemory : public TensorMemory */ struct InferenceMemoryInterfaceProxy { + /** + * @brief Create and initialize a InferenceMemory object, and return a shared pointer to the result. Each array in + * `tensors` should be of length `count`. 
+ * + * @param count : Length of each array in `tensors` + * @param tensors : Map of string on to cupy arrays + * @return std::shared_ptr + */ + static std::shared_ptr init(std::size_t count, std::map tensors); + /** * @brief Get the count object * diff --git a/morpheus/_lib/include/morpheus/messages/memory/response_memory.hpp b/morpheus/_lib/include/morpheus/messages/memory/response_memory.hpp index da4e38d06d..e7724f079d 100644 --- a/morpheus/_lib/include/morpheus/messages/memory/response_memory.hpp +++ b/morpheus/_lib/include/morpheus/messages/memory/response_memory.hpp @@ -20,9 +20,10 @@ #include "morpheus/messages/memory/tensor_memory.hpp" #include "morpheus/objects/tensor_object.hpp" // for TensorObject -#include +#include // for object #include // for size_t +#include #include namespace morpheus { @@ -53,7 +54,7 @@ class ResponseMemory : public TensorMemory * @param count * @param tensors */ - ResponseMemory(size_t count, tensor_map_t &&tensors); + ResponseMemory(size_t count, tensor_map_t&& tensors); /** * @brief Checks if a tensor named `name` exists in `tensors` * @@ -62,7 +63,7 @@ class ResponseMemory : public TensorMemory * @return true * @return false */ - bool has_output(const std::string &name) const; + bool has_output(const std::string& name) const; }; /****** ResponseMemoryInterfaceProxy *************************/ @@ -73,6 +74,17 @@ class ResponseMemory : public TensorMemory */ struct ResponseMemoryInterfaceProxy { + /** + * @brief Create and initialize a ResponseMemory object, and return a shared pointer to the result. Each array in + * `cupy_tensors` should be of length `count`. 
+ * + * @param count : Length of each array in `cupy_tensors` + * @param cupy_tensors : Map of string on to cupy arrays + * @return std::shared_ptr + */ + static std::shared_ptr init(std::size_t count, + std::map cupy_tensors); + /** * @brief Get the output object * @@ -80,7 +92,7 @@ struct ResponseMemoryInterfaceProxy * @param name * @return pybind11::object */ - static pybind11::object get_output(ResponseMemory &self, const std::string &name); + static pybind11::object get_output(ResponseMemory& self, const std::string& name); /** * @brief Get the output tensor object * @@ -89,7 +101,7 @@ struct ResponseMemoryInterfaceProxy * @param name * @return TensorObject */ - static TensorObject get_output_tensor(ResponseMemory &self, const std::string &name); + static TensorObject get_output_tensor(ResponseMemory& self, const std::string& name); }; #pragma GCC visibility pop diff --git a/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp b/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp index cd4e92eb1f..4dac8398d9 100644 --- a/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp +++ b/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp @@ -19,6 +19,8 @@ #include "morpheus/objects/tensor_object.hpp" // for TensorIndex, TensorObject +#include // for object + #include // for size_t #include #include @@ -35,6 +37,7 @@ namespace morpheus { * @file */ +#pragma GCC visibility push(default) /** * @brief Container for holding a collection of named `TensorObject`s in a `std::map` keyed by name. 
* Base class for `InferenceMemory` & `ResponseMemory` @@ -58,7 +61,7 @@ class TensorMemory * @param count * @param tensors */ - TensorMemory(size_t count, tensor_map_t &&tensors); + TensorMemory(size_t count, tensor_map_t&& tensors); virtual ~TensorMemory() = default; size_t count{0}; @@ -71,7 +74,7 @@ class TensorMemory * @return true * @return false */ - bool has_tensor(const std::string &name) const; + bool has_tensor(const std::string& name) const; /** * @brief Copy tensor ranges @@ -80,9 +83,43 @@ class TensorMemory * @param num_selected_rows * @return tensor_map_t */ - tensor_map_t copy_tensor_ranges(const std::vector> &ranges, + tensor_map_t copy_tensor_ranges(const std::vector>& ranges, size_t num_selected_rows) const; }; +/****** TensorMemoryInterfaceProxy *************************/ +/** + * @brief Interface proxy, used to insulate python bindings. + */ +struct TensorMemoryInterfaceProxy +{ + /** + * @brief Create and initialize a TensorMemory object, and return a shared pointer to the result. Each array in + * `tensors` should be of length `count`. 
+ * + * @param count : Length of each array in `tensors` + * @param tensors : Map of string on to cupy arrays + * @return std::shared_ptr + */ + static std::shared_ptr init(std::size_t count, std::map tensors); + + /** + * @brief Get the count object + * + * @param self + * @return std::size_t + */ + static std::size_t get_count(TensorMemory& self); + + /** + * @brief + * + * @param cupy_tensors + * @return TensorMemory::tensor_map_t + */ + static TensorMemory::tensor_map_t cupy_to_tensors(const std::map& cupy_tensors); +}; + +#pragma GCC visibility pop /** @} */ // end of group } // namespace morpheus diff --git a/morpheus/_lib/src/messages/memory/inference_memory.cpp b/morpheus/_lib/src/messages/memory/inference_memory.cpp index 44498d4785..5643d3245b 100644 --- a/morpheus/_lib/src/messages/memory/inference_memory.cpp +++ b/morpheus/_lib/src/messages/memory/inference_memory.cpp @@ -32,6 +32,12 @@ bool InferenceMemory::has_input(const std::string& name) const } /****** InferenceMemoryInterfaceProxy *************************/ +std::shared_ptr InferenceMemoryInterfaceProxy::init(std::size_t count, + std::map tensors) +{ + return std::make_shared(count, std::move(TensorMemoryInterfaceProxy::cupy_to_tensors(tensors))); +} + std::size_t InferenceMemoryInterfaceProxy::get_count(InferenceMemory& self) { return self.count; diff --git a/morpheus/_lib/src/messages/memory/response_memory.cpp b/morpheus/_lib/src/messages/memory/response_memory.cpp index 77094c9e12..23b0dd0e56 100644 --- a/morpheus/_lib/src/messages/memory/response_memory.cpp +++ b/morpheus/_lib/src/messages/memory/response_memory.cpp @@ -28,15 +28,21 @@ namespace morpheus { /****** Component public implementations *******************/ /****** ResponseMemory****************************************/ ResponseMemory::ResponseMemory(size_t count) : TensorMemory(count) {} -ResponseMemory::ResponseMemory(size_t count, tensor_map_t &&tensors) : TensorMemory(count, std::move(tensors)) {} 
+ResponseMemory::ResponseMemory(size_t count, tensor_map_t&& tensors) : TensorMemory(count, std::move(tensors)) {} -bool ResponseMemory::has_output(const std::string &name) const +bool ResponseMemory::has_output(const std::string& name) const { return this->has_tensor(name); } /****** ResponseMemoryInterfaceProxy *************************/ -pybind11::object ResponseMemoryInterfaceProxy::get_output(ResponseMemory &self, const std::string &name) +std::shared_ptr ResponseMemoryInterfaceProxy::init(std::size_t count, + std::map tensors) +{ + return std::make_shared(count, std::move(TensorMemoryInterfaceProxy::cupy_to_tensors(tensors))); +} + +pybind11::object ResponseMemoryInterfaceProxy::get_output(ResponseMemory& self, const std::string& name) { // Directly return the tensor object if (!self.has_tensor(name)) @@ -47,7 +53,7 @@ pybind11::object ResponseMemoryInterfaceProxy::get_output(ResponseMemory &self, return CupyUtil::tensor_to_cupy(self.tensors[name]); } -TensorObject ResponseMemoryInterfaceProxy::get_output_tensor(ResponseMemory &self, const std::string &name) +TensorObject ResponseMemoryInterfaceProxy::get_output_tensor(ResponseMemory& self, const std::string& name) { // Directly return the tensor object if (!self.has_tensor(name)) diff --git a/morpheus/_lib/src/messages/memory/tensor_memory.cpp b/morpheus/_lib/src/messages/memory/tensor_memory.cpp index c277a21e84..f9705ba611 100644 --- a/morpheus/_lib/src/messages/memory/tensor_memory.cpp +++ b/morpheus/_lib/src/messages/memory/tensor_memory.cpp @@ -17,6 +17,8 @@ #include "morpheus/messages/memory/tensor_memory.hpp" +#include "morpheus/utilities/cupy_util.hpp" // for cupy_to_tensor + #include #include @@ -24,18 +26,18 @@ namespace morpheus { /****** Component public implementations *******************/ /****** TensorMemory****************************************/ TensorMemory::TensorMemory(size_t count) : count(count) {} -TensorMemory::TensorMemory(size_t count, tensor_map_t &&tensors) : count(count), 
tensors(std::move(tensors)) {} +TensorMemory::TensorMemory(size_t count, tensor_map_t&& tensors) : count(count), tensors(std::move(tensors)) {} -bool TensorMemory::has_tensor(const std::string &name) const +bool TensorMemory::has_tensor(const std::string& name) const { return this->tensors.find(name) != this->tensors.end(); } TensorMemory::tensor_map_t TensorMemory::copy_tensor_ranges( - const std::vector> &ranges, size_t num_selected_rows) const + const std::vector>& ranges, size_t num_selected_rows) const { tensor_map_t tensors; - for (const auto &p : this->tensors) + for (const auto& p : this->tensors) { tensors.insert(std::pair{p.first, p.second.copy_rows(ranges, num_selected_rows)}); } @@ -43,4 +45,28 @@ TensorMemory::tensor_map_t TensorMemory::copy_tensor_ranges( return tensors; } +/****** TensorMemoryInterfaceProxy *************************/ +std::shared_ptr TensorMemoryInterfaceProxy::init(std::size_t count, + std::map tensors) +{ + return std::make_shared(count, std::move(cupy_to_tensors(tensors))); +} + +std::size_t TensorMemoryInterfaceProxy::get_count(TensorMemory& self) +{ + return self.count; +} + +TensorMemory::tensor_map_t TensorMemoryInterfaceProxy::cupy_to_tensors( + const std::map& cupy_tensors) +{ + TensorMemory::tensor_map_t tensors; + for (const auto tensor : cupy_tensors) + { + tensors[tensor.first] = std::move(CupyUtil::cupy_to_tensor(tensor.second)); + } + + return tensors; +} + } // namespace morpheus diff --git a/morpheus/_lib/src/python_modules/messages.cpp b/morpheus/_lib/src/python_modules/messages.cpp index d07a5ff0fc..c026b320d9 100644 --- a/morpheus/_lib/src/python_modules/messages.cpp +++ b/morpheus/_lib/src/python_modules/messages.cpp @@ -45,6 +45,7 @@ #include #include +#include #include #include #include @@ -125,13 +126,13 @@ PYBIND11_MODULE(messages, m) .def_property_readonly("mess_offset", &MultiMessageInterfaceProxy::mess_offset) .def_property_readonly("mess_count", &MultiMessageInterfaceProxy::mess_count) .def("get_meta", - 
static_cast(&MultiMessageInterfaceProxy::get_meta), + static_cast(&MultiMessageInterfaceProxy::get_meta), py::return_value_policy::move) .def("get_meta", - static_cast(&MultiMessageInterfaceProxy::get_meta), + static_cast(&MultiMessageInterfaceProxy::get_meta), py::return_value_policy::move) .def("get_meta", - static_cast)>( + static_cast)>( &MultiMessageInterfaceProxy::get_meta), py::return_value_policy::move) .def("set_meta", &MultiMessageInterfaceProxy::set_meta, py::return_value_policy::move) @@ -144,6 +145,9 @@ PYBIND11_MODULE(messages, m) .def("get_meta_list", &MultiMessageInterfaceProxy::get_meta_list, py::return_value_policy::move); py::class_>(m, "InferenceMemory") + .def(py::init<>(&InferenceMemoryInterfaceProxy::init), + py::arg("count"), + py::arg("tensors") = std::map()) .def_property_readonly("count", &InferenceMemoryInterfaceProxy::get_count); py::class_>(m, "InferenceMemoryNLP") @@ -219,9 +223,15 @@ PYBIND11_MODULE(messages, m) .def_property_readonly("count", &MultiInferenceFILMessageInterfaceProxy::count); py::class_>(m, "TensorMemory") + .def(py::init<>(&TensorMemoryInterfaceProxy::init), + py::arg("count"), + py::arg("tensors") = std::map()) .def_readonly("count", &TensorMemory::count); py::class_>(m, "ResponseMemory") + .def(py::init<>(&ResponseMemoryInterfaceProxy::init), + py::arg("count"), + py::arg("tensors") = std::map()) .def_readonly("count", &ResponseMemory::count) .def("get_output", &ResponseMemoryInterfaceProxy::get_output, py::return_value_policy::reference_internal) .def("get_output_tensor", From 4b353ec6ea4dac37b4c2754a743990a31827e815 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Fri, 20 Jan 2023 11:22:48 -0800 Subject: [PATCH 02/85] Remove un-needed print statement --- morpheus/_lib/src/utilities/cupy_util.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/morpheus/_lib/src/utilities/cupy_util.cpp b/morpheus/_lib/src/utilities/cupy_util.cpp index 9fea771a8b..22993219ae 100644 --- 
a/morpheus/_lib/src/utilities/cupy_util.cpp +++ b/morpheus/_lib/src/utilities/cupy_util.cpp @@ -54,7 +54,7 @@ pybind11::module_ CupyUtil::get_cp() return m; } -pybind11::object CupyUtil::tensor_to_cupy(const TensorObject &tensor) +pybind11::object CupyUtil::tensor_to_cupy(const TensorObject& tensor) { // These steps follow the cupy._convert_object_with_cuda_array_interface function shown here: // https://github.com/cupy/cupy/blob/a5b24f91d4d77fa03e6a4dd2ac954ff9a04e21f4/cupy/core/core.pyx#L2478-L2514 @@ -72,12 +72,12 @@ pybind11::object CupyUtil::tensor_to_cupy(const TensorObject &tensor) pybind11::list shape_list; pybind11::list stride_list; - for (auto &idx : tensor.get_shape()) + for (auto& idx : tensor.get_shape()) { shape_list.append(idx); } - for (auto &idx : tensor.get_stride()) + for (auto& idx : tensor.get_stride()) { stride_list.append(idx * tensor.dtype_size()); } @@ -99,7 +99,7 @@ TensorObject CupyUtil::cupy_to_tensor(pybind11::object cupy_array) pybind11::tuple shape_tup = arr_interface["shape"]; - pybind11::print(shape_tup); + // pybind11::print(shape_tup); auto shape = shape_tup.cast>(); auto typestr = arr_interface["typestr"].cast(); @@ -121,7 +121,7 @@ TensorObject CupyUtil::cupy_to_tensor(pybind11::object cupy_array) auto size = cupy_array.attr("data").attr("mem").attr("size").cast(); auto tensor = - Tensor::create(std::make_shared((void const *)data_ptr, size, rmm::cuda_stream_per_thread), + Tensor::create(std::make_shared((void const*)data_ptr, size, rmm::cuda_stream_per_thread), DType::from_numpy(typestr), shape, strides, From ca9f5a276708612a827b87f99c10f03694b67d32 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Fri, 20 Jan 2023 11:45:20 -0800 Subject: [PATCH 03/85] Update tests --- tests/test_messages.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/tests/test_messages.py b/tests/test_messages.py index 73e9c92777..f36beec3b0 100644 --- a/tests/test_messages.py +++ b/tests/test_messages.py @@ -22,6 +22,7 @@ 
import morpheus._lib.messages as _messages import morpheus.config from morpheus import messages +from morpheus.messages import tensor_memory def check_message(python_type: type, cpp_type: type, should_be_cpp: bool, no_cpp_class: bool, args: tuple): @@ -49,10 +50,8 @@ def check_all_messages(should_be_cpp: bool, no_cpp_class: bool): check_message(messages.MultiMessage, _messages.MultiMessage, should_be_cpp, no_cpp_class, (None, 0, 1)) - assert messages.InferenceMemory._cpp_class is None if no_cpp_class else _messages.InferenceMemory - # C++ impl for InferenceMemory doesn't have a constructor - if (should_be_cpp): - pytest.raises(TypeError, messages.InferenceMemory, 1) + check_message(tensor_memory.TensorMemory, _messages.TensorMemory, should_be_cpp, no_cpp_class, (1, )) + check_message(messages.InferenceMemory, _messages.InferenceMemory, should_be_cpp, no_cpp_class, (1, )) cp_array = cp.zeros((1, 2)) @@ -84,10 +83,7 @@ def check_all_messages(should_be_cpp: bool, no_cpp_class: bool): should_be_cpp, no_cpp_class, (None, 0, 1, None, 0, 1)) - assert messages.ResponseMemory._cpp_class is None if no_cpp_class else _messages.ResponseMemory - # C++ impl doesn't have a constructor - if (should_be_cpp): - pytest.raises(TypeError, messages.ResponseMemory, 1) + check_message(messages.ResponseMemory, _messages.ResponseMemory, should_be_cpp, no_cpp_class, (1, )) check_message(messages.ResponseMemoryProbs, _messages.ResponseMemoryProbs, From e2022bce627d8be8f4a389545d4217a43cc77060 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Fri, 20 Jan 2023 14:26:47 -0800 Subject: [PATCH 04/85] add a tensors property --- .../messages/memory/tensor_memory.hpp | 5 ++++ .../messages/memory/inference_memory_nlp.cpp | 22 +++++++------- .../src/messages/memory/tensor_memory.cpp | 30 ++++++++++++++++--- morpheus/_lib/src/python_modules/messages.cpp | 3 +- 4 files changed, 44 insertions(+), 16 deletions(-) diff --git a/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp 
b/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp index 4dac8398d9..d93b3e6c1e 100644 --- a/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp +++ b/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp @@ -111,6 +111,9 @@ struct TensorMemoryInterfaceProxy */ static std::size_t get_count(TensorMemory& self); + static pybind11::object get_tensors(TensorMemory& self); + static void set_tensors(TensorMemory& self, std::map tensors); + /** * @brief * @@ -118,6 +121,8 @@ struct TensorMemoryInterfaceProxy * @return TensorMemory::tensor_map_t */ static TensorMemory::tensor_map_t cupy_to_tensors(const std::map& cupy_tensors); + + static pybind11::object tensors_to_cupy(const TensorMemory::tensor_map_t& tensors); }; #pragma GCC visibility pop diff --git a/morpheus/_lib/src/messages/memory/inference_memory_nlp.cpp b/morpheus/_lib/src/messages/memory/inference_memory_nlp.cpp index 1d8f6511ff..031a60fe0f 100644 --- a/morpheus/_lib/src/messages/memory/inference_memory_nlp.cpp +++ b/morpheus/_lib/src/messages/memory/inference_memory_nlp.cpp @@ -19,7 +19,7 @@ #include "morpheus/messages/memory/inference_memory.hpp" #include "morpheus/messages/memory/tensor_memory.hpp" -#include "morpheus/utilities/cupy_util.hpp" +#include "morpheus/utilities/cupy_util.hpp" // for CupyUtil #include // for size_type #include @@ -43,7 +43,7 @@ InferenceMemoryNLP::InferenceMemoryNLP(std::size_t count, this->tensors["seq_ids"] = std::move(seq_ids); } -const TensorObject &InferenceMemoryNLP::get_input_ids() const +const TensorObject& InferenceMemoryNLP::get_input_ids() const { auto found = this->tensors.find("input_ids"); if (found == this->tensors.end()) @@ -59,7 +59,7 @@ void InferenceMemoryNLP::set_input_ids(TensorObject input_ids) this->tensors["input_ids"] = std::move(input_ids); } -const TensorObject &InferenceMemoryNLP::get_input_mask() const +const TensorObject& InferenceMemoryNLP::get_input_mask() const { auto found = this->tensors.find("input_mask"); 
if (found == this->tensors.end()) @@ -75,7 +75,7 @@ void InferenceMemoryNLP::set_input_mask(TensorObject input_mask) this->tensors["input_mask"] = std::move(input_mask); } -const TensorObject &InferenceMemoryNLP::get_seq_ids() const +const TensorObject& InferenceMemoryNLP::get_seq_ids() const { auto found = this->tensors.find("seq_ids"); if (found == this->tensors.end()) @@ -104,37 +104,37 @@ std::shared_ptr InferenceMemoryNLPInterfaceProxy::init(cudf: std::move(CupyUtil::cupy_to_tensor(seq_ids))); } -std::size_t InferenceMemoryNLPInterfaceProxy::count(InferenceMemoryNLP &self) +std::size_t InferenceMemoryNLPInterfaceProxy::count(InferenceMemoryNLP& self) { return self.count; } -pybind11::object InferenceMemoryNLPInterfaceProxy::get_input_ids(InferenceMemoryNLP &self) +pybind11::object InferenceMemoryNLPInterfaceProxy::get_input_ids(InferenceMemoryNLP& self) { return CupyUtil::tensor_to_cupy(self.get_input_ids()); } -void InferenceMemoryNLPInterfaceProxy::set_input_ids(InferenceMemoryNLP &self, pybind11::object cupy_values) +void InferenceMemoryNLPInterfaceProxy::set_input_ids(InferenceMemoryNLP& self, pybind11::object cupy_values) { self.set_input_ids(CupyUtil::cupy_to_tensor(cupy_values)); } -pybind11::object InferenceMemoryNLPInterfaceProxy::get_input_mask(InferenceMemoryNLP &self) +pybind11::object InferenceMemoryNLPInterfaceProxy::get_input_mask(InferenceMemoryNLP& self) { return CupyUtil::tensor_to_cupy(self.get_input_mask()); } -void InferenceMemoryNLPInterfaceProxy::set_input_mask(InferenceMemoryNLP &self, pybind11::object cupy_values) +void InferenceMemoryNLPInterfaceProxy::set_input_mask(InferenceMemoryNLP& self, pybind11::object cupy_values) { return self.set_input_mask(CupyUtil::cupy_to_tensor(cupy_values)); } -pybind11::object InferenceMemoryNLPInterfaceProxy::get_seq_ids(InferenceMemoryNLP &self) +pybind11::object InferenceMemoryNLPInterfaceProxy::get_seq_ids(InferenceMemoryNLP& self) { return CupyUtil::tensor_to_cupy(self.get_seq_ids()); } -void 
InferenceMemoryNLPInterfaceProxy::set_seq_ids(InferenceMemoryNLP &self, pybind11::object cupy_values) +void InferenceMemoryNLPInterfaceProxy::set_seq_ids(InferenceMemoryNLP& self, pybind11::object cupy_values) { return self.set_seq_ids(CupyUtil::cupy_to_tensor(cupy_values)); } diff --git a/morpheus/_lib/src/messages/memory/tensor_memory.cpp b/morpheus/_lib/src/messages/memory/tensor_memory.cpp index f9705ba611..0a9eea54bb 100644 --- a/morpheus/_lib/src/messages/memory/tensor_memory.cpp +++ b/morpheus/_lib/src/messages/memory/tensor_memory.cpp @@ -17,7 +17,7 @@ #include "morpheus/messages/memory/tensor_memory.hpp" -#include "morpheus/utilities/cupy_util.hpp" // for cupy_to_tensor +#include "morpheus/utilities/cupy_util.hpp" // for CupyUtil #include #include @@ -46,8 +46,9 @@ TensorMemory::tensor_map_t TensorMemory::copy_tensor_ranges( } /****** TensorMemoryInterfaceProxy *************************/ +namespace py = pybind11; std::shared_ptr TensorMemoryInterfaceProxy::init(std::size_t count, - std::map tensors) + std::map tensors) { return std::make_shared(count, std::move(cupy_to_tensors(tensors))); } @@ -57,11 +58,21 @@ std::size_t TensorMemoryInterfaceProxy::get_count(TensorMemory& self) return self.count; } +py::object TensorMemoryInterfaceProxy::get_tensors(TensorMemory& self) +{ + return tensors_to_cupy(self.tensors); +} + +void TensorMemoryInterfaceProxy::set_tensors(TensorMemory& self, std::map tensors) +{ + self.tensors = std::move(cupy_to_tensors(tensors)); +} + TensorMemory::tensor_map_t TensorMemoryInterfaceProxy::cupy_to_tensors( - const std::map& cupy_tensors) + const std::map& cupy_tensors) { TensorMemory::tensor_map_t tensors; - for (const auto tensor : cupy_tensors) + for (const auto& tensor : cupy_tensors) { tensors[tensor.first] = std::move(CupyUtil::cupy_to_tensor(tensor.second)); } @@ -69,4 +80,15 @@ TensorMemory::tensor_map_t TensorMemoryInterfaceProxy::cupy_to_tensors( return tensors; } +py::object 
TensorMemoryInterfaceProxy::tensors_to_cupy(const TensorMemory::tensor_map_t& tensors) +{ + auto cupy_tensors = py::dict(); + for (const auto& tensor : tensors) + { + cupy_tensors[py::str(tensor.first)] = std::move(CupyUtil::tensor_to_cupy(tensor.second)); + } + + return cupy_tensors; +} + } // namespace morpheus diff --git a/morpheus/_lib/src/python_modules/messages.cpp b/morpheus/_lib/src/python_modules/messages.cpp index c026b320d9..b6ec4bd972 100644 --- a/morpheus/_lib/src/python_modules/messages.cpp +++ b/morpheus/_lib/src/python_modules/messages.cpp @@ -226,7 +226,8 @@ PYBIND11_MODULE(messages, m) .def(py::init<>(&TensorMemoryInterfaceProxy::init), py::arg("count"), py::arg("tensors") = std::map()) - .def_readonly("count", &TensorMemory::count); + .def_readonly("count", &TensorMemory::count) + .def_property("tensors", &TensorMemoryInterfaceProxy::get_tensors, &TensorMemoryInterfaceProxy::set_tensors); py::class_>(m, "ResponseMemory") .def(py::init<>(&ResponseMemoryInterfaceProxy::init), From 26cb7bff940b3546a670367f307ff36ca09f6df1 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Fri, 27 Jan 2023 09:04:33 -0800 Subject: [PATCH 05/85] Use a type alias --- .../morpheus/messages/memory/tensor_memory.hpp | 12 +++++++----- .../_lib/src/messages/memory/tensor_memory.cpp | 17 ++++++++--------- morpheus/_lib/src/python_modules/messages.cpp | 6 +++--- 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp b/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp index d93b3e6c1e..10d642b46c 100644 --- a/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp +++ b/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp @@ -93,6 +93,8 @@ class TensorMemory */ struct TensorMemoryInterfaceProxy { + using py_tensor_map_t = std::map; + /** * @brief Create and initialize a TensorMemory object, and return a shared pointer to the result. 
Each array in * `tensors` should be of length `count`. @@ -101,7 +103,7 @@ struct TensorMemoryInterfaceProxy * @param tensors : Map of string on to cupy arrays * @return std::shared_ptr */ - static std::shared_ptr init(std::size_t count, std::map tensors); + static std::shared_ptr init(std::size_t count, py_tensor_map_t tensors); /** * @brief Get the count object @@ -111,8 +113,8 @@ struct TensorMemoryInterfaceProxy */ static std::size_t get_count(TensorMemory& self); - static pybind11::object get_tensors(TensorMemory& self); - static void set_tensors(TensorMemory& self, std::map tensors); + static py_tensor_map_t get_tensors(TensorMemory& self); + static void set_tensors(TensorMemory& self, py_tensor_map_t tensors); /** * @brief @@ -120,9 +122,9 @@ struct TensorMemoryInterfaceProxy * @param cupy_tensors * @return TensorMemory::tensor_map_t */ - static TensorMemory::tensor_map_t cupy_to_tensors(const std::map& cupy_tensors); + static TensorMemory::tensor_map_t cupy_to_tensors(const py_tensor_map_t& cupy_tensors); - static pybind11::object tensors_to_cupy(const TensorMemory::tensor_map_t& tensors); + static py_tensor_map_t tensors_to_cupy(const TensorMemory::tensor_map_t& tensors); }; #pragma GCC visibility pop diff --git a/morpheus/_lib/src/messages/memory/tensor_memory.cpp b/morpheus/_lib/src/messages/memory/tensor_memory.cpp index 0a9eea54bb..04f714ab76 100644 --- a/morpheus/_lib/src/messages/memory/tensor_memory.cpp +++ b/morpheus/_lib/src/messages/memory/tensor_memory.cpp @@ -47,8 +47,7 @@ TensorMemory::tensor_map_t TensorMemory::copy_tensor_ranges( /****** TensorMemoryInterfaceProxy *************************/ namespace py = pybind11; -std::shared_ptr TensorMemoryInterfaceProxy::init(std::size_t count, - std::map tensors) +std::shared_ptr TensorMemoryInterfaceProxy::init(std::size_t count, py_tensor_map_t tensors) { return std::make_shared(count, std::move(cupy_to_tensors(tensors))); } @@ -58,18 +57,17 @@ std::size_t 
TensorMemoryInterfaceProxy::get_count(TensorMemory& self) return self.count; } -py::object TensorMemoryInterfaceProxy::get_tensors(TensorMemory& self) +TensorMemoryInterfaceProxy::py_tensor_map_t TensorMemoryInterfaceProxy::get_tensors(TensorMemory& self) { return tensors_to_cupy(self.tensors); } -void TensorMemoryInterfaceProxy::set_tensors(TensorMemory& self, std::map tensors) +void TensorMemoryInterfaceProxy::set_tensors(TensorMemory& self, py_tensor_map_t tensors) { self.tensors = std::move(cupy_to_tensors(tensors)); } -TensorMemory::tensor_map_t TensorMemoryInterfaceProxy::cupy_to_tensors( - const std::map& cupy_tensors) +TensorMemory::tensor_map_t TensorMemoryInterfaceProxy::cupy_to_tensors(const py_tensor_map_t& cupy_tensors) { TensorMemory::tensor_map_t tensors; for (const auto& tensor : cupy_tensors) @@ -80,12 +78,13 @@ TensorMemory::tensor_map_t TensorMemoryInterfaceProxy::cupy_to_tensors( return tensors; } -py::object TensorMemoryInterfaceProxy::tensors_to_cupy(const TensorMemory::tensor_map_t& tensors) +TensorMemoryInterfaceProxy::py_tensor_map_t TensorMemoryInterfaceProxy::tensors_to_cupy( + const TensorMemory::tensor_map_t& tensors) { - auto cupy_tensors = py::dict(); + py_tensor_map_t cupy_tensors; for (const auto& tensor : tensors) { - cupy_tensors[py::str(tensor.first)] = std::move(CupyUtil::tensor_to_cupy(tensor.second)); + cupy_tensors[tensor.first] = std::move(CupyUtil::tensor_to_cupy(tensor.second)); } return cupy_tensors; diff --git a/morpheus/_lib/src/python_modules/messages.cpp b/morpheus/_lib/src/python_modules/messages.cpp index b6ec4bd972..7eaba0b823 100644 --- a/morpheus/_lib/src/python_modules/messages.cpp +++ b/morpheus/_lib/src/python_modules/messages.cpp @@ -147,7 +147,7 @@ PYBIND11_MODULE(messages, m) py::class_>(m, "InferenceMemory") .def(py::init<>(&InferenceMemoryInterfaceProxy::init), py::arg("count"), - py::arg("tensors") = std::map()) + py::arg("tensors") = TensorMemoryInterfaceProxy::py_tensor_map_t()) 
.def_property_readonly("count", &InferenceMemoryInterfaceProxy::get_count); py::class_>(m, "InferenceMemoryNLP") @@ -225,14 +225,14 @@ PYBIND11_MODULE(messages, m) py::class_>(m, "TensorMemory") .def(py::init<>(&TensorMemoryInterfaceProxy::init), py::arg("count"), - py::arg("tensors") = std::map()) + py::arg("tensors") = TensorMemoryInterfaceProxy::py_tensor_map_t()) .def_readonly("count", &TensorMemory::count) .def_property("tensors", &TensorMemoryInterfaceProxy::get_tensors, &TensorMemoryInterfaceProxy::set_tensors); py::class_>(m, "ResponseMemory") .def(py::init<>(&ResponseMemoryInterfaceProxy::init), py::arg("count"), - py::arg("tensors") = std::map()) + py::arg("tensors") = TensorMemoryInterfaceProxy::py_tensor_map_t()) .def_readonly("count", &ResponseMemory::count) .def("get_output", &ResponseMemoryInterfaceProxy::get_output, py::return_value_policy::reference_internal) .def("get_output_tensor", From 00815f6c88dde3d0afbb299104ce660999224e29 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Fri, 27 Jan 2023 10:46:15 -0800 Subject: [PATCH 06/85] WIP --- .../messages/memory/inference_memory.hpp | 5 ++++- .../messages/memory/response_memory.hpp | 6 ++++-- .../src/messages/memory/inference_memory.cpp | 11 +++++++++++ .../src/messages/memory/response_memory.cpp | 19 ++++++++++++------- morpheus/_lib/src/python_modules/messages.cpp | 12 +++++++++++- 5 files changed, 42 insertions(+), 11 deletions(-) diff --git a/morpheus/_lib/include/morpheus/messages/memory/inference_memory.hpp b/morpheus/_lib/include/morpheus/messages/memory/inference_memory.hpp index c5c41978a8..84dc8930d0 100644 --- a/morpheus/_lib/include/morpheus/messages/memory/inference_memory.hpp +++ b/morpheus/_lib/include/morpheus/messages/memory/inference_memory.hpp @@ -79,7 +79,7 @@ struct InferenceMemoryInterfaceProxy * @param tensors : Map of string on to cupy arrays * @return std::shared_ptr */ - static std::shared_ptr init(std::size_t count, std::map tensors); + static std::shared_ptr 
init(std::size_t count, TensorMemoryInterfaceProxy::py_tensor_map_t tensors); /** * @brief Get the count object @@ -88,6 +88,9 @@ struct InferenceMemoryInterfaceProxy * @return std::size_t */ static std::size_t get_count(InferenceMemory& self); + + static TensorMemoryInterfaceProxy::py_tensor_map_t get_tensors(TensorMemory& self); + static void set_tensors(TensorMemory& self, TensorMemoryInterfaceProxy::py_tensor_map_t tensors); }; #pragma GCC visibility pop diff --git a/morpheus/_lib/include/morpheus/messages/memory/response_memory.hpp b/morpheus/_lib/include/morpheus/messages/memory/response_memory.hpp index e7724f079d..0193a2b0c6 100644 --- a/morpheus/_lib/include/morpheus/messages/memory/response_memory.hpp +++ b/morpheus/_lib/include/morpheus/messages/memory/response_memory.hpp @@ -82,8 +82,7 @@ struct ResponseMemoryInterfaceProxy * @param cupy_tensors : Map of string on to cupy arrays * @return std::shared_ptr */ - static std::shared_ptr init(std::size_t count, - std::map cupy_tensors); + static std::shared_ptr init(std::size_t count, TensorMemoryInterfaceProxy::py_tensor_map_t tensors); /** * @brief Get the output object @@ -102,6 +101,9 @@ struct ResponseMemoryInterfaceProxy * @return TensorObject */ static TensorObject get_output_tensor(ResponseMemory& self, const std::string& name); + + static TensorMemoryInterfaceProxy::py_tensor_map_t get_tensors(TensorMemory& self); + static void set_tensors(TensorMemory& self, TensorMemoryInterfaceProxy::py_tensor_map_t tensors); }; #pragma GCC visibility pop diff --git a/morpheus/_lib/src/messages/memory/inference_memory.cpp b/morpheus/_lib/src/messages/memory/inference_memory.cpp index 5643d3245b..50634cb59b 100644 --- a/morpheus/_lib/src/messages/memory/inference_memory.cpp +++ b/morpheus/_lib/src/messages/memory/inference_memory.cpp @@ -42,4 +42,15 @@ std::size_t InferenceMemoryInterfaceProxy::get_count(InferenceMemory& self) { return self.count; } + +TensorMemoryInterfaceProxy::py_tensor_map_t 
InferenceMemoryInterfaceProxy::get_tensors(TensorMemory& self) +{ + return TensorMemoryInterfaceProxy::tensors_to_cupy(self.tensors); +} + +void InferenceMemoryInterfaceProxy::set_tensors(TensorMemory& self, TensorMemoryInterfaceProxy::py_tensor_map_t tensors) +{ + self.tensors = std::move(TensorMemoryInterfaceProxy::cupy_to_tensors(tensors)); +} + } // namespace morpheus diff --git a/morpheus/_lib/src/messages/memory/response_memory.cpp b/morpheus/_lib/src/messages/memory/response_memory.cpp index 23b0dd0e56..654c52d5c2 100644 --- a/morpheus/_lib/src/messages/memory/response_memory.cpp +++ b/morpheus/_lib/src/messages/memory/response_memory.cpp @@ -44,13 +44,7 @@ std::shared_ptr ResponseMemoryInterfaceProxy::init(std::size_t c pybind11::object ResponseMemoryInterfaceProxy::get_output(ResponseMemory& self, const std::string& name) { - // Directly return the tensor object - if (!self.has_tensor(name)) - { - throw pybind11::key_error(); - } - - return CupyUtil::tensor_to_cupy(self.tensors[name]); + return CupyUtil::tensor_to_cupy(get_output_tensor(self, name)); } TensorObject ResponseMemoryInterfaceProxy::get_output_tensor(ResponseMemory& self, const std::string& name) @@ -63,4 +57,15 @@ TensorObject ResponseMemoryInterfaceProxy::get_output_tensor(ResponseMemory& sel return self.tensors[name]; } + +TensorMemoryInterfaceProxy::py_tensor_map_t ResponseMemoryInterfaceProxy::get_tensors(TensorMemory& self) +{ + return TensorMemoryInterfaceProxy::tensors_to_cupy(self.tensors); +} + +void ResponseMemoryInterfaceProxy::set_tensors(TensorMemory& self, TensorMemoryInterfaceProxy::py_tensor_map_t tensors) +{ + self.tensors = std::move(TensorMemoryInterfaceProxy::cupy_to_tensors(tensors)); +} + } // namespace morpheus diff --git a/morpheus/_lib/src/python_modules/messages.cpp b/morpheus/_lib/src/python_modules/messages.cpp index 7eaba0b823..487e2f059f 100644 --- a/morpheus/_lib/src/python_modules/messages.cpp +++ b/morpheus/_lib/src/python_modules/messages.cpp @@ -148,7 +148,9 
@@ PYBIND11_MODULE(messages, m) .def(py::init<>(&InferenceMemoryInterfaceProxy::init), py::arg("count"), py::arg("tensors") = TensorMemoryInterfaceProxy::py_tensor_map_t()) - .def_property_readonly("count", &InferenceMemoryInterfaceProxy::get_count); + .def_property_readonly("count", &InferenceMemoryInterfaceProxy::get_count) + .def_property( + "tensors", &InferenceMemoryInterfaceProxy::get_tensors, &InferenceMemoryInterfaceProxy::set_tensors); py::class_>(m, "InferenceMemoryNLP") .def(py::init<>(&InferenceMemoryNLPInterfaceProxy::init), @@ -222,6 +224,13 @@ PYBIND11_MODULE(messages, m) .def_property_readonly("offset", &MultiInferenceFILMessageInterfaceProxy::offset) .def_property_readonly("count", &MultiInferenceFILMessageInterfaceProxy::count); + // The tensors property has a limitation in that it always returns a copy so code like: + // >>> m.tensors['c'] = cp.zeros(count) + // won't have the intended outcome. However this will: + // >>> tensors = m.tensors + // >>> tensors['c'] = cp.zeros(count) + // >>> m.tensors = tensors + // https://pybind11.readthedocs.io/en/stable/advanced/cast/stl.html?highlight=opaque#making-opaque-types py::class_>(m, "TensorMemory") .def(py::init<>(&TensorMemoryInterfaceProxy::init), py::arg("count"), @@ -234,6 +243,7 @@ PYBIND11_MODULE(messages, m) py::arg("count"), py::arg("tensors") = TensorMemoryInterfaceProxy::py_tensor_map_t()) .def_readonly("count", &ResponseMemory::count) + .def_property("tensors", &ResponseMemoryInterfaceProxy::get_tensors, &ResponseMemoryInterfaceProxy::set_tensors) .def("get_output", &ResponseMemoryInterfaceProxy::get_output, py::return_value_policy::reference_internal) .def("get_output_tensor", &ResponseMemoryInterfaceProxy::get_output_tensor, From f9a317c571389791cf54c412b095df3f802b85fe Mon Sep 17 00:00:00 2001 From: David Gardner Date: Fri, 27 Jan 2023 16:23:16 -0800 Subject: [PATCH 07/85] Fix bad copy-paste --- .../include/morpheus/messages/memory/inference_memory.hpp | 7 ++++--- 
.../include/morpheus/messages/memory/response_memory.hpp | 4 ++-- morpheus/_lib/src/messages/memory/inference_memory.cpp | 5 +++-- morpheus/_lib/src/messages/memory/response_memory.cpp | 5 +++-- 4 files changed, 12 insertions(+), 9 deletions(-) diff --git a/morpheus/_lib/include/morpheus/messages/memory/inference_memory.hpp b/morpheus/_lib/include/morpheus/messages/memory/inference_memory.hpp index 84dc8930d0..4e0f9ae5c2 100644 --- a/morpheus/_lib/include/morpheus/messages/memory/inference_memory.hpp +++ b/morpheus/_lib/include/morpheus/messages/memory/inference_memory.hpp @@ -79,7 +79,8 @@ struct InferenceMemoryInterfaceProxy * @param tensors : Map of string on to cupy arrays * @return std::shared_ptr */ - static std::shared_ptr init(std::size_t count, TensorMemoryInterfaceProxy::py_tensor_map_t tensors); + static std::shared_ptr init(std::size_t count, + TensorMemoryInterfaceProxy::py_tensor_map_t tensors); /** * @brief Get the count object @@ -89,8 +90,8 @@ struct InferenceMemoryInterfaceProxy */ static std::size_t get_count(InferenceMemory& self); - static TensorMemoryInterfaceProxy::py_tensor_map_t get_tensors(TensorMemory& self); - static void set_tensors(TensorMemory& self, TensorMemoryInterfaceProxy::py_tensor_map_t tensors); + static TensorMemoryInterfaceProxy::py_tensor_map_t get_tensors(InferenceMemory& self); + static void set_tensors(InferenceMemory& self, TensorMemoryInterfaceProxy::py_tensor_map_t tensors); }; #pragma GCC visibility pop diff --git a/morpheus/_lib/include/morpheus/messages/memory/response_memory.hpp b/morpheus/_lib/include/morpheus/messages/memory/response_memory.hpp index 0193a2b0c6..cc4e94c0b7 100644 --- a/morpheus/_lib/include/morpheus/messages/memory/response_memory.hpp +++ b/morpheus/_lib/include/morpheus/messages/memory/response_memory.hpp @@ -102,8 +102,8 @@ struct ResponseMemoryInterfaceProxy */ static TensorObject get_output_tensor(ResponseMemory& self, const std::string& name); - static 
TensorMemoryInterfaceProxy::py_tensor_map_t get_tensors(TensorMemory& self); - static void set_tensors(TensorMemory& self, TensorMemoryInterfaceProxy::py_tensor_map_t tensors); + static TensorMemoryInterfaceProxy::py_tensor_map_t get_tensors(ResponseMemory& self); + static void set_tensors(ResponseMemory& self, TensorMemoryInterfaceProxy::py_tensor_map_t tensors); }; #pragma GCC visibility pop diff --git a/morpheus/_lib/src/messages/memory/inference_memory.cpp b/morpheus/_lib/src/messages/memory/inference_memory.cpp index 50634cb59b..2954dae6db 100644 --- a/morpheus/_lib/src/messages/memory/inference_memory.cpp +++ b/morpheus/_lib/src/messages/memory/inference_memory.cpp @@ -43,12 +43,13 @@ std::size_t InferenceMemoryInterfaceProxy::get_count(InferenceMemory& self) return self.count; } -TensorMemoryInterfaceProxy::py_tensor_map_t InferenceMemoryInterfaceProxy::get_tensors(TensorMemory& self) +TensorMemoryInterfaceProxy::py_tensor_map_t InferenceMemoryInterfaceProxy::get_tensors(InferenceMemory& self) { return TensorMemoryInterfaceProxy::tensors_to_cupy(self.tensors); } -void InferenceMemoryInterfaceProxy::set_tensors(TensorMemory& self, TensorMemoryInterfaceProxy::py_tensor_map_t tensors) +void InferenceMemoryInterfaceProxy::set_tensors(InferenceMemory& self, + TensorMemoryInterfaceProxy::py_tensor_map_t tensors) { self.tensors = std::move(TensorMemoryInterfaceProxy::cupy_to_tensors(tensors)); } diff --git a/morpheus/_lib/src/messages/memory/response_memory.cpp b/morpheus/_lib/src/messages/memory/response_memory.cpp index 654c52d5c2..4b42a6a363 100644 --- a/morpheus/_lib/src/messages/memory/response_memory.cpp +++ b/morpheus/_lib/src/messages/memory/response_memory.cpp @@ -58,12 +58,13 @@ TensorObject ResponseMemoryInterfaceProxy::get_output_tensor(ResponseMemory& sel return self.tensors[name]; } -TensorMemoryInterfaceProxy::py_tensor_map_t ResponseMemoryInterfaceProxy::get_tensors(TensorMemory& self) +TensorMemoryInterfaceProxy::py_tensor_map_t 
ResponseMemoryInterfaceProxy::get_tensors(ResponseMemory& self) { return TensorMemoryInterfaceProxy::tensors_to_cupy(self.tensors); } -void ResponseMemoryInterfaceProxy::set_tensors(TensorMemory& self, TensorMemoryInterfaceProxy::py_tensor_map_t tensors) +void ResponseMemoryInterfaceProxy::set_tensors(ResponseMemory& self, + TensorMemoryInterfaceProxy::py_tensor_map_t tensors) { self.tensors = std::move(TensorMemoryInterfaceProxy::cupy_to_tensors(tensors)); } From 91f58ca420d0fc45d4b2c90c0a573fb9436f1608 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Fri, 27 Jan 2023 16:23:38 -0800 Subject: [PATCH 08/85] Set docstring on property method --- morpheus/_lib/src/python_modules/messages.cpp | 34 +++++++++++++------ 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/morpheus/_lib/src/python_modules/messages.cpp b/morpheus/_lib/src/python_modules/messages.cpp index 487e2f059f..9362e78a95 100644 --- a/morpheus/_lib/src/python_modules/messages.cpp +++ b/morpheus/_lib/src/python_modules/messages.cpp @@ -55,6 +55,17 @@ namespace morpheus { namespace fs = std::filesystem; namespace py = pybind11; +// https://pybind11.readthedocs.io/en/stable/advanced/cast/stl.html?highlight=opaque#making-opaque-types +const char* TensorPropDocstring{R"pbdoc( + The tensors property has a limitation in that it always returns a copy. Resulting in code like: + >>> m.tensors['c'] = cp.zeros(count) + + Not having the intended outcome. Instead the following work-around can be performed: + >>> tensors = m.tensors + >>> tensors['c'] = cp.zeros(count) + >>> m.tensors = tensors +)pbdoc"}; + // Define the pybind11 module m, as 'pipeline'. 
PYBIND11_MODULE(messages, m) { @@ -149,8 +160,10 @@ PYBIND11_MODULE(messages, m) py::arg("count"), py::arg("tensors") = TensorMemoryInterfaceProxy::py_tensor_map_t()) .def_property_readonly("count", &InferenceMemoryInterfaceProxy::get_count) - .def_property( - "tensors", &InferenceMemoryInterfaceProxy::get_tensors, &InferenceMemoryInterfaceProxy::set_tensors); + .def_property("tensors", + &InferenceMemoryInterfaceProxy::get_tensors, + &InferenceMemoryInterfaceProxy::set_tensors, + TensorPropDocstring); py::class_>(m, "InferenceMemoryNLP") .def(py::init<>(&InferenceMemoryNLPInterfaceProxy::init), @@ -224,26 +237,25 @@ PYBIND11_MODULE(messages, m) .def_property_readonly("offset", &MultiInferenceFILMessageInterfaceProxy::offset) .def_property_readonly("count", &MultiInferenceFILMessageInterfaceProxy::count); - // The tensors property has a limitation in that it always returns a copy so code like: - // >>> m.tensors['c'] = cp.zeros(count) - // won't have the intended outcome. However this will: - // >>> tensors = m.tensors - // >>> tensors['c'] = cp.zeros(count) - // >>> m.tensors = tensors - // https://pybind11.readthedocs.io/en/stable/advanced/cast/stl.html?highlight=opaque#making-opaque-types py::class_>(m, "TensorMemory") .def(py::init<>(&TensorMemoryInterfaceProxy::init), py::arg("count"), py::arg("tensors") = TensorMemoryInterfaceProxy::py_tensor_map_t()) .def_readonly("count", &TensorMemory::count) - .def_property("tensors", &TensorMemoryInterfaceProxy::get_tensors, &TensorMemoryInterfaceProxy::set_tensors); + .def_property("tensors", + &TensorMemoryInterfaceProxy::get_tensors, + &TensorMemoryInterfaceProxy::set_tensors, + TensorPropDocstring); py::class_>(m, "ResponseMemory") .def(py::init<>(&ResponseMemoryInterfaceProxy::init), py::arg("count"), py::arg("tensors") = TensorMemoryInterfaceProxy::py_tensor_map_t()) .def_readonly("count", &ResponseMemory::count) - .def_property("tensors", &ResponseMemoryInterfaceProxy::get_tensors, 
&ResponseMemoryInterfaceProxy::set_tensors) + .def_property("tensors", + &ResponseMemoryInterfaceProxy::get_tensors, + &ResponseMemoryInterfaceProxy::set_tensors, + TensorPropDocstring) .def("get_output", &ResponseMemoryInterfaceProxy::get_output, py::return_value_policy::reference_internal) .def("get_output_tensor", &ResponseMemoryInterfaceProxy::get_output_tensor, From 6d85a520fe9575eec674f8cc380df381f94a468b Mon Sep 17 00:00:00 2001 From: David Gardner Date: Fri, 27 Jan 2023 17:23:59 -0800 Subject: [PATCH 09/85] Move cupy_to_tensors and tensors_to_cupy to CupyUtil --- .../messages/memory/inference_memory.hpp | 7 ++-- .../messages/memory/response_memory.hpp | 6 +-- .../messages/memory/tensor_memory.hpp | 18 ++++----- .../include/morpheus/utilities/cupy_util.hpp | 36 ++++++++++++++++-- .../src/messages/memory/inference_memory.cpp | 11 +++--- .../src/messages/memory/response_memory.cpp | 11 +++--- .../src/messages/memory/tensor_memory.cpp | 37 +++---------------- morpheus/_lib/src/python_modules/messages.cpp | 7 ++-- morpheus/_lib/src/utilities/cupy_util.cpp | 23 ++++++++++++ 9 files changed, 88 insertions(+), 68 deletions(-) diff --git a/morpheus/_lib/include/morpheus/messages/memory/inference_memory.hpp b/morpheus/_lib/include/morpheus/messages/memory/inference_memory.hpp index 4e0f9ae5c2..8e7d6daf1e 100644 --- a/morpheus/_lib/include/morpheus/messages/memory/inference_memory.hpp +++ b/morpheus/_lib/include/morpheus/messages/memory/inference_memory.hpp @@ -79,8 +79,7 @@ struct InferenceMemoryInterfaceProxy * @param tensors : Map of string on to cupy arrays * @return std::shared_ptr */ - static std::shared_ptr init(std::size_t count, - TensorMemoryInterfaceProxy::py_tensor_map_t tensors); + static std::shared_ptr init(std::size_t count, CupyUtil::py_tensor_map_t tensors); /** * @brief Get the count object @@ -90,8 +89,8 @@ struct InferenceMemoryInterfaceProxy */ static std::size_t get_count(InferenceMemory& self); - static 
TensorMemoryInterfaceProxy::py_tensor_map_t get_tensors(InferenceMemory& self); - static void set_tensors(InferenceMemory& self, TensorMemoryInterfaceProxy::py_tensor_map_t tensors); + static CupyUtil::py_tensor_map_t get_tensors(InferenceMemory& self); + static void set_tensors(InferenceMemory& self, CupyUtil::py_tensor_map_t tensors); }; #pragma GCC visibility pop diff --git a/morpheus/_lib/include/morpheus/messages/memory/response_memory.hpp b/morpheus/_lib/include/morpheus/messages/memory/response_memory.hpp index cc4e94c0b7..537a9b140e 100644 --- a/morpheus/_lib/include/morpheus/messages/memory/response_memory.hpp +++ b/morpheus/_lib/include/morpheus/messages/memory/response_memory.hpp @@ -82,7 +82,7 @@ struct ResponseMemoryInterfaceProxy * @param cupy_tensors : Map of string on to cupy arrays * @return std::shared_ptr */ - static std::shared_ptr init(std::size_t count, TensorMemoryInterfaceProxy::py_tensor_map_t tensors); + static std::shared_ptr init(std::size_t count, CupyUtil::py_tensor_map_t tensors); /** * @brief Get the output object @@ -102,8 +102,8 @@ struct ResponseMemoryInterfaceProxy */ static TensorObject get_output_tensor(ResponseMemory& self, const std::string& name); - static TensorMemoryInterfaceProxy::py_tensor_map_t get_tensors(ResponseMemory& self); - static void set_tensors(ResponseMemory& self, TensorMemoryInterfaceProxy::py_tensor_map_t tensors); + static CupyUtil::py_tensor_map_t get_tensors(ResponseMemory& self); + static void set_tensors(ResponseMemory& self, CupyUtil::py_tensor_map_t tensors); }; #pragma GCC visibility pop diff --git a/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp b/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp index 10d642b46c..071da3b480 100644 --- a/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp +++ b/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp @@ -18,6 +18,7 @@ #pragma once #include "morpheus/objects/tensor_object.hpp" // for 
TensorIndex, TensorObject +#include "morpheus/utilities/cupy_util.hpp" // for CupyUtil #include // for object @@ -93,8 +94,6 @@ class TensorMemory */ struct TensorMemoryInterfaceProxy { - using py_tensor_map_t = std::map; - /** * @brief Create and initialize a TensorMemory object, and return a shared pointer to the result. Each array in * `tensors` should be of length `count`. @@ -103,7 +102,7 @@ struct TensorMemoryInterfaceProxy * @param tensors : Map of string on to cupy arrays * @return std::shared_ptr */ - static std::shared_ptr init(std::size_t count, py_tensor_map_t tensors); + static std::shared_ptr init(std::size_t count, CupyUtil::py_tensor_map_t tensors); /** * @brief Get the count object @@ -113,18 +112,15 @@ struct TensorMemoryInterfaceProxy */ static std::size_t get_count(TensorMemory& self); - static py_tensor_map_t get_tensors(TensorMemory& self); - static void set_tensors(TensorMemory& self, py_tensor_map_t tensors); - /** - * @brief + * @brief Get the tensors converted to CuPy arrays. Pybind11 will convert the std::map to a Python dict. 
* - * @param cupy_tensors - * @return TensorMemory::tensor_map_t + * @param self + * @return py_tensor_map_t */ - static TensorMemory::tensor_map_t cupy_to_tensors(const py_tensor_map_t& cupy_tensors); + static CupyUtil::py_tensor_map_t get_tensors(TensorMemory& self); - static py_tensor_map_t tensors_to_cupy(const TensorMemory::tensor_map_t& tensors); + static void set_tensors(TensorMemory& self, CupyUtil::py_tensor_map_t tensors); }; #pragma GCC visibility pop diff --git a/morpheus/_lib/include/morpheus/utilities/cupy_util.hpp b/morpheus/_lib/include/morpheus/utilities/cupy_util.hpp index 3915049d3a..53254ac9ac 100644 --- a/morpheus/_lib/include/morpheus/utilities/cupy_util.hpp +++ b/morpheus/_lib/include/morpheus/utilities/cupy_util.hpp @@ -22,6 +22,8 @@ #include #include +#include + namespace morpheus { /****** Component public implementations *******************/ /****** CupyUtil****************************************/ @@ -37,22 +39,48 @@ namespace morpheus { */ struct CupyUtil { + using py_tensor_map_t = std::map; + static pybind11::object cp_module; // handle to cupy module /** - * TODO(Documentation) + * @brief Import and return the cupy module. Requires GIL to have already been acquired. + * + * @return pybind11::module_ */ static pybind11::module_ get_cp(); /** - * TODO(Documentation) + * @brief Convert a TensorObject to a CuPy array. Requires GIL to have already been acquired. + * + * @param tensor + * @return pybind11::object */ - static pybind11::object tensor_to_cupy(const TensorObject &tensor); + static pybind11::object tensor_to_cupy(const TensorObject& tensor); /** - * TODO(Documentation) + * @brief Convert a CuPy array into a TensorObject. Requires GIL to have already been acquired. + * + * @param cupy_array + * @return TensorObject */ static TensorObject cupy_to_tensor(pybind11::object cupy_array); + + /** + * @brief Convert a map of CuPy arrays into a map of TensorObjects. Requires GIL to have already been acquired. 
+ * + * @param cupy_tensors + * @return tensor_map_t + */ + static std::map cupy_to_tensors(const py_tensor_map_t& cupy_tensors); + + /** + * @brief Convert a map of TensorObjects into a map of CuPy arrays. Requires GIL to have already been acquired. + * + * @param tensors + * @return py_tensor_map_t + */ + static py_tensor_map_t tensors_to_cupy(const std::map& tensors); }; /** @} */ // end of group } // namespace morpheus diff --git a/morpheus/_lib/src/messages/memory/inference_memory.cpp b/morpheus/_lib/src/messages/memory/inference_memory.cpp index 2954dae6db..7041e8db22 100644 --- a/morpheus/_lib/src/messages/memory/inference_memory.cpp +++ b/morpheus/_lib/src/messages/memory/inference_memory.cpp @@ -35,7 +35,7 @@ bool InferenceMemory::has_input(const std::string& name) const std::shared_ptr InferenceMemoryInterfaceProxy::init(std::size_t count, std::map tensors) { - return std::make_shared(count, std::move(TensorMemoryInterfaceProxy::cupy_to_tensors(tensors))); + return std::make_shared(count, std::move(CupyUtil::cupy_to_tensors(tensors))); } std::size_t InferenceMemoryInterfaceProxy::get_count(InferenceMemory& self) @@ -43,15 +43,14 @@ std::size_t InferenceMemoryInterfaceProxy::get_count(InferenceMemory& self) return self.count; } -TensorMemoryInterfaceProxy::py_tensor_map_t InferenceMemoryInterfaceProxy::get_tensors(InferenceMemory& self) +CupyUtil::py_tensor_map_t InferenceMemoryInterfaceProxy::get_tensors(InferenceMemory& self) { return TensorMemoryInterfaceProxy::tensors_to_cupy(self.tensors); } -void InferenceMemoryInterfaceProxy::set_tensors(InferenceMemory& self, - TensorMemoryInterfaceProxy::py_tensor_map_t tensors) +void InferenceMemoryInterfaceProxy::set_tensors(InferenceMemory& self, CupyUtil::py_tensor_map_t tensors) { - self.tensors = std::move(TensorMemoryInterfaceProxy::cupy_to_tensors(tensors)); + self.tensors = std::move(CupyUtil::cupy_to_tensors(tensors)); } } // namespace morpheus diff 
--git a/morpheus/_lib/src/messages/memory/response_memory.cpp b/morpheus/_lib/src/messages/memory/response_memory.cpp index 4b42a6a363..a98acabd04 100644 --- a/morpheus/_lib/src/messages/memory/response_memory.cpp +++ b/morpheus/_lib/src/messages/memory/response_memory.cpp @@ -39,7 +39,7 @@ bool ResponseMemory::has_output(const std::string& name) const std::shared_ptr ResponseMemoryInterfaceProxy::init(std::size_t count, std::map tensors) { - return std::make_shared(count, std::move(TensorMemoryInterfaceProxy::cupy_to_tensors(tensors))); + return std::make_shared(count, std::move(CupyUtil::cupy_to_tensors(tensors))); } pybind11::object ResponseMemoryInterfaceProxy::get_output(ResponseMemory& self, const std::string& name) @@ -58,15 +58,14 @@ TensorObject ResponseMemoryInterfaceProxy::get_output_tensor(ResponseMemory& sel return self.tensors[name]; } -TensorMemoryInterfaceProxy::py_tensor_map_t ResponseMemoryInterfaceProxy::get_tensors(ResponseMemory& self) +CupyUtil::py_tensor_map_t ResponseMemoryInterfaceProxy::get_tensors(ResponseMemory& self) { - return TensorMemoryInterfaceProxy::tensors_to_cupy(self.tensors); + return CupyUtil::tensors_to_cupy(self.tensors); } -void ResponseMemoryInterfaceProxy::set_tensors(ResponseMemory& self, - TensorMemoryInterfaceProxy::py_tensor_map_t tensors) +void ResponseMemoryInterfaceProxy::set_tensors(ResponseMemory& self, CupyUtil::py_tensor_map_t tensors) { - self.tensors = std::move(TensorMemoryInterfaceProxy::cupy_to_tensors(tensors)); + self.tensors = std::move(CupyUtil::cupy_to_tensors(tensors)); } } // namespace morpheus diff --git a/morpheus/_lib/src/messages/memory/tensor_memory.cpp b/morpheus/_lib/src/messages/memory/tensor_memory.cpp index 04f714ab76..c01f047d83 100644 --- a/morpheus/_lib/src/messages/memory/tensor_memory.cpp +++ b/morpheus/_lib/src/messages/memory/tensor_memory.cpp @@ -17,8 +17,6 @@ #include "morpheus/messages/memory/tensor_memory.hpp" -#include "morpheus/utilities/cupy_util.hpp" // for CupyUtil - 
#include #include @@ -47,9 +45,9 @@ TensorMemory::tensor_map_t TensorMemory::copy_tensor_ranges( /****** TensorMemoryInterfaceProxy *************************/ namespace py = pybind11; -std::shared_ptr TensorMemoryInterfaceProxy::init(std::size_t count, py_tensor_map_t tensors) +std::shared_ptr TensorMemoryInterfaceProxy::init(std::size_t count, CupyUtil::py_tensor_map_t tensors) { - return std::make_shared(count, std::move(cupy_to_tensors(tensors))); + return std::make_shared(count, std::move(CupyUtil::cupy_to_tensors(tensors))); } std::size_t TensorMemoryInterfaceProxy::get_count(TensorMemory& self) @@ -57,37 +55,14 @@ std::size_t TensorMemoryInterfaceProxy::get_count(TensorMemory& self) return self.count; } -TensorMemoryInterfaceProxy::py_tensor_map_t TensorMemoryInterfaceProxy::get_tensors(TensorMemory& self) -{ - return tensors_to_cupy(self.tensors); -} - -void TensorMemoryInterfaceProxy::set_tensors(TensorMemory& self, py_tensor_map_t tensors) -{ - self.tensors = std::move(cupy_to_tensors(tensors)); -} - -TensorMemory::tensor_map_t TensorMemoryInterfaceProxy::cupy_to_tensors(const py_tensor_map_t& cupy_tensors) +CupyUtil::py_tensor_map_t TensorMemoryInterfaceProxy::get_tensors(TensorMemory& self) { - TensorMemory::tensor_map_t tensors; - for (const auto& tensor : cupy_tensors) - { - tensors[tensor.first] = std::move(CupyUtil::cupy_to_tensor(tensor.second)); - } - - return tensors; + return CupyUtil::tensors_to_cupy(self.tensors); } -TensorMemoryInterfaceProxy::py_tensor_map_t TensorMemoryInterfaceProxy::tensors_to_cupy( - const TensorMemory::tensor_map_t& tensors) +void TensorMemoryInterfaceProxy::set_tensors(TensorMemory& self, CupyUtil::py_tensor_map_t tensors) { - py_tensor_map_t cupy_tensors; - for (const auto& tensor : tensors) - { - cupy_tensors[tensor.first] = std::move(CupyUtil::tensor_to_cupy(tensor.second)); - } - - return cupy_tensors; + self.tensors = std::move(CupyUtil::cupy_to_tensors(tensors)); } } // namespace morpheus diff --git 
a/morpheus/_lib/src/python_modules/messages.cpp b/morpheus/_lib/src/python_modules/messages.cpp index 9362e78a95..5a9feaea04 100644 --- a/morpheus/_lib/src/python_modules/messages.cpp +++ b/morpheus/_lib/src/python_modules/messages.cpp @@ -30,6 +30,7 @@ #include "morpheus/messages/multi_response_probs.hpp" #include "morpheus/objects/data_table.hpp" #include "morpheus/utilities/cudf_util.hpp" +#include "morpheus/utilities/cupy_util.hpp" // for CupyUtil #include // for Status #include @@ -158,7 +159,7 @@ PYBIND11_MODULE(messages, m) py::class_>(m, "InferenceMemory") .def(py::init<>(&InferenceMemoryInterfaceProxy::init), py::arg("count"), - py::arg("tensors") = TensorMemoryInterfaceProxy::py_tensor_map_t()) + py::arg("tensors") = CupyUtil::py_tensor_map_t()) .def_property_readonly("count", &InferenceMemoryInterfaceProxy::get_count) .def_property("tensors", &InferenceMemoryInterfaceProxy::get_tensors, @@ -240,7 +241,7 @@ PYBIND11_MODULE(messages, m) py::class_>(m, "TensorMemory") .def(py::init<>(&TensorMemoryInterfaceProxy::init), py::arg("count"), - py::arg("tensors") = TensorMemoryInterfaceProxy::py_tensor_map_t()) + py::arg("tensors") = CupyUtil::py_tensor_map_t()) .def_readonly("count", &TensorMemory::count) .def_property("tensors", &TensorMemoryInterfaceProxy::get_tensors, @@ -250,7 +251,7 @@ PYBIND11_MODULE(messages, m) py::class_>(m, "ResponseMemory") .def(py::init<>(&ResponseMemoryInterfaceProxy::init), py::arg("count"), - py::arg("tensors") = TensorMemoryInterfaceProxy::py_tensor_map_t()) + py::arg("tensors") = CupyUtil::py_tensor_map_t()) .def_readonly("count", &ResponseMemory::count) .def_property("tensors", &ResponseMemoryInterfaceProxy::get_tensors, diff --git a/morpheus/_lib/src/utilities/cupy_util.cpp b/morpheus/_lib/src/utilities/cupy_util.cpp index 22993219ae..426009aa93 100644 --- a/morpheus/_lib/src/utilities/cupy_util.cpp +++ b/morpheus/_lib/src/utilities/cupy_util.cpp @@ -35,6 +35,7 @@ #include // for uintptr_t #include // for make_shared #include 
// for string +#include // for move #include // for vector namespace morpheus { @@ -129,4 +130,26 @@ TensorObject CupyUtil::cupy_to_tensor(pybind11::object cupy_array) return tensor; } + +std::map CupyUtil::cupy_to_tensors(const py_tensor_map_t& cupy_tensors) +{ + std::map tensors; + for (const auto& tensor : cupy_tensors) + { + tensors[tensor.first] = std::move(cupy_to_tensor(tensor.second)); + } + + return tensors; +} + +CupyUtil::py_tensor_map_t CupyUtil::tensors_to_cupy(const std::map& tensors) +{ + py_tensor_map_t cupy_tensors; + for (const auto& tensor : tensors) + { + cupy_tensors[tensor.first] = std::move(tensor_to_cupy(tensor.second)); + } + + return cupy_tensors; +} } // namespace morpheus From 648ca0a400929948b6e6b0f9118c54df0d70daa6 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Fri, 27 Jan 2023 17:44:48 -0800 Subject: [PATCH 10/85] wip - not working --- morpheus/messages/multi_inference_message.py | 25 ++++++++++---------- morpheus/messages/multi_response_message.py | 12 +++++----- morpheus/messages/tensor_memory.py | 2 +- 3 files changed, 19 insertions(+), 20 deletions(-) diff --git a/morpheus/messages/multi_inference_message.py b/morpheus/messages/multi_inference_message.py index 1b5f4a1b61..101bce9d73 100644 --- a/morpheus/messages/multi_inference_message.py +++ b/morpheus/messages/multi_inference_message.py @@ -78,7 +78,7 @@ def set_input(instance, name: str, value): instance.tensors[name] = value if value.ndim == 2 else cp.reshape(value, (value.shape[0], -1)) -@dataclasses.dataclass +@dataclasses.dataclass(init=False) class InferenceMemoryNLP(InferenceMemory, cpp_class=_messages.InferenceMemoryNLP): """ This is a container class for data that needs to be submitted to the inference server for NLP category @@ -99,13 +99,14 @@ class InferenceMemoryNLP(InferenceMemory, cpp_class=_messages.InferenceMemoryNLP input_mask: dataclasses.InitVar[cp.ndarray] = DataClassProp(get_input, set_input) seq_ids: dataclasses.InitVar[cp.ndarray] = 
DataClassProp(get_input, set_input) - def __post_init__(self, input_ids, input_mask, seq_ids): - self.input_ids = input_ids - self.input_mask = input_mask - self.seq_ids = seq_ids + def __init__(self, input_ids, input_mask, seq_ids): + super().__init__(count=len(input_ids), + tensors={ + 'input_ids': input_ids, 'input_mask': input_mask, 'seq_ids': seq_ids + }) -@dataclasses.dataclass +@dataclasses.dataclass(init=False) class InferenceMemoryFIL(InferenceMemory, cpp_class=_messages.InferenceMemoryFIL): """ This is a container class for data that needs to be submitted to the inference server for FIL category @@ -123,12 +124,11 @@ class InferenceMemoryFIL(InferenceMemory, cpp_class=_messages.InferenceMemoryFIL input__0: dataclasses.InitVar[cp.ndarray] = DataClassProp(get_input, set_input) seq_ids: dataclasses.InitVar[cp.ndarray] = DataClassProp(get_input, set_input) - def __post_init__(self, input__0, seq_ids): - self.input__0 = input__0 - self.seq_ids = seq_ids + def __init__(self, input__0, seq_ids): + super().__init__(count=len(input__0), tensors={'input__0': input__0, 'seq_ids': seq_ids}) -@dataclasses.dataclass +@dataclasses.dataclass(init=False) class InferenceMemoryAE(InferenceMemory, cpp_class=None): """ This is a container class for data that needs to be submitted to the inference server for FIL category @@ -146,9 +146,8 @@ class InferenceMemoryAE(InferenceMemory, cpp_class=None): input: dataclasses.InitVar[cp.ndarray] = DataClassProp(get_input, set_input) seq_ids: dataclasses.InitVar[cp.ndarray] = DataClassProp(get_input, set_input) - def __post_init__(self, input, seq_ids): - self.input = input - self.seq_ids = seq_ids + def __init__(self, input__0, seq_ids): + super().__init__(count=len(input__0), tensors={'input__0': input__0, 'seq_ids': seq_ids}) @dataclasses.dataclass diff --git a/morpheus/messages/multi_response_message.py b/morpheus/messages/multi_response_message.py index be8008b827..2b1d83889d 100644 --- a/morpheus/messages/multi_response_message.py 
+++ b/morpheus/messages/multi_response_message.py @@ -95,7 +95,7 @@ def get_output(self, name: str): return self.tensors[name] -@dataclasses.dataclass +@dataclasses.dataclass(init=False) class ResponseMemoryProbs(ResponseMemory, cpp_class=_messages.ResponseMemoryProbs): """ Subclass of `ResponseMemory` containng an output tensor named 'probs'. @@ -107,11 +107,11 @@ class ResponseMemoryProbs(ResponseMemory, cpp_class=_messages.ResponseMemoryProb """ probs: dataclasses.InitVar[cp.ndarray] = DataClassProp(get_output, set_output) - def __post_init__(self, probs): - self.probs = probs + def __init__(self, probs): + super().__init__(count=len(probs), tensors={'probs': probs}) -@dataclasses.dataclass +@dataclasses.dataclass(init=False) class ResponseMemoryAE(ResponseMemory, cpp_class=None): """ Subclass of `ResponseMemory` specific to the AutoEncoder pipeline. @@ -132,8 +132,8 @@ class ResponseMemoryAE(ResponseMemory, cpp_class=None): user_id = "" explain_df = None - def __post_init__(self, probs): - self.probs = probs + def __init__(self, probs): + super().__init__(count=len(probs), tensors={'probs': probs}) @dataclasses.dataclass diff --git a/morpheus/messages/tensor_memory.py b/morpheus/messages/tensor_memory.py index 8049380b9f..523433367b 100644 --- a/morpheus/messages/tensor_memory.py +++ b/morpheus/messages/tensor_memory.py @@ -38,4 +38,4 @@ class TensorMemory(MessageData, cpp_class=_messages.TensorMemory): """ count: int - tensors: typing.Dict[str, cp.ndarray] = dataclasses.field(default_factory=dict, init=False) + tensors: typing.Dict[str, cp.ndarray] = dataclasses.field(default_factory=dict) From 158e7e76a3770758513bef5cea3c4a9f944c8618 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Fri, 27 Jan 2023 17:56:41 -0800 Subject: [PATCH 11/85] wip --- morpheus/messages/multi_inference_message.py | 15 ++++++--------- morpheus/messages/multi_response_message.py | 8 ++++---- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git 
a/morpheus/messages/multi_inference_message.py b/morpheus/messages/multi_inference_message.py index 101bce9d73..2858b702a0 100644 --- a/morpheus/messages/multi_inference_message.py +++ b/morpheus/messages/multi_inference_message.py @@ -99,11 +99,8 @@ class InferenceMemoryNLP(InferenceMemory, cpp_class=_messages.InferenceMemoryNLP input_mask: dataclasses.InitVar[cp.ndarray] = DataClassProp(get_input, set_input) seq_ids: dataclasses.InitVar[cp.ndarray] = DataClassProp(get_input, set_input) - def __init__(self, input_ids, input_mask, seq_ids): - super().__init__(count=len(input_ids), - tensors={ - 'input_ids': input_ids, 'input_mask': input_mask, 'seq_ids': seq_ids - }) + def __init__(self, count, input_ids, input_mask, seq_ids): + super().__init__(count, tensors={'input_ids': input_ids, 'input_mask': input_mask, 'seq_ids': seq_ids}) @dataclasses.dataclass(init=False) @@ -124,8 +121,8 @@ class InferenceMemoryFIL(InferenceMemory, cpp_class=_messages.InferenceMemoryFIL input__0: dataclasses.InitVar[cp.ndarray] = DataClassProp(get_input, set_input) seq_ids: dataclasses.InitVar[cp.ndarray] = DataClassProp(get_input, set_input) - def __init__(self, input__0, seq_ids): - super().__init__(count=len(input__0), tensors={'input__0': input__0, 'seq_ids': seq_ids}) + def __init__(self, count, input__0, seq_ids): + super().__init__(count, tensors={'input__0': input__0, 'seq_ids': seq_ids}) @dataclasses.dataclass(init=False) @@ -146,8 +143,8 @@ class InferenceMemoryAE(InferenceMemory, cpp_class=None): input: dataclasses.InitVar[cp.ndarray] = DataClassProp(get_input, set_input) seq_ids: dataclasses.InitVar[cp.ndarray] = DataClassProp(get_input, set_input) - def __init__(self, input__0, seq_ids): - super().__init__(count=len(input__0), tensors={'input__0': input__0, 'seq_ids': seq_ids}) + def __init__(self, count, input__0, seq_ids): + super().__init__(count, tensors={'input__0': input__0, 'seq_ids': seq_ids}) @dataclasses.dataclass diff --git 
a/morpheus/messages/multi_response_message.py b/morpheus/messages/multi_response_message.py index 2b1d83889d..9b121bae68 100644 --- a/morpheus/messages/multi_response_message.py +++ b/morpheus/messages/multi_response_message.py @@ -107,8 +107,8 @@ class ResponseMemoryProbs(ResponseMemory, cpp_class=_messages.ResponseMemoryProb """ probs: dataclasses.InitVar[cp.ndarray] = DataClassProp(get_output, set_output) - def __init__(self, probs): - super().__init__(count=len(probs), tensors={'probs': probs}) + def __init__(self, count, probs): + super().__init__(count, tensors={'probs': probs}) @dataclasses.dataclass(init=False) @@ -132,8 +132,8 @@ class ResponseMemoryAE(ResponseMemory, cpp_class=None): user_id = "" explain_df = None - def __init__(self, probs): - super().__init__(count=len(probs), tensors={'probs': probs}) + def __init__(self, count, probs): + super().__init__(count, tensors={'probs': probs}) @dataclasses.dataclass From 7341d022ca7d469477e4acc050b459ebcbecdeec Mon Sep 17 00:00:00 2001 From: David Gardner Date: Mon, 30 Jan 2023 07:55:46 -0800 Subject: [PATCH 12/85] Fix constructor args --- morpheus/messages/multi_inference_message.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/morpheus/messages/multi_inference_message.py b/morpheus/messages/multi_inference_message.py index 2858b702a0..1fc040102e 100644 --- a/morpheus/messages/multi_inference_message.py +++ b/morpheus/messages/multi_inference_message.py @@ -143,8 +143,8 @@ class InferenceMemoryAE(InferenceMemory, cpp_class=None): input: dataclasses.InitVar[cp.ndarray] = DataClassProp(get_input, set_input) seq_ids: dataclasses.InitVar[cp.ndarray] = DataClassProp(get_input, set_input) - def __init__(self, count, input__0, seq_ids): - super().__init__(count, tensors={'input__0': input__0, 'seq_ids': seq_ids}) + def __init__(self, count, input, seq_ids): + super().__init__(count, tensors={'input': input, 'seq_ids': seq_ids}) @dataclasses.dataclass From 
392eb44b4ecbf23b7a3648a98c9ed6713305bd3c Mon Sep 17 00:00:00 2001 From: David Gardner Date: Mon, 30 Jan 2023 09:25:15 -0800 Subject: [PATCH 13/85] tests wip --- tests/test_tensor_memory.py | 65 +++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 tests/test_tensor_memory.py diff --git a/tests/test_tensor_memory.py b/tests/test_tensor_memory.py new file mode 100644 index 0000000000..482c103c8e --- /dev/null +++ b/tests/test_tensor_memory.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python +# SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import string + +import cupy as cp +import numpy as np +import pytest + +from morpheus._lib.common import FileTypes +from morpheus.messages import InferenceMemory +from morpheus.messages import ResponseMemory +from morpheus.messages.tensor_memory import TensorMemory +from utils import TEST_DIRS + + +def check_tensor_memory(cls, count, tensors): + other_tensors = {'ones': cp.ones(count), 'zeros': cp.zeros(count)} + + m = cls(count) + assert m.count == count + assert m.tensors == {} + + m.tensors = tensors + assert m.tensors == tensors + + m.tensors = other_tensors + assert m.tensors == other_tensors + + m = cls(count, tensors) + assert m.count == count + assert m.tensors == tensors + + m.tensors = other_tensors + assert m.tensors == other_tensors + + +def test_copy_ranges(config, df_type): + input_file = os.path.join(TEST_DIRS.tests_data_dir, 'filter_probs.csv') + test_data = cp.array(np.loadtxt(input_file, delimiter=",", skiprows=1)) + + # TensorMemory expects a dictionary of { : } + # Convert each column into a 1d cupy array + tensors = {} + for col in range(test_data.shape[1]): + tensors[string.ascii_lowercase[col]] = test_data[:, col] + + count = test_data.shape[0] + + for cls in (TensorMemory, InferenceMemory, ResponseMemory): + check_tensor_memory(cls, count, tensors) From 952cfebb68c190a466985e42cb9b6b1498a8ed12 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Mon, 30 Jan 2023 10:43:38 -0800 Subject: [PATCH 14/85] wip --- morpheus/messages/multi_inference_message.py | 4 +- tests/test_tensor_memory.py | 74 ++++++++++++++++---- 2 files changed, 64 insertions(+), 14 deletions(-) diff --git a/morpheus/messages/multi_inference_message.py b/morpheus/messages/multi_inference_message.py index 1fc040102e..805a7fff5d 100644 --- a/morpheus/messages/multi_inference_message.py +++ b/morpheus/messages/multi_inference_message.py @@ -128,12 +128,12 @@ def __init__(self, count, input__0, seq_ids): @dataclasses.dataclass(init=False) class 
InferenceMemoryAE(InferenceMemory, cpp_class=None): """ - This is a container class for data that needs to be submitted to the inference server for FIL category + This is a container class for data that needs to be submitted to the inference server for auto encoder usecases. Parameters ---------- - input__0 : cupy.ndarray + input : cupy.ndarray Inference input. seq_ids : cupy.ndarray Ids used to index from an inference input to a message. Necessary since there can be more inference diff --git a/tests/test_tensor_memory.py b/tests/test_tensor_memory.py index 482c103c8e..1281b946b4 100644 --- a/tests/test_tensor_memory.py +++ b/tests/test_tensor_memory.py @@ -22,11 +22,23 @@ import pytest from morpheus._lib.common import FileTypes -from morpheus.messages import InferenceMemory -from morpheus.messages import ResponseMemory +from morpheus.messages.multi_inference_message import InferenceMemory +from morpheus.messages.multi_inference_message import InferenceMemoryAE +from morpheus.messages.multi_inference_message import InferenceMemoryFIL +from morpheus.messages.multi_inference_message import InferenceMemoryNLP +from morpheus.messages.multi_response_message import ResponseMemory +from morpheus.messages.multi_response_message import ResponseMemoryProbs from morpheus.messages.tensor_memory import TensorMemory from utils import TEST_DIRS +INPUT_FILE = os.path.join(TEST_DIRS.tests_data_dir, 'filter_probs.csv') + + +def compare_tensors(t1, t2): + assert sorted(t1.keys()) == sorted(t2.keys()) + for (k, v1) in t1.items(): + assert (v1 == t2[k]).all() + def check_tensor_memory(cls, count, tensors): other_tensors = {'ones': cp.ones(count), 'zeros': cp.zeros(count)} @@ -36,30 +48,68 @@ def check_tensor_memory(cls, count, tensors): assert m.tensors == {} m.tensors = tensors - assert m.tensors == tensors + compare_tensors(m.tensors, tensors) m.tensors = other_tensors - assert m.tensors == other_tensors + compare_tensors(m.tensors, other_tensors) m = cls(count, tensors) assert 
m.count == count - assert m.tensors == tensors + compare_tensors(m.tensors, tensors) m.tensors = other_tensors - assert m.tensors == other_tensors + compare_tensors(m.tensors, other_tensors) -def test_copy_ranges(config, df_type): - input_file = os.path.join(TEST_DIRS.tests_data_dir, 'filter_probs.csv') - test_data = cp.array(np.loadtxt(input_file, delimiter=",", skiprows=1)) +def test_tensor_memory(config): + test_data = cp.array(np.loadtxt(INPUT_FILE, delimiter=",", skiprows=1)) + count = test_data.shape[0] # TensorMemory expects a dictionary of { : } # Convert each column into a 1d cupy array tensors = {} for col in range(test_data.shape[1]): - tensors[string.ascii_lowercase[col]] = test_data[:, col] - - count = test_data.shape[0] + tensors[string.ascii_lowercase[col]] = cp.array(test_data[:, col]) for cls in (TensorMemory, InferenceMemory, ResponseMemory): check_tensor_memory(cls, count, tensors) + + +def test_inference_memory_fil(config): + test_data = cp.array(np.loadtxt(INPUT_FILE, delimiter=",", skiprows=1)) + count = test_data.shape[0] + + input_0 = cp.array(test_data[:, 0]) + seq_ids = cp.array(test_data[:, 1]) + m = InferenceMemoryFIL(count, input__0=input_0, seq_ids=seq_ids) + + assert m.count == count + compare_tensors(m.tensors, {'input__0': input_0, 'seq_ids': seq_ids}) + assert (m.input__0 == input_0).all() + assert (m.seq_ids == seq_ids).all() + + +def test_inference_memory_nlp(config): + test_data = cp.array(np.loadtxt(INPUT_FILE, delimiter=",", skiprows=1)) + count = test_data.shape[0] + + input_ids = cp.array(test_data[:, 0]) + input_mask = cp.array(test_data[:, 1]) + seq_ids = cp.array(test_data[:, 2]) + m = InferenceMemoryNLP(count, input_ids=input_ids, input_mask=input_mask, seq_ids=seq_ids) + + assert m.count == count + compare_tensors(m.tensors, {'input_ids': input_ids, 'input_mask': input_mask, 'seq_ids': seq_ids}) + assert (m.input_ids == input_ids).all() + assert (m.inpinput_maskut_ids == input_mask).all() + assert (m.seq_ids == 
seq_ids).all() + + +def test_response_memory_probs(config): + test_data = cp.array(np.loadtxt(INPUT_FILE, delimiter=",", skiprows=1)) + count = test_data.shape[0] + + m = ResponseMemoryProbs(count=count, probs=test_data) + assert m.count == count + compare_tensors(m.tensors, {'probs': test_data}) + assert (m.probs == test_data).all() From b443ac0c5b78631b490e11e2d04167a76a860840 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Mon, 30 Jan 2023 11:01:18 -0800 Subject: [PATCH 15/85] Finish tests --- tests/test_tensor_memory.py | 43 ++++++++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/tests/test_tensor_memory.py b/tests/test_tensor_memory.py index 1281b946b4..512844e1a4 100644 --- a/tests/test_tensor_memory.py +++ b/tests/test_tensor_memory.py @@ -22,11 +22,13 @@ import pytest from morpheus._lib.common import FileTypes +from morpheus.io.deserializers import read_file_to_df from morpheus.messages.multi_inference_message import InferenceMemory from morpheus.messages.multi_inference_message import InferenceMemoryAE from morpheus.messages.multi_inference_message import InferenceMemoryFIL from morpheus.messages.multi_inference_message import InferenceMemoryNLP from morpheus.messages.multi_response_message import ResponseMemory +from morpheus.messages.multi_response_message import ResponseMemoryAE from morpheus.messages.multi_response_message import ResponseMemoryProbs from morpheus.messages.tensor_memory import TensorMemory from utils import TEST_DIRS @@ -75,6 +77,21 @@ def test_tensor_memory(config): check_tensor_memory(cls, count, tensors) +@pytest.mark.use_python +def test_inference_memory_ae(config): + test_data = cp.array(np.loadtxt(INPUT_FILE, delimiter=",", skiprows=1)) + count = test_data.shape[0] + + input = cp.array(test_data[:, 0]) + seq_ids = cp.array(test_data[:, 1]) + m = InferenceMemoryAE(count, input=input, seq_ids=seq_ids) + + assert m.count == count + compare_tensors(m.tensors, {'input': input, 'seq_ids': 
seq_ids}) + assert (m.input == input).all() + assert (m.seq_ids == seq_ids).all() + + def test_inference_memory_fil(config): test_data = cp.array(np.loadtxt(INPUT_FILE, delimiter=",", skiprows=1)) count = test_data.shape[0] @@ -101,15 +118,35 @@ def test_inference_memory_nlp(config): assert m.count == count compare_tensors(m.tensors, {'input_ids': input_ids, 'input_mask': input_mask, 'seq_ids': seq_ids}) assert (m.input_ids == input_ids).all() - assert (m.inpinput_maskut_ids == input_mask).all() + assert (m.input_mask == input_mask).all() assert (m.seq_ids == seq_ids).all() -def test_response_memory_probs(config): +def check_response_memory_probs_and_ae(cls): test_data = cp.array(np.loadtxt(INPUT_FILE, delimiter=",", skiprows=1)) count = test_data.shape[0] - m = ResponseMemoryProbs(count=count, probs=test_data) + m = cls(count=count, probs=test_data) assert m.count == count compare_tensors(m.tensors, {'probs': test_data}) assert (m.probs == test_data).all() + return m + + +@pytest.mark.use_python +def test_response_memory_ae(config): + m = check_response_memory_probs_and_ae(ResponseMemoryAE) + + assert m.user_id == "" + assert m.explain_df is None + + df = read_file_to_df(INPUT_FILE, file_type=FileTypes.Auto, df_type='pandas') + m.user_id = "testy" + m.explain_df = df + + assert m.user_id == "testy" + assert (m.explain_df.values == df.values).all() + + +def test_response_memory_probs(config): + check_response_memory_probs_and_ae(ResponseMemoryProbs) From 9a6d1cd96db27e00c2b1def5dbe00793d031ec09 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Mon, 30 Jan 2023 11:19:46 -0800 Subject: [PATCH 16/85] Ensure popen is defined --- tests/conftest.py | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 9ca54369e0..b2a9a9f3d4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -377,6 +377,7 @@ def _camouflage_is_running(): # Actually launch camoflague if launch_camouflage: + 
popen = None try: popen = subprocess.Popen(["camouflage", "--config", "config.yml"], cwd=root_dir, @@ -402,20 +403,20 @@ def _camouflage_is_running(): logger.exception("Error launching camouflage") raise finally: - - logger.info("Killing camouflage with pid {}".format(popen.pid)) - - elapsed_time = 0.0 - sleep_time = 0.1 - stopped = False - - # It takes a little while to shutdown - while not stopped and elapsed_time < shutdown_timeout: - popen.kill() - stopped = (popen.poll() is not None) - if not stopped: - time.sleep(sleep_time) - elapsed_time += sleep_time + if popen is not None: + logger.info("Killing camouflage with pid {}".format(popen.pid)) + + elapsed_time = 0.0 + sleep_time = 0.1 + stopped = False + + # It takes a little while to shutdown + while not stopped and elapsed_time < shutdown_timeout: + popen.kill() + stopped = (popen.poll() is not None) + if not stopped: + time.sleep(sleep_time) + elapsed_time += sleep_time else: From 87b63c33c4407d51ca56f639c3f15d88b521b7ee Mon Sep 17 00:00:00 2001 From: David Gardner Date: Mon, 30 Jan 2023 11:23:44 -0800 Subject: [PATCH 17/85] Move tensor_map_t --- .../morpheus/messages/memory/inference_memory.hpp | 2 +- .../morpheus/messages/memory/response_memory.hpp | 2 +- .../messages/memory/response_memory_probs.hpp | 10 +++++----- .../morpheus/messages/memory/tensor_memory.hpp | 12 +++++------- .../_lib/include/morpheus/utilities/cupy_util.hpp | 1 + .../_lib/src/messages/memory/inference_memory.cpp | 4 +++- .../_lib/src/messages/memory/response_memory.cpp | 3 ++- .../src/messages/memory/response_memory_probs.cpp | 10 +++++----- morpheus/_lib/src/messages/memory/tensor_memory.cpp | 9 +++++---- 9 files changed, 28 insertions(+), 25 deletions(-) diff --git a/morpheus/_lib/include/morpheus/messages/memory/inference_memory.hpp b/morpheus/_lib/include/morpheus/messages/memory/inference_memory.hpp index 8e7d6daf1e..1ccdf33fb7 100644 --- a/morpheus/_lib/include/morpheus/messages/memory/inference_memory.hpp +++ 
b/morpheus/_lib/include/morpheus/messages/memory/inference_memory.hpp @@ -53,7 +53,7 @@ class InferenceMemory : public TensorMemory * @param count * @param tensors */ - InferenceMemory(size_t count, tensor_map_t&& tensors); + InferenceMemory(size_t count, CupyUtil::tensor_map_t&& tensors); /** * @brief Checks if a tensor named `name` exists in `tensors` diff --git a/morpheus/_lib/include/morpheus/messages/memory/response_memory.hpp b/morpheus/_lib/include/morpheus/messages/memory/response_memory.hpp index 537a9b140e..b9df93e1a2 100644 --- a/morpheus/_lib/include/morpheus/messages/memory/response_memory.hpp +++ b/morpheus/_lib/include/morpheus/messages/memory/response_memory.hpp @@ -54,7 +54,7 @@ class ResponseMemory : public TensorMemory * @param count * @param tensors */ - ResponseMemory(size_t count, tensor_map_t&& tensors); + ResponseMemory(size_t count, CupyUtil::tensor_map_t&& tensors); /** * @brief Checks if a tensor named `name` exists in `tensors` diff --git a/morpheus/_lib/include/morpheus/messages/memory/response_memory_probs.hpp b/morpheus/_lib/include/morpheus/messages/memory/response_memory_probs.hpp index ef32dcdc3f..9685e03c6c 100644 --- a/morpheus/_lib/include/morpheus/messages/memory/response_memory_probs.hpp +++ b/morpheus/_lib/include/morpheus/messages/memory/response_memory_probs.hpp @@ -57,14 +57,14 @@ class ResponseMemoryProbs : public ResponseMemory * @param count * @param tensors */ - ResponseMemoryProbs(size_t count, tensor_map_t &&tensors); + ResponseMemoryProbs(size_t count, CupyUtil::tensor_map_t&& tensors); /** * @brief Returns the tensor named 'probs', throws a `std::runtime_error` if it does not exist * * @return const TensorObject& */ - const TensorObject &get_probs() const; + const TensorObject& get_probs() const; /** * @brief Update the tensor named 'probs' @@ -96,7 +96,7 @@ struct ResponseMemoryProbsInterfaceProxy * @param self * @return std::size_t */ - static std::size_t count(ResponseMemoryProbs &self); + static std::size_t 
count(ResponseMemoryProbs& self); /** * @brief Get the response memory probs object @@ -104,7 +104,7 @@ struct ResponseMemoryProbsInterfaceProxy * @param self * @return pybind11::object */ - static pybind11::object get_probs(ResponseMemoryProbs &self); + static pybind11::object get_probs(ResponseMemoryProbs& self); /** * @brief Set the response memory probs object @@ -112,7 +112,7 @@ struct ResponseMemoryProbsInterfaceProxy * @param self * @param cupy_values */ - static void set_probs(ResponseMemoryProbs &self, pybind11::object cupy_values); + static void set_probs(ResponseMemoryProbs& self, pybind11::object cupy_values); }; #pragma GCC visibility pop diff --git a/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp b/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp index 071da3b480..a9959f8ecc 100644 --- a/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp +++ b/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp @@ -47,8 +47,6 @@ namespace morpheus { class TensorMemory { public: - using tensor_map_t = std::map; - /** * @brief Construct a new Tensor Memory object * @@ -62,11 +60,11 @@ class TensorMemory * @param count * @param tensors */ - TensorMemory(size_t count, tensor_map_t&& tensors); + TensorMemory(size_t count, CupyUtil::tensor_map_t&& tensors); virtual ~TensorMemory() = default; size_t count{0}; - tensor_map_t tensors; + CupyUtil::tensor_map_t tensors; /** * @brief Verify whether the specified tensor name is present in the tensor memory @@ -82,10 +80,10 @@ class TensorMemory * * @param ranges * @param num_selected_rows - * @return tensor_map_t + * @return CupyUtil::tensor_map_t */ - tensor_map_t copy_tensor_ranges(const std::vector>& ranges, - size_t num_selected_rows) const; + CupyUtil::tensor_map_t copy_tensor_ranges(const std::vector>& ranges, + size_t num_selected_rows) const; }; /****** TensorMemoryInterfaceProxy *************************/ diff --git 
a/morpheus/_lib/include/morpheus/utilities/cupy_util.hpp b/morpheus/_lib/include/morpheus/utilities/cupy_util.hpp index 53254ac9ac..ce2082f25e 100644 --- a/morpheus/_lib/include/morpheus/utilities/cupy_util.hpp +++ b/morpheus/_lib/include/morpheus/utilities/cupy_util.hpp @@ -39,6 +39,7 @@ namespace morpheus { */ struct CupyUtil { + using tensor_map_t = std::map; using py_tensor_map_t = std::map; static pybind11::object cp_module; // handle to cupy module diff --git a/morpheus/_lib/src/messages/memory/inference_memory.cpp b/morpheus/_lib/src/messages/memory/inference_memory.cpp index 7041e8db22..8cd237d5a6 100644 --- a/morpheus/_lib/src/messages/memory/inference_memory.cpp +++ b/morpheus/_lib/src/messages/memory/inference_memory.cpp @@ -24,7 +24,9 @@ namespace morpheus { /****** Component public implementations *******************/ /****** InferenceMemory****************************************/ InferenceMemory::InferenceMemory(size_t count) : TensorMemory(count) {} -InferenceMemory::InferenceMemory(size_t count, tensor_map_t&& tensors) : TensorMemory(count, std::move(tensors)) {} +InferenceMemory::InferenceMemory(size_t count, CupyUtil::tensor_map_t&& tensors) : + TensorMemory(count, std::move(tensors)) +{} bool InferenceMemory::has_input(const std::string& name) const { diff --git a/morpheus/_lib/src/messages/memory/response_memory.cpp b/morpheus/_lib/src/messages/memory/response_memory.cpp index a98acabd04..8670f8b5d5 100644 --- a/morpheus/_lib/src/messages/memory/response_memory.cpp +++ b/morpheus/_lib/src/messages/memory/response_memory.cpp @@ -28,7 +28,8 @@ namespace morpheus { /****** Component public implementations *******************/ /****** ResponseMemory****************************************/ ResponseMemory::ResponseMemory(size_t count) : TensorMemory(count) {} -ResponseMemory::ResponseMemory(size_t count, tensor_map_t&& tensors) : TensorMemory(count, std::move(tensors)) {} +ResponseMemory::ResponseMemory(size_t count, CupyUtil::tensor_map_t&& 
tensors) : TensorMemory(count, std::move(tensors)) +{} bool ResponseMemory::has_output(const std::string& name) const { diff --git a/morpheus/_lib/src/messages/memory/response_memory_probs.cpp b/morpheus/_lib/src/messages/memory/response_memory_probs.cpp index 4f50f7d6e8..4f4e4195c7 100644 --- a/morpheus/_lib/src/messages/memory/response_memory_probs.cpp +++ b/morpheus/_lib/src/messages/memory/response_memory_probs.cpp @@ -36,13 +36,13 @@ ResponseMemoryProbs::ResponseMemoryProbs(size_t count, TensorObject probs) : Res this->tensors["probs"] = std::move(probs); } -ResponseMemoryProbs::ResponseMemoryProbs(size_t count, tensor_map_t &&tensors) : +ResponseMemoryProbs::ResponseMemoryProbs(size_t count, CupyUtil::tensor_map_t&& tensors) : ResponseMemory(count, std::move(tensors)) { CHECK(has_tensor("probs")) << "Tensor: 'probs' not found in memory"; } -const TensorObject &ResponseMemoryProbs::get_probs() const +const TensorObject& ResponseMemoryProbs::get_probs() const { auto found = this->tensors.find("probs"); if (found == this->tensors.end()) @@ -66,17 +66,17 @@ std::shared_ptr ResponseMemoryProbsInterfaceProxy::init(cud return std::make_shared(count, std::move(CupyUtil::cupy_to_tensor(probs))); } -std::size_t ResponseMemoryProbsInterfaceProxy::count(ResponseMemoryProbs &self) +std::size_t ResponseMemoryProbsInterfaceProxy::count(ResponseMemoryProbs& self) { return self.count; } -pybind11::object ResponseMemoryProbsInterfaceProxy::get_probs(ResponseMemoryProbs &self) +pybind11::object ResponseMemoryProbsInterfaceProxy::get_probs(ResponseMemoryProbs& self) { return CupyUtil::tensor_to_cupy(self.get_probs()); } -void ResponseMemoryProbsInterfaceProxy::set_probs(ResponseMemoryProbs &self, pybind11::object cupy_values) +void ResponseMemoryProbsInterfaceProxy::set_probs(ResponseMemoryProbs& self, pybind11::object cupy_values) { self.set_probs(CupyUtil::cupy_to_tensor(cupy_values)); } diff --git a/morpheus/_lib/src/messages/memory/tensor_memory.cpp 
b/morpheus/_lib/src/messages/memory/tensor_memory.cpp index c01f047d83..7650dcc56c 100644 --- a/morpheus/_lib/src/messages/memory/tensor_memory.cpp +++ b/morpheus/_lib/src/messages/memory/tensor_memory.cpp @@ -24,17 +24,18 @@ namespace morpheus { /****** Component public implementations *******************/ /****** TensorMemory****************************************/ TensorMemory::TensorMemory(size_t count) : count(count) {} -TensorMemory::TensorMemory(size_t count, tensor_map_t&& tensors) : count(count), tensors(std::move(tensors)) {} +TensorMemory::TensorMemory(size_t count, CupyUtil::tensor_map_t&& tensors) : count(count), tensors(std::move(tensors)) +{} bool TensorMemory::has_tensor(const std::string& name) const { return this->tensors.find(name) != this->tensors.end(); } -TensorMemory::tensor_map_t TensorMemory::copy_tensor_ranges( - const std::vector>& ranges, size_t num_selected_rows) const +CupyUtil::tensor_map_t TensorMemory::copy_tensor_ranges(const std::vector>& ranges, + size_t num_selected_rows) const { - tensor_map_t tensors; + CupyUtil::tensor_map_t tensors; for (const auto& p : this->tensors) { tensors.insert(std::pair{p.first, p.second.copy_rows(ranges, num_selected_rows)}); From f64c8953659240637ae658832c80028efadfc6b4 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Mon, 30 Jan 2023 11:26:41 -0800 Subject: [PATCH 18/85] wip --- morpheus/_lib/include/morpheus/utilities/cupy_util.hpp | 4 ++-- morpheus/_lib/src/utilities/cupy_util.cpp | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/morpheus/_lib/include/morpheus/utilities/cupy_util.hpp b/morpheus/_lib/include/morpheus/utilities/cupy_util.hpp index ce2082f25e..e66fd32fe3 100644 --- a/morpheus/_lib/include/morpheus/utilities/cupy_util.hpp +++ b/morpheus/_lib/include/morpheus/utilities/cupy_util.hpp @@ -73,7 +73,7 @@ struct CupyUtil * @param cupy_tensors * @return tensor_map_t */ - static std::map cupy_to_tensors(const py_tensor_map_t& cupy_tensors); + static tensor_map_t 
cupy_to_tensors(const py_tensor_map_t& cupy_tensors); /** * @brief Convert a map of TensorObjects into a map of CuPy arrays. Requires GIL to have already been aqcuired. @@ -81,7 +81,7 @@ struct CupyUtil * @param tensors * @return py_tensor_map_t */ - static py_tensor_map_t tensors_to_cupy(const std::map& tensors); + static py_tensor_map_t tensors_to_cupy(const tensor_map_t& tensors); }; /** @} */ // end of group } // namespace morpheus diff --git a/morpheus/_lib/src/utilities/cupy_util.cpp b/morpheus/_lib/src/utilities/cupy_util.cpp index 93887b10a7..71ad4dba66 100644 --- a/morpheus/_lib/src/utilities/cupy_util.cpp +++ b/morpheus/_lib/src/utilities/cupy_util.cpp @@ -130,9 +130,9 @@ TensorObject CupyUtil::cupy_to_tensor(pybind11::object cupy_array) return tensor; } -std::map CupyUtil::cupy_to_tensors(const py_tensor_map_t& cupy_tensors) +CupyUtil::tensor_map_t CupyUtil::cupy_to_tensors(const py_tensor_map_t& cupy_tensors) { - std::map tensors; + tensor_map_t tensors; for (const auto& tensor : cupy_tensors) { tensors[tensor.first] = std::move(cupy_to_tensor(tensor.second)); @@ -141,7 +141,7 @@ std::map CupyUtil::cupy_to_tensors(const py_tensor_ma return tensors; } -CupyUtil::py_tensor_map_t CupyUtil::tensors_to_cupy(const std::map& tensors) +CupyUtil::py_tensor_map_t CupyUtil::tensors_to_cupy(const tensor_map_t& tensors) { py_tensor_map_t cupy_tensors; for (const auto& tensor : tensors) From 7fc1c715e9d4716990ffbcdf4bea84c6c5a2d4eb Mon Sep 17 00:00:00 2001 From: David Gardner Date: Mon, 30 Jan 2023 11:32:08 -0800 Subject: [PATCH 19/85] Use typedef --- morpheus/_lib/src/messages/memory/inference_memory.cpp | 2 +- morpheus/_lib/src/messages/memory/response_memory.cpp | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/morpheus/_lib/src/messages/memory/inference_memory.cpp b/morpheus/_lib/src/messages/memory/inference_memory.cpp index 8cd237d5a6..63e716611b 100644 --- a/morpheus/_lib/src/messages/memory/inference_memory.cpp +++ 
b/morpheus/_lib/src/messages/memory/inference_memory.cpp @@ -35,7 +35,7 @@ bool InferenceMemory::has_input(const std::string& name) const /****** InferenceMemoryInterfaceProxy *************************/ std::shared_ptr InferenceMemoryInterfaceProxy::init(std::size_t count, - std::map tensors) + CupyUtil::py_tensor_map_t tensors) { return std::make_shared(count, std::move(CupyUtil::cupy_to_tensors(tensors))); } diff --git a/morpheus/_lib/src/messages/memory/response_memory.cpp b/morpheus/_lib/src/messages/memory/response_memory.cpp index 8670f8b5d5..f942579b55 100644 --- a/morpheus/_lib/src/messages/memory/response_memory.cpp +++ b/morpheus/_lib/src/messages/memory/response_memory.cpp @@ -37,8 +37,7 @@ bool ResponseMemory::has_output(const std::string& name) const } /****** ResponseMemoryInterfaceProxy *************************/ -std::shared_ptr ResponseMemoryInterfaceProxy::init(std::size_t count, - std::map tensors) +std::shared_ptr ResponseMemoryInterfaceProxy::init(std::size_t count, CupyUtil::py_tensor_map_t tensors) { return std::make_shared(count, std::move(CupyUtil::cupy_to_tensors(tensors))); } From db2bb9f742f16c8c8546dd1800607cdafbed6863 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Mon, 30 Jan 2023 11:36:43 -0800 Subject: [PATCH 20/85] Remove unused namespace alias --- morpheus/_lib/src/messages/memory/tensor_memory.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/morpheus/_lib/src/messages/memory/tensor_memory.cpp b/morpheus/_lib/src/messages/memory/tensor_memory.cpp index 7650dcc56c..2df786bd7f 100644 --- a/morpheus/_lib/src/messages/memory/tensor_memory.cpp +++ b/morpheus/_lib/src/messages/memory/tensor_memory.cpp @@ -45,7 +45,6 @@ CupyUtil::tensor_map_t TensorMemory::copy_tensor_ranges(const std::vector TensorMemoryInterfaceProxy::init(std::size_t count, CupyUtil::py_tensor_map_t tensors) { return std::make_shared(count, std::move(CupyUtil::cupy_to_tensors(tensors))); From d428e16d81192190c1f2b550aa518b168582fc91 Mon Sep 17 00:00:00 2001 
From: David Gardner Date: Wed, 8 Feb 2023 09:40:33 -0800 Subject: [PATCH 21/85] Replace the tensor property with explicit get_tensors and set_tensors methods --- morpheus/_lib/src/python_modules/messages.cpp | 29 +++---------- morpheus/messages/multi_inference_message.py | 35 +++++++++------- morpheus/messages/multi_response_message.py | 42 +++++++++++-------- morpheus/messages/tensor_memory.py | 36 ++++++++++++++-- tests/test_tensor_memory.py | 24 +++++------ 5 files changed, 93 insertions(+), 73 deletions(-) diff --git a/morpheus/_lib/src/python_modules/messages.cpp b/morpheus/_lib/src/python_modules/messages.cpp index f1ad39550e..0aa4e5bec2 100644 --- a/morpheus/_lib/src/python_modules/messages.cpp +++ b/morpheus/_lib/src/python_modules/messages.cpp @@ -57,17 +57,6 @@ namespace morpheus { namespace fs = std::filesystem; namespace py = pybind11; -// https://pybind11.readthedocs.io/en/stable/advanced/cast/stl.html?highlight=opaque#making-opaque-types -const char* TensorPropDocstring{R"pbdoc( - The tensors property has a limitation in that it always returns a copy. Resulting in code like: - >>> m.tensors['c'] = cp.zeros(count) - - Not having the intended outcome. Instead the following work-around can be performed: - >>> tensors = m.tensors - >>> tensors['c'] = cp.zeros(count) - >>> m.tensors = tensors -)pbdoc"}; - // Define the pybind11 module m, as 'pipeline'. 
PYBIND11_MODULE(messages, m) { @@ -173,10 +162,8 @@ PYBIND11_MODULE(messages, m) py::arg("count"), py::arg("tensors") = CupyUtil::py_tensor_map_t()) .def_property_readonly("count", &InferenceMemoryInterfaceProxy::get_count) - .def_property("tensors", - &InferenceMemoryInterfaceProxy::get_tensors, - &InferenceMemoryInterfaceProxy::set_tensors, - TensorPropDocstring); + .def("get_tensors", &InferenceMemoryInterfaceProxy::get_tensors, py::return_value_policy::move) + .def("set_tensors", &InferenceMemoryInterfaceProxy::set_tensors); py::class_>(m, "InferenceMemoryNLP") .def(py::init<>(&InferenceMemoryNLPInterfaceProxy::init), @@ -255,20 +242,16 @@ PYBIND11_MODULE(messages, m) py::arg("count"), py::arg("tensors") = CupyUtil::py_tensor_map_t()) .def_readonly("count", &TensorMemory::count) - .def_property("tensors", - &TensorMemoryInterfaceProxy::get_tensors, - &TensorMemoryInterfaceProxy::set_tensors, - TensorPropDocstring); + .def("get_tensors", &TensorMemoryInterfaceProxy::get_tensors, py::return_value_policy::move) + .def("set_tensors", &TensorMemoryInterfaceProxy::set_tensors); py::class_>(m, "ResponseMemory") .def(py::init<>(&ResponseMemoryInterfaceProxy::init), py::arg("count"), py::arg("tensors") = CupyUtil::py_tensor_map_t()) .def_readonly("count", &ResponseMemory::count) - .def_property("tensors", - &ResponseMemoryInterfaceProxy::get_tensors, - &ResponseMemoryInterfaceProxy::set_tensors, - TensorPropDocstring) + .def("get_tensors", &ResponseMemoryInterfaceProxy::get_tensors, py::return_value_policy::move) + .def("set_tensors", &ResponseMemoryInterfaceProxy::set_tensors) .def("get_output", &ResponseMemoryInterfaceProxy::get_output, py::return_value_policy::reference_internal) .def("get_output_tensor", &ResponseMemoryInterfaceProxy::get_output_tensor, diff --git a/morpheus/messages/multi_inference_message.py b/morpheus/messages/multi_inference_message.py index 667b921401..a522adefdd 100644 --- a/morpheus/messages/multi_inference_message.py +++ 
b/morpheus/messages/multi_inference_message.py @@ -24,7 +24,7 @@ from morpheus.messages.tensor_memory import TensorMemory -@dataclasses.dataclass +@dataclasses.dataclass(init=False) class InferenceMemory(TensorMemory, cpp_class=_messages.InferenceMemory): """ This is a base container class for data that will be used for inference stages. This class is designed to @@ -54,11 +54,11 @@ def get_input(instance, name: str): AttributeError If input name does not exist in message container. """ - if (name not in instance.tensors): + try: + return instance.get_tensors()[name] + except KeyError: raise AttributeError - return instance.tensors[name] - def set_input(instance, name: str, value): """ @@ -75,7 +75,9 @@ def set_input(instance, name: str, value): Value to set for input. """ # Ensure that we have 2D array here (`ensure_2d` inserts the wrong axis) - instance.tensors[name] = value if value.ndim == 2 else cp.reshape(value, (value.shape[0], -1)) + tensors = instance.get_tensors() + tensors[name] = value if value.ndim == 2 else cp.reshape(value, (value.shape[0], -1)) + instance.set_tensors(tensors) @dataclasses.dataclass(init=False) @@ -184,8 +186,8 @@ def inputs(self): Inference inputs. """ - - return {key: self.get_input(key) for key in self.memory.tensors.keys()} + tensors = self.memory.get_tensors() + return {key: self.get_input(key) for key in tensors.keys()} def __getstate__(self): return self.__dict__ @@ -194,13 +196,10 @@ def __setstate__(self, d): self.__dict__ = d def __getattr__(self, name: str) -> typing.Any: - - input_val = self.memory.tensors.get(name, None) - - if (input_val is not None): - return input_val[self.offset:self.offset + self.count, :] - - raise AttributeError + try: + self.get_input(name) + except KeyError: + raise AttributeError def get_input(self, name: str): """ @@ -216,9 +215,13 @@ def get_input(self, name: str): cupy.ndarray Inference input. + Raises + ------ + KeyError + When no matching input tensor exists. 
""" - - return self.memory.tensors[name][self.offset:self.offset + self.count, :] + tensors = self.memory.get_tensors() + return tensors[name][self.offset:self.offset + self.count, :] def get_slice(self, start, stop): """ diff --git a/morpheus/messages/multi_response_message.py b/morpheus/messages/multi_response_message.py index 9b121bae68..019726ad04 100644 --- a/morpheus/messages/multi_response_message.py +++ b/morpheus/messages/multi_response_message.py @@ -48,12 +48,11 @@ def get_output(instance: "ResponseMemory", name: str): If output name does not exist in message container. """ - - if (name not in instance.tensors): + try: + return instance.get_tensors()[name] + except KeyError: raise AttributeError - return instance.tensors[name] - def set_output(instance: "ResponseMemory", name: str, value): """ @@ -71,10 +70,12 @@ def set_output(instance: "ResponseMemory", name: str, value): """ # Ensure that we have 2D array here (`ensure_2d` inserts the wrong axis) - instance.tensors[name] = value if value.ndim == 2 else cp.reshape(value, (value.shape[0], -1)) + tensors = instance.get_tensors() + tensors[name] = value if value.ndim == 2 else cp.reshape(value, (value.shape[0], -1)) + instance.set_tensors(tensors) -@dataclasses.dataclass +@dataclasses.dataclass(init=False) class ResponseMemory(TensorMemory, cpp_class=_messages.ResponseMemory): """Output memory block holding the results of inference.""" @@ -91,8 +92,16 @@ def get_output(self, name: str): ------- cupy.ndarray Tensor corresponding to name. + + Raises + ------ + AttributeError + If input name does not exist in message container. """ - return self.tensors[name] + try: + return self.get_tensors()[name] + except KeyError: + raise AttributeError @dataclasses.dataclass(init=False) @@ -166,17 +175,14 @@ def outputs(self): Inference outputs. 
""" - - return {key: self.get_output(key) for key in self.memory.tensors.keys()} + tensors = self.memory.get_tensors() + return {key: self.get_output(key) for key in tensors.keys()} def __getattr__(self, name: str) -> typing.Any: - - output_val = self.memory.tensors.get(name, None) - - if (output_val is not None): - return output_val[self.offset:self.offset + self.count, :] - - raise AttributeError + try: + self.get_output(name) + except KeyError: + raise AttributeError def get_output(self, name: str): """ @@ -193,8 +199,8 @@ def get_output(self, name: str): Inference output. """ - - return self.memory.tensors[name][self.offset:self.offset + self.count, :] + tensors = self.memory.get_tensors() + return tensors[name][self.offset:self.offset + self.count, :] def copy_output_ranges(self, ranges, mask=None): """ diff --git a/morpheus/messages/tensor_memory.py b/morpheus/messages/tensor_memory.py index 523433367b..25497f55ab 100644 --- a/morpheus/messages/tensor_memory.py +++ b/morpheus/messages/tensor_memory.py @@ -22,7 +22,7 @@ from morpheus.messages.message_base import MessageData -@dataclasses.dataclass +@dataclasses.dataclass(init=False) class TensorMemory(MessageData, cpp_class=_messages.TensorMemory): """ This is a base container class for data that will be used for inference stages. This class is designed to @@ -31,11 +31,39 @@ class TensorMemory(MessageData, cpp_class=_messages.TensorMemory): Parameters ---------- count : int - Number of inference inputs. - inputs : typing.Dict[str, cupy.ndarray] - Inference inputs to model. + Length of each tensor contained in `tensors`. + tensors : typing.Dict[str, cupy.ndarray] + Collection of tensors uniquely identified by a name. 
""" count: int tensors: typing.Dict[str, cp.ndarray] = dataclasses.field(default_factory=dict) + + def __init__(self, count: int, tensors: typing.Dict[str, cp.ndarray] = {}): + self.count = count + self._tensors = tensors + + def get_tensors(self): + """ + Get the tensors contained by this instance. It is important to note that when C++ execution is enabled the + returned tensors will be a Python copy of the tensors stored in the C++ object. As such any changes made to the + tensors will need to be updated with a call to `set_tensors`. + + Returns + ------- + typing.Dict[str, cp.ndarray] + """ + return self._tensors + + def set_tensors(self, tensors): + """ + Overwrite the tensors stored by this instance. If the length of the tensors has changed, then the `count` + properte should also be updated. + + Parameters + ---------- + tensors : typing.Dict[str, cupy.ndarray] + Collection of tensors uniquely identified by a name. + """ + self._tensors = tensors diff --git a/tests/test_tensor_memory.py b/tests/test_tensor_memory.py index 512844e1a4..f1eda298f5 100644 --- a/tests/test_tensor_memory.py +++ b/tests/test_tensor_memory.py @@ -47,20 +47,20 @@ def check_tensor_memory(cls, count, tensors): m = cls(count) assert m.count == count - assert m.tensors == {} + assert m.get_tensors() == {} - m.tensors = tensors - compare_tensors(m.tensors, tensors) + m.set_tensors(tensors) + compare_tensors(m.get_tensors(), tensors) - m.tensors = other_tensors - compare_tensors(m.tensors, other_tensors) + m.set_tensors(other_tensors) + compare_tensors(m.get_tensors(), other_tensors) m = cls(count, tensors) assert m.count == count - compare_tensors(m.tensors, tensors) + compare_tensors(m.get_tensors(), tensors) - m.tensors = other_tensors - compare_tensors(m.tensors, other_tensors) + m.set_tensors(other_tensors) + compare_tensors(m.get_tensors(), other_tensors) def test_tensor_memory(config): @@ -87,7 +87,7 @@ def test_inference_memory_ae(config): m = InferenceMemoryAE(count, input=input, 
seq_ids=seq_ids) assert m.count == count - compare_tensors(m.tensors, {'input': input, 'seq_ids': seq_ids}) + compare_tensors(m.get_tensors(), {'input': input, 'seq_ids': seq_ids}) assert (m.input == input).all() assert (m.seq_ids == seq_ids).all() @@ -101,7 +101,7 @@ def test_inference_memory_fil(config): m = InferenceMemoryFIL(count, input__0=input_0, seq_ids=seq_ids) assert m.count == count - compare_tensors(m.tensors, {'input__0': input_0, 'seq_ids': seq_ids}) + compare_tensors(m.get_tensors(), {'input__0': input_0, 'seq_ids': seq_ids}) assert (m.input__0 == input_0).all() assert (m.seq_ids == seq_ids).all() @@ -116,7 +116,7 @@ def test_inference_memory_nlp(config): m = InferenceMemoryNLP(count, input_ids=input_ids, input_mask=input_mask, seq_ids=seq_ids) assert m.count == count - compare_tensors(m.tensors, {'input_ids': input_ids, 'input_mask': input_mask, 'seq_ids': seq_ids}) + compare_tensors(m.get_tensors(), {'input_ids': input_ids, 'input_mask': input_mask, 'seq_ids': seq_ids}) assert (m.input_ids == input_ids).all() assert (m.input_mask == input_mask).all() assert (m.seq_ids == seq_ids).all() @@ -128,7 +128,7 @@ def check_response_memory_probs_and_ae(cls): m = cls(count=count, probs=test_data) assert m.count == count - compare_tensors(m.tensors, {'probs': test_data}) + compare_tensors(m.get_tensors(), {'probs': test_data}) assert (m.probs == test_data).all() return m From 98c02e96d65d71c0aceea6b8f6cb408440c63b01 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Wed, 8 Feb 2023 09:46:33 -0800 Subject: [PATCH 22/85] Exclude tensors from the repr --- morpheus/messages/tensor_memory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/morpheus/messages/tensor_memory.py b/morpheus/messages/tensor_memory.py index 25497f55ab..5f223e10ce 100644 --- a/morpheus/messages/tensor_memory.py +++ b/morpheus/messages/tensor_memory.py @@ -38,7 +38,7 @@ class TensorMemory(MessageData, cpp_class=_messages.TensorMemory): """ count: int - tensors: 
typing.Dict[str, cp.ndarray] = dataclasses.field(default_factory=dict) + tensors: typing.Dict[str, cp.ndarray] = dataclasses.field(default_factory=dict, repr=False) def __init__(self, count: int, tensors: typing.Dict[str, cp.ndarray] = {}): self.count = count From fc58151d634f4e78af0e6e522c4a2867e55240d2 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Wed, 8 Feb 2023 09:50:53 -0800 Subject: [PATCH 23/85] Exclude tensors from generated eq and hash --- morpheus/messages/tensor_memory.py | 5 ++++- tests/test_inference_stage.py | 1 - 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/morpheus/messages/tensor_memory.py b/morpheus/messages/tensor_memory.py index 5f223e10ce..4df83dda29 100644 --- a/morpheus/messages/tensor_memory.py +++ b/morpheus/messages/tensor_memory.py @@ -38,7 +38,10 @@ class TensorMemory(MessageData, cpp_class=_messages.TensorMemory): """ count: int - tensors: typing.Dict[str, cp.ndarray] = dataclasses.field(default_factory=dict, repr=False) + tensors: typing.Dict[str, cp.ndarray] = dataclasses.field(default_factory=dict, + repr=False, + compare=False, + hash=False) def __init__(self, count: int, tensors: typing.Dict[str, cp.ndarray] = {}): self.count = count diff --git a/tests/test_inference_stage.py b/tests/test_inference_stage.py index e8b0f1a299..3670d3b210 100755 --- a/tests/test_inference_stage.py +++ b/tests/test_inference_stage.py @@ -303,7 +303,6 @@ def test_convert_one_response(config): assert mpm.meta == inf.meta assert mpm.mess_offset == 0 assert mpm.mess_count == 1 - assert mpm.memory == mem assert mpm.offset == 0 assert mpm.count == 1 assert mem.get_output('probs').tolist() == [[1.0, 2.0, 3.0]] From e12fb17fac1123936c1b254008e8485d821dd008 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Wed, 8 Feb 2023 10:15:03 -0800 Subject: [PATCH 24/85] Actually return the attr :) --- morpheus/messages/multi_inference_message.py | 2 +- morpheus/messages/multi_response_message.py | 2 +- 2 files changed, 2 insertions(+), 2 
deletions(-) diff --git a/morpheus/messages/multi_inference_message.py b/morpheus/messages/multi_inference_message.py index a522adefdd..16b06db2eb 100644 --- a/morpheus/messages/multi_inference_message.py +++ b/morpheus/messages/multi_inference_message.py @@ -197,7 +197,7 @@ def __setstate__(self, d): def __getattr__(self, name: str) -> typing.Any: try: - self.get_input(name) + return self.get_input(name) except KeyError: raise AttributeError diff --git a/morpheus/messages/multi_response_message.py b/morpheus/messages/multi_response_message.py index 019726ad04..da2fb53462 100644 --- a/morpheus/messages/multi_response_message.py +++ b/morpheus/messages/multi_response_message.py @@ -180,7 +180,7 @@ def outputs(self): def __getattr__(self, name: str) -> typing.Any: try: - self.get_output(name) + return self.get_output(name) except KeyError: raise AttributeError From 3749c459146c0d1882b669170b81d802b2723a19 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Mon, 13 Feb 2023 12:07:44 -0800 Subject: [PATCH 25/85] Add & adopt get_tensor and set_tensor methods --- morpheus/messages/multi_inference_message.py | 17 ++++------ morpheus/messages/multi_response_message.py | 17 ++++------ morpheus/messages/tensor_memory.py | 35 +++++++++++++++++++- 3 files changed, 46 insertions(+), 23 deletions(-) diff --git a/morpheus/messages/multi_inference_message.py b/morpheus/messages/multi_inference_message.py index 16b06db2eb..64759d84c6 100644 --- a/morpheus/messages/multi_inference_message.py +++ b/morpheus/messages/multi_inference_message.py @@ -55,7 +55,7 @@ def get_input(instance, name: str): If input name does not exist in message container. """ try: - return instance.get_tensors()[name] + return instance.get_tensor(name) except KeyError: raise AttributeError @@ -75,9 +75,8 @@ def set_input(instance, name: str, value): Value to set for input. 
""" # Ensure that we have 2D array here (`ensure_2d` inserts the wrong axis) - tensors = instance.get_tensors() - tensors[name] = value if value.ndim == 2 else cp.reshape(value, (value.shape[0], -1)) - instance.set_tensors(tensors) + tensor = value if value.ndim == 2 else cp.reshape(value, (value.shape[0], -1)) + instance.set_tensor(name, tensor) @dataclasses.dataclass(init=False) @@ -196,10 +195,7 @@ def __setstate__(self, d): self.__dict__ = d def __getattr__(self, name: str) -> typing.Any: - try: - return self.get_input(name) - except KeyError: - raise AttributeError + return self.get_input(name) def get_input(self, name: str): """ @@ -217,11 +213,10 @@ def get_input(self, name: str): Raises ------ - KeyError + AttributeError When no matching input tensor exists. """ - tensors = self.memory.get_tensors() - return tensors[name][self.offset:self.offset + self.count, :] + return self.memory.get_input(name)[self.offset:self.offset + self.count, :] def get_slice(self, start, stop): """ diff --git a/morpheus/messages/multi_response_message.py b/morpheus/messages/multi_response_message.py index da2fb53462..7035bfbd75 100644 --- a/morpheus/messages/multi_response_message.py +++ b/morpheus/messages/multi_response_message.py @@ -49,7 +49,7 @@ def get_output(instance: "ResponseMemory", name: str): """ try: - return instance.get_tensors()[name] + return instance.get_tensor(name) except KeyError: raise AttributeError @@ -70,9 +70,8 @@ def set_output(instance: "ResponseMemory", name: str, value): """ # Ensure that we have 2D array here (`ensure_2d` inserts the wrong axis) - tensors = instance.get_tensors() - tensors[name] = value if value.ndim == 2 else cp.reshape(value, (value.shape[0], -1)) - instance.set_tensors(tensors) + tensor = value if value.ndim == 2 else cp.reshape(value, (value.shape[0], -1)) + instance.set_tensor(name, tensor) @dataclasses.dataclass(init=False) @@ -99,7 +98,7 @@ def get_output(self, name: str): If input name does not exist in message container. 
""" try: - return self.get_tensors()[name] + return self.get_tensor(name) except KeyError: raise AttributeError @@ -179,10 +178,7 @@ def outputs(self): return {key: self.get_output(key) for key in tensors.keys()} def __getattr__(self, name: str) -> typing.Any: - try: - return self.get_output(name) - except KeyError: - raise AttributeError + return self.get_output(name) def get_output(self, name: str): """ @@ -199,8 +195,7 @@ def get_output(self, name: str): Inference output. """ - tensors = self.memory.get_tensors() - return tensors[name][self.offset:self.offset + self.count, :] + return self.memory.get_output(name)[self.offset:self.offset + self.count, :] def copy_output_ranges(self, ranges, mask=None): """ diff --git a/morpheus/messages/tensor_memory.py b/morpheus/messages/tensor_memory.py index 4df83dda29..3c1675eb27 100644 --- a/morpheus/messages/tensor_memory.py +++ b/morpheus/messages/tensor_memory.py @@ -62,7 +62,7 @@ def get_tensors(self): def set_tensors(self, tensors): """ Overwrite the tensors stored by this instance. If the length of the tensors has changed, then the `count` - properte should also be updated. + property should also be updated. Parameters ---------- @@ -70,3 +70,36 @@ def set_tensors(self, tensors): Collection of tensors uniquely identified by a name. """ self._tensors = tensors + + def get_tensor(self, name): + """ + Get the Tensor stored in the TensorMemory container identified by `name`. + + Parameters + ---------- + name : str + Tensor key name. + + Returns + ------- + cupy.ndarray + Tensor. + + Raises + ------ + KeyError + When no matching tensor exists. + """ + return self._tensors[name] + + def set_tensor(self, name, tensor): + """ + Update the tensor identified by `name`. If the length of the tensor has changed, then the `count` + property should also be updated. + + Parameters + ---------- + tensors : typing.Dict[str, cupy.ndarray] + Collection of tensors uniquely identified by a name. 
+ """ + self._tensors[name] = tensor From c4ba089a0142be9f3069195bebb10d5f90d8b24f Mon Sep 17 00:00:00 2001 From: David Gardner Date: Mon, 13 Feb 2023 16:19:49 -0800 Subject: [PATCH 26/85] wip --- .../messages/memory/inference_memory.hpp | 13 +----- .../messages/memory/inference_memory_fil.hpp | 19 +------- .../messages/memory/inference_memory_nlp.hpp | 10 +---- .../messages/memory/response_memory.hpp | 23 +--------- .../messages/memory/response_memory_probs.hpp | 17 +++----- .../messages/memory/tensor_memory.hpp | 33 ++++++++++++++ .../src/messages/memory/inference_memory.cpp | 15 ------- .../messages/memory/inference_memory_fil.cpp | 22 +++------- .../messages/memory/inference_memory_nlp.cpp | 5 --- .../src/messages/memory/response_memory.cpp | 28 +----------- .../messages/memory/response_memory_probs.cpp | 5 --- .../src/messages/memory/tensor_memory.cpp | 27 ++++++++++++ morpheus/_lib/src/python_modules/messages.cpp | 43 +++++++++++++------ 13 files changed, 106 insertions(+), 154 deletions(-) diff --git a/morpheus/_lib/include/morpheus/messages/memory/inference_memory.hpp b/morpheus/_lib/include/morpheus/messages/memory/inference_memory.hpp index 1ccdf33fb7..fc303089ee 100644 --- a/morpheus/_lib/include/morpheus/messages/memory/inference_memory.hpp +++ b/morpheus/_lib/include/morpheus/messages/memory/inference_memory.hpp @@ -69,7 +69,7 @@ class InferenceMemory : public TensorMemory /** * @brief Interface proxy, used to insulate python bindings. */ -struct InferenceMemoryInterfaceProxy +struct InferenceMemoryInterfaceProxy : public TensorMemoryInterfaceProxy { /** * @brief Create and initialize a InferenceMemory object, and return a shared pointer to the result. 
Each array in @@ -80,17 +80,6 @@ struct InferenceMemoryInterfaceProxy * @return std::shared_ptr */ static std::shared_ptr init(std::size_t count, CupyUtil::py_tensor_map_t tensors); - - /** - * @brief Get the count object - * - * @param self - * @return std::size_t - */ - static std::size_t get_count(InferenceMemory& self); - - static CupyUtil::py_tensor_map_t get_tensors(InferenceMemory& self); - static void set_tensors(InferenceMemory& self, CupyUtil::py_tensor_map_t tensors); }; #pragma GCC visibility pop diff --git a/morpheus/_lib/include/morpheus/messages/memory/inference_memory_fil.hpp b/morpheus/_lib/include/morpheus/messages/memory/inference_memory_fil.hpp index 26e285627c..9f27dfe73e 100644 --- a/morpheus/_lib/include/morpheus/messages/memory/inference_memory_fil.hpp +++ b/morpheus/_lib/include/morpheus/messages/memory/inference_memory_fil.hpp @@ -93,7 +93,7 @@ class InferenceMemoryFIL : public InferenceMemory /** * @brief Interface proxy, used to insulate python bindings */ -struct InferenceMemoryFILInterfaceProxy +struct InferenceMemoryFILInterfaceProxy : public InferenceMemoryInterfaceProxy { /** * @brief Create and initialize an InferenceMemoryFIL object, and return a shared pointer to the result @@ -108,23 +108,6 @@ struct InferenceMemoryFILInterfaceProxy pybind11::object input__0, pybind11::object seq_ids); - /** - * Get messages count in the inference memory instance - * - * @param self - * @return std::size_t - */ - static std::size_t count(InferenceMemoryFIL& self); - - /** - * Return the requested tensor for a given name - * - * @param self - * @param name Tensor name - * @return TensorObject - */ - static TensorObject get_tensor(InferenceMemoryFIL& self, const std::string& name); - /** * @brief Returns the 'input__0' as cupy array * diff --git a/morpheus/_lib/include/morpheus/messages/memory/inference_memory_nlp.hpp b/morpheus/_lib/include/morpheus/messages/memory/inference_memory_nlp.hpp index 07ba1e87b9..abf138ab2b 100644 --- 
a/morpheus/_lib/include/morpheus/messages/memory/inference_memory_nlp.hpp +++ b/morpheus/_lib/include/morpheus/messages/memory/inference_memory_nlp.hpp @@ -102,7 +102,7 @@ class InferenceMemoryNLP : public InferenceMemory /** * @brief Interface proxy, used to insulate python bindings. */ -struct InferenceMemoryNLPInterfaceProxy +struct InferenceMemoryNLPInterfaceProxy : public InferenceMemoryInterfaceProxy { /** * @brief Create and initialize an InferenceMemoryNLP object, and return a shared pointer to the result @@ -119,14 +119,6 @@ struct InferenceMemoryNLPInterfaceProxy pybind11::object input_mask, pybind11::object seq_ids); - /** - * Get messages count in the inference memory object - * - * @param self - * @return std::size_t - */ - static std::size_t count(InferenceMemoryNLP& self); - /** * @brief : Returns token-ids for each string padded with 0s to max_length as python object * diff --git a/morpheus/_lib/include/morpheus/messages/memory/response_memory.hpp b/morpheus/_lib/include/morpheus/messages/memory/response_memory.hpp index b9df93e1a2..93bec4f6b1 100644 --- a/morpheus/_lib/include/morpheus/messages/memory/response_memory.hpp +++ b/morpheus/_lib/include/morpheus/messages/memory/response_memory.hpp @@ -72,7 +72,7 @@ class ResponseMemory : public TensorMemory * @brief Interface proxy, used to insulate python bindings. * */ -struct ResponseMemoryInterfaceProxy +struct ResponseMemoryInterfaceProxy : public TensorMemoryInterfaceProxy { /** * @brief Create and initialize a ResponseMemory object, and return a shared pointer to the result. 
Each array in @@ -83,27 +83,6 @@ struct ResponseMemoryInterfaceProxy * @return std::shared_ptr */ static std::shared_ptr init(std::size_t count, CupyUtil::py_tensor_map_t tensors); - - /** - * @brief Get the output object - * - * @param self - * @param name - * @return pybind11::object - */ - static pybind11::object get_output(ResponseMemory& self, const std::string& name); - - /** - * @brief Get the output tensor object - * - * @param self - * @param name - * @return TensorObject - */ - static TensorObject get_output_tensor(ResponseMemory& self, const std::string& name); - - static CupyUtil::py_tensor_map_t get_tensors(ResponseMemory& self); - static void set_tensors(ResponseMemory& self, CupyUtil::py_tensor_map_t tensors); }; #pragma GCC visibility pop diff --git a/morpheus/_lib/include/morpheus/messages/memory/response_memory_probs.hpp b/morpheus/_lib/include/morpheus/messages/memory/response_memory_probs.hpp index 9685e03c6c..e8dce0ec5f 100644 --- a/morpheus/_lib/include/morpheus/messages/memory/response_memory_probs.hpp +++ b/morpheus/_lib/include/morpheus/messages/memory/response_memory_probs.hpp @@ -79,7 +79,7 @@ class ResponseMemoryProbs : public ResponseMemory /** * @brief Interface proxy, used to insulate python bindings */ -struct ResponseMemoryProbsInterfaceProxy +struct ResponseMemoryProbsInterfaceProxy : public ResponseMemoryInterfaceProxy { /** * @brief Create and initialize a ResponseMemoryProbs object, and return a shared pointer to the result @@ -91,28 +91,21 @@ struct ResponseMemoryProbsInterfaceProxy static std::shared_ptr init(cudf::size_type count, pybind11::object probs); /** - * @brief Get messages count in the response memory probs object - * - * @param self - * @return std::size_t - */ - static std::size_t count(ResponseMemoryProbs& self); - - /** - * @brief Get the response memory probs object + * @brief Get the response memory probs object (alias for `get_tensor("probs")`) * * @param self * @return pybind11::object + * @throws 
pybind11::key_error When no matching tensor exists. */ static pybind11::object get_probs(ResponseMemoryProbs& self); /** - * @brief Set the response memory probs object + * @brief Set the response memory probs object (alias for `set_tensor("probs", cupy_value)`) * * @param self * @param cupy_values */ - static void set_probs(ResponseMemoryProbs& self, pybind11::object cupy_values); + static void set_probs(ResponseMemoryProbs& self, pybind11::object cupy_value); }; #pragma GCC visibility pop diff --git a/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp b/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp index a9959f8ecc..8c2ea9ec5a 100644 --- a/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp +++ b/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp @@ -118,7 +118,40 @@ struct TensorMemoryInterfaceProxy */ static CupyUtil::py_tensor_map_t get_tensors(TensorMemory& self); + /** + * @brief Set the tensors object converting a map of CuPy arrays to Tensors + * + * @param self + * @param tensors + */ static void set_tensors(TensorMemory& self, CupyUtil::py_tensor_map_t tensors); + + /** + * @brief Get the output tensor + * + * @param self + * @param name + * @return const TensorObject& + */ + static const TensorObject& get_tensor_object(TensorMemory& self, const std::string& name); + + /** + * @brief Get the tensor object identified by `name` + * + * @param self + * @param name + * @return pybind11::object + * @throws pybind11::key_error When no matching tensor exists. 
+ */ + static pybind11::object get_tensor(TensorMemory& self, const std::string name); + + /** + * @brief Set the tensor object identified by `name` + * + * @param self + * @param cupy_tensor + */ + static void set_tensor(TensorMemory& self, const std::string name, const pybind11::object& cupy_tensor); }; #pragma GCC visibility pop diff --git a/morpheus/_lib/src/messages/memory/inference_memory.cpp b/morpheus/_lib/src/messages/memory/inference_memory.cpp index 63e716611b..85e5f7bc67 100644 --- a/morpheus/_lib/src/messages/memory/inference_memory.cpp +++ b/morpheus/_lib/src/messages/memory/inference_memory.cpp @@ -40,19 +40,4 @@ std::shared_ptr InferenceMemoryInterfaceProxy::init(std::size_t return std::make_shared(count, std::move(CupyUtil::cupy_to_tensors(tensors))); } -std::size_t InferenceMemoryInterfaceProxy::get_count(InferenceMemory& self) -{ - return self.count; -} - -CupyUtil::py_tensor_map_t InferenceMemoryInterfaceProxy::get_tensors(InferenceMemory& self) -{ - return CupyUtil::tensors_to_cupy(self.tensors); -} - -void InferenceMemoryInterfaceProxy::set_tensors(InferenceMemory& self, CupyUtil::py_tensor_map_t tensors) -{ - self.tensors = std::move(CupyUtil::cupy_to_tensors(tensors)); -} - } // namespace morpheus diff --git a/morpheus/_lib/src/messages/memory/inference_memory_fil.cpp b/morpheus/_lib/src/messages/memory/inference_memory_fil.cpp index b5c6039c91..2d2e921de5 100644 --- a/morpheus/_lib/src/messages/memory/inference_memory_fil.cpp +++ b/morpheus/_lib/src/messages/memory/inference_memory_fil.cpp @@ -40,7 +40,7 @@ InferenceMemoryFIL::InferenceMemoryFIL(size_t count, TensorObject input__0, Tens this->tensors["seq_ids"] = std::move(seq_ids); } -const TensorObject &InferenceMemoryFIL::get_input__0() const +const TensorObject& InferenceMemoryFIL::get_input__0() const { auto found = this->tensors.find("input__0"); if (found == this->tensors.end()) @@ -56,7 +56,7 @@ void InferenceMemoryFIL::set_input__0(TensorObject input__0) this->tensors["input__0"] = 
std::move(input__0); } -const TensorObject &InferenceMemoryFIL::get_seq_ids() const +const TensorObject& InferenceMemoryFIL::get_seq_ids() const { auto found = this->tensors.find("seq_ids"); if (found == this->tensors.end()) @@ -81,32 +81,22 @@ std::shared_ptr InferenceMemoryFILInterfaceProxy::init(cudf: count, std::move(CupyUtil::cupy_to_tensor(input__0)), std::move(CupyUtil::cupy_to_tensor(seq_ids))); } -std::size_t InferenceMemoryFILInterfaceProxy::count(InferenceMemoryFIL &self) -{ - return self.count; -} - -TensorObject InferenceMemoryFILInterfaceProxy::get_tensor(InferenceMemoryFIL &self, const std::string &name) -{ - return self.tensors[name]; -} - -pybind11::object InferenceMemoryFILInterfaceProxy::get_input__0(InferenceMemoryFIL &self) +pybind11::object InferenceMemoryFILInterfaceProxy::get_input__0(InferenceMemoryFIL& self) { return CupyUtil::tensor_to_cupy(self.get_input__0()); } -void InferenceMemoryFILInterfaceProxy::set_input__0(InferenceMemoryFIL &self, pybind11::object cupy_values) +void InferenceMemoryFILInterfaceProxy::set_input__0(InferenceMemoryFIL& self, pybind11::object cupy_values) { self.set_input__0(CupyUtil::cupy_to_tensor(cupy_values)); } -pybind11::object InferenceMemoryFILInterfaceProxy::get_seq_ids(InferenceMemoryFIL &self) +pybind11::object InferenceMemoryFILInterfaceProxy::get_seq_ids(InferenceMemoryFIL& self) { return CupyUtil::tensor_to_cupy(self.get_seq_ids()); } -void InferenceMemoryFILInterfaceProxy::set_seq_ids(InferenceMemoryFIL &self, pybind11::object cupy_values) +void InferenceMemoryFILInterfaceProxy::set_seq_ids(InferenceMemoryFIL& self, pybind11::object cupy_values) { return self.set_seq_ids(CupyUtil::cupy_to_tensor(cupy_values)); } diff --git a/morpheus/_lib/src/messages/memory/inference_memory_nlp.cpp b/morpheus/_lib/src/messages/memory/inference_memory_nlp.cpp index 031a60fe0f..3c2e7107c2 100644 --- a/morpheus/_lib/src/messages/memory/inference_memory_nlp.cpp +++ 
b/morpheus/_lib/src/messages/memory/inference_memory_nlp.cpp @@ -104,11 +104,6 @@ std::shared_ptr InferenceMemoryNLPInterfaceProxy::init(cudf: std::move(CupyUtil::cupy_to_tensor(seq_ids))); } -std::size_t InferenceMemoryNLPInterfaceProxy::count(InferenceMemoryNLP& self) -{ - return self.count; -} - pybind11::object InferenceMemoryNLPInterfaceProxy::get_input_ids(InferenceMemoryNLP& self) { return CupyUtil::tensor_to_cupy(self.get_input_ids()); diff --git a/morpheus/_lib/src/messages/memory/response_memory.cpp b/morpheus/_lib/src/messages/memory/response_memory.cpp index f942579b55..954331739f 100644 --- a/morpheus/_lib/src/messages/memory/response_memory.cpp +++ b/morpheus/_lib/src/messages/memory/response_memory.cpp @@ -19,7 +19,7 @@ #include "morpheus/utilities/cupy_util.hpp" -#include +#include // for key_error & object #include #include // for move @@ -42,30 +42,4 @@ std::shared_ptr ResponseMemoryInterfaceProxy::init(std::size_t c return std::make_shared(count, std::move(CupyUtil::cupy_to_tensors(tensors))); } -pybind11::object ResponseMemoryInterfaceProxy::get_output(ResponseMemory& self, const std::string& name) -{ - return CupyUtil::tensor_to_cupy(get_output_tensor(self, name)); -} - -TensorObject ResponseMemoryInterfaceProxy::get_output_tensor(ResponseMemory& self, const std::string& name) -{ - // Directly return the tensor object - if (!self.has_tensor(name)) - { - throw pybind11::key_error(); - } - - return self.tensors[name]; -} - -CupyUtil::py_tensor_map_t ResponseMemoryInterfaceProxy::get_tensors(ResponseMemory& self) -{ - return CupyUtil::tensors_to_cupy(self.tensors); -} - -void ResponseMemoryInterfaceProxy::set_tensors(ResponseMemory& self, CupyUtil::py_tensor_map_t tensors) -{ - self.tensors = std::move(CupyUtil::cupy_to_tensors(tensors)); -} - } // namespace morpheus diff --git a/morpheus/_lib/src/messages/memory/response_memory_probs.cpp b/morpheus/_lib/src/messages/memory/response_memory_probs.cpp index 4f4e4195c7..8d5e09b998 100644 --- 
a/morpheus/_lib/src/messages/memory/response_memory_probs.cpp +++ b/morpheus/_lib/src/messages/memory/response_memory_probs.cpp @@ -66,11 +66,6 @@ std::shared_ptr ResponseMemoryProbsInterfaceProxy::init(cud return std::make_shared(count, std::move(CupyUtil::cupy_to_tensor(probs))); } -std::size_t ResponseMemoryProbsInterfaceProxy::count(ResponseMemoryProbs& self) -{ - return self.count; -} - pybind11::object ResponseMemoryProbsInterfaceProxy::get_probs(ResponseMemoryProbs& self) { return CupyUtil::tensor_to_cupy(self.get_probs()); diff --git a/morpheus/_lib/src/messages/memory/tensor_memory.cpp b/morpheus/_lib/src/messages/memory/tensor_memory.cpp index 2df786bd7f..6449953b14 100644 --- a/morpheus/_lib/src/messages/memory/tensor_memory.cpp +++ b/morpheus/_lib/src/messages/memory/tensor_memory.cpp @@ -17,6 +17,10 @@ #include "morpheus/messages/memory/tensor_memory.hpp" +#include "morpheus/utilities/cupy_util.hpp" + +#include // for key_error & object + #include #include @@ -65,4 +69,27 @@ void TensorMemoryInterfaceProxy::set_tensors(TensorMemory& self, CupyUtil::py_te self.tensors = std::move(CupyUtil::cupy_to_tensors(tensors)); } +const TensorObject& TensorMemoryInterfaceProxy::get_tensor_object(TensorMemory& self, const std::string& name) +{ + const auto tensor_itr = self.tensors.find(name); + if (tensor_itr == self.tensors.end()) + { + throw pybind11::key_error{}; + } + + return tensor_itr->second; +} + +pybind11::object TensorMemoryInterfaceProxy::get_tensor(TensorMemory& self, const std::string name) +{ + return CupyUtil::tensor_to_cupy(TensorMemoryInterfaceProxy::get_tensor_object(self, name)); +} + +void TensorMemoryInterfaceProxy::set_tensor(TensorMemory& self, + const std::string name, + const pybind11::object& cupy_tensor) +{ + self.tensors.insert_or_assign(name, CupyUtil::cupy_to_tensor(cupy_tensor)); +} + } // namespace morpheus diff --git a/morpheus/_lib/src/python_modules/messages.cpp b/morpheus/_lib/src/python_modules/messages.cpp index 
0aa4e5bec2..e44c49df0f 100644 --- a/morpheus/_lib/src/python_modules/messages.cpp +++ b/morpheus/_lib/src/python_modules/messages.cpp @@ -161,9 +161,11 @@ PYBIND11_MODULE(messages, m) .def(py::init<>(&InferenceMemoryInterfaceProxy::init), py::arg("count"), py::arg("tensors") = CupyUtil::py_tensor_map_t()) - .def_property_readonly("count", &InferenceMemoryInterfaceProxy::get_count) + .def_readonly("count", &InferenceMemory::count) .def("get_tensors", &InferenceMemoryInterfaceProxy::get_tensors, py::return_value_policy::move) - .def("set_tensors", &InferenceMemoryInterfaceProxy::set_tensors); + .def("set_tensors", &InferenceMemoryInterfaceProxy::set_tensors) + .def("get_tensor", &InferenceMemoryInterfaceProxy::get_tensor, py::return_value_policy::move) + .def("set_tensor", &InferenceMemoryInterfaceProxy::set_tensor); py::class_>(m, "InferenceMemoryNLP") .def(py::init<>(&InferenceMemoryNLPInterfaceProxy::init), @@ -171,7 +173,11 @@ PYBIND11_MODULE(messages, m) py::arg("input_ids"), py::arg("input_mask"), py::arg("seq_ids")) - .def_property_readonly("count", &InferenceMemoryNLPInterfaceProxy::count) + .def_readonly("count", &InferenceMemoryNLP::count) + .def("get_tensors", &InferenceMemoryNLPInterfaceProxy::get_tensors, py::return_value_policy::move) + .def("set_tensors", &InferenceMemoryNLPInterfaceProxy::set_tensors) + .def("get_tensor", &InferenceMemoryNLPInterfaceProxy::get_tensor, py::return_value_policy::move) + .def("set_tensor", &InferenceMemoryNLPInterfaceProxy::set_tensor) .def_property("input_ids", &InferenceMemoryNLPInterfaceProxy::get_input_ids, &InferenceMemoryNLPInterfaceProxy::set_input_ids) @@ -186,8 +192,11 @@ PYBIND11_MODULE(messages, m) py::arg("count"), py::arg("input__0"), py::arg("seq_ids")) - .def_property_readonly("count", &InferenceMemoryFILInterfaceProxy::count) - .def("get_tensor", &InferenceMemoryFILInterfaceProxy::get_tensor) + .def_readonly("count", &InferenceMemoryFIL::count) + .def("get_tensors", 
&InferenceMemoryFILInterfaceProxy::get_tensors, py::return_value_policy::move) + .def("set_tensors", &InferenceMemoryFILInterfaceProxy::set_tensors) + .def("get_tensor", &InferenceMemoryFILInterfaceProxy::get_tensor, py::return_value_policy::move) + .def("set_tensor", &InferenceMemoryFILInterfaceProxy::set_tensor) .def_property("input__0", &InferenceMemoryFILInterfaceProxy::get_input__0, &InferenceMemoryFILInterfaceProxy::set_input__0) @@ -243,7 +252,9 @@ PYBIND11_MODULE(messages, m) py::arg("tensors") = CupyUtil::py_tensor_map_t()) .def_readonly("count", &TensorMemory::count) .def("get_tensors", &TensorMemoryInterfaceProxy::get_tensors, py::return_value_policy::move) - .def("set_tensors", &TensorMemoryInterfaceProxy::set_tensors); + .def("set_tensors", &TensorMemoryInterfaceProxy::set_tensors) + .def("get_tensor", &TensorMemoryInterfaceProxy::get_tensor, py::return_value_policy::move) + .def("set_tensor", &TensorMemoryInterfaceProxy::set_tensor); py::class_>(m, "ResponseMemory") .def(py::init<>(&ResponseMemoryInterfaceProxy::init), @@ -252,16 +263,22 @@ PYBIND11_MODULE(messages, m) .def_readonly("count", &ResponseMemory::count) .def("get_tensors", &ResponseMemoryInterfaceProxy::get_tensors, py::return_value_policy::move) .def("set_tensors", &ResponseMemoryInterfaceProxy::set_tensors) - .def("get_output", &ResponseMemoryInterfaceProxy::get_output, py::return_value_policy::reference_internal) - .def("get_output_tensor", - &ResponseMemoryInterfaceProxy::get_output_tensor, - py::return_value_policy::reference_internal); + .def("get_tensor", &ResponseMemoryInterfaceProxy::get_tensor, py::return_value_policy::move) + .def("set_tensor", &ResponseMemoryInterfaceProxy::set_tensor) + .def("get_output", &ResponseMemoryInterfaceProxy::get_tensor, py::return_value_policy::move) + .def("set_output", &ResponseMemoryInterfaceProxy::set_tensor); py::class_>(m, "ResponseMemoryProbs") .def(py::init<>(&ResponseMemoryProbsInterfaceProxy::init), py::arg("count"), py::arg("probs")) - 
.def_property_readonly("count", &ResponseMemoryProbsInterfaceProxy::count) - .def_property( - "probs", &ResponseMemoryProbsInterfaceProxy::get_probs, &ResponseMemoryProbsInterfaceProxy::set_probs); + .def_readonly("count", &ResponseMemoryProbs::count) + .def("get_tensors", &ResponseMemoryProbsInterfaceProxy::get_tensors, py::return_value_policy::move) + .def("set_tensors", &ResponseMemoryProbsInterfaceProxy::set_tensors) + .def("get_tensor", &ResponseMemoryProbsInterfaceProxy::get_tensor, py::return_value_policy::move) + .def("set_tensor", &ResponseMemoryProbsInterfaceProxy::set_tensor) + .def("get_output", &ResponseMemoryProbsInterfaceProxy::get_tensor, py::return_value_policy::move) + .def("set_output", &ResponseMemoryProbsInterfaceProxy::set_tensor) + .def("get_probs", &ResponseMemoryProbsInterfaceProxy::get_probs, py::return_value_policy::move) + .def("set_probs", &ResponseMemoryProbsInterfaceProxy::set_probs); py::class_>(m, "MultiResponseMessage") .def(py::init<>(&MultiResponseMessageInterfaceProxy::init), From bce1ec3a7de09a79667f758cab650efecca24813 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Mon, 13 Feb 2023 16:49:32 -0800 Subject: [PATCH 27/85] wip --- .../messages/memory/inference_memory_fil.cpp | 2 ++ morpheus/_lib/src/python_modules/messages.cpp | 24 +++++++++---------- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/morpheus/_lib/src/messages/memory/inference_memory_fil.cpp b/morpheus/_lib/src/messages/memory/inference_memory_fil.cpp index 2d2e921de5..b37e2582fd 100644 --- a/morpheus/_lib/src/messages/memory/inference_memory_fil.cpp +++ b/morpheus/_lib/src/messages/memory/inference_memory_fil.cpp @@ -83,11 +83,13 @@ std::shared_ptr InferenceMemoryFILInterfaceProxy::init(cudf: pybind11::object InferenceMemoryFILInterfaceProxy::get_input__0(InferenceMemoryFIL& self) { + std::cerr << "get_input__0\n"; return CupyUtil::tensor_to_cupy(self.get_input__0()); } void InferenceMemoryFILInterfaceProxy::set_input__0(InferenceMemoryFIL& 
self, pybind11::object cupy_values) { + std::cerr << "set_input__0\n"; self.set_input__0(CupyUtil::cupy_to_tensor(cupy_values)); } diff --git a/morpheus/_lib/src/python_modules/messages.cpp b/morpheus/_lib/src/python_modules/messages.cpp index e44c49df0f..17184f7447 100644 --- a/morpheus/_lib/src/python_modules/messages.cpp +++ b/morpheus/_lib/src/python_modules/messages.cpp @@ -157,7 +157,17 @@ PYBIND11_MODULE(messages, m) py::return_value_policy::move) .def("get_meta_list", &MultiMessageInterfaceProxy::get_meta_list, py::return_value_policy::move); - py::class_>(m, "InferenceMemory") + py::class_>(m, "TensorMemory") + .def(py::init<>(&TensorMemoryInterfaceProxy::init), + py::arg("count"), + py::arg("tensors") = CupyUtil::py_tensor_map_t()) + .def_readonly("count", &TensorMemory::count) + .def("get_tensors", &TensorMemoryInterfaceProxy::get_tensors, py::return_value_policy::move) + .def("set_tensors", &TensorMemoryInterfaceProxy::set_tensors) + .def("get_tensor", &TensorMemoryInterfaceProxy::get_tensor, py::return_value_policy::move) + .def("set_tensor", &TensorMemoryInterfaceProxy::set_tensor); + + py::class_>(m, "InferenceMemory") .def(py::init<>(&InferenceMemoryInterfaceProxy::init), py::arg("count"), py::arg("tensors") = CupyUtil::py_tensor_map_t()) @@ -246,17 +256,7 @@ PYBIND11_MODULE(messages, m) .def_property_readonly("offset", &MultiInferenceFILMessageInterfaceProxy::offset) .def_property_readonly("count", &MultiInferenceFILMessageInterfaceProxy::count); - py::class_>(m, "TensorMemory") - .def(py::init<>(&TensorMemoryInterfaceProxy::init), - py::arg("count"), - py::arg("tensors") = CupyUtil::py_tensor_map_t()) - .def_readonly("count", &TensorMemory::count) - .def("get_tensors", &TensorMemoryInterfaceProxy::get_tensors, py::return_value_policy::move) - .def("set_tensors", &TensorMemoryInterfaceProxy::set_tensors) - .def("get_tensor", &TensorMemoryInterfaceProxy::get_tensor, py::return_value_policy::move) - .def("set_tensor", 
&TensorMemoryInterfaceProxy::set_tensor); - - py::class_>(m, "ResponseMemory") + py::class_>(m, "ResponseMemory") .def(py::init<>(&ResponseMemoryInterfaceProxy::init), py::arg("count"), py::arg("tensors") = CupyUtil::py_tensor_map_t()) From 24b62f81c9ca241b6fdec49e186889a8ff815265 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Mon, 13 Feb 2023 16:56:26 -0800 Subject: [PATCH 28/85] Interface proxy classes for TensorMemory subclasses now inherit from eachother, method aliases now mapped in messages.cpp --- morpheus/_lib/src/python_modules/messages.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/morpheus/_lib/src/python_modules/messages.cpp b/morpheus/_lib/src/python_modules/messages.cpp index 17184f7447..1a4685776a 100644 --- a/morpheus/_lib/src/python_modules/messages.cpp +++ b/morpheus/_lib/src/python_modules/messages.cpp @@ -188,6 +188,8 @@ PYBIND11_MODULE(messages, m) .def("set_tensors", &InferenceMemoryNLPInterfaceProxy::set_tensors) .def("get_tensor", &InferenceMemoryNLPInterfaceProxy::get_tensor, py::return_value_policy::move) .def("set_tensor", &InferenceMemoryNLPInterfaceProxy::set_tensor) + .def("get_input", &InferenceMemoryNLPInterfaceProxy::get_tensor, py::return_value_policy::move) + .def("set_input", &InferenceMemoryNLPInterfaceProxy::set_tensor) .def_property("input_ids", &InferenceMemoryNLPInterfaceProxy::get_input_ids, &InferenceMemoryNLPInterfaceProxy::set_input_ids) @@ -207,6 +209,8 @@ PYBIND11_MODULE(messages, m) .def("set_tensors", &InferenceMemoryFILInterfaceProxy::set_tensors) .def("get_tensor", &InferenceMemoryFILInterfaceProxy::get_tensor, py::return_value_policy::move) .def("set_tensor", &InferenceMemoryFILInterfaceProxy::set_tensor) + .def("get_input", &InferenceMemoryFILInterfaceProxy::get_tensor, py::return_value_policy::move) + .def("set_set", &InferenceMemoryFILInterfaceProxy::set_tensor) .def_property("input__0", &InferenceMemoryFILInterfaceProxy::get_input__0, 
&InferenceMemoryFILInterfaceProxy::set_input__0) @@ -277,8 +281,8 @@ PYBIND11_MODULE(messages, m) .def("set_tensor", &ResponseMemoryProbsInterfaceProxy::set_tensor) .def("get_output", &ResponseMemoryProbsInterfaceProxy::get_tensor, py::return_value_policy::move) .def("set_output", &ResponseMemoryProbsInterfaceProxy::set_tensor) - .def("get_probs", &ResponseMemoryProbsInterfaceProxy::get_probs, py::return_value_policy::move) - .def("set_probs", &ResponseMemoryProbsInterfaceProxy::set_probs); + .def_property( + "probs", &ResponseMemoryProbsInterfaceProxy::get_probs, &ResponseMemoryProbsInterfaceProxy::set_probs); py::class_>(m, "MultiResponseMessage") .def(py::init<>(&MultiResponseMessageInterfaceProxy::init), From cfb6cb9cb4447146f0096d63704fa416ab0794dc Mon Sep 17 00:00:00 2001 From: David Gardner Date: Tue, 14 Feb 2023 08:40:21 -0800 Subject: [PATCH 29/85] Make get_input, set_input, get_output & set_output methods actual class methods on the base --- morpheus/messages/multi_inference_message.py | 90 +++++++++----------- morpheus/messages/multi_response_message.py | 80 ++++++----------- 2 files changed, 67 insertions(+), 103 deletions(-) diff --git a/morpheus/messages/multi_inference_message.py b/morpheus/messages/multi_inference_message.py index 64759d84c6..3d32e1f52a 100644 --- a/morpheus/messages/multi_inference_message.py +++ b/morpheus/messages/multi_inference_message.py @@ -31,52 +31,46 @@ class InferenceMemory(TensorMemory, cpp_class=_messages.InferenceMemory): hold generic tensor data in cupy arrays. """ + def get_input(self, name: str): + """ + Getter function used with DataClassProp for getting inference input from message containers derived + from InferenceMemory. -def get_input(instance, name: str): - """ - Getter function used with DataClassProp for getting inference input from message containers derived - from InferenceMemory. + Parameters + ---------- + name : str + Key used to do lookup in inputs dict of message container. 
- Parameters - ---------- - instance : `InferenceMemory` - Message container holding inputs. - name : str - Key used to do lookup in inputs dict of message container. - - Returns - ------- - cupy.ndarray - Inputs corresponding to name. - - Raises - ------ - AttributeError - If input name does not exist in message container. - """ - try: - return instance.get_tensor(name) - except KeyError: - raise AttributeError + Returns + ------- + cupy.ndarray + Inputs corresponding to name. + Raises + ------ + AttributeError + If input name does not exist in message container. + """ + try: + return self.get_tensor(name) + except KeyError: + raise AttributeError -def set_input(instance, name: str, value): - """ - Setter function used with DataClassProp for setting inference input in message containers derived - from InferenceMemory. + def set_input(self, name: str, value): + """ + Setter function used with DataClassProp for setting inference input in message containers derived + from InferenceMemory. - Parameters - ---------- - instance : `InferenceMemory` - Message container holding inputs. - name : str - Key used to do lookup in inputs dict of message container. - value : cupy.ndarray - Value to set for input. - """ - # Ensure that we have 2D array here (`ensure_2d` inserts the wrong axis) - tensor = value if value.ndim == 2 else cp.reshape(value, (value.shape[0], -1)) - instance.set_tensor(name, tensor) + Parameters + ---------- + name : str + Key used to do lookup in inputs dict of message container. + value : cupy.ndarray + Value to set for input. + """ + # Ensure that we have 2D array here (`ensure_2d` inserts the wrong axis) + tensor = value if value.ndim == 2 else cp.reshape(value, (value.shape[0], -1)) + self.set_tensor(name, tensor) @dataclasses.dataclass(init=False) @@ -96,9 +90,9 @@ class InferenceMemoryNLP(InferenceMemory, cpp_class=_messages.InferenceMemoryNLP inputs than messages (i.e., if some messages get broken into multiple inference requests). 
""" - input_ids: dataclasses.InitVar[cp.ndarray] = DataClassProp(get_input, set_input) - input_mask: dataclasses.InitVar[cp.ndarray] = DataClassProp(get_input, set_input) - seq_ids: dataclasses.InitVar[cp.ndarray] = DataClassProp(get_input, set_input) + input_ids: dataclasses.InitVar[cp.ndarray] = DataClassProp(InferenceMemory.get_input, InferenceMemory.set_input) + input_mask: dataclasses.InitVar[cp.ndarray] = DataClassProp(InferenceMemory.get_input, InferenceMemory.set_input) + seq_ids: dataclasses.InitVar[cp.ndarray] = DataClassProp(InferenceMemory.get_input, InferenceMemory.set_input) def __init__(self, count, input_ids, input_mask, seq_ids): super().__init__(count, tensors={'input_ids': input_ids, 'input_mask': input_mask, 'seq_ids': seq_ids}) @@ -119,8 +113,8 @@ class InferenceMemoryFIL(InferenceMemory, cpp_class=_messages.InferenceMemoryFIL inputs than messages (i.e., if some messages get broken into multiple inference requests). """ - input__0: dataclasses.InitVar[cp.ndarray] = DataClassProp(get_input, set_input) - seq_ids: dataclasses.InitVar[cp.ndarray] = DataClassProp(get_input, set_input) + input__0: dataclasses.InitVar[cp.ndarray] = DataClassProp(InferenceMemory.get_input, InferenceMemory.set_input) + seq_ids: dataclasses.InitVar[cp.ndarray] = DataClassProp(InferenceMemory.get_input, InferenceMemory.set_input) def __init__(self, count, input__0, seq_ids): super().__init__(count, tensors={'input__0': input__0, 'seq_ids': seq_ids}) @@ -140,8 +134,8 @@ class InferenceMemoryAE(InferenceMemory, cpp_class=None): inputs than messages (i.e., if some messages get broken into multiple inference requests). 
""" - input: dataclasses.InitVar[cp.ndarray] = DataClassProp(get_input, set_input) - seq_ids: dataclasses.InitVar[cp.ndarray] = DataClassProp(get_input, set_input) + input: dataclasses.InitVar[cp.ndarray] = DataClassProp(InferenceMemory.get_input, InferenceMemory.set_input) + seq_ids: dataclasses.InitVar[cp.ndarray] = DataClassProp(InferenceMemory.get_input, InferenceMemory.set_input) def __init__(self, count, input, seq_ids): super().__init__(count, tensors={'input': input, 'seq_ids': seq_ids}) diff --git a/morpheus/messages/multi_response_message.py b/morpheus/messages/multi_response_message.py index 7035bfbd75..3e6df2a453 100644 --- a/morpheus/messages/multi_response_message.py +++ b/morpheus/messages/multi_response_message.py @@ -25,83 +25,53 @@ from morpheus.messages.tensor_memory import TensorMemory -def get_output(instance: "ResponseMemory", name: str): - """ - Getter function used with DataClassProp for getting inference output from message containers derived - from ResponseMemory. - - Parameters - ---------- - instance : `ResponseMemory` - Message container holding tensors. - name : str - Key used to do lookup in tensors dict of message container. - - Returns - ------- - cupy.ndarray - Tensors corresponding to name. - - Raises - ------ - AttributeError - If output name does not exist in message container. - - """ - try: - return instance.get_tensor(name) - except KeyError: - raise AttributeError - - -def set_output(instance: "ResponseMemory", name: str, value): - """ - Setter function used with DataClassProp for setting output in message containers derived - from ResponseMemory. - - Parameters - ---------- - instance : `ResponseMemory` - Message container holding tensors. - name : str - Key used to do lookup in tensors dict of message container. - value : cupy.ndarray - Value to set for input. 
- """ - - # Ensure that we have 2D array here (`ensure_2d` inserts the wrong axis) - tensor = value if value.ndim == 2 else cp.reshape(value, (value.shape[0], -1)) - instance.set_tensor(name, tensor) - - @dataclasses.dataclass(init=False) class ResponseMemory(TensorMemory, cpp_class=_messages.ResponseMemory): """Output memory block holding the results of inference.""" def get_output(self, name: str): """ - Return the output tensor specified by `name`. + Getter function used with DataClassProp for getting inference output from message containers derived + from ResponseMemory. Parameters ---------- name : str - Name of output tensor. + Key used to do lookup in tensors dict of message container. Returns ------- cupy.ndarray - Tensor corresponding to name. + Tensors corresponding to name. Raises ------ AttributeError - If input name does not exist in message container. + If output name does not exist in message container. + """ try: return self.get_tensor(name) except KeyError: raise AttributeError + def set_output(self, name: str, value): + """ + Setter function used with DataClassProp for setting output in message containers derived + from ResponseMemory. + + Parameters + ---------- + name : str + Key used to do lookup in tensors dict of message container. + value : cupy.ndarray + Value to set for input. 
+ """ + + # Ensure that we have 2D array here (`ensure_2d` inserts the wrong axis) + tensor = value if value.ndim == 2 else cp.reshape(value, (value.shape[0], -1)) + self.set_tensor(name, tensor) + @dataclasses.dataclass(init=False) class ResponseMemoryProbs(ResponseMemory, cpp_class=_messages.ResponseMemoryProbs): @@ -113,7 +83,7 @@ class ResponseMemoryProbs(ResponseMemory, cpp_class=_messages.ResponseMemoryProb probs : cupy.ndarray Probabilities tensor """ - probs: dataclasses.InitVar[cp.ndarray] = DataClassProp(get_output, set_output) + probs: dataclasses.InitVar[cp.ndarray] = DataClassProp(ResponseMemory.get_output, ResponseMemory.set_output) def __init__(self, count, probs): super().__init__(count, tensors={'probs': probs}) @@ -136,7 +106,7 @@ class ResponseMemoryAE(ResponseMemory, cpp_class=None): Explainability Dataframe, for each feature a column will exist with a name in the form of: `{feature}_z_loss` containing the loss z-score along with `max_abs_z` and `mean_abs_z` columns """ - probs: dataclasses.InitVar[cp.ndarray] = DataClassProp(get_output, set_output) + probs: dataclasses.InitVar[cp.ndarray] = DataClassProp(ResponseMemory.get_output, ResponseMemory.set_output) user_id = "" explain_df = None From aac60f98260ebd6bfcac969ab9ff15b32b59c645 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Tue, 14 Feb 2023 08:58:51 -0800 Subject: [PATCH 30/85] Add py::args to bindings --- morpheus/_lib/src/python_modules/messages.cpp | 60 +++++++++++-------- 1 file changed, 34 insertions(+), 26 deletions(-) diff --git a/morpheus/_lib/src/python_modules/messages.cpp b/morpheus/_lib/src/python_modules/messages.cpp index 1a4685776a..df8afc532a 100644 --- a/morpheus/_lib/src/python_modules/messages.cpp +++ b/morpheus/_lib/src/python_modules/messages.cpp @@ -163,9 +163,9 @@ PYBIND11_MODULE(messages, m) py::arg("tensors") = CupyUtil::py_tensor_map_t()) .def_readonly("count", &TensorMemory::count) .def("get_tensors", &TensorMemoryInterfaceProxy::get_tensors, 
py::return_value_policy::move) - .def("set_tensors", &TensorMemoryInterfaceProxy::set_tensors) - .def("get_tensor", &TensorMemoryInterfaceProxy::get_tensor, py::return_value_policy::move) - .def("set_tensor", &TensorMemoryInterfaceProxy::set_tensor); + .def("set_tensors", &TensorMemoryInterfaceProxy::set_tensors, py::arg("tensors")) + .def("get_tensor", &TensorMemoryInterfaceProxy::get_tensor, py::arg("name"), py::return_value_policy::move) + .def("set_tensor", &TensorMemoryInterfaceProxy::set_tensor, py::arg("name"), py::arg("tensor")); py::class_>(m, "InferenceMemory") .def(py::init<>(&InferenceMemoryInterfaceProxy::init), @@ -173,9 +173,9 @@ PYBIND11_MODULE(messages, m) py::arg("tensors") = CupyUtil::py_tensor_map_t()) .def_readonly("count", &InferenceMemory::count) .def("get_tensors", &InferenceMemoryInterfaceProxy::get_tensors, py::return_value_policy::move) - .def("set_tensors", &InferenceMemoryInterfaceProxy::set_tensors) - .def("get_tensor", &InferenceMemoryInterfaceProxy::get_tensor, py::return_value_policy::move) - .def("set_tensor", &InferenceMemoryInterfaceProxy::set_tensor); + .def("set_tensors", &InferenceMemoryInterfaceProxy::set_tensors, py::arg("tensors")) + .def("get_tensor", &InferenceMemoryInterfaceProxy::get_tensor, py::arg("name"), py::return_value_policy::move) + .def("set_tensor", &InferenceMemoryInterfaceProxy::set_tensor, py::arg("name"), py::arg("tensor")); py::class_>(m, "InferenceMemoryNLP") .def(py::init<>(&InferenceMemoryNLPInterfaceProxy::init), @@ -185,11 +185,12 @@ PYBIND11_MODULE(messages, m) py::arg("seq_ids")) .def_readonly("count", &InferenceMemoryNLP::count) .def("get_tensors", &InferenceMemoryNLPInterfaceProxy::get_tensors, py::return_value_policy::move) - .def("set_tensors", &InferenceMemoryNLPInterfaceProxy::set_tensors) - .def("get_tensor", &InferenceMemoryNLPInterfaceProxy::get_tensor, py::return_value_policy::move) - .def("set_tensor", &InferenceMemoryNLPInterfaceProxy::set_tensor) - .def("get_input", 
&InferenceMemoryNLPInterfaceProxy::get_tensor, py::return_value_policy::move) - .def("set_input", &InferenceMemoryNLPInterfaceProxy::set_tensor) + .def("set_tensors", &InferenceMemoryNLPInterfaceProxy::set_tensors, py::arg("tensors")) + .def( + "get_tensor", &InferenceMemoryNLPInterfaceProxy::get_tensor, py::arg("name"), py::return_value_policy::move) + .def("set_tensor", &InferenceMemoryNLPInterfaceProxy::set_tensor, py::arg("name"), py::arg("tensor")) + .def("get_input", &InferenceMemoryNLPInterfaceProxy::get_tensor, py::arg("name"), py::return_value_policy::move) + .def("set_input", &InferenceMemoryNLPInterfaceProxy::set_tensor, py::arg("name"), py::arg("tensor")) .def_property("input_ids", &InferenceMemoryNLPInterfaceProxy::get_input_ids, &InferenceMemoryNLPInterfaceProxy::set_input_ids) @@ -206,11 +207,12 @@ PYBIND11_MODULE(messages, m) py::arg("seq_ids")) .def_readonly("count", &InferenceMemoryFIL::count) .def("get_tensors", &InferenceMemoryFILInterfaceProxy::get_tensors, py::return_value_policy::move) - .def("set_tensors", &InferenceMemoryFILInterfaceProxy::set_tensors) - .def("get_tensor", &InferenceMemoryFILInterfaceProxy::get_tensor, py::return_value_policy::move) - .def("set_tensor", &InferenceMemoryFILInterfaceProxy::set_tensor) - .def("get_input", &InferenceMemoryFILInterfaceProxy::get_tensor, py::return_value_policy::move) - .def("set_set", &InferenceMemoryFILInterfaceProxy::set_tensor) + .def("set_tensors", &InferenceMemoryFILInterfaceProxy::set_tensors, py::arg("tensors")) + .def( + "get_tensor", &InferenceMemoryFILInterfaceProxy::get_tensor, py::arg("name"), py::return_value_policy::move) + .def("set_tensor", &InferenceMemoryFILInterfaceProxy::set_tensor, py::arg("name"), py::arg("tensor")) + .def("get_input", &InferenceMemoryFILInterfaceProxy::get_tensor, py::arg("name"), py::return_value_policy::move) + .def("set_set", &InferenceMemoryFILInterfaceProxy::set_tensor, py::arg("name"), py::arg("tensor")) .def_property("input__0", 
&InferenceMemoryFILInterfaceProxy::get_input__0, &InferenceMemoryFILInterfaceProxy::set_input__0) @@ -266,21 +268,27 @@ PYBIND11_MODULE(messages, m) py::arg("tensors") = CupyUtil::py_tensor_map_t()) .def_readonly("count", &ResponseMemory::count) .def("get_tensors", &ResponseMemoryInterfaceProxy::get_tensors, py::return_value_policy::move) - .def("set_tensors", &ResponseMemoryInterfaceProxy::set_tensors) - .def("get_tensor", &ResponseMemoryInterfaceProxy::get_tensor, py::return_value_policy::move) - .def("set_tensor", &ResponseMemoryInterfaceProxy::set_tensor) - .def("get_output", &ResponseMemoryInterfaceProxy::get_tensor, py::return_value_policy::move) - .def("set_output", &ResponseMemoryInterfaceProxy::set_tensor); + .def("set_tensors", &ResponseMemoryInterfaceProxy::set_tensors, py::arg("tensors")) + .def("get_tensor", &ResponseMemoryInterfaceProxy::get_tensor, py::arg("name"), py::return_value_policy::move) + .def("set_tensor", &ResponseMemoryInterfaceProxy::set_tensor, py::arg("name"), py::arg("tensor")) + .def("get_output", &ResponseMemoryInterfaceProxy::get_tensor, py::arg("name"), py::return_value_policy::move) + .def("set_output", &ResponseMemoryInterfaceProxy::set_tensor, py::arg("name"), py::arg("tensor")); py::class_>(m, "ResponseMemoryProbs") .def(py::init<>(&ResponseMemoryProbsInterfaceProxy::init), py::arg("count"), py::arg("probs")) .def_readonly("count", &ResponseMemoryProbs::count) .def("get_tensors", &ResponseMemoryProbsInterfaceProxy::get_tensors, py::return_value_policy::move) - .def("set_tensors", &ResponseMemoryProbsInterfaceProxy::set_tensors) - .def("get_tensor", &ResponseMemoryProbsInterfaceProxy::get_tensor, py::return_value_policy::move) - .def("set_tensor", &ResponseMemoryProbsInterfaceProxy::set_tensor) - .def("get_output", &ResponseMemoryProbsInterfaceProxy::get_tensor, py::return_value_policy::move) - .def("set_output", &ResponseMemoryProbsInterfaceProxy::set_tensor) + .def("set_tensors", 
&ResponseMemoryProbsInterfaceProxy::set_tensors, py::arg("tensors")) + .def("get_tensor", + &ResponseMemoryProbsInterfaceProxy::get_tensor, + py::arg("name"), + py::return_value_policy::move) + .def("set_tensor", &ResponseMemoryProbsInterfaceProxy::set_tensor, py::arg("name"), py::arg("tensor")) + .def("get_output", + &ResponseMemoryProbsInterfaceProxy::get_tensor, + py::arg("name"), + py::return_value_policy::move) + .def("set_output", &ResponseMemoryProbsInterfaceProxy::set_tensor, py::arg("name"), py::arg("tensor")) .def_property( "probs", &ResponseMemoryProbsInterfaceProxy::get_probs, &ResponseMemoryProbsInterfaceProxy::set_probs); From def8a98e8433f835ca723f89205381d0451ccaad Mon Sep 17 00:00:00 2001 From: David Gardner Date: Tue, 14 Feb 2023 09:11:11 -0800 Subject: [PATCH 31/85] Rempove debug print statements --- morpheus/_lib/src/messages/memory/inference_memory_fil.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/morpheus/_lib/src/messages/memory/inference_memory_fil.cpp b/morpheus/_lib/src/messages/memory/inference_memory_fil.cpp index b37e2582fd..2d2e921de5 100644 --- a/morpheus/_lib/src/messages/memory/inference_memory_fil.cpp +++ b/morpheus/_lib/src/messages/memory/inference_memory_fil.cpp @@ -83,13 +83,11 @@ std::shared_ptr InferenceMemoryFILInterfaceProxy::init(cudf: pybind11::object InferenceMemoryFILInterfaceProxy::get_input__0(InferenceMemoryFIL& self) { - std::cerr << "get_input__0\n"; return CupyUtil::tensor_to_cupy(self.get_input__0()); } void InferenceMemoryFILInterfaceProxy::set_input__0(InferenceMemoryFIL& self, pybind11::object cupy_values) { - std::cerr << "set_input__0\n"; self.set_input__0(CupyUtil::cupy_to_tensor(cupy_values)); } From 1a784f97937e905ee70c2d34da0650f37baec756 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Tue, 14 Feb 2023 14:57:59 -0800 Subject: [PATCH 32/85] Add util function for logging deprecated message classes, and unittest to verify that ResponseMemoryProbs and MultiResponseProbsMessage are deprecated --- 
morpheus/utils/logger.py | 7 +++++ tests/test_messages.py | 61 ++++++++++++++++++++++++++++------------ 2 files changed, 50 insertions(+), 18 deletions(-) diff --git a/morpheus/utils/logger.py b/morpheus/utils/logger.py index 4ff6a37e83..fa49970bfd 100644 --- a/morpheus/utils/logger.py +++ b/morpheus/utils/logger.py @@ -218,3 +218,10 @@ def deprecated_stage_warning(logger, cls, name): "It has no effect and acts as a pass through stage."), cls.__name__, name) + + +def deprecated_message_warning(logger, cls, new_cls_name): + """ + Log a warning about a deprecated message + """ + logger.warning(("The '%s' message has been deprecated in favor of '%s'. "), cls.__name__, new_cls_name) diff --git a/tests/test_messages.py b/tests/test_messages.py index f36beec3b0..2a5200bea5 100644 --- a/tests/test_messages.py +++ b/tests/test_messages.py @@ -15,6 +15,7 @@ import importlib import os +from unittest import mock import cupy as cp import pytest @@ -25,7 +26,14 @@ from morpheus.messages import tensor_memory -def check_message(python_type: type, cpp_type: type, should_be_cpp: bool, no_cpp_class: bool, args: tuple): +@mock.patch('morpheus.utils.logger.deprecated_message_warning') +def check_message(python_type: type, + cpp_type: type, + should_be_cpp: bool, + no_cpp_class: bool, + args: tuple, + is_deprecated: bool, + mock_deprecated_fn: mock.MagicMock): instance = python_type(*args) # Check that the C++ type is set in the class @@ -39,72 +47,89 @@ def check_message(python_type: type, cpp_type: type, should_be_cpp: bool, no_cpp expected_class = cpp_type if should_be_cpp and cpp_type is not None else python_type assert instance.__class__ is expected_class + if is_deprecated: + mock_deprecated_fn.assert_called_once() + else: + mock_deprecated_fn.assert_not_called() + def check_all_messages(should_be_cpp: bool, no_cpp_class: bool): - check_message(messages.MessageMeta, _messages.MessageMeta, should_be_cpp, no_cpp_class, (None, )) + check_message(messages.MessageMeta, 
_messages.MessageMeta, should_be_cpp, no_cpp_class, (None, ), False) # UserMessageMeta doesn't contain a C++ impl, so we should # always received the python impl - check_message(messages.UserMessageMeta, None, should_be_cpp, no_cpp_class, (None, None)) + check_message(messages.UserMessageMeta, None, should_be_cpp, no_cpp_class, (None, None), False) - check_message(messages.MultiMessage, _messages.MultiMessage, should_be_cpp, no_cpp_class, (None, 0, 1)) + check_message(messages.MultiMessage, _messages.MultiMessage, should_be_cpp, no_cpp_class, (None, 0, 1), False) - check_message(tensor_memory.TensorMemory, _messages.TensorMemory, should_be_cpp, no_cpp_class, (1, )) - check_message(messages.InferenceMemory, _messages.InferenceMemory, should_be_cpp, no_cpp_class, (1, )) + check_message(tensor_memory.TensorMemory, _messages.TensorMemory, should_be_cpp, no_cpp_class, (1, ), False) + check_message(messages.InferenceMemory, _messages.InferenceMemory, should_be_cpp, no_cpp_class, (1, ), False) cp_array = cp.zeros((1, 2)) check_message(messages.InferenceMemoryNLP, _messages.InferenceMemoryNLP, should_be_cpp, - no_cpp_class, (1, cp_array, cp_array, cp_array)) + no_cpp_class, (1, cp_array, cp_array, cp_array), + False) check_message(messages.InferenceMemoryFIL, _messages.InferenceMemoryFIL, should_be_cpp, - no_cpp_class, (1, cp_array, cp_array)) + no_cpp_class, (1, cp_array, cp_array), + False) # No C++ impl, should always get the Python class - check_message(messages.InferenceMemoryAE, None, should_be_cpp, no_cpp_class, (1, cp_array, cp_array)) + check_message(messages.InferenceMemoryAE, None, should_be_cpp, no_cpp_class, (1, cp_array, cp_array), False) check_message(messages.MultiInferenceMessage, _messages.MultiInferenceMessage, should_be_cpp, - no_cpp_class, (None, 0, 1, None, 0, 1)) + no_cpp_class, (None, 0, 1, None, 0, 1), + False) check_message(messages.MultiInferenceNLPMessage, _messages.MultiInferenceNLPMessage, should_be_cpp, - no_cpp_class, (None, 0, 1, None, 0, 
1)) + no_cpp_class, (None, 0, 1, None, 0, 1), + False) check_message(messages.MultiInferenceFILMessage, _messages.MultiInferenceFILMessage, should_be_cpp, - no_cpp_class, (None, 0, 1, None, 0, 1)) + no_cpp_class, (None, 0, 1, None, 0, 1), + False) - check_message(messages.ResponseMemory, _messages.ResponseMemory, should_be_cpp, no_cpp_class, (1, )) + check_message(messages.ResponseMemory, _messages.ResponseMemory, should_be_cpp, no_cpp_class, (1, ), False) check_message(messages.ResponseMemoryProbs, _messages.ResponseMemoryProbs, should_be_cpp, - no_cpp_class, (1, cp_array)) + no_cpp_class, (1, cp_array), + True) # No C++ impl - check_message(messages.ResponseMemoryAE, None, should_be_cpp, no_cpp_class, (1, cp_array)) + check_message(messages.ResponseMemoryAE, None, should_be_cpp, no_cpp_class, (1, cp_array), False) check_message(messages.MultiResponseMessage, _messages.MultiResponseMessage, should_be_cpp, - no_cpp_class, (None, 0, 1, None, 0, 1)) + no_cpp_class, (None, 0, 1, None, 0, 1), + False) check_message(messages.MultiResponseProbsMessage, _messages.MultiResponseProbsMessage, should_be_cpp, - no_cpp_class, (None, 0, 1, None, 0, 1)) + no_cpp_class, (None, 0, 1, None, 0, 1), + True) # No C++ impl - check_message(messages.MultiResponseAEMessage, None, should_be_cpp, no_cpp_class, (None, 0, 1, None, 0, 1, '')) + check_message(messages.MultiResponseAEMessage, + None, + should_be_cpp, + no_cpp_class, (None, 0, 1, None, 0, 1, ''), + False) def test_constructor_cpp(config): From 1482be19180ee5613eded343ae3a73aeb2f371cd Mon Sep 17 00:00:00 2001 From: David Gardner Date: Tue, 14 Feb 2023 15:21:51 -0800 Subject: [PATCH 33/85] fix --- morpheus/utils/logger.py | 4 ++-- tests/test_messages.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/morpheus/utils/logger.py b/morpheus/utils/logger.py index fa49970bfd..7a961941ca 100644 --- a/morpheus/utils/logger.py +++ b/morpheus/utils/logger.py @@ -220,8 +220,8 @@ def deprecated_stage_warning(logger, cls, 
name): name) -def deprecated_message_warning(logger, cls, new_cls_name): +def deprecated_message_warning(logger, cls, new_cls): """ Log a warning about a deprecated message """ - logger.warning(("The '%s' message has been deprecated in favor of '%s'. "), cls.__name__, new_cls_name) + logger.warning(("The '%s' message has been deprecated in favor of '%s'. "), cls.__name__, new_cls.__name__) diff --git a/tests/test_messages.py b/tests/test_messages.py index 2a5200bea5..fda52d80f3 100644 --- a/tests/test_messages.py +++ b/tests/test_messages.py @@ -26,7 +26,7 @@ from morpheus.messages import tensor_memory -@mock.patch('morpheus.utils.logger.deprecated_message_warning') +@mock.patch('morpheus.messages.multi_response_message.deprecated_message_warning') def check_message(python_type: type, cpp_type: type, should_be_cpp: bool, From 9c1f997e19066b17c5f39813264b7c5216129f86 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Tue, 14 Feb 2023 15:22:39 -0800 Subject: [PATCH 34/85] Deprecate ResponseMemory and MultiResponseProbsMessage, MultiResponseAEMessage now inherits from MultiResponseMessage --- morpheus/messages/multi_response_message.py | 28 ++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/morpheus/messages/multi_response_message.py b/morpheus/messages/multi_response_message.py index 3e6df2a453..811cf581d6 100644 --- a/morpheus/messages/multi_response_message.py +++ b/morpheus/messages/multi_response_message.py @@ -14,6 +14,7 @@ # limitations under the License. 
import dataclasses +import logging import typing import cupy as cp @@ -23,6 +24,9 @@ from morpheus.messages.message_meta import MessageMeta from morpheus.messages.multi_message import MultiMessage from morpheus.messages.tensor_memory import TensorMemory +from morpheus.utils.logger import deprecated_message_warning + +logger = logging.getLogger(__name__) @dataclasses.dataclass(init=False) @@ -85,6 +89,10 @@ class ResponseMemoryProbs(ResponseMemory, cpp_class=_messages.ResponseMemoryProb """ probs: dataclasses.InitVar[cp.ndarray] = DataClassProp(ResponseMemory.get_output, ResponseMemory.set_output) + def __new__(cls, *args, **kwargs): + deprecated_message_warning(logger, cls, ResponseMemory) + return super(ResponseMemory, cls).__new__(cls, *args, **kwargs) + def __init__(self, count, probs): super().__init__(count, tensors={'probs': probs}) @@ -265,9 +273,13 @@ def probs(self): return self.get_output("probs") + def __new__(cls, *args, **kwargs): + deprecated_message_warning(logger, cls, MultiResponseMessage) + return super(MultiResponseMessage, cls).__new__(cls, *args, **kwargs) + @dataclasses.dataclass -class MultiResponseAEMessage(MultiResponseProbsMessage, cpp_class=None): +class MultiResponseAEMessage(MultiResponseMessage, cpp_class=None): """ A stronger typed version of `MultiResponseProbsMessage` that is used for inference workloads that return a probability array. Helps ensure the proper outputs are set and eases debugging. @@ -275,6 +287,20 @@ class MultiResponseAEMessage(MultiResponseProbsMessage, cpp_class=None): user_id: str = None + @property + def probs(self): + """ + Probabilities of prediction. + + Returns + ------- + cupy.ndarray + probabilities + + """ + + return self.get_output("probs") + def copy_ranges(self, ranges): """ Perform a copy of the current message, dataframe and tensors for the given `ranges` of rows. 
From 13bcbf30d16cf9a3d83b23c7d21ff7485636ed48 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Tue, 14 Feb 2023 15:54:36 -0800 Subject: [PATCH 35/85] wip --- .../morpheus/stages/add_classification.hpp | 14 +++++----- .../include/morpheus/stages/add_scores.hpp | 15 ++++++----- .../morpheus/stages/triton_inference.hpp | 6 ++--- morpheus/_lib/src/python_modules/stages.cpp | 6 +++-- .../_lib/src/stages/add_classification.cpp | 14 ++++++---- morpheus/_lib/src/stages/add_scores.cpp | 15 +++++++---- morpheus/_lib/src/stages/triton_inference.cpp | 27 +++++++++---------- 7 files changed, 56 insertions(+), 41 deletions(-) diff --git a/morpheus/_lib/include/morpheus/stages/add_classification.hpp b/morpheus/_lib/include/morpheus/stages/add_classification.hpp index 666e811725..6fd37165ee 100644 --- a/morpheus/_lib/include/morpheus/stages/add_classification.hpp +++ b/morpheus/_lib/include/morpheus/stages/add_classification.hpp @@ -48,12 +48,11 @@ namespace morpheus { * @brief Add detected classifications to each message. Classification labels based on probabilities calculated in * inference stage. Label indexes will be looked up in the idx2label property. 
*/ -class AddClassificationsStage : public mrc::pymrc::PythonNode, - std::shared_ptr> +class AddClassificationsStage + : public mrc::pymrc::PythonNode, std::shared_ptr> { public: - using base_t = - mrc::pymrc::PythonNode, std::shared_ptr>; + using base_t = mrc::pymrc::PythonNode, std::shared_ptr>; using typename base_t::sink_type_t; using typename base_t::source_type_t; using typename base_t::subscribe_fn_t; @@ -67,7 +66,8 @@ class AddClassificationsStage : public mrc::pymrc::PythonNode idx2label); + std::map idx2label, + std::string tensor_name = "probs"); private: /** @@ -78,6 +78,7 @@ class AddClassificationsStage : public mrc::pymrc::PythonNode m_idx2label; + std::string m_tensor_name; }; /****** AddClassificationStageInterfaceProxy******************/ @@ -102,7 +103,8 @@ struct AddClassificationStageInterfaceProxy const std::string& name, float threshold, std::size_t num_class_labels, - std::map idx2label); + std::map idx2label, + std::string tensor_name); }; #pragma GCC visibility pop diff --git a/morpheus/_lib/include/morpheus/stages/add_scores.hpp b/morpheus/_lib/include/morpheus/stages/add_scores.hpp index 86ebca7aba..73eb631598 100644 --- a/morpheus/_lib/include/morpheus/stages/add_scores.hpp +++ b/morpheus/_lib/include/morpheus/stages/add_scores.hpp @@ -48,12 +48,11 @@ namespace morpheus { * @brief Add probability scores to each message. Score labels based on probabilities calculated in inference stage. * Label indexes will be looked up in the idx2label property. 
*/ -class AddScoresStage : public mrc::pymrc::PythonNode, - std::shared_ptr> +class AddScoresStage + : public mrc::pymrc::PythonNode, std::shared_ptr> { public: - using base_t = - mrc::pymrc::PythonNode, std::shared_ptr>; + using base_t = mrc::pymrc::PythonNode, std::shared_ptr>; using typename base_t::sink_type_t; using typename base_t::source_type_t; using typename base_t::subscribe_fn_t; @@ -64,7 +63,9 @@ class AddScoresStage : public mrc::pymrc::PythonNode idx2label); + AddScoresStage(std::size_t num_class_labels, + std::map idx2label, + std::string tensor_name = "probs"); /** * TODO(Documentation) @@ -73,6 +74,7 @@ class AddScoresStage : public mrc::pymrc::PythonNode m_idx2label; + std::string m_tensor_name; }; /****** AddScoresStageInterfaceProxy******************/ @@ -93,7 +95,8 @@ struct AddScoresStageInterfaceProxy static std::shared_ptr> init(mrc::segment::Builder& builder, const std::string& name, std::size_t num_class_labels, - std::map idx2label); + std::map idx2label, + std::string tensor_name); }; #pragma GCC visibility pop diff --git a/morpheus/_lib/include/morpheus/stages/triton_inference.hpp b/morpheus/_lib/include/morpheus/stages/triton_inference.hpp index 74bd1f803d..b8ba23be4a 100644 --- a/morpheus/_lib/include/morpheus/stages/triton_inference.hpp +++ b/morpheus/_lib/include/morpheus/stages/triton_inference.hpp @@ -51,11 +51,11 @@ namespace morpheus { * This class specifies which inference implementation category (Ex: NLP/FIL) is needed for inferencing. 
*/ class InferenceClientStage - : public mrc::pymrc::PythonNode, std::shared_ptr> + : public mrc::pymrc::PythonNode, std::shared_ptr> { public: using base_t = - mrc::pymrc::PythonNode, std::shared_ptr>; + mrc::pymrc::PythonNode, std::shared_ptr>; using typename base_t::sink_type_t; using typename base_t::source_type_t; using typename base_t::subscribe_fn_t; @@ -85,7 +85,7 @@ class InferenceClientStage /** * TODO(Documentation) */ - bool is_default_grpc_port(std::string &server_url); + bool is_default_grpc_port(std::string& server_url); /** * TODO(Documentation) diff --git a/morpheus/_lib/src/python_modules/stages.cpp b/morpheus/_lib/src/python_modules/stages.cpp index 521b3cea54..b71dcf9a0d 100644 --- a/morpheus/_lib/src/python_modules/stages.cpp +++ b/morpheus/_lib/src/python_modules/stages.cpp @@ -68,7 +68,8 @@ PYBIND11_MODULE(stages, m) py::arg("name"), py::arg("threshold"), py::arg("num_class_labels"), - py::arg("idx2label")); + py::arg("idx2label"), + py::arg("tensor_name") = "probs"); py::class_, mrc::segment::ObjectProperties, @@ -77,7 +78,8 @@ PYBIND11_MODULE(stages, m) py::arg("builder"), py::arg("name"), py::arg("num_class_labels"), - py::arg("idx2label")); + py::arg("idx2label"), + py::arg("tensor_name") = "probs"); py::class_, mrc::segment::ObjectProperties, diff --git a/morpheus/_lib/src/stages/add_classification.cpp b/morpheus/_lib/src/stages/add_classification.cpp index de0394ce4e..31e0b252db 100644 --- a/morpheus/_lib/src/stages/add_classification.cpp +++ b/morpheus/_lib/src/stages/add_classification.cpp @@ -46,11 +46,13 @@ namespace morpheus { // ************ AddClassificationStage **************************** // AddClassificationsStage::AddClassificationsStage(float threshold, std::size_t num_class_labels, - std::map idx2label) : + std::map idx2label, + std::string tensor_name) : PythonNode(base_t::op_factory_from_sub_fn(build_operator())), m_threshold(threshold), m_num_class_labels(num_class_labels), - m_idx2label(std::move(idx2label)) + 
m_idx2label(std::move(idx2label)), + m_tensor_name(std::move(tensor_name)) { CHECK(m_idx2label.size() <= m_num_class_labels) << "idx2label should represent a subset of the class_labels"; } @@ -60,7 +62,7 @@ AddClassificationsStage::subscribe_fn_t AddClassificationsStage::build_operator( return [this](rxcpp::observable input, rxcpp::subscriber output) { return input.subscribe(rxcpp::make_observer( [this, &output](sink_type_t x) { - const auto& probs = x->get_probs(); + const auto& probs = x->get_output(m_tensor_name); const auto& shape = probs.get_shape(); // Depending on the input the stride is given in bytes or elements, convert to elements @@ -121,9 +123,11 @@ std::shared_ptr> AddClassification const std::string& name, float threshold, std::size_t num_class_labels, - std::map idx2label) + std::map idx2label, + std::string tensor_name) { - auto stage = builder.construct_object(name, threshold, num_class_labels, idx2label); + auto stage = builder.construct_object( + name, threshold, num_class_labels, std::move(idx2label), std::move(tensor_name)); return stage; } diff --git a/morpheus/_lib/src/stages/add_scores.cpp b/morpheus/_lib/src/stages/add_scores.cpp index 487faa3b2f..c49d70e7ed 100644 --- a/morpheus/_lib/src/stages/add_scores.cpp +++ b/morpheus/_lib/src/stages/add_scores.cpp @@ -39,10 +39,13 @@ namespace morpheus { // Component public implementations // ************ AddScoresStage **************************** // -AddScoresStage::AddScoresStage(std::size_t num_class_labels, std::map idx2label) : +AddScoresStage::AddScoresStage(std::size_t num_class_labels, + std::map idx2label, + std::string tensor_name) : PythonNode(base_t::op_factory_from_sub_fn(build_operator())), m_num_class_labels(num_class_labels), - m_idx2label(std::move(idx2label)) + m_idx2label(std::move(idx2label)), + m_tensor_name(std::move(tensor_name)) { CHECK(m_idx2label.size() <= m_num_class_labels) << "idx2label should represent a subset of the class_labels"; } @@ -52,7 +55,7 @@ 
AddScoresStage::subscribe_fn_t AddScoresStage::build_operator() return [this](rxcpp::observable input, rxcpp::subscriber output) { return input.subscribe(rxcpp::make_observer( [this, &output](sink_type_t x) { - const auto& probs = x->get_probs(); + const auto& probs = x->get_output(m_tensor_name); const auto& shape = probs.get_shape(); CHECK(shape.size() == 2 && shape[1] == m_num_class_labels) @@ -90,8 +93,10 @@ std::shared_ptr> AddScoresStageInterfacePro mrc::segment::Builder& builder, const std::string& name, std::size_t num_class_labels, - std::map idx2label) + std::map idx2label, + std::string tensor_name) { - return builder.construct_object(name, num_class_labels, std::move(idx2label)); + return builder.construct_object( + name, num_class_labels, std::move(idx2label), std::move(tensor_name)); } } // namespace morpheus diff --git a/morpheus/_lib/src/stages/triton_inference.cpp b/morpheus/_lib/src/stages/triton_inference.cpp index 903183ce6d..e0eba68e38 100644 --- a/morpheus/_lib/src/stages/triton_inference.cpp +++ b/morpheus/_lib/src/stages/triton_inference.cpp @@ -17,12 +17,12 @@ #include "morpheus/stages/triton_inference.hpp" -#include "morpheus/messages/memory/inference_memory.hpp" // for InferenceMemory -#include "morpheus/messages/memory/response_memory_probs.hpp" // for ResponseMemoryProbs -#include "morpheus/messages/memory/tensor_memory.hpp" // for TensorMemory::tensor_map_t -#include "morpheus/messages/multi_response_probs.hpp" -#include "morpheus/objects/dev_mem_info.hpp" // for DevMemInfo -#include "morpheus/objects/dtype.hpp" // for DType +#include "morpheus/messages/memory/inference_memory.hpp" // for InferenceMemory +#include "morpheus/messages/memory/response_memory.hpp" // for ResponseMemory +#include "morpheus/messages/memory/tensor_memory.hpp" // for TensorMemory::tensor_map_t +#include "morpheus/messages/multi_response.hpp" // for MultiResponseMessage +#include "morpheus/objects/dev_mem_info.hpp" // for DevMemInfo +#include 
"morpheus/objects/dtype.hpp" // for DType #include "morpheus/objects/tensor.hpp" #include "morpheus/objects/tensor_object.hpp" // for TensorIndex, TensorObject #include "morpheus/objects/triton_in_out.hpp" @@ -126,14 +126,13 @@ InferenceClientStage::subscribe_fn_t InferenceClientStage::build_operator() } // This will be the final output of all mini-batches - auto response_mem_probs = - std::make_shared(x->mess_count, std::move(response_outputs)); - auto response = std::make_shared(x->meta, - x->mess_offset, - x->mess_count, - std::move(response_mem_probs), - 0, - response_mem_probs->count); + auto response_mem_probs = std::make_shared(x->mess_count, std::move(response_outputs)); + auto response = std::make_shared(x->meta, + x->mess_offset, + x->mess_count, + std::move(response_mem_probs), + 0, + response_mem_probs->count); std::unique_ptr> host_seq_ids{nullptr}; if (needs_seq_ids) From 14a6b0c057ec38a689b0f5870d9dd6059d75bd9a Mon Sep 17 00:00:00 2001 From: David Gardner Date: Tue, 14 Feb 2023 16:08:40 -0800 Subject: [PATCH 36/85] WIP AddClass and AddScores now accept the output name as a constructor arg --- .../morpheus/stages/add_classification.hpp | 8 ++++--- .../include/morpheus/stages/add_scores.hpp | 8 ++++--- morpheus/_lib/src/python_modules/stages.cpp | 4 ++-- .../_lib/src/stages/add_classification.cpp | 10 ++++----- morpheus/_lib/src/stages/add_scores.cpp | 10 ++++----- .../postprocess/add_classifications_stage.py | 22 ++++++++++++++----- .../stages/postprocess/add_scores_stage.py | 19 +++++++++++----- tests/test_add_classifications_stage.py | 4 ++-- tests/test_add_scores_stage.py | 4 ++-- 9 files changed, 56 insertions(+), 33 deletions(-) diff --git a/morpheus/_lib/include/morpheus/stages/add_classification.hpp b/morpheus/_lib/include/morpheus/stages/add_classification.hpp index 6fd37165ee..15edfebcf6 100644 --- a/morpheus/_lib/include/morpheus/stages/add_classification.hpp +++ b/morpheus/_lib/include/morpheus/stages/add_classification.hpp @@ -63,11 +63,12 
@@ class AddClassificationsStage * @param threshold : Threshold to consider true/false for each class * @param num_class_labels : Number of classification labels * @param idx2label : Index to classification labels map + * @param output_name : Name of the output tensor containing probabilities */ AddClassificationsStage(float threshold, std::size_t num_class_labels, std::map idx2label, - std::string tensor_name = "probs"); + std::string output_name = "probs"); private: /** @@ -78,7 +79,7 @@ class AddClassificationsStage float m_threshold; std::size_t m_num_class_labels; std::map m_idx2label; - std::string m_tensor_name; + std::string m_output_name; }; /****** AddClassificationStageInterfaceProxy******************/ @@ -96,6 +97,7 @@ struct AddClassificationStageInterfaceProxy * @param threshold : Threshold to consider true/false for each class * @param num_class_labels : Number of classification labels * @param idx2label : Index to classification labels map + * @param output_name : Name of the output tensor containing probabilities * @return std::shared_ptr> */ static std::shared_ptr> init( @@ -104,7 +106,7 @@ struct AddClassificationStageInterfaceProxy float threshold, std::size_t num_class_labels, std::map idx2label, - std::string tensor_name); + std::string output_name); }; #pragma GCC visibility pop diff --git a/morpheus/_lib/include/morpheus/stages/add_scores.hpp b/morpheus/_lib/include/morpheus/stages/add_scores.hpp index 73eb631598..d5ed8e9340 100644 --- a/morpheus/_lib/include/morpheus/stages/add_scores.hpp +++ b/morpheus/_lib/include/morpheus/stages/add_scores.hpp @@ -62,10 +62,11 @@ class AddScoresStage * * @param num_class_labels : Number of classification labels * @param idx2label : Index to classification labels map + * @param output_name : Name of the output tensor containing probabilities */ AddScoresStage(std::size_t num_class_labels, std::map idx2label, - std::string tensor_name = "probs"); + std::string output_name = "probs"); /** * 
TODO(Documentation) @@ -74,7 +75,7 @@ class AddScoresStage std::size_t m_num_class_labels; std::map m_idx2label; - std::string m_tensor_name; + std::string m_output_name; }; /****** AddScoresStageInterfaceProxy******************/ @@ -90,13 +91,14 @@ struct AddScoresStageInterfaceProxy * @param name : Name of a stage reference * @param num_class_labels : Number of classification labels * @param idx2label : Index to classification labels map + * @param output_name : Name of the output tensor containing probabilities * @return std::shared_ptr> */ static std::shared_ptr> init(mrc::segment::Builder& builder, const std::string& name, std::size_t num_class_labels, std::map idx2label, - std::string tensor_name); + std::string output_name); }; #pragma GCC visibility pop diff --git a/morpheus/_lib/src/python_modules/stages.cpp b/morpheus/_lib/src/python_modules/stages.cpp index b71dcf9a0d..6f5fd680c9 100644 --- a/morpheus/_lib/src/python_modules/stages.cpp +++ b/morpheus/_lib/src/python_modules/stages.cpp @@ -69,7 +69,7 @@ PYBIND11_MODULE(stages, m) py::arg("threshold"), py::arg("num_class_labels"), py::arg("idx2label"), - py::arg("tensor_name") = "probs"); + py::arg("output_name") = "probs"); py::class_, mrc::segment::ObjectProperties, @@ -79,7 +79,7 @@ PYBIND11_MODULE(stages, m) py::arg("name"), py::arg("num_class_labels"), py::arg("idx2label"), - py::arg("tensor_name") = "probs"); + py::arg("output_name") = "probs"); py::class_, mrc::segment::ObjectProperties, diff --git a/morpheus/_lib/src/stages/add_classification.cpp b/morpheus/_lib/src/stages/add_classification.cpp index 31e0b252db..0bd1b764a5 100644 --- a/morpheus/_lib/src/stages/add_classification.cpp +++ b/morpheus/_lib/src/stages/add_classification.cpp @@ -47,12 +47,12 @@ namespace morpheus { AddClassificationsStage::AddClassificationsStage(float threshold, std::size_t num_class_labels, std::map idx2label, - std::string tensor_name) : + std::string output_name) : 
PythonNode(base_t::op_factory_from_sub_fn(build_operator())), m_threshold(threshold), m_num_class_labels(num_class_labels), m_idx2label(std::move(idx2label)), - m_tensor_name(std::move(tensor_name)) + m_output_name(std::move(output_name)) { CHECK(m_idx2label.size() <= m_num_class_labels) << "idx2label should represent a subset of the class_labels"; } @@ -62,7 +62,7 @@ AddClassificationsStage::subscribe_fn_t AddClassificationsStage::build_operator( return [this](rxcpp::observable input, rxcpp::subscriber output) { return input.subscribe(rxcpp::make_observer( [this, &output](sink_type_t x) { - const auto& probs = x->get_output(m_tensor_name); + const auto& probs = x->get_output(m_output_name); const auto& shape = probs.get_shape(); // Depending on the input the stride is given in bytes or elements, convert to elements @@ -124,10 +124,10 @@ std::shared_ptr> AddClassification float threshold, std::size_t num_class_labels, std::map idx2label, - std::string tensor_name) + std::string output_name) { auto stage = builder.construct_object( - name, threshold, num_class_labels, std::move(idx2label), std::move(tensor_name)); + name, threshold, num_class_labels, std::move(idx2label), std::move(output_name)); return stage; } diff --git a/morpheus/_lib/src/stages/add_scores.cpp b/morpheus/_lib/src/stages/add_scores.cpp index c49d70e7ed..510542f747 100644 --- a/morpheus/_lib/src/stages/add_scores.cpp +++ b/morpheus/_lib/src/stages/add_scores.cpp @@ -41,11 +41,11 @@ namespace morpheus { // ************ AddScoresStage **************************** // AddScoresStage::AddScoresStage(std::size_t num_class_labels, std::map idx2label, - std::string tensor_name) : + std::string output_name) : PythonNode(base_t::op_factory_from_sub_fn(build_operator())), m_num_class_labels(num_class_labels), m_idx2label(std::move(idx2label)), - m_tensor_name(std::move(tensor_name)) + m_output_name(std::move(output_name)) { CHECK(m_idx2label.size() <= m_num_class_labels) << "idx2label should represent a 
subset of the class_labels"; } @@ -55,7 +55,7 @@ AddScoresStage::subscribe_fn_t AddScoresStage::build_operator() return [this](rxcpp::observable input, rxcpp::subscriber output) { return input.subscribe(rxcpp::make_observer( [this, &output](sink_type_t x) { - const auto& probs = x->get_output(m_tensor_name); + const auto& probs = x->get_output(m_output_name); const auto& shape = probs.get_shape(); CHECK(shape.size() == 2 && shape[1] == m_num_class_labels) @@ -94,9 +94,9 @@ std::shared_ptr> AddScoresStageInterfacePro const std::string& name, std::size_t num_class_labels, std::map idx2label, - std::string tensor_name) + std::string output_name) { return builder.construct_object( - name, num_class_labels, std::move(idx2label), std::move(tensor_name)); + name, num_class_labels, std::move(idx2label), std::move(output_name)); } } // namespace morpheus diff --git a/morpheus/stages/postprocess/add_classifications_stage.py b/morpheus/stages/postprocess/add_classifications_stage.py index a03eb9a9e5..653eb11006 100644 --- a/morpheus/stages/postprocess/add_classifications_stage.py +++ b/morpheus/stages/postprocess/add_classifications_stage.py @@ -47,15 +47,22 @@ class AddClassificationsStage(SinglePortStage): the Config.class_labels property. prefix : str, default = "" Prefix to add to each label. Allows adding labels different from the `Config.class_labels` property. 
- + output_name : str, default = "probs" + Name of the output tensor containing the probabilities """ - def __init__(self, c: Config, threshold: float = 0.5, labels: typing.List[str] = None, prefix: str = ""): + def __init__(self, + c: Config, + threshold: float = 0.5, + labels: typing.List[str] = None, + prefix: str = "", + output_name: str = "probs"): super().__init__(c) self._feature_length = c.feature_length self._threshold = threshold self._prefix = prefix + self._output_name = output_name self._class_labels = c.class_labels self._labels = labels if labels is not None and len(labels) > 0 else c.class_labels @@ -96,11 +103,13 @@ def supports_cpp_node(self): def _add_labels(self, x: MultiResponseProbsMessage): - if (x.probs.shape[1] != len(self._class_labels)): + probs = x.get_output(self._output_name) + + if (probs.shape[1] != len(self._class_labels)): raise RuntimeError("Label count does not match output of model. Label count: {}, Model output: {}".format( - len(self._class_labels), x.probs.shape[1])) + len(self._class_labels), probs.shape[1])) - probs_np = (x.probs > self._threshold).astype(bool).get() + probs_np = (probs > self._threshold).astype(bool).get() for i, label in self._idx2label.items(): x.set_meta(label, probs_np[:, i].tolist()) @@ -117,7 +126,8 @@ def _build_single(self, builder: mrc.Builder, input_stream: StreamPair) -> Strea self.unique_name, self._threshold, len(self._class_labels), - self._idx2label) + self._idx2label, + output_name=self._output_name) else: stream = builder.make_node(self.unique_name, self._add_labels) diff --git a/morpheus/stages/postprocess/add_scores_stage.py b/morpheus/stages/postprocess/add_scores_stage.py index 811fcadf20..15a167c235 100644 --- a/morpheus/stages/postprocess/add_scores_stage.py +++ b/morpheus/stages/postprocess/add_scores_stage.py @@ -48,17 +48,21 @@ class AddScoresStage(SinglePortStage): Prefix to add to each label. Allows adding labels different from the `Config.class_labels` property. 
probs_type : `morpheus._lib.common.TypeId`, default = "float32" Datatype of the scores columns. + output_name : str, default = "probs" + Name of the output tensor containing the probabilities """ def __init__(self, c: Config, labels: typing.List[str] = None, prefix: str = "", - probs_type: TypeId = TypeId.FLOAT32): + probs_type: TypeId = TypeId.FLOAT32, + output_name: str = "probs"): super().__init__(c) self._feature_length = c.feature_length self._prefix = prefix + self._output_name = output_name self._class_labels = c.class_labels self._labels = labels if labels is not None and len(labels) > 0 else c.class_labels @@ -98,12 +102,13 @@ def supports_cpp_node(self): return True def _add_labels(self, x: MultiResponseProbsMessage): + probs = x.get_output(self._output_name) - if (x.probs.shape[1] != len(self._class_labels)): + if (probs.shape[1] != len(self._class_labels)): raise RuntimeError("Label count does not match output of model. Label count: {}, Model output: {}".format( - len(self._class_labels), x.probs.shape[1])) + len(self._class_labels), probs.shape[1])) - probs_np = x.probs.get() + probs_np = probs.get() for i, label in self._idx2label.items(): x.set_meta(label, probs_np[:, i].tolist()) @@ -115,7 +120,11 @@ def _build_single(self, builder: mrc.Builder, input_stream: StreamPair) -> Strea # Convert the messages to rows of strings if self._build_cpp_node(): - stream = _stages.AddScoresStage(builder, self.unique_name, len(self._class_labels), self._idx2label) + stream = _stages.AddScoresStage(builder, + self.unique_name, + len(self._class_labels), + self._idx2label, + output_name=self._output_name) else: stream = builder.make_node(self.unique_name, self._add_labels) diff --git a/tests/test_add_classifications_stage.py b/tests/test_add_classifications_stage.py index 1d9b499a0b..11941ef430 100755 --- a/tests/test_add_classifications_stage.py +++ b/tests/test_add_classifications_stage.py @@ -47,7 +47,7 @@ def test_constructor(config): @pytest.mark.use_python def 
test_add_labels(config): mock_message = mock.MagicMock() - mock_message.probs = cp.array([[0.1, 0.5, 0.8], [0.2, 0.6, 0.9]]) + mock_message.get_output.return_value = cp.array([[0.1, 0.5, 0.8], [0.2, 0.6, 0.9]]) config.class_labels = ['frogs', 'lizards', 'toads'] @@ -61,7 +61,7 @@ def test_add_labels(config): ]) wrong_shape = mock.MagicMock() - wrong_shape.probs = cp.array([[0.1, 0.5], [0.2, 0.6]]) + wrong_shape.get_output.return_value = cp.array([[0.1, 0.5], [0.2, 0.6]]) pytest.raises(RuntimeError, ac._add_labels, wrong_shape) diff --git a/tests/test_add_scores_stage.py b/tests/test_add_scores_stage.py index bfae5a1c48..903b4b407c 100755 --- a/tests/test_add_scores_stage.py +++ b/tests/test_add_scores_stage.py @@ -48,7 +48,7 @@ def test_constructor(config): @pytest.mark.use_python def test_add_labels(config): mock_message = mock.MagicMock() - mock_message.probs = cp.array([[0.1, 0.5, 0.8], [0.2, 0.6, 0.9]]) + mock_message.get_output.return_value = cp.array([[0.1, 0.5, 0.8], [0.2, 0.6, 0.9]]) config.class_labels = ['frogs', 'lizards', 'toads'] @@ -62,7 +62,7 @@ def test_add_labels(config): ]) wrong_shape = mock.MagicMock() - wrong_shape.probs = cp.array([[0.1, 0.5], [0.2, 0.6]]) + mock_message.get_output.return_value = cp.array([[0.1, 0.5], [0.2, 0.6]]) pytest.raises(RuntimeError, a._add_labels, wrong_shape) From 76e55ae1874a6585a603094fd9188ec4b57c7117 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Tue, 14 Feb 2023 16:45:19 -0800 Subject: [PATCH 37/85] WIP --- .../inference/auto_encoder_inference_stage.py | 4 +- .../inference/identity_inference_stage.py | 10 +-- morpheus/stages/inference/inference_stage.py | 70 +++++++++---------- .../inference/pytorch_inference_stage.py | 3 +- .../inference/triton_inference_stage.py | 19 +++-- .../postprocess/add_classifications_stage.py | 10 +-- .../stages/postprocess/add_scores_stage.py | 8 +-- .../postprocess/generate_viz_frames_stage.py | 15 ++-- .../stages/postprocess/ml_flow_drift_stage.py | 7 +- tests/test_abp.py | 14 
++-- tests/test_add_classifications_stage_pipe.py | 6 +- tests/test_add_scores_stage_pipe.py | 6 +- tests/test_dfp.py | 10 +-- tests/test_filter_detections_stage.py | 8 +-- tests/test_filter_detections_stage_pipe.py | 6 +- tests/test_inference_stage.py | 34 ++++----- tests/test_preallocation_pipe.py | 6 +- tests/test_tensor_memory.py | 2 +- tests/utils.py | 12 ++-- 19 files changed, 124 insertions(+), 126 deletions(-) diff --git a/morpheus/stages/inference/auto_encoder_inference_stage.py b/morpheus/stages/inference/auto_encoder_inference_stage.py index 8bffdec821..43c5f9df0d 100644 --- a/morpheus/stages/inference/auto_encoder_inference_stage.py +++ b/morpheus/stages/inference/auto_encoder_inference_stage.py @@ -161,7 +161,7 @@ def _convert_one_response(memory: ResponseMemory, inf: MultiInferenceMessage, re # Two scenarios: if (inf.mess_count == inf.count): # In message and out message have same count. Just use probs as is - probs[inf.offset:inf.count + inf.offset, :] = res.probs + probs[inf.offset:inf.count + inf.offset, :] = res.get_output('probs') else: assert inf.count == res.count @@ -169,7 +169,7 @@ def _convert_one_response(memory: ResponseMemory, inf: MultiInferenceMessage, re # Out message has more reponses, so we have to do key based blending of probs for i, idx in enumerate(mess_ids): - probs[idx, :] = cp.maximum(probs[idx, :], res.probs[i, :]) + probs[idx, :] = cp.maximum(probs[idx, :], res.get_output('probs')[i, :]) return MultiResponseAEMessage(meta=inf.meta, mess_offset=inf.mess_offset, diff --git a/morpheus/stages/inference/identity_inference_stage.py b/morpheus/stages/inference/identity_inference_stage.py index 902a5547be..22f0fb2a4a 100644 --- a/morpheus/stages/inference/identity_inference_stage.py +++ b/morpheus/stages/inference/identity_inference_stage.py @@ -21,7 +21,6 @@ from morpheus.config import PipelineModes from morpheus.messages import MultiInferenceMessage from morpheus.messages import ResponseMemory -from morpheus.messages import 
ResponseMemoryProbs from morpheus.stages.inference.inference_stage import InferenceStage from morpheus.stages.inference.inference_stage import InferenceWorker from morpheus.utils.producer_consumer_queue import ProducerConsumerQueue @@ -52,10 +51,11 @@ def process(self, batch: MultiInferenceMessage, cb: typing.Callable[[ResponseMem def tmp(b: MultiInferenceMessage, f): - f(ResponseMemoryProbs( - count=b.count, - probs=cp.zeros((b.count, self._seq_length), dtype=cp.float32), - )) + f( + ResponseMemory( + count=b.count, + tensors={'probs': cp.zeros((b.count, self._seq_length), dtype=cp.float32)}, + )) # Call directly instead of enqueing tmp(batch, cb) diff --git a/morpheus/stages/inference/inference_stage.py b/morpheus/stages/inference/inference_stage.py index eb2bb0554e..b392df470d 100644 --- a/morpheus/stages/inference/inference_stage.py +++ b/morpheus/stages/inference/inference_stage.py @@ -23,9 +23,8 @@ from morpheus.config import Config from morpheus.messages import MultiInferenceMessage -from morpheus.messages import MultiResponseProbsMessage +from morpheus.messages import MultiResponseMessage from morpheus.messages import ResponseMemory -from morpheus.messages import ResponseMemoryProbs from morpheus.pipeline.multi_message_stage import MultiMessageStage from morpheus.pipeline.stream_pair import StreamPair from morpheus.utils.producer_consumer_queue import ProducerConsumerQueue @@ -63,7 +62,7 @@ def stop(self): pass - def build_output_message(self, x: MultiInferenceMessage) -> MultiResponseProbsMessage: + def build_output_message(self, x: MultiInferenceMessage) -> MultiResponseMessage: """ Create initial inference response message with result values initialized to zero. Results will be set in message as each inference mini-batch is processed. 
@@ -75,21 +74,21 @@ def build_output_message(self, x: MultiInferenceMessage) -> MultiResponseProbsMe Returns ------- - `morpheus.pipeline.messages.MultiResponseProbsMessage` + `morpheus.pipeline.messages.MultiResponseMessage` Response message with probabilities calculated from inference results. """ dims = self.calc_output_dims(x) output_dims = (x.mess_count, *dims[1:]) - memory = ResponseMemoryProbs(count=output_dims[0], probs=cp.zeros(output_dims)) + memory = ResponseMemory(count=output_dims[0], tensors={'probs': cp.zeros(output_dims)}) - output_message = MultiResponseProbsMessage(meta=x.meta, - mess_offset=x.mess_offset, - mess_count=x.mess_count, - memory=memory, - offset=0, - count=memory.count) + output_message = MultiResponseMessage(meta=x.meta, + mess_offset=x.mess_offset, + mess_count=x.mess_count, + memory=memory, + offset=0, + count=memory.count) return output_message @abstractmethod @@ -217,7 +216,7 @@ def _get_cpp_inference_node(self, builder: mrc.Builder) -> mrc.SegmentObject: def _build_single(self, builder: mrc.Builder, input_stream: StreamPair) -> StreamPair: stream = input_stream[0] - out_type = MultiResponseProbsMessage + out_type = MultiResponseMessage def py_inference_fn(obs: mrc.Observable, sub: mrc.Subscriber): @@ -243,7 +242,7 @@ def on_next(x: MultiInferenceMessage): completion_future = mrc.Future() - def set_output_fut(resp: ResponseMemoryProbs, b, batch_future: mrc.Future): + def set_output_fut(resp: ResponseMemory, b, batch_future: mrc.Future): nonlocal outstanding_requests m = self._convert_one_response(memory, b, resp) @@ -344,9 +343,9 @@ def _split_batches(x: MultiInferenceMessage, max_batch_size: int) -> typing.List return out_resp @staticmethod - def _convert_response(x: typing.Tuple[typing.List[MultiInferenceMessage], typing.List[ResponseMemoryProbs]]): + def _convert_response(x: typing.Tuple[typing.List[MultiInferenceMessage], typing.List[ResponseMemory]]): - # Convert a MultiResponse into a MultiResponseProbsMessage + # Convert a 
MultiInferenceMessage into a MultiResponseMessage in_message = x[0] out_message = x[1] @@ -356,8 +355,7 @@ def _convert_response(x: typing.Tuple[typing.List[MultiInferenceMessage], typing total_mess_count = reduce(lambda y, z: y + z.mess_count, in_message, 0) # Create a message data to store the entire list - memory = ResponseMemoryProbs(count=total_mess_count, - probs=cp.zeros((total_mess_count, out_message[0].probs.shape[1]))) + probs = cp.zeros((total_mess_count, out_message[0].get_output('probs').shape[1])) saved_meta = in_message[0].meta saved_offset = in_message[0].mess_offset @@ -374,7 +372,7 @@ def _convert_response(x: typing.Tuple[typing.List[MultiInferenceMessage], typing # Two scenarios: if (inf.mess_count == inf.count): # In message and out message have same count. Just use probs as is - memory.probs[inf.offset:inf.offset + inf.count, :] = res.probs + probs[inf.offset:inf.offset + inf.count, :] = res.get_output('probs') else: assert inf.count == res.count @@ -382,21 +380,23 @@ def _convert_response(x: typing.Tuple[typing.List[MultiInferenceMessage], typing # Out message has more reponses, so we have to do key based blending of probs for i, idx in enumerate(mess_ids): - memory.probs[idx, :] = cp.maximum(memory.probs[idx, :], res.probs[i, :]) + probs[idx, :] = cp.maximum(probs[idx, :], res.get_output('probs')[i, :]) saved_count += inf.mess_count assert saved_count == total_mess_count, "Did not set every element in output" - return MultiResponseProbsMessage(meta=saved_meta, - mess_offset=saved_offset, - mess_count=saved_count, - memory=memory, - offset=0, - count=memory.count) + memory = ResponseMemory(count=total_mess_count, tensors={'probs': probs}) + + return MultiResponseMessage(meta=saved_meta, + mess_offset=saved_offset, + mess_count=saved_count, + memory=memory, + offset=0, + count=memory.count) @staticmethod - def _convert_one_response(memory: ResponseMemory, inf: MultiInferenceMessage, res: ResponseMemoryProbs): + def _convert_one_response(memory: 
ResponseMemory, inf: MultiInferenceMessage, res: ResponseMemory): # Make sure we have a continuous list # assert inf.mess_offset == saved_offset + saved_count @@ -410,7 +410,7 @@ def _convert_one_response(memory: ResponseMemory, inf: MultiInferenceMessage, re assert seq_count == res.count # In message and out message have same count. Just use probs as is - probs[seq_offset:seq_offset + seq_count, :] = res.probs + probs[seq_offset:seq_offset + seq_count, :] = res.get_output('probs') else: assert inf.count == res.count @@ -418,11 +418,11 @@ def _convert_one_response(memory: ResponseMemory, inf: MultiInferenceMessage, re # Out message has more reponses, so we have to do key based blending of probs for i, idx in enumerate(mess_ids): - probs[idx, :] = cp.maximum(probs[idx, :], res.probs[i, :]) - - return MultiResponseProbsMessage(meta=inf.meta, - mess_offset=inf.mess_offset, - mess_count=inf.mess_count, - memory=memory, - offset=inf.offset, - count=inf.count) + probs[idx, :] = cp.maximum(probs[idx, :], res.get_output('probs')[i, :]) + + return MultiResponseMessage(meta=inf.meta, + mess_offset=inf.mess_offset, + mess_count=inf.mess_count, + memory=memory, + offset=inf.offset, + count=inf.count) diff --git a/morpheus/stages/inference/pytorch_inference_stage.py b/morpheus/stages/inference/pytorch_inference_stage.py index d787fbba34..a7b9398ee5 100644 --- a/morpheus/stages/inference/pytorch_inference_stage.py +++ b/morpheus/stages/inference/pytorch_inference_stage.py @@ -22,7 +22,6 @@ from morpheus.config import PipelineModes from morpheus.messages import MultiInferenceMessage from morpheus.messages import ResponseMemory -from morpheus.messages import ResponseMemoryProbs from morpheus.stages.inference.inference_stage import InferenceStage from morpheus.stages.inference.inference_stage import InferenceWorker from morpheus.utils.producer_consumer_queue import ProducerConsumerQueue @@ -102,7 +101,7 @@ def process(self, batch: MultiInferenceMessage, cb: 
typing.Callable[[ResponseMem if (len(probs_cp.shape) == 1): probs_cp = cp.expand_dims(probs_cp, axis=1) - response_mem = ResponseMemoryProbs(count=batch.count, probs=probs_cp) + response_mem = ResponseMemory(count=batch.count, tensors={'probs': probs_cp}) # Return the response cb(response_mem) diff --git a/morpheus/stages/inference/triton_inference_stage.py b/morpheus/stages/inference/triton_inference_stage.py index a15a39d92f..362b51f4b4 100644 --- a/morpheus/stages/inference/triton_inference_stage.py +++ b/morpheus/stages/inference/triton_inference_stage.py @@ -36,7 +36,6 @@ from morpheus.config import PipelineModes from morpheus.messages import MultiInferenceMessage from morpheus.messages import ResponseMemory -from morpheus.messages import ResponseMemoryProbs from morpheus.stages.inference.inference_stage import InferenceStage from morpheus.stages.inference.inference_stage import InferenceWorker from morpheus.utils.producer_consumer_queue import ProducerConsumerQueue @@ -686,16 +685,16 @@ def default_inout_mapping(cls) -> typing.Dict[str, str]: "output": "probs", } - def _build_response(self, batch: MultiInferenceMessage, result: tritonclient.InferResult) -> ResponseMemoryProbs: + def _build_response(self, batch: MultiInferenceMessage, result: tritonclient.InferResult) -> ResponseMemory: output = {output.mapped_name: result.as_numpy(output.name) for output in self._outputs.values()} if (self._needs_logits): output = {key: 1.0 / (1.0 + np.exp(-val)) for key, val in output.items()} - mem = ResponseMemoryProbs( + mem = ResponseMemory( count=output["probs"].shape[0], - probs=cp.array(output["probs"]), # For now, only support one output + tensors={'probs': cp.array(output["probs"])} # For now, only support one output ) return mem @@ -761,7 +760,7 @@ def default_inout_mapping(cls) -> typing.Dict[str, str]: "output__0": "probs", } - def _build_response(self, batch: MultiInferenceMessage, result: tritonclient.InferResult) -> ResponseMemoryProbs: + def 
_build_response(self, batch: MultiInferenceMessage, result: tritonclient.InferResult) -> ResponseMemory: output = {output.mapped_name: result.as_numpy(output.name) for output in self._outputs.values()} @@ -769,9 +768,9 @@ def _build_response(self, batch: MultiInferenceMessage, result: tritonclient.Inf if (len(val.shape) == 1): output[key] = np.expand_dims(val, 1) - mem = ResponseMemoryProbs( + mem = ResponseMemory( count=output["probs"].shape[0], - probs=cp.array(output["probs"]), # For now, only support one output + tensors={'probs': cp.array(output["probs"])} # For now, only support one output ) return mem @@ -838,7 +837,7 @@ def supports_cpp_node(cls): # Enable support by default return False - def _build_response(self, batch: MultiInferenceMessage, result: tritonclient.InferResult) -> ResponseMemoryProbs: + def _build_response(self, batch: MultiInferenceMessage, result: tritonclient.InferResult) -> ResponseMemory: import torch @@ -860,9 +859,9 @@ def _build_response(self, batch: MultiInferenceMessage, result: tritonclient.Inf ae_scores = cp.asarray(net_loss) ae_scores = ae_scores.reshape((batch.count, 1)) - mem = ResponseMemoryProbs( + mem = ResponseMemory( count=batch.count, - probs=ae_scores, # For now, only support one output + tensors={'probs': ae_scores} # For now, only support one output ) return mem diff --git a/morpheus/stages/postprocess/add_classifications_stage.py b/morpheus/stages/postprocess/add_classifications_stage.py index 653eb11006..b1651090c7 100644 --- a/morpheus/stages/postprocess/add_classifications_stage.py +++ b/morpheus/stages/postprocess/add_classifications_stage.py @@ -20,7 +20,7 @@ from morpheus._lib.common import TypeId from morpheus.cli.register_stage import register_stage from morpheus.config import Config -from morpheus.messages import MultiResponseProbsMessage +from morpheus.messages import MultiResponseMessage from morpheus.pipeline.single_port_stage import SinglePortStage from morpheus.pipeline.stream_pair import StreamPair @@ 
-91,17 +91,17 @@ def accepted_types(self) -> typing.Tuple: Returns ------- - typing.Tuple[`morpheus.pipeline.messages.MultiResponseProbsMessage`, ] + typing.Tuple[`morpheus.pipeline.messages.MultiResponseMessage`, ] Accepted input types. """ - return (MultiResponseProbsMessage, ) + return (MultiResponseMessage, ) def supports_cpp_node(self): # Enable support by default return True - def _add_labels(self, x: MultiResponseProbsMessage): + def _add_labels(self, x: MultiResponseMessage): probs = x.get_output(self._output_name) @@ -134,4 +134,4 @@ def _build_single(self, builder: mrc.Builder, input_stream: StreamPair) -> Strea builder.make_edge(input_stream[0], stream) # Return input unchanged - return stream, MultiResponseProbsMessage + return stream, MultiResponseMessage diff --git a/morpheus/stages/postprocess/add_scores_stage.py b/morpheus/stages/postprocess/add_scores_stage.py index 15a167c235..0eda6dc7f4 100644 --- a/morpheus/stages/postprocess/add_scores_stage.py +++ b/morpheus/stages/postprocess/add_scores_stage.py @@ -21,7 +21,7 @@ from morpheus._lib.common import TypeId from morpheus.cli.register_stage import register_stage from morpheus.config import Config -from morpheus.messages import MultiResponseProbsMessage +from morpheus.messages import MultiResponseMessage from morpheus.pipeline.single_port_stage import SinglePortStage from morpheus.pipeline.stream_pair import StreamPair @@ -91,17 +91,17 @@ def accepted_types(self) -> typing.Tuple: Returns ------- - typing.Tuple[`morpheus.pipeline.messages.MultiResponseProbsMessage`, ] + typing.Tuple[`morpheus.pipeline.messages.MultiResponseMessage`, ] Accepted input types. 
""" - return (MultiResponseProbsMessage, ) + return (MultiResponseMessage, ) def supports_cpp_node(self): # Enable support by default return True - def _add_labels(self, x: MultiResponseProbsMessage): + def _add_labels(self, x: MultiResponseMessage): probs = x.get_output(self._output_name) if (probs.shape[1] != len(self._class_labels)): diff --git a/morpheus/stages/postprocess/generate_viz_frames_stage.py b/morpheus/stages/postprocess/generate_viz_frames_stage.py index 2756400504..a99163c38e 100644 --- a/morpheus/stages/postprocess/generate_viz_frames_stage.py +++ b/morpheus/stages/postprocess/generate_viz_frames_stage.py @@ -32,7 +32,7 @@ from morpheus.cli.register_stage import register_stage from morpheus.config import Config from morpheus.config import PipelineModes -from morpheus.messages import MultiResponseProbsMessage +from morpheus.messages import MultiResponseMessage from morpheus.pipeline.single_port_stage import SinglePortStage from morpheus.pipeline.stream_pair import StreamPair from morpheus.utils.producer_consumer_queue import AsyncIOProducerConsumerQueue @@ -79,11 +79,11 @@ def accepted_types(self) -> typing.Tuple: Returns ------- - typing.Tuple[morpheus.pipeline.messages.MultiResponseProbsMessage, ] + typing.Tuple[morpheus.pipeline.messages.MultiResponseMessage, ] Accepted input types """ - return (MultiResponseProbsMessage, ) + return (MultiResponseMessage, ) def supports_cpp_node(self): return False @@ -106,7 +106,7 @@ def round_to_sec(x): """ return int(round(x / 1000.0) * 1000) - def _to_vis_df(self, x: MultiResponseProbsMessage): + def _to_vis_df(self, x: MultiResponseMessage): idx2label = { 0: 'address', @@ -131,8 +131,9 @@ def indent_data(y: str): df["data"] = df["data"].apply(indent_data) - pass_thresh = (x.probs >= 0.5).any(axis=1) - max_arg = x.probs.argmax(axis=1) + probs = x.get_output('probs') + pass_thresh = (probs >= 0.5).any(axis=1) + max_arg = probs.argmax(axis=1) condlist = [pass_thresh] @@ -240,7 +241,7 @@ def _build_single(self, 
seg: mrc.Builder, input_stream: StreamPair) -> StreamPai def node_fn(input, output): - def write_batch(x: MultiResponseProbsMessage): + def write_batch(x: MultiResponseMessage): sink = pa.BufferOutputStream() diff --git a/morpheus/stages/postprocess/ml_flow_drift_stage.py b/morpheus/stages/postprocess/ml_flow_drift_stage.py index 664950a90d..92f7454fbd 100644 --- a/morpheus/stages/postprocess/ml_flow_drift_stage.py +++ b/morpheus/stages/postprocess/ml_flow_drift_stage.py @@ -24,7 +24,6 @@ from morpheus.config import Config from morpheus.config import PipelineModes from morpheus.messages import MultiResponseMessage -from morpheus.messages import MultiResponseProbsMessage from morpheus.pipeline.single_port_stage import SinglePortStage from morpheus.pipeline.stream_pair import StreamPair @@ -123,15 +122,15 @@ def accepted_types(self) -> typing.Tuple: Accepted input types. """ - return (MultiResponseProbsMessage, ) + return (MultiResponseMessage, ) def supports_cpp_node(self): return False - def _calc_drift(self, x: MultiResponseProbsMessage): + def _calc_drift(self, x: MultiResponseMessage): # All probs in a batch will be calculated - shifted = cp.abs(x.probs - 0.5) + 0.5 + shifted = cp.abs(x.get_output('probs') - 0.5) + 0.5 # Make sure the labels list is long enough for x in range(len(self._labels), shifted.shape[1]): diff --git a/tests/test_abp.py b/tests/test_abp.py index 8ef9bf9e74..571a7c7947 100755 --- a/tests/test_abp.py +++ b/tests/test_abp.py @@ -26,7 +26,7 @@ from morpheus.messages import MessageMeta from morpheus.messages import MultiInferenceMessage from morpheus.messages import MultiMessage -from morpheus.messages import MultiResponseProbsMessage +from morpheus.messages import MultiResponseMessage from morpheus.pipeline import LinearPipeline from morpheus.stages.general.monitor_stage import MonitorStage from morpheus.stages.inference.triton_inference_stage import TritonInferenceStage @@ -223,17 +223,17 @@ def async_infer(callback=None, **k): 
pipe.add_stage( TritonInferenceStage(config, model_name='abp-nvsmi-xgb', server_url='test:0000', force_convert_inputs=True)) - pipe.add_segment_boundary(MultiResponseProbsMessage) # Boundary 3 + pipe.add_segment_boundary(MultiResponseMessage) # Boundary 3 pipe.add_stage(MonitorStage(config, description="Inference Rate", smoothing=0.001, unit="inf")) pipe.add_stage(AddClassificationsStage(config)) - pipe.add_segment_boundary(MultiResponseProbsMessage) # Boundary 4 + pipe.add_segment_boundary(MultiResponseMessage) # Boundary 4 pipe.add_stage( ValidationStage(config, val_file_name=val_file_name, results_file_name=results_file_name, rel_tol=0.05)) - pipe.add_segment_boundary(MultiResponseProbsMessage) # Boundary 5 + pipe.add_segment_boundary(MultiResponseMessage) # Boundary 5 pipe.add_stage(SerializeStage(config)) @@ -283,17 +283,17 @@ def test_abp_multi_segment_cpp(config, tmp_path): TritonInferenceStage(config, model_name='abp-nvsmi-xgb', server_url='localhost:8001', force_convert_inputs=True)) - pipe.add_segment_boundary(MultiResponseProbsMessage) # Boundary 3 + pipe.add_segment_boundary(MultiResponseMessage) # Boundary 3 pipe.add_stage(MonitorStage(config, description="Inference Rate", smoothing=0.001, unit="inf")) pipe.add_stage(AddClassificationsStage(config)) - pipe.add_segment_boundary(MultiResponseProbsMessage) # Boundary 4 + pipe.add_segment_boundary(MultiResponseMessage) # Boundary 4 pipe.add_stage( ValidationStage(config, val_file_name=val_file_name, results_file_name=results_file_name, rel_tol=0.05)) - pipe.add_segment_boundary(MultiResponseProbsMessage) # Boundary 5 + pipe.add_segment_boundary(MultiResponseMessage) # Boundary 5 pipe.add_stage(SerializeStage(config)) diff --git a/tests/test_add_classifications_stage_pipe.py b/tests/test_add_classifications_stage_pipe.py index 50263a9b4e..85c42f9498 100755 --- a/tests/test_add_classifications_stage_pipe.py +++ b/tests/test_add_classifications_stage_pipe.py @@ -22,7 +22,7 @@ from morpheus.messages import 
MessageMeta from morpheus.messages import MultiMessage -from morpheus.messages import MultiResponseProbsMessage +from morpheus.messages import MultiResponseMessage from morpheus.pipeline import LinearPipeline from morpheus.stages.input.file_source_stage import FileSourceStage from morpheus.stages.output.write_to_file_stage import WriteToFileStage @@ -86,9 +86,9 @@ def test_add_classifications_stage_multi_segment_pipe(config, tmp_path): pipe.add_stage(DeserializeStage(config)) pipe.add_segment_boundary(MultiMessage) pipe.add_stage(ConvMsg(config, input_file)) - pipe.add_segment_boundary(MultiResponseProbsMessage) + pipe.add_segment_boundary(MultiResponseMessage) pipe.add_stage(AddClassificationsStage(config, threshold=threshold)) - pipe.add_segment_boundary(MultiResponseProbsMessage) + pipe.add_segment_boundary(MultiResponseMessage) pipe.add_stage(SerializeStage(config, include=["^{}$".format(c) for c in config.class_labels])) pipe.add_segment_boundary(MessageMeta) pipe.add_stage(WriteToFileStage(config, filename=out_file, overwrite=False)) diff --git a/tests/test_add_scores_stage_pipe.py b/tests/test_add_scores_stage_pipe.py index c3589c9e39..9ec5b2fe26 100755 --- a/tests/test_add_scores_stage_pipe.py +++ b/tests/test_add_scores_stage_pipe.py @@ -22,7 +22,7 @@ from morpheus.messages import MessageMeta from morpheus.messages import MultiMessage -from morpheus.messages import MultiResponseProbsMessage +from morpheus.messages import MultiResponseMessage from morpheus.pipeline import LinearPipeline from morpheus.stages.input.file_source_stage import FileSourceStage from morpheus.stages.output.write_to_file_stage import WriteToFileStage @@ -94,9 +94,9 @@ def test_add_scores_stage_multi_segment_pipe(config, tmp_path, repeat): pipe.add_stage(DeserializeStage(config)) pipe.add_segment_boundary(MultiMessage) pipe.add_stage(ConvMsg(config, columns=get_column_names_from_file(input_file))) - pipe.add_segment_boundary(MultiResponseProbsMessage) + 
pipe.add_segment_boundary(MultiResponseMessage) pipe.add_stage(AddScoresStage(config)) - pipe.add_segment_boundary(MultiResponseProbsMessage) + pipe.add_segment_boundary(MultiResponseMessage) pipe.add_stage(SerializeStage(config, include=["^{}$".format(c) for c in config.class_labels])) pipe.add_segment_boundary(MessageMeta) pipe.add_stage(WriteToFileStage(config, filename=out_file, overwrite=False)) diff --git a/tests/test_dfp.py b/tests/test_dfp.py index 08698ea731..f23b53a1fd 100755 --- a/tests/test_dfp.py +++ b/tests/test_dfp.py @@ -27,7 +27,7 @@ from morpheus.messages.message_meta import UserMessageMeta from morpheus.messages.multi_ae_message import MultiAEMessage from morpheus.messages.multi_inference_message import MultiInferenceMessage -from morpheus.messages.multi_response_message import MultiResponseProbsMessage +from morpheus.messages.multi_response_message import MultiResponseMessage from morpheus.pipeline import LinearPipeline from morpheus.stages.general.monitor_stage import MonitorStage from morpheus.stages.inference.auto_encoder_inference_stage import AutoEncoderInferenceStage @@ -264,9 +264,9 @@ def test_dfp_user123_multi_segment(mock_ae, config, tmp_path): pipe.add_stage(preprocess_ae_stage.PreprocessAEStage(config)) pipe.add_segment_boundary(MultiInferenceMessage) # Boundary 3 pipe.add_stage(AutoEncoderInferenceStage(config)) - pipe.add_segment_boundary(MultiResponseProbsMessage) # Boundary 4 + pipe.add_segment_boundary(MultiResponseMessage) # Boundary 4 pipe.add_stage(AddScoresStage(config)) - pipe.add_segment_boundary(MultiResponseProbsMessage) # Boundary 5 + pipe.add_segment_boundary(MultiResponseMessage) # Boundary 5 pipe.add_stage( TimeSeriesStage(config, resolution="1m", @@ -275,7 +275,7 @@ def test_dfp_user123_multi_segment(mock_ae, config, tmp_path): cold_end=False, filter_percent=90.0, zscore_threshold=8.0)) - pipe.add_segment_boundary(MultiResponseProbsMessage) # Boundary 6 + pipe.add_segment_boundary(MultiResponseMessage) # Boundary 6 
pipe.add_stage(MonitorStage(config, description="Inference Rate", smoothing=0.001, unit="inf")) pipe.add_stage( ValidationStage(config, @@ -284,7 +284,7 @@ def test_dfp_user123_multi_segment(mock_ae, config, tmp_path): index_col="_index_", exclude=("event_dt", "zscore"), rel_tol=0.1)) - pipe.add_segment_boundary(MultiResponseProbsMessage) # Boundary 7 + pipe.add_segment_boundary(MultiResponseMessage) # Boundary 7 pipe.add_stage(SerializeStage(config, include=[])) pipe.add_segment_boundary(MessageMeta) # Boundary 9 pipe.add_stage(WriteToFileStage(config, filename=out_file, overwrite=False)) diff --git a/tests/test_filter_detections_stage.py b/tests/test_filter_detections_stage.py index fae5d13ede..5e0e8ea14d 100755 --- a/tests/test_filter_detections_stage.py +++ b/tests/test_filter_detections_stage.py @@ -23,8 +23,8 @@ from morpheus._lib.common import FileTypes from morpheus._lib.common import FilterSource from morpheus.io.deserializers import read_file_to_df -from morpheus.messages import MultiResponseProbsMessage -from morpheus.messages import ResponseMemoryProbs +from morpheus.messages import MultiResponseMessage +from morpheus.messages import ResponseMemory from morpheus.messages.message_meta import MessageMeta from morpheus.stages.postprocess.filter_detections_stage import FilterDetectionsStage from utils import TEST_DIRS @@ -32,8 +32,8 @@ def _make_message(df, probs): df_ = df[0:len(probs)] - mem = ResponseMemoryProbs(count=len(df_), probs=probs) - return MultiResponseProbsMessage(MessageMeta(df_), 0, len(df_), mem, 0, len(df_)) + mem = ResponseMemory(count=len(df_), tensors={'probs': probs}) + return MultiResponseMessage(MessageMeta(df_), 0, len(df_), mem, 0, len(df_)) def test_constructor(config): diff --git a/tests/test_filter_detections_stage_pipe.py b/tests/test_filter_detections_stage_pipe.py index 3068d0a4d8..e2a2a42c67 100755 --- a/tests/test_filter_detections_stage_pipe.py +++ b/tests/test_filter_detections_stage_pipe.py @@ -21,7 +21,7 @@ from 
morpheus.messages import MessageMeta from morpheus.messages import MultiMessage -from morpheus.messages import MultiResponseProbsMessage +from morpheus.messages import MultiResponseMessage from morpheus.pipeline import LinearPipeline from morpheus.stages.input.file_source_stage import FileSourceStage from morpheus.stages.output.write_to_file_stage import WriteToFileStage @@ -84,9 +84,9 @@ def _test_filter_detections_stage_multi_segment_pipe(config, tmp_path, copy=True pipe.add_stage(DeserializeStage(config)) pipe.add_segment_boundary(MultiMessage) pipe.add_stage(ConvMsg(config)) - pipe.add_segment_boundary(MultiResponseProbsMessage) + pipe.add_segment_boundary(MultiResponseMessage) pipe.add_stage(FilterDetectionsStage(config, threshold=threshold, copy=copy)) - pipe.add_segment_boundary(MultiResponseProbsMessage) + pipe.add_segment_boundary(MultiResponseMessage) pipe.add_stage(SerializeStage(config)) pipe.add_segment_boundary(MessageMeta) pipe.add_stage(WriteToFileStage(config, filename=out_file, overwrite=False)) diff --git a/tests/test_inference_stage.py b/tests/test_inference_stage.py index 3670d3b210..8ecab9894e 100755 --- a/tests/test_inference_stage.py +++ b/tests/test_inference_stage.py @@ -20,7 +20,7 @@ import cupy as cp import pytest -from morpheus.messages import ResponseMemoryProbs +from morpheus.messages import ResponseMemory from morpheus.stages.inference import inference_stage from utils import IW @@ -39,7 +39,7 @@ def _mk_message(count=1, mess_count=1, offset=0, mess_offset=0): m.offset = offset m.mess_offset = mess_offset m.mess_count = mess_count - m.probs = cp.array([[0.1, 0.5, 0.8], [0.2, 0.6, 0.9]]) + m.set_output('probs', cp.array([[0.1, 0.5, 0.8], [0.2, 0.6, 0.9]])) m.seq_ids = cp.array([list(range(count)), list(range(count)), list(range(count))]) m.get_input.return_value = cp.array([[0, 1, 2], [0, 1, 2], [0, 1, 2]]) return m @@ -153,7 +153,7 @@ def test_py_inf_fn_on_next(mock_ops, mock_future, config): IW.process.assert_called_once() 
set_output_fut = IW.process.call_args[0][1] - set_output_fut(ResponseMemoryProbs(count=1, probs=cp.zeros((1, 2)))) + set_output_fut(ResponseMemory(count=1, tensors={'probs': cp.zeros((1, 2))})) mock_future.set_result.assert_called_once() @@ -231,32 +231,32 @@ def test_convert_response(config): mm2 = _mk_message(mess_offset=1) out_msg1 = _mk_message() - out_msg1.probs = cp.array([[0.1, 0.5, 0.8]]) + out_msg1.get_output.return_value = cp.array([[0.1, 0.5, 0.8]]) out_msg2 = _mk_message(mess_offset=1) - out_msg2.probs = cp.array([[0.1, 0.5, 0.8]]) + out_msg2.get_output.return_value = cp.array([[0.1, 0.5, 0.8]]) resp = inference_stage.InferenceStage._convert_response(([mm1, mm2], [out_msg1, out_msg2])) assert resp.meta == mm1.meta assert resp.mess_offset == 0 assert resp.mess_count == 2 - assert isinstance(resp.memory, ResponseMemoryProbs) + assert isinstance(resp.memory, ResponseMemory) assert resp.offset == 0 assert resp.count == 2 - assert resp.memory.probs.tolist() == [[0.1, 0.5, 0.8], [0, 0, 0]] + assert resp.memory.get_output('probs').tolist() == [[0.1, 0.5, 0.8], [0, 0, 0]] mm2.count = 2 - out_msg2.probs = cp.array([[0.1, 0.5, 0.8], [4.5, 6.7, 8.9]]) + out_msg2.get_output.return_value = cp.array([[0.1, 0.5, 0.8], [4.5, 6.7, 8.9]]) mm2.seq_ids = cp.array([[0], [1]]) out_msg2.count = 2 resp = inference_stage.InferenceStage._convert_response(([mm1, mm2], [out_msg1, out_msg2])) assert resp.meta == mm1.meta assert resp.mess_offset == 0 assert resp.mess_count == 2 - assert isinstance(resp.memory, ResponseMemoryProbs) + assert isinstance(resp.memory, ResponseMemory) assert resp.offset == 0 assert resp.count == 2 - assert resp.memory.probs.tolist() == [[0.1, 0.5, 0.8], [4.5, 6.7, 8.9]] + assert resp.memory.get_output('probs').tolist() == [[0.1, 0.5, 0.8], [4.5, 6.7, 8.9]] def test_convert_response_errors(): @@ -268,10 +268,10 @@ def test_convert_response_errors(): mm2 = _mk_message(mess_offset=12) out_msg1 = _mk_message() - out_msg1.probs = cp.array([[0.1, 0.5, 0.8]]) + 
out_msg1.get_output.return_value = cp.array([[0.1, 0.5, 0.8]]) out_msg2 = _mk_message(mess_offset=1) - out_msg2.probs = cp.array([[0.1, 0.5, 0.8]]) + out_msg2.get_output.return_value = cp.array([[0.1, 0.5, 0.8]]) pytest.raises(AssertionError, inference_stage.InferenceStage._convert_response, ([mm1, mm2], [out_msg1, out_msg2])) @@ -294,10 +294,10 @@ def test_convert_response_errors(): @pytest.mark.use_python def test_convert_one_response(config): # Test first branch where `inf.mess_count == inf.count` - mem = ResponseMemoryProbs(1, probs=cp.zeros((1, 3))) + mem = ResponseMemory(1, tensors={'probs': cp.zeros((1, 3))}) inf = _mk_message() - res = ResponseMemoryProbs(count=1, probs=cp.array([[1, 2, 3]])) + res = ResponseMemory(count=1, tensors={'probs': cp.array([[1, 2, 3]])}) mpm = inference_stage.InferenceStage._convert_one_response(mem, inf, res) assert mpm.meta == inf.meta @@ -310,15 +310,15 @@ def test_convert_one_response(config): # Test for the second branch inf.mess_count = 2 inf.seq_ids = cp.array([[0], [1]]) - res = ResponseMemoryProbs(count=1, probs=cp.array([[0, 0.6, 0.7], [5.6, 4.4, 9.2]])) + res = ResponseMemory(count=1, tensors={'probs': cp.array([[0, 0.6, 0.7], [5.6, 4.4, 9.2]])}) - mem = ResponseMemoryProbs(1, probs=cp.array([[0.1, 0.5, 0.8], [4.5, 6.7, 8.9]])) + mem = ResponseMemory(1, tensors={'probs': cp.array([[0.1, 0.5, 0.8], [4.5, 6.7, 8.9]])}) mpm = inference_stage.InferenceStage._convert_one_response(mem, inf, res) assert mem.get_output('probs').tolist() == [[0.1, 0.6, 0.8], [5.6, 6.7, 9.2]] def test_convert_one_response_error(): - mem = ResponseMemoryProbs(1, probs=cp.zeros((1, 3))) + mem = ResponseMemory(1, tensors={'probs': cp.zeros((1, 3))}) inf = _mk_message(mess_count=2) res = _mk_message(count=2) diff --git a/tests/test_preallocation_pipe.py b/tests/test_preallocation_pipe.py index cc6d114b42..303932d471 100755 --- a/tests/test_preallocation_pipe.py +++ b/tests/test_preallocation_pipe.py @@ -25,7 +25,7 @@ from morpheus._lib.common import 
tyepid_to_numpy_str from morpheus.messages import MessageMeta from morpheus.messages import MultiMessage -from morpheus.messages import MultiResponseProbsMessage +from morpheus.messages import MultiResponseMessage from morpheus.pipeline import LinearPipeline from morpheus.pipeline.single_port_stage import SinglePortStage from morpheus.stages.input.file_source_stage import FileSourceStage @@ -129,9 +129,9 @@ def test_preallocation_multi_segment_pipe(config, tmp_path, probs_type): pipe.add_segment_boundary(MultiMessage) pipe.add_stage( ConvMsg(config, columns=get_column_names_from_file(input_file), probs_type=tyepid_to_numpy_str(probs_type))) - (_, boundary_ingress) = pipe.add_segment_boundary(MultiResponseProbsMessage) + (_, boundary_ingress) = pipe.add_segment_boundary(MultiResponseMessage) pipe.add_stage(CheckPreAlloc(config, probs_type=probs_type)) - pipe.add_segment_boundary(MultiResponseProbsMessage) + pipe.add_segment_boundary(MultiResponseMessage) pipe.add_stage(SerializeStage(config, include=["^{}$".format(c) for c in config.class_labels])) pipe.add_segment_boundary(MessageMeta) pipe.add_stage(WriteToFileStage(config, filename=out_file, overwrite=False)) diff --git a/tests/test_tensor_memory.py b/tests/test_tensor_memory.py index f1eda298f5..0f46daa418 100644 --- a/tests/test_tensor_memory.py +++ b/tests/test_tensor_memory.py @@ -129,7 +129,7 @@ def check_response_memory_probs_and_ae(cls): m = cls(count=count, probs=test_data) assert m.count == count compare_tensors(m.get_tensors(), {'probs': test_data}) - assert (m.probs == test_data).all() + assert (m.get_output('probs') == test_data).all() return m diff --git a/tests/utils.py b/tests/utils.py index 8a355732bc..114de1f3f5 100755 --- a/tests/utils.py +++ b/tests/utils.py @@ -30,8 +30,8 @@ from morpheus.io.deserializers import read_file_to_df from morpheus.io.serializers import df_to_csv from morpheus.messages import MultiMessage -from morpheus.messages import MultiResponseProbsMessage -from 
morpheus.messages import ResponseMemoryProbs +from morpheus.messages import MultiResponseMessage +from morpheus.messages import ResponseMemory from morpheus.pipeline.single_port_stage import SinglePortStage from morpheus.stages.inference import inference_stage @@ -56,7 +56,7 @@ def __init__(self, cur_file=__file__) -> None: @register_stage("unittest-conv-msg") class ConvMsg(SinglePortStage): """ - Simple test stage to convert a MultiMessage to a MultiResponseProbsMessage + Simple test stage to convert a MultiMessage to a MultiResponseMessage Basically a cheap replacement for running an inference stage. Setting `expected_data_file` to the path of a cav/json file will cause the probs array to be read from file. @@ -104,14 +104,14 @@ def _conv_message(self, m): else: probs = cp.array(df.values, dtype=self._probs_type, copy=True, order=self._order) - memory = ResponseMemoryProbs(count=len(probs), probs=probs) - return MultiResponseProbsMessage(m.meta, m.mess_offset, len(probs), memory, 0, len(probs)) + memory = ResponseMemory(count=len(probs), tensors={'probs': probs}) + return MultiResponseMessage(m.meta, m.mess_offset, len(probs), memory, 0, len(probs)) def _build_single(self, builder: mrc.Builder, input_stream): stream = builder.make_node(self.unique_name, self._conv_message) builder.make_edge(input_stream[0], stream) - return stream, MultiResponseProbsMessage + return stream, MultiResponseMessage class IW(inference_stage.InferenceWorker): From b8dd5b0a7a52bec6af5b48196da4949cc3bfc583 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Tue, 14 Feb 2023 16:53:43 -0800 Subject: [PATCH 38/85] Replace MultiResponseProbsMessage from abp_nvsmi_detection example output --- examples/abp_nvsmi_detection/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/abp_nvsmi_detection/README.md b/examples/abp_nvsmi_detection/README.md index 6e5bd8dc10..02e1820bc9 100644 --- a/examples/abp_nvsmi_detection/README.md +++ 
b/examples/abp_nvsmi_detection/README.md @@ -187,13 +187,13 @@ Added stage: Added stage: └─ morpheus.MultiMessage -> morpheus.MultiInferenceFILMessage Added stage: - └─ morpheus.MultiInferenceFILMessage -> morpheus.MultiResponseProbsMessage + └─ morpheus.MultiInferenceFILMessage -> morpheus.MultiResponseMessage Added stage: - └─ morpheus.MultiResponseProbsMessage -> morpheus.MultiResponseProbsMessage + └─ morpheus.MultiResponseMessage -> morpheus.MultiResponseMessage Added stage: - └─ morpheus.MultiResponseProbsMessage -> morpheus.MultiResponseProbsMessage + └─ morpheus.MultiResponseMessage -> morpheus.MultiResponseMessage Added stage: - └─ morpheus.MultiResponseProbsMessage -> morpheus.MessageMeta + └─ morpheus.MultiResponseMessage -> morpheus.MessageMeta Added stage: └─ morpheus.MessageMeta -> morpheus.MessageMeta ====Building Pipeline Complete!==== From a8904dcdeb0838563eb956230c5109b6da878e77 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Tue, 14 Feb 2023 16:54:05 -0800 Subject: [PATCH 39/85] Replace MultiResponseProbsMessage from nlp_si_detection example output --- examples/nlp_si_detection/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/nlp_si_detection/README.md b/examples/nlp_si_detection/README.md index d545355ffe..f94652293e 100644 --- a/examples/nlp_si_detection/README.md +++ b/examples/nlp_si_detection/README.md @@ -175,13 +175,13 @@ Added stage: Added stage: └─ morpheus.MultiMessage -> morpheus.MultiInferenceNLPMessage Added stage: - └─ morpheus.MultiInferenceNLPMessage -> morpheus.MultiResponseProbsMessage + └─ morpheus.MultiInferenceNLPMessage -> morpheus.MultiResponseMessage Added stage: - └─ morpheus.MultiResponseProbsMessage -> morpheus.MultiResponseProbsMessage + └─ morpheus.MultiResponseMessage -> morpheus.MultiResponseMessage Added stage: - └─ morpheus.MultiResponseProbsMessage -> morpheus.MultiResponseProbsMessage + └─ morpheus.MultiResponseMessage -> morpheus.MultiResponseMessage Added stage: - 
└─ morpheus.MultiResponseProbsMessage -> morpheus.MessageMeta + └─ morpheus.MultiResponseMessage -> morpheus.MessageMeta Added stage: └─ morpheus.MessageMeta -> morpheus.MessageMeta ====Building Pipeline Complete!==== From 59f9837198e0f974fdb721b6fee34c77478871e1 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Tue, 14 Feb 2023 16:56:32 -0800 Subject: [PATCH 40/85] Replace MultiResponseProbsMessage from rca example output --- examples/root_cause_analysis/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/root_cause_analysis/README.md b/examples/root_cause_analysis/README.md index 828f5e461a..000fc7b030 100644 --- a/examples/root_cause_analysis/README.md +++ b/examples/root_cause_analysis/README.md @@ -167,13 +167,13 @@ Added stage: Added stage: └─ morpheus.MultiMessage -> morpheus.MultiInferenceNLPMessage Added stage: - └─ morpheus.MultiInferenceNLPMessage -> morpheus.MultiResponseProbsMessage + └─ morpheus.MultiInferenceNLPMessage -> morpheus.MultiResponseMessage Added stage: - └─ morpheus.MultiResponseProbsMessage -> morpheus.MultiResponseProbsMessage + └─ morpheus.MultiResponseMessage -> morpheus.MultiResponseMessage Added stage: - └─ morpheus.MultiResponseProbsMessage -> morpheus.MultiResponseProbsMessage + └─ morpheus.MultiResponseMessage -> morpheus.MultiResponseMessage Added stage: - └─ morpheus.MultiResponseProbsMessage -> morpheus.MessageMeta + └─ morpheus.MultiResponseMessage -> morpheus.MessageMeta Added stage: └─ morpheus.MessageMeta -> morpheus.MessageMeta Inference rate[Complete]: 473 inf [00:01, 340.43 inf/s] From 35275e7346a848ed9cb5c8b1fcb02b5d8f8689c2 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Tue, 14 Feb 2023 16:58:05 -0800 Subject: [PATCH 41/85] WIP --- examples/digital_fingerprinting/starter/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/digital_fingerprinting/starter/README.md b/examples/digital_fingerprinting/starter/README.md index 
0b511314b9..271d69103f 100644 --- a/examples/digital_fingerprinting/starter/README.md +++ b/examples/digital_fingerprinting/starter/README.md @@ -135,7 +135,7 @@ The `PreprocessAEStage` is responsible for creating a Morpheus message that cont **Postprocessing stage** - The DFP pipeline uses the `AddScoresStage` for postprocessing to add anomaly scores and zscores from previous inference stage with matching labels. -**Serialize stage** - `SerializeStage` is used to convert `MultiResponseProbsMessage` from previous stage to a `MessageMeta` to make it suitable for output (i.e. write to file or Kafka). +**Serialize stage** - `SerializeStage` is used to convert `MultiResponseMessage` from previous stage to a `MessageMeta` to make it suitable for output (i.e. write to file or Kafka). **Write stage** - `WriteToFileStage` writes input data with inference results to an output file path. From 0bdeefb3ec28d4269b08afaa9b0fe2a32f4c6f31 Mon Sep 17 00:00:00 2001 From: Michael Demoret Date: Tue, 21 Feb 2023 19:48:40 -0700 Subject: [PATCH 42/85] Testing removing some pybind includes to see if IWYU flags it. 
--- .../_lib/include/morpheus/messages/memory/inference_memory.hpp | 2 -- .../_lib/include/morpheus/messages/memory/response_memory.hpp | 2 -- morpheus/_lib/src/messages/memory/response_memory.cpp | 2 -- 3 files changed, 6 deletions(-) diff --git a/morpheus/_lib/include/morpheus/messages/memory/inference_memory.hpp b/morpheus/_lib/include/morpheus/messages/memory/inference_memory.hpp index fc303089ee..fca2efebe0 100644 --- a/morpheus/_lib/include/morpheus/messages/memory/inference_memory.hpp +++ b/morpheus/_lib/include/morpheus/messages/memory/inference_memory.hpp @@ -19,8 +19,6 @@ #include "morpheus/messages/memory/tensor_memory.hpp" -#include // for object - #include // for size_t #include #include diff --git a/morpheus/_lib/include/morpheus/messages/memory/response_memory.hpp b/morpheus/_lib/include/morpheus/messages/memory/response_memory.hpp index 93bec4f6b1..0dcdba5bd4 100644 --- a/morpheus/_lib/include/morpheus/messages/memory/response_memory.hpp +++ b/morpheus/_lib/include/morpheus/messages/memory/response_memory.hpp @@ -20,8 +20,6 @@ #include "morpheus/messages/memory/tensor_memory.hpp" #include "morpheus/objects/tensor_object.hpp" // for TensorObject -#include // for object - #include // for size_t #include #include diff --git a/morpheus/_lib/src/messages/memory/response_memory.cpp b/morpheus/_lib/src/messages/memory/response_memory.cpp index 954331739f..77f65181dc 100644 --- a/morpheus/_lib/src/messages/memory/response_memory.cpp +++ b/morpheus/_lib/src/messages/memory/response_memory.cpp @@ -19,8 +19,6 @@ #include "morpheus/utilities/cupy_util.hpp" -#include // for key_error & object - #include #include // for move From a90e4c5f3560814556738111c2d080ace45694b3 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Wed, 22 Feb 2023 12:33:12 -0800 Subject: [PATCH 43/85] Fix tensor_map_t got moved --- morpheus/_lib/src/stages/triton_inference.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/morpheus/_lib/src/stages/triton_inference.cpp 
b/morpheus/_lib/src/stages/triton_inference.cpp index ec4a21451e..97502c79e3 100644 --- a/morpheus/_lib/src/stages/triton_inference.cpp +++ b/morpheus/_lib/src/stages/triton_inference.cpp @@ -27,6 +27,7 @@ #include "morpheus/objects/tensor.hpp" #include "morpheus/objects/tensor_object.hpp" // for TensorIndex, TensorObject #include "morpheus/objects/triton_in_out.hpp" +#include "morpheus/utilities/cupy_util.hpp" // for tensor_map_t #include "morpheus/utilities/matx_util.hpp" #include "morpheus/utilities/stage_util.hpp" // for foreach_map #include "morpheus/utilities/string_util.hpp" // for MORPHEUS_CONCAT_STR @@ -58,7 +59,7 @@ namespace { using namespace morpheus; -using tensor_map_t = TensorMemory::tensor_map_t; +using tensor_map_t = CupyUtil::tensor_map_t; using buffer_map_t = std::map>; // Component-private free functions. From 59c03cb016c5e5bc051a2f18f94ec09f6d614e51 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Fri, 24 Feb 2023 11:57:23 -0800 Subject: [PATCH 44/85] Fix bug where tensors was static --- morpheus/_lib/src/stages/triton_inference.cpp | 5 +++-- morpheus/messages/tensor_memory.py | 6 +++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/morpheus/_lib/src/stages/triton_inference.cpp b/morpheus/_lib/src/stages/triton_inference.cpp index 3ce3f89e2f..760e220187 100644 --- a/morpheus/_lib/src/stages/triton_inference.cpp +++ b/morpheus/_lib/src/stages/triton_inference.cpp @@ -27,8 +27,9 @@ #include "morpheus/objects/tensor.hpp" #include "morpheus/objects/tensor_object.hpp" // for TensorIndex, TensorObject #include "morpheus/objects/triton_in_out.hpp" +#include "morpheus/utilities/cupy_util.hpp" #include "morpheus/utilities/matx_util.hpp" -#include "morpheus/utilities/stage_util.hpp" // for foreach_map +#include "morpheus/utilities/stage_util.hpp" // for foreach_map #include "morpheus/utilities/string_util.hpp" // for MORPHEUS_CONCAT_STR #include "morpheus/utilities/tensor_util.hpp" // for get_elem_count @@ -58,7 +59,7 @@ namespace { 
using namespace morpheus; -using tensor_map_t = TensorMemory::tensor_map_t; +using tensor_map_t = CupyUtil::tensor_map_t; using buffer_map_t = std::map>; // Component-private free functions. diff --git a/morpheus/messages/tensor_memory.py b/morpheus/messages/tensor_memory.py index 3c1675eb27..1d07697df2 100644 --- a/morpheus/messages/tensor_memory.py +++ b/morpheus/messages/tensor_memory.py @@ -43,8 +43,12 @@ class TensorMemory(MessageData, cpp_class=_messages.TensorMemory): compare=False, hash=False) - def __init__(self, count: int, tensors: typing.Dict[str, cp.ndarray] = {}): + def __init__(self, count: int, tensors: typing.Dict[str, cp.ndarray] = None): self.count = count + + if tensors is None: + tensors = {} + self._tensors = tensors def get_tensors(self): From 2e993cdff095cd8beb55f8c99cd8b6a06aa516ac Mon Sep 17 00:00:00 2001 From: David Gardner Date: Fri, 24 Feb 2023 12:55:42 -0800 Subject: [PATCH 45/85] Set default value for tensors to none --- .../messages/memory/inference_memory.hpp | 2 +- .../morpheus/messages/memory/response_memory.hpp | 2 +- .../morpheus/messages/memory/tensor_memory.hpp | 2 +- .../src/messages/memory/inference_memory.cpp | 16 +++++++++++++--- .../_lib/src/messages/memory/response_memory.cpp | 15 +++++++++++++-- .../_lib/src/messages/memory/tensor_memory.cpp | 13 +++++++++++-- morpheus/_lib/src/python_modules/messages.cpp | 12 +++--------- 7 files changed, 43 insertions(+), 19 deletions(-) diff --git a/morpheus/_lib/include/morpheus/messages/memory/inference_memory.hpp b/morpheus/_lib/include/morpheus/messages/memory/inference_memory.hpp index fca2efebe0..bb7f6b61eb 100644 --- a/morpheus/_lib/include/morpheus/messages/memory/inference_memory.hpp +++ b/morpheus/_lib/include/morpheus/messages/memory/inference_memory.hpp @@ -77,7 +77,7 @@ struct InferenceMemoryInterfaceProxy : public TensorMemoryInterfaceProxy * @param tensors : Map of string on to cupy arrays * @return std::shared_ptr */ - static std::shared_ptr init(std::size_t count, 
CupyUtil::py_tensor_map_t tensors); + static std::shared_ptr init(std::size_t count, pybind11::object& tensors); }; #pragma GCC visibility pop diff --git a/morpheus/_lib/include/morpheus/messages/memory/response_memory.hpp b/morpheus/_lib/include/morpheus/messages/memory/response_memory.hpp index 0dcdba5bd4..a81a7cf6e6 100644 --- a/morpheus/_lib/include/morpheus/messages/memory/response_memory.hpp +++ b/morpheus/_lib/include/morpheus/messages/memory/response_memory.hpp @@ -80,7 +80,7 @@ struct ResponseMemoryInterfaceProxy : public TensorMemoryInterfaceProxy * @param cupy_tensors : Map of string on to cupy arrays * @return std::shared_ptr */ - static std::shared_ptr init(std::size_t count, CupyUtil::py_tensor_map_t tensors); + static std::shared_ptr init(std::size_t count, pybind11::object& tensors); }; #pragma GCC visibility pop diff --git a/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp b/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp index 8c2ea9ec5a..21c13494f1 100644 --- a/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp +++ b/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp @@ -100,7 +100,7 @@ struct TensorMemoryInterfaceProxy * @param tensors : Map of string on to cupy arrays * @return std::shared_ptr */ - static std::shared_ptr init(std::size_t count, CupyUtil::py_tensor_map_t tensors); + static std::shared_ptr init(std::size_t count, pybind11::object& tensors); /** * @brief Get the count object diff --git a/morpheus/_lib/src/messages/memory/inference_memory.cpp b/morpheus/_lib/src/messages/memory/inference_memory.cpp index 85e5f7bc67..382280cb84 100644 --- a/morpheus/_lib/src/messages/memory/inference_memory.cpp +++ b/morpheus/_lib/src/messages/memory/inference_memory.cpp @@ -17,6 +17,9 @@ #include "morpheus/messages/memory/inference_memory.hpp" +#include // for object +#include + #include #include // for move @@ -34,10 +37,17 @@ bool InferenceMemory::has_input(const std::string& name) 
const } /****** InferenceMemoryInterfaceProxy *************************/ -std::shared_ptr InferenceMemoryInterfaceProxy::init(std::size_t count, - CupyUtil::py_tensor_map_t tensors) +std::shared_ptr InferenceMemoryInterfaceProxy::init(std::size_t count, pybind11::object& tensors) { - return std::make_shared(count, std::move(CupyUtil::cupy_to_tensors(tensors))); + if (tensors.is_none()) + { + return std::make_shared(count); + } + else + { + return std::make_shared( + count, std::move(CupyUtil::cupy_to_tensors(tensors.cast()))); + } } } // namespace morpheus diff --git a/morpheus/_lib/src/messages/memory/response_memory.cpp b/morpheus/_lib/src/messages/memory/response_memory.cpp index 77f65181dc..00ae4c0118 100644 --- a/morpheus/_lib/src/messages/memory/response_memory.cpp +++ b/morpheus/_lib/src/messages/memory/response_memory.cpp @@ -19,6 +19,9 @@ #include "morpheus/utilities/cupy_util.hpp" +#include // for object +#include + #include #include // for move @@ -35,9 +38,17 @@ bool ResponseMemory::has_output(const std::string& name) const } /****** ResponseMemoryInterfaceProxy *************************/ -std::shared_ptr ResponseMemoryInterfaceProxy::init(std::size_t count, CupyUtil::py_tensor_map_t tensors) +std::shared_ptr ResponseMemoryInterfaceProxy::init(std::size_t count, pybind11::object& tensors) { - return std::make_shared(count, std::move(CupyUtil::cupy_to_tensors(tensors))); + if (tensors.is_none()) + { + return std::make_shared(count); + } + else + { + return std::make_shared( + count, std::move(CupyUtil::cupy_to_tensors(tensors.cast()))); + } } } // namespace morpheus diff --git a/morpheus/_lib/src/messages/memory/tensor_memory.cpp b/morpheus/_lib/src/messages/memory/tensor_memory.cpp index 6449953b14..75528ed427 100644 --- a/morpheus/_lib/src/messages/memory/tensor_memory.cpp +++ b/morpheus/_lib/src/messages/memory/tensor_memory.cpp @@ -20,6 +20,7 @@ #include "morpheus/utilities/cupy_util.hpp" #include // for key_error & object +#include #include #include 
@@ -49,9 +50,17 @@ CupyUtil::tensor_map_t TensorMemory::copy_tensor_ranges(const std::vector TensorMemoryInterfaceProxy::init(std::size_t count, CupyUtil::py_tensor_map_t tensors) +std::shared_ptr TensorMemoryInterfaceProxy::init(std::size_t count, pybind11::object& tensors) { - return std::make_shared(count, std::move(CupyUtil::cupy_to_tensors(tensors))); + if (tensors.is_none()) + { + return std::make_shared(count); + } + else + { + return std::make_shared( + count, std::move(CupyUtil::cupy_to_tensors(tensors.cast()))); + } } std::size_t TensorMemoryInterfaceProxy::get_count(TensorMemory& self) diff --git a/morpheus/_lib/src/python_modules/messages.cpp b/morpheus/_lib/src/python_modules/messages.cpp index df8afc532a..5a441ffd13 100644 --- a/morpheus/_lib/src/python_modules/messages.cpp +++ b/morpheus/_lib/src/python_modules/messages.cpp @@ -158,9 +158,7 @@ PYBIND11_MODULE(messages, m) .def("get_meta_list", &MultiMessageInterfaceProxy::get_meta_list, py::return_value_policy::move); py::class_>(m, "TensorMemory") - .def(py::init<>(&TensorMemoryInterfaceProxy::init), - py::arg("count"), - py::arg("tensors") = CupyUtil::py_tensor_map_t()) + .def(py::init<>(&TensorMemoryInterfaceProxy::init), py::arg("count"), py::arg("tensors") = py::none()) .def_readonly("count", &TensorMemory::count) .def("get_tensors", &TensorMemoryInterfaceProxy::get_tensors, py::return_value_policy::move) .def("set_tensors", &TensorMemoryInterfaceProxy::set_tensors, py::arg("tensors")) @@ -168,9 +166,7 @@ PYBIND11_MODULE(messages, m) .def("set_tensor", &TensorMemoryInterfaceProxy::set_tensor, py::arg("name"), py::arg("tensor")); py::class_>(m, "InferenceMemory") - .def(py::init<>(&InferenceMemoryInterfaceProxy::init), - py::arg("count"), - py::arg("tensors") = CupyUtil::py_tensor_map_t()) + .def(py::init<>(&InferenceMemoryInterfaceProxy::init), py::arg("count"), py::arg("tensors") = py::none()) .def_readonly("count", &InferenceMemory::count) .def("get_tensors", 
&InferenceMemoryInterfaceProxy::get_tensors, py::return_value_policy::move) .def("set_tensors", &InferenceMemoryInterfaceProxy::set_tensors, py::arg("tensors")) @@ -263,9 +259,7 @@ PYBIND11_MODULE(messages, m) .def_property_readonly("count", &MultiInferenceFILMessageInterfaceProxy::count); py::class_>(m, "ResponseMemory") - .def(py::init<>(&ResponseMemoryInterfaceProxy::init), - py::arg("count"), - py::arg("tensors") = CupyUtil::py_tensor_map_t()) + .def(py::init<>(&ResponseMemoryInterfaceProxy::init), py::arg("count"), py::arg("tensors") = py::none()) .def_readonly("count", &ResponseMemory::count) .def("get_tensors", &ResponseMemoryInterfaceProxy::get_tensors, py::return_value_policy::move) .def("set_tensors", &ResponseMemoryInterfaceProxy::set_tensors, py::arg("tensors")) From c70d3bf961d4cb0fc3967f6f1921afd2f5885bbb Mon Sep 17 00:00:00 2001 From: David Gardner Date: Fri, 24 Feb 2023 13:01:45 -0800 Subject: [PATCH 46/85] Remove out of date dataclass --- morpheus/messages/tensor_memory.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/morpheus/messages/tensor_memory.py b/morpheus/messages/tensor_memory.py index 1d07697df2..b5b42824ef 100644 --- a/morpheus/messages/tensor_memory.py +++ b/morpheus/messages/tensor_memory.py @@ -38,11 +38,6 @@ class TensorMemory(MessageData, cpp_class=_messages.TensorMemory): """ count: int - tensors: typing.Dict[str, cp.ndarray] = dataclasses.field(default_factory=dict, - repr=False, - compare=False, - hash=False) - def __init__(self, count: int, tensors: typing.Dict[str, cp.ndarray] = None): self.count = count From 94bbe6bb74cc4146236dd466615cbe7e4fe50584 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Fri, 24 Feb 2023 13:07:30 -0800 Subject: [PATCH 47/85] Remove get_tensor_object --- .../morpheus/messages/memory/tensor_memory.hpp | 9 --------- morpheus/_lib/src/messages/memory/tensor_memory.cpp | 12 +++--------- 2 files changed, 3 insertions(+), 18 deletions(-) diff --git 
a/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp b/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp index 21c13494f1..9966be0b44 100644 --- a/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp +++ b/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp @@ -126,15 +126,6 @@ struct TensorMemoryInterfaceProxy */ static void set_tensors(TensorMemory& self, CupyUtil::py_tensor_map_t tensors); - /** - * @brief Get the output tensor - * - * @param self - * @param name - * @return const TensorObject& - */ - static const TensorObject& get_tensor_object(TensorMemory& self, const std::string& name); - /** * @brief Get the tensor object identified by `name` * diff --git a/morpheus/_lib/src/messages/memory/tensor_memory.cpp b/morpheus/_lib/src/messages/memory/tensor_memory.cpp index 75528ed427..a505042fc5 100644 --- a/morpheus/_lib/src/messages/memory/tensor_memory.cpp +++ b/morpheus/_lib/src/messages/memory/tensor_memory.cpp @@ -78,20 +78,14 @@ void TensorMemoryInterfaceProxy::set_tensors(TensorMemory& self, CupyUtil::py_te self.tensors = std::move(CupyUtil::cupy_to_tensors(tensors)); } -const TensorObject& TensorMemoryInterfaceProxy::get_tensor_object(TensorMemory& self, const std::string& name) +pybind11::object TensorMemoryInterfaceProxy::get_tensor(TensorMemory& self, const std::string name) { - const auto tensor_itr = self.tensors.find(name); - if (tensor_itr == self.tensors.end()) + if (!self.has_tensor(name)) { throw pybind11::key_error{}; } - return tensor_itr->second; -} - -pybind11::object TensorMemoryInterfaceProxy::get_tensor(TensorMemory& self, const std::string name) -{ - return CupyUtil::tensor_to_cupy(TensorMemoryInterfaceProxy::get_tensor_object(self, name)); + return CupyUtil::tensor_to_cupy(self.tensors[name]); } void TensorMemoryInterfaceProxy::set_tensor(TensorMemory& self, From b9eb78abe8b7c3b6cfba66bab8383c2255c9934a Mon Sep 17 00:00:00 2001 From: David Gardner Date: Fri, 24 Feb 2023 13:57:59 
-0800 Subject: [PATCH 48/85] Fix missed merge conflict --- .../morpheus/messages/memory/response_memory_probs.hpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/morpheus/_lib/include/morpheus/messages/memory/response_memory_probs.hpp b/morpheus/_lib/include/morpheus/messages/memory/response_memory_probs.hpp index 1ae8d56f45..afb43fd17b 100644 --- a/morpheus/_lib/include/morpheus/messages/memory/response_memory_probs.hpp +++ b/morpheus/_lib/include/morpheus/messages/memory/response_memory_probs.hpp @@ -57,11 +57,7 @@ class ResponseMemoryProbs : public ResponseMemory * @param count * @param tensors */ -<<<<<<< HEAD ResponseMemoryProbs(size_t count, CupyUtil::tensor_map_t&& tensors); -======= - ResponseMemoryProbs(size_t count, tensor_map_t&& tensors); ->>>>>>> branch-23.03 /** * @brief Returns the tensor named 'probs', throws a `std::runtime_error` if it does not exist From 8380a830303cb9c33df4a1654e513b6c602c236a Mon Sep 17 00:00:00 2001 From: David Gardner Date: Fri, 24 Feb 2023 15:00:14 -0800 Subject: [PATCH 49/85] Check tensor lengths match the count, use moves for tensor objects --- .../messages/memory/inference_memory_fil.hpp | 6 +-- .../messages/memory/inference_memory_nlp.hpp | 8 ++-- .../messages/memory/response_memory_probs.hpp | 4 +- .../messages/memory/tensor_memory.hpp | 36 ++++++++++++++++ .../messages/memory/inference_memory_fil.cpp | 14 +++---- .../messages/memory/inference_memory_nlp.cpp | 24 +++++------ .../messages/memory/response_memory_probs.cpp | 8 ++-- .../src/messages/memory/tensor_memory.cpp | 42 +++++++++++++++++-- morpheus/_lib/src/stages/preprocess_fil.cpp | 3 +- 9 files changed, 109 insertions(+), 36 deletions(-) diff --git a/morpheus/_lib/include/morpheus/messages/memory/inference_memory_fil.hpp b/morpheus/_lib/include/morpheus/messages/memory/inference_memory_fil.hpp index 9f27dfe73e..02f50e832b 100644 --- a/morpheus/_lib/include/morpheus/messages/memory/inference_memory_fil.hpp +++ 
b/morpheus/_lib/include/morpheus/messages/memory/inference_memory_fil.hpp @@ -52,7 +52,7 @@ class InferenceMemoryFIL : public InferenceMemory * @param seq_ids : Ids used to index from an inference input to a message. Necessary since there can be more * inference inputs than messages (i.e., if some messages get broken into multiple inference requests) */ - InferenceMemoryFIL(size_t count, TensorObject input__0, TensorObject seq_ids); + InferenceMemoryFIL(size_t count, TensorObject&& input__0, TensorObject&& seq_ids); /** * @brief Returns the 'input__0' tensor, throws a `std::runtime_error` if it does not exist @@ -77,7 +77,7 @@ class InferenceMemoryFIL : public InferenceMemory * @throw std::runtime_error * @throw std::runtime_error */ - void set_input__0(TensorObject input_ids); + void set_input__0(TensorObject&& input_ids); /** * @brief Sets a tensor named 'seq_ids' @@ -85,7 +85,7 @@ class InferenceMemoryFIL : public InferenceMemory * @param seq_ids * @throw std::runtime_error */ - void set_seq_ids(TensorObject seq_ids); + void set_seq_ids(TensorObject&& seq_ids); }; /****** InferenceMemoryFILInterfaceProxy *************************/ diff --git a/morpheus/_lib/include/morpheus/messages/memory/inference_memory_nlp.hpp b/morpheus/_lib/include/morpheus/messages/memory/inference_memory_nlp.hpp index abf138ab2b..954f2b229b 100644 --- a/morpheus/_lib/include/morpheus/messages/memory/inference_memory_nlp.hpp +++ b/morpheus/_lib/include/morpheus/messages/memory/inference_memory_nlp.hpp @@ -52,7 +52,7 @@ class InferenceMemoryNLP : public InferenceMemory * @param seq_ids : Ids used to index from an inference input to a message. 
Necessary since there can be more inference inputs than messages (i.e., if some messages get broken into multiple inference requests) */ - InferenceMemoryNLP(std::size_t count, TensorObject input_ids, TensorObject input_mask, TensorObject seq_ids); + InferenceMemoryNLP(std::size_t count, TensorObject&& input_ids, TensorObject&& input_mask, TensorObject&& seq_ids); /** * @brief Get the input ids object @@ -80,21 +80,21 @@ class InferenceMemoryNLP : public InferenceMemory * * @param input_ids */ - void set_input_ids(TensorObject input_ids); + void set_input_ids(TensorObject&& input_ids); /** * @brief Set the input mask object * * @param input_mask */ - void set_input_mask(TensorObject input_mask); + void set_input_mask(TensorObject&& input_mask); /** * @brief Set the seq ids object * * @param seq_ids */ - void set_seq_ids(TensorObject seq_ids); + void set_seq_ids(TensorObject&& seq_ids); }; /****** InferenceMemoryNLPInterfaceProxy********************/ diff --git a/morpheus/_lib/include/morpheus/messages/memory/response_memory_probs.hpp b/morpheus/_lib/include/morpheus/messages/memory/response_memory_probs.hpp index afb43fd17b..2fb45cf44d 100644 --- a/morpheus/_lib/include/morpheus/messages/memory/response_memory_probs.hpp +++ b/morpheus/_lib/include/morpheus/messages/memory/response_memory_probs.hpp @@ -50,7 +50,7 @@ class ResponseMemoryProbs : public ResponseMemory * @param count * @param probs */ - ResponseMemoryProbs(size_t count, TensorObject probs); + ResponseMemoryProbs(size_t count, TensorObject&& probs); /** * @brief Construct a new Response Memory Probs object * @@ -71,7 +71,7 @@ class ResponseMemoryProbs : public ResponseMemory * * @param probs */ - void set_probs(TensorObject probs); + void set_probs(TensorObject&& probs); }; /****** ResponseMemoryProbsInterfaceProxy*******************/ diff --git a/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp b/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp index 
9966be0b44..46887f22d7 100644 --- a/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp +++ b/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp @@ -84,6 +84,42 @@ class TensorMemory */ CupyUtil::tensor_map_t copy_tensor_ranges(const std::vector>& ranges, size_t num_selected_rows) const; + + /** + * @brief Set the tensor object identified by `name` + * + * @param name + * @param tensor + * @throws std::length_error If the number of rows in `tensor` does not match `count`. + */ + void set_tensor(const std::string& name, TensorObject&& tensor); + + /** + * @brief Set the tensors object + * + * @param tensors + * @throws std::length_error If the number of rows in the `tensors` do not match `count`. + */ + void set_tensors(CupyUtil::tensor_map_t&& tensors); + + protected: + /** + * @brief Checks if the number of rows in `tensor` matches count + * + * @param tensor + * @throws std::length_error If the number of rows in `tensor` do not match `count`. + */ + void check_tensor_length(const TensorObject& tensor); + + /** + * @brief Checks each tensor in `tensors` verifying that the number of rows matches count + * + * @param tensor + * @throws std::length_error If the number of rows in the `tensors` do not match `count`. 
+ * + * @param tensors + */ + void check_tensors_length(const CupyUtil::tensor_map_t& tensors); }; /****** TensorMemoryInterfaceProxy *************************/ diff --git a/morpheus/_lib/src/messages/memory/inference_memory_fil.cpp b/morpheus/_lib/src/messages/memory/inference_memory_fil.cpp index 2d2e921de5..b3a1029300 100644 --- a/morpheus/_lib/src/messages/memory/inference_memory_fil.cpp +++ b/morpheus/_lib/src/messages/memory/inference_memory_fil.cpp @@ -33,11 +33,11 @@ namespace morpheus { /****** Component public implementations *******************/ /****** InferenceMemoryFIL****************************************/ -InferenceMemoryFIL::InferenceMemoryFIL(size_t count, TensorObject input__0, TensorObject seq_ids) : +InferenceMemoryFIL::InferenceMemoryFIL(size_t count, TensorObject&& input__0, TensorObject&& seq_ids) : InferenceMemory(count) { - this->tensors["input__0"] = std::move(input__0); - this->tensors["seq_ids"] = std::move(seq_ids); + set_tensor("input__0", std::move(input__0)); + set_tensor("seq_ids", std::move(seq_ids)); } const TensorObject& InferenceMemoryFIL::get_input__0() const @@ -51,9 +51,9 @@ const TensorObject& InferenceMemoryFIL::get_input__0() const return found->second; } -void InferenceMemoryFIL::set_input__0(TensorObject input__0) +void InferenceMemoryFIL::set_input__0(TensorObject&& input__0) { - this->tensors["input__0"] = std::move(input__0); + set_tensor("input__0", std::move(input__0)); } const TensorObject& InferenceMemoryFIL::get_seq_ids() const @@ -67,9 +67,9 @@ const TensorObject& InferenceMemoryFIL::get_seq_ids() const return found->second; } -void InferenceMemoryFIL::set_seq_ids(TensorObject seq_ids) +void InferenceMemoryFIL::set_seq_ids(TensorObject&& seq_ids) { - this->tensors["seq_ids"] = std::move(seq_ids); + set_tensor("seq_ids", std::move(seq_ids)); } /****** InferenceMemoryFILInterfaceProxy *************************/ std::shared_ptr InferenceMemoryFILInterfaceProxy::init(cudf::size_type count, diff --git 
a/morpheus/_lib/src/messages/memory/inference_memory_nlp.cpp b/morpheus/_lib/src/messages/memory/inference_memory_nlp.cpp index 3c2e7107c2..538334d34a 100644 --- a/morpheus/_lib/src/messages/memory/inference_memory_nlp.cpp +++ b/morpheus/_lib/src/messages/memory/inference_memory_nlp.cpp @@ -33,14 +33,14 @@ namespace morpheus { /****** Component public implementations *******************/ /****** InferenceMemoryNLP ****************************************/ InferenceMemoryNLP::InferenceMemoryNLP(std::size_t count, - TensorObject input_ids, - TensorObject input_mask, - TensorObject seq_ids) : + TensorObject&& input_ids, + TensorObject&& input_mask, + TensorObject&& seq_ids) : InferenceMemory(count) { - this->tensors["input_ids"] = std::move(input_ids); - this->tensors["input_mask"] = std::move(input_mask); - this->tensors["seq_ids"] = std::move(seq_ids); + set_tensor("input_ids", std::move(input_ids)); + set_tensor("input_mask", std::move(input_mask)); + set_tensor("seq_ids", std::move(seq_ids)); } const TensorObject& InferenceMemoryNLP::get_input_ids() const @@ -54,9 +54,9 @@ const TensorObject& InferenceMemoryNLP::get_input_ids() const return found->second; } -void InferenceMemoryNLP::set_input_ids(TensorObject input_ids) +void InferenceMemoryNLP::set_input_ids(TensorObject&& input_ids) { - this->tensors["input_ids"] = std::move(input_ids); + set_tensor("input_ids", std::move(input_ids)); } const TensorObject& InferenceMemoryNLP::get_input_mask() const @@ -70,9 +70,9 @@ const TensorObject& InferenceMemoryNLP::get_input_mask() const return found->second; } -void InferenceMemoryNLP::set_input_mask(TensorObject input_mask) +void InferenceMemoryNLP::set_input_mask(TensorObject&& input_mask) { - this->tensors["input_mask"] = std::move(input_mask); + set_tensor("input_mask", std::move(input_mask)); } const TensorObject& InferenceMemoryNLP::get_seq_ids() const @@ -86,9 +86,9 @@ const TensorObject& InferenceMemoryNLP::get_seq_ids() const return found->second; } -void 
InferenceMemoryNLP::set_seq_ids(TensorObject seq_ids) +void InferenceMemoryNLP::set_seq_ids(TensorObject&& seq_ids) { - this->tensors["seq_ids"] = std::move(seq_ids); + set_tensor("seq_ids", std::move(seq_ids)); } /****** InferenceMemoryNLPInterfaceProxy *************************/ diff --git a/morpheus/_lib/src/messages/memory/response_memory_probs.cpp b/morpheus/_lib/src/messages/memory/response_memory_probs.cpp index 8d5e09b998..16c236c1b2 100644 --- a/morpheus/_lib/src/messages/memory/response_memory_probs.cpp +++ b/morpheus/_lib/src/messages/memory/response_memory_probs.cpp @@ -31,9 +31,9 @@ namespace morpheus { /****** Component public implementations *******************/ /****** ResponseMemoryProbs****************************************/ -ResponseMemoryProbs::ResponseMemoryProbs(size_t count, TensorObject probs) : ResponseMemory(count) +ResponseMemoryProbs::ResponseMemoryProbs(size_t count, TensorObject&& probs) : ResponseMemory(count) { - this->tensors["probs"] = std::move(probs); + set_tensor("probs", std::move(probs)); } ResponseMemoryProbs::ResponseMemoryProbs(size_t count, CupyUtil::tensor_map_t&& tensors) : @@ -53,9 +53,9 @@ const TensorObject& ResponseMemoryProbs::get_probs() const return found->second; } -void ResponseMemoryProbs::set_probs(TensorObject probs) +void ResponseMemoryProbs::set_probs(TensorObject&& probs) { - this->tensors["probs"] = std::move(probs); + set_tensor("probs", std::move(probs)); } /****** ResponseMemoryProbsInterfaceProxy *************************/ diff --git a/morpheus/_lib/src/messages/memory/tensor_memory.cpp b/morpheus/_lib/src/messages/memory/tensor_memory.cpp index a505042fc5..e3b3554b43 100644 --- a/morpheus/_lib/src/messages/memory/tensor_memory.cpp +++ b/morpheus/_lib/src/messages/memory/tensor_memory.cpp @@ -19,9 +19,12 @@ #include "morpheus/utilities/cupy_util.hpp" +#include #include // for key_error & object #include +#include +#include // for std::length_error #include #include @@ -30,7 +33,9 @@ namespace 
morpheus { /****** TensorMemory****************************************/ TensorMemory::TensorMemory(size_t count) : count(count) {} TensorMemory::TensorMemory(size_t count, CupyUtil::tensor_map_t&& tensors) : count(count), tensors(std::move(tensors)) -{} +{ + check_tensors_length(this->tensors); +} bool TensorMemory::has_tensor(const std::string& name) const { @@ -49,6 +54,37 @@ CupyUtil::tensor_map_t TensorMemory::copy_tensor_ranges(const std::vectorcount) + { + std::stringstream err_msg; + err_msg << "The number rows in tensor " << tensor.shape(0) << " does not match TensorMemory.count of " + << this->count; + throw std::length_error{err_msg.str()}; + } +} + +void TensorMemory::set_tensor(const std::string& name, TensorObject&& tensor) +{ + check_tensor_length(tensor); + this->tensors.insert_or_assign(name, std::move(tensor)); +} + +void TensorMemory::check_tensors_length(const CupyUtil::tensor_map_t& tensors) +{ + for (const auto& p : tensors) + { + check_tensor_length(p.second); + } +} + +void TensorMemory::set_tensors(CupyUtil::tensor_map_t&& tensors) +{ + check_tensors_length(tensors); + this->tensors = std::move(tensors); +} + /****** TensorMemoryInterfaceProxy *************************/ std::shared_ptr TensorMemoryInterfaceProxy::init(std::size_t count, pybind11::object& tensors) { @@ -75,7 +111,7 @@ CupyUtil::py_tensor_map_t TensorMemoryInterfaceProxy::get_tensors(TensorMemory& void TensorMemoryInterfaceProxy::set_tensors(TensorMemory& self, CupyUtil::py_tensor_map_t tensors) { - self.tensors = std::move(CupyUtil::cupy_to_tensors(tensors)); + self.set_tensors(CupyUtil::cupy_to_tensors(tensors)); } pybind11::object TensorMemoryInterfaceProxy::get_tensor(TensorMemory& self, const std::string name) @@ -92,7 +128,7 @@ void TensorMemoryInterfaceProxy::set_tensor(TensorMemory& self, const std::string name, const pybind11::object& cupy_tensor) { - self.tensors.insert_or_assign(name, CupyUtil::cupy_to_tensor(cupy_tensor)); + self.set_tensor(name, 
CupyUtil::cupy_to_tensor(cupy_tensor)); } } // namespace morpheus diff --git a/morpheus/_lib/src/stages/preprocess_fil.cpp b/morpheus/_lib/src/stages/preprocess_fil.cpp index 645b76c300..0c9a4f429f 100644 --- a/morpheus/_lib/src/stages/preprocess_fil.cpp +++ b/morpheus/_lib/src/stages/preprocess_fil.cpp @@ -116,7 +116,8 @@ PreprocessFILStage::subscribe_fn_t PreprocessFILStage::build_operator() 0); // Build the results - auto memory = std::make_shared(x->mess_count, input__0, seg_ids); + auto memory = + std::make_shared(x->mess_count, std::move(input__0), std::move(seg_ids)); auto next = std::make_shared( x->meta, x->mess_offset, x->mess_count, std::move(memory), 0, memory->count); From 64265e0c73133d6f0c85e18ef759b1ee049b5cd0 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Fri, 24 Feb 2023 15:48:57 -0800 Subject: [PATCH 50/85] Ensure tensor lengths match the count --- morpheus/messages/tensor_memory.py | 14 ++++++++++++++ tests/test_tensor_memory.py | 22 ++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/morpheus/messages/tensor_memory.py b/morpheus/messages/tensor_memory.py index b5b42824ef..a75a92adf4 100644 --- a/morpheus/messages/tensor_memory.py +++ b/morpheus/messages/tensor_memory.py @@ -43,9 +43,21 @@ def __init__(self, count: int, tensors: typing.Dict[str, cp.ndarray] = None): if tensors is None: tensors = {} + else: + self._check_tensors(tensors) self._tensors = tensors + def _check_tensors(self, tensors: typing.Dict[str, cp.ndarray]): + for tensor in tensors.values(): + self._check_tensor(tensor) + + def _check_tensor(self, tensor: cp.ndarray): + if (tensor.shape[0] != self.count): + class_name = type(self).__name__ + raise ValueError( + f"The number rows in tensor {tensor.shape[0]} does not match {class_name}.count of {self.count}") + def get_tensors(self): """ Get the tensors contained by this instance. 
It is important to note that when C++ execution is enabled the @@ -68,6 +80,7 @@ def set_tensors(self, tensors): tensors : typing.Dict[str, cupy.ndarray] Collection of tensors uniquely identified by a name. """ + self._check_tensors(tensors) self._tensors = tensors def get_tensor(self, name): @@ -101,4 +114,5 @@ def set_tensor(self, name, tensor): tensors : typing.Dict[str, cupy.ndarray] Collection of tensors uniquely identified by a name. """ + self._check_tensor(tensor) self._tensors[name] = tensor diff --git a/tests/test_tensor_memory.py b/tests/test_tensor_memory.py index f1eda298f5..b58273afb0 100644 --- a/tests/test_tensor_memory.py +++ b/tests/test_tensor_memory.py @@ -150,3 +150,25 @@ def test_response_memory_ae(config): def test_response_memory_probs(config): check_response_memory_probs_and_ae(ResponseMemoryProbs) + + +@pytest.mark.parametrize("tensor_cls", [TensorMemory, InferenceMemory, ResponseMemory]) +def test_constructor_length_error(config, tensor_cls): + count = 10 + tensors = {"a": cp.zeros(count), "b": cp.ones(count)} + pytest.raises(ValueError, tensor_cls, count - 1, tensors) + + +@pytest.mark.parametrize("tensor_cls", [TensorMemory, InferenceMemory, ResponseMemory]) +def test_set_tensor_length_error(config, tensor_cls): + count = 10 + m = tensor_cls(count) + pytest.raises(ValueError, m.set_tensor, 'a', cp.zeros(count + 1)) + + +@pytest.mark.parametrize("tensor_cls", [TensorMemory, InferenceMemory, ResponseMemory]) +def test_set_tensors_length_error(config, tensor_cls): + count = 10 + tensors = {"a": cp.zeros(count), "b": cp.ones(count)} + m = tensor_cls(count + 1) + pytest.raises(ValueError, m.set_tensors, tensors) From e96f8c2beaf0924ac1bc415ae04979903a4bac8f Mon Sep 17 00:00:00 2001 From: David Gardner Date: Fri, 24 Feb 2023 16:03:54 -0800 Subject: [PATCH 51/85] Fix tests --- tests/test_inference_stage.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_inference_stage.py b/tests/test_inference_stage.py 
index 3670d3b210..eb1dffb33f 100755 --- a/tests/test_inference_stage.py +++ b/tests/test_inference_stage.py @@ -288,7 +288,7 @@ def test_convert_response_errors(): mm2.count.side_effect = [2, 1] mm2.mess_count.side_effect = [2, 1, 1] - pytest.raises(AssertionError, inference_stage.InferenceStage._convert_response, ([mm1, mm2], [out_msg1, out_msg2])) + pytest.raises(ValueError, inference_stage.InferenceStage._convert_response, ([mm1, mm2], [out_msg1, out_msg2])) @pytest.mark.use_python @@ -308,11 +308,11 @@ def test_convert_one_response(config): assert mem.get_output('probs').tolist() == [[1.0, 2.0, 3.0]] # Test for the second branch - inf.mess_count = 2 + inf.count = 2 inf.seq_ids = cp.array([[0], [1]]) - res = ResponseMemoryProbs(count=1, probs=cp.array([[0, 0.6, 0.7], [5.6, 4.4, 9.2]])) + res = ResponseMemoryProbs(count=2, probs=cp.array([[0, 0.6, 0.7], [5.6, 4.4, 9.2]])) - mem = ResponseMemoryProbs(1, probs=cp.array([[0.1, 0.5, 0.8], [4.5, 6.7, 8.9]])) + mem = ResponseMemoryProbs(2, probs=cp.array([[0.1, 0.5, 0.8], [4.5, 6.7, 8.9]])) mpm = inference_stage.InferenceStage._convert_one_response(mem, inf, res) assert mem.get_output('probs').tolist() == [[0.1, 0.6, 0.8], [5.6, 6.7, 9.2]] From c68b0f70b70887cf6cc799f67a6f34d28d36399f Mon Sep 17 00:00:00 2001 From: David Gardner Date: Fri, 24 Feb 2023 16:09:50 -0800 Subject: [PATCH 52/85] Add type hints --- morpheus/messages/multi_inference_message.py | 8 ++++---- morpheus/messages/multi_response_message.py | 6 +++--- morpheus/messages/tensor_memory.py | 6 +++--- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/morpheus/messages/multi_inference_message.py b/morpheus/messages/multi_inference_message.py index 3d32e1f52a..e7245d8d99 100644 --- a/morpheus/messages/multi_inference_message.py +++ b/morpheus/messages/multi_inference_message.py @@ -56,7 +56,7 @@ def get_input(self, name: str): except KeyError: raise AttributeError - def set_input(self, name: str, value): + def set_input(self, name: str, value: 
cp.ndarray): """ Setter function used with DataClassProp for setting inference input in message containers derived from InferenceMemory. @@ -94,7 +94,7 @@ class InferenceMemoryNLP(InferenceMemory, cpp_class=_messages.InferenceMemoryNLP input_mask: dataclasses.InitVar[cp.ndarray] = DataClassProp(InferenceMemory.get_input, InferenceMemory.set_input) seq_ids: dataclasses.InitVar[cp.ndarray] = DataClassProp(InferenceMemory.get_input, InferenceMemory.set_input) - def __init__(self, count, input_ids, input_mask, seq_ids): + def __init__(self, count: int, input_ids: cp.ndarray, input_mask: cp.ndarray, seq_ids: cp.ndarray): super().__init__(count, tensors={'input_ids': input_ids, 'input_mask': input_mask, 'seq_ids': seq_ids}) @@ -116,7 +116,7 @@ class InferenceMemoryFIL(InferenceMemory, cpp_class=_messages.InferenceMemoryFIL input__0: dataclasses.InitVar[cp.ndarray] = DataClassProp(InferenceMemory.get_input, InferenceMemory.set_input) seq_ids: dataclasses.InitVar[cp.ndarray] = DataClassProp(InferenceMemory.get_input, InferenceMemory.set_input) - def __init__(self, count, input__0, seq_ids): + def __init__(self, count: int, input__0: cp.ndarray, seq_ids: cp.ndarray): super().__init__(count, tensors={'input__0': input__0, 'seq_ids': seq_ids}) @@ -137,7 +137,7 @@ class InferenceMemoryAE(InferenceMemory, cpp_class=None): input: dataclasses.InitVar[cp.ndarray] = DataClassProp(InferenceMemory.get_input, InferenceMemory.set_input) seq_ids: dataclasses.InitVar[cp.ndarray] = DataClassProp(InferenceMemory.get_input, InferenceMemory.set_input) - def __init__(self, count, input, seq_ids): + def __init__(self, count: int, input: cp.ndarray, seq_ids: cp.ndarray): super().__init__(count, tensors={'input': input, 'seq_ids': seq_ids}) diff --git a/morpheus/messages/multi_response_message.py b/morpheus/messages/multi_response_message.py index 3e6df2a453..935ea8ce70 100644 --- a/morpheus/messages/multi_response_message.py +++ b/morpheus/messages/multi_response_message.py @@ -55,7 +55,7 @@ 
def get_output(self, name: str): except KeyError: raise AttributeError - def set_output(self, name: str, value): + def set_output(self, name: str, value: cp.ndarray): """ Setter function used with DataClassProp for setting output in message containers derived from ResponseMemory. @@ -85,7 +85,7 @@ class ResponseMemoryProbs(ResponseMemory, cpp_class=_messages.ResponseMemoryProb """ probs: dataclasses.InitVar[cp.ndarray] = DataClassProp(ResponseMemory.get_output, ResponseMemory.set_output) - def __init__(self, count, probs): + def __init__(self, count: int, probs: cp.ndarray): super().__init__(count, tensors={'probs': probs}) @@ -110,7 +110,7 @@ class ResponseMemoryAE(ResponseMemory, cpp_class=None): user_id = "" explain_df = None - def __init__(self, count, probs): + def __init__(self, count: int, probs: cp.ndarray): super().__init__(count, tensors={'probs': probs}) diff --git a/morpheus/messages/tensor_memory.py b/morpheus/messages/tensor_memory.py index a75a92adf4..60209b0df1 100644 --- a/morpheus/messages/tensor_memory.py +++ b/morpheus/messages/tensor_memory.py @@ -70,7 +70,7 @@ def get_tensors(self): """ return self._tensors - def set_tensors(self, tensors): + def set_tensors(self, tensors: typing.Dict[str, cp.ndarray]): """ Overwrite the tensors stored by this instance. If the length of the tensors has changed, then the `count` property should also be updated. @@ -83,7 +83,7 @@ def set_tensors(self, tensors): self._check_tensors(tensors) self._tensors = tensors - def get_tensor(self, name): + def get_tensor(self, name: str): """ Get the Tensor stored in the TensorMemory container identified by `name`. @@ -104,7 +104,7 @@ def get_tensor(self, name): """ return self._tensors[name] - def set_tensor(self, name, tensor): + def set_tensor(self, name: str, tensor: cp.ndarray): """ Update the tensor identified by `name`. If the length of the tensor has changed, then the `count` property should also be updated. 
From 56d46c45e2850103d3e6650fa52ce437b48f077a Mon Sep 17 00:00:00 2001 From: David Gardner Date: Mon, 27 Feb 2023 16:21:36 -0800 Subject: [PATCH 53/85] Add type hint --- morpheus/messages/multi_response_message.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/morpheus/messages/multi_response_message.py b/morpheus/messages/multi_response_message.py index 935ea8ce70..49a190c6e9 100644 --- a/morpheus/messages/multi_response_message.py +++ b/morpheus/messages/multi_response_message.py @@ -193,7 +193,7 @@ def copy_output_ranges(self, ranges, mask=None): outputs = self.outputs return {key: output[mask] for (key, output) in outputs.items()} - def copy_ranges(self, ranges): + def copy_ranges(self, ranges: typing.List[typing.Tuple[int, int]]): """ Perform a copy of the current message, dataframe and tensors for the given `ranges` of rows. @@ -275,7 +275,7 @@ class MultiResponseAEMessage(MultiResponseProbsMessage, cpp_class=None): user_id: str = None - def copy_ranges(self, ranges): + def copy_ranges(self, ranges: typing.List[typing.Tuple[int, int]]): """ Perform a copy of the current message, dataframe and tensors for the given `ranges` of rows. 
From ddd4da7ab472c7ea74d153643f0f0d124f6f09ab Mon Sep 17 00:00:00 2001 From: David Gardner Date: Mon, 27 Feb 2023 16:24:33 -0800 Subject: [PATCH 54/85] Remove optional num_selected_rows arg, it wasn't needed --- morpheus/messages/multi_ae_message.py | 11 ++--------- morpheus/messages/multi_message.py | 11 ++--------- 2 files changed, 4 insertions(+), 18 deletions(-) diff --git a/morpheus/messages/multi_ae_message.py b/morpheus/messages/multi_ae_message.py index d168053bf9..294c09fac5 100644 --- a/morpheus/messages/multi_ae_message.py +++ b/morpheus/messages/multi_ae_message.py @@ -60,7 +60,7 @@ def get_slice(self, start, stop): train_scores_mean=self.train_scores_mean, train_scores_std=self.train_scores_std) - def copy_ranges(self, ranges: typing.List[typing.Tuple[int, int]], num_selected_rows: int = None): + def copy_ranges(self, ranges: typing.List[typing.Tuple[int, int]]): """ Perform a copy of the current message instance for the given `ranges` of rows. @@ -71,22 +71,15 @@ def copy_ranges(self, ranges: typing.List[typing.Tuple[int, int]], num_selected_ The final output is exclusive of the `stop_row`, i.e. `[start_row, stop_row)`. For example to copy rows 1-2 & 5-7 `ranges=[(1, 3), (5, 8)]` - num_selected_rows : typing.Union[None, int] - Optional specify the number of rows selected by `ranges`, otherwise this is computed by the result. 
- Returns ------- `MultiAEMessage` """ sliced_rows = self.copy_meta_ranges(ranges) - - if num_selected_rows is None: - num_selected_rows = len(sliced_rows) - return MultiAEMessage(meta=UserMessageMeta(sliced_rows, user_id=self.meta.user_id), mess_offset=0, - mess_count=num_selected_rows, + mess_count=len(sliced_rows), model=self.model, train_scores_mean=self.train_scores_mean, train_scores_std=self.train_scores_std) diff --git a/morpheus/messages/multi_message.py b/morpheus/messages/multi_message.py index d9e944e309..f49f45d499 100644 --- a/morpheus/messages/multi_message.py +++ b/morpheus/messages/multi_message.py @@ -215,7 +215,7 @@ def copy_meta_ranges(self, return df.loc[mask, :] - def copy_ranges(self, ranges: typing.List[typing.Tuple[int, int]], num_selected_rows: int = None): + def copy_ranges(self, ranges: typing.List[typing.Tuple[int, int]]): """ Perform a copy of the current message instance for the given `ranges` of rows. @@ -225,16 +225,9 @@ def copy_ranges(self, ranges: typing.List[typing.Tuple[int, int]], num_selected_ Rows to include in the copy in the form of `[(`start_row`, `stop_row`),...]` The `stop_row` isn't included. For example to copy rows 1-2 & 5-7 `ranges=[(1, 3), (5, 8)]` - num_selected_rows : typing.Union[None, int] - Optional specify the number of rows selected by `ranges`, otherwise this is computed by the result. 
- Returns ------- `MultiMessage` """ sliced_rows = self.copy_meta_ranges(ranges) - - if num_selected_rows is None: - num_selected_rows = len(sliced_rows) - - return MultiMessage(meta=MessageMeta(sliced_rows), mess_offset=0, mess_count=num_selected_rows) + return MultiMessage(meta=MessageMeta(sliced_rows), mess_offset=0, mess_count=len(sliced_rows)) From 14d8d478766a6aa9c697f017c466499063893015 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Mon, 27 Feb 2023 16:26:50 -0800 Subject: [PATCH 55/85] Fix bad copy/paste docstring --- morpheus/messages/tensor_memory.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/morpheus/messages/tensor_memory.py b/morpheus/messages/tensor_memory.py index 60209b0df1..81d8d9d9c0 100644 --- a/morpheus/messages/tensor_memory.py +++ b/morpheus/messages/tensor_memory.py @@ -111,8 +111,8 @@ def set_tensor(self, name: str, tensor: cp.ndarray): Parameters ---------- - tensors : typing.Dict[str, cupy.ndarray] - Collection of tensors uniquely identified by a name. + tensor : cupy.ndarray + Tensory as a CuPy Array. 
""" self._check_tensor(tensor) self._tensors[name] = tensor From 47efc93a7a9621b18410bfcfea9005831af96fab Mon Sep 17 00:00:00 2001 From: David Gardner Date: Mon, 27 Feb 2023 16:49:51 -0800 Subject: [PATCH 56/85] Move tensor memory classes to their own modules --- morpheus/messages/__init__.py | 14 +- morpheus/messages/memory/__init__.py | 0 morpheus/messages/memory/inference_memory.py | 139 ++++++++++++++++++ morpheus/messages/memory/response_memory.py | 111 ++++++++++++++ .../messages/{ => memory}/tensor_memory.py | 0 morpheus/messages/multi_inference_message.py | 130 +--------------- morpheus/messages/multi_response_message.py | 106 +------------ tests/test_messages.py | 2 +- tests/test_tensor_memory.py | 16 +- 9 files changed, 279 insertions(+), 239 deletions(-) create mode 100644 morpheus/messages/memory/__init__.py create mode 100644 morpheus/messages/memory/inference_memory.py create mode 100644 morpheus/messages/memory/response_memory.py rename morpheus/messages/{ => memory}/tensor_memory.py (100%) diff --git a/morpheus/messages/__init__.py b/morpheus/messages/__init__.py index 58d7c1c85f..a966af5bfc 100644 --- a/morpheus/messages/__init__.py +++ b/morpheus/messages/__init__.py @@ -18,24 +18,24 @@ # Import order is very important here. 
Import base classes before child ones # isort: off +from morpheus.messages.memory.inference_memory import InferenceMemory +from morpheus.messages.memory.inference_memory import InferenceMemoryAE +from morpheus.messages.memory.inference_memory import InferenceMemoryFIL +from morpheus.messages.memory.inference_memory import InferenceMemoryNLP +from morpheus.messages.memory.response_memory import ResponseMemory +from morpheus.messages.memory.response_memory import ResponseMemoryAE +from morpheus.messages.memory.response_memory import ResponseMemoryProbs from morpheus.messages.message_base import MessageBase from morpheus.messages.message_meta import MessageMeta from morpheus.messages.message_meta import UserMessageMeta from morpheus.messages.multi_message import MultiMessage from morpheus.messages.multi_ae_message import MultiAEMessage -from morpheus.messages.multi_inference_message import InferenceMemory -from morpheus.messages.multi_inference_message import InferenceMemoryAE -from morpheus.messages.multi_inference_message import InferenceMemoryFIL -from morpheus.messages.multi_inference_message import InferenceMemoryNLP from morpheus.messages.multi_inference_message import MultiInferenceFILMessage from morpheus.messages.multi_inference_message import MultiInferenceMessage from morpheus.messages.multi_inference_message import MultiInferenceNLPMessage from morpheus.messages.multi_response_message import MultiResponseAEMessage from morpheus.messages.multi_response_message import MultiResponseMessage from morpheus.messages.multi_response_message import MultiResponseProbsMessage -from morpheus.messages.multi_response_message import ResponseMemory -from morpheus.messages.multi_response_message import ResponseMemoryAE -from morpheus.messages.multi_response_message import ResponseMemoryProbs __all__ = [ "InferenceMemory", diff --git a/morpheus/messages/memory/__init__.py b/morpheus/messages/memory/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git 
a/morpheus/messages/memory/inference_memory.py b/morpheus/messages/memory/inference_memory.py new file mode 100644 index 0000000000..ac35c12545 --- /dev/null +++ b/morpheus/messages/memory/inference_memory.py @@ -0,0 +1,139 @@ +# SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import dataclasses + +import cupy as cp + +import morpheus._lib.messages as _messages +from morpheus.messages.data_class_prop import DataClassProp +from morpheus.messages.memory.tensor_memory import TensorMemory + + +@dataclasses.dataclass(init=False) +class InferenceMemory(TensorMemory, cpp_class=_messages.InferenceMemory): + """ + This is a base container class for data that will be used for inference stages. This class is designed to + hold generic tensor data in cupy arrays. + """ + + def get_input(self, name: str): + """ + Getter function used with DataClassProp for getting inference input from message containers derived + from InferenceMemory. + + Parameters + ---------- + name : str + Key used to do lookup in inputs dict of message container. + + Returns + ------- + cupy.ndarray + Inputs corresponding to name. + + Raises + ------ + AttributeError + If input name does not exist in message container. 
+ """ + try: + return self.get_tensor(name) + except KeyError: + raise AttributeError + + def set_input(self, name: str, value: cp.ndarray): + """ + Setter function used with DataClassProp for setting inference input in message containers derived + from InferenceMemory. + + Parameters + ---------- + name : str + Key used to do lookup in inputs dict of message container. + value : cupy.ndarray + Value to set for input. + """ + # Ensure that we have 2D array here (`ensure_2d` inserts the wrong axis) + tensor = value if value.ndim == 2 else cp.reshape(value, (value.shape[0], -1)) + self.set_tensor(name, tensor) + + +@dataclasses.dataclass(init=False) +class InferenceMemoryNLP(InferenceMemory, cpp_class=_messages.InferenceMemoryNLP): + """ + This is a container class for data that needs to be submitted to the inference server for NLP category + usecases. + + Parameters + ---------- + input_ids : cupy.ndarray + The token-ids for each string padded with 0s to max_length. + input_mask : cupy.ndarray + The mask for token-ids result where corresponding positions identify valid token-id values. + seq_ids : cupy.ndarray + Ids used to index from an inference input to a message. Necessary since there can be more inference + inputs than messages (i.e., if some messages get broken into multiple inference requests). 
+ + """ + input_ids: dataclasses.InitVar[cp.ndarray] = DataClassProp(InferenceMemory.get_input, InferenceMemory.set_input) + input_mask: dataclasses.InitVar[cp.ndarray] = DataClassProp(InferenceMemory.get_input, InferenceMemory.set_input) + seq_ids: dataclasses.InitVar[cp.ndarray] = DataClassProp(InferenceMemory.get_input, InferenceMemory.set_input) + + def __init__(self, count: int, input_ids: cp.ndarray, input_mask: cp.ndarray, seq_ids: cp.ndarray): + super().__init__(count, tensors={'input_ids': input_ids, 'input_mask': input_mask, 'seq_ids': seq_ids}) + + +@dataclasses.dataclass(init=False) +class InferenceMemoryFIL(InferenceMemory, cpp_class=_messages.InferenceMemoryFIL): + """ + This is a container class for data that needs to be submitted to the inference server for FIL category + usecases. + + Parameters + ---------- + input__0 : cupy.ndarray + Inference input. + seq_ids : cupy.ndarray + Ids used to index from an inference input to a message. Necessary since there can be more inference + inputs than messages (i.e., if some messages get broken into multiple inference requests). + + """ + input__0: dataclasses.InitVar[cp.ndarray] = DataClassProp(InferenceMemory.get_input, InferenceMemory.set_input) + seq_ids: dataclasses.InitVar[cp.ndarray] = DataClassProp(InferenceMemory.get_input, InferenceMemory.set_input) + + def __init__(self, count: int, input__0: cp.ndarray, seq_ids: cp.ndarray): + super().__init__(count, tensors={'input__0': input__0, 'seq_ids': seq_ids}) + + +@dataclasses.dataclass(init=False) +class InferenceMemoryAE(InferenceMemory, cpp_class=None): + """ + This is a container class for data that needs to be submitted to the inference server for auto encoder usecases. + + Parameters + ---------- + input : cupy.ndarray + Inference input. + seq_ids : cupy.ndarray + Ids used to index from an inference input to a message. 
Necessary since there can be more inference + inputs than messages (i.e., if some messages get broken into multiple inference requests). + """ + + input: dataclasses.InitVar[cp.ndarray] = DataClassProp(InferenceMemory.get_input, InferenceMemory.set_input) + seq_ids: dataclasses.InitVar[cp.ndarray] = DataClassProp(InferenceMemory.get_input, InferenceMemory.set_input) + + def __init__(self, count: int, input: cp.ndarray, seq_ids: cp.ndarray): + super().__init__(count, tensors={'input': input, 'seq_ids': seq_ids}) diff --git a/morpheus/messages/memory/response_memory.py b/morpheus/messages/memory/response_memory.py new file mode 100644 index 0000000000..12a9b95ca5 --- /dev/null +++ b/morpheus/messages/memory/response_memory.py @@ -0,0 +1,111 @@ +# SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import dataclasses + +import cupy as cp + +import morpheus._lib.messages as _messages +from morpheus.messages.data_class_prop import DataClassProp +from morpheus.messages.memory.tensor_memory import TensorMemory + + +@dataclasses.dataclass(init=False) +class ResponseMemory(TensorMemory, cpp_class=_messages.ResponseMemory): + """Output memory block holding the results of inference.""" + + def get_output(self, name: str): + """ + Getter function used with DataClassProp for getting inference output from message containers derived + from ResponseMemory. + + Parameters + ---------- + name : str + Key used to do lookup in tensors dict of message container. + + Returns + ------- + cupy.ndarray + Tensors corresponding to name. + + Raises + ------ + AttributeError + If output name does not exist in message container. + + """ + try: + return self.get_tensor(name) + except KeyError: + raise AttributeError + + def set_output(self, name: str, value: cp.ndarray): + """ + Setter function used with DataClassProp for setting output in message containers derived + from ResponseMemory. + + Parameters + ---------- + name : str + Key used to do lookup in tensors dict of message container. + value : cupy.ndarray + Value to set for input. + """ + + # Ensure that we have 2D array here (`ensure_2d` inserts the wrong axis) + tensor = value if value.ndim == 2 else cp.reshape(value, (value.shape[0], -1)) + self.set_tensor(name, tensor) + + +@dataclasses.dataclass(init=False) +class ResponseMemoryProbs(ResponseMemory, cpp_class=_messages.ResponseMemoryProbs): + """ + Subclass of `ResponseMemory` containng an output tensor named 'probs'. 
+ + Parameters + ---------- + probs : cupy.ndarray + Probabilities tensor + """ + probs: dataclasses.InitVar[cp.ndarray] = DataClassProp(ResponseMemory.get_output, ResponseMemory.set_output) + + def __init__(self, count: int, probs: cp.ndarray): + super().__init__(count, tensors={'probs': probs}) + + +@dataclasses.dataclass(init=False) +class ResponseMemoryAE(ResponseMemory, cpp_class=None): + """ + Subclass of `ResponseMemory` specific to the AutoEncoder pipeline. + + Parameters + ---------- + probs : cupy.ndarray + Probabilities tensor + + user_id : str + User id the inference was performed against. + + explain_df : pd.Dataframe + Explainability Dataframe, for each feature a column will exist with a name in the form of: `{feature}_z_loss` + containing the loss z-score along with `max_abs_z` and `mean_abs_z` columns + """ + probs: dataclasses.InitVar[cp.ndarray] = DataClassProp(ResponseMemory.get_output, ResponseMemory.set_output) + user_id = "" + explain_df = None + + def __init__(self, count: int, probs: cp.ndarray): + super().__init__(count, tensors={'probs': probs}) diff --git a/morpheus/messages/tensor_memory.py b/morpheus/messages/memory/tensor_memory.py similarity index 100% rename from morpheus/messages/tensor_memory.py rename to morpheus/messages/memory/tensor_memory.py diff --git a/morpheus/messages/multi_inference_message.py b/morpheus/messages/multi_inference_message.py index e7245d8d99..ca806cf4c2 100644 --- a/morpheus/messages/multi_inference_message.py +++ b/morpheus/messages/multi_inference_message.py @@ -19,132 +19,14 @@ import cupy as cp import morpheus._lib.messages as _messages -from morpheus.messages.data_class_prop import DataClassProp +from morpheus.messages.memory.inference_memory import InferenceMemory from morpheus.messages.multi_message import MultiMessage -from morpheus.messages.tensor_memory import TensorMemory - - -@dataclasses.dataclass(init=False) -class InferenceMemory(TensorMemory, cpp_class=_messages.InferenceMemory): - """ - 
This is a base container class for data that will be used for inference stages. This class is designed to - hold generic tensor data in cupy arrays. - """ - - def get_input(self, name: str): - """ - Getter function used with DataClassProp for getting inference input from message containers derived - from InferenceMemory. - - Parameters - ---------- - name : str - Key used to do lookup in inputs dict of message container. - - Returns - ------- - cupy.ndarray - Inputs corresponding to name. - - Raises - ------ - AttributeError - If input name does not exist in message container. - """ - try: - return self.get_tensor(name) - except KeyError: - raise AttributeError - - def set_input(self, name: str, value: cp.ndarray): - """ - Setter function used with DataClassProp for setting inference input in message containers derived - from InferenceMemory. - - Parameters - ---------- - name : str - Key used to do lookup in inputs dict of message container. - value : cupy.ndarray - Value to set for input. - """ - # Ensure that we have 2D array here (`ensure_2d` inserts the wrong axis) - tensor = value if value.ndim == 2 else cp.reshape(value, (value.shape[0], -1)) - self.set_tensor(name, tensor) - - -@dataclasses.dataclass(init=False) -class InferenceMemoryNLP(InferenceMemory, cpp_class=_messages.InferenceMemoryNLP): - """ - This is a container class for data that needs to be submitted to the inference server for NLP category - usecases. - - Parameters - ---------- - input_ids : cupy.ndarray - The token-ids for each string padded with 0s to max_length. - input_mask : cupy.ndarray - The mask for token-ids result where corresponding positions identify valid token-id values. - seq_ids : cupy.ndarray - Ids used to index from an inference input to a message. Necessary since there can be more inference - inputs than messages (i.e., if some messages get broken into multiple inference requests). 
- - """ - input_ids: dataclasses.InitVar[cp.ndarray] = DataClassProp(InferenceMemory.get_input, InferenceMemory.set_input) - input_mask: dataclasses.InitVar[cp.ndarray] = DataClassProp(InferenceMemory.get_input, InferenceMemory.set_input) - seq_ids: dataclasses.InitVar[cp.ndarray] = DataClassProp(InferenceMemory.get_input, InferenceMemory.set_input) - - def __init__(self, count: int, input_ids: cp.ndarray, input_mask: cp.ndarray, seq_ids: cp.ndarray): - super().__init__(count, tensors={'input_ids': input_ids, 'input_mask': input_mask, 'seq_ids': seq_ids}) - - -@dataclasses.dataclass(init=False) -class InferenceMemoryFIL(InferenceMemory, cpp_class=_messages.InferenceMemoryFIL): - """ - This is a container class for data that needs to be submitted to the inference server for FIL category - usecases. - - Parameters - ---------- - input__0 : cupy.ndarray - Inference input. - seq_ids : cupy.ndarray - Ids used to index from an inference input to a message. Necessary since there can be more inference - inputs than messages (i.e., if some messages get broken into multiple inference requests). - - """ - input__0: dataclasses.InitVar[cp.ndarray] = DataClassProp(InferenceMemory.get_input, InferenceMemory.set_input) - seq_ids: dataclasses.InitVar[cp.ndarray] = DataClassProp(InferenceMemory.get_input, InferenceMemory.set_input) - - def __init__(self, count: int, input__0: cp.ndarray, seq_ids: cp.ndarray): - super().__init__(count, tensors={'input__0': input__0, 'seq_ids': seq_ids}) - - -@dataclasses.dataclass(init=False) -class InferenceMemoryAE(InferenceMemory, cpp_class=None): - """ - This is a container class for data that needs to be submitted to the inference server for auto encoder usecases. - - Parameters - ---------- - input : cupy.ndarray - Inference input. - seq_ids : cupy.ndarray - Ids used to index from an inference input to a message. 
Necessary since there can be more inference - inputs than messages (i.e., if some messages get broken into multiple inference requests). - """ - - input: dataclasses.InitVar[cp.ndarray] = DataClassProp(InferenceMemory.get_input, InferenceMemory.set_input) - seq_ids: dataclasses.InitVar[cp.ndarray] = DataClassProp(InferenceMemory.get_input, InferenceMemory.set_input) - - def __init__(self, count: int, input: cp.ndarray, seq_ids: cp.ndarray): - super().__init__(count, tensors={'input': input, 'seq_ids': seq_ids}) @dataclasses.dataclass class MultiInferenceMessage(MultiMessage, cpp_class=_messages.MultiInferenceMessage): """ - This is a container class that holds the TensorMemory container and the metadata of the data contained + This is a container class that holds the InferenceMemory container and the metadata of the data contained within it. Builds on top of the `MultiMessage` class to add additional data for inferencing. This class requires two separate memory blocks for a batch. One for the message metadata (i.e., start time, @@ -156,7 +38,7 @@ class MultiInferenceMessage(MultiMessage, cpp_class=_messages.MultiInferenceMess Parameters ---------- - memory : `TensorMemory` + memory : `InferenceMemory` Inference memory. offset : int Message offset in inference memory instance. @@ -164,14 +46,14 @@ class MultiInferenceMessage(MultiMessage, cpp_class=_messages.MultiInferenceMess Message count in inference memory instance. """ - memory: TensorMemory = dataclasses.field(repr=False) + memory: InferenceMemory = dataclasses.field(repr=False) offset: int count: int @property def inputs(self): """ - Get inputs stored in the TensorMemory container. + Get inputs stored in the InferenceMemory container. Returns ------- @@ -193,7 +75,7 @@ def __getattr__(self, name: str) -> typing.Any: def get_input(self, name: str): """ - Get input stored in the TensorMemory container. + Get input stored in the InferenceMemory container. 
Parameters ---------- diff --git a/morpheus/messages/multi_response_message.py b/morpheus/messages/multi_response_message.py index 49a190c6e9..fa59a1ab48 100644 --- a/morpheus/messages/multi_response_message.py +++ b/morpheus/messages/multi_response_message.py @@ -16,102 +16,10 @@ import dataclasses import typing -import cupy as cp - import morpheus._lib.messages as _messages -from morpheus.messages.data_class_prop import DataClassProp +from morpheus.messages.memory.response_memory import ResponseMemory from morpheus.messages.message_meta import MessageMeta from morpheus.messages.multi_message import MultiMessage -from morpheus.messages.tensor_memory import TensorMemory - - -@dataclasses.dataclass(init=False) -class ResponseMemory(TensorMemory, cpp_class=_messages.ResponseMemory): - """Output memory block holding the results of inference.""" - - def get_output(self, name: str): - """ - Getter function used with DataClassProp for getting inference output from message containers derived - from ResponseMemory. - - Parameters - ---------- - name : str - Key used to do lookup in tensors dict of message container. - - Returns - ------- - cupy.ndarray - Tensors corresponding to name. - - Raises - ------ - AttributeError - If output name does not exist in message container. - - """ - try: - return self.get_tensor(name) - except KeyError: - raise AttributeError - - def set_output(self, name: str, value: cp.ndarray): - """ - Setter function used with DataClassProp for setting output in message containers derived - from ResponseMemory. - - Parameters - ---------- - name : str - Key used to do lookup in tensors dict of message container. - value : cupy.ndarray - Value to set for input. 
- """ - - # Ensure that we have 2D array here (`ensure_2d` inserts the wrong axis) - tensor = value if value.ndim == 2 else cp.reshape(value, (value.shape[0], -1)) - self.set_tensor(name, tensor) - - -@dataclasses.dataclass(init=False) -class ResponseMemoryProbs(ResponseMemory, cpp_class=_messages.ResponseMemoryProbs): - """ - Subclass of `ResponseMemory` containng an output tensor named 'probs'. - - Parameters - ---------- - probs : cupy.ndarray - Probabilities tensor - """ - probs: dataclasses.InitVar[cp.ndarray] = DataClassProp(ResponseMemory.get_output, ResponseMemory.set_output) - - def __init__(self, count: int, probs: cp.ndarray): - super().__init__(count, tensors={'probs': probs}) - - -@dataclasses.dataclass(init=False) -class ResponseMemoryAE(ResponseMemory, cpp_class=None): - """ - Subclass of `ResponseMemory` specific to the AutoEncoder pipeline. - - Parameters - ---------- - probs : cupy.ndarray - Probabilities tensor - - user_id : str - User id the inference was performed against. - - explain_df : pd.Dataframe - Explainability Dataframe, for each feature a column will exist with a name in the form of: `{feature}_z_loss` - containing the loss z-score along with `max_abs_z` and `mean_abs_z` columns - """ - probs: dataclasses.InitVar[cp.ndarray] = DataClassProp(ResponseMemory.get_output, ResponseMemory.set_output) - user_id = "" - explain_df = None - - def __init__(self, count: int, probs: cp.ndarray): - super().__init__(count, tensors={'probs': probs}) @dataclasses.dataclass @@ -121,22 +29,22 @@ class MultiResponseMessage(MultiMessage, cpp_class=_messages.MultiResponseMessag Parameters ---------- - memory : `TensorMemory` + memory : `ResponseMemory` This is a response container instance for triton inference requests. offset : int - Offset of each response message into the `TensorMemory` block. + Offset of each response message into the `ResponseMemory` block. count : int Inference results size of all responses. 
""" - memory: TensorMemory = dataclasses.field(repr=False) + memory: ResponseMemory = dataclasses.field(repr=False) offset: int count: int @property def outputs(self): """ - Get outputs stored in the TensorMemory container. + Get outputs stored in the ResponseMemory container. Returns ------- @@ -152,7 +60,7 @@ def __getattr__(self, name: str) -> typing.Any: def get_output(self, name: str): """ - Get output stored in the TensorMemory container. + Get output stored in the ResponseMemory container. Parameters ---------- @@ -211,7 +119,7 @@ def copy_ranges(self, ranges: typing.List[typing.Tuple[int, int]]): sliced_count = len(sliced_rows) sliced_outputs = self.copy_output_ranges(ranges, mask=mask) - mem = TensorMemory(count=sliced_count) + mem = ResponseMemory(count=sliced_count) mem.outputs = sliced_outputs return MultiResponseMessage(MessageMeta(sliced_rows), 0, sliced_count, mem, 0, sliced_count) diff --git a/tests/test_messages.py b/tests/test_messages.py index f36beec3b0..cfcfe85fa9 100644 --- a/tests/test_messages.py +++ b/tests/test_messages.py @@ -22,7 +22,7 @@ import morpheus._lib.messages as _messages import morpheus.config from morpheus import messages -from morpheus.messages import tensor_memory +from morpheus.messages.memory import tensor_memory def check_message(python_type: type, cpp_type: type, should_be_cpp: bool, no_cpp_class: bool, args: tuple): diff --git a/tests/test_tensor_memory.py b/tests/test_tensor_memory.py index b58273afb0..51e8232990 100644 --- a/tests/test_tensor_memory.py +++ b/tests/test_tensor_memory.py @@ -23,14 +23,14 @@ from morpheus._lib.common import FileTypes from morpheus.io.deserializers import read_file_to_df -from morpheus.messages.multi_inference_message import InferenceMemory -from morpheus.messages.multi_inference_message import InferenceMemoryAE -from morpheus.messages.multi_inference_message import InferenceMemoryFIL -from morpheus.messages.multi_inference_message import InferenceMemoryNLP -from 
morpheus.messages.multi_response_message import ResponseMemory -from morpheus.messages.multi_response_message import ResponseMemoryAE -from morpheus.messages.multi_response_message import ResponseMemoryProbs -from morpheus.messages.tensor_memory import TensorMemory +from morpheus.messages.memory.inference_memory import InferenceMemory +from morpheus.messages.memory.inference_memory import InferenceMemoryAE +from morpheus.messages.memory.inference_memory import InferenceMemoryFIL +from morpheus.messages.memory.inference_memory import InferenceMemoryNLP +from morpheus.messages.memory.response_memory import ResponseMemory +from morpheus.messages.memory.response_memory import ResponseMemoryAE +from morpheus.messages.memory.response_memory import ResponseMemoryProbs +from morpheus.messages.memory.tensor_memory import TensorMemory from utils import TEST_DIRS INPUT_FILE = os.path.join(TEST_DIRS.tests_data_dir, 'filter_probs.csv') From 8c915797b8ba7eebf14095d59bc897c89e0c66d4 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Mon, 27 Feb 2023 16:57:28 -0800 Subject: [PATCH 57/85] First pass at python MultiTensorMessage class --- morpheus/messages/multi_tensor_message.py | 137 ++++++++++++++++++++++ 1 file changed, 137 insertions(+) create mode 100644 morpheus/messages/multi_tensor_message.py diff --git a/morpheus/messages/multi_tensor_message.py b/morpheus/messages/multi_tensor_message.py new file mode 100644 index 0000000000..4145cd9330 --- /dev/null +++ b/morpheus/messages/multi_tensor_message.py @@ -0,0 +1,137 @@ +import dataclasses +import typing + +import morpheus._lib.messages as _messages +from morpheus.messages.memory.tensor_memory import TensorMemory +from morpheus.messages.message_meta import MessageMeta +from morpheus.messages.multi_message import MultiMessage + + +@dataclasses.dataclass +class MultiTensorMessage(MultiMessage, cpp_class=_messages.MultiTensorMessage): + """ + This class contains several inference responses as well as the cooresponding message 
metadata.
+
+    Parameters
+    ----------
+    memory : `TensorMemory`
+        Container holding generic tensor data in cupy arrays
+    offset : int
+        Offset of each message into the `TensorMemory` block.
+    count : int
+        Number of rows in the `TensorMemory` block.
+
+    """
+    memory: TensorMemory = dataclasses.field(repr=False)
+    offset: int
+    count: int
+
+    @property
+    def tensors(self):
+        """
+        Get tensors stored in the TensorMemory container sliced according to `offset` and `count`.
+
+        Returns
+        -------
+        cupy.ndarray
+            Inference tensors.
+
+        """
+        tensors = self.memory.get_tensors()
+        return {key: self.get_tensor(key) for key in tensors.keys()}
+
+    def __getattr__(self, name: str) -> typing.Any:
+        return self.get_tensor(name)
+
+    def get_tensor(self, name: str):
+        """
+        Get tensor stored in the TensorMemory container.
+
+        Parameters
+        ----------
+        name : str
+            tensor key name.
+
+        Returns
+        -------
+        cupy.ndarray
+            Inference tensor.
+
+        """
+        return self.memory.get_tensor(name)[self.offset:self.offset + self.count, :]
+
+    def copy_tensor_ranges(self, ranges, mask=None):
+        """
+        Perform a copy of the underlying tensors for the given `ranges` of rows.
+
+        Parameters
+        ----------
+        ranges : typing.List[typing.Tuple[int, int]]
+            Rows to include in the copy in the form of `[(`start_row`, `stop_row`),...]`
+            The `stop_row` isn't included. For example to copy rows 1-2 & 5-7 `ranges=[(1, 3), (5, 8)]`
+
+        mask : typing.Union[None, cupy.ndarray, numpy.ndarray]
+            Optionally specify rows as a cupy array (when using cudf Dataframes) or a numpy array (when using pandas
+            Dataframes) of booleans. When not-None `ranges` will be ignored. This is useful as an optimization as this
+            avoids needing to generate the mask on its own.
+ + Returns + ------- + typing.Dict[str, cupy.ndarray] + """ + if mask is None: + mask = self._ranges_to_mask(self.get_meta(), ranges=ranges) + + # The tensors property method returns a copy with the offsets applied + tensors = self.tensors + return {key: tensor[mask] for (key, tensor) in tensors.items()} + + def copy_ranges(self, ranges: typing.List[typing.Tuple[int, int]]): + """ + Perform a copy of the current message, dataframe and tensors for the given `ranges` of rows. + + Parameters + ---------- + ranges : typing.List[typing.Tuple[int, int]] + Rows to include in the copy in the form of `[(`start_row`, `stop_row`),...]` + The `stop_row` isn't included. For example to copy rows 1-2 & 5-7 `ranges=[(1, 3), (5, 8)]` + + ------- + `MultiTensorMessage` + """ + mask = self._ranges_to_mask(self.get_meta(), ranges) + sliced_rows = self.copy_meta_ranges(ranges, mask=mask) + sliced_count = len(sliced_rows) + sliced_tensors = self.copy_tensor_ranges(ranges, mask=mask) + + mem = TensorMemory(count=sliced_count) + mem.tensors = sliced_tensors + + return MultiTensorMessage(MessageMeta(sliced_rows), 0, sliced_count, mem, 0, sliced_count) + + def get_slice(self, start, stop): + """ + Perform a slice of the current message from `start`:`stop` (excluding `stop`) + + For example to slice from rows 1-3 use `m.get_slice(1, 4)`. 
The returned `MultiTensorMessage` will contain + references to the same underlying Dataframe and tensor tensors, and this calling this method is reletively low + cost compared to `MultiTensorMessage.copy_ranges` + + Parameters + ---------- + start : int + Starting row of the slice + + stop : int + Stop of the slice + + ------- + `MultiTensorMessage` + """ + mess_count = stop - start + return MultiTensorMessage(meta=self.meta, + mess_offset=self.mess_offset + start, + mess_count=mess_count, + memory=self.memory, + offset=self.offset + start, + count=mess_count) From 2b773bbd0b8026b8f21cec9cab286653d2885d50 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Mon, 27 Feb 2023 16:59:57 -0800 Subject: [PATCH 58/85] wip --- morpheus/messages/multi_response_message.py | 24 ++++----------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/morpheus/messages/multi_response_message.py b/morpheus/messages/multi_response_message.py index fa59a1ab48..eb97340316 100644 --- a/morpheus/messages/multi_response_message.py +++ b/morpheus/messages/multi_response_message.py @@ -20,31 +20,19 @@ from morpheus.messages.memory.response_memory import ResponseMemory from morpheus.messages.message_meta import MessageMeta from morpheus.messages.multi_message import MultiMessage +from morpheus.messages.multi_tensor_message import MultiTensorMessage @dataclasses.dataclass -class MultiResponseMessage(MultiMessage, cpp_class=_messages.MultiResponseMessage): +class MultiResponseMessage(MultiTensorMessage, cpp_class=_messages.MultiResponseMessage): """ This class contains several inference responses as well as the cooresponding message metadata. - - Parameters - ---------- - memory : `ResponseMemory` - This is a response container instance for triton inference requests. - offset : int - Offset of each response message into the `ResponseMemory` block. - count : int - Inference results size of all responses. 
- """ - memory: ResponseMemory = dataclasses.field(repr=False) - offset: int - count: int @property def outputs(self): """ - Get outputs stored in the ResponseMemory container. + Get outputs stored in the ResponseMemory container. Alias for `MultiResponseMessage.tensors`. Returns ------- @@ -52,11 +40,7 @@ def outputs(self): Inference outputs. """ - tensors = self.memory.get_tensors() - return {key: self.get_output(key) for key in tensors.keys()} - - def __getattr__(self, name: str) -> typing.Any: - return self.get_output(name) + return self.tensors def get_output(self, name: str): """ From b31b02f8ecd4f229505432bc63ee1395dd69f5db Mon Sep 17 00:00:00 2001 From: David Gardner Date: Mon, 27 Feb 2023 17:06:01 -0800 Subject: [PATCH 59/85] wip --- morpheus/messages/multi_response_message.py | 28 ++++++++------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/morpheus/messages/multi_response_message.py b/morpheus/messages/multi_response_message.py index eb97340316..742d895cdd 100644 --- a/morpheus/messages/multi_response_message.py +++ b/morpheus/messages/multi_response_message.py @@ -44,7 +44,7 @@ def outputs(self): def get_output(self, name: str): """ - Get output stored in the ResponseMemory container. + Get output stored in the ResponseMemory container. Alias for `MultiResponseMessage.get_tensor`. Parameters ---------- @@ -57,11 +57,12 @@ def get_output(self, name: str): Inference output. """ - return self.memory.get_output(name)[self.offset:self.offset + self.count, :] + return self.get_tensor(name) def copy_output_ranges(self, ranges, mask=None): """ Perform a copy of the underlying output tensors for the given `ranges` of rows. 
+ Alias for `MultiResponseMessage.copy_output_ranges` Parameters ---------- @@ -78,12 +79,7 @@ def copy_output_ranges(self, ranges, mask=None): ------- typing.Dict[str, cupy.ndarray] """ - if mask is None: - mask = self._ranges_to_mask(self.get_meta(), ranges=ranges) - - # The outputs property method returns a copy with the offsets applied - outputs = self.outputs - return {key: output[mask] for (key, output) in outputs.items()} + return self.copy_tensor_ranges(ranges, mask=mask) def copy_ranges(self, ranges: typing.List[typing.Tuple[int, int]]): """ @@ -98,15 +94,13 @@ def copy_ranges(self, ranges: typing.List[typing.Tuple[int, int]]): ------- `MultiResponseMessage` """ - mask = self._ranges_to_mask(self.get_meta(), ranges) - sliced_rows = self.copy_meta_ranges(ranges, mask=mask) - sliced_count = len(sliced_rows) - sliced_outputs = self.copy_output_ranges(ranges, mask=mask) - - mem = ResponseMemory(count=sliced_count) - mem.outputs = sliced_outputs - - return MultiResponseMessage(MessageMeta(sliced_rows), 0, sliced_count, mem, 0, sliced_count) + m = super().copy_ranges(ranges) + return MultiResponseMessage(meta=m.meta, + mess_offset=m.mess_offset, + mess_count=m.mess_count, + memory=m.memory, + offset=m.offset, + count=m.mess_count) def get_slice(self, start, stop): """ From d04685bc4ce604193b719f4e7a1da5fd8e3c8ce7 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Tue, 28 Feb 2023 08:29:17 -0800 Subject: [PATCH 60/85] wip --- .../morpheus/messages/multi_response.hpp | 6 +- .../morpheus/messages/multi_tensor.hpp | 58 +++++++++++++++++++ 2 files changed, 61 insertions(+), 3 deletions(-) diff --git a/morpheus/_lib/include/morpheus/messages/multi_response.hpp b/morpheus/_lib/include/morpheus/messages/multi_response.hpp index a48e25597f..3ca90c09f4 100644 --- a/morpheus/_lib/include/morpheus/messages/multi_response.hpp +++ b/morpheus/_lib/include/morpheus/messages/multi_response.hpp @@ -124,14 +124,14 @@ struct MultiResponseMessageInterfaceProxy cudf::size_type 
count); /** - * @brief GReturns a shared pointer of a response memory probs object + * @brief Returns a shared pointer of a response memory object * * @return std::shared_ptr */ static std::shared_ptr memory(MultiResponseMessage& self); /** - * @brief Message offset in response memory probs object + * @brief Message offset in response memory object * * @param self * @return std::size_t @@ -139,7 +139,7 @@ struct MultiResponseMessageInterfaceProxy static std::size_t offset(MultiResponseMessage& self); /** - * @brief Messages count in response memory probs object + * @brief Messages count in response memory object * * @param self * @return std::size_t diff --git a/morpheus/_lib/include/morpheus/messages/multi_tensor.hpp b/morpheus/_lib/include/morpheus/messages/multi_tensor.hpp index 043fc2081b..1f0f75f592 100644 --- a/morpheus/_lib/include/morpheus/messages/multi_tensor.hpp +++ b/morpheus/_lib/include/morpheus/messages/multi_tensor.hpp @@ -120,6 +120,64 @@ class MultiTensorMessage : public DerivedMultiMessage + */ + static std::shared_ptr init(std::shared_ptr meta, + std::size_t mess_offset, + std::size_t mess_count, + std::shared_ptr memory, + std::size_t offset, + std::size_t count); + + /** + * @brief GReturns a shared pointer of a tensor memory object + * + * @return std::shared_ptr + */ + static std::shared_ptr memory(MultiTensorMessage& self); + + /** + * @brief Message offset in tensor memory object + * + * @param self + * @return std::size_t + */ + static std::size_t offset(MultiTensorMessage& self); + + /** + * @brief Messages count in tensor memory object + * + * @param self + * @return std::size_t + */ + static std::size_t count(MultiTensorMessage& self); + + /** + * @brief Returns the tensor tensor for a given name + * + * @param self + * @param name : Tensor name + * @return pybind11::object + */ + static pybind11::object get_tensor(MultiTensorMessage& self, const std::string& name); +}; + #pragma GCC visibility pop /** @} */ // end of group } // 
namespace morpheus From 0e3dc97ccffacc35b2d0ff7c25768ffaa37928c7 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Tue, 28 Feb 2023 09:57:02 -0800 Subject: [PATCH 61/85] wip --- morpheus/_lib/src/messages/multi_tensor.cpp | 38 +++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/morpheus/_lib/src/messages/multi_tensor.cpp b/morpheus/_lib/src/messages/multi_tensor.cpp index 2ca9b29b93..4e5f23874a 100644 --- a/morpheus/_lib/src/messages/multi_tensor.cpp +++ b/morpheus/_lib/src/messages/multi_tensor.cpp @@ -17,6 +17,8 @@ #include "morpheus/messages/multi_tensor.hpp" +#include "morpheus/utilities/cupy_util.hpp" + #include // for cudf::size_type> #include @@ -117,4 +119,40 @@ std::shared_ptr MultiTensorMessage::copy_input_ranges( return std::make_shared(num_selected_rows, std::move(tensors)); } +/****** MultiTensorMessageInterfaceProxy *************************/ +std::shared_ptr MultiTensorMessageInterfaceProxy::init(std::shared_ptr meta, + std::size_t mess_offset, + std::size_t mess_count, + std::shared_ptr memory, + std::size_t offset, + std::size_t count) +{ + return std::make_shared( + std::move(meta), mess_offset, mess_count, std::move(memory), offset, count); +} + +std::shared_ptr MultiTensorMessageInterfaceProxy::memory(MultiTensorMessage& self) +{ + DCHECK(std::dynamic_pointer_cast(self.memory) != nullptr); + + return std::static_pointer_cast(self.memory); +} + +std::size_t MultiTensorMessageInterfaceProxy::offset(MultiTensorMessage& self) +{ + return self.offset; +} + +std::size_t MultiTensorMessageInterfaceProxy::count(MultiTensorMessage& self) +{ + return self.count; +} + +pybind11::object MultiTensorMessageInterfaceProxy::get_tensor(MultiTensorMessage& self, const std::string& name) +{ + auto tensor = self.get_tensor(name); + + return CupyUtil::tensor_to_cupy(tensor); +} + } // namespace morpheus From 6e0c2639df67c4b09e8ba28292fe23a90019aa83 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Tue, 28 Feb 2023 10:06:14 -0800 Subject: [PATCH 
62/85] Edge connectors --- morpheus/_lib/src/python_modules/messages.cpp | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/morpheus/_lib/src/python_modules/messages.cpp b/morpheus/_lib/src/python_modules/messages.cpp index 35f789566d..25eae8b790 100644 --- a/morpheus/_lib/src/python_modules/messages.cpp +++ b/morpheus/_lib/src/python_modules/messages.cpp @@ -28,6 +28,7 @@ #include "morpheus/messages/multi_inference_nlp.hpp" #include "morpheus/messages/multi_response.hpp" #include "morpheus/messages/multi_response_probs.hpp" +#include "morpheus/messages/multi_tensor.hpp" #include "morpheus/objects/data_table.hpp" #include "morpheus/objects/mutable_table_ctx_mgr.hpp" #include "morpheus/utilities/cudf_util.hpp" @@ -85,6 +86,7 @@ PYBIND11_MODULE(messages, m) mrc::pymrc::PortBuilderUtil::register_port_util>(); mrc::pymrc::PortBuilderUtil::register_port_util>(); + mrc::pymrc::PortBuilderUtil::register_port_util>(); mrc::pymrc::PortBuilderUtil::register_port_util>(); mrc::pymrc::PortBuilderUtil::register_port_util>(); mrc::pymrc::PortBuilderUtil::register_port_util>(); @@ -92,27 +94,45 @@ PYBIND11_MODULE(messages, m) mrc::pymrc::PortBuilderUtil::register_port_util>(); // EdgeConnectors for derived classes of MultiMessage to MultiMessage + mrc::edge::EdgeConnector, + std::shared_ptr>::register_converter(); + + mrc::edge::EdgeConnector, + std::shared_ptr>::register_converter(); + mrc::edge::EdgeConnector, std::shared_ptr>::register_converter(); mrc::edge::EdgeConnector, std::shared_ptr>::register_converter(); + mrc::edge::EdgeConnector, + std::shared_ptr>::register_converter(); + mrc::edge::EdgeConnector, std::shared_ptr>::register_converter(); mrc::edge::EdgeConnector, std::shared_ptr>::register_converter(); + mrc::edge::EdgeConnector, + std::shared_ptr>::register_converter(); + mrc::edge::EdgeConnector, std::shared_ptr>::register_converter(); mrc::edge::EdgeConnector, std::shared_ptr>::register_converter(); + mrc::edge::EdgeConnector, + 
std::shared_ptr>::register_converter(); + mrc::edge::EdgeConnector, std::shared_ptr>::register_converter(); + mrc::edge::EdgeConnector, + std::shared_ptr>::register_converter(); + mrc::edge::EdgeConnector, std::shared_ptr>::register_converter(); From eaaaeb0412763620c5058bb99d00a2a0a4203920 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Tue, 28 Feb 2023 10:10:05 -0800 Subject: [PATCH 63/85] bindings for MultiTensorMessage --- morpheus/_lib/src/python_modules/messages.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/morpheus/_lib/src/python_modules/messages.cpp b/morpheus/_lib/src/python_modules/messages.cpp index 25eae8b790..77c667bd1c 100644 --- a/morpheus/_lib/src/python_modules/messages.cpp +++ b/morpheus/_lib/src/python_modules/messages.cpp @@ -309,6 +309,19 @@ PYBIND11_MODULE(messages, m) .def_property( "probs", &ResponseMemoryProbsInterfaceProxy::get_probs, &ResponseMemoryProbsInterfaceProxy::set_probs); + py::class_>(m, "MultiTensorMessage") + .def(py::init<>(&MultiTensorMessageInterfaceProxy::init), + py::arg("meta"), + py::arg("mess_offset"), + py::arg("mess_count"), + py::arg("memory"), + py::arg("offset"), + py::arg("count")) + .def_property_readonly("memory", &MultiTensorMessageInterfaceProxy::memory) + .def_property_readonly("offset", &MultiTensorMessageInterfaceProxy::offset) + .def_property_readonly("count", &MultiTensorMessageInterfaceProxy::count) + .def("get_tensor", &MultiTensorMessageInterfaceProxy::get_tensor); + py::class_>(m, "MultiResponseMessage") .def(py::init<>(&MultiResponseMessageInterfaceProxy::init), py::arg("meta"), From 83335773b73eb36765a5001867e765d626c50bda Mon Sep 17 00:00:00 2001 From: David Gardner Date: Tue, 28 Feb 2023 10:27:47 -0800 Subject: [PATCH 64/85] wip --- morpheus/messages/multi_inference_message.py | 31 +++++--------------- morpheus/messages/multi_response_message.py | 3 -- 2 files changed, 8 insertions(+), 26 deletions(-) diff --git a/morpheus/messages/multi_inference_message.py 
b/morpheus/messages/multi_inference_message.py index ca806cf4c2..16f486fde4 100644 --- a/morpheus/messages/multi_inference_message.py +++ b/morpheus/messages/multi_inference_message.py @@ -19,15 +19,14 @@ import cupy as cp import morpheus._lib.messages as _messages -from morpheus.messages.memory.inference_memory import InferenceMemory -from morpheus.messages.multi_message import MultiMessage +from morpheus.messages.multi_tensor_message import MultiTensorMessage @dataclasses.dataclass -class MultiInferenceMessage(MultiMessage, cpp_class=_messages.MultiInferenceMessage): +class MultiInferenceMessage(MultiTensorMessage, cpp_class=_messages.MultiInferenceMessage): """ This is a container class that holds the InferenceMemory container and the metadata of the data contained - within it. Builds on top of the `MultiMessage` class to add additional data for inferencing. + within it. Builds on top of the `MultiTensorMessage` class to add additional data for inferencing. This class requires two separate memory blocks for a batch. One for the message metadata (i.e., start time, IP address, etc.) and another for the raw inference inputs (i.e., input_ids, seq_ids). Since there can be @@ -35,25 +34,12 @@ class MultiInferenceMessage(MultiMessage, cpp_class=_messages.MultiInferenceMess inference requests) this class stores two different offset and count values. `mess_offset` and `mess_count` refer to the offset and count in the message metadata batch and `offset` and `count` index into the inference batch data. - - Parameters - ---------- - memory : `InferenceMemory` - Inference memory. - offset : int - Message offset in inference memory instance. - count : int - Message count in inference memory instance. - """ - memory: InferenceMemory = dataclasses.field(repr=False) - offset: int - count: int @property def inputs(self): """ - Get inputs stored in the InferenceMemory container. + Get inputs stored in the InferenceMemory container. Alias for `MultiInferenceMessage.tensors`. 
Returns ------- @@ -61,8 +47,7 @@ def inputs(self): Inference inputs. """ - tensors = self.memory.get_tensors() - return {key: self.get_input(key) for key in tensors.keys()} + return self.tensors def __getstate__(self): return self.__dict__ @@ -71,11 +56,11 @@ def __setstate__(self, d): self.__dict__ = d def __getattr__(self, name: str) -> typing.Any: - return self.get_input(name) + return self.get_tensor(name) def get_input(self, name: str): """ - Get input stored in the InferenceMemory container. + Get input stored in the InferenceMemory container. Alias for `MultiInferenceMessage.get_tensor`. Parameters ---------- @@ -92,7 +77,7 @@ def get_input(self, name: str): AttributeError When no matching input tensor exists. """ - return self.memory.get_input(name)[self.offset:self.offset + self.count, :] + return self.get_tensor(name) def get_slice(self, start, stop): """ diff --git a/morpheus/messages/multi_response_message.py b/morpheus/messages/multi_response_message.py index 742d895cdd..26442f13d2 100644 --- a/morpheus/messages/multi_response_message.py +++ b/morpheus/messages/multi_response_message.py @@ -17,9 +17,6 @@ import typing import morpheus._lib.messages as _messages -from morpheus.messages.memory.response_memory import ResponseMemory -from morpheus.messages.message_meta import MessageMeta -from morpheus.messages.multi_message import MultiMessage from morpheus.messages.multi_tensor_message import MultiTensorMessage From fc4174111f4e2b05acf11a088ed78220d30e252c Mon Sep 17 00:00:00 2001 From: David Gardner Date: Tue, 28 Feb 2023 10:56:49 -0800 Subject: [PATCH 65/85] cleanups --- morpheus/messages/__init__.py | 2 ++ morpheus/messages/memory/inference_memory.py | 15 ++++----------- morpheus/messages/memory/response_memory.py | 16 ++++------------ morpheus/messages/memory/tensor_memory.py | 20 ++++++++++++-------- morpheus/messages/multi_inference_message.py | 2 -- tests/test_messages.py | 5 +++++ 6 files changed, 27 insertions(+), 33 deletions(-) diff --git 
a/morpheus/messages/__init__.py b/morpheus/messages/__init__.py index a966af5bfc..cfdf70cff8 100644 --- a/morpheus/messages/__init__.py +++ b/morpheus/messages/__init__.py @@ -29,6 +29,7 @@ from morpheus.messages.message_meta import MessageMeta from morpheus.messages.message_meta import UserMessageMeta from morpheus.messages.multi_message import MultiMessage +from morpheus.messages.multi_tensor_message import MultiTensorMessage from morpheus.messages.multi_ae_message import MultiAEMessage from morpheus.messages.multi_inference_message import MultiInferenceFILMessage from morpheus.messages.multi_inference_message import MultiInferenceMessage @@ -52,6 +53,7 @@ "MultiResponseAEMessage", "MultiResponseMessage", "MultiResponseProbsMessage", + "MultiTensorMessage", "ResponseMemory", "ResponseMemoryAE", "ResponseMemoryProbs", diff --git a/morpheus/messages/memory/inference_memory.py b/morpheus/messages/memory/inference_memory.py index ac35c12545..c522248ce1 100644 --- a/morpheus/messages/memory/inference_memory.py +++ b/morpheus/messages/memory/inference_memory.py @@ -31,8 +31,7 @@ class InferenceMemory(TensorMemory, cpp_class=_messages.InferenceMemory): def get_input(self, name: str): """ - Getter function used with DataClassProp for getting inference input from message containers derived - from InferenceMemory. + Get the Tensor stored in the container identified by `name`. Alias for `InferenceMemory.get_tensor`. Parameters ---------- @@ -49,15 +48,11 @@ def get_input(self, name: str): AttributeError If input name does not exist in message container. """ - try: - return self.get_tensor(name) - except KeyError: - raise AttributeError + return self.get_tensor(name) def set_input(self, name: str, value: cp.ndarray): """ - Setter function used with DataClassProp for setting inference input in message containers derived - from InferenceMemory. + Update the input tensor identified by `name`. 
Alias for `InferenceMemory.set_tensor` Parameters ---------- @@ -66,9 +61,7 @@ def set_input(self, name: str, value: cp.ndarray): value : cupy.ndarray Value to set for input. """ - # Ensure that we have 2D array here (`ensure_2d` inserts the wrong axis) - tensor = value if value.ndim == 2 else cp.reshape(value, (value.shape[0], -1)) - self.set_tensor(name, tensor) + self.set_tensor(name, value) @dataclasses.dataclass(init=False) diff --git a/morpheus/messages/memory/response_memory.py b/morpheus/messages/memory/response_memory.py index 12a9b95ca5..0c5daa5ecd 100644 --- a/morpheus/messages/memory/response_memory.py +++ b/morpheus/messages/memory/response_memory.py @@ -28,8 +28,7 @@ class ResponseMemory(TensorMemory, cpp_class=_messages.ResponseMemory): def get_output(self, name: str): """ - Getter function used with DataClassProp for getting inference output from message containers derived - from ResponseMemory. + Get the Tensor stored in the container identified by `name`. Alias for `ResponseMemory.get_tensor`. Parameters ---------- @@ -47,15 +46,11 @@ def get_output(self, name: str): If output name does not exist in message container. """ - try: - return self.get_tensor(name) - except KeyError: - raise AttributeError + return self.get_tensor(name) def set_output(self, name: str, value: cp.ndarray): """ - Setter function used with DataClassProp for setting output in message containers derived - from ResponseMemory. + Update the output tensor identified by `name`. Alias for `ResponseMemory.set_tensor` Parameters ---------- @@ -64,10 +59,7 @@ def set_output(self, name: str, value: cp.ndarray): value : cupy.ndarray Value to set for input. 
""" - - # Ensure that we have 2D array here (`ensure_2d` inserts the wrong axis) - tensor = value if value.ndim == 2 else cp.reshape(value, (value.shape[0], -1)) - self.set_tensor(name, tensor) + self.set_tensor(name, value) @dataclasses.dataclass(init=False) diff --git a/morpheus/messages/memory/tensor_memory.py b/morpheus/messages/memory/tensor_memory.py index 81d8d9d9c0..43f42f9f14 100644 --- a/morpheus/messages/memory/tensor_memory.py +++ b/morpheus/messages/memory/tensor_memory.py @@ -85,7 +85,7 @@ def set_tensors(self, tensors: typing.Dict[str, cp.ndarray]): def get_tensor(self, name: str): """ - Get the Tensor stored in the TensorMemory container identified by `name`. + Get the Tensor stored in the container identified by `name`. Parameters ---------- @@ -99,20 +99,24 @@ def get_tensor(self, name: str): Raises ------ - KeyError - When no matching tensor exists. + AttributeError + If output name does not exist in message container. """ - return self._tensors[name] + try: + return self._tensors[name] + except KeyError: + raise AttributeError def set_tensor(self, name: str, tensor: cp.ndarray): """ - Update the tensor identified by `name`. If the length of the tensor has changed, then the `count` - property should also be updated. + Update the tensor identified by `name`. Parameters ---------- tensor : cupy.ndarray Tensory as a CuPy Array. 
""" - self._check_tensor(tensor) - self._tensors[name] = tensor + # Ensure that we have 2D array here (`ensure_2d` inserts the wrong axis) + reshaped_tensor = tensor if tensor.ndim == 2 else cp.reshape(tensor, (tensor.shape[0], -1)) + self._check_tensor(reshaped_tensor) + self._tensors[name] = reshaped_tensor diff --git a/morpheus/messages/multi_inference_message.py b/morpheus/messages/multi_inference_message.py index 16f486fde4..20ba5ce117 100644 --- a/morpheus/messages/multi_inference_message.py +++ b/morpheus/messages/multi_inference_message.py @@ -16,8 +16,6 @@ import dataclasses import typing -import cupy as cp - import morpheus._lib.messages as _messages from morpheus.messages.multi_tensor_message import MultiTensorMessage diff --git a/tests/test_messages.py b/tests/test_messages.py index cfcfe85fa9..6c82a37dcb 100644 --- a/tests/test_messages.py +++ b/tests/test_messages.py @@ -68,6 +68,11 @@ def check_all_messages(should_be_cpp: bool, no_cpp_class: bool): # No C++ impl, should always get the Python class check_message(messages.InferenceMemoryAE, None, should_be_cpp, no_cpp_class, (1, cp_array, cp_array)) + check_message(messages.MultiTensorMessage, + _messages.MultiTensorMessage, + should_be_cpp, + no_cpp_class, (None, 0, 1, None, 0, 1)) + check_message(messages.MultiInferenceMessage, _messages.MultiInferenceMessage, should_be_cpp, From 05228233c75b5dd71819700d1c276305fb5ee9e1 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Tue, 28 Feb 2023 11:04:03 -0800 Subject: [PATCH 66/85] cleanups --- morpheus/messages/memory/inference_memory.py | 16 ++++++++-------- morpheus/messages/memory/response_memory.py | 15 ++++++++++----- morpheus/messages/memory/tensor_memory.py | 9 +++++++-- 3 files changed, 25 insertions(+), 15 deletions(-) diff --git a/morpheus/messages/memory/inference_memory.py b/morpheus/messages/memory/inference_memory.py index c522248ce1..54ac64e8d0 100644 --- a/morpheus/messages/memory/inference_memory.py +++ 
b/morpheus/messages/memory/inference_memory.py @@ -31,12 +31,12 @@ class InferenceMemory(TensorMemory, cpp_class=_messages.InferenceMemory): def get_input(self, name: str): """ - Get the Tensor stored in the container identified by `name`. Alias for `InferenceMemory.get_tensor`. + Get the tensor stored in the container identified by `name`. Alias for `InferenceMemory.get_tensor`. Parameters ---------- name : str - Key used to do lookup in inputs dict of message container. + Key used to do lookup in inputs dict of the container. Returns ------- @@ -46,22 +46,22 @@ def get_input(self, name: str): Raises ------ AttributeError - If input name does not exist in message container. + If input name does not exist in the container. """ return self.get_tensor(name) - def set_input(self, name: str, value: cp.ndarray): + def set_input(self, name: str, tensor: cp.ndarray): """ Update the input tensor identified by `name`. Alias for `InferenceMemory.set_tensor` Parameters ---------- name : str - Key used to do lookup in inputs dict of message container. - value : cupy.ndarray - Value to set for input. + Key used to do lookup in inputs dict of the container. + tensor : cupy.ndarray + Tensor as a CuPy array. """ - self.set_tensor(name, value) + self.set_tensor(name, tensor) @dataclasses.dataclass(init=False) diff --git a/morpheus/messages/memory/response_memory.py b/morpheus/messages/memory/response_memory.py index 0c5daa5ecd..560d8668c4 100644 --- a/morpheus/messages/memory/response_memory.py +++ b/morpheus/messages/memory/response_memory.py @@ -48,18 +48,23 @@ def get_output(self, name: str): """ return self.get_tensor(name) - def set_output(self, name: str, value: cp.ndarray): + def set_output(self, name: str, tensor: cp.ndarray): """ Update the output tensor identified by `name`. Alias for `ResponseMemory.set_tensor` Parameters ---------- name : str - Key used to do lookup in tensors dict of message container. - value : cupy.ndarray - Value to set for input. 
+ Key used to do lookup in tensors dict of the container. + tensor : cupy.ndarray + Tensor as a CuPy array. + + Raises + ------ + ValueError + If the number of rows in `tensor` does not match `count` """ - self.set_tensor(name, value) + self.set_tensor(name, tensor) @dataclasses.dataclass(init=False) diff --git a/morpheus/messages/memory/tensor_memory.py b/morpheus/messages/memory/tensor_memory.py index 43f42f9f14..fde20ee61c 100644 --- a/morpheus/messages/memory/tensor_memory.py +++ b/morpheus/messages/memory/tensor_memory.py @@ -100,7 +100,7 @@ def get_tensor(self, name: str): Raises ------ AttributeError - If output name does not exist in message container. + If tensor name does not exist in the container. """ try: return self._tensors[name] @@ -114,7 +114,12 @@ def set_tensor(self, name: str, tensor: cp.ndarray): Parameters ---------- tensor : cupy.ndarray - Tensory as a CuPy Array. + Tensor as a CuPy array. + + Raises + ------ + ValueError + If the number of rows in `tensor` does not match `count` """ # Ensure that we have 2D array here (`ensure_2d` inserts the wrong axis) reshaped_tensor = tensor if tensor.ndim == 2 else cp.reshape(tensor, (tensor.shape[0], -1)) From 8c748e74005776a554452eaf8c47a55d2608e439 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Tue, 28 Feb 2023 11:05:40 -0800 Subject: [PATCH 67/85] Insert license --- morpheus/messages/multi_tensor_message.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/morpheus/messages/multi_tensor_message.py b/morpheus/messages/multi_tensor_message.py index 4145cd9330..0be2b01f48 100644 --- a/morpheus/messages/multi_tensor_message.py +++ b/morpheus/messages/multi_tensor_message.py @@ -1,3 +1,18 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import dataclasses import typing From 5a3cebb02e4fade8bc5c6dc93fe98c850da84810 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Tue, 28 Feb 2023 11:44:43 -0800 Subject: [PATCH 68/85] IWYU fixes --- .../include/morpheus/messages/memory/inference_memory.hpp | 5 ++++- .../morpheus/messages/memory/inference_memory_fil.hpp | 1 - .../include/morpheus/messages/memory/response_memory.hpp | 6 ++++-- .../morpheus/messages/memory/response_memory_probs.hpp | 2 +- .../include/morpheus/messages/memory/tensor_memory.hpp | 2 +- morpheus/_lib/include/morpheus/messages/multi_tensor.hpp | 2 ++ morpheus/_lib/include/morpheus/utilities/cupy_util.hpp | 1 + morpheus/_lib/src/messages/memory/inference_memory.cpp | 4 ++-- .../_lib/src/messages/memory/inference_memory_fil.cpp | 2 -- .../_lib/src/messages/memory/inference_memory_nlp.cpp | 1 - morpheus/_lib/src/messages/memory/response_memory.cpp | 4 ++-- morpheus/_lib/src/messages/memory/tensor_memory.cpp | 8 ++++---- morpheus/_lib/src/python_modules/messages.cpp | 1 - morpheus/_lib/src/stages/preprocess_nlp.cpp | 2 +- 14 files changed, 22 insertions(+), 19 deletions(-) diff --git a/morpheus/_lib/include/morpheus/messages/memory/inference_memory.hpp b/morpheus/_lib/include/morpheus/messages/memory/inference_memory.hpp index bb7f6b61eb..0b51c5d042 100644 --- a/morpheus/_lib/include/morpheus/messages/memory/inference_memory.hpp +++ b/morpheus/_lib/include/morpheus/messages/memory/inference_memory.hpp @@ -18,9 +18,12 @@ #pragma once #include "morpheus/messages/memory/tensor_memory.hpp" +#include "morpheus/utilities/cupy_util.hpp" // 
for CupyUtil::tensor_map_t + +#include // for object #include // for size_t -#include +#include // for shared_ptr #include namespace morpheus { diff --git a/morpheus/_lib/include/morpheus/messages/memory/inference_memory_fil.hpp b/morpheus/_lib/include/morpheus/messages/memory/inference_memory_fil.hpp index 02f50e832b..33657574f7 100644 --- a/morpheus/_lib/include/morpheus/messages/memory/inference_memory_fil.hpp +++ b/morpheus/_lib/include/morpheus/messages/memory/inference_memory_fil.hpp @@ -25,7 +25,6 @@ #include #include -#include namespace morpheus { /****** Component public implementations *******************/ diff --git a/morpheus/_lib/include/morpheus/messages/memory/response_memory.hpp b/morpheus/_lib/include/morpheus/messages/memory/response_memory.hpp index a81a7cf6e6..b7565b902a 100644 --- a/morpheus/_lib/include/morpheus/messages/memory/response_memory.hpp +++ b/morpheus/_lib/include/morpheus/messages/memory/response_memory.hpp @@ -18,10 +18,12 @@ #pragma once #include "morpheus/messages/memory/tensor_memory.hpp" -#include "morpheus/objects/tensor_object.hpp" // for TensorObject +#include "morpheus/utilities/cupy_util.hpp" // for CupyUtil::tensor_map_t + +#include // for object #include // for size_t -#include +#include // for shared_ptr #include namespace morpheus { diff --git a/morpheus/_lib/include/morpheus/messages/memory/response_memory_probs.hpp b/morpheus/_lib/include/morpheus/messages/memory/response_memory_probs.hpp index 2fb45cf44d..90ac71b650 100644 --- a/morpheus/_lib/include/morpheus/messages/memory/response_memory_probs.hpp +++ b/morpheus/_lib/include/morpheus/messages/memory/response_memory_probs.hpp @@ -18,8 +18,8 @@ #pragma once #include "morpheus/messages/memory/response_memory.hpp" -#include "morpheus/messages/memory/tensor_memory.hpp" #include "morpheus/objects/tensor_object.hpp" +#include "morpheus/utilities/cupy_util.hpp" // for CupyUtil::tensor_map_t #include #include diff --git 
a/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp b/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp index 46887f22d7..ae3caf2ca0 100644 --- a/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp +++ b/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp @@ -23,7 +23,7 @@ #include // for object #include // for size_t -#include +#include // for shared_ptr #include #include // for pair #include diff --git a/morpheus/_lib/include/morpheus/messages/multi_tensor.hpp b/morpheus/_lib/include/morpheus/messages/multi_tensor.hpp index 1f0f75f592..a6f1182aeb 100644 --- a/morpheus/_lib/include/morpheus/messages/multi_tensor.hpp +++ b/morpheus/_lib/include/morpheus/messages/multi_tensor.hpp @@ -22,6 +22,8 @@ #include "morpheus/messages/multi.hpp" #include "morpheus/objects/tensor_object.hpp" +#include // for object + #include #include #include diff --git a/morpheus/_lib/include/morpheus/utilities/cupy_util.hpp b/morpheus/_lib/include/morpheus/utilities/cupy_util.hpp index e66fd32fe3..48ddc9e991 100644 --- a/morpheus/_lib/include/morpheus/utilities/cupy_util.hpp +++ b/morpheus/_lib/include/morpheus/utilities/cupy_util.hpp @@ -23,6 +23,7 @@ #include #include +#include namespace morpheus { /****** Component public implementations *******************/ diff --git a/morpheus/_lib/src/messages/memory/inference_memory.cpp b/morpheus/_lib/src/messages/memory/inference_memory.cpp index 382280cb84..3fafe20dc0 100644 --- a/morpheus/_lib/src/messages/memory/inference_memory.cpp +++ b/morpheus/_lib/src/messages/memory/inference_memory.cpp @@ -17,8 +17,8 @@ #include "morpheus/messages/memory/inference_memory.hpp" -#include // for object -#include +#include +#include // IWYU pragma: keep #include #include // for move diff --git a/morpheus/_lib/src/messages/memory/inference_memory_fil.cpp b/morpheus/_lib/src/messages/memory/inference_memory_fil.cpp index b3a1029300..333abd41ea 100644 --- 
a/morpheus/_lib/src/messages/memory/inference_memory_fil.cpp +++ b/morpheus/_lib/src/messages/memory/inference_memory_fil.cpp @@ -18,7 +18,6 @@ #include "morpheus/messages/memory/inference_memory_fil.hpp" #include "morpheus/messages/memory/inference_memory.hpp" -#include "morpheus/messages/memory/tensor_memory.hpp" #include "morpheus/utilities/cupy_util.hpp" #include @@ -27,7 +26,6 @@ #include // this->tensors is a map #include #include // for runtime_error -#include #include namespace morpheus { diff --git a/morpheus/_lib/src/messages/memory/inference_memory_nlp.cpp b/morpheus/_lib/src/messages/memory/inference_memory_nlp.cpp index 538334d34a..33af8f4eff 100644 --- a/morpheus/_lib/src/messages/memory/inference_memory_nlp.cpp +++ b/morpheus/_lib/src/messages/memory/inference_memory_nlp.cpp @@ -18,7 +18,6 @@ #include "morpheus/messages/memory/inference_memory_nlp.hpp" #include "morpheus/messages/memory/inference_memory.hpp" -#include "morpheus/messages/memory/tensor_memory.hpp" #include "morpheus/utilities/cupy_util.hpp" // for CupyUtil #include // for size_type diff --git a/morpheus/_lib/src/messages/memory/response_memory.cpp b/morpheus/_lib/src/messages/memory/response_memory.cpp index 00ae4c0118..bee9c28659 100644 --- a/morpheus/_lib/src/messages/memory/response_memory.cpp +++ b/morpheus/_lib/src/messages/memory/response_memory.cpp @@ -19,8 +19,8 @@ #include "morpheus/utilities/cupy_util.hpp" -#include // for object -#include +#include +#include // IWYU pragma: keep #include #include // for move diff --git a/morpheus/_lib/src/messages/memory/tensor_memory.cpp b/morpheus/_lib/src/messages/memory/tensor_memory.cpp index e3b3554b43..6cacfc0fa0 100644 --- a/morpheus/_lib/src/messages/memory/tensor_memory.cpp +++ b/morpheus/_lib/src/messages/memory/tensor_memory.cpp @@ -15,14 +15,14 @@ * limitations under the License. 
*/ -#include "morpheus/messages/memory/tensor_memory.hpp" +#include "morpheus/messages/memory/tensor_memory.hpp" // IWYU pragma: associated #include "morpheus/utilities/cupy_util.hpp" -#include -#include // for key_error & object -#include +#include +#include // IWYU pragma: keep +#include #include #include // for std::length_error #include diff --git a/morpheus/_lib/src/python_modules/messages.cpp b/morpheus/_lib/src/python_modules/messages.cpp index 77c667bd1c..588276a63e 100644 --- a/morpheus/_lib/src/python_modules/messages.cpp +++ b/morpheus/_lib/src/python_modules/messages.cpp @@ -32,7 +32,6 @@ #include "morpheus/objects/data_table.hpp" #include "morpheus/objects/mutable_table_ctx_mgr.hpp" #include "morpheus/utilities/cudf_util.hpp" -#include "morpheus/utilities/cupy_util.hpp" // for CupyUtil #include #include // for Status diff --git a/morpheus/_lib/src/stages/preprocess_nlp.cpp b/morpheus/_lib/src/stages/preprocess_nlp.cpp index 9db112c57f..49594ac97d 100644 --- a/morpheus/_lib/src/stages/preprocess_nlp.cpp +++ b/morpheus/_lib/src/stages/preprocess_nlp.cpp @@ -18,12 +18,12 @@ #include "morpheus/stages/preprocess_nlp.hpp" #include "morpheus/messages/memory/inference_memory.hpp" // for InferenceMemory -#include "morpheus/messages/memory/tensor_memory.hpp" // for TensorMemory::tensor_map_t #include "morpheus/messages/multi_inference.hpp" #include "morpheus/objects/dtype.hpp" #include "morpheus/objects/table_info.hpp" // for TableInfo #include "morpheus/objects/tensor.hpp" #include "morpheus/objects/tensor_object.hpp" // for TensorIndex, TensorObject +#include "morpheus/utilities/cupy_util.hpp" // for CupyUtil::tensor_map_t #include // for column, column::contents #include // for strings_column_view From 3635018bbab110cf2fcb51bcfbd764e225a56e16 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Tue, 28 Feb 2023 11:46:40 -0800 Subject: [PATCH 69/85] Formatting --- morpheus/_lib/src/stages/triton_inference.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/morpheus/_lib/src/stages/triton_inference.cpp b/morpheus/_lib/src/stages/triton_inference.cpp index 7aa03bdf23..a48483d352 100644 --- a/morpheus/_lib/src/stages/triton_inference.cpp +++ b/morpheus/_lib/src/stages/triton_inference.cpp @@ -28,7 +28,7 @@ #include "morpheus/objects/triton_in_out.hpp" #include "morpheus/utilities/cupy_util.hpp" #include "morpheus/utilities/matx_util.hpp" -#include "morpheus/utilities/stage_util.hpp" // for foreach_map +#include "morpheus/utilities/stage_util.hpp" // for foreach_map #include "morpheus/utilities/string_util.hpp" // for MORPHEUS_CONCAT_STR #include "morpheus/utilities/tensor_util.hpp" // for get_elem_count From 3c5ead2a4f3d3e3207ad9cb341825d3b4379c2a2 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Wed, 1 Mar 2023 14:00:12 -0800 Subject: [PATCH 70/85] Fix merge error --- morpheus/messages/memory/response_memory.py | 8 ++++++++ morpheus/messages/multi_response_message.py | 7 +++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/morpheus/messages/memory/response_memory.py b/morpheus/messages/memory/response_memory.py index 560d8668c4..4c2b993689 100644 --- a/morpheus/messages/memory/response_memory.py +++ b/morpheus/messages/memory/response_memory.py @@ -14,12 +14,16 @@ # limitations under the License. 
import dataclasses +import logging import cupy as cp import morpheus._lib.messages as _messages from morpheus.messages.data_class_prop import DataClassProp from morpheus.messages.memory.tensor_memory import TensorMemory +from morpheus.utils import logger as morpheus_logger + +logger = logging.getLogger(__name__) @dataclasses.dataclass(init=False) @@ -79,6 +83,10 @@ class ResponseMemoryProbs(ResponseMemory, cpp_class=_messages.ResponseMemoryProb """ probs: dataclasses.InitVar[cp.ndarray] = DataClassProp(ResponseMemory.get_output, ResponseMemory.set_output) + def __new__(cls, *args, **kwargs): + morpheus_logger.deprecated_message_warning(logger, cls, ResponseMemory) + return super(ResponseMemory, cls).__new__(cls, *args, **kwargs) + def __init__(self, count: int, probs: cp.ndarray): super().__init__(count, tensors={'probs': probs}) diff --git a/morpheus/messages/multi_response_message.py b/morpheus/messages/multi_response_message.py index 5547c61337..bcd615fd22 100644 --- a/morpheus/messages/multi_response_message.py +++ b/morpheus/messages/multi_response_message.py @@ -14,11 +14,14 @@ # limitations under the License. 
import dataclasses +import logging import typing import morpheus._lib.messages as _messages from morpheus.messages.multi_tensor_message import MultiTensorMessage -from morpheus.utils.logger import deprecated_message_warning +from morpheus.utils import logger as morpheus_logger + +logger = logging.getLogger(__name__) @dataclasses.dataclass @@ -150,7 +153,7 @@ def probs(self): return self.get_output("probs") def __new__(cls, *args, **kwargs): - deprecated_message_warning(logger, cls, MultiResponseMessage) + morpheus_logger.deprecated_message_warning(logger, cls, MultiResponseMessage) return super(MultiResponseMessage, cls).__new__(cls, *args, **kwargs) From 2f8b0115f8ffc0275875b0cc39070364f193ed49 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Wed, 1 Mar 2023 14:00:52 -0800 Subject: [PATCH 71/85] Save the response output to a local variable to avoid fetching it inside of a loop --- morpheus/stages/inference/inference_stage.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/morpheus/stages/inference/inference_stage.py b/morpheus/stages/inference/inference_stage.py index b392df470d..b930e7ea2c 100644 --- a/morpheus/stages/inference/inference_stage.py +++ b/morpheus/stages/inference/inference_stage.py @@ -401,6 +401,7 @@ def _convert_one_response(memory: ResponseMemory, inf: MultiInferenceMessage, re # assert inf.mess_offset == saved_offset + saved_count probs = memory.get_output("probs") + resp_probs = res.get_output('probs') seq_offset = inf.seq_ids[0, 0].item() seq_count = inf.seq_ids[-1, 0].item() + 1 - seq_offset @@ -410,7 +411,7 @@ def _convert_one_response(memory: ResponseMemory, inf: MultiInferenceMessage, re assert seq_count == res.count # In message and out message have same count. 
Just use probs as is - probs[seq_offset:seq_offset + seq_count, :] = res.get_output('probs') + probs[seq_offset:seq_offset + seq_count, :] = resp_probs else: assert inf.count == res.count @@ -418,7 +419,7 @@ def _convert_one_response(memory: ResponseMemory, inf: MultiInferenceMessage, re # Out message has more reponses, so we have to do key based blending of probs for i, idx in enumerate(mess_ids): - probs[idx, :] = cp.maximum(probs[idx, :], res.get_output('probs')[i, :]) + probs[idx, :] = cp.maximum(probs[idx, :], resp_probs[i, :]) return MultiResponseMessage(meta=inf.meta, mess_offset=inf.mess_offset, From 905ab4645682013fac2c0d6c93d0a746dd2cfacb Mon Sep 17 00:00:00 2001 From: David Gardner Date: Wed, 1 Mar 2023 14:01:06 -0800 Subject: [PATCH 72/85] Fix tests --- tests/test_inference_stage.py | 11 ++++++----- tests/test_messages.py | 5 +++-- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/tests/test_inference_stage.py b/tests/test_inference_stage.py index 92e4657e59..209264b3aa 100755 --- a/tests/test_inference_stage.py +++ b/tests/test_inference_stage.py @@ -288,7 +288,7 @@ def test_convert_response_errors(): mm2.count.side_effect = [2, 1] mm2.mess_count.side_effect = [2, 1, 1] - pytest.raises(ValueError, inference_stage.InferenceStage._convert_response, ([mm1, mm2], [out_msg1, out_msg2])) + pytest.raises(AssertionError, inference_stage.InferenceStage._convert_response, ([mm1, mm2], [out_msg1, out_msg2])) @pytest.mark.use_python @@ -308,13 +308,14 @@ def test_convert_one_response(config): assert mem.get_output('probs').tolist() == [[1.0, 2.0, 3.0]] # Test for the second branch + inf.mess_count = 1 inf.count = 2 - inf.seq_ids = cp.array([[0], [1]]) - res = ResponseMemory(count=1, tensors={'probs': cp.array([[0, 0.6, 0.7], [5.6, 4.4, 9.2]])}) + inf.seq_ids = cp.array([[0], [0]]) + res = ResponseMemory(count=2, tensors={'probs': cp.array([[0, 0.6, 0.7], [5.6, 4.4, 9.2]])}) - mem = ResponseMemory(1, tensors={'probs': cp.array([[0.1, 0.5, 0.8], [4.5, 
6.7, 8.9]])}) + mem = ResponseMemory(1, tensors={'probs': cp.array([[4.5, 6.7, 8.9]])}) mpm = inference_stage.InferenceStage._convert_one_response(mem, inf, res) - assert mem.get_output('probs').tolist() == [[0.1, 0.6, 0.8], [5.6, 6.7, 9.2]] + assert mem.get_output('probs').tolist() == [[5.6, 6.7, 9.2]] def test_convert_one_response_error(): diff --git a/tests/test_messages.py b/tests/test_messages.py index 02bf0472be..6dfb91a606 100644 --- a/tests/test_messages.py +++ b/tests/test_messages.py @@ -26,7 +26,7 @@ from morpheus.messages.memory import tensor_memory -@mock.patch('morpheus.messages.multi_response_message.deprecated_message_warning') +@mock.patch('morpheus.utils.logger.deprecated_message_warning') def check_message(python_type: type, cpp_type: type, should_be_cpp: bool, @@ -86,7 +86,8 @@ def check_all_messages(should_be_cpp: bool, no_cpp_class: bool): check_message(messages.MultiTensorMessage, _messages.MultiTensorMessage, should_be_cpp, - no_cpp_class, (None, 0, 1, None, 0, 1)) + no_cpp_class, (None, 0, 1, None, 0, 1), + False) check_message(messages.MultiInferenceMessage, _messages.MultiInferenceMessage, From 79f117fff84abdbf4de438f19e6f97f762240085 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Wed, 1 Mar 2023 14:11:58 -0800 Subject: [PATCH 73/85] IWYU --- .../_lib/include/morpheus/stages/add_classification.hpp | 2 +- morpheus/_lib/include/morpheus/stages/add_scores.hpp | 2 +- morpheus/_lib/include/morpheus/stages/triton_inference.hpp | 2 +- morpheus/_lib/src/stages/triton_inference.cpp | 7 +++---- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/morpheus/_lib/include/morpheus/stages/add_classification.hpp b/morpheus/_lib/include/morpheus/stages/add_classification.hpp index 155e4eb251..ee299e5d1d 100644 --- a/morpheus/_lib/include/morpheus/stages/add_classification.hpp +++ b/morpheus/_lib/include/morpheus/stages/add_classification.hpp @@ -17,7 +17,7 @@ #pragma once -#include "morpheus/messages/multi_response_probs.hpp" +#include 
"morpheus/messages/multi_response.hpp" // for MultiResponseMessage #include #include diff --git a/morpheus/_lib/include/morpheus/stages/add_scores.hpp b/morpheus/_lib/include/morpheus/stages/add_scores.hpp index f8da91929f..e2c5d9f3f9 100644 --- a/morpheus/_lib/include/morpheus/stages/add_scores.hpp +++ b/morpheus/_lib/include/morpheus/stages/add_scores.hpp @@ -17,7 +17,7 @@ #pragma once -#include "morpheus/messages/multi_response_probs.hpp" +#include "morpheus/messages/multi_response.hpp" // for MultiResponseMessage #include #include diff --git a/morpheus/_lib/include/morpheus/stages/triton_inference.hpp b/morpheus/_lib/include/morpheus/stages/triton_inference.hpp index da0b720e4d..10afa15759 100644 --- a/morpheus/_lib/include/morpheus/stages/triton_inference.hpp +++ b/morpheus/_lib/include/morpheus/stages/triton_inference.hpp @@ -18,7 +18,7 @@ #pragma once #include "morpheus/messages/multi_inference.hpp" -#include "morpheus/messages/multi_response_probs.hpp" +#include "morpheus/messages/multi_response.hpp" // for MultiResponseMessage #include "morpheus/objects/triton_in_out.hpp" #include diff --git a/morpheus/_lib/src/stages/triton_inference.cpp b/morpheus/_lib/src/stages/triton_inference.cpp index 18775a4188..098c5579d9 100644 --- a/morpheus/_lib/src/stages/triton_inference.cpp +++ b/morpheus/_lib/src/stages/triton_inference.cpp @@ -19,16 +19,15 @@ #include "morpheus/messages/memory/response_memory.hpp" // for ResponseMemory #include "morpheus/messages/memory/tensor_memory.hpp" // for TensorMemory::tensor_map_t -#include "morpheus/messages/multi_response_probs.hpp" -#include "morpheus/objects/dev_mem_info.hpp" // for DevMemInfo -#include "morpheus/objects/dtype.hpp" // for DType +#include "morpheus/objects/dev_mem_info.hpp" // for DevMemInfo +#include "morpheus/objects/dtype.hpp" // for DType #include "morpheus/objects/rmm_tensor.hpp" #include "morpheus/objects/tensor.hpp" #include "morpheus/objects/tensor_object.hpp" // for TensorIndex, TensorObject #include 
"morpheus/objects/triton_in_out.hpp" #include "morpheus/utilities/cupy_util.hpp" #include "morpheus/utilities/matx_util.hpp" -#include "morpheus/utilities/stage_util.hpp" // for foreach_map +#include "morpheus/utilities/stage_util.hpp" // for foreach_map #include "morpheus/utilities/string_util.hpp" // for MORPHEUS_CONCAT_STR #include "morpheus/utilities/tensor_util.hpp" // for get_elem_count From 54e467cd426919f64e7b4e4822a53489270c43bb Mon Sep 17 00:00:00 2001 From: David Gardner Date: Tue, 7 Mar 2023 11:03:52 -0800 Subject: [PATCH 74/85] Fix merge error [no ci] --- .../messages/memory/inference_memory_fil.cpp | 102 +++++++++--------- 1 file changed, 49 insertions(+), 53 deletions(-) diff --git a/morpheus/_lib/src/messages/memory/inference_memory_fil.cpp b/morpheus/_lib/src/messages/memory/inference_memory_fil.cpp index 4960f66fec..123ca62d6c 100644 --- a/morpheus/_lib/src/messages/memory/inference_memory_fil.cpp +++ b/morpheus/_lib/src/messages/memory/inference_memory_fil.cpp @@ -18,11 +18,7 @@ #include "morpheus/messages/memory/inference_memory_fil.hpp" #include "morpheus/messages/memory/inference_memory.hpp" -<<<<<<< HEAD -#include "morpheus/utilities/cupy_util.hpp" - ======= #include "morpheus/utilities/cupy_util.hpp" // for CupyUtil - >>>>>>> branch-23.03 #include #include @@ -30,63 +26,63 @@ #include #include - namespace morpheus +namespace morpheus { +/****** Component public implementations *******************/ +/****** InferenceMemoryFIL****************************************/ +InferenceMemoryFIL::InferenceMemoryFIL(size_t count, TensorObject&& input__0, TensorObject&& seq_ids) : + InferenceMemory(count) { - /****** Component public implementations *******************/ - /****** InferenceMemoryFIL****************************************/ - InferenceMemoryFIL::InferenceMemoryFIL(size_t count, TensorObject && input__0, TensorObject && seq_ids) : - InferenceMemory(count) - { - set_tensor("input__0", std::move(input__0)); - set_tensor("seq_ids", 
std::move(seq_ids)); - } + set_tensor("input__0", std::move(input__0)); + set_tensor("seq_ids", std::move(seq_ids)); +} - const TensorObject& InferenceMemoryFIL::get_input__0() const - { - return get_tensor("input__0"); - } +const TensorObject& InferenceMemoryFIL::get_input__0() const +{ + return get_tensor("input__0"); +} - void InferenceMemoryFIL::set_input__0(TensorObject && input__0) - { - set_tensor("input__0", std::move(input__0)); - } +void InferenceMemoryFIL::set_input__0(TensorObject&& input__0) +{ + set_tensor("input__0", std::move(input__0)); +} - const TensorObject& InferenceMemoryFIL::get_seq_ids() const - { - return get_tensor("seq_ids"); - } +const TensorObject& InferenceMemoryFIL::get_seq_ids() const +{ + return get_tensor("seq_ids"); +} - void InferenceMemoryFIL::set_seq_ids(TensorObject && seq_ids) - { - set_tensor("seq_ids", std::move(seq_ids)); - } +void InferenceMemoryFIL::set_seq_ids(TensorObject&& seq_ids) +{ + set_tensor("seq_ids", std::move(seq_ids)); +} - /****** InferenceMemoryFILInterfaceProxy *************************/ - std::shared_ptr InferenceMemoryFILInterfaceProxy::init( - cudf::size_type count, pybind11::object input__0, pybind11::object seq_ids) - { - // Convert the cupy arrays to tensors - return std::make_shared( - count, std::move(CupyUtil::cupy_to_tensor(input__0)), std::move(CupyUtil::cupy_to_tensor(seq_ids))); - } +/****** InferenceMemoryFILInterfaceProxy *************************/ +std::shared_ptr InferenceMemoryFILInterfaceProxy::init(cudf::size_type count, + pybind11::object input__0, + pybind11::object seq_ids) +{ + // Convert the cupy arrays to tensors + return std::make_shared( + count, std::move(CupyUtil::cupy_to_tensor(input__0)), std::move(CupyUtil::cupy_to_tensor(seq_ids))); +} - pybind11::object InferenceMemoryFILInterfaceProxy::get_input__0(InferenceMemoryFIL & self) - { - return get_tensor_property(self, "input__0"); - } +pybind11::object InferenceMemoryFILInterfaceProxy::get_input__0(InferenceMemoryFIL& self) 
+{ + return get_tensor_property(self, "input__0"); +} - void InferenceMemoryFILInterfaceProxy::set_input__0(InferenceMemoryFIL & self, pybind11::object cupy_values) - { - self.set_input__0(CupyUtil::cupy_to_tensor(cupy_values)); - } +void InferenceMemoryFILInterfaceProxy::set_input__0(InferenceMemoryFIL& self, pybind11::object cupy_values) +{ + self.set_input__0(CupyUtil::cupy_to_tensor(cupy_values)); +} - pybind11::object InferenceMemoryFILInterfaceProxy::get_seq_ids(InferenceMemoryFIL & self) - { - return get_tensor_property(self, "seq_ids"); - } +pybind11::object InferenceMemoryFILInterfaceProxy::get_seq_ids(InferenceMemoryFIL& self) +{ + return get_tensor_property(self, "seq_ids"); +} - void InferenceMemoryFILInterfaceProxy::set_seq_ids(InferenceMemoryFIL & self, pybind11::object cupy_values) - { - return self.set_seq_ids(CupyUtil::cupy_to_tensor(cupy_values)); - } +void InferenceMemoryFILInterfaceProxy::set_seq_ids(InferenceMemoryFIL& self, pybind11::object cupy_values) +{ + return self.set_seq_ids(CupyUtil::cupy_to_tensor(cupy_values)); +} } // namespace morpheus From d7c3b74e26bc8e7c55f019401cf0f61a73a31e86 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Tue, 7 Mar 2023 11:08:28 -0800 Subject: [PATCH 75/85] Fix merge error [no ci] --- morpheus/_lib/src/python_modules/messages.cpp | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/morpheus/_lib/src/python_modules/messages.cpp b/morpheus/_lib/src/python_modules/messages.cpp index abf7ae2b23..ddce249116 100644 --- a/morpheus/_lib/src/python_modules/messages.cpp +++ b/morpheus/_lib/src/python_modules/messages.cpp @@ -277,19 +277,6 @@ PYBIND11_MODULE(messages, m) .def_property_readonly("input__0", &MultiInferenceFILMessageInterfaceProxy::input__0) .def_property_readonly("seq_ids", &MultiInferenceFILMessageInterfaceProxy::seq_ids); - py::class_>(m, "MultiTensorMessage") - .def(py::init<>(&MultiTensorMessageInterfaceProxy::init), - py::arg("meta"), - py::arg("mess_offset"), - py::arg("mess_count"), 
- py::arg("memory"), - py::arg("offset"), - py::arg("count")) - .def_property_readonly("memory", &MultiTensorMessageInterfaceProxy::memory) - .def_property_readonly("offset", &MultiTensorMessageInterfaceProxy::offset) - .def_property_readonly("count", &MultiTensorMessageInterfaceProxy::count) - .def("get_tensor", &MultiTensorMessageInterfaceProxy::get_tensor); - py::class_>(m, "MultiResponseMessage") .def(py::init<>(&MultiResponseMessageInterfaceProxy::init), py::arg("meta"), From 94fc8eb1dadfc38bf666e9d0a29fc17665cfe674 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Tue, 7 Mar 2023 11:27:37 -0800 Subject: [PATCH 76/85] Fix tests [no ci] --- tests/test_inference_stage.py | 8 ++++---- tests/test_messages.py | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/test_inference_stage.py b/tests/test_inference_stage.py index 196f4e3d7b..ab962c1ec2 100755 --- a/tests/test_inference_stage.py +++ b/tests/test_inference_stage.py @@ -288,7 +288,7 @@ def test_convert_response_errors(): mm2.count.side_effect = [2, 1] mm2.mess_count.side_effect = [2, 1, 1] - pytest.raises(ValueError, inference_stage.InferenceStage._convert_response, ([mm1, mm2], [out_msg1, out_msg2])) + pytest.raises(AssertionError, inference_stage.InferenceStage._convert_response, ([mm1, mm2], [out_msg1, out_msg2])) @pytest.mark.use_python @@ -310,11 +310,11 @@ def test_convert_one_response(config): # Test for the second branch inf.count = 2 inf.seq_ids = cp.array([[0], [1]]) - res = ResponseMemory(count=2, probs=cp.array([[0, 0.6, 0.7], [5.6, 4.4, 9.2]])) + res = ResponseMemory(count=2, tensors={'probs': cp.array([[0, 0.6, 0.7], [5.6, 4.4, 9.2]])}) - mem = ResponseMemory(2, probs=cp.array([[0.1, 0.5, 0.8], [4.5, 6.7, 8.9]])) + mem = ResponseMemory(2, tensors={'probs': cp.array([[0.1, 0.5, 0.8], [4.5, 6.7, 8.9]])}) mpm = inference_stage.InferenceStage._convert_one_response(mem, inf, res) - assert mem.get_output('probs').tolist() == [[5.6, 6.7, 9.2]] + assert 
mem.get_output('probs').tolist() == [[0.1, 0.6, 0.8], [5.6, 6.7, 9.2]] def test_convert_one_response_error(): diff --git a/tests/test_messages.py b/tests/test_messages.py index 7b431b83dd..904293cb73 100644 --- a/tests/test_messages.py +++ b/tests/test_messages.py @@ -92,7 +92,8 @@ def check_all_messages(should_be_cpp: bool, no_cpp_class: bool): check_message(messages.MultiTensorMessage, _messages.MultiTensorMessage, should_be_cpp, - no_cpp_class, (None, 0, 1, None, 0, 1)) + no_cpp_class, (None, 0, 1, None, 0, 1), + False) check_message(messages.MultiInferenceMessage, _messages.MultiInferenceMessage, From a838d0b0fb23a4548af737aa14af544744787346 Mon Sep 17 00:00:00 2001 From: Michael Demoret Date: Mon, 20 Mar 2023 13:07:12 -0600 Subject: [PATCH 77/85] Almost all tests are passing --- .../morpheus/messages/multi_inference.hpp | 17 ++- .../morpheus/messages/multi_inference_fil.hpp | 19 +-- .../morpheus/messages/multi_inference_nlp.hpp | 16 ++- .../morpheus/messages/multi_response.hpp | 29 +++- .../messages/multi_response_probs.hpp | 18 ++- .../morpheus/messages/multi_tensor.hpp | 15 +- .../morpheus/stages/add_classification.hpp | 21 +-- .../include/morpheus/stages/add_scores.hpp | 14 +- .../morpheus/utilities/string_util.hpp | 23 +++ .../_lib/src/messages/multi_inference.cpp | 23 +-- .../_lib/src/messages/multi_inference_fil.cpp | 15 +- .../_lib/src/messages/multi_inference_nlp.cpp | 14 +- morpheus/_lib/src/messages/multi_response.cpp | 55 +++++++- .../src/messages/multi_response_probs.cpp | 26 +++- morpheus/_lib/src/messages/multi_tensor.cpp | 69 ++++++--- morpheus/_lib/src/python_modules/messages.cpp | 44 ++++-- morpheus/_lib/src/python_modules/stages.cpp | 12 +- .../_lib/src/stages/add_classification.cpp | 33 ++--- morpheus/_lib/src/stages/add_scores.cpp | 30 ++-- morpheus/_lib/src/stages/triton_inference.cpp | 1 - morpheus/_lib/src/utilities/string_util.cpp | 3 + morpheus/messages/__init__.py | 2 + morpheus/messages/memory/inference_memory.py | 12 +- 
morpheus/messages/memory/response_memory.py | 16 +-- morpheus/messages/memory/tensor_memory.py | 10 +- morpheus/messages/multi_message.py | 6 + morpheus/messages/multi_response_message.py | 58 +++++--- morpheus/messages/multi_tensor_message.py | 12 +- .../inference/auto_encoder_inference_stage.py | 10 +- .../inference/identity_inference_stage.py | 13 +- morpheus/stages/inference/inference_stage.py | 24 ++-- .../inference/pytorch_inference_stage.py | 6 +- .../inference/triton_inference_stage.py | 22 +-- .../postprocess/add_classifications_stage.py | 95 ++----------- .../stages/postprocess/add_scores_stage.py | 89 ++---------- .../postprocess/add_scores_stage_base.py | 133 ++++++++++++++++++ .../postprocess/generate_viz_frames_stage.py | 2 +- .../stages/postprocess/ml_flow_drift_stage.py | 2 +- morpheus/utils/logger.py | 5 +- tests/test_add_classifications_stage.py | 73 ++++++---- tests/test_add_scores_stage.py | 68 +++++---- tests/test_inference_stage.py | 26 ++-- tests/test_messages.py | 28 ++-- tests/test_multi_message.py | 46 ++++++ tests/test_tensor_memory.py | 26 ++-- tests/utils.py | 2 + 46 files changed, 786 insertions(+), 497 deletions(-) create mode 100644 morpheus/stages/postprocess/add_scores_stage_base.py diff --git a/morpheus/_lib/include/morpheus/messages/multi_inference.hpp b/morpheus/_lib/include/morpheus/messages/multi_inference.hpp index 9d74fc1499..dc7a5c6e66 100644 --- a/morpheus/_lib/include/morpheus/messages/multi_inference.hpp +++ b/morpheus/_lib/include/morpheus/messages/multi_inference.hpp @@ -18,6 +18,7 @@ #pragma once #include "morpheus/messages/memory/inference_memory.hpp" +#include "morpheus/messages/memory/tensor_memory.hpp" #include "morpheus/messages/meta.hpp" #include "morpheus/messages/multi.hpp" #include "morpheus/messages/multi_tensor.hpp" @@ -62,11 +63,12 @@ class MultiInferenceMessage : public DerivedMultiMessage meta, - TensorIndex mess_offset = 0, - TensorIndex mess_count = -1, - std::shared_ptr memory = nullptr, - TensorIndex 
offset = 0, - TensorIndex count = -1); + TensorIndex mess_offset = 0, + TensorIndex mess_count = -1, + std::shared_ptr memory = nullptr, + TensorIndex offset = 0, + TensorIndex count = -1, + std::string id_tensor_name = "seq_ids"); /** * @brief Returns the input tensor for the given `name`. @@ -113,9 +115,10 @@ struct MultiInferenceMessageInterfaceProxy : public MultiTensorMessageInterfaceP static std::shared_ptr init(std::shared_ptr meta, TensorIndex mess_offset, TensorIndex mess_count, - std::shared_ptr memory, + std::shared_ptr memory, TensorIndex offset, - TensorIndex count); + TensorIndex count, + std::string id_tensor_name); }; #pragma GCC visibility pop /** @} */ // end of group diff --git a/morpheus/_lib/include/morpheus/messages/multi_inference_fil.hpp b/morpheus/_lib/include/morpheus/messages/multi_inference_fil.hpp index 67203217ec..51427659e2 100644 --- a/morpheus/_lib/include/morpheus/messages/multi_inference_fil.hpp +++ b/morpheus/_lib/include/morpheus/messages/multi_inference_fil.hpp @@ -18,7 +18,8 @@ #pragma once #include "morpheus/messages/memory/inference_memory.hpp" // for InferenceMemory -#include "morpheus/messages/meta.hpp" // for MessageMeta +#include "morpheus/messages/memory/tensor_memory.hpp" +#include "morpheus/messages/meta.hpp" // for MessageMeta #include "morpheus/messages/multi.hpp" #include "morpheus/messages/multi_inference.hpp" #include "morpheus/objects/tensor_object.hpp" @@ -59,11 +60,12 @@ class MultiInferenceFILMessage : public DerivedMultiMessage meta, - TensorIndex mess_offset = 0, - TensorIndex mess_count = -1, - std::shared_ptr memory = nullptr, - TensorIndex offset = 0, - TensorIndex count = -1); + TensorIndex mess_offset = 0, + TensorIndex mess_count = -1, + std::shared_ptr memory = nullptr, + TensorIndex offset = 0, + TensorIndex count = -1, + std::string id_tensor_name = "seq_ids"); /** * @brief Returns the 'input__0' tensor, throws a `std::runtime_error` if it does not exist @@ -119,9 +121,10 @@ struct 
MultiInferenceFILMessageInterfaceProxy : public MultiInferenceMessageInte static std::shared_ptr init(std::shared_ptr meta, TensorIndex mess_offset, TensorIndex mess_count, - std::shared_ptr memory, + std::shared_ptr memory, TensorIndex offset, - TensorIndex count); + TensorIndex count, + std::string id_tensor_name); /** * @brief Get 'input__0' tensor as a python object diff --git a/morpheus/_lib/include/morpheus/messages/multi_inference_nlp.hpp b/morpheus/_lib/include/morpheus/messages/multi_inference_nlp.hpp index 20e03af43d..e5aa8ed1b4 100644 --- a/morpheus/_lib/include/morpheus/messages/multi_inference_nlp.hpp +++ b/morpheus/_lib/include/morpheus/messages/multi_inference_nlp.hpp @@ -59,11 +59,12 @@ class MultiInferenceNLPMessage : public DerivedMultiMessage meta, - TensorIndex mess_offset = 0, - TensorIndex mess_count = -1, - std::shared_ptr memory = nullptr, - TensorIndex offset = 0, - TensorIndex count = -1); + TensorIndex mess_offset = 0, + TensorIndex mess_count = -1, + std::shared_ptr memory = nullptr, + TensorIndex offset = 0, + TensorIndex count = -1, + std::string id_tensor_name = "seq_ids"); /** * @brief Returns the 'input_ids' tensor, throws a `std::runtime_error` if it does not exist. 
@@ -135,9 +136,10 @@ struct MultiInferenceNLPMessageInterfaceProxy : public MultiInferenceMessageInte static std::shared_ptr init(std::shared_ptr meta, TensorIndex mess_offset, TensorIndex mess_count, - std::shared_ptr memory, + std::shared_ptr memory, TensorIndex offset, - TensorIndex count); + TensorIndex count, + std::string id_tensor_name); /** * @brief Get 'input_ids' tensor as a python object diff --git a/morpheus/_lib/include/morpheus/messages/multi_response.hpp b/morpheus/_lib/include/morpheus/messages/multi_response.hpp index 70bf5ce5ab..b62bcb920e 100644 --- a/morpheus/_lib/include/morpheus/messages/multi_response.hpp +++ b/morpheus/_lib/include/morpheus/messages/multi_response.hpp @@ -18,6 +18,7 @@ #pragma once #include "morpheus/messages/memory/response_memory.hpp" +#include "morpheus/messages/memory/tensor_memory.hpp" #include "morpheus/messages/meta.hpp" #include "morpheus/messages/multi.hpp" #include "morpheus/messages/multi_tensor.hpp" @@ -65,11 +66,15 @@ class MultiResponseMessage : public DerivedMultiMessage meta, - TensorIndex mess_offset = 0, - TensorIndex mess_count = -1, - std::shared_ptr memory = nullptr, - TensorIndex offset = 0, - TensorIndex count = -1); + TensorIndex mess_offset = 0, + TensorIndex mess_count = -1, + std::shared_ptr memory = nullptr, + TensorIndex offset = 0, + TensorIndex count = -1, + std::string id_tensor_name = "seq_ids", + std::string probs_tensor_name = "probs"); + + std::string probs_tensor_name; /** * @brief Returns the output tensor with the given name. 
@@ -97,6 +102,8 @@ class MultiResponseMessage : public DerivedMultiMessage init(std::shared_ptr meta, TensorIndex mess_offset, TensorIndex mess_count, - std::shared_ptr memory, + std::shared_ptr memory, TensorIndex offset, - TensorIndex count); + TensorIndex count, + std::string id_tensor_name, + std::string probs_tensor_name); + + static std::string probs_tensor_name_getter(MultiResponseMessage& self); + + static void probs_tensor_name_setter(MultiResponseMessage& self, std::string probs_tensor_name); /** * @brief Returns the output tensor for a given name @@ -133,6 +146,8 @@ struct MultiResponseMessageInterfaceProxy : public MultiTensorMessageInterfacePr * @throws pybind11::key_error When no matching tensor exists. */ static pybind11::object get_output(MultiResponseMessage& self, const std::string& name); + + static pybind11::object get_probs_tensor(MultiResponseMessage& self); }; #pragma GCC visibility pop /** @} */ // end of group diff --git a/morpheus/_lib/include/morpheus/messages/multi_response_probs.hpp b/morpheus/_lib/include/morpheus/messages/multi_response_probs.hpp index 55971f9566..6579170784 100644 --- a/morpheus/_lib/include/morpheus/messages/multi_response_probs.hpp +++ b/morpheus/_lib/include/morpheus/messages/multi_response_probs.hpp @@ -64,11 +64,13 @@ class MultiResponseProbsMessage : public DerivedMultiMessage meta, - TensorIndex mess_offset = 0, - TensorIndex mess_count = -1, - std::shared_ptr memory = nullptr, - TensorIndex offset = 0, - TensorIndex count = -1); + TensorIndex mess_offset = 0, + TensorIndex mess_count = -1, + std::shared_ptr memory = nullptr, + TensorIndex offset = 0, + TensorIndex count = -1, + std::string id_tensor_name = "seq_ids", + std::string probs_tensor_name = "probs"); /** * @brief Returns the `probs` (probabilities) output tensor @@ -106,9 +108,11 @@ struct MultiResponseProbsMessageInterfaceProxy : public MultiResponseMessageInte static std::shared_ptr init(std::shared_ptr meta, TensorIndex mess_offset, TensorIndex 
mess_count, - std::shared_ptr memory, + std::shared_ptr memory, TensorIndex offset, - TensorIndex count); + TensorIndex count, + std::string id_tensor_name, + std::string probs_tensor_name); /** * @brief Return the `probs` (probabilities) output tensor diff --git a/morpheus/_lib/include/morpheus/messages/multi_tensor.hpp b/morpheus/_lib/include/morpheus/messages/multi_tensor.hpp index e9361f4564..c81b8bf568 100644 --- a/morpheus/_lib/include/morpheus/messages/multi_tensor.hpp +++ b/morpheus/_lib/include/morpheus/messages/multi_tensor.hpp @@ -77,11 +77,13 @@ class MultiTensorMessage : public DerivedMultiMessage memory = nullptr, TensorIndex offset = 0, - TensorIndex count = -1); + TensorIndex count = -1, + std::string id_tensor_name = "seq_ids"); std::shared_ptr memory; TensorIndex offset{0}; TensorIndex count{0}; + std::string id_tensor_name; /** * @brief Returns a tensor with the given name. @@ -111,6 +113,8 @@ class MultiTensorMessage : public DerivedMultiMessage new_message, TensorIndex start, TensorIndex stop) const override; @@ -147,7 +151,8 @@ struct MultiTensorMessageInterfaceProxy TensorIndex mess_count, std::shared_ptr memory, TensorIndex offset, - TensorIndex count); + TensorIndex count, + std::string id_tensor_name); /** * @brief Returns a shared pointer of a tensor memory object @@ -172,6 +177,10 @@ struct MultiTensorMessageInterfaceProxy */ static TensorIndex count(MultiTensorMessage& self); + static std::string id_tensor_name_getter(MultiTensorMessage& self); + + static void id_tensor_name_setter(MultiTensorMessage& self, std::string id_tensor_name); + /** * @brief Returns the tensor tensor for a given name * @@ -182,6 +191,8 @@ struct MultiTensorMessageInterfaceProxy */ static pybind11::object get_tensor(MultiTensorMessage& self, const std::string& name); + static pybind11::object get_id_tensor(MultiTensorMessage& self); + /** * @brief Same as `get_tensor` but used when the method is being bound to a python property * diff --git 
a/morpheus/_lib/include/morpheus/stages/add_classification.hpp b/morpheus/_lib/include/morpheus/stages/add_classification.hpp index ee299e5d1d..6e7f17ed71 100644 --- a/morpheus/_lib/include/morpheus/stages/add_classification.hpp +++ b/morpheus/_lib/include/morpheus/stages/add_classification.hpp @@ -61,14 +61,9 @@ class AddClassificationsStage * @brief Construct a new Add Classifications Stage object * * @param threshold : Threshold to consider true/false for each class - * @param num_class_labels : Number of classification labels * @param idx2label : Index to classification labels map - * @param output_name : Name of the output tensor containing probabilities */ - AddClassificationsStage(float threshold, - std::size_t num_class_labels, - std::map idx2label, - std::string output_name = "probs"); + AddClassificationsStage(std::map idx2label, float threshold); private: /** @@ -76,10 +71,10 @@ class AddClassificationsStage */ subscribe_fn_t build_operator(); - float m_threshold; - std::size_t m_num_class_labels; std::map m_idx2label; - std::string m_output_name; + float m_threshold; + + std::size_t m_min_col_count; }; /****** AddClassificationStageInterfaceProxy******************/ @@ -94,19 +89,15 @@ struct AddClassificationStageInterfaceProxy * * @param builder : Pipeline context object reference * @param name : Name of a stage reference - * @param threshold : Threshold to consider true/false for each class - * @param num_class_labels : Number of classification labels * @param idx2label : Index to classification labels map - * @param output_name : Name of the output tensor containing probabilities + * @param threshold : Threshold to consider true/false for each class * @return std::shared_ptr> */ static std::shared_ptr> init( mrc::segment::Builder& builder, const std::string& name, - float threshold, - std::size_t num_class_labels, std::map idx2label, - std::string output_name); + float threshold); }; #pragma GCC visibility pop diff --git 
a/morpheus/_lib/include/morpheus/stages/add_scores.hpp b/morpheus/_lib/include/morpheus/stages/add_scores.hpp index e2c5d9f3f9..a616e5b6e3 100644 --- a/morpheus/_lib/include/morpheus/stages/add_scores.hpp +++ b/morpheus/_lib/include/morpheus/stages/add_scores.hpp @@ -62,20 +62,17 @@ class AddScoresStage * * @param num_class_labels : Number of classification labels * @param idx2label : Index to classification labels map - * @param output_name : Name of the output tensor containing probabilities */ - AddScoresStage(std::size_t num_class_labels, - std::map idx2label, - std::string output_name = "probs"); + AddScoresStage(std::map idx2label); /** * TODO(Documentation) */ subscribe_fn_t build_operator(); - std::size_t m_num_class_labels; std::map m_idx2label; - std::string m_output_name; + + std::size_t m_min_col_count; }; /****** AddScoresStageInterfaceProxy******************/ @@ -91,14 +88,11 @@ struct AddScoresStageInterfaceProxy * @param name : Name of a stage reference * @param num_class_labels : Number of classification labels * @param idx2label : Index to classification labels map - * @param output_name : Name of the output tensor containing probabilities * @return std::shared_ptr> */ static std::shared_ptr> init(mrc::segment::Builder& builder, const std::string& name, - std::size_t num_class_labels, - std::map idx2label, - std::string output_name); + std::map idx2label); }; #pragma GCC visibility pop diff --git a/morpheus/_lib/include/morpheus/utilities/string_util.hpp b/morpheus/_lib/include/morpheus/utilities/string_util.hpp index 70417ae7b6..5d1d3aa372 100644 --- a/morpheus/_lib/include/morpheus/utilities/string_util.hpp +++ b/morpheus/_lib/include/morpheus/utilities/string_util.hpp @@ -65,6 +65,29 @@ struct StringUtil return MORPHEUS_CONCAT_STR("[" << join(begin, end, ", ") << "]"); } + template + static std::string map_to_str(IterT begin, IterT end) + { + std::ostringstream ss; + + ss << "{"; + + if (begin != end) + { + ss << begin->first << ": '" << 
begin->second << "'"; + ++begin; + } + while (begin != end) + { + ss << ", " << begin->first << ": '" << begin->second << "'"; + ++begin; + } + + ss << "}"; + + return ss.str(); + } + /** * TODO(Documentation) */ diff --git a/morpheus/_lib/src/messages/multi_inference.cpp b/morpheus/_lib/src/messages/multi_inference.cpp index 7e05228d03..65a1384314 100644 --- a/morpheus/_lib/src/messages/multi_inference.cpp +++ b/morpheus/_lib/src/messages/multi_inference.cpp @@ -31,10 +31,11 @@ namespace morpheus { MultiInferenceMessage::MultiInferenceMessage(std::shared_ptr meta, TensorIndex mess_offset, TensorIndex mess_count, - std::shared_ptr memory, + std::shared_ptr memory, TensorIndex offset, - TensorIndex count) : - DerivedMultiMessage(meta, mess_offset, mess_count, memory, offset, count) + TensorIndex count, + std::string id_tensor_name) : + DerivedMultiMessage(meta, mess_offset, mess_count, memory, offset, count, std::move(id_tensor_name)) {} const TensorObject MultiInferenceMessage::get_input(const std::string& name) const @@ -53,16 +54,16 @@ void MultiInferenceMessage::set_input(const std::string& name, const TensorObjec } /****** InterfaceProxy *************************/ -std::shared_ptr MultiInferenceMessageInterfaceProxy::init( - std::shared_ptr meta, - TensorIndex mess_offset, - TensorIndex mess_count, - std::shared_ptr memory, - TensorIndex offset, - TensorIndex count) +std::shared_ptr MultiInferenceMessageInterfaceProxy::init(std::shared_ptr meta, + TensorIndex mess_offset, + TensorIndex mess_count, + std::shared_ptr memory, + TensorIndex offset, + TensorIndex count, + std::string id_tensor_name) { return std::make_shared( - std::move(meta), mess_offset, mess_count, std::move(memory), offset, count); + std::move(meta), mess_offset, mess_count, std::move(memory), offset, count, std::move(id_tensor_name)); } } // namespace morpheus diff --git a/morpheus/_lib/src/messages/multi_inference_fil.cpp b/morpheus/_lib/src/messages/multi_inference_fil.cpp index 
8c06231e9f..36b6309462 100644 --- a/morpheus/_lib/src/messages/multi_inference_fil.cpp +++ b/morpheus/_lib/src/messages/multi_inference_fil.cpp @@ -18,6 +18,7 @@ #include "morpheus/messages/multi_inference_fil.hpp" #include "morpheus/messages/memory/inference_memory.hpp" +#include "morpheus/messages/memory/tensor_memory.hpp" #include "morpheus/messages/meta.hpp" #include "morpheus/messages/multi.hpp" #include "morpheus/messages/multi_inference.hpp" @@ -31,10 +32,11 @@ namespace morpheus { MultiInferenceFILMessage::MultiInferenceFILMessage(std::shared_ptr meta, TensorIndex mess_offset, TensorIndex mess_count, - std::shared_ptr memory, + std::shared_ptr memory, TensorIndex offset, - TensorIndex count) : - DerivedMultiMessage(meta, mess_offset, mess_count, memory, offset, count) + TensorIndex count, + std::string id_tensor_name) : + DerivedMultiMessage(meta, mess_offset, mess_count, memory, offset, count, std::move(id_tensor_name)) {} const TensorObject MultiInferenceFILMessage::get_input__0() const @@ -62,12 +64,13 @@ std::shared_ptr MultiInferenceFILMessageInterfaceProxy std::shared_ptr meta, TensorIndex mess_offset, TensorIndex mess_count, - std::shared_ptr memory, + std::shared_ptr memory, TensorIndex offset, - TensorIndex count) + TensorIndex count, + std::string id_tensor_name) { return std::make_shared( - std::move(meta), mess_offset, mess_count, std::move(memory), offset, count); + std::move(meta), mess_offset, mess_count, std::move(memory), offset, count, std::move(id_tensor_name)); } pybind11::object MultiInferenceFILMessageInterfaceProxy::input__0(MultiInferenceFILMessage& self) diff --git a/morpheus/_lib/src/messages/multi_inference_nlp.cpp b/morpheus/_lib/src/messages/multi_inference_nlp.cpp index 195a7d5d5a..7e8721761f 100644 --- a/morpheus/_lib/src/messages/multi_inference_nlp.cpp +++ b/morpheus/_lib/src/messages/multi_inference_nlp.cpp @@ -32,10 +32,11 @@ namespace morpheus { MultiInferenceNLPMessage::MultiInferenceNLPMessage(std::shared_ptr meta, 
TensorIndex mess_offset, TensorIndex mess_count, - std::shared_ptr memory, + std::shared_ptr memory, TensorIndex offset, - TensorIndex count) : - DerivedMultiMessage(meta, mess_offset, mess_count, memory, offset, count) + TensorIndex count, + std::string id_tensor_name) : + DerivedMultiMessage(meta, mess_offset, mess_count, memory, offset, count, std::move(id_tensor_name)) {} const TensorObject MultiInferenceNLPMessage::get_input_ids() const @@ -73,12 +74,13 @@ std::shared_ptr MultiInferenceNLPMessageInterfaceProxy std::shared_ptr meta, TensorIndex mess_offset, TensorIndex mess_count, - std::shared_ptr memory, + std::shared_ptr memory, TensorIndex offset, - TensorIndex count) + TensorIndex count, + std::string id_tensor_name) { return std::make_shared( - std::move(meta), mess_offset, mess_count, std::move(memory), offset, count); + std::move(meta), mess_offset, mess_count, std::move(memory), offset, count, std::move(id_tensor_name)); } pybind11::object MultiInferenceNLPMessageInterfaceProxy::input_ids(MultiInferenceNLPMessage& self) diff --git a/morpheus/_lib/src/messages/multi_response.cpp b/morpheus/_lib/src/messages/multi_response.cpp index 3bf3543c1b..c8dd060299 100644 --- a/morpheus/_lib/src/messages/multi_response.cpp +++ b/morpheus/_lib/src/messages/multi_response.cpp @@ -31,10 +31,13 @@ namespace morpheus { MultiResponseMessage::MultiResponseMessage(std::shared_ptr meta, TensorIndex mess_offset, TensorIndex mess_count, - std::shared_ptr memory, + std::shared_ptr memory, TensorIndex offset, - TensorIndex count) : - DerivedMultiMessage(meta, mess_offset, mess_count, memory, offset, count) + TensorIndex count, + std::string id_tensor_name, + std::string probs_tensor_name) : + DerivedMultiMessage(meta, mess_offset, mess_count, memory, offset, count, std::move(id_tensor_name)), + probs_tensor_name(std::move(probs_tensor_name)) {} const TensorObject MultiResponseMessage::get_output(const std::string& name) const @@ -52,16 +55,54 @@ void 
MultiResponseMessage::set_output(const std::string& name, const TensorObjec set_tensor(name, value); } +const TensorObject MultiResponseMessage::get_probs_tensor() const +{ + try + { + return this->get_tensor(this->probs_tensor_name); + } catch (std::runtime_error) + { + // Throw a better error here if we are missing the ID tensor + throw pybind11::key_error{MORPHEUS_CONCAT_STR("Cannot get probabilities tensor. Tensor with name '" + << this->probs_tensor_name + << "' does not exist in the memory object")}; + } +} + /****** MultiResponseMessageInterfaceProxy *************************/ std::shared_ptr MultiResponseMessageInterfaceProxy::init(std::shared_ptr meta, TensorIndex mess_offset, TensorIndex mess_count, - std::shared_ptr memory, + std::shared_ptr memory, TensorIndex offset, - TensorIndex count) + TensorIndex count, + std::string id_tensor_name, + std::string probs_tensor_name) +{ + return std::make_shared(std::move(meta), + mess_offset, + mess_count, + std::move(memory), + offset, + count, + std::move(id_tensor_name), + std::move(probs_tensor_name)); +} + +std::string MultiResponseMessageInterfaceProxy::probs_tensor_name_getter(MultiResponseMessage& self) +{ + return self.probs_tensor_name; +} + +void MultiResponseMessageInterfaceProxy::probs_tensor_name_setter(MultiResponseMessage& self, + std::string probs_tensor_name) +{ + self.probs_tensor_name = probs_tensor_name; +} + +pybind11::object MultiResponseMessageInterfaceProxy::get_probs_tensor(MultiResponseMessage& self) { - return std::make_shared( - std::move(meta), mess_offset, mess_count, std::move(memory), offset, count); + return CupyUtil::tensor_to_cupy(self.get_probs_tensor()); } } // namespace morpheus diff --git a/morpheus/_lib/src/messages/multi_response_probs.cpp b/morpheus/_lib/src/messages/multi_response_probs.cpp index a04142a6b0..13a0711817 100644 --- a/morpheus/_lib/src/messages/multi_response_probs.cpp +++ b/morpheus/_lib/src/messages/multi_response_probs.cpp @@ -31,10 +31,13 @@ namespace 
morpheus { MultiResponseProbsMessage::MultiResponseProbsMessage(std::shared_ptr meta, TensorIndex mess_offset, TensorIndex mess_count, - std::shared_ptr memory, + std::shared_ptr memory, TensorIndex offset, - TensorIndex count) : - DerivedMultiMessage(meta, mess_offset, mess_count, memory, offset, count) + TensorIndex count, + std::string id_tensor_name, + std::string probs_tensor_name) : + DerivedMultiMessage( + meta, mess_offset, mess_count, memory, offset, count, std::move(id_tensor_name), std::move(probs_tensor_name)) {} const TensorObject MultiResponseProbsMessage::get_probs() const @@ -55,16 +58,25 @@ std::shared_ptr MultiResponseProbsMessageInterfacePro std::shared_ptr meta, TensorIndex mess_offset, TensorIndex mess_count, - std::shared_ptr memory, + std::shared_ptr memory, TensorIndex offset, - TensorIndex count) + TensorIndex count, + std::string id_tensor_name, + std::string probs_tensor_name) { - return std::make_shared( - std::move(meta), mess_offset, mess_count, std::move(memory), offset, count); + return std::make_shared(std::move(meta), + mess_offset, + mess_count, + std::move(memory), + offset, + count, + std::move(id_tensor_name), + std::move(probs_tensor_name)); } pybind11::object MultiResponseProbsMessageInterfaceProxy::probs(MultiResponseProbsMessage& self) { return get_tensor_property(self, "probs"); } + } // namespace morpheus diff --git a/morpheus/_lib/src/messages/multi_tensor.cpp b/morpheus/_lib/src/messages/multi_tensor.cpp index 2ebae47cf6..8fd57f2fc7 100644 --- a/morpheus/_lib/src/messages/multi_tensor.cpp +++ b/morpheus/_lib/src/messages/multi_tensor.cpp @@ -24,7 +24,8 @@ #include // IWYU pragma: keep #include // for MRC_PTR_CAST -#include // for key_error +#include +#include // for key_error #include #include @@ -68,10 +69,12 @@ MultiTensorMessage::MultiTensorMessage(std::shared_ptr meta, TensorIndex mess_count, std::shared_ptr memory, TensorIndex offset, - TensorIndex count) : + TensorIndex count, + std::string id_tensor_name) : 
DerivedMultiMessage(meta, mess_offset, mess_count), memory(std::move(memory)), - offset(offset) + offset(offset), + id_tensor_name(std::move(id_tensor_name)) { if (!this->memory) { @@ -100,24 +103,26 @@ MultiTensorMessage::MultiTensorMessage(std::shared_ptr meta, } // Finally, perform a consistency check on the seq_ids - if (this->memory->has_tensor("seq_ids")) + if (this->memory->has_tensor(this->id_tensor_name)) { - auto id_tensor = this->memory->get_tensor("seq_ids"); + auto id_tensor = this->memory->get_tensor(this->id_tensor_name); TensorIndex first_element = read_idx_from_tensor(id_tensor, {this->offset, 0}); TensorIndex last_element = read_idx_from_tensor(id_tensor, {this->offset + this->count - 1, 0}); if (first_element != this->mess_offset) { - throw std::runtime_error(MORPHEUS_CONCAT_STR("Inconsistent ID column. First element in 'seq_ids' tensor, [" - << first_element << "], must match mess_offset, [" - << this->mess_offset << "]")); + throw std::runtime_error(MORPHEUS_CONCAT_STR("Inconsistent ID column. First element in '" + << this->id_tensor_name << "' tensor, [" << first_element + << "], must match mess_offset, [" << this->mess_offset + << "]")); } if (last_element != this->mess_offset + this->mess_count - 1) { - throw std::runtime_error(MORPHEUS_CONCAT_STR("Inconsistent ID column. Last element in 'seq_ids' tensor, [" - << last_element << "], must not extend beyond last message, [" + throw std::runtime_error(MORPHEUS_CONCAT_STR("Inconsistent ID column. 
Last element in '" + << this->id_tensor_name << "' tensor, [" << last_element + << "], must not extend beyond last message, [" << (this->mess_offset + this->mess_count - 1) << "]")); } } @@ -155,6 +160,20 @@ void MultiTensorMessage::set_tensor(const std::string& name, const TensorObject& slice = value; } +const TensorObject MultiTensorMessage::get_id_tensor() const +{ + try + { + return this->get_tensor(this->id_tensor_name); + } catch (std::runtime_error) + { + // Throw a better error here if we are missing the ID tensor + throw pybind11::key_error{MORPHEUS_CONCAT_STR("Cannot get ID tensor. Tensor with name '" + << this->id_tensor_name + << "' does not exist in the memory object")}; + } +} + void MultiTensorMessage::get_slice_impl(std::shared_ptr new_message, TensorIndex start, TensorIndex stop) const @@ -173,21 +192,23 @@ void MultiTensorMessage::get_slice_impl(std::shared_ptr new_messag throw std::out_of_range("Invalid memory `stop` argument"); } - sliced_message->offset = this->offset + start; - sliced_message->count = stop - start; + sliced_message->memory = this->memory; + sliced_message->offset = this->offset + start; + sliced_message->count = stop - start; + sliced_message->id_tensor_name = this->id_tensor_name; if (this->count != this->mess_count) { // If we have more tensor rows than message rows, we need to use the seq_ids to figure out the slicing. 
This // will be slow and should be avoided at all costs - if (!this->memory->has_tensor("seq_ids")) + if (!this->memory->has_tensor(this->id_tensor_name)) { throw std::runtime_error( "The tensor memory object is missing the required ID tensor 'seq_ids' this tensor is required to make " "slices of MultiTensorMessages"); } - auto id_tensor = this->get_tensor("seq_ids"); + auto id_tensor = this->get_id_tensor(); // Determine the new start and stop before passing onto the base start = read_idx_from_tensor(id_tensor, {start, 0}) - this->mess_offset; @@ -224,10 +245,11 @@ std::shared_ptr MultiTensorMessageInterfaceProxy::init(std:: TensorIndex mess_count, std::shared_ptr memory, TensorIndex offset, - TensorIndex count) + TensorIndex count, + std::string id_tensor_name) { return std::make_shared( - std::move(meta), mess_offset, mess_count, std::move(memory), offset, count); + std::move(meta), mess_offset, mess_count, std::move(memory), offset, count, std::move(id_tensor_name)); } std::shared_ptr MultiTensorMessageInterfaceProxy::memory(MultiTensorMessage& self) @@ -245,6 +267,16 @@ TensorIndex MultiTensorMessageInterfaceProxy::count(MultiTensorMessage& self) return self.count; } +std::string MultiTensorMessageInterfaceProxy::id_tensor_name_getter(MultiTensorMessage& self) +{ + return self.id_tensor_name; +} + +void MultiTensorMessageInterfaceProxy::id_tensor_name_setter(MultiTensorMessage& self, std::string id_tensor_name) +{ + self.id_tensor_name = id_tensor_name; +} + pybind11::object MultiTensorMessageInterfaceProxy::get_tensor(MultiTensorMessage& self, const std::string& name) { try @@ -257,6 +289,11 @@ pybind11::object MultiTensorMessageInterfaceProxy::get_tensor(MultiTensorMessage } } +pybind11::object MultiTensorMessageInterfaceProxy::get_id_tensor(MultiTensorMessage& self) +{ + return CupyUtil::tensor_to_cupy(self.get_id_tensor()); +} + pybind11::object MultiTensorMessageInterfaceProxy::get_tensor_property(MultiTensorMessage& self, const std::string name) { try 
diff --git a/morpheus/_lib/src/python_modules/messages.cpp b/morpheus/_lib/src/python_modules/messages.cpp index b25accbad4..bfa7c1deda 100644 --- a/morpheus/_lib/src/python_modules/messages.cpp +++ b/morpheus/_lib/src/python_modules/messages.cpp @@ -244,12 +244,17 @@ PYBIND11_MODULE(messages, m) py::arg("mess_offset") = 0, py::arg("mess_count") = -1, py::arg("memory"), - py::arg("offset") = 0, - py::arg("count") = -1) + py::arg("offset") = 0, + py::arg("count") = -1, + py::arg("id_tensor_name") = "seq_ids") .def_property_readonly("memory", &MultiTensorMessageInterfaceProxy::memory) .def_property_readonly("offset", &MultiTensorMessageInterfaceProxy::offset) .def_property_readonly("count", &MultiTensorMessageInterfaceProxy::count) - .def("get_tensor", &MultiTensorMessageInterfaceProxy::get_tensor); + .def_property("id_tensor_name", + &MultiTensorMessageInterfaceProxy::id_tensor_name_getter, + &MultiTensorMessageInterfaceProxy::id_tensor_name_setter) + .def("get_tensor", &MultiTensorMessageInterfaceProxy::get_tensor) + .def("get_id_tensor", &MultiTensorMessageInterfaceProxy::get_id_tensor); py::class_>( m, "MultiInferenceMessage") @@ -259,8 +264,9 @@ PYBIND11_MODULE(messages, m) py::arg("mess_offset") = 0, py::arg("mess_count") = -1, py::arg("memory"), - py::arg("offset") = 0, - py::arg("count") = -1) + py::arg("offset") = 0, + py::arg("count") = -1, + py::arg("id_tensor_name") = "seq_ids") .def("get_input", &MultiInferenceMessageInterfaceProxy::get_tensor); py::class_>( @@ -271,8 +277,9 @@ PYBIND11_MODULE(messages, m) py::arg("mess_offset") = 0, py::arg("mess_count") = -1, py::arg("memory"), - py::arg("offset") = 0, - py::arg("count") = -1) + py::arg("offset") = 0, + py::arg("count") = -1, + py::arg("id_tensor_name") = "seq_ids") .def_property_readonly("input_ids", &MultiInferenceNLPMessageInterfaceProxy::input_ids) .def_property_readonly("input_mask", &MultiInferenceNLPMessageInterfaceProxy::input_mask) .def_property_readonly("seq_ids", 
&MultiInferenceNLPMessageInterfaceProxy::seq_ids); @@ -285,8 +292,9 @@ PYBIND11_MODULE(messages, m) py::arg("mess_offset") = 0, py::arg("mess_count") = -1, py::arg("memory"), - py::arg("offset") = 0, - py::arg("count") = -1) + py::arg("offset") = 0, + py::arg("count") = -1, + py::arg("id_tensor_name") = "seq_ids") .def_property_readonly("input__0", &MultiInferenceFILMessageInterfaceProxy::input__0) .def_property_readonly("seq_ids", &MultiInferenceFILMessageInterfaceProxy::seq_ids); @@ -298,9 +306,15 @@ PYBIND11_MODULE(messages, m) py::arg("mess_offset") = 0, py::arg("mess_count") = -1, py::arg("memory"), - py::arg("offset") = 0, - py::arg("count") = -1) - .def("get_output", &MultiResponseMessageInterfaceProxy::get_tensor); + py::arg("offset") = 0, + py::arg("count") = -1, + py::arg("id_tensor_name") = "seq_ids", + py::arg("probs_tensor_name") = "probs") + .def_property("probs_tensor_name", + &MultiResponseMessageInterfaceProxy::probs_tensor_name_getter, + &MultiResponseMessageInterfaceProxy::probs_tensor_name_setter) + .def("get_output", &MultiResponseMessageInterfaceProxy::get_tensor) + .def("get_probs_tensor", &MultiResponseMessageInterfaceProxy::get_probs_tensor); py::class_>( m, "MultiResponseProbsMessage") @@ -310,8 +324,10 @@ PYBIND11_MODULE(messages, m) py::arg("mess_offset") = 0, py::arg("mess_count") = -1, py::arg("memory"), - py::arg("offset") = 0, - py::arg("count") = -1) + py::arg("offset") = 0, + py::arg("count") = -1, + py::arg("id_tensor_name") = "seq_ids", + py::arg("probs_tensor_name") = "probs") .def_property_readonly("probs", &MultiResponseProbsMessageInterfaceProxy::probs); #ifdef VERSION_INFO diff --git a/morpheus/_lib/src/python_modules/stages.cpp b/morpheus/_lib/src/python_modules/stages.cpp index 74139320b6..b8ff87e686 100644 --- a/morpheus/_lib/src/python_modules/stages.cpp +++ b/morpheus/_lib/src/python_modules/stages.cpp @@ -65,20 +65,14 @@ PYBIND11_MODULE(stages, m) .def(py::init<>(&AddClassificationStageInterfaceProxy::init), 
py::arg("builder"), py::arg("name"), - py::arg("threshold"), - py::arg("num_class_labels"), py::arg("idx2label"), - py::arg("output_name") = "probs"); + py::arg("threshold")); py::class_, mrc::segment::ObjectProperties, std::shared_ptr>>(m, "AddScoresStage", py::multiple_inheritance()) - .def(py::init<>(&AddScoresStageInterfaceProxy::init), - py::arg("builder"), - py::arg("name"), - py::arg("num_class_labels"), - py::arg("idx2label"), - py::arg("output_name") = "probs"); + .def( + py::init<>(&AddScoresStageInterfaceProxy::init), py::arg("builder"), py::arg("name"), py::arg("idx2label")); py::class_, mrc::segment::ObjectProperties, diff --git a/morpheus/_lib/src/stages/add_classification.cpp b/morpheus/_lib/src/stages/add_classification.cpp index 1dd0c1914b..ff7bc24fb2 100644 --- a/morpheus/_lib/src/stages/add_classification.cpp +++ b/morpheus/_lib/src/stages/add_classification.cpp @@ -23,6 +23,7 @@ #include "morpheus/objects/tensor_object.hpp" // for TensorObject #include "morpheus/types.hpp" // for TensorIndex #include "morpheus/utilities/matx_util.hpp" +#include "morpheus/utilities/string_util.hpp" #include "morpheus/utilities/tensor_util.hpp" // for TensorUtils::get_element_stride #include // for cudaMemcpy, cudaMemcpyDeviceToDevice @@ -35,6 +36,7 @@ #include #include // for divides, bind, placeholders #include +#include #include // needed for logging #include // for move // IWYU thinks we need __alloc_traits<>::value_type for vector assignments @@ -43,33 +45,29 @@ namespace morpheus { // Component public implementations // ************ AddClassificationStage **************************** // -AddClassificationsStage::AddClassificationsStage(float threshold, - std::size_t num_class_labels, - std::map idx2label, - std::string output_name) : +AddClassificationsStage::AddClassificationsStage(std::map idx2label, float threshold) : PythonNode(base_t::op_factory_from_sub_fn(build_operator())), - m_threshold(threshold), - m_num_class_labels(num_class_labels), 
m_idx2label(std::move(idx2label)), - m_output_name(std::move(output_name)) -{ - CHECK(m_idx2label.size() <= m_num_class_labels) << "idx2label should represent a subset of the class_labels"; -} + m_threshold(threshold), + m_min_col_count(m_idx2label.rbegin()->first) // Ordered map's largest key will be the last entry +{} AddClassificationsStage::subscribe_fn_t AddClassificationsStage::build_operator() { return [this](rxcpp::observable input, rxcpp::subscriber output) { return input.subscribe(rxcpp::make_observer( [this, &output](sink_type_t x) { - const auto& probs = x->get_output(m_output_name); + const auto& probs = x->get_probs_tensor(); const auto& shape = probs.get_shape(); // Depending on the input the stride is given in bytes or elements, convert to elements auto stride = TensorUtils::get_element_stride(probs.get_stride()); - CHECK(shape.size() == 2 && shape[1] == m_num_class_labels) - << "Label count does not match output of model. Label count: " << m_num_class_labels - << ", Model output: " << shape[1]; + CHECK(shape.size() == 2 && shape[1] > m_min_col_count) + << "Model output did not contain enough columns to fulfill the requested labels. 
Label " + "indexes: " + << StringUtil::map_to_str(m_idx2label.begin(), m_idx2label.end()) + << ", Model output columns: " << shape[1]; const auto num_rows = shape[0]; const auto num_columns = shape[1]; @@ -112,13 +110,10 @@ AddClassificationsStage::subscribe_fn_t AddClassificationsStage::build_operator( std::shared_ptr> AddClassificationStageInterfaceProxy::init( mrc::segment::Builder& builder, const std::string& name, - float threshold, - std::size_t num_class_labels, std::map idx2label, - std::string output_name) + float threshold) { - auto stage = builder.construct_object( - name, threshold, num_class_labels, std::move(idx2label), std::move(output_name)); + auto stage = builder.construct_object(name, idx2label, threshold); return stage; } diff --git a/morpheus/_lib/src/stages/add_scores.cpp b/morpheus/_lib/src/stages/add_scores.cpp index 96f018406c..f6086a162b 100644 --- a/morpheus/_lib/src/stages/add_scores.cpp +++ b/morpheus/_lib/src/stages/add_scores.cpp @@ -36,28 +36,25 @@ namespace morpheus { // Component public implementations // ************ AddScoresStage **************************** // -AddScoresStage::AddScoresStage(std::size_t num_class_labels, - std::map idx2label, - std::string output_name) : +AddScoresStage::AddScoresStage(std::map idx2label) : PythonNode(base_t::op_factory_from_sub_fn(build_operator())), - m_num_class_labels(num_class_labels), m_idx2label(std::move(idx2label)), - m_output_name(std::move(output_name)) -{ - CHECK(m_idx2label.size() <= m_num_class_labels) << "idx2label should represent a subset of the class_labels"; -} + m_min_col_count(m_idx2label.rbegin()->first) +{} AddScoresStage::subscribe_fn_t AddScoresStage::build_operator() { return [this](rxcpp::observable input, rxcpp::subscriber output) { return input.subscribe(rxcpp::make_observer( [this, &output](sink_type_t x) { - const auto& probs = x->get_output(m_output_name); + const auto& probs = x->get_probs_tensor(); const auto& shape = probs.get_shape(); - CHECK(shape.size() == 
2 && shape[1] == m_num_class_labels) - << "Label count does not match output of model. Label count: " << m_num_class_labels - << ", Model output: " << shape[1]; + CHECK(shape.size() == 2 && shape[1] > m_min_col_count) + << "Model output did not contain enough columns to fulfill the requested labels. Label " + "indexes: " + << StringUtil::map_to_str(m_idx2label.begin(), m_idx2label.end()) + << ", Model output columns: " << shape[1]; const auto num_rows = shape[0]; const auto num_columns = shape[1]; @@ -86,13 +83,8 @@ AddScoresStage::subscribe_fn_t AddScoresStage::build_operator() // ************ AddScoresStageInterfaceProxy ************* // std::shared_ptr> AddScoresStageInterfaceProxy::init( - mrc::segment::Builder& builder, - const std::string& name, - std::size_t num_class_labels, - std::map idx2label, - std::string output_name) + mrc::segment::Builder& builder, const std::string& name, std::map idx2label) { - return builder.construct_object( - name, num_class_labels, std::move(idx2label), std::move(output_name)); + return builder.construct_object(name, std::move(idx2label)); } } // namespace morpheus diff --git a/morpheus/_lib/src/stages/triton_inference.cpp b/morpheus/_lib/src/stages/triton_inference.cpp index 0f910b531e..be8287202a 100644 --- a/morpheus/_lib/src/stages/triton_inference.cpp +++ b/morpheus/_lib/src/stages/triton_inference.cpp @@ -19,7 +19,6 @@ #include "morpheus/messages/memory/response_memory_probs.hpp" // for ResponseMemoryProbs #include "morpheus/messages/memory/tensor_memory.hpp" // for TensorMemory -#include "morpheus/messages/multi_response_probs.hpp" // for MultiResponseProbsMessage #include "morpheus/objects/dev_mem_info.hpp" // for DevMemInfo #include "morpheus/objects/dtype.hpp" // for DType #include "morpheus/objects/tensor.hpp" // for Tensor::create diff --git a/morpheus/_lib/src/utilities/string_util.cpp b/morpheus/_lib/src/utilities/string_util.cpp index 19108baad9..4dd7e597ed 100644 --- a/morpheus/_lib/src/utilities/string_util.cpp 
+++ b/morpheus/_lib/src/utilities/string_util.cpp @@ -17,9 +17,12 @@ #include "morpheus/utilities/string_util.hpp" +#include + namespace morpheus { bool StringUtil::str_contains(const std::string& str, const std::string& search_str) { return str.find(search_str) != std::string::npos; } + } // namespace morpheus diff --git a/morpheus/messages/__init__.py b/morpheus/messages/__init__.py index cfdf70cff8..d2b9b5b949 100644 --- a/morpheus/messages/__init__.py +++ b/morpheus/messages/__init__.py @@ -18,6 +18,7 @@ # Import order is very important here. Import base classes before child ones # isort: off +from morpheus.messages.memory.tensor_memory import TensorMemory from morpheus.messages.memory.inference_memory import InferenceMemory from morpheus.messages.memory.inference_memory import InferenceMemoryAE from morpheus.messages.memory.inference_memory import InferenceMemoryFIL @@ -57,5 +58,6 @@ "ResponseMemory", "ResponseMemoryAE", "ResponseMemoryProbs", + "TensorMemory", "UserMessageMeta", ] diff --git a/morpheus/messages/memory/inference_memory.py b/morpheus/messages/memory/inference_memory.py index 58a8a56999..74147df864 100644 --- a/morpheus/messages/memory/inference_memory.py +++ b/morpheus/messages/memory/inference_memory.py @@ -88,8 +88,8 @@ class InferenceMemoryNLP(InferenceMemory, cpp_class=_messages.InferenceMemoryNLP seq_ids: dataclasses.InitVar[cp.ndarray] = DataClassProp(InferenceMemory._get_tensor_prop, InferenceMemory.set_input) - def __init__(self, count: int, input_ids: cp.ndarray, input_mask: cp.ndarray, seq_ids: cp.ndarray): - super().__init__(count, tensors={'input_ids': input_ids, 'input_mask': input_mask, 'seq_ids': seq_ids}) + def __init__(self, *, count: int, input_ids: cp.ndarray, input_mask: cp.ndarray, seq_ids: cp.ndarray): + super().__init__(count=count, tensors={'input_ids': input_ids, 'input_mask': input_mask, 'seq_ids': seq_ids}) @dataclasses.dataclass(init=False) @@ -112,8 +112,8 @@ class InferenceMemoryFIL(InferenceMemory, 
cpp_class=_messages.InferenceMemoryFIL seq_ids: dataclasses.InitVar[cp.ndarray] = DataClassProp(InferenceMemory._get_tensor_prop, InferenceMemory.set_input) - def __init__(self, count: int, input__0: cp.ndarray, seq_ids: cp.ndarray): - super().__init__(count, tensors={'input__0': input__0, 'seq_ids': seq_ids}) + def __init__(self, *, count: int, input__0: cp.ndarray, seq_ids: cp.ndarray): + super().__init__(count=count, tensors={'input__0': input__0, 'seq_ids': seq_ids}) @dataclasses.dataclass(init=False) @@ -134,5 +134,5 @@ class InferenceMemoryAE(InferenceMemory, cpp_class=None): seq_ids: dataclasses.InitVar[cp.ndarray] = DataClassProp(InferenceMemory._get_tensor_prop, InferenceMemory.set_input) - def __init__(self, count: int, input: cp.ndarray, seq_ids: cp.ndarray): - super().__init__(count, tensors={'input': input, 'seq_ids': seq_ids}) + def __init__(self, *, count: int, input: cp.ndarray, seq_ids: cp.ndarray): + super().__init__(count=count, tensors={'input': input, 'seq_ids': seq_ids}) diff --git a/morpheus/messages/memory/response_memory.py b/morpheus/messages/memory/response_memory.py index 1df8dcce81..ef1cb3f488 100644 --- a/morpheus/messages/memory/response_memory.py +++ b/morpheus/messages/memory/response_memory.py @@ -30,6 +30,10 @@ class ResponseMemory(TensorMemory, cpp_class=_messages.ResponseMemory): """Output memory block holding the results of inference.""" + def __new__(cls, *args, **kwargs): + morpheus_logger.deprecated_message_warning(logger, cls, TensorMemory) + return super().__new__(cls, *args, **kwargs) + def get_output(self, name: str): """ Get the Tensor stored in the container identified by `name`. Alias for `ResponseMemory.get_tensor`. 
@@ -83,12 +87,8 @@ class ResponseMemoryProbs(ResponseMemory, cpp_class=_messages.ResponseMemoryProb """ probs: dataclasses.InitVar[cp.ndarray] = DataClassProp(ResponseMemory._get_tensor_prop, ResponseMemory.set_output) - def __new__(cls, *args, **kwargs): - morpheus_logger.deprecated_message_warning(logger, cls, ResponseMemory) - return super(ResponseMemory, cls).__new__(cls, *args, **kwargs) - - def __init__(self, count: int, probs: cp.ndarray): - super().__init__(count, tensors={'probs': probs}) + def __init__(self, *, count: int, probs: cp.ndarray): + super().__init__(count=count, tensors={'probs': probs}) @dataclasses.dataclass(init=False) @@ -112,5 +112,5 @@ class ResponseMemoryAE(ResponseMemory, cpp_class=None): user_id = "" explain_df = None - def __init__(self, count: int, probs: cp.ndarray): - super().__init__(count, tensors={'probs': probs}) + def __init__(self, *, count: int, probs: cp.ndarray): + super().__init__(count=count, tensors={'probs': probs}) diff --git a/morpheus/messages/memory/tensor_memory.py b/morpheus/messages/memory/tensor_memory.py index f00e74e799..cc28cdea17 100644 --- a/morpheus/messages/memory/tensor_memory.py +++ b/morpheus/messages/memory/tensor_memory.py @@ -37,8 +37,10 @@ class TensorMemory(MessageData, cpp_class=_messages.TensorMemory): """ count: int + tensors: typing.Dict[str, cp.ndarray] + + def __init__(self, *, count: int = None, tensors: typing.Dict[str, cp.ndarray] = None): - def __init__(self, count: int, tensors: typing.Dict[str, cp.ndarray] = None): self.count = count if tensors is None: @@ -66,6 +68,10 @@ def __getattr__(self, name: str) -> typing.Any: return super().__getattr__(name) raise AttributeError + @property + def tensor_names(self) -> typing.List[str]: + return list(self._tensors.keys()) + def has_tensor(self, name: str) -> bool: """ Returns True if a tensor with the requested name exists in the tensors object @@ -158,6 +164,8 @@ def set_tensor(self, name: str, tensor: cp.ndarray): Parameters ---------- + 
name : str + Tensor key name. tensor : cupy.ndarray Tensor as a CuPy array. diff --git a/morpheus/messages/multi_message.py b/morpheus/messages/multi_message.py index 98a53dd6bc..6ac229b24e 100644 --- a/morpheus/messages/multi_message.py +++ b/morpheus/messages/multi_message.py @@ -19,6 +19,7 @@ import cupy as cp import numpy as np +import pandas as pd import cudf @@ -229,6 +230,11 @@ def set_meta(self, columns: typing.Union[None, str, typing.List[str]], value): # Get exclusive access to the dataframe with self.meta.mutable_dataframe() as df: + + # Check if the value is a cupy array and we have a pandas dataframe, convert to numpy + if (isinstance(value, cp.ndarray) and isinstance(df, pd.DataFrame)): + value = value.get() + # Check to see if we are adding a column. If so, we need to use df.loc instead of df.iloc if (-1 not in column_indexer): diff --git a/morpheus/messages/multi_response_message.py b/morpheus/messages/multi_response_message.py index edf521d763..8c084d3dd2 100644 --- a/morpheus/messages/multi_response_message.py +++ b/morpheus/messages/multi_response_message.py @@ -32,6 +32,9 @@ class MultiResponseMessage(MultiTensorMessage, cpp_class=_messages.MultiResponse This class contains several inference responses as well as the cooresponding message metadata. 
""" + probs_tensor_name: typing.ClassVar[str] = "probs" + """Name of the tensor that holds output probabilities""" + def __init__(self, *, meta: MessageMeta, @@ -39,7 +42,16 @@ def __init__(self, mess_count: int = -1, memory: TensorMemory = None, offset: int = 0, - count: int = -1): + count: int = -1, + probs_tensor_name: str = None): + + if (probs_tensor_name is not None): + self.probs_tensor_name = probs_tensor_name + + # Add the tensor name to the required list + if (self.probs_tensor_name not in self.required_tensors): + # Make sure to set a new variable here instead of append otherwise you change all classes + self.required_tensors = self.required_tensors + [self.probs_tensor_name] super().__init__(meta=meta, mess_offset=mess_offset, @@ -51,7 +63,7 @@ def __init__(self, @property def outputs(self): """ - Get outputs stored in the ResponseMemory container. Alias for `MultiResponseMessage.tensors`. + Get outputs stored in the TensorMemory container. Alias for `MultiResponseMessage.tensors`. Returns ------- @@ -63,7 +75,7 @@ def outputs(self): def get_output(self, name: str): """ - Get output stored in the ResponseMemory container. Alias for `MultiResponseMessage.get_tensor`. + Get output stored in the TensorMemory container. Alias for `MultiResponseMessage.get_tensor`. Parameters ---------- @@ -78,27 +90,31 @@ def get_output(self, name: str): """ return self.get_tensor(name) - def copy_output_ranges(self, ranges, mask=None): - """ - Perform a copy of the underlying output tensors for the given `ranges` of rows. - Alias for `MultiResponseMessage.copy_output_ranges` + def get_probs_tensor(self): - Parameters - ---------- - ranges : typing.List[typing.Tuple[int, int]] - Rows to include in the copy in the form of `[(`start_row`, `stop_row`),...]` - The `stop_row` isn't included. 
For example to copy rows 1-2 & 5-7 `ranges=[(1, 3), (5, 8)]` + return self.get_tensor(self.probs_tensor_name) - mask : typing.Union[None, cupy.ndarray, numpy.ndarray] - Optionally specify rows as a cupy array (when using cudf Dataframes) or a numpy array (when using pandas - Dataframes) of booleans. When not-None `ranges` will be ignored. This is useful as an optimization as this - avoids needing to generate the mask on it's own. + # def copy_output_ranges(self, ranges, mask=None): + # """ + # Perform a copy of the underlying output tensors for the given `ranges` of rows. + # Alias for `MultiResponseMessage.copy_output_ranges` - Returns - ------- - typing.Dict[str, cupy.ndarray] - """ - return self.copy_tensor_ranges(ranges, mask=mask) + # Parameters + # ---------- + # ranges : typing.List[typing.Tuple[int, int]] + # Rows to include in the copy in the form of `[(`start_row`, `stop_row`),...]` + # The `stop_row` isn't included. For example to copy rows 1-2 & 5-7 `ranges=[(1, 3), (5, 8)]` + + # mask : typing.Union[None, cupy.ndarray, numpy.ndarray] + # Optionally specify rows as a cupy array (when using cudf Dataframes) or a numpy array (when using pandas + # Dataframes) of booleans. When not-None `ranges` will be ignored. This is useful as an optimization as this + # avoids needing to generate the mask on it's own. 
+ + # Returns + # ------- + # typing.Dict[str, cupy.ndarray] + # """ + # return self.copy_tensor_ranges(ranges, mask=mask) @dataclasses.dataclass diff --git a/morpheus/messages/multi_tensor_message.py b/morpheus/messages/multi_tensor_message.py index 4fd68f63de..722b43b73d 100644 --- a/morpheus/messages/multi_tensor_message.py +++ b/morpheus/messages/multi_tensor_message.py @@ -56,7 +56,8 @@ def __init__(self, mess_count: int = -1, memory: TensorMemory, offset: int = 0, - count: int = -1): + count: int = -1, + id_tensor_name: str = "seq_ids"): if memory is None: raise ValueError("Must define `memory` when creating {}".format(self.__class__.__name__)) @@ -74,6 +75,7 @@ def __init__(self, self.memory = memory self.offset = offset self.count = count + self.id_tensor_name = id_tensor_name # Call the base class last because the properties need to be initialized first super().__init__(meta=meta, mess_offset=mess_offset, mess_count=mess_count) @@ -185,6 +187,14 @@ def get_tensor(self, name: str): """ return self.memory.get_tensor(name)[self.offset:self.offset + self.count, :] + def get_id_tensor(self): + + try: + return self.get_tensor(self.id_tensor_name) + except KeyError as exc: + raise KeyError(f"Cannot get ID tensor. 
Tensor with name '{self.id_tensor_name}' " + "does not exist in the memory object") from exc + def _get_tensor_prop(self, name: str): """ This method is intended to be used by propery methods in subclasses diff --git a/morpheus/stages/inference/auto_encoder_inference_stage.py b/morpheus/stages/inference/auto_encoder_inference_stage.py index 432419aa95..cc791f6ce9 100644 --- a/morpheus/stages/inference/auto_encoder_inference_stage.py +++ b/morpheus/stages/inference/auto_encoder_inference_stage.py @@ -22,11 +22,11 @@ from morpheus.config import Config from morpheus.config import PipelineModes from morpheus.messages import MultiResponseAEMessage -from morpheus.messages import ResponseMemory from morpheus.messages import ResponseMemoryAE +from morpheus.messages import TensorMemory from morpheus.messages.multi_inference_ae_message import MultiInferenceAEMessage from morpheus.messages.multi_inference_message import MultiInferenceMessage -from morpheus.messages.multi_response_message import MultiResponseProbsMessage +from morpheus.messages.multi_response_message import MultiResponseMessage from morpheus.stages.inference.inference_stage import InferenceStage from morpheus.stages.inference.inference_stage import InferenceWorker from morpheus.utils.producer_consumer_queue import ProducerConsumerQueue @@ -83,7 +83,7 @@ def calc_output_dims(self, x: MultiInferenceAEMessage) -> typing.Tuple: # reconstruction loss and zscore return (x.count, 2) - def process(self, batch: MultiInferenceAEMessage, cb: typing.Callable[[ResponseMemory], None]): + def process(self, batch: MultiInferenceAEMessage, cb: typing.Callable[[TensorMemory], None]): """ This function processes inference batch by using batch's model to calculate anomaly scores and adding results to response. @@ -92,7 +92,7 @@ def process(self, batch: MultiInferenceAEMessage, cb: typing.Callable[[ResponseM ---------- batch : `morpheus.pipeline.messages.MultiInferenceMessage` Batch of inference messages. 
- cb : typing.Callable[[`morpheus.pipeline.messages.ResponseMemory`], None] + cb : typing.Callable[[`morpheus.pipeline.messages.TensorMemory`], None] Inference callback. """ @@ -149,7 +149,7 @@ def _get_inference_worker(self, inf_queue: ProducerConsumerQueue) -> InferenceWo return _AutoEncoderInferenceWorker(inf_queue, self._config) @staticmethod - def _convert_one_response(output: MultiResponseProbsMessage, inf: MultiInferenceMessage, res: ResponseMemoryAE): + def _convert_one_response(output: MultiResponseMessage, inf: MultiInferenceMessage, res: ResponseMemoryAE): # Set the explainability and then call the base res.explain_df.index = range(inf.mess_offset, inf.mess_offset + inf.mess_count) diff --git a/morpheus/stages/inference/identity_inference_stage.py b/morpheus/stages/inference/identity_inference_stage.py index 22f0fb2a4a..25eccc66b5 100644 --- a/morpheus/stages/inference/identity_inference_stage.py +++ b/morpheus/stages/inference/identity_inference_stage.py @@ -20,7 +20,7 @@ from morpheus.config import Config from morpheus.config import PipelineModes from morpheus.messages import MultiInferenceMessage -from morpheus.messages import ResponseMemory +from morpheus.messages import TensorMemory from morpheus.stages.inference.inference_stage import InferenceStage from morpheus.stages.inference.inference_stage import InferenceWorker from morpheus.utils.producer_consumer_queue import ProducerConsumerQueue @@ -47,15 +47,14 @@ def __init__(self, inf_queue: ProducerConsumerQueue, c: Config): def calc_output_dims(self, x: MultiInferenceMessage) -> typing.Tuple: return (x.count, self._seq_length) - def process(self, batch: MultiInferenceMessage, cb: typing.Callable[[ResponseMemory], None]): + def process(self, batch: MultiInferenceMessage, cb: typing.Callable[[TensorMemory], None]): def tmp(b: MultiInferenceMessage, f): - f( - ResponseMemory( - count=b.count, - tensors={'probs': cp.zeros((b.count, self._seq_length), dtype=cp.float32)}, - )) + f(TensorMemory( + 
count=b.count, + tensors={'probs': cp.zeros((b.count, self._seq_length), dtype=cp.float32)}, + )) # Call directly instead of enqueing tmp(batch, cb) diff --git a/morpheus/stages/inference/inference_stage.py b/morpheus/stages/inference/inference_stage.py index 5a28c6f4be..3b6e7f4f08 100644 --- a/morpheus/stages/inference/inference_stage.py +++ b/morpheus/stages/inference/inference_stage.py @@ -24,7 +24,7 @@ from morpheus.config import Config from morpheus.messages import MultiInferenceMessage from morpheus.messages import MultiResponseMessage -from morpheus.messages import ResponseMemory +from morpheus.messages.memory.tensor_memory import TensorMemory from morpheus.pipeline.multi_message_stage import MultiMessageStage from morpheus.pipeline.stream_pair import StreamPair from morpheus.utils.producer_consumer_queue import ProducerConsumerQueue @@ -81,7 +81,7 @@ def build_output_message(self, x: MultiInferenceMessage) -> MultiResponseMessage dims = self.calc_output_dims(x) output_dims = (x.mess_count, *dims[1:]) - memory = ResponseMemory(count=output_dims[0], tensors={'probs': cp.zeros(output_dims)}) + memory = TensorMemory(count=output_dims[0], tensors={'probs': cp.zeros(output_dims)}) output_message = MultiResponseMessage.from_message(x, memory=memory) @@ -105,7 +105,7 @@ def calc_output_dims(self, x: MultiInferenceMessage) -> typing.Tuple: pass @abstractmethod - def process(self, batch: MultiInferenceMessage, cb: typing.Callable[[ResponseMemory], None]): + def process(self, batch: MultiInferenceMessage, cb: typing.Callable[[TensorMemory], None]): """ Main inference processing function. This function will be called once for each mini-batch. Once the inference is complete, the `cb` parameter should be used to set the response value. The callback can be called @@ -115,7 +115,7 @@ def process(self, batch: MultiInferenceMessage, cb: typing.Callable[[ResponseMem ---------- batch : `morpheus.pipeline.messages.MultiInferenceMessage` Mini-batch of inference messages. 
- cb : typing.Callable[[`morpheus.pipeline.messages.ResponseMemory`], None] + cb : typing.Callable[[`morpheus.pipeline.messages.TensorMemory`], None] Callback to set the values for the inference response. """ @@ -236,7 +236,7 @@ def on_next(x: MultiInferenceMessage): completion_future = mrc.Future() - def set_output_fut(resp: ResponseMemory, b, batch_future: mrc.Future): + def set_output_fut(resp: TensorMemory, b, batch_future: mrc.Future): nonlocal outstanding_requests m = self._convert_one_response(output_message, b, resp) @@ -337,7 +337,7 @@ def _split_batches(x: MultiInferenceMessage, max_batch_size: int) -> typing.List return out_resp @staticmethod - def _convert_response(x: typing.Tuple[typing.List[MultiInferenceMessage], typing.List[ResponseMemory]]): + def _convert_response(x: typing.Tuple[typing.List[MultiInferenceMessage], typing.List[TensorMemory]]): # Convert a MultiInferenceMessage into a MultiResponseMessage in_message = x[0] @@ -349,7 +349,7 @@ def _convert_response(x: typing.Tuple[typing.List[MultiInferenceMessage], typing total_mess_count = reduce(lambda y, z: y + z.mess_count, in_message, 0) # Create a message data to store the entire list - probs = cp.zeros((total_mess_count, out_message[0].get_output('probs').shape[1])) + probs = cp.zeros((total_mess_count, out_message[0].get_tensor('probs').shape[1])) saved_offset = in_message[0].mess_offset saved_count = 0 @@ -379,20 +379,20 @@ def _convert_response(x: typing.Tuple[typing.List[MultiInferenceMessage], typing assert saved_count == total_mess_count, "Did not set every element in output" - memory = ResponseMemory(count=total_mess_count, tensors={'probs': probs}) + memory = TensorMemory(count=total_mess_count, tensors={'probs': probs}) return MultiResponseMessage.from_message(in_message[0], mess_count=saved_count, memory=memory) @staticmethod - def _convert_one_response(output: MultiResponseMessage, inf: MultiInferenceMessage, res: ResponseMemory): + def _convert_one_response(output: 
MultiResponseMessage, inf: MultiInferenceMessage, res: TensorMemory): # Make sure we have a continuous list # assert inf.mess_offset == saved_offset + saved_count memory = output.memory - probs = memory.get_output("probs") - resp_probs = res.get_output('probs') + probs = memory.get_tensor(output.probs_tensor_name) + resp_probs = res.get_tensor(output.probs_tensor_name) - seq_ids = inf.get_tensor("seq_ids") + seq_ids = inf.get_id_tensor() seq_offset = seq_ids[0, 0].item() - output.mess_offset seq_count = (seq_ids[-1, 0].item() + 1 - seq_offset) - output.mess_offset diff --git a/morpheus/stages/inference/pytorch_inference_stage.py b/morpheus/stages/inference/pytorch_inference_stage.py index a7b9398ee5..9e7b3d7346 100644 --- a/morpheus/stages/inference/pytorch_inference_stage.py +++ b/morpheus/stages/inference/pytorch_inference_stage.py @@ -21,7 +21,7 @@ from morpheus.config import Config from morpheus.config import PipelineModes from morpheus.messages import MultiInferenceMessage -from morpheus.messages import ResponseMemory +from morpheus.messages.memory.tensor_memory import TensorMemory from morpheus.stages.inference.inference_stage import InferenceStage from morpheus.stages.inference.inference_stage import InferenceWorker from morpheus.utils.producer_consumer_queue import ProducerConsumerQueue @@ -85,7 +85,7 @@ def calc_output_dims(self, x: MultiInferenceMessage) -> typing.Tuple: return (x.count, self._outputs[list(self._outputs.keys())[0]].shape[1]) - def process(self, batch: MultiInferenceMessage, cb: typing.Callable[[ResponseMemory], None]): + def process(self, batch: MultiInferenceMessage, cb: typing.Callable[[TensorMemory], None]): # convert from cupy to torch tensor using dlpack input_ids = from_dlpack(batch.get_input("input_ids").astype(cp.float).toDlpack()).type(torch.long) @@ -101,7 +101,7 @@ def process(self, batch: MultiInferenceMessage, cb: typing.Callable[[ResponseMem if (len(probs_cp.shape) == 1): probs_cp = cp.expand_dims(probs_cp, axis=1) - 
response_mem = ResponseMemory(count=batch.count, tensors={'probs': probs_cp}) + response_mem = TensorMemory(count=batch.count, tensors={'probs': probs_cp}) # Return the response cb(response_mem) diff --git a/morpheus/stages/inference/triton_inference_stage.py b/morpheus/stages/inference/triton_inference_stage.py index 4c144dfd59..839ce7b0b8 100644 --- a/morpheus/stages/inference/triton_inference_stage.py +++ b/morpheus/stages/inference/triton_inference_stage.py @@ -34,7 +34,7 @@ from morpheus.config import Config from morpheus.config import PipelineModes from morpheus.messages import MultiInferenceMessage -from morpheus.messages import ResponseMemory +from morpheus.messages.memory.tensor_memory import TensorMemory from morpheus.stages.inference.inference_stage import InferenceStage from morpheus.stages.inference.inference_stage import InferenceWorker from morpheus.utils.producer_consumer_queue import ProducerConsumerQueue @@ -581,11 +581,11 @@ def calc_output_dims(self, x: MultiInferenceMessage) -> typing.Tuple: return (x.count, self._outputs[list(self._outputs.keys())[0]].shape[1]) @abstractmethod - def _build_response(self, batch: MultiInferenceMessage, result: tritonclient.InferResult) -> ResponseMemory: + def _build_response(self, batch: MultiInferenceMessage, result: tritonclient.InferResult) -> TensorMemory: pass def _infer_callback(self, - cb: typing.Callable[[ResponseMemory], None], + cb: typing.Callable[[TensorMemory], None], m: InputWrapper, b: MultiInferenceMessage, result: tritonclient.InferResult, @@ -603,7 +603,7 @@ def _infer_callback(self, self._mem_pool.return_obj(m) - def process(self, batch: MultiInferenceMessage, cb: typing.Callable[[ResponseMemory], None]): + def process(self, batch: MultiInferenceMessage, cb: typing.Callable[[TensorMemory], None]): """ This function sends batch of events as a requests to Triton inference server using triton client API. 
@@ -611,7 +611,7 @@ def process(self, batch: MultiInferenceMessage, cb: typing.Callable[[ResponseMem ---------- batch : `morpheus.pipeline.messages.MultiInferenceMessage` Mini-batch of inference messages. - cb : typing.Callable[[`morpheus.pipeline.messages.ResponseMemory`], None] + cb : typing.Callable[[`morpheus.pipeline.messages.TensorMemory`], None] Callback to set the values for the inference response. """ @@ -701,14 +701,14 @@ def default_inout_mapping(cls) -> typing.Dict[str, str]: "output": "probs", } - def _build_response(self, batch: MultiInferenceMessage, result: tritonclient.InferResult) -> ResponseMemory: + def _build_response(self, batch: MultiInferenceMessage, result: tritonclient.InferResult) -> TensorMemory: output = {output.mapped_name: result.as_numpy(output.name) for output in self._outputs.values()} if (self._needs_logits): output = {key: 1.0 / (1.0 + np.exp(-val)) for key, val in output.items()} - mem = ResponseMemory( + mem = TensorMemory( count=output["probs"].shape[0], tensors={'probs': cp.array(output["probs"])} # For now, only support one output ) @@ -776,7 +776,7 @@ def default_inout_mapping(cls) -> typing.Dict[str, str]: "output__0": "probs", } - def _build_response(self, batch: MultiInferenceMessage, result: tritonclient.InferResult) -> ResponseMemory: + def _build_response(self, batch: MultiInferenceMessage, result: tritonclient.InferResult) -> TensorMemory: output = {output.mapped_name: result.as_numpy(output.name) for output in self._outputs.values()} @@ -784,7 +784,7 @@ def _build_response(self, batch: MultiInferenceMessage, result: tritonclient.Inf if (len(val.shape) == 1): output[key] = np.expand_dims(val, 1) - mem = ResponseMemory( + mem = TensorMemory( count=output["probs"].shape[0], tensors={'probs': cp.array(output["probs"])} # For now, only support one output ) @@ -853,7 +853,7 @@ def supports_cpp_node(cls): # Enable support by default return False - def _build_response(self, batch: MultiInferenceMessage, result: 
tritonclient.InferResult) -> ResponseMemory: + def _build_response(self, batch: MultiInferenceMessage, result: tritonclient.InferResult) -> TensorMemory: import torch @@ -875,7 +875,7 @@ def _build_response(self, batch: MultiInferenceMessage, result: tritonclient.Inf ae_scores = cp.asarray(net_loss) ae_scores = ae_scores.reshape((batch.count, 1)) - mem = ResponseMemory( + mem = TensorMemory( count=batch.count, tensors={'probs': ae_scores} # For now, only support one output ) diff --git a/morpheus/stages/postprocess/add_classifications_stage.py b/morpheus/stages/postprocess/add_classifications_stage.py index b1651090c7..26c0ba0b0d 100644 --- a/morpheus/stages/postprocess/add_classifications_stage.py +++ b/morpheus/stages/postprocess/add_classifications_stage.py @@ -20,15 +20,13 @@ from morpheus._lib.common import TypeId from morpheus.cli.register_stage import register_stage from morpheus.config import Config -from morpheus.messages import MultiResponseMessage -from morpheus.pipeline.single_port_stage import SinglePortStage -from morpheus.pipeline.stream_pair import StreamPair +from morpheus.stages.postprocess.add_scores_stage_base import AddScoresStageBase logger = logging.getLogger(__name__) @register_stage("add-class", rename_options={"labels": "--label"}) -class AddClassificationsStage(SinglePortStage): +class AddClassificationsStage(AddScoresStageBase): """ Add detected classifications to each message. @@ -39,99 +37,36 @@ class AddClassificationsStage(SinglePortStage): ---------- c : `morpheus.config.Config` Pipeline configuration instance. - threshold : float, default = 0.5 - Threshold to consider True/False for each class. - labels : list, default = None, multiple = True, show_default = "[Config.class_labels]" + labels : typing.List[str], default = None, multiple = True, show_default = "[Config.class_labels]" Converts probability indexes into classification labels. 
Each item in the list will determine its index from the Config.class_labels property and must be one of the available class labels. Leave as None to add all labels in the Config.class_labels property. prefix : str, default = "" Prefix to add to each label. Allows adding labels different from the `Config.class_labels` property. - output_name : str, default = "probs" - Name of the output tensor containing the probabilities + probs_type : TypeId, default = "bool8" + Datatype of the scores columns. + threshold : typing.Optional[float], default = 0.5 + Converts all scores to a boolean value using this threshold. If `None`, scores are used, as-is. + """ def __init__(self, c: Config, - threshold: float = 0.5, + *, labels: typing.List[str] = None, prefix: str = "", - output_name: str = "probs"): - super().__init__(c) - - self._feature_length = c.feature_length - self._threshold = threshold - self._prefix = prefix - self._output_name = output_name - self._class_labels = c.class_labels - self._labels = labels if labels is not None and len(labels) > 0 else c.class_labels - - # Build the Index to Label map. - self._idx2label = {} - - for label in self._labels: - # All labels must be in class_labels in order to get their position - if (label not in self._class_labels): - logger.warning("The label '%s' is not in Config.class_labels and will be ignored", label) - continue - - prefixed_label = self._prefix + label - self._idx2label[self._class_labels.index(label)] = prefixed_label - self._needed_columns[prefixed_label] = TypeId.BOOL8 - - assert len(self._idx2label) > 0, "No labels were added to the stage" + probs_type: TypeId = TypeId.BOOL8, + threshold: float = 0.5): + super().__init__(c, labels=labels, prefix=prefix, probs_type=probs_type, threshold=threshold) @property def name(self) -> str: return "add-class" - def accepted_types(self) -> typing.Tuple: - """ - Accepted input types for this stage are returned. 
- - Returns - ------- - typing.Tuple[`morpheus.pipeline.messages.MultiResponseMessage`, ] - Accepted input types. - - """ - return (MultiResponseMessage, ) - def supports_cpp_node(self): # Enable support by default return True - def _add_labels(self, x: MultiResponseMessage): - - probs = x.get_output(self._output_name) - - if (probs.shape[1] != len(self._class_labels)): - raise RuntimeError("Label count does not match output of model. Label count: {}, Model output: {}".format( - len(self._class_labels), probs.shape[1])) - - probs_np = (probs > self._threshold).astype(bool).get() - - for i, label in self._idx2label.items(): - x.set_meta(label, probs_np[:, i].tolist()) - - # Return passthrough - return x - - def _build_single(self, builder: mrc.Builder, input_stream: StreamPair) -> StreamPair: - - # Convert the messages to rows of strings - if self._build_cpp_node(): - import morpheus._lib.stages as _stages - stream = _stages.AddClassificationsStage(builder, - self.unique_name, - self._threshold, - len(self._class_labels), - self._idx2label, - output_name=self._output_name) - else: - stream = builder.make_node(self.unique_name, self._add_labels) - - builder.make_edge(input_stream[0], stream) - - # Return input unchanged - return stream, MultiResponseMessage + def _get_cpp_node(self, builder: mrc.Builder): + import morpheus._lib.stages as _stages + return _stages.AddClassificationsStage(builder, self.unique_name, self._idx2label, self._threshold) diff --git a/morpheus/stages/postprocess/add_scores_stage.py b/morpheus/stages/postprocess/add_scores_stage.py index 0eda6dc7f4..4eae6f174b 100644 --- a/morpheus/stages/postprocess/add_scores_stage.py +++ b/morpheus/stages/postprocess/add_scores_stage.py @@ -17,19 +17,16 @@ import mrc -import morpheus._lib.stages as _stages from morpheus._lib.common import TypeId from morpheus.cli.register_stage import register_stage from morpheus.config import Config -from morpheus.messages import MultiResponseMessage -from 
morpheus.pipeline.single_port_stage import SinglePortStage -from morpheus.pipeline.stream_pair import StreamPair +from morpheus.stages.postprocess.add_scores_stage_base import AddScoresStageBase logger = logging.getLogger(__name__) @register_stage("add-scores", rename_options={"labels": "--label"}) -class AddScoresStage(SinglePortStage): +class AddScoresStage(AddScoresStageBase): """ Add probability scores to each message. @@ -38,97 +35,35 @@ class AddScoresStage(SinglePortStage): Parameters ---------- - c : morpheus.config.Config + c : `morpheus.config.Config` Pipeline configuration instance. - labels : list, default = None, multiple = True, show_default = "[Config.class_labels]" - Converts probability indexes into classification scores. Each item in the list will determine its index from the + labels : typing.List[str], default = None, multiple = True, show_default = "[Config.class_labels]" + Converts probability indexes into classification labels. Each item in the list will determine its index from the Config.class_labels property and must be one of the available class labels. Leave as None to add all labels in the Config.class_labels property. prefix : str, default = "" Prefix to add to each label. Allows adding labels different from the `Config.class_labels` property. probs_type : `morpheus._lib.common.TypeId`, default = "float32" Datatype of the scores columns. - output_name : str, default = "probs" - Name of the output tensor containing the probabilities """ def __init__(self, c: Config, + *, labels: typing.List[str] = None, prefix: str = "", - probs_type: TypeId = TypeId.FLOAT32, - output_name: str = "probs"): - super().__init__(c) - - self._feature_length = c.feature_length - self._prefix = prefix - self._output_name = output_name - self._class_labels = c.class_labels - self._labels = labels if labels is not None and len(labels) > 0 else c.class_labels - - # Build the Index to Label map. 
- self._idx2label = {} - - for label in self._labels: - # All labels must be in class_labels in order to get their position - if (label not in self._class_labels): - logger.warning("The label '%s' is not in Config.class_labels and will be ignored", label) - continue - - prefixed_label = self._prefix + label - self._idx2label[self._class_labels.index(label)] = prefixed_label - self._needed_columns[prefixed_label] = probs_type - - assert len(self._idx2label) > 0, "No labels were added to the stage" + probs_type: TypeId = TypeId.FLOAT32): + # Initialize the base with threshold=None + super().__init__(c, labels=labels, prefix=prefix, probs_type=probs_type, threshold=None) @property def name(self) -> str: return "add-scores" - def accepted_types(self) -> typing.Tuple: - """ - Accepted input types for this stage are returned. - - Returns - ------- - typing.Tuple[`morpheus.pipeline.messages.MultiResponseMessage`, ] - Accepted input types. - - """ - return (MultiResponseMessage, ) - def supports_cpp_node(self): # Enable support by default return True - def _add_labels(self, x: MultiResponseMessage): - probs = x.get_output(self._output_name) - - if (probs.shape[1] != len(self._class_labels)): - raise RuntimeError("Label count does not match output of model. 
Label count: {}, Model output: {}".format( - len(self._class_labels), probs.shape[1])) - - probs_np = probs.get() - - for i, label in self._idx2label.items(): - x.set_meta(label, probs_np[:, i].tolist()) - - # Return passthrough - return x - - def _build_single(self, builder: mrc.Builder, input_stream: StreamPair) -> StreamPair: - - # Convert the messages to rows of strings - if self._build_cpp_node(): - stream = _stages.AddScoresStage(builder, - self.unique_name, - len(self._class_labels), - self._idx2label, - output_name=self._output_name) - else: - stream = builder.make_node(self.unique_name, self._add_labels) - - builder.make_edge(input_stream[0], stream) - - # Return input unchanged - return stream, input_stream[1] + def _get_cpp_node(self, builder: mrc.Builder): + import morpheus._lib.stages as _stages + return _stages.AddScoresStage(builder, self.unique_name, self._idx2label) diff --git a/morpheus/stages/postprocess/add_scores_stage_base.py b/morpheus/stages/postprocess/add_scores_stage_base.py new file mode 100644 index 0000000000..6de1444d6d --- /dev/null +++ b/morpheus/stages/postprocess/add_scores_stage_base.py @@ -0,0 +1,133 @@ +# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import functools +import logging +import typing +from abc import abstractmethod + +import mrc +import mrc.core.operators as ops + +from morpheus._lib.common import TypeId +from morpheus.cli.register_stage import register_stage +from morpheus.config import Config +from morpheus.messages import MultiResponseMessage +from morpheus.pipeline.single_port_stage import SinglePortStage +from morpheus.pipeline.stream_pair import StreamPair + +logger = logging.getLogger(__name__) + + +class AddScoresStageBase(SinglePortStage): + """ + Base class for the `AddScoresStage` and `AddClassificationStage` + + Parameters + ---------- + c : `morpheus.config.Config` + Pipeline configuration instance. + labels : typing.List[str], default = None, multiple = True, show_default = "[Config.class_labels]" + Converts probability indexes into classification labels. Each item in the list will determine its index from the + Config.class_labels property and must be one of the available class labels. Leave as None to add all labels in + the Config.class_labels property. + prefix : str, default = "" + Prefix to add to each label. Allows adding labels different from the `Config.class_labels` property. + probs_type : TypeId + Datatype of the scores columns. + threshold : typing.Optional[float] + Converts all scores to a boolean value using this threshold. If `None`, scores are used, as-is. + """ + + def __init__(self, + c: Config, + *, + labels: typing.List[str] = None, + prefix: str = "", + probs_type: TypeId, + threshold: typing.Optional[float]): + super().__init__(c) + + self._feature_length = c.feature_length + self._labels = labels if labels is not None and len(labels) > 0 else c.class_labels + self._prefix = prefix + self._threshold = threshold + + self._class_labels = c.class_labels + + # Build the Index to Label map. 
+ self._idx2label = {} + + for label in self._labels: + # All labels must be in class_labels in order to get their position + if (label not in self._class_labels): + logger.warning("The label '%s' is not in Config.class_labels and will be ignored", label) + continue + + prefixed_label = self._prefix + label + self._idx2label[self._class_labels.index(label)] = prefixed_label + self._needed_columns[prefixed_label] = probs_type + + assert len(self._idx2label) > 0, "No labels were added to the stage" + + def accepted_types(self) -> typing.Tuple: + """ + Accepted input types for this stage are returned. + + Returns + ------- + typing.Tuple[`morpheus.pipeline.messages.MultiResponseMessage`, ] + Accepted input types. + + """ + return (MultiResponseMessage, ) + + @abstractmethod + def _get_cpp_node(self, builder: mrc.Builder): + pass + + def _build_single(self, builder: mrc.Builder, input_stream: StreamPair) -> StreamPair: + + # Convert the messages to rows of strings + if self._build_cpp_node(): + stream = self._get_cpp_node(builder=builder) + else: + stream = builder.make_node( + self.unique_name, + ops.map(functools.partial(self._add_labels, idx2label=self._idx2label, threshold=self._threshold))) + + builder.make_edge(input_stream[0], stream) + + # Return input type unchanged + return stream, input_stream[1] + + @staticmethod + def _add_labels(x: MultiResponseMessage, idx2label: typing.Dict[int, str], threshold: typing.Optional[float]): + + probs = x.get_probs_tensor() + + if (probs.shape[1] <= max(idx2label.keys())): + raise RuntimeError( + "Model output did not contain enough columns to fulfill the requested labels. 
Label indexes: {}, Model output columns: {}" + .format(idx2label, probs.shape[1])) + + if (threshold is not None): + probs = (probs > threshold).astype(bool) + + # Do these one at a time to prevent failures + for i, label in idx2label.items(): + x.set_meta(label, probs[:, i]) + + # Return the same object + return x diff --git a/morpheus/stages/postprocess/generate_viz_frames_stage.py b/morpheus/stages/postprocess/generate_viz_frames_stage.py index a99163c38e..c4872351a6 100644 --- a/morpheus/stages/postprocess/generate_viz_frames_stage.py +++ b/morpheus/stages/postprocess/generate_viz_frames_stage.py @@ -131,7 +131,7 @@ def indent_data(y: str): df["data"] = df["data"].apply(indent_data) - probs = x.get_output('probs') + probs = x.get_probs_tensor() pass_thresh = (probs >= 0.5).any(axis=1) max_arg = probs.argmax(axis=1) diff --git a/morpheus/stages/postprocess/ml_flow_drift_stage.py b/morpheus/stages/postprocess/ml_flow_drift_stage.py index 92f7454fbd..e385749a4f 100644 --- a/morpheus/stages/postprocess/ml_flow_drift_stage.py +++ b/morpheus/stages/postprocess/ml_flow_drift_stage.py @@ -130,7 +130,7 @@ def supports_cpp_node(self): def _calc_drift(self, x: MultiResponseMessage): # All probs in a batch will be calculated - shifted = cp.abs(x.get_output('probs') - 0.5) + 0.5 + shifted = cp.abs(x.get_probs_tensor() - 0.5) + 0.5 # Make sure the labels list is long enough for x in range(len(self._labels), shifted.shape[1]): diff --git a/morpheus/utils/logger.py b/morpheus/utils/logger.py index 08e67836d2..826ae6d398 100644 --- a/morpheus/utils/logger.py +++ b/morpheus/utils/logger.py @@ -227,4 +227,7 @@ def deprecated_message_warning(logger, cls, new_cls): """ Log a warning about a deprecated message """ - logger.warning(("The '%s' message has been deprecated in favor of '%s'. "), cls.__name__, new_cls.__name__) + logger.warning( + ("The '%s' message has been deprecated and will be removed in a future version. 
Please use '%s' instead."), + cls.__name__, + new_cls.__name__) diff --git a/tests/test_add_classifications_stage.py b/tests/test_add_classifications_stage.py index 11941ef430..1eb7e5dccc 100755 --- a/tests/test_add_classifications_stage.py +++ b/tests/test_add_classifications_stage.py @@ -14,12 +14,22 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os from unittest import mock import cupy as cp import pytest +import cudf + +from morpheus._lib.common import FileTypes +from morpheus.io.deserializers import read_file_to_df +from morpheus.messages.memory.tensor_memory import TensorMemory +from morpheus.messages.message_meta import MessageMeta +from morpheus.messages.multi_response_message import MultiResponseMessage from morpheus.stages.postprocess.add_classifications_stage import AddClassificationsStage +from utils import TEST_DIRS +from utils import assert_df_equal def test_constructor(config): @@ -41,41 +51,52 @@ def test_constructor(config): assert ac._labels, ['lizards'] assert ac._idx2label, {1: 'test_lizards'} - pytest.raises(AssertionError, AddClassificationsStage, config, labels=['missing']) + with pytest.raises(AssertionError): + AddClassificationsStage(config, labels=['missing']) @pytest.mark.use_python -def test_add_labels(config): - mock_message = mock.MagicMock() - mock_message.get_output.return_value = cp.array([[0.1, 0.5, 0.8], [0.2, 0.6, 0.9]]) +def test_add_labels(): - config.class_labels = ['frogs', 'lizards', 'toads'] + class_labels = {0: "frogs", 1: "lizards", 2: "toads"} - ac = AddClassificationsStage(config, threshold=0.5) - ac._add_labels(mock_message) + threshold = 0.6 - mock_message.set_meta.assert_has_calls([ - mock.call('frogs', [False, False]), - mock.call('lizards', [False, True]), - mock.call('toads', [True, True]), - ]) + df = cudf.DataFrame([0, 1], columns=["dummy"]) + probs_array = cp.array([[0.1, 0.6, 0.8], [0.3, 0.61, 0.9]]) + probs_array_bool = probs_array > 
threshold - wrong_shape = mock.MagicMock() - wrong_shape.get_output.return_value = cp.array([[0.1, 0.5], [0.2, 0.6]]) - pytest.raises(RuntimeError, ac._add_labels, wrong_shape) + message = MultiResponseMessage(meta=MessageMeta(df), memory=TensorMemory(count=2, tensors={"probs": probs_array})) + labeled = AddClassificationsStage._add_labels(message, idx2label=class_labels, threshold=threshold) -@pytest.mark.use_python -def test_build_single(config): - mock_stream = mock.MagicMock() - mock_segment = mock.MagicMock() - mock_segment.make_node.return_value = mock_stream - mock_input = mock.MagicMock() + assert assert_df_equal(labeled.get_meta("frogs"), probs_array_bool[:, 0]) + assert assert_df_equal(labeled.get_meta("lizards"), probs_array_bool[:, 1]) + assert assert_df_equal(labeled.get_meta("toads"), probs_array_bool[:, 2]) - config.class_labels = ['frogs', 'lizards', 'toads'] + # Same thing but change the probs tensor name + message = MultiResponseMessage(meta=MessageMeta(df), + memory=TensorMemory(count=2, tensors={"other_probs": probs_array}), + probs_tensor_name="other_probs") - ac = AddClassificationsStage(config) - ac._build_single(mock_segment, mock_input) + labeled = AddClassificationsStage._add_labels(message, idx2label=class_labels, threshold=threshold) + + assert assert_df_equal(labeled.get_meta("frogs"), probs_array_bool[:, 0]) + assert assert_df_equal(labeled.get_meta("lizards"), probs_array_bool[:, 1]) + assert assert_df_equal(labeled.get_meta("toads"), probs_array_bool[:, 2]) + + # Fail in missing probs data + message = MultiResponseMessage(meta=MessageMeta(df), + memory=TensorMemory(count=2, tensors={"other_probs": probs_array}), + probs_tensor_name="other_probs") + message.probs_tensor_name = "probs" + + with pytest.raises(KeyError): + AddClassificationsStage._add_labels(message, idx2label=class_labels, threshold=threshold) + + # Too small of a probs array + message = MultiResponseMessage(meta=MessageMeta(df), + memory=TensorMemory(count=2, 
tensors={"probs": probs_array[:, 0:-1]})) - mock_segment.make_node.assert_called_once() - mock_segment.make_edge.assert_called_once() + with pytest.raises(RuntimeError): + AddClassificationsStage._add_labels(message, idx2label=class_labels, threshold=threshold) diff --git a/tests/test_add_scores_stage.py b/tests/test_add_scores_stage.py index 903b4b407c..d905f23cb3 100755 --- a/tests/test_add_scores_stage.py +++ b/tests/test_add_scores_stage.py @@ -14,12 +14,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -from unittest import mock - import cupy as cp import pytest +import cudf + +from morpheus.messages.memory.tensor_memory import TensorMemory +from morpheus.messages.message_meta import MessageMeta +from morpheus.messages.multi_response_message import MultiResponseMessage +from morpheus.stages.postprocess.add_classifications_stage import AddClassificationsStage from morpheus.stages.postprocess.add_scores_stage import AddScoresStage +from utils import assert_df_equal def test_constructor(config): @@ -42,41 +47,48 @@ def test_constructor(config): assert a._labels == ['lizards'] assert a._idx2label == {1: 'test_lizards'} - pytest.raises(AssertionError, AddScoresStage, config, labels=['missing']) + with pytest.raises(AssertionError): + AddScoresStage(config, labels=['missing']) @pytest.mark.use_python -def test_add_labels(config): - mock_message = mock.MagicMock() - mock_message.get_output.return_value = cp.array([[0.1, 0.5, 0.8], [0.2, 0.6, 0.9]]) +def test_add_labels(): + class_labels = {0: "frogs", 1: "lizards", 2: "toads"} - config.class_labels = ['frogs', 'lizards', 'toads'] + df = cudf.DataFrame([0, 1], columns=["dummy"]) + probs_array = cp.array([[0.1, 0.5, 0.8], [0.2, 0.6, 0.9]]) - a = AddScoresStage(config) - a._add_labels(mock_message) + message = MultiResponseMessage(meta=MessageMeta(df), memory=TensorMemory(count=2, tensors={"probs": probs_array})) - mock_message.set_meta.assert_has_calls([ - 
mock.call('frogs', [0.1, 0.2]), - mock.call('lizards', [0.5, 0.6]), - mock.call('toads', [0.8, 0.9]), - ]) + labeled = AddClassificationsStage._add_labels(message, idx2label=class_labels, threshold=None) - wrong_shape = mock.MagicMock() - mock_message.get_output.return_value = cp.array([[0.1, 0.5], [0.2, 0.6]]) - pytest.raises(RuntimeError, a._add_labels, wrong_shape) + assert assert_df_equal(labeled.get_meta("frogs"), probs_array[:, 0]) + assert assert_df_equal(labeled.get_meta("lizards"), probs_array[:, 1]) + assert assert_df_equal(labeled.get_meta("toads"), probs_array[:, 2]) + # Same thing but change the probs tensor name + message = MultiResponseMessage(meta=MessageMeta(df), + memory=TensorMemory(count=2, tensors={"other_probs": probs_array}), + probs_tensor_name="other_probs") -@pytest.mark.use_python -def test_build_single(config): - mock_stream = mock.MagicMock() - mock_segment = mock.MagicMock() - mock_segment.make_node.return_value = mock_stream - mock_input = mock.MagicMock() + labeled = AddClassificationsStage._add_labels(message, idx2label=class_labels, threshold=None) - config.class_labels = ['frogs', 'lizards', 'toads'] + assert assert_df_equal(labeled.get_meta("frogs"), probs_array[:, 0]) + assert assert_df_equal(labeled.get_meta("lizards"), probs_array[:, 1]) + assert assert_df_equal(labeled.get_meta("toads"), probs_array[:, 2]) - a = AddScoresStage(config) - a._build_single(mock_segment, mock_input) + # Fail in missing probs data + message = MultiResponseMessage(meta=MessageMeta(df), + memory=TensorMemory(count=2, tensors={"other_probs": probs_array}), + probs_tensor_name="other_probs") + message.probs_tensor_name = "probs" + + with pytest.raises(KeyError): + AddClassificationsStage._add_labels(message, idx2label=class_labels, threshold=None) + + # Too small of a probs array + message = MultiResponseMessage(meta=MessageMeta(df), + memory=TensorMemory(count=2, tensors={"probs": probs_array[:, 0:-1]})) - mock_segment.make_node.assert_called_once() - 
mock_segment.make_edge.assert_called_once() + with pytest.raises(RuntimeError): + AddClassificationsStage._add_labels(message, idx2label=class_labels, threshold=None) diff --git a/tests/test_inference_stage.py b/tests/test_inference_stage.py index 94e271d0e5..ca5b58c65d 100755 --- a/tests/test_inference_stage.py +++ b/tests/test_inference_stage.py @@ -24,6 +24,7 @@ from morpheus.messages import ResponseMemory from morpheus.messages.memory.inference_memory import InferenceMemory +from morpheus.messages.memory.tensor_memory import TensorMemory from morpheus.messages.message_meta import MessageMeta from morpheus.messages.multi_inference_message import MultiInferenceMessage from morpheus.messages.multi_response_message import MultiResponseMessage @@ -210,16 +211,17 @@ def test_convert_response(config): for i, s in enumerate(message_sizes): output_memory.append( - ResponseMemory(count=s, tensors={"probs":full_output[sum(message_sizes[:i]):sum(message_sizes[:i]) + s, :]})) + ResponseMemory(count=s, + tensors={"probs": full_output[sum(message_sizes[:i]):sum(message_sizes[:i]) + s, :]})) resp = InferenceStage._convert_response((input_messages, output_memory)) assert resp.meta == full_input.meta assert resp.mess_offset == 0 assert resp.mess_count == total_size - assert isinstance(resp.memory, ResponseMemory) + assert isinstance(resp.memory, TensorMemory) assert resp.offset == 0 assert resp.count == total_size - assert (resp.memory.get_output("probs") == full_output).all() + assert (resp.memory.get_tensor("probs") == full_output).all() def test_convert_response_errors(): @@ -231,8 +233,8 @@ def test_convert_response_errors(): mm1 = _mk_message() mm2 = _mk_message(mess_offset=12) - out_msg1 = ResponseMemory(count=1, probs=cp.random.rand(1, 3)) - out_msg2 = ResponseMemory(count=1, probs=cp.random.rand(1, 3)) + out_msg1 = ResponseMemory(count=1, tensors={"probs": cp.random.rand(1, 3)}) + out_msg2 = ResponseMemory(count=1, tensors={"probs": cp.random.rand(1, 3)}) with 
pytest.raises(AssertionError): InferenceStage._convert_response(([mm1, mm2], [out_msg1, out_msg2])) @@ -242,8 +244,8 @@ def test_convert_response_errors(): mm1 = mm.get_slice(0, 1) mm2 = mm.get_slice(1, 2) - out_msg1 = ResponseMemory(count=1, probs=cp.random.rand(1, 3)) - out_msg2 = ResponseMemory(count=2, probs=cp.random.rand(2, 3)) + out_msg1 = ResponseMemory(count=1, tensors={"probs": cp.random.rand(1, 3)}) + out_msg2 = ResponseMemory(count=2, tensors={"probs": cp.random.rand(2, 3)}) with pytest.raises(AssertionError): InferenceStage._convert_response(([mm1, mm2], [out_msg1, out_msg2])) @@ -252,10 +254,10 @@ def test_convert_response_errors(): @pytest.mark.use_python def test_convert_one_response(): # Test first branch where `inf.mess_count == inf.count` - mem = ResponseMemory(4, probs=cp.zeros((4, 3))) + mem = ResponseMemory(count=4, tensors={"probs": cp.zeros((4, 3))}) inf = _mk_message(mess_count=4, count=4) - res = ResponseMemory(count=4, probs=cp.random.rand(4, 3)) + res = ResponseMemory(count=4, tensors={"probs": cp.random.rand(4, 3)}) mpm = InferenceStage._convert_one_response(MultiResponseMessage.from_message(inf, memory=mem), inf, res) assert mpm.meta == inf.meta @@ -269,15 +271,15 @@ def test_convert_one_response(): inf = _mk_message(mess_count=3, count=3) inf.memory.set_tensor("seq_ids", cp.array([[0], [1], [1]])) inf.mess_count = 2 # Get around the consistency check - res = ResponseMemory(count=3, probs=cp.array([[0, 0.6, 0.7], [5.6, 4.4, 9.2], [4.5, 6.7, 8.9]])) + res = ResponseMemory(count=3, tensors={"probs": cp.array([[0, 0.6, 0.7], [5.6, 4.4, 9.2], [4.5, 6.7, 8.9]])}) - mem = ResponseMemory(2, probs=cp.zeros((2, 3))) + mem = ResponseMemory(count=2, tensors={"probs": cp.zeros((2, 3))}) mpm = InferenceStage._convert_one_response(MultiResponseMessage.from_message(inf, memory=mem), inf, res) assert mem.get_output('probs').tolist() == [[0, 0.6, 0.7], [5.6, 6.7, 9.2]] def test_convert_one_response_error(): - mem = ResponseMemory(2, probs=cp.zeros((2, 
2))) + mem = ResponseMemory(2, tensors={"probs": cp.zeros((2, 2))}) inf = _mk_message(mess_count=2, count=2) res = _mk_message(mess_count=1, count=1) diff --git a/tests/test_messages.py b/tests/test_messages.py index d37eea701d..913bbafcb0 100644 --- a/tests/test_messages.py +++ b/tests/test_messages.py @@ -60,8 +60,8 @@ def check_all_messages(should_be_cpp: bool, no_cpp_class: bool): no_cpp_class, **{"meta": messages.MessageMeta(df)}) - check_message(tensor_memory.TensorMemory, _messages.TensorMemory, should_be_cpp, no_cpp_class, *(1, )) - check_message(messages.InferenceMemory, _messages.InferenceMemory, should_be_cpp, no_cpp_class, *(1, )) + check_message(tensor_memory.TensorMemory, _messages.TensorMemory, should_be_cpp, no_cpp_class, **{"count": 1}) + check_message(messages.InferenceMemory, _messages.InferenceMemory, should_be_cpp, no_cpp_class, **{"count": 1}) cp_array = cp.zeros((1, 2)) @@ -69,16 +69,26 @@ def check_all_messages(should_be_cpp: bool, no_cpp_class: bool): _messages.InferenceMemoryNLP, should_be_cpp, no_cpp_class, - *(1, cp_array, cp_array, cp_array)) + **{ + "count": 1, "input_ids": cp_array, "input_mask": cp_array, "seq_ids": cp_array + }) check_message(messages.InferenceMemoryFIL, _messages.InferenceMemoryFIL, should_be_cpp, no_cpp_class, - *(1, cp_array, cp_array)) + **{ + "count": 1, "input__0": cp_array, "seq_ids": cp_array + }) # No C++ impl, should always get the Python class - check_message(messages.InferenceMemoryAE, None, should_be_cpp, no_cpp_class, *(1, cp_array, cp_array)) + check_message(messages.InferenceMemoryAE, + None, + should_be_cpp, + no_cpp_class, + **{ + "count": 1, "input": cp_array, "seq_ids": cp_array + }) multi_tensor_message_tensors = { "input_ids": cp.zeros((1, 2)), @@ -118,16 +128,18 @@ def check_all_messages(should_be_cpp: bool, no_cpp_class: bool): meta=messages.MessageMeta(df), memory=inference_memory.InferenceMemory(count=1, tensors=multi_tensor_message_tensors)) - check_message(messages.ResponseMemory, 
_messages.ResponseMemory, should_be_cpp, no_cpp_class, *(1, )) + check_message(messages.ResponseMemory, _messages.ResponseMemory, should_be_cpp, no_cpp_class, **{"count": 1}) check_message(messages.ResponseMemoryProbs, _messages.ResponseMemoryProbs, should_be_cpp, no_cpp_class, - *(1, cp_array)) + **{ + "count": 1, "probs": cp_array + }) # No C++ impl - check_message(messages.ResponseMemoryAE, None, should_be_cpp, no_cpp_class, *(1, cp_array)) + check_message(messages.ResponseMemoryAE, None, should_be_cpp, no_cpp_class, **{"count": 1, "probs": cp_array}) check_message(messages.MultiResponseMessage, _messages.MultiResponseMessage, diff --git a/tests/test_multi_message.py b/tests/test_multi_message.py index a31d22671a..a8161c0ab0 100644 --- a/tests/test_multi_message.py +++ b/tests/test_multi_message.py @@ -639,6 +639,52 @@ def test_tensor_constructor(df: cudf.DataFrame): with pytest.raises(ValueError): MultiTensorMessage(meta=meta, mess_count=10, memory=memory, count=9) + # === ID Tensors === + id_tensor = cp.expand_dims(cp.arange(0, mess_len, dtype=int), axis=1) + + # With valid ID tensor + multi_tensor = MultiTensorMessage(meta=meta, memory=TensorMemory(count=mess_len, tensors={"seq_ids": id_tensor})) + assert cp.all(multi_tensor.get_id_tensor() == id_tensor) + + # With different ID name + multi_tensor = MultiTensorMessage(meta=meta, + memory=TensorMemory(count=mess_len, tensors={"other_seq_ids": id_tensor}), + id_tensor_name="other_seq_ids") + assert cp.all(multi_tensor.get_id_tensor() == id_tensor) + + # With message offset + multi_tensor = MultiTensorMessage(meta=meta, + mess_offset=4, + memory=TensorMemory(count=mess_len, tensors={"seq_ids": id_tensor}), + offset=4) + assert cp.all(multi_tensor.get_id_tensor() == id_tensor[4:]) + + # Incorrect start ID + invalid_id_tensor = cp.copy(id_tensor) + invalid_id_tensor[0] = -1 + with pytest.raises(RuntimeError): + multi_tensor = MultiTensorMessage(meta=meta, + memory=TensorMemory(count=mess_len, tensors={"seq_ids": 
invalid_id_tensor})) + + # Incorrect end ID + invalid_id_tensor = cp.copy(id_tensor) + invalid_id_tensor[-1] = invalid_id_tensor[-1] + 1 + with pytest.raises(RuntimeError): + multi_tensor = MultiTensorMessage(meta=meta, + memory=TensorMemory(count=mess_len, tensors={"seq_ids": invalid_id_tensor})) + + # Incorrect end ID, different id tensor name + invalid_id_tensor = cp.copy(id_tensor) + invalid_id_tensor[-1] = invalid_id_tensor[-1] + 1 + with pytest.raises(RuntimeError): + multi_tensor = MultiTensorMessage(meta=meta, + id_tensor_name="id_tensor", + memory=TensorMemory(count=mess_len, tensors={"id_tensor": invalid_id_tensor})) + + # Doesnt check with invalid due to different name + multi_tensor = MultiTensorMessage(meta=meta, + memory=TensorMemory(count=mess_len, tensors={"id_tensor": invalid_id_tensor})) + def test_tensor_slicing(df: cudf.DataFrame): diff --git a/tests/test_tensor_memory.py b/tests/test_tensor_memory.py index 45701b9593..cacce288a4 100644 --- a/tests/test_tensor_memory.py +++ b/tests/test_tensor_memory.py @@ -45,7 +45,7 @@ def compare_tensors(t1, t2): def check_tensor_memory(cls, count, tensors): other_tensors = {'ones': cp.ones(count), 'zeros': cp.zeros(count)} - m = cls(count) + m = cls(count=count) assert m.count == count assert m.get_tensors() == {} @@ -55,7 +55,7 @@ def check_tensor_memory(cls, count, tensors): m.set_tensors(other_tensors) compare_tensors(m.get_tensors(), other_tensors) - m = cls(count, tensors) + m = cls(count=count, tensors=tensors) assert m.count == count compare_tensors(m.get_tensors(), tensors) @@ -84,7 +84,7 @@ def test_inference_memory_ae(config): input = cp.array(test_data[:, 0]) seq_ids = cp.array(test_data[:, 1]) - m = InferenceMemoryAE(count, input=input, seq_ids=seq_ids) + m = InferenceMemoryAE(count=count, input=input, seq_ids=seq_ids) assert m.count == count compare_tensors(m.get_tensors(), {'input': input, 'seq_ids': seq_ids}) @@ -98,7 +98,7 @@ def test_inference_memory_fil(config): input_0 = 
cp.array(test_data[:, 0]) seq_ids = cp.array(test_data[:, 1]) - m = InferenceMemoryFIL(count, input__0=input_0, seq_ids=seq_ids) + m = InferenceMemoryFIL(count=count, input__0=input_0, seq_ids=seq_ids) assert m.count == count compare_tensors(m.get_tensors(), {'input__0': input_0, 'seq_ids': seq_ids}) @@ -113,7 +113,7 @@ def test_inference_memory_nlp(config): input_ids = cp.array(test_data[:, 0]) input_mask = cp.array(test_data[:, 1]) seq_ids = cp.array(test_data[:, 2]) - m = InferenceMemoryNLP(count, input_ids=input_ids, input_mask=input_mask, seq_ids=seq_ids) + m = InferenceMemoryNLP(count=count, input_ids=input_ids, input_mask=input_mask, seq_ids=seq_ids) assert m.count == count compare_tensors(m.get_tensors(), {'input_ids': input_ids, 'input_mask': input_mask, 'seq_ids': seq_ids}) @@ -156,19 +156,25 @@ def test_response_memory_probs(config): def test_constructor_length_error(config, tensor_cls): count = 10 tensors = {"a": cp.zeros(count), "b": cp.ones(count)} - pytest.raises(ValueError, tensor_cls, count - 1, tensors) + + with pytest.raises(ValueError): + tensor_cls(count=count - 1, tensors=tensors) @pytest.mark.parametrize("tensor_cls", [TensorMemory, InferenceMemory, ResponseMemory]) def test_set_tensor_length_error(config, tensor_cls): count = 10 - m = tensor_cls(count) - pytest.raises(ValueError, m.set_tensor, 'a', cp.zeros(count + 1)) + m = tensor_cls(count=count) + + with pytest.raises(ValueError): + m.set_tensor('a', cp.zeros(count + 1)) @pytest.mark.parametrize("tensor_cls", [TensorMemory, InferenceMemory, ResponseMemory]) def test_set_tensors_length_error(config, tensor_cls): count = 10 tensors = {"a": cp.zeros(count), "b": cp.ones(count)} - m = tensor_cls(count + 1) - pytest.raises(ValueError, m.set_tensors, tensors) + m = tensor_cls(count=count + 1) + + with pytest.raises(ValueError): + m.set_tensors(tensors) diff --git a/tests/utils.py b/tests/utils.py index 20b1fc39de..05992f0540 100755 --- a/tests/utils.py +++ b/tests/utils.py @@ -348,6 +348,8 @@ 
def assert_df_equal(df_to_check: typing.Union[pd.DataFrame, cudf.DataFrame], val if (isinstance(val_to_check, cudf.DataFrame) or isinstance(val_to_check, cudf.Series)): val_to_check = val_to_check.to_pandas() + elif (isinstance(val_to_check, cp.ndarray)): + val_to_check = val_to_check.get() bool_df = df_to_check == val_to_check From 5e01e6571e712da74177ec602f248a094f5c79b7 Mon Sep 17 00:00:00 2001 From: Michael Demoret Date: Mon, 20 Mar 2023 13:55:00 -0600 Subject: [PATCH 78/85] Cleanup and documentation --- .../messages/memory/tensor_memory.hpp | 8 ++ .../morpheus/messages/multi_inference.hpp | 2 + .../morpheus/messages/multi_inference_fil.hpp | 2 + .../morpheus/messages/multi_inference_nlp.hpp | 2 + .../morpheus/messages/multi_response.hpp | 27 ++++++ .../messages/multi_response_probs.hpp | 4 + .../morpheus/messages/multi_tensor.hpp | 25 ++++++ .../morpheus/stages/add_classification.hpp | 1 + .../include/morpheus/stages/add_scores.hpp | 1 + .../include/morpheus/utilities/cupy_util.hpp | 9 ++ .../morpheus/utilities/string_util.hpp | 8 ++ .../src/messages/memory/tensor_memory.cpp | 9 ++ morpheus/_lib/src/messages/multi.cpp | 82 ++++++++++--------- morpheus/_lib/src/python_modules/messages.cpp | 7 +- morpheus/_lib/src/utilities/cupy_util.cpp | 6 ++ morpheus/messages/multi_response_message.py | 64 +++++++-------- morpheus/messages/multi_tensor_message.py | 15 +++- .../postprocess/add_scores_stage_base.py | 6 +- tests/test_add_classifications_stage.py | 6 -- 19 files changed, 196 insertions(+), 88 deletions(-) diff --git a/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp b/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp index 0dece7e4e6..0e4c388556 100644 --- a/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp +++ b/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp @@ -168,6 +168,14 @@ struct TensorMemoryInterfaceProxy */ static TensorIndex get_count(TensorMemory& self); + /** + * @brief Returns a list of 
the current tensor names + * + * @param self + * @return std::vector + */ + static std::vector tensor_names_getter(TensorMemory& self); + /** * @brief Returns true if a tensor with the requested name exists in the tensors object * diff --git a/morpheus/_lib/include/morpheus/messages/multi_inference.hpp b/morpheus/_lib/include/morpheus/messages/multi_inference.hpp index dc7a5c6e66..8dc1c02839 100644 --- a/morpheus/_lib/include/morpheus/messages/multi_inference.hpp +++ b/morpheus/_lib/include/morpheus/messages/multi_inference.hpp @@ -61,6 +61,7 @@ class MultiInferenceMessage : public DerivedMultiMessage meta, TensorIndex mess_offset = 0, @@ -110,6 +111,7 @@ struct MultiInferenceMessageInterfaceProxy : public MultiTensorMessageInterfaceP * @param memory Holds the generic tensor data in cupy arrays that will be used for inference stages * @param offset Message offset in inference memory instance * @param count Message count in inference memory instance + * @param id_tensor_name Name of the tensor that correlates tensor rows to message IDs * @return std::shared_ptr */ static std::shared_ptr init(std::shared_ptr meta, diff --git a/morpheus/_lib/include/morpheus/messages/multi_inference_fil.hpp b/morpheus/_lib/include/morpheus/messages/multi_inference_fil.hpp index 51427659e2..80ba99dc1b 100644 --- a/morpheus/_lib/include/morpheus/messages/multi_inference_fil.hpp +++ b/morpheus/_lib/include/morpheus/messages/multi_inference_fil.hpp @@ -58,6 +58,7 @@ class MultiInferenceFILMessage : public DerivedMultiMessage meta, TensorIndex mess_offset = 0, @@ -116,6 +117,7 @@ struct MultiInferenceFILMessageInterfaceProxy : public MultiInferenceMessageInte * @param memory Holds the generic tensor data in cupy arrays that will be used for inference stages * @param offset Message offset in inference memory object * @param count Message count in inference memory object + * @param id_tensor_name Name of the tensor that correlates tensor rows to message IDs * @return std::shared_ptr */ 
static std::shared_ptr init(std::shared_ptr meta, diff --git a/morpheus/_lib/include/morpheus/messages/multi_inference_nlp.hpp b/morpheus/_lib/include/morpheus/messages/multi_inference_nlp.hpp index e5aa8ed1b4..6153132e07 100644 --- a/morpheus/_lib/include/morpheus/messages/multi_inference_nlp.hpp +++ b/morpheus/_lib/include/morpheus/messages/multi_inference_nlp.hpp @@ -57,6 +57,7 @@ class MultiInferenceNLPMessage : public DerivedMultiMessage meta, TensorIndex mess_offset = 0, @@ -131,6 +132,7 @@ struct MultiInferenceNLPMessageInterfaceProxy : public MultiInferenceMessageInte * @param memory Holds the generic tensor data in cupy arrays that will be used for inference stages * @param offset Message offset in inference memory object * @param count Message count in inference memory object + * @param id_tensor_name Name of the tensor that correlates tensor rows to message IDs * @return std::shared_ptr */ static std::shared_ptr init(std::shared_ptr meta, diff --git a/morpheus/_lib/include/morpheus/messages/multi_response.hpp b/morpheus/_lib/include/morpheus/messages/multi_response.hpp index b62bcb920e..7a30899c63 100644 --- a/morpheus/_lib/include/morpheus/messages/multi_response.hpp +++ b/morpheus/_lib/include/morpheus/messages/multi_response.hpp @@ -64,6 +64,8 @@ class MultiResponseMessage : public DerivedMultiMessage meta, TensorIndex mess_offset = 0, @@ -103,6 +105,11 @@ class MultiResponseMessage : public DerivedMultiMessage */ static std::shared_ptr init(std::shared_ptr meta, @@ -133,8 +142,20 @@ struct MultiResponseMessageInterfaceProxy : public MultiTensorMessageInterfacePr std::string id_tensor_name, std::string probs_tensor_name); + /** + * @brief Gets the `probs_tensor_name` property + * + * @param self + * @return std::string Name of `probs_tensor_name` + */ static std::string probs_tensor_name_getter(MultiResponseMessage& self); + /** + * @brief Sets the `probs_tensor_name` property + * + * @param self + * @param probs_tensor_name New name of 
`probs_tensor_name` property + */ static void probs_tensor_name_setter(MultiResponseMessage& self, std::string probs_tensor_name); /** @@ -147,6 +168,12 @@ struct MultiResponseMessageInterfaceProxy : public MultiTensorMessageInterfacePr */ static pybind11::object get_output(MultiResponseMessage& self, const std::string& name); + /** + * @brief Get the tensor that holds output probabilities. Equivalent to `get_tensor(probs_tensor_name)` + * + * @param self + * @return pybind11::object A cupy.ndarray object + */ static pybind11::object get_probs_tensor(MultiResponseMessage& self); }; #pragma GCC visibility pop diff --git a/morpheus/_lib/include/morpheus/messages/multi_response_probs.hpp b/morpheus/_lib/include/morpheus/messages/multi_response_probs.hpp index 6579170784..975de41dad 100644 --- a/morpheus/_lib/include/morpheus/messages/multi_response_probs.hpp +++ b/morpheus/_lib/include/morpheus/messages/multi_response_probs.hpp @@ -62,6 +62,8 @@ class MultiResponseProbsMessage : public DerivedMultiMessage meta, TensorIndex mess_offset = 0, @@ -103,6 +105,8 @@ struct MultiResponseProbsMessageInterfaceProxy : public MultiResponseMessageInte * @param memory Holds the inference response probabilites as a tensor * @param offset Message offset in inference memory instance * @param count Message count in inference memory instance + * @param id_tensor_name Name of the tensor that correlates tensor rows to message IDs + * @param probs_tensor_name Name of the tensor that holds output probabilities * @return std::shared_ptr */ static std::shared_ptr init(std::shared_ptr meta, diff --git a/morpheus/_lib/include/morpheus/messages/multi_tensor.hpp b/morpheus/_lib/include/morpheus/messages/multi_tensor.hpp index c81b8bf568..a6fcc67d80 100644 --- a/morpheus/_lib/include/morpheus/messages/multi_tensor.hpp +++ b/morpheus/_lib/include/morpheus/messages/multi_tensor.hpp @@ -71,6 +71,7 @@ class MultiTensorMessage : public DerivedMultiMessage meta, TensorIndex mess_offset = 0, @@ -113,6 
+114,11 @@ class MultiTensorMessage : public DerivedMultiMessage */ static std::shared_ptr init(std::shared_ptr meta, @@ -177,8 +184,20 @@ struct MultiTensorMessageInterfaceProxy */ static TensorIndex count(MultiTensorMessage& self); + /** + * @brief Gets the `id_tensor_name` property + * + * @param self + * @return std::string Name of `id_tensor_name` + */ static std::string id_tensor_name_getter(MultiTensorMessage& self); + /** + * @brief Sets the `id_tensor_name` property + * + * @param self + * @param id_tensor_name New name of `id_tensor_name` property + */ static void id_tensor_name_setter(MultiTensorMessage& self, std::string id_tensor_name); /** @@ -191,6 +210,12 @@ struct MultiTensorMessageInterfaceProxy */ static pybind11::object get_tensor(MultiTensorMessage& self, const std::string& name); + /** + * @brief Get the tensor that holds message ID information. Equivalent to `get_tensor(id_tensor_name)` + * + * @param self + * @return pybind11::object A cupy.ndarray object + */ static pybind11::object get_id_tensor(MultiTensorMessage& self); /** diff --git a/morpheus/_lib/include/morpheus/stages/add_classification.hpp b/morpheus/_lib/include/morpheus/stages/add_classification.hpp index 6e7f17ed71..a22ee807b7 100644 --- a/morpheus/_lib/include/morpheus/stages/add_classification.hpp +++ b/morpheus/_lib/include/morpheus/stages/add_classification.hpp @@ -74,6 +74,7 @@ class AddClassificationsStage std::map m_idx2label; float m_threshold; + // The minimum number of columns needed to extract the label data std::size_t m_min_col_count; }; diff --git a/morpheus/_lib/include/morpheus/stages/add_scores.hpp b/morpheus/_lib/include/morpheus/stages/add_scores.hpp index a616e5b6e3..da2fc4bc6e 100644 --- a/morpheus/_lib/include/morpheus/stages/add_scores.hpp +++ b/morpheus/_lib/include/morpheus/stages/add_scores.hpp @@ -72,6 +72,7 @@ class AddScoresStage std::map m_idx2label; + // The minimum number of columns needed to extract the label data std::size_t m_min_col_count; }; 
diff --git a/morpheus/_lib/include/morpheus/utilities/cupy_util.hpp b/morpheus/_lib/include/morpheus/utilities/cupy_util.hpp index 48ddc9e991..208aa3363b 100644 --- a/morpheus/_lib/include/morpheus/utilities/cupy_util.hpp +++ b/morpheus/_lib/include/morpheus/utilities/cupy_util.hpp @@ -52,6 +52,15 @@ struct CupyUtil */ static pybind11::module_ get_cp(); + /** + * @brief Tests whether or not an object is an instance of `cupy.ndarray` + * + * @param test_obj Python object to test + * @return true The object is a cupy array + * @return false The object is not a cupy array + */ + static bool is_cupy_array(pybind11::object test_obj); + /** * @brief Convert a TensorObject to a CuPy array. Requires GIL to have already been aqcuired. * diff --git a/morpheus/_lib/include/morpheus/utilities/string_util.hpp b/morpheus/_lib/include/morpheus/utilities/string_util.hpp index 5d1d3aa372..c470a4d1ab 100644 --- a/morpheus/_lib/include/morpheus/utilities/string_util.hpp +++ b/morpheus/_lib/include/morpheus/utilities/string_util.hpp @@ -65,6 +65,14 @@ struct StringUtil return MORPHEUS_CONCAT_STR("[" << join(begin, end, ", ") << "]"); } + /** + * @brief Generates a string representation of a std::map in the form "{key1: 'value1', key2: 'value2'}" + * + * @tparam IterT Deduced iterator type + * @param begin Start iterator. Use `myMap.begin()` + * @param end End iterator. 
Use `myMap.end()` + * @return std::string + */ template static std::string map_to_str(IterT begin, IterT end) { diff --git a/morpheus/_lib/src/messages/memory/tensor_memory.cpp b/morpheus/_lib/src/messages/memory/tensor_memory.cpp index 016a4249b5..52c96a99a1 100644 --- a/morpheus/_lib/src/messages/memory/tensor_memory.cpp +++ b/morpheus/_lib/src/messages/memory/tensor_memory.cpp @@ -19,6 +19,7 @@ #include "morpheus/objects/tensor_object.hpp" // for TensorObject #include "morpheus/utilities/cupy_util.hpp" // for CupyUtil +#include "morpheus/utilities/stage_util.hpp" #include "morpheus/utilities/string_util.hpp" // for MORPHEUS_CONCAT_STR #include @@ -127,6 +128,14 @@ TensorIndex TensorMemoryInterfaceProxy::get_count(TensorMemory& self) return self.count; } +std::vector TensorMemoryInterfaceProxy::tensor_names_getter(TensorMemory& self) +{ + return foreach_map(self.tensors, [](const auto& item) -> std::string { + // Just return the keys + return item.first; + }); +} + bool TensorMemoryInterfaceProxy::has_tensor(TensorMemory& self, std::string name) { return self.has_tensor(name); diff --git a/morpheus/_lib/src/messages/multi.cpp b/morpheus/_lib/src/messages/multi.cpp index ca1f8d7a60..34779582eb 100644 --- a/morpheus/_lib/src/messages/multi.cpp +++ b/morpheus/_lib/src/messages/multi.cpp @@ -22,6 +22,7 @@ #include "morpheus/objects/table_info.hpp" #include "morpheus/objects/tensor_object.hpp" #include "morpheus/utilities/cudf_util.hpp" +#include "morpheus/utilities/cupy_util.hpp" #include // for cudaMemcpy, cudaMemcpy2D, cudaMemcpyDeviceToDevice #include // for column_view @@ -364,51 +365,54 @@ void MultiMessageInterfaceProxy::set_meta(MultiMessage& self, pybind11::object c auto [row_indexer, column_indexer] = get_indexers(self, df, columns); - // Check to see if this is adding a column. If so, we need to use .loc instead of .iloc - if (column_indexer.contains(-1)) - { - // cudf is really bad at adding new columns. 
Need to use loc with a unique and monotonic index - py::object saved_index = df.attr("index"); + if (py::isinstance(value, CupyUtil::asdf())) - // Check to see if we can use slices - if (!(saved_index.attr("is_unique").cast() && (saved_index.attr("is_monotonic_increasing").cast() || - saved_index.attr("is_monotonic_decreasing").cast()))) + // Check to see if this is adding a column. If so, we need to use .loc instead of .iloc + if (column_indexer.contains(-1)) { - df.attr("reset_index")("drop"_a = true, "inplace"_a = true); + // cudf is really bad at adding new columns. Need to use loc with a unique and monotonic index + py::object saved_index = df.attr("index"); + + // Check to see if we can use slices + if (!(saved_index.attr("is_unique").cast() && + (saved_index.attr("is_monotonic_increasing").cast() || + saved_index.attr("is_monotonic_decreasing").cast()))) + { + df.attr("reset_index")("drop"_a = true, "inplace"_a = true); + } + else + { + // Erase the saved index so we dont reset it + saved_index = py::none(); + } + + // Perform the update via slices + df.attr("loc")[pybind11::make_tuple(df.attr("index")[row_indexer], columns)] = value; + + // Reset the index if we changed it + if (!saved_index.is_none()) + { + df.attr("set_index")(saved_index, "inplace"_a = true); + } } else { - // Erase the saved index so we dont reset it - saved_index = py::none(); - } - - // Perform the update via slices - df.attr("loc")[pybind11::make_tuple(df.attr("index")[row_indexer], columns)] = value; - - // Reset the index if we changed it - if (!saved_index.is_none()) - { - df.attr("set_index")(saved_index, "inplace"_a = true); + // If we only have one column, convert it to a series (broadcasts work with more types on a series) + if (pybind11::len(column_indexer) == 1) + { + column_indexer = column_indexer.cast()[0]; + } + + try + { + // Use iloc + df.attr("iloc")[pybind11::make_tuple(row_indexer, column_indexer)] = value; + } catch (py::error_already_set) + { + // Try this as a 
fallback. Works better for strings. See issue #286 + df[columns].attr("iloc")[row_indexer] = value; + } } - } - else - { - // If we only have one column, convert it to a series (broadcasts work with more types on a series) - if (pybind11::len(column_indexer) == 1) - { - column_indexer = column_indexer.cast()[0]; - } - - try - { - // Use iloc - df.attr("iloc")[pybind11::make_tuple(row_indexer, column_indexer)] = value; - } catch (py::error_already_set) - { - // Try this as a fallback. Works better for strings. See issue #286 - df[columns].attr("iloc")[row_indexer] = value; - } - } mutable_info.return_obj(std::move(df)); } diff --git a/morpheus/_lib/src/python_modules/messages.cpp b/morpheus/_lib/src/python_modules/messages.cpp index bfa7c1deda..992b49c8b9 100644 --- a/morpheus/_lib/src/python_modules/messages.cpp +++ b/morpheus/_lib/src/python_modules/messages.cpp @@ -139,6 +139,7 @@ PYBIND11_MODULE(messages, m) py::class_>(m, "TensorMemory") .def(py::init<>(&TensorMemoryInterfaceProxy::init), py::arg("count"), py::arg("tensors") = py::none()) .def_readonly("count", &TensorMemory::count) + .def_property_readonly("tensor_names", &TensorMemoryInterfaceProxy::tensor_names_getter) .def("has_tensor", &TensorMemoryInterfaceProxy::has_tensor) .def("get_tensors", &TensorMemoryInterfaceProxy::get_tensors, py::return_value_policy::move) .def("set_tensors", &TensorMemoryInterfaceProxy::set_tensors, py::arg("tensors")) @@ -250,11 +251,7 @@ PYBIND11_MODULE(messages, m) .def_property_readonly("memory", &MultiTensorMessageInterfaceProxy::memory) .def_property_readonly("offset", &MultiTensorMessageInterfaceProxy::offset) .def_property_readonly("count", &MultiTensorMessageInterfaceProxy::count) - .def_property("id_tensor_name", - &MultiTensorMessageInterfaceProxy::id_tensor_name_getter, - &MultiTensorMessageInterfaceProxy::id_tensor_name_setter) - .def("get_tensor", &MultiTensorMessageInterfaceProxy::get_tensor) - .def("get_id_tensor", 
&MultiTensorMessageInterfaceProxy::get_id_tensor); + .def("get_tensor", &MultiTensorMessageInterfaceProxy::get_tensor); py::class_>( m, "MultiInferenceMessage") diff --git a/morpheus/_lib/src/utilities/cupy_util.cpp b/morpheus/_lib/src/utilities/cupy_util.cpp index 91b0f0aa09..aed3ad1bee 100644 --- a/morpheus/_lib/src/utilities/cupy_util.cpp +++ b/morpheus/_lib/src/utilities/cupy_util.cpp @@ -63,6 +63,11 @@ pybind11::module_ CupyUtil::get_cp() return m; } +bool CupyUtil::is_cupy_array(pybind11::object test_obj) +{ + return py::isinstance(test_obj, CupyUtil::get_cp().attr("ndarray")); +} + pybind11::object CupyUtil::tensor_to_cupy(const TensorObject& tensor) { // These steps follow the cupy._convert_object_with_cuda_array_interface function shown here: @@ -175,4 +180,5 @@ CupyUtil::py_tensor_map_t CupyUtil::tensors_to_cupy(const tensor_map_t& tensors) return cupy_tensors; } + } // namespace morpheus diff --git a/morpheus/messages/multi_response_message.py b/morpheus/messages/multi_response_message.py index 8c084d3dd2..7e63817c23 100644 --- a/morpheus/messages/multi_response_message.py +++ b/morpheus/messages/multi_response_message.py @@ -43,10 +43,13 @@ def __init__(self, memory: TensorMemory = None, offset: int = 0, count: int = -1, - probs_tensor_name: str = None): + id_tensor_name: str = "seq_ids", + probs_tensor_name: str = "probs"): - if (probs_tensor_name is not None): - self.probs_tensor_name = probs_tensor_name + if probs_tensor_name is None: + raise ValueError("Cannot use None for `probs_tensor_name`") + + self.probs_tensor_name = probs_tensor_name # Add the tensor name to the required list if (self.probs_tensor_name not in self.required_tensors): @@ -58,7 +61,8 @@ def __init__(self, mess_count=mess_count, memory=memory, offset=offset, - count=count) + count=count, + id_tensor_name=id_tensor_name) @property def outputs(self): @@ -91,30 +95,16 @@ def get_output(self, name: str): return self.get_tensor(name) def get_probs_tensor(self): + """ + Get the tensor 
that holds output probabilities. Equivalent to `get_tensor(probs_tensor_name)` - return self.get_tensor(self.probs_tensor_name) - - # def copy_output_ranges(self, ranges, mask=None): - # """ - # Perform a copy of the underlying output tensors for the given `ranges` of rows. - # Alias for `MultiResponseMessage.copy_output_ranges` - - # Parameters - # ---------- - # ranges : typing.List[typing.Tuple[int, int]] - # Rows to include in the copy in the form of `[(`start_row`, `stop_row`),...]` - # The `stop_row` isn't included. For example to copy rows 1-2 & 5-7 `ranges=[(1, 3), (5, 8)]` - - # mask : typing.Union[None, cupy.ndarray, numpy.ndarray] - # Optionally specify rows as a cupy array (when using cudf Dataframes) or a numpy array (when using pandas - # Dataframes) of booleans. When not-None `ranges` will be ignored. This is useful as an optimization as this - # avoids needing to generate the mask on it's own. + Returns + ------- + cupy.ndarray + The probabilities tensor + """ - # Returns - # ------- - # typing.Dict[str, cupy.ndarray] - # """ - # return self.copy_tensor_ranges(ranges, mask=mask) + return self.get_tensor(self.probs_tensor_name) @dataclasses.dataclass @@ -126,6 +116,10 @@ class MultiResponseProbsMessage(MultiResponseMessage, cpp_class=_messages.MultiR required_tensors: typing.ClassVar[typing.List[str]] = ["probs"] + def __new__(cls, *args, **kwargs): + morpheus_logger.deprecated_message_warning(logger, cls, MultiResponseMessage) + return super(MultiResponseMessage, cls).__new__(cls, *args, **kwargs) + def __init__(self, *, meta: MessageMeta, @@ -133,14 +127,18 @@ def __init__(self, mess_count: int = -1, memory: TensorMemory, offset: int = 0, - count: int = -1): + count: int = -1, + id_tensor_name: str = "seq_ids", + probs_tensor_name: str = "probs"): super().__init__(meta=meta, mess_offset=mess_offset, mess_count=mess_count, memory=memory, offset=offset, - count=count) + count=count, + id_tensor_name=id_tensor_name, + 
probs_tensor_name=probs_tensor_name) @property def probs(self): @@ -156,10 +154,6 @@ def probs(self): return self._get_tensor_prop("probs") - def __new__(cls, *args, **kwargs): - morpheus_logger.deprecated_message_warning(logger, cls, MultiResponseMessage) - return super(MultiResponseMessage, cls).__new__(cls, *args, **kwargs) - @dataclasses.dataclass class MultiResponseAEMessage(MultiResponseMessage, cpp_class=None): @@ -178,6 +172,8 @@ def __init__(self, memory: TensorMemory = None, offset: int = 0, count: int = -1, + id_tensor_name: str = "seq_ids", + probs_tensor_name: str = "probs", user_id: str = None): if (user_id is None): @@ -190,4 +186,6 @@ def __init__(self, mess_count=mess_count, memory=memory, offset=offset, - count=count) + count=count, + id_tensor_name=id_tensor_name, + probs_tensor_name=probs_tensor_name) diff --git a/morpheus/messages/multi_tensor_message.py b/morpheus/messages/multi_tensor_message.py index 722b43b73d..40e2ce5a49 100644 --- a/morpheus/messages/multi_tensor_message.py +++ b/morpheus/messages/multi_tensor_message.py @@ -47,7 +47,7 @@ class MultiTensorMessage(MultiMessage, cpp_class=_messages.MultiTensorMessage): required_tensors: typing.ClassVar[typing.List[str]] = [] """The tensor names that are required for instantiation""" id_tensor_name: typing.ClassVar[str] = "seq_ids" - """Name of the tensor correlates tensor rows to message IDs""" + """Name of the tensor that correlates tensor rows to message IDs""" def __init__(self, *, @@ -188,6 +188,19 @@ def get_tensor(self, name: str): return self.memory.get_tensor(name)[self.offset:self.offset + self.count, :] def get_id_tensor(self): + """ + Get the tensor that holds message ID information. 
Equivalent to `get_tensor(id_tensor_name)` + + Returns + ------- + cupy.ndarray + Array containing the ID information + + Raises + ------ + KeyError + If `self.id_tensor_name` is not found in the tensors + """ try: return self.get_tensor(self.id_tensor_name) diff --git a/morpheus/stages/postprocess/add_scores_stage_base.py b/morpheus/stages/postprocess/add_scores_stage_base.py index 6de1444d6d..cf180de62c 100644 --- a/morpheus/stages/postprocess/add_scores_stage_base.py +++ b/morpheus/stages/postprocess/add_scores_stage_base.py @@ -21,7 +21,6 @@ import mrc.core.operators as ops from morpheus._lib.common import TypeId -from morpheus.cli.register_stage import register_stage from morpheus.config import Config from morpheus.messages import MultiResponseMessage from morpheus.pipeline.single_port_stage import SinglePortStage @@ -118,9 +117,8 @@ def _add_labels(x: MultiResponseMessage, idx2label: typing.Dict[int, str], thres probs = x.get_probs_tensor() if (probs.shape[1] <= max(idx2label.keys())): - raise RuntimeError( - "Model output did not contain enough columns to fufill the requested labels. Label indexes: {}, Model output columns: {}" - .format(idx2label, probs.shape[1])) + raise RuntimeError(("Model output did not contain enough columns to fufill the requested labels. " + "Label indexes: {}, Model output columns: {}").format(idx2label, probs.shape[1])) if (threshold is not None): probs = (probs > threshold).astype(bool) diff --git a/tests/test_add_classifications_stage.py b/tests/test_add_classifications_stage.py index 1eb7e5dccc..9bc9265bee 100755 --- a/tests/test_add_classifications_stage.py +++ b/tests/test_add_classifications_stage.py @@ -14,21 +14,15 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import os -from unittest import mock - import cupy as cp import pytest import cudf -from morpheus._lib.common import FileTypes -from morpheus.io.deserializers import read_file_to_df from morpheus.messages.memory.tensor_memory import TensorMemory from morpheus.messages.message_meta import MessageMeta from morpheus.messages.multi_response_message import MultiResponseMessage from morpheus.stages.postprocess.add_classifications_stage import AddClassificationsStage -from utils import TEST_DIRS from utils import assert_df_equal From 7b7f895061be2b00e24fb86b1e0fd1f989f1ecbf Mon Sep 17 00:00:00 2001 From: Michael Demoret Date: Mon, 20 Mar 2023 13:55:44 -0600 Subject: [PATCH 79/85] Fixing typo --- morpheus/_lib/src/messages/multi.cpp | 81 ++++++++++++++-------------- 1 file changed, 39 insertions(+), 42 deletions(-) diff --git a/morpheus/_lib/src/messages/multi.cpp b/morpheus/_lib/src/messages/multi.cpp index 34779582eb..f6776fb00a 100644 --- a/morpheus/_lib/src/messages/multi.cpp +++ b/morpheus/_lib/src/messages/multi.cpp @@ -365,54 +365,51 @@ void MultiMessageInterfaceProxy::set_meta(MultiMessage& self, pybind11::object c auto [row_indexer, column_indexer] = get_indexers(self, df, columns); - if (py::isinstance(value, CupyUtil::asdf())) + // Check to see if this is adding a column. If so, we need to use .loc instead of .iloc + if (column_indexer.contains(-1)) + { + // cudf is really bad at adding new columns. Need to use loc with a unique and monotonic index + py::object saved_index = df.attr("index"); - // Check to see if this is adding a column. If so, we need to use .loc instead of .iloc - if (column_indexer.contains(-1)) + // Check to see if we can use slices + if (!(saved_index.attr("is_unique").cast() && (saved_index.attr("is_monotonic_increasing").cast() || + saved_index.attr("is_monotonic_decreasing").cast()))) { - // cudf is really bad at adding new columns. 
Need to use loc with a unique and monotonic index - py::object saved_index = df.attr("index"); - - // Check to see if we can use slices - if (!(saved_index.attr("is_unique").cast() && - (saved_index.attr("is_monotonic_increasing").cast() || - saved_index.attr("is_monotonic_decreasing").cast()))) - { - df.attr("reset_index")("drop"_a = true, "inplace"_a = true); - } - else - { - // Erase the saved index so we dont reset it - saved_index = py::none(); - } - - // Perform the update via slices - df.attr("loc")[pybind11::make_tuple(df.attr("index")[row_indexer], columns)] = value; - - // Reset the index if we changed it - if (!saved_index.is_none()) - { - df.attr("set_index")(saved_index, "inplace"_a = true); - } + df.attr("reset_index")("drop"_a = true, "inplace"_a = true); } else { - // If we only have one column, convert it to a series (broadcasts work with more types on a series) - if (pybind11::len(column_indexer) == 1) - { - column_indexer = column_indexer.cast()[0]; - } - - try - { - // Use iloc - df.attr("iloc")[pybind11::make_tuple(row_indexer, column_indexer)] = value; - } catch (py::error_already_set) - { - // Try this as a fallback. Works better for strings. See issue #286 - df[columns].attr("iloc")[row_indexer] = value; - } + // Erase the saved index so we dont reset it + saved_index = py::none(); + } + + // Perform the update via slices + df.attr("loc")[pybind11::make_tuple(df.attr("index")[row_indexer], columns)] = value; + + // Reset the index if we changed it + if (!saved_index.is_none()) + { + df.attr("set_index")(saved_index, "inplace"_a = true); } + } + else + { + // If we only have one column, convert it to a series (broadcasts work with more types on a series) + if (pybind11::len(column_indexer) == 1) + { + column_indexer = column_indexer.cast()[0]; + } + + try + { + // Use iloc + df.attr("iloc")[pybind11::make_tuple(row_indexer, column_indexer)] = value; + } catch (py::error_already_set) + { + // Try this as a fallback. Works better for strings. 
See issue #286 + df[columns].attr("iloc")[row_indexer] = value; + } + } mutable_info.return_obj(std::move(df)); } From d11aaec7faabb2d783421e2b1a64169f3079eaab Mon Sep 17 00:00:00 2001 From: Michael Demoret Date: Mon, 20 Mar 2023 14:08:21 -0600 Subject: [PATCH 80/85] Fixing failing tests --- morpheus/_lib/src/python_modules/messages.cpp | 3 ++- morpheus/messages/multi_response_message.py | 11 ++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/morpheus/_lib/src/python_modules/messages.cpp b/morpheus/_lib/src/python_modules/messages.cpp index 992b49c8b9..5ab1d7a293 100644 --- a/morpheus/_lib/src/python_modules/messages.cpp +++ b/morpheus/_lib/src/python_modules/messages.cpp @@ -251,7 +251,8 @@ PYBIND11_MODULE(messages, m) .def_property_readonly("memory", &MultiTensorMessageInterfaceProxy::memory) .def_property_readonly("offset", &MultiTensorMessageInterfaceProxy::offset) .def_property_readonly("count", &MultiTensorMessageInterfaceProxy::count) - .def("get_tensor", &MultiTensorMessageInterfaceProxy::get_tensor); + .def("get_tensor", &MultiTensorMessageInterfaceProxy::get_tensor) + .def("get_id_tensor", &MultiResponseMessageInterfaceProxy::get_id_tensor); py::class_>( m, "MultiInferenceMessage") diff --git a/morpheus/messages/multi_response_message.py b/morpheus/messages/multi_response_message.py index 7e63817c23..0ca3526990 100644 --- a/morpheus/messages/multi_response_message.py +++ b/morpheus/messages/multi_response_message.py @@ -102,9 +102,18 @@ def get_probs_tensor(self): ------- cupy.ndarray The probabilities tensor + + Raises + ------ + KeyError + If `self.probs_tensor_name` is not found in the tensors """ - return self.get_tensor(self.probs_tensor_name) + try: + return self.get_tensor(self.probs_tensor_name) + except KeyError as exc: + raise KeyError(f"Cannopt get ID tensor. 
Tensor with name '{self.probs_tensor_name}' " + "does not exist in the memory object") from exc @dataclasses.dataclass From e1bd57b5dff6b40d778c1dfa285e2764e441f814 Mon Sep 17 00:00:00 2001 From: Michael Demoret Date: Mon, 20 Mar 2023 14:29:30 -0600 Subject: [PATCH 81/85] IWYU fixes --- .../morpheus/messages/multi_inference.hpp | 1 - .../morpheus/messages/multi_inference_fil.hpp | 2 +- .../morpheus/messages/multi_inference_nlp.hpp | 5 ++-- .../morpheus/messages/multi_response.hpp | 1 - .../messages/multi_response_probs.hpp | 3 ++- morpheus/_lib/src/messages/multi.cpp | 1 - .../_lib/src/messages/multi_inference.cpp | 1 - .../_lib/src/messages/multi_inference_fil.cpp | 1 - .../_lib/src/messages/multi_inference_nlp.cpp | 1 - morpheus/_lib/src/messages/multi_response.cpp | 5 +++- .../src/messages/multi_response_probs.cpp | 1 - morpheus/_lib/src/messages/multi_tensor.cpp | 3 +-- .../_lib/src/stages/add_classification.cpp | 2 +- morpheus/_lib/src/stages/add_scores.cpp | 2 ++ morpheus/_lib/src/stages/triton_inference.cpp | 24 +++++++++---------- morpheus/_lib/src/utilities/string_util.cpp | 2 -- 16 files changed, 26 insertions(+), 29 deletions(-) diff --git a/morpheus/_lib/include/morpheus/messages/multi_inference.hpp b/morpheus/_lib/include/morpheus/messages/multi_inference.hpp index 8dc1c02839..fc17d690e0 100644 --- a/morpheus/_lib/include/morpheus/messages/multi_inference.hpp +++ b/morpheus/_lib/include/morpheus/messages/multi_inference.hpp @@ -17,7 +17,6 @@ #pragma once -#include "morpheus/messages/memory/inference_memory.hpp" #include "morpheus/messages/memory/tensor_memory.hpp" #include "morpheus/messages/meta.hpp" #include "morpheus/messages/multi.hpp" diff --git a/morpheus/_lib/include/morpheus/messages/multi_inference_fil.hpp b/morpheus/_lib/include/morpheus/messages/multi_inference_fil.hpp index 80ba99dc1b..a5e31cfdb7 100644 --- a/morpheus/_lib/include/morpheus/messages/multi_inference_fil.hpp +++ b/morpheus/_lib/include/morpheus/messages/multi_inference_fil.hpp 
@@ -17,7 +17,6 @@ #pragma once -#include "morpheus/messages/memory/inference_memory.hpp" // for InferenceMemory #include "morpheus/messages/memory/tensor_memory.hpp" #include "morpheus/messages/meta.hpp" // for MessageMeta #include "morpheus/messages/multi.hpp" @@ -28,6 +27,7 @@ #include // for object #include +#include namespace morpheus { /****** Component public implementations *******************/ diff --git a/morpheus/_lib/include/morpheus/messages/multi_inference_nlp.hpp b/morpheus/_lib/include/morpheus/messages/multi_inference_nlp.hpp index 6153132e07..c802516598 100644 --- a/morpheus/_lib/include/morpheus/messages/multi_inference_nlp.hpp +++ b/morpheus/_lib/include/morpheus/messages/multi_inference_nlp.hpp @@ -17,8 +17,8 @@ #pragma once -#include "morpheus/messages/memory/inference_memory.hpp" // for InferenceMemory -#include "morpheus/messages/meta.hpp" // for MessageMeta +#include "morpheus/messages/memory/tensor_memory.hpp" +#include "morpheus/messages/meta.hpp" // for MessageMeta #include "morpheus/messages/multi.hpp" #include "morpheus/messages/multi_inference.hpp" #include "morpheus/objects/tensor_object.hpp" @@ -27,6 +27,7 @@ #include // for object #include +#include namespace morpheus { /****** Component public implementations *******************/ diff --git a/morpheus/_lib/include/morpheus/messages/multi_response.hpp b/morpheus/_lib/include/morpheus/messages/multi_response.hpp index 7a30899c63..ac2127d4bc 100644 --- a/morpheus/_lib/include/morpheus/messages/multi_response.hpp +++ b/morpheus/_lib/include/morpheus/messages/multi_response.hpp @@ -17,7 +17,6 @@ #pragma once -#include "morpheus/messages/memory/response_memory.hpp" #include "morpheus/messages/memory/tensor_memory.hpp" #include "morpheus/messages/meta.hpp" #include "morpheus/messages/multi.hpp" diff --git a/morpheus/_lib/include/morpheus/messages/multi_response_probs.hpp b/morpheus/_lib/include/morpheus/messages/multi_response_probs.hpp index 975de41dad..0761a30192 100644 --- 
a/morpheus/_lib/include/morpheus/messages/multi_response_probs.hpp +++ b/morpheus/_lib/include/morpheus/messages/multi_response_probs.hpp @@ -17,7 +17,7 @@ #pragma once -#include "morpheus/messages/memory/response_memory_probs.hpp" +#include "morpheus/messages/memory/tensor_memory.hpp" #include "morpheus/messages/meta.hpp" #include "morpheus/messages/multi.hpp" #include "morpheus/messages/multi_response.hpp" @@ -27,6 +27,7 @@ #include #include +#include namespace morpheus { /****** Component public implementations *******************/ diff --git a/morpheus/_lib/src/messages/multi.cpp b/morpheus/_lib/src/messages/multi.cpp index f6776fb00a..ca1f8d7a60 100644 --- a/morpheus/_lib/src/messages/multi.cpp +++ b/morpheus/_lib/src/messages/multi.cpp @@ -22,7 +22,6 @@ #include "morpheus/objects/table_info.hpp" #include "morpheus/objects/tensor_object.hpp" #include "morpheus/utilities/cudf_util.hpp" -#include "morpheus/utilities/cupy_util.hpp" #include // for cudaMemcpy, cudaMemcpy2D, cudaMemcpyDeviceToDevice #include // for column_view diff --git a/morpheus/_lib/src/messages/multi_inference.cpp b/morpheus/_lib/src/messages/multi_inference.cpp index 65a1384314..900a30e79f 100644 --- a/morpheus/_lib/src/messages/multi_inference.cpp +++ b/morpheus/_lib/src/messages/multi_inference.cpp @@ -17,7 +17,6 @@ #include "morpheus/messages/multi_inference.hpp" -#include "morpheus/messages/memory/inference_memory.hpp" #include "morpheus/messages/meta.hpp" #include "morpheus/messages/multi.hpp" diff --git a/morpheus/_lib/src/messages/multi_inference_fil.cpp b/morpheus/_lib/src/messages/multi_inference_fil.cpp index 36b6309462..d7e1c38fdd 100644 --- a/morpheus/_lib/src/messages/multi_inference_fil.cpp +++ b/morpheus/_lib/src/messages/multi_inference_fil.cpp @@ -17,7 +17,6 @@ #include "morpheus/messages/multi_inference_fil.hpp" -#include "morpheus/messages/memory/inference_memory.hpp" #include "morpheus/messages/memory/tensor_memory.hpp" #include "morpheus/messages/meta.hpp" #include 
"morpheus/messages/multi.hpp" diff --git a/morpheus/_lib/src/messages/multi_inference_nlp.cpp b/morpheus/_lib/src/messages/multi_inference_nlp.cpp index 7e8721761f..d4ad6bb5e3 100644 --- a/morpheus/_lib/src/messages/multi_inference_nlp.cpp +++ b/morpheus/_lib/src/messages/multi_inference_nlp.cpp @@ -17,7 +17,6 @@ #include "morpheus/messages/multi_inference_nlp.hpp" -#include "morpheus/messages/memory/inference_memory.hpp" #include "morpheus/messages/meta.hpp" #include "morpheus/messages/multi_inference.hpp" diff --git a/morpheus/_lib/src/messages/multi_response.cpp b/morpheus/_lib/src/messages/multi_response.cpp index c8dd060299..67c17c72f7 100644 --- a/morpheus/_lib/src/messages/multi_response.cpp +++ b/morpheus/_lib/src/messages/multi_response.cpp @@ -17,12 +17,15 @@ #include "morpheus/messages/multi_response.hpp" -#include "morpheus/messages/memory/response_memory.hpp" #include "morpheus/messages/meta.hpp" #include "morpheus/messages/multi.hpp" #include "morpheus/objects/tensor_object.hpp" +#include "morpheus/utilities/cupy_util.hpp" +#include "morpheus/utilities/string_util.hpp" #include +#include +#include #include #include diff --git a/morpheus/_lib/src/messages/multi_response_probs.cpp b/morpheus/_lib/src/messages/multi_response_probs.cpp index 13a0711817..2e33a370ed 100644 --- a/morpheus/_lib/src/messages/multi_response_probs.cpp +++ b/morpheus/_lib/src/messages/multi_response_probs.cpp @@ -17,7 +17,6 @@ #include "morpheus/messages/multi_response_probs.hpp" -#include "morpheus/messages/memory/response_memory_probs.hpp" #include "morpheus/messages/meta.hpp" #include diff --git a/morpheus/_lib/src/messages/multi_tensor.cpp b/morpheus/_lib/src/messages/multi_tensor.cpp index 8fd57f2fc7..156dff5bb3 100644 --- a/morpheus/_lib/src/messages/multi_tensor.cpp +++ b/morpheus/_lib/src/messages/multi_tensor.cpp @@ -24,8 +24,7 @@ #include // IWYU pragma: keep #include // for MRC_PTR_CAST -#include -#include // for key_error +#include // for key_error #include #include 
diff --git a/morpheus/_lib/src/stages/add_classification.cpp b/morpheus/_lib/src/stages/add_classification.cpp index ff7bc24fb2..ee682b82d8 100644 --- a/morpheus/_lib/src/stages/add_classification.cpp +++ b/morpheus/_lib/src/stages/add_classification.cpp @@ -35,8 +35,8 @@ #include #include #include // for divides, bind, placeholders +#include #include -#include #include // needed for logging #include // for move // IWYU thinks we need __alloc_traits<>::value_type for vector assignments diff --git a/morpheus/_lib/src/stages/add_scores.cpp b/morpheus/_lib/src/stages/add_scores.cpp index f6086a162b..ec638d3ae7 100644 --- a/morpheus/_lib/src/stages/add_scores.cpp +++ b/morpheus/_lib/src/stages/add_scores.cpp @@ -19,12 +19,14 @@ #include "morpheus/objects/tensor_object.hpp" // for TensorObject #include "morpheus/types.hpp" // for TensorIndex +#include "morpheus/utilities/string_util.hpp" #include #include // for size_t #include #include +#include #include #include #include // for logging diff --git a/morpheus/_lib/src/stages/triton_inference.cpp b/morpheus/_lib/src/stages/triton_inference.cpp index 6caf47901a..de97c9fbef 100644 --- a/morpheus/_lib/src/stages/triton_inference.cpp +++ b/morpheus/_lib/src/stages/triton_inference.cpp @@ -17,18 +17,18 @@ #include "morpheus/stages/triton_inference.hpp" -#include "morpheus/messages/memory/response_memory_probs.hpp" // for ResponseMemoryProbs -#include "morpheus/messages/memory/tensor_memory.hpp" // for TensorMemory -#include "morpheus/objects/dev_mem_info.hpp" // for DevMemInfo -#include "morpheus/objects/dtype.hpp" // for DType -#include "morpheus/objects/tensor.hpp" // for Tensor::create -#include "morpheus/objects/tensor_object.hpp" // for TensorObject -#include "morpheus/objects/triton_in_out.hpp" // for TritonInOut -#include "morpheus/types.hpp" // for TensorIndex, TensorMap -#include "morpheus/utilities/matx_util.hpp" // for MatxUtil::logits, MatxUtil::reduce_max -#include "morpheus/utilities/stage_util.hpp" // for 
foreach_map -#include "morpheus/utilities/string_util.hpp" // for MORPHEUS_CONCAT_STR -#include "morpheus/utilities/tensor_util.hpp" // for get_elem_count +#include "morpheus/messages/memory/response_memory.hpp" +#include "morpheus/messages/memory/tensor_memory.hpp" // for TensorMemory +#include "morpheus/objects/dev_mem_info.hpp" // for DevMemInfo +#include "morpheus/objects/dtype.hpp" // for DType +#include "morpheus/objects/tensor.hpp" // for Tensor::create +#include "morpheus/objects/tensor_object.hpp" // for TensorObject +#include "morpheus/objects/triton_in_out.hpp" // for TritonInOut +#include "morpheus/types.hpp" // for TensorIndex, TensorMap +#include "morpheus/utilities/matx_util.hpp" // for MatxUtil::logits, MatxUtil::reduce_max +#include "morpheus/utilities/stage_util.hpp" // for foreach_map +#include "morpheus/utilities/string_util.hpp" // for MORPHEUS_CONCAT_STR +#include "morpheus/utilities/tensor_util.hpp" // for get_elem_count #include // for cudaMemcpy, cudaMemcpy2D, cudaMemcpyDeviceToHost, cudaMemcpyHostToDevice #include diff --git a/morpheus/_lib/src/utilities/string_util.cpp b/morpheus/_lib/src/utilities/string_util.cpp index 4dd7e597ed..5073a195ff 100644 --- a/morpheus/_lib/src/utilities/string_util.cpp +++ b/morpheus/_lib/src/utilities/string_util.cpp @@ -17,8 +17,6 @@ #include "morpheus/utilities/string_util.hpp" -#include - namespace morpheus { bool StringUtil::str_contains(const std::string& str, const std::string& search_str) { From 80dde3a9ad82a2d7842977e4bf084f27e9346eaa Mon Sep 17 00:00:00 2001 From: Michael Demoret Date: Mon, 20 Mar 2023 15:53:00 -0600 Subject: [PATCH 82/85] Creating a base C++ class for AddScores and AddClassification --- morpheus/_lib/cmake/libraries/morpheus.cmake | 13 +- .../morpheus/stages/add_classification.hpp | 31 +---- .../include/morpheus/stages/add_scores.hpp | 28 +---- .../morpheus/stages/add_scores_stage_base.hpp | 83 +++++++++++++ .../_lib/src/stages/add_classification.cpp | 85 +------------- 
morpheus/_lib/src/stages/add_scores.cpp | 57 +-------- .../_lib/src/stages/add_scores_stage_base.cpp | 111 ++++++++++++++++++ 7 files changed, 216 insertions(+), 192 deletions(-) create mode 100644 morpheus/_lib/include/morpheus/stages/add_scores_stage_base.hpp create mode 100644 morpheus/_lib/src/stages/add_scores_stage_base.cpp diff --git a/morpheus/_lib/cmake/libraries/morpheus.cmake b/morpheus/_lib/cmake/libraries/morpheus.cmake index 42316cdda9..e480762834 100644 --- a/morpheus/_lib/cmake/libraries/morpheus.cmake +++ b/morpheus/_lib/cmake/libraries/morpheus.cmake @@ -18,28 +18,29 @@ add_library(morpheus # Keep these sorted! ${MORPHEUS_LIB_ROOT}/src/io/deserializers.cpp ${MORPHEUS_LIB_ROOT}/src/io/serializers.cpp - ${MORPHEUS_LIB_ROOT}/src/messages/memory/inference_memory.cpp ${MORPHEUS_LIB_ROOT}/src/messages/memory/inference_memory_fil.cpp ${MORPHEUS_LIB_ROOT}/src/messages/memory/inference_memory_nlp.cpp - ${MORPHEUS_LIB_ROOT}/src/messages/memory/response_memory.cpp + ${MORPHEUS_LIB_ROOT}/src/messages/memory/inference_memory.cpp ${MORPHEUS_LIB_ROOT}/src/messages/memory/response_memory_probs.cpp + ${MORPHEUS_LIB_ROOT}/src/messages/memory/response_memory.cpp ${MORPHEUS_LIB_ROOT}/src/messages/memory/tensor_memory.cpp ${MORPHEUS_LIB_ROOT}/src/messages/meta.cpp - ${MORPHEUS_LIB_ROOT}/src/messages/multi.cpp - ${MORPHEUS_LIB_ROOT}/src/messages/multi_inference.cpp ${MORPHEUS_LIB_ROOT}/src/messages/multi_inference_fil.cpp ${MORPHEUS_LIB_ROOT}/src/messages/multi_inference_nlp.cpp - ${MORPHEUS_LIB_ROOT}/src/messages/multi_response.cpp + ${MORPHEUS_LIB_ROOT}/src/messages/multi_inference.cpp ${MORPHEUS_LIB_ROOT}/src/messages/multi_response_probs.cpp + ${MORPHEUS_LIB_ROOT}/src/messages/multi_response.cpp ${MORPHEUS_LIB_ROOT}/src/messages/multi_tensor.cpp + ${MORPHEUS_LIB_ROOT}/src/messages/multi.cpp ${MORPHEUS_LIB_ROOT}/src/objects/fiber_queue.cpp ${MORPHEUS_LIB_ROOT}/src/objects/file_types.cpp ${MORPHEUS_LIB_ROOT}/src/objects/mutable_table_ctx_mgr.cpp - 
${MORPHEUS_LIB_ROOT}/src/objects/wrapped_tensor.cpp ${MORPHEUS_LIB_ROOT}/src/objects/python_data_table.cpp ${MORPHEUS_LIB_ROOT}/src/objects/rmm_tensor.cpp ${MORPHEUS_LIB_ROOT}/src/objects/tensor.cpp + ${MORPHEUS_LIB_ROOT}/src/objects/wrapped_tensor.cpp ${MORPHEUS_LIB_ROOT}/src/stages/add_classification.cpp + ${MORPHEUS_LIB_ROOT}/src/stages/add_scores_stage_base.cpp ${MORPHEUS_LIB_ROOT}/src/stages/add_scores.cpp ${MORPHEUS_LIB_ROOT}/src/stages/deserialize.cpp ${MORPHEUS_LIB_ROOT}/src/stages/file_source.cpp diff --git a/morpheus/_lib/include/morpheus/stages/add_classification.hpp b/morpheus/_lib/include/morpheus/stages/add_classification.hpp index a22ee807b7..7425e24e2c 100644 --- a/morpheus/_lib/include/morpheus/stages/add_classification.hpp +++ b/morpheus/_lib/include/morpheus/stages/add_classification.hpp @@ -17,23 +17,17 @@ #pragma once -#include "morpheus/messages/multi_response.hpp" // for MultiResponseMessage +#include "morpheus/stages/add_scores_stage_base.hpp" -#include -#include -#include #include -#include -#include -#include #include // for size_t #include #include #include -#include namespace morpheus { + /****** Component public implementations *******************/ /****** AddClassificationStage********************************/ @@ -48,15 +42,9 @@ namespace morpheus { * @brief Add detected classifications to each message. Classification labels based on probabilities calculated in * inference stage. Label indexes will be looked up in the idx2label property. 
*/ -class AddClassificationsStage - : public mrc::pymrc::PythonNode, std::shared_ptr> +class AddClassificationsStage : public AddScoresStageBase { public: - using base_t = mrc::pymrc::PythonNode, std::shared_ptr>; - using typename base_t::sink_type_t; - using typename base_t::source_type_t; - using typename base_t::subscribe_fn_t; - /** * @brief Construct a new Add Classifications Stage object * @@ -64,25 +52,12 @@ class AddClassificationsStage * @param idx2label : Index to classification labels map */ AddClassificationsStage(std::map idx2label, float threshold); - - private: - /** - * TODO(Documentation) - */ - subscribe_fn_t build_operator(); - - std::map m_idx2label; - float m_threshold; - - // The minimum number of columns needed to extract the label data - std::size_t m_min_col_count; }; /****** AddClassificationStageInterfaceProxy******************/ /** * @brief Interface proxy, used to insulate python bindings. */ - struct AddClassificationStageInterfaceProxy { /** diff --git a/morpheus/_lib/include/morpheus/stages/add_scores.hpp b/morpheus/_lib/include/morpheus/stages/add_scores.hpp index da2fc4bc6e..54c1b5c0ff 100644 --- a/morpheus/_lib/include/morpheus/stages/add_scores.hpp +++ b/morpheus/_lib/include/morpheus/stages/add_scores.hpp @@ -17,21 +17,14 @@ #pragma once -#include "morpheus/messages/multi_response.hpp" // for MultiResponseMessage +#include "morpheus/stages/add_scores_stage_base.hpp" -#include -#include -#include #include -#include -#include -#include // for apply, make_subscriber, observable_member, is_on_error<>::not_void, is_on_next_of<>::not_void, trace_activity #include // for size_t #include #include #include -#include namespace morpheus { /****** Component public implementations *******************/ @@ -48,32 +41,15 @@ namespace morpheus { * @brief Add probability scores to each message. Score labels based on probabilities calculated in inference stage. * Label indexes will be looked up in the idx2label property. 
*/ -class AddScoresStage - : public mrc::pymrc::PythonNode, std::shared_ptr> +class AddScoresStage : public AddScoresStageBase { public: - using base_t = mrc::pymrc::PythonNode, std::shared_ptr>; - using typename base_t::sink_type_t; - using typename base_t::source_type_t; - using typename base_t::subscribe_fn_t; - /** * @brief Construct a new Add Scores Stage object * - * @param num_class_labels : Number of classification labels * @param idx2label : Index to classification labels map */ AddScoresStage(std::map idx2label); - - /** - * TODO(Documentation) - */ - subscribe_fn_t build_operator(); - - std::map m_idx2label; - - // The minimum number of columns needed to extract the label data - std::size_t m_min_col_count; }; /****** AddScoresStageInterfaceProxy******************/ diff --git a/morpheus/_lib/include/morpheus/stages/add_scores_stage_base.hpp b/morpheus/_lib/include/morpheus/stages/add_scores_stage_base.hpp new file mode 100644 index 0000000000..3b97c3120f --- /dev/null +++ b/morpheus/_lib/include/morpheus/stages/add_scores_stage_base.hpp @@ -0,0 +1,83 @@ +/** + * SPDX-FileCopyrightText: Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "morpheus/messages/multi_response.hpp" // for MultiResponseMessage + +#include +#include +#include +#include +#include +#include +#include + +#include // for size_t +#include +#include +#include +#include +#include + +namespace morpheus { +/****** Component public implementations *******************/ +/****** AddClassificationStage********************************/ + +/** + * @addtogroup stages + * @{ + * @file + */ + +#pragma GCC visibility push(default) +/** + * @brief Base class for both `AddScoresStage` and `AddClassificationStage` + */ +class AddScoresStageBase + : public mrc::pymrc::PythonNode, std::shared_ptr> +{ + public: + using base_t = mrc::pymrc::PythonNode, std::shared_ptr>; + using typename base_t::sink_type_t; + using typename base_t::source_type_t; + using typename base_t::subscribe_fn_t; + + /** + * @brief Construct a new Add Classifications Stage object + * + * @param threshold : Threshold to consider true/false for each class + * @param idx2label : Index to classification labels map + */ + AddScoresStageBase(std::map idx2label, std::optional threshold); + + private: + /** + * Called every time a message is passed to this stage + */ + source_type_t on_data(sink_type_t x); + + std::map m_idx2label; + std::optional m_threshold; + + // The minimum number of columns needed to extract the label data + std::size_t m_min_col_count; +}; + +#pragma GCC visibility pop +/** @} */ // end of group +} // namespace morpheus diff --git a/morpheus/_lib/src/stages/add_classification.cpp b/morpheus/_lib/src/stages/add_classification.cpp index ee682b82d8..3fe6609fd8 100644 --- a/morpheus/_lib/src/stages/add_classification.cpp +++ b/morpheus/_lib/src/stages/add_classification.cpp @@ -17,95 +17,21 @@ #include "morpheus/stages/add_classification.hpp" -#include "morpheus/objects/dev_mem_info.hpp" // for DevMemInfo -#include "morpheus/objects/dtype.hpp" // for DType -#include "morpheus/objects/tensor.hpp" -#include 
"morpheus/objects/tensor_object.hpp" // for TensorObject -#include "morpheus/types.hpp" // for TensorIndex -#include "morpheus/utilities/matx_util.hpp" -#include "morpheus/utilities/string_util.hpp" -#include "morpheus/utilities/tensor_util.hpp" // for TensorUtils::get_element_stride - -#include // for cudaMemcpy, cudaMemcpyDeviceToDevice -#include -#include // for MRC_CHECK_CUDA -#include // for cuda_stream_per_thread -#include // for device_buffer - #include -#include -#include // for divides, bind, placeholders -#include #include -#include // needed for logging +#include #include // for move // IWYU thinks we need __alloc_traits<>::value_type for vector assignments // IWYU pragma: no_include namespace morpheus { + // Component public implementations // ************ AddClassificationStage **************************** // AddClassificationsStage::AddClassificationsStage(std::map idx2label, float threshold) : - PythonNode(base_t::op_factory_from_sub_fn(build_operator())), - m_idx2label(std::move(idx2label)), - m_threshold(threshold), - m_min_col_count(m_idx2label.rbegin()->first) // Ordered map's largest key will be the last entry + AddScoresStageBase(std::move(idx2label), threshold) {} -AddClassificationsStage::subscribe_fn_t AddClassificationsStage::build_operator() -{ - return [this](rxcpp::observable input, rxcpp::subscriber output) { - return input.subscribe(rxcpp::make_observer( - [this, &output](sink_type_t x) { - const auto& probs = x->get_probs_tensor(); - const auto& shape = probs.get_shape(); - - // Depending on the input the stride is given in bytes or elements, convert to elements - auto stride = TensorUtils::get_element_stride(probs.get_stride()); - - CHECK(shape.size() == 2 && shape[1] > m_min_col_count) - << "Model output did not contain enough columns to fufill the requested labels. 
Label " - "indexes: " - << StringUtil::map_to_str(m_idx2label.begin(), m_idx2label.end()) - << ", Model output columns: " << shape[1]; - - const auto num_rows = shape[0]; - const auto num_columns = shape[1]; - - // A bit ugly, but we cant get access to the rmm::device_buffer here. So make a copy - auto tmp_buffer = std::make_shared(probs.bytes(), rmm::cuda_stream_per_thread); - - MRC_CHECK_CUDA( - cudaMemcpy(tmp_buffer->data(), probs.data(), tmp_buffer->size(), cudaMemcpyDeviceToDevice)); - - // Now call the threshold function - auto thresh_bool_buffer = - MatxUtil::threshold(DevMemInfo{tmp_buffer, probs.dtype(), shape, stride}, m_threshold, false); - - auto tensor_obj = Tensor::create(thresh_bool_buffer, DType::create(), shape, stride); - - std::vector columns(m_idx2label.size()); - std::vector tensors(m_idx2label.size()); - - std::size_t i = 0; - for (const auto& [column_num, column_name] : m_idx2label) - { - columns[i] = column_name; - tensors[i] = tensor_obj.slice({0, static_cast(column_num)}, - {num_rows, static_cast(column_num + 1)}); - - ++i; - } - - x->set_meta(columns, tensors); - - output.on_next(x); - }, - [&](std::exception_ptr error_ptr) { output.on_error(error_ptr); }, - [&]() { output.on_completed(); })); - }; -} - // ************ AddClassificationStageInterfaceProxy ************* // std::shared_ptr> AddClassificationStageInterfaceProxy::init( mrc::segment::Builder& builder, @@ -113,8 +39,7 @@ std::shared_ptr> AddClassification std::map idx2label, float threshold) { - auto stage = builder.construct_object(name, idx2label, threshold); - - return stage; + return builder.construct_object(name, idx2label, threshold); } + } // namespace morpheus diff --git a/morpheus/_lib/src/stages/add_scores.cpp b/morpheus/_lib/src/stages/add_scores.cpp index ec638d3ae7..ccba4e631e 100644 --- a/morpheus/_lib/src/stages/add_scores.cpp +++ b/morpheus/_lib/src/stages/add_scores.cpp @@ -17,76 +17,29 @@ #include "morpheus/stages/add_scores.hpp" -#include 
"morpheus/objects/tensor_object.hpp" // for TensorObject -#include "morpheus/types.hpp" // for TensorIndex -#include "morpheus/utilities/string_util.hpp" - -#include +#include "morpheus/stages/add_scores_stage_base.hpp" #include // for size_t -#include -#include -#include #include #include -#include // for logging +#include #include // for move -#include // IWYU thinks we need __alloc_traits<>::value_type for vector assignments // IWYU pragma: no_include namespace morpheus { + // Component public implementations // ************ AddScoresStage **************************** // AddScoresStage::AddScoresStage(std::map idx2label) : - PythonNode(base_t::op_factory_from_sub_fn(build_operator())), - m_idx2label(std::move(idx2label)), - m_min_col_count(m_idx2label.rbegin()->first) + AddScoresStageBase(std::move(idx2label), std::nullopt) {} -AddScoresStage::subscribe_fn_t AddScoresStage::build_operator() -{ - return [this](rxcpp::observable input, rxcpp::subscriber output) { - return input.subscribe(rxcpp::make_observer( - [this, &output](sink_type_t x) { - const auto& probs = x->get_probs_tensor(); - const auto& shape = probs.get_shape(); - - CHECK(shape.size() == 2 && shape[1] > m_min_col_count) - << "Model output did not contain enough columns to fufill the requested labels. 
Label " - "indexes: " - << StringUtil::map_to_str(m_idx2label.begin(), m_idx2label.end()) - << ", Model output columns: " << shape[1]; - - const auto num_rows = shape[0]; - const auto num_columns = shape[1]; - - std::vector columns(m_idx2label.size()); - std::vector tensors(m_idx2label.size()); - - std::size_t i = 0; - for (const auto& [column_num, column_name] : m_idx2label) - { - columns[i] = column_name; - tensors[i] = probs.slice({0, static_cast(column_num)}, - {num_rows, static_cast(column_num + 1)}); - - ++i; - } - - x->set_meta(columns, tensors); - - output.on_next(x); - }, - [&](std::exception_ptr error_ptr) { output.on_error(error_ptr); }, - [&]() { output.on_completed(); })); - }; -} - // ************ AddScoresStageInterfaceProxy ************* // std::shared_ptr> AddScoresStageInterfaceProxy::init( mrc::segment::Builder& builder, const std::string& name, std::map idx2label) { return builder.construct_object(name, std::move(idx2label)); } + } // namespace morpheus diff --git a/morpheus/_lib/src/stages/add_scores_stage_base.cpp b/morpheus/_lib/src/stages/add_scores_stage_base.cpp new file mode 100644 index 0000000000..0d86f115ad --- /dev/null +++ b/morpheus/_lib/src/stages/add_scores_stage_base.cpp @@ -0,0 +1,111 @@ +/** + * SPDX-FileCopyrightText: Copyright (c) 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "morpheus/stages/add_scores_stage_base.hpp"
+
+#include "morpheus/objects/dev_mem_info.hpp"  // for DevMemInfo
+#include "morpheus/objects/dtype.hpp"         // for DType
+#include "morpheus/objects/tensor.hpp"
+#include "morpheus/objects/tensor_object.hpp"  // for TensorObject
+#include "morpheus/types.hpp"                  // for TensorIndex
+#include "morpheus/utilities/matx_util.hpp"
+#include "morpheus/utilities/string_util.hpp"
+#include "morpheus/utilities/tensor_util.hpp"  // for TensorUtils::get_element_stride
+
+#include   // for cudaMemcpy, cudaMemcpyDeviceToDevice
+#include 
+#include   // for MRC_CHECK_CUDA
+#include 
+#include   // for cuda_stream_per_thread
+#include   // for device_buffer
+
+#include 
+#include 
+#include 
+#include   // needed for logging
+#include   // for move
+// IWYU thinks we need __alloc_traits<>::value_type for vector assignments
+// IWYU pragma: no_include 
+
+namespace morpheus {
+
+// Component public implementations
+// ************ AddScoresStageBase **************************** //
+AddScoresStageBase::AddScoresStageBase(std::map idx2label, std::optional threshold) :
+  PythonNode(),
+  m_idx2label(std::move(idx2label)),
+  m_threshold(threshold),
+  m_min_col_count(m_idx2label.rbegin()->first)  // Ordered map's largest key will be the last entry
+{
+    this->pipe(rxcpp::operators::map([this](sink_type_t x) { return this->on_data(std::move(x)); }));
+}
+
+AddScoresStageBase::source_type_t AddScoresStageBase::on_data(sink_type_t x)
+{
+    const auto& probs = x->get_probs_tensor();
+    const auto& shape = probs.get_shape();
+
+    // Depending on the input the stride is given in bytes or elements, convert to elements
+    auto stride = TensorUtils::get_element_stride(probs.get_stride());
+
+    CHECK(shape.size() == 2 && shape[1] > m_min_col_count)
+        << "Model output did not contain enough columns to fufill the requested labels. 
Label " + "indexes: " + << StringUtil::map_to_str(m_idx2label.begin(), m_idx2label.end()) << ", Model output columns: " << shape[1]; + + const auto num_rows = shape[0]; + const auto num_columns = shape[1]; + + TensorObject output_tensor; + + if (m_threshold.has_value()) + { + // A bit ugly, but we cant get access to the rmm::device_buffer here. So make a copy + auto tmp_buffer = std::make_shared(probs.bytes(), rmm::cuda_stream_per_thread); + + MRC_CHECK_CUDA(cudaMemcpy(tmp_buffer->data(), probs.data(), tmp_buffer->size(), cudaMemcpyDeviceToDevice)); + + // Now call the threshold function + auto thresh_bool_buffer = + MatxUtil::threshold(DevMemInfo{tmp_buffer, probs.dtype(), shape, stride}, *m_threshold, false); + + output_tensor = Tensor::create(thresh_bool_buffer, DType::create(), shape, stride); + } + else + { + output_tensor = std::move(probs); + } + + std::vector columns(m_idx2label.size()); + std::vector tensors(m_idx2label.size()); + + std::size_t i = 0; + for (const auto& [column_num, column_name] : m_idx2label) + { + columns[i] = column_name; + tensors[i] = output_tensor.slice({0, static_cast(column_num)}, + {num_rows, static_cast(column_num + 1)}); + + ++i; + } + + x->set_meta(columns, tensors); + + return x; +} + +} // namespace morpheus From b8c1a0d31fd45c5618670b7491f160a5c2963de5 Mon Sep 17 00:00:00 2001 From: Michael Demoret Date: Tue, 21 Mar 2023 11:34:46 -0600 Subject: [PATCH 83/85] Adding ignore for MRC warnings --- pyproject.toml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index bfd703f280..edd67611b5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,10 @@ filterwarnings = [ 'ignore:`np.object` is a deprecated alias for the builtin `object`. To silence this warning, use `object` by itself. 
Doing this will not modify any behavior and is safe', 'ignore:Warning the df property returns a copy, please use the copy_dataframe method or the mutable_dataframe context manager to modify the DataFrame in-place instead.', 'ignore:`np.MachAr` is deprecated \(NumPy 1.22\):DeprecationWarning', - 'ignore:Please use `spmatrix` from the `scipy.sparse` namespace, the `scipy.sparse.base` namespace is deprecated:DeprecationWarning' + 'ignore:Please use `spmatrix` from the `scipy.sparse` namespace, the `scipy.sparse.base` namespace is deprecated:DeprecationWarning', + # Warnings coming from MRC make_node and make_node full. Remove after #782 is completed + 'ignore:make_node_full\(name, sub_fn\) is deprecated and will be removed in a future version.:DeprecationWarning', + 'ignore:Passing a map function object to make_node\(\) is deprecated and will be removed in a future version.:DeprecationWarning' ] testpaths = ["tests"] From 775b5833c8fc7b11008e8b94e6f98b607b59bdf6 Mon Sep 17 00:00:00 2001 From: Michael Demoret Date: Tue, 21 Mar 2023 12:53:27 -0600 Subject: [PATCH 84/85] Fixing broken tests by removing --- .../messages/memory/tensor_memory.hpp | 29 ++++--- .../morpheus/messages/multi_response.hpp | 2 +- .../morpheus/messages/multi_tensor.hpp | 2 +- .../morpheus/objects/tensor_object.hpp | 31 +++++++- .../src/messages/memory/tensor_memory.cpp | 75 ++++++++++--------- morpheus/_lib/src/messages/multi_response.cpp | 2 +- morpheus/_lib/src/messages/multi_tensor.cpp | 2 +- .../_lib/src/stages/add_scores_stage_base.cpp | 16 ++-- morpheus/_lib/src/stages/preprocess_nlp.cpp | 27 ++++--- morpheus/_lib/src/stages/triton_inference.cpp | 11 +-- morpheus/_lib/src/utilities/cupy_util.cpp | 2 +- 11 files changed, 118 insertions(+), 81 deletions(-) diff --git a/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp b/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp index 0e4c388556..4615d3c24b 100644 --- 
a/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp +++ b/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp @@ -63,7 +63,6 @@ class TensorMemory virtual ~TensorMemory() = default; TensorIndex count{0}; - TensorMap tensors; /** * @brief Verify whether the specified tensor name is present in the tensor memory @@ -74,15 +73,6 @@ class TensorMemory */ bool has_tensor(const std::string& name) const; - /** - * @brief Copy tensor ranges - * - * @param ranges - * @param num_selected_rows - * @return TensorMap - */ - TensorMap copy_tensor_ranges(const std::vector& ranges, TensorIndex num_selected_rows) const; - /** * @brief Get the tensor object identified by `name` * @@ -110,6 +100,13 @@ class TensorMemory */ void set_tensor(const std::string& name, TensorObject&& tensor); + /** + * @brief Get a reference to the internal tensors map + * + * @return const TensorMap& + */ + const TensorMap& get_tensors() const; + /** * @brief Set the tensors object * @@ -118,6 +115,15 @@ class TensorMemory */ void set_tensors(TensorMap&& tensors); + /** + * @brief Copy tensor ranges + * + * @param ranges + * @param num_selected_rows + * @return TensorMap + */ + TensorMap copy_tensor_ranges(const std::vector& ranges, TensorIndex num_selected_rows) const; + protected: /** * @brief Checks if the number of rows in `tensor` matches `count` @@ -142,6 +148,9 @@ class TensorMemory * @throws std::runtime_error If no tensor matching `name` exists */ void verify_tensor_exists(const std::string& name) const; + + private: + TensorMap m_tensors; }; /****** TensorMemoryInterfaceProxy *************************/ diff --git a/morpheus/_lib/include/morpheus/messages/multi_response.hpp b/morpheus/_lib/include/morpheus/messages/multi_response.hpp index ac2127d4bc..5a830cf73a 100644 --- a/morpheus/_lib/include/morpheus/messages/multi_response.hpp +++ b/morpheus/_lib/include/morpheus/messages/multi_response.hpp @@ -109,7 +109,7 @@ class MultiResponseMessage : public 
DerivedMultiMessage new_message, TensorIndex start, TensorIndex stop) const override;
diff --git a/morpheus/_lib/include/morpheus/objects/tensor_object.hpp b/morpheus/_lib/include/morpheus/objects/tensor_object.hpp
index 1361d4e09e..4df52b7813 100644
--- a/morpheus/_lib/include/morpheus/objects/tensor_object.hpp
+++ b/morpheus/_lib/include/morpheus/objects/tensor_object.hpp
@@ -368,18 +368,39 @@ struct TensorObject final
         return output;
     }
 
-    // move assignment
-    TensorObject& operator=(TensorObject&& other) noexcept
+    /**
+     * @brief Explicitly swap the pointers to the underlying data with another tensor. Use in place of the move operator
+     * since it's hard to determine when you want to perform a move vs copy the data.
+     *
+     * @return TensorObject&
+     */
+    TensorObject& swap(TensorObject&& other) noexcept
     {
         // Guard self assignment
         if (this == &other)
             return *this;
 
-        m_md     = std::exchange(other.m_md, nullptr);  // leave other in valid state
-        m_tensor = std::exchange(other.m_tensor, nullptr);
+        using std::swap;
+
+        swap(m_md, other.m_md);
+        swap(m_tensor, other.m_tensor);
+
         return *this;
     }
 
+    /**
+     * @brief Swap this tensor with another. Only the pointers to the underlying data are exchanged. No values are
+     * moved.
+     *
+     */
+    friend void swap(TensorObject& lhs, TensorObject& rhs) noexcept
+    {
+        using std::swap;
+
+        swap(lhs.m_md, rhs.m_md);
+        swap(lhs.m_tensor, rhs.m_tensor);
+    }
+
     // copy assignment
     TensorObject& operator=(const TensorObject& other)
     {
@@ -387,6 +408,8 @@ struct TensorObject final
         if (this == &other)
             return *this;
 
+        CHECK(m_md && m_tensor) << "Cannot set an empty tensor. 
Use `std::swap(tensor1, tensor2)` instead."; + // Check for valid assignment if (this->get_shape() != other.get_shape()) { diff --git a/morpheus/_lib/src/messages/memory/tensor_memory.cpp b/morpheus/_lib/src/messages/memory/tensor_memory.cpp index 52c96a99a1..6b87a04136 100644 --- a/morpheus/_lib/src/messages/memory/tensor_memory.cpp +++ b/morpheus/_lib/src/messages/memory/tensor_memory.cpp @@ -38,20 +38,49 @@ namespace morpheus { /****** Component public implementations *******************/ /****** TensorMemory****************************************/ TensorMemory::TensorMemory(TensorIndex count) : count(count) {} -TensorMemory::TensorMemory(TensorIndex count, TensorMap&& tensors) : count(count), tensors(std::move(tensors)) +TensorMemory::TensorMemory(TensorIndex count, TensorMap&& tensors) : count(count), m_tensors(std::move(tensors)) { - check_tensors_length(this->tensors); + check_tensors_length(this->m_tensors); } bool TensorMemory::has_tensor(const std::string& name) const { - return this->tensors.find(name) != this->tensors.end(); + return this->m_tensors.find(name) != this->m_tensors.end(); +} + +const TensorObject& TensorMemory::get_tensor(const std::string& name) const +{ + verify_tensor_exists(name); + return m_tensors.at(name); +} + +TensorObject& TensorMemory::get_tensor(const std::string& name) +{ + verify_tensor_exists(name); + return m_tensors[name]; +} + +void TensorMemory::set_tensor(const std::string& name, TensorObject&& tensor) +{ + check_tensor_length(tensor); + this->m_tensors[name].swap(std::move(tensor)); +} + +const TensorMap& TensorMemory::get_tensors() const +{ + return m_tensors; +} + +void TensorMemory::set_tensors(TensorMap&& tensors) +{ + check_tensors_length(tensors); + this->m_tensors = std::move(tensors); } TensorMap TensorMemory::copy_tensor_ranges(const std::vector& ranges, TensorIndex num_selected_rows) const { TensorMap tensors; - for (const auto& p : this->tensors) + for (const auto& p : this->m_tensors) { 
tensors.insert(std::pair{p.first, p.second.copy_rows(ranges, num_selected_rows)}); } @@ -69,32 +98,6 @@ void TensorMemory::check_tensor_length(const TensorObject& tensor) } } -void TensorMemory::verify_tensor_exists(const std::string& name) const -{ - if (!has_tensor(name)) - { - throw std::runtime_error(MORPHEUS_CONCAT_STR("Tensor: '" << name << "' not found in memory")); - } -} - -const TensorObject& TensorMemory::get_tensor(const std::string& name) const -{ - verify_tensor_exists(name); - return tensors.at(name); -} - -TensorObject& TensorMemory::get_tensor(const std::string& name) -{ - verify_tensor_exists(name); - return tensors[name]; -} - -void TensorMemory::set_tensor(const std::string& name, TensorObject&& tensor) -{ - check_tensor_length(tensor); - this->tensors.insert_or_assign(name, std::move(tensor)); -} - void TensorMemory::check_tensors_length(const TensorMap& tensors) { for (const auto& p : tensors) @@ -103,10 +106,12 @@ void TensorMemory::check_tensors_length(const TensorMap& tensors) } } -void TensorMemory::set_tensors(TensorMap&& tensors) +void TensorMemory::verify_tensor_exists(const std::string& name) const { - check_tensors_length(tensors); - this->tensors = std::move(tensors); + if (!has_tensor(name)) + { + throw std::runtime_error(MORPHEUS_CONCAT_STR("Tensor: '" << name << "' not found in memory")); + } } /****** TensorMemoryInterfaceProxy *************************/ @@ -130,7 +135,7 @@ TensorIndex TensorMemoryInterfaceProxy::get_count(TensorMemory& self) std::vector TensorMemoryInterfaceProxy::tensor_names_getter(TensorMemory& self) { - return foreach_map(self.tensors, [](const auto& item) -> std::string { + return foreach_map(self.get_tensors(), [](const auto& item) -> std::string { // Just return the keys return item.first; }); @@ -143,7 +148,7 @@ bool TensorMemoryInterfaceProxy::has_tensor(TensorMemory& self, std::string name CupyUtil::py_tensor_map_t TensorMemoryInterfaceProxy::get_tensors(TensorMemory& self) { - return 
CupyUtil::tensors_to_cupy(self.tensors); + return CupyUtil::tensors_to_cupy(self.get_tensors()); } void TensorMemoryInterfaceProxy::set_tensors(TensorMemory& self, CupyUtil::py_tensor_map_t tensors) diff --git a/morpheus/_lib/src/messages/multi_response.cpp b/morpheus/_lib/src/messages/multi_response.cpp index 67c17c72f7..010f4cf67f 100644 --- a/morpheus/_lib/src/messages/multi_response.cpp +++ b/morpheus/_lib/src/messages/multi_response.cpp @@ -58,7 +58,7 @@ void MultiResponseMessage::set_output(const std::string& name, const TensorObjec set_tensor(name, value); } -const TensorObject MultiResponseMessage::get_probs_tensor() const +TensorObject MultiResponseMessage::get_probs_tensor() const { try { diff --git a/morpheus/_lib/src/messages/multi_tensor.cpp b/morpheus/_lib/src/messages/multi_tensor.cpp index 156dff5bb3..4828ee8346 100644 --- a/morpheus/_lib/src/messages/multi_tensor.cpp +++ b/morpheus/_lib/src/messages/multi_tensor.cpp @@ -159,7 +159,7 @@ void MultiTensorMessage::set_tensor(const std::string& name, const TensorObject& slice = value; } -const TensorObject MultiTensorMessage::get_id_tensor() const +TensorObject MultiTensorMessage::get_id_tensor() const { try { diff --git a/morpheus/_lib/src/stages/add_scores_stage_base.cpp b/morpheus/_lib/src/stages/add_scores_stage_base.cpp index 0d86f115ad..16dbf8f6bf 100644 --- a/morpheus/_lib/src/stages/add_scores_stage_base.cpp +++ b/morpheus/_lib/src/stages/add_scores_stage_base.cpp @@ -56,7 +56,7 @@ AddScoresStageBase::AddScoresStageBase(std::map idx2la AddScoresStageBase::source_type_t AddScoresStageBase::on_data(sink_type_t x) { - const auto& probs = x->get_probs_tensor(); + auto probs = x->get_probs_tensor(); const auto& shape = probs.get_shape(); // Depending on the input the stride is given in bytes or elements, convert to elements @@ -83,22 +83,22 @@ AddScoresStageBase::source_type_t AddScoresStageBase::on_data(sink_type_t x) auto thresh_bool_buffer = MatxUtil::threshold(DevMemInfo{tmp_buffer, 
probs.dtype(), shape, stride}, *m_threshold, false); - output_tensor = Tensor::create(thresh_bool_buffer, DType::create(), shape, stride); + output_tensor.swap(Tensor::create(thresh_bool_buffer, DType::create(), shape, stride)); } else { - output_tensor = std::move(probs); + output_tensor.swap(std::move(probs)); } - std::vector columns(m_idx2label.size()); - std::vector tensors(m_idx2label.size()); + std::vector columns; + std::vector tensors; std::size_t i = 0; for (const auto& [column_num, column_name] : m_idx2label) { - columns[i] = column_name; - tensors[i] = output_tensor.slice({0, static_cast(column_num)}, - {num_rows, static_cast(column_num + 1)}); + columns.push_back(column_name); + tensors.emplace_back(output_tensor.slice({0, static_cast(column_num)}, + {num_rows, static_cast(column_num + 1)})); ++i; } diff --git a/morpheus/_lib/src/stages/preprocess_nlp.cpp b/morpheus/_lib/src/stages/preprocess_nlp.cpp index e08fde4015..7de690574b 100644 --- a/morpheus/_lib/src/stages/preprocess_nlp.cpp +++ b/morpheus/_lib/src/stages/preprocess_nlp.cpp @@ -101,23 +101,23 @@ PreprocessNLPStage::subscribe_fn_t PreprocessNLPStage::build_operator() cudf::cast(token_results.tensor_token_ids->view(), cudf::data_type(cudf::type_id::INT32)) ->release(); - memory->tensors["input_ids"] = - std::move(Tensor::create(std::move(input_ids_released.data), - DType::create(), - {length, static_cast(token_results.sequence_length)}, - {}, - 0)); + memory->set_tensor("input_ids", + Tensor::create(std::move(input_ids_released.data), + DType::create(), + {length, static_cast(token_results.sequence_length)}, + {}, + 0)); length = token_results.tensor_attention_mask->size() / token_results.sequence_length; auto input_mask_released = cudf::cast(token_results.tensor_attention_mask->view(), cudf::data_type(cudf::type_id::INT32)) ->release(); - memory->tensors["input_mask"] = - std::move(Tensor::create(std::move(input_mask_released.data), - DType::create(), - {length, 
static_cast(token_results.sequence_length)}, - {}, - 0)); + memory->set_tensor("input_mask", + Tensor::create(std::move(input_mask_released.data), + DType::create(), + {length, static_cast(token_results.sequence_length)}, + {}, + 0)); auto tensor_index_dtype = DType::create(); length = token_results.tensor_metadata->size() / 3; @@ -134,8 +134,7 @@ PreprocessNLPStage::subscribe_fn_t PreprocessNLPStage::build_operator() DevMemInfo{seq_ids_data, tensor_index_dtype.type_id(), {length, 3}, {1, 3}}, x->mess_offset); } - memory->tensors["seq_ids"] = - std::move(Tensor::create(seq_ids_data, tensor_index_dtype, {length, 3}, {}, 0)); + memory->set_tensor("seq_ids", Tensor::create(seq_ids_data, tensor_index_dtype, {length, 3}, {}, 0)); auto next = std::make_shared( x->meta, x->mess_offset, x->mess_count, std::move(memory), 0, memory->count); diff --git a/morpheus/_lib/src/stages/triton_inference.cpp b/morpheus/_lib/src/stages/triton_inference.cpp index de97c9fbef..5faea798ae 100644 --- a/morpheus/_lib/src/stages/triton_inference.cpp +++ b/morpheus/_lib/src/stages/triton_inference.cpp @@ -96,8 +96,8 @@ void build_output_tensors(TensorIndex count, // Triton results are always in row-major as required by the KServe protocol // https://github.com/kserve/kserve/blob/master/docs/predict-api/v2/required_api.md#tensor-data ShapeType stride{total_shape[1], 1}; - output_tensors[model_output.mapped_name] = - Tensor::create(std::move(output_buffer), model_output.datatype, total_shape, stride, 0); + output_tensors[model_output.mapped_name].swap( + Tensor::create(std::move(output_buffer), model_output.datatype, total_shape, stride, 0)); } } @@ -183,8 +183,8 @@ void reduce_outputs(const InferenceClientStage::sink_type_t& x, buffer_map_t& ou output_buffers[output.first] = reduced_buffer; - reduced_outputs[output.first] = - Tensor::create(std::move(reduced_buffer), tensor.dtype(), reduced_shape, stride, 0); + reduced_outputs[output.first].swap( + Tensor::create(std::move(reduced_buffer), 
tensor.dtype(), reduced_shape, stride, 0)); } output_tensors = std::move(reduced_outputs); @@ -208,7 +208,8 @@ void apply_logits(buffer_map_t& output_buffers, TensorMap& output_tensors) output_buffers[output.first] = output_buffer; // For logits the input and output shapes will be the same - logit_outputs[output.first] = Tensor::create(std::move(output_buffer), input_tensor.dtype(), shape, stride, 0); + logit_outputs[output.first].swap( + Tensor::create(std::move(output_buffer), input_tensor.dtype(), shape, stride, 0)); } output_tensors = std::move(logit_outputs); diff --git a/morpheus/_lib/src/utilities/cupy_util.cpp b/morpheus/_lib/src/utilities/cupy_util.cpp index aed3ad1bee..18833af887 100644 --- a/morpheus/_lib/src/utilities/cupy_util.cpp +++ b/morpheus/_lib/src/utilities/cupy_util.cpp @@ -164,7 +164,7 @@ TensorMap CupyUtil::cupy_to_tensors(const py_tensor_map_t& cupy_tensors) tensor_map_t tensors; for (const auto& tensor : cupy_tensors) { - tensors[tensor.first] = std::move(cupy_to_tensor(tensor.second)); + tensors[tensor.first].swap(std::move(cupy_to_tensor(tensor.second))); } return tensors; From b6fe24b55bcb4a3e8864f77f7910bed6439e235a Mon Sep 17 00:00:00 2001 From: Michael Demoret Date: Tue, 21 Mar 2023 13:03:12 -0600 Subject: [PATCH 85/85] IWYU fixes --- morpheus/_lib/src/stages/preprocess_nlp.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/morpheus/_lib/src/stages/preprocess_nlp.cpp b/morpheus/_lib/src/stages/preprocess_nlp.cpp index 7de690574b..f785c7fe59 100644 --- a/morpheus/_lib/src/stages/preprocess_nlp.cpp +++ b/morpheus/_lib/src/stages/preprocess_nlp.cpp @@ -23,8 +23,7 @@ #include "morpheus/objects/dtype.hpp" #include "morpheus/objects/table_info.hpp" // for TableInfo #include "morpheus/objects/tensor.hpp" -#include "morpheus/objects/tensor_object.hpp" // for TensorObject -#include "morpheus/types.hpp" // for TensorIndex, TensorMap +#include "morpheus/types.hpp" // for TensorIndex, TensorMap #include 
"morpheus/utilities/matx_util.hpp" #include // for column, column::contents