Skip to content

Commit

Permalink
Fix Python bindings for TensorMemory (#655)
Browse files Browse the repository at this point in the history
* Makes the Python API compatible with the Python C++ bindings for `TensorMemory`, `InferenceMemory` & `ResponseMemory` classes.
* Replaces the `tensors` attribute for memory classes with explicit `get_tensors` and `set_tensors` methods. 
* Make get_input, set_input, get_output & set_output methods actual methods on the base
* Associated interface proxy classes now inherit from each other limiting redundant python conversion code.
* Expose `MultiTensorMessage` class to Python allowing Python inheritance hierarchy to match that of C++, and consolidating some duplicated code.


The reason for removing the attribute is that on the C++ side returning a Python representation of a tensor is rather costly and is always returned as a copy. We want to avoid the obvious bugs that can occur with anyone doing:
```python
m = tensor_memory.TensorMemory(10)
m.tensors['c'] = cp.zeros(count)
```

Which would have worked when C++ execution is disabled, and the old API is implying that it *should* work. Instead the API is changed to:
```python
m = tensor_memory.TensorMemory(10)
tensors = m.get_tensors()

tensors['c'] = cp.zeros(count)

m.set_tensors(tensors)
```


fixes #604

Authors:
  - David Gardner (https://github.com/dagardner-nv)
  - Michael Demoret (https://github.com/mdemoret-nv)

Approvers:
  - Michael Demoret (https://github.com/mdemoret-nv)

URL: #655
  • Loading branch information
dagardner-nv authored Mar 7, 2023
1 parent e3c6f52 commit 9cb5a4d
Show file tree
Hide file tree
Showing 45 changed files with 1,521 additions and 977 deletions.
3 changes: 3 additions & 0 deletions ci/iwyu/mappings.imp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@
# Protobuf
{ "include": [ "<google/protobuf/repeated_ptr_field.h>", private, "<google/protobuf/repeated_field.h>", "public" ] },

# pybind11
{ "include": [ "<pybind11/detail/common.h>", private, "<pybind11/pytypes.h>", "public" ] },

# rxcpp
# Hide includes that are exported by <rxcpp/rx.hpp>
{ "include": [ "\"rx-includes.hpp\"", private, "<rxcpp/rx.hpp>", "public" ] },
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,10 @@
#include "morpheus/messages/memory/tensor_memory.hpp"
#include "morpheus/types.hpp" // for TensorMap

#include <cstddef>
#include <pybind11/pytypes.h> // for object

#include <cstddef> // for size_t
#include <memory> // for shared_ptr
#include <string>

namespace morpheus {
Expand Down Expand Up @@ -54,7 +57,7 @@ class InferenceMemory : public TensorMemory
InferenceMemory(size_t count, TensorMap&& tensors);

/**
* @brief Checks if a tensor named `name` exists in `tensors`
* @brief Checks if a tensor named `name` exists in `tensors`. Alias for `has_tensor`.
*
* @param name
* @return true
Expand All @@ -67,15 +70,17 @@ class InferenceMemory : public TensorMemory
/**
* @brief Interface proxy, used to insulate python bindings.
*/
struct InferenceMemoryInterfaceProxy
struct InferenceMemoryInterfaceProxy : public TensorMemoryInterfaceProxy
{
/**
* @brief Get the count object
* @brief Create and initialize a InferenceMemory object, and return a shared pointer to the result. Each array in
* `tensors` should be of length `count`.
*
* @param self
* @return std::size_t
* @param count : Length of each array in `tensors`
* @param tensors : Map of tensor names to cupy arrays
* @return std::shared_ptr<InferenceMemory>
*/
static std::size_t get_count(InferenceMemory& self);
static std::shared_ptr<InferenceMemory> init(std::size_t count, pybind11::object& tensors);
};
#pragma GCC visibility pop

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@

#include <cstddef>
#include <memory>
#include <string>

namespace morpheus {
/****** Component public implementations *******************/
Expand All @@ -52,7 +51,7 @@ class InferenceMemoryFIL : public InferenceMemory
* @param seq_ids : Ids used to index from an inference input to a message. Necessary since there can be more
* inference inputs than messages (i.e., if some messages get broken into multiple inference requests)
*/
InferenceMemoryFIL(size_t count, TensorObject input__0, TensorObject seq_ids);
InferenceMemoryFIL(size_t count, TensorObject&& input__0, TensorObject&& seq_ids);

/**
* @brief Returns the 'input__0' tensor, throws a `std::runtime_error` if it does not exist
Expand All @@ -73,27 +72,26 @@ class InferenceMemoryFIL : public InferenceMemory
/**
* @brief Sets a tensor named 'input__0'
*
* @param input_ids
* @throw std::runtime_error
* @throw std::runtime_error
* @param input__0
* @throws std::length_error If the number of rows in `input__0` does not match `count`.
*/
void set_input__0(TensorObject input_ids);
void set_input__0(TensorObject&& input__0);

/**
* @brief Sets a tensor named 'seq_ids'
*
* @param seq_ids
* @throw std::runtime_error
* @throws std::length_error If the number of rows in `seq_ids` does not match `count`.
*/
void set_seq_ids(TensorObject seq_ids);
void set_seq_ids(TensorObject&& seq_ids);
};

/****** InferenceMemoryFILInterfaceProxy *************************/
#pragma GCC visibility push(default)
/**
* @brief Interface proxy, used to insulate python bindings
*/
struct InferenceMemoryFILInterfaceProxy
struct InferenceMemoryFILInterfaceProxy : public InferenceMemoryInterfaceProxy
{
/**
* @brief Create and initialize an InferenceMemoryFIL object, and return a shared pointer to the result
Expand All @@ -108,23 +106,6 @@ struct InferenceMemoryFILInterfaceProxy
pybind11::object input__0,
pybind11::object seq_ids);

/**
* Get messages count in the inference memory instance
*
* @param self
* @return std::size_t
*/
static std::size_t count(InferenceMemoryFIL& self);

/**
* Return the requested tensor for a given name
*
* @param self
* @param name Tensor name
* @return TensorObject
*/
static TensorObject get_tensor(InferenceMemoryFIL& self, const std::string& name);

/**
* @brief Returns the 'input__0' as cupy array
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,57 +52,63 @@ class InferenceMemoryNLP : public InferenceMemory
* @param seq_ids : Ids used to index from an inference input to a message. Necessary since there can be more
inference inputs than messages (i.e., if some messages get broken into multiple inference requests)
*/
InferenceMemoryNLP(std::size_t count, TensorObject input_ids, TensorObject input_mask, TensorObject seq_ids);
InferenceMemoryNLP(std::size_t count, TensorObject&& input_ids, TensorObject&& input_mask, TensorObject&& seq_ids);

/**
* @brief Get the input ids object
*
* @return const TensorObject&
* @throws std::runtime_error If no tensor named "input_ids" exists
*/
const TensorObject& get_input_ids() const;

/**
* @brief Get the input mask object
*
* @return const TensorObject&
* @throws std::runtime_error If no tensor named "input_mask" exists
*/
const TensorObject& get_input_mask() const;

/**
* @brief Get the seq ids object
*
* @return const TensorObject&
* @throws std::runtime_error If no tensor named "seq_ids" exists
*/
const TensorObject& get_seq_ids() const;

/**
* @brief Set the input ids object
*
* @param input_ids
* @throws std::length_error If the number of rows in `input_ids` does not match `count`.
*/
void set_input_ids(TensorObject input_ids);
void set_input_ids(TensorObject&& input_ids);

/**
* @brief Set the input mask object
*
* @param input_mask
* @throws std::length_error If the number of rows in `input_mask` does not match `count`.
*/
void set_input_mask(TensorObject input_mask);
void set_input_mask(TensorObject&& input_mask);

/**
* @brief Set the seq ids object
*
* @param seq_ids
* @throws std::length_error If the number of rows in `seq_ids` does not match `count`.
*/
void set_seq_ids(TensorObject seq_ids);
void set_seq_ids(TensorObject&& seq_ids);
};

/****** InferenceMemoryNLPInterfaceProxy********************/
#pragma GCC visibility push(default)
/**
* @brief Interface proxy, used to insulate python bindings.
*/
struct InferenceMemoryNLPInterfaceProxy
struct InferenceMemoryNLPInterfaceProxy : public InferenceMemoryInterfaceProxy
{
/**
* @brief Create and initialize an InferenceMemoryNLP object, and return a shared pointer to the result
Expand All @@ -119,19 +125,12 @@ struct InferenceMemoryNLPInterfaceProxy
pybind11::object input_mask,
pybind11::object seq_ids);

/**
* Get messages count in the inference memory object
*
* @param self
* @return std::size_t
*/
static std::size_t count(InferenceMemoryNLP& self);

/**
* @brief : Returns token-ids for each string padded with 0s to max_length as python object
*
* @param self
* @return pybind11::object
* @throws pybind11::attribute_error
*/
static pybind11::object get_input_ids(InferenceMemoryNLP& self);

Expand All @@ -148,6 +147,7 @@ struct InferenceMemoryNLPInterfaceProxy
*
* @param self
* @return pybind11::object
* @throws pybind11::attribute_error
*/
static pybind11::object get_input_mask(InferenceMemoryNLP& self);

Expand All @@ -164,6 +164,7 @@ struct InferenceMemoryNLPInterfaceProxy
*
* @param self
* @return pybind11::object
* @throws pybind11::attribute_error
*/
static pybind11::object get_seq_ids(InferenceMemoryNLP& self);

Expand Down
30 changes: 11 additions & 19 deletions morpheus/_lib/include/morpheus/messages/memory/response_memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@
#pragma once

#include "morpheus/messages/memory/tensor_memory.hpp"
#include "morpheus/objects/tensor_object.hpp" // for TensorObject
#include "morpheus/types.hpp" // for TensorMap
#include "morpheus/types.hpp" // for TensorMap

#include <pybind11/pytypes.h>
#include <pybind11/pytypes.h> // for object

#include <cstddef> // for size_t
#include <memory> // for shared_ptr
#include <string>

namespace morpheus {
Expand Down Expand Up @@ -57,7 +57,7 @@ class ResponseMemory : public TensorMemory
ResponseMemory(size_t count, TensorMap&& tensors);

/**
* @brief Checks if a tensor named `name` exists in `tensors`
* @brief Checks if a tensor named `name` exists in `tensors`. Alias for `has_tensor`.
*
* @param name
* @return true
Expand All @@ -72,25 +72,17 @@ class ResponseMemory : public TensorMemory
* @brief Interface proxy, used to insulate python bindings.
*
*/
struct ResponseMemoryInterfaceProxy
struct ResponseMemoryInterfaceProxy : public TensorMemoryInterfaceProxy
{
/**
* @brief Get the output object
* @brief Create and initialize a ResponseMemory object, and return a shared pointer to the result. Each array in
* `cupy_tensors` should be of length `count`.
*
* @param self
* @param name
* @return pybind11::object
*/
static pybind11::object get_output(ResponseMemory& self, const std::string& name);

/**
* @brief Get the output tensor object
*
* @param self
* @param name
* @return TensorObject
* @param count : Length of each array in `cupy_tensors`
* @param cupy_tensors : Map of tensor names to cupy arrays
* @return std::shared_ptr<ResponseMemory>
*/
static TensorObject get_output_tensor(ResponseMemory& self, const std::string& name);
static std::shared_ptr<ResponseMemory> init(std::size_t count, pybind11::object& tensors);
};
#pragma GCC visibility pop

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ class ResponseMemoryProbs : public ResponseMemory
* @param count
* @param probs
*/
ResponseMemoryProbs(size_t count, TensorObject probs);
ResponseMemoryProbs(size_t count, TensorObject&& probs);
/**
* @brief Construct a new Response Memory Probs object
*
Expand All @@ -60,26 +60,28 @@ class ResponseMemoryProbs : public ResponseMemory
ResponseMemoryProbs(size_t count, TensorMap&& tensors);

/**
* @brief Returns the tensor named 'probs', throws a `std::runtime_error` if it does not exist
* @brief Returns the tensor named 'probs'. Alias for `get_tensor("probs")`.
*
* @return const TensorObject&
* @throws std::runtime_error If no tensor named "probs" exists
*/
const TensorObject& get_probs() const;

/**
* @brief Update the tensor named 'probs'
*
* @param probs
* @throws std::length_error If the number of rows in `probs` does not match `count`.
*/
void set_probs(TensorObject probs);
void set_probs(TensorObject&& probs);
};

/****** ResponseMemoryProbsInterfaceProxy*******************/
#pragma GCC visibility push(default)
/**
* @brief Interface proxy, used to insulate python bindings
*/
struct ResponseMemoryProbsInterfaceProxy
struct ResponseMemoryProbsInterfaceProxy : public ResponseMemoryInterfaceProxy
{
/**
* @brief Create and initialize a ResponseMemoryProbs object, and return a shared pointer to the result
Expand All @@ -91,23 +93,16 @@ struct ResponseMemoryProbsInterfaceProxy
static std::shared_ptr<ResponseMemoryProbs> init(cudf::size_type count, pybind11::object probs);

/**
* @brief Get messages count in the response memory probs object
*
* @param self
* @return std::size_t
*/
static std::size_t count(ResponseMemoryProbs& self);

/**
* @brief Get the response memory probs object
* @brief Get the response memory probs object (alias for `get_tensor("probs")`)
*
* @param self
* @return pybind11::object
* @throws pybind11::key_error When no tensor named "probs" exists.
*/
static pybind11::object get_probs(ResponseMemoryProbs& self);

/**
* @brief Set the response memory probs object
* @brief Set the response memory probs object (alias for `set_tensor("probs", cupy_values)`)
*
* @param self
* @param cupy_values
Expand Down
Loading

0 comments on commit 9cb5a4d

Please sign in to comment.