Skip to content

Commit

Permalink
Fix Python bindings for TensorMemory (#655)
Browse files Browse the repository at this point in the history
* Makes the Python API compatible with the Python C++ bindings for `TensorMemory`, `InferenceMemory` & `ResponseMemory` classes.
* Replaces the `tensors` attribute for memory classes with explicit `get_tensors` and `set_tensors` methods. 
* Make get_input, set_input, get_output & set_output methods actual methods on the base
* Associated interface proxy classes now inherit from each other limiting redundant python conversion code.
* Expose `MultiTensorMessage` class to Python allowing Python inheritance hierarchy to match that of C++, and consolidating some duplicated code.


The reason for removing the attribute is that on the C++ side returning a Python representation of a tensor is rather costly and is always returned as a copy. We want to avoid the obvious bugs that can occur with anyone doing:
```python
m = tensor_memory.TensorMemory(10)
m.tensors['c'] = cp.zeros(count)
```

Which would have worked when C++ execution is disabled, and the old API is implying that it *should* work. Instead the API is changed to:
```python
m = tensor_memory.TensorMemory(10)
tensors = m.get_tensors()

tensors['c'] = cp.zeros(count)

m.set_tensors(tensors)
```


fixes #604

Authors:
  - David Gardner (https://github.com/dagardner-nv)
  - Michael Demoret (https://github.com/mdemoret-nv)

Approvers:
  - Michael Demoret (https://github.com/mdemoret-nv)

URL: #655
  • Loading branch information
dagardner-nv authored Mar 7, 2023
1 parent e3c6f52 commit 9cb5a4d
Show file tree
Hide file tree
Showing 45 changed files with 1,521 additions and 977 deletions.
3 changes: 3 additions & 0 deletions ci/iwyu/mappings.imp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@
# Protobuf
{ "include": [ "<google/protobuf/repeated_ptr_field.h>", private, "<google/protobuf/repeated_field.h>", "public" ] },

# pybind11
{ "include": [ "<pybind11/detail/common.h>", private, "<pybind11/pytypes.h>", "public" ] },

# rxcpp
# Hide includes that are exported by <rxcpp/rx.hpp>
{ "include": [ "\"rx-includes.hpp\"", private, "<rxcpp/rx.hpp>", "public" ] },
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,10 @@
#include "morpheus/messages/memory/tensor_memory.hpp"
#include "morpheus/types.hpp" // for TensorMap

#include <cstddef>
#include <pybind11/pytypes.h> // for object

#include <cstddef> // for size_t
#include <memory> // for shared_ptr
#include <string>

namespace morpheus {
Expand Down Expand Up @@ -54,7 +57,7 @@ class InferenceMemory : public TensorMemory
InferenceMemory(size_t count, TensorMap&& tensors);

/**
* @brief Checks if a tensor named `name` exists in `tensors`
* @brief Checks if a tensor named `name` exists in `tensors`. Alias for `has_tensor`.
*
* @param name
* @return true
Expand All @@ -67,15 +70,17 @@ class InferenceMemory : public TensorMemory
/**
* @brief Interface proxy, used to insulate python bindings.
*/
struct InferenceMemoryInterfaceProxy
struct InferenceMemoryInterfaceProxy : public TensorMemoryInterfaceProxy
{
/**
* @brief Get the count object
* @brief Create and initialize a InferenceMemory object, and return a shared pointer to the result. Each array in
* `tensors` should be of length `count`.
*
* @param self
* @return std::size_t
* @param count : Length of each array in `tensors`
* @param tensors : Map of tensor names to cupy arrays
* @return std::shared_ptr<InferenceMemory>
*/
static std::size_t get_count(InferenceMemory& self);
static std::shared_ptr<InferenceMemory> init(std::size_t count, pybind11::object& tensors);
};
#pragma GCC visibility pop

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@

#include <cstddef>
#include <memory>
#include <string>

namespace morpheus {
/****** Component public implementations *******************/
Expand All @@ -52,7 +51,7 @@ class InferenceMemoryFIL : public InferenceMemory
* @param seq_ids : Ids used to index from an inference input to a message. Necessary since there can be more
* inference inputs than messages (i.e., if some messages get broken into multiple inference requests)
*/
InferenceMemoryFIL(size_t count, TensorObject input__0, TensorObject seq_ids);
InferenceMemoryFIL(size_t count, TensorObject&& input__0, TensorObject&& seq_ids);

/**
* @brief Returns the 'input__0' tensor, throws a `std::runtime_error` if it does not exist
Expand All @@ -73,27 +72,26 @@ class InferenceMemoryFIL : public InferenceMemory
/**
* @brief Sets a tensor named 'input__0'
*
* @param input_ids
* @throw std::runtime_error
* @throw std::runtime_error
* @param input__0
* @throws std::length_error If the number of rows in `input__0` does not match `count`.
*/
void set_input__0(TensorObject input_ids);
void set_input__0(TensorObject&& input__0);

/**
* @brief Sets a tensor named 'seq_ids'
*
* @param seq_ids
* @throw std::runtime_error
* @throws std::length_error If the number of rows in `seq_ids` does not match `count`.
*/
void set_seq_ids(TensorObject seq_ids);
void set_seq_ids(TensorObject&& seq_ids);
};

/****** InferenceMemoryFILInterfaceProxy *************************/
#pragma GCC visibility push(default)
/**
* @brief Interface proxy, used to insulate python bindings
*/
struct InferenceMemoryFILInterfaceProxy
struct InferenceMemoryFILInterfaceProxy : public InferenceMemoryInterfaceProxy
{
/**
* @brief Create and initialize an InferenceMemoryFIL object, and return a shared pointer to the result
Expand All @@ -108,23 +106,6 @@ struct InferenceMemoryFILInterfaceProxy
pybind11::object input__0,
pybind11::object seq_ids);

/**
* Get messages count in the inference memory instance
*
* @param self
* @return std::size_t
*/
static std::size_t count(InferenceMemoryFIL& self);

/**
* Return the requested tensor for a given name
*
* @param self
* @param name Tensor name
* @return TensorObject
*/
static TensorObject get_tensor(InferenceMemoryFIL& self, const std::string& name);

/**
* @brief Returns the 'input__0' as cupy array
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,57 +52,63 @@ class InferenceMemoryNLP : public InferenceMemory
* @param seq_ids : Ids used to index from an inference input to a message. Necessary since there can be more
inference inputs than messages (i.e., if some messages get broken into multiple inference requests)
*/
InferenceMemoryNLP(std::size_t count, TensorObject input_ids, TensorObject input_mask, TensorObject seq_ids);
InferenceMemoryNLP(std::size_t count, TensorObject&& input_ids, TensorObject&& input_mask, TensorObject&& seq_ids);

/**
* @brief Get the input ids object
*
* @return const TensorObject&
* @throws std::runtime_error If no tensor named "input_ids" exists
*/
const TensorObject& get_input_ids() const;

/**
* @brief Get the input mask object
*
* @return const TensorObject&
* @throws std::runtime_error If no tensor named "input_mask" exists
*/
const TensorObject& get_input_mask() const;

/**
* @brief Get the seq ids object
*
* @return const TensorObject&
* @throws std::runtime_error If no tensor named "seq_ids" exists
*/
const TensorObject& get_seq_ids() const;

/**
* @brief Set the input ids object
*
* @param input_ids
* @throws std::length_error If the number of rows in `input_ids` does not match `count`.
*/
void set_input_ids(TensorObject input_ids);
void set_input_ids(TensorObject&& input_ids);

/**
* @brief Set the input mask object
*
* @param input_mask
* @throws std::length_error If the number of rows in `input_mask` does not match `count`.
*/
void set_input_mask(TensorObject input_mask);
void set_input_mask(TensorObject&& input_mask);

/**
* @brief Set the seq ids object
*
* @param seq_ids
* @throws std::length_error If the number of rows in `seq_ids` does not match `count`.
*/
void set_seq_ids(TensorObject seq_ids);
void set_seq_ids(TensorObject&& seq_ids);
};

/****** InferenceMemoryNLPInterfaceProxy********************/
#pragma GCC visibility push(default)
/**
* @brief Interface proxy, used to insulate python bindings.
*/
struct InferenceMemoryNLPInterfaceProxy
struct InferenceMemoryNLPInterfaceProxy : public InferenceMemoryInterfaceProxy
{
/**
* @brief Create and initialize an InferenceMemoryNLP object, and return a shared pointer to the result
Expand All @@ -119,19 +125,12 @@ struct InferenceMemoryNLPInterfaceProxy
pybind11::object input_mask,
pybind11::object seq_ids);

/**
* Get messages count in the inference memory object
*
* @param self
* @return std::size_t
*/
static std::size_t count(InferenceMemoryNLP& self);

/**
* @brief : Returns token-ids for each string padded with 0s to max_length as python object
*
* @param self
* @return pybind11::object
* @throws pybind11::attribute_error
*/
static pybind11::object get_input_ids(InferenceMemoryNLP& self);

Expand All @@ -148,6 +147,7 @@ struct InferenceMemoryNLPInterfaceProxy
*
* @param self
* @return pybind11::object
* @throws pybind11::attribute_error
*/
static pybind11::object get_input_mask(InferenceMemoryNLP& self);

Expand All @@ -164,6 +164,7 @@ struct InferenceMemoryNLPInterfaceProxy
*
* @param self
* @return pybind11::object
* @throws pybind11::attribute_error
*/
static pybind11::object get_seq_ids(InferenceMemoryNLP& self);

Expand Down
30 changes: 11 additions & 19 deletions morpheus/_lib/include/morpheus/messages/memory/response_memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@
#pragma once

#include "morpheus/messages/memory/tensor_memory.hpp"
#include "morpheus/objects/tensor_object.hpp" // for TensorObject
#include "morpheus/types.hpp" // for TensorMap
#include "morpheus/types.hpp" // for TensorMap

#include <pybind11/pytypes.h>
#include <pybind11/pytypes.h> // for object

#include <cstddef> // for size_t
#include <memory> // for shared_ptr
#include <string>

namespace morpheus {
Expand Down Expand Up @@ -57,7 +57,7 @@ class ResponseMemory : public TensorMemory
ResponseMemory(size_t count, TensorMap&& tensors);

/**
* @brief Checks if a tensor named `name` exists in `tensors`
* @brief Checks if a tensor named `name` exists in `tensors`. Alias for `has_tensor`.
*
* @param name
* @return true
Expand All @@ -72,25 +72,17 @@ class ResponseMemory : public TensorMemory
* @brief Interface proxy, used to insulate python bindings.
*
*/
struct ResponseMemoryInterfaceProxy
struct ResponseMemoryInterfaceProxy : public TensorMemoryInterfaceProxy
{
/**
* @brief Get the output object
* @brief Create and initialize a ResponseMemory object, and return a shared pointer to the result. Each array in
* `cupy_tensors` should be of length `count`.
*
* @param self
* @param name
* @return pybind11::object
*/
static pybind11::object get_output(ResponseMemory& self, const std::string& name);

/**
* @brief Get the output tensor object
*
* @param self
* @param name
* @return TensorObject
* @param count : Length of each array in `cupy_tensors`
* @param cupy_tensors : Map of tensor names to cupy arrays
* @return std::shared_ptr<ResponseMemory>
*/
static TensorObject get_output_tensor(ResponseMemory& self, const std::string& name);
static std::shared_ptr<ResponseMemory> init(std::size_t count, pybind11::object& tensors);
};
#pragma GCC visibility pop

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ class ResponseMemoryProbs : public ResponseMemory
* @param count
* @param probs
*/
ResponseMemoryProbs(size_t count, TensorObject probs);
ResponseMemoryProbs(size_t count, TensorObject&& probs);
/**
* @brief Construct a new Response Memory Probs object
*
Expand All @@ -60,26 +60,28 @@ class ResponseMemoryProbs : public ResponseMemory
ResponseMemoryProbs(size_t count, TensorMap&& tensors);

/**
* @brief Returns the tensor named 'probs', throws a `std::runtime_error` if it does not exist
* @brief Returns the tensor named 'probs'. Alias for `get_tensor("probs")`.
*
* @return const TensorObject&
* @throws std::runtime_error If no tensor named "probs" exists
*/
const TensorObject& get_probs() const;

/**
* @brief Update the tensor named 'probs'
*
* @param probs
* @throws std::length_error If the number of rows in `probs` does not match `count`.
*/
void set_probs(TensorObject probs);
void set_probs(TensorObject&& probs);
};

/****** ResponseMemoryProbsInterfaceProxy*******************/
#pragma GCC visibility push(default)
/**
* @brief Interface proxy, used to insulate python bindings
*/
struct ResponseMemoryProbsInterfaceProxy
struct ResponseMemoryProbsInterfaceProxy : public ResponseMemoryInterfaceProxy
{
/**
* @brief Create and initialize a ResponseMemoryProbs object, and return a shared pointer to the result
Expand All @@ -91,23 +93,16 @@ struct ResponseMemoryProbsInterfaceProxy
static std::shared_ptr<ResponseMemoryProbs> init(cudf::size_type count, pybind11::object probs);

/**
* @brief Get messages count in the response memory probs object
*
* @param self
* @return std::size_t
*/
static std::size_t count(ResponseMemoryProbs& self);

/**
* @brief Get the response memory probs object
* @brief Get the response memory probs object (alias for `get_tensor("probs")`)
*
* @param self
* @return pybind11::object
* @throws pybind11::key_error When no tensor named "probs" exists.
*/
static pybind11::object get_probs(ResponseMemoryProbs& self);

/**
* @brief Set the response memory probs object
* @brief Set the response memory probs object (alias for `set_tensor("probs", cupy_values)`)
*
* @param self
* @param cupy_values
Expand Down
Loading

0 comments on commit 9cb5a4d

Please sign in to comment.