diff --git a/morpheus/_lib/cmake/libraries/morpheus.cmake b/morpheus/_lib/cmake/libraries/morpheus.cmake index 61c0acd71b..e422791379 100644 --- a/morpheus/_lib/cmake/libraries/morpheus.cmake +++ b/morpheus/_lib/cmake/libraries/morpheus.cmake @@ -16,6 +16,7 @@ message(STATUS "Adding library: morpheus") add_library(morpheus # Keep these sorted! + ${MORPHEUS_LIB_ROOT}/src/io/deserializers.cpp ${MORPHEUS_LIB_ROOT}/src/io/serializers.cpp ${MORPHEUS_LIB_ROOT}/src/messages/memory/inference_memory.cpp ${MORPHEUS_LIB_ROOT}/src/messages/memory/inference_memory_fil.cpp @@ -30,6 +31,7 @@ add_library(morpheus ${MORPHEUS_LIB_ROOT}/src/messages/multi_inference_nlp.cpp ${MORPHEUS_LIB_ROOT}/src/messages/multi_response.cpp ${MORPHEUS_LIB_ROOT}/src/messages/multi_response_probs.cpp + ${MORPHEUS_LIB_ROOT}/src/messages/multi_tensor.cpp ${MORPHEUS_LIB_ROOT}/src/objects/fiber_queue.cpp ${MORPHEUS_LIB_ROOT}/src/objects/file_types.cpp ${MORPHEUS_LIB_ROOT}/src/objects/wrapped_tensor.cpp diff --git a/morpheus/_lib/include/morpheus/io/deserializers.hpp b/morpheus/_lib/include/morpheus/io/deserializers.hpp new file mode 100644 index 0000000000..e2d3231e24 --- /dev/null +++ b/morpheus/_lib/include/morpheus/io/deserializers.hpp @@ -0,0 +1,55 @@ +/** + * SPDX-FileCopyrightText: Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +#include + +namespace morpheus { +#pragma GCC visibility push(default) + +/** + * @brief Loads a cudf table from either CSV or JSON file + * + * @param filename + * @return cudf::io::table_with_metadata + */ +cudf::io::table_with_metadata load_table_from_file(const std::string& filename); + +/** + * @brief Loads a cudf table from a json soruce, replacing any escape characters in the source data that cudf can't + * handle + * + * @param json_options + * @return cudf::io::table_with_metadata + */ +cudf::io::table_with_metadata load_json_table(cudf::io::json_reader_options&& json_options); + +/** + * @brief Return the number of index columns in `data_table`, in practice this will be a `0` or `1`. + * If `data_table` contains a column named "Unnamed: 0" it will be renamed to "". + * + * @param data_table + * @return int + */ +int get_index_col_count(cudf::io::table_with_metadata& data_table); + +#pragma GCC visibility pop +} // namespace morpheus diff --git a/morpheus/_lib/include/morpheus/messages/memory/inference_memory.hpp b/morpheus/_lib/include/morpheus/messages/memory/inference_memory.hpp index 966d225c90..ac560cb4b3 100644 --- a/morpheus/_lib/include/morpheus/messages/memory/inference_memory.hpp +++ b/morpheus/_lib/include/morpheus/messages/memory/inference_memory.hpp @@ -26,6 +26,8 @@ namespace morpheus { /** * TODO(Documentation) */ + +#pragma GCC visibility push(default) class InferenceMemory : public TensorMemory { public: @@ -43,7 +45,6 @@ class InferenceMemory : public TensorMemory }; /****** InferenceMemoryInterfaceProxy *************************/ -#pragma GCC visibility push(default) /** * @brief Interface proxy, used to insulate python bindings. */ diff --git a/morpheus/_lib/include/morpheus/messages/memory/response_memory_probs.hpp b/morpheus/_lib/include/morpheus/messages/memory/response_memory_probs.hpp index 62c716e952..2dbe4628a9 100644 --- a/morpheus/_lib/include/morpheus/messages/memory/response_memory_probs.hpp +++ b/morpheus/_lib/include/morpheus/messages/memory/response_memory_probs.hpp @@ -18,6 +18,7 @@ #pragma once #include "morpheus/messages/memory/response_memory.hpp" +#include "morpheus/messages/memory/tensor_memory.hpp" #include "morpheus/objects/tensor_object.hpp" #include @@ -36,14 +37,19 @@ class ResponseMemoryProbs : public ResponseMemory { public: ResponseMemoryProbs(size_t count, TensorObject probs); + ResponseMemoryProbs(size_t count, tensor_map_t &&tensors); /** - * TODO(Documentation) + * @brief Return the tensor named 'probs', throws a `std::runtime_error` if it does not exist. + * + * @return const TensorObject& */ const TensorObject &get_probs() const; /** - * TODO(Documentation) + * @brief Update the tensor named 'probs' + * + * @param probs */ void set_probs(TensorObject probs); }; diff --git a/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp b/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp index 7f6fe90abc..2cb283ab60 100644 --- a/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp +++ b/morpheus/_lib/include/morpheus/messages/memory/tensor_memory.hpp @@ -41,6 +41,7 @@ class TensorMemory TensorMemory(size_t count); TensorMemory(size_t count, tensor_map_t &&tensors); + virtual ~TensorMemory() = default; size_t count{0}; tensor_map_t tensors; diff --git a/morpheus/_lib/include/morpheus/messages/multi_inference.hpp b/morpheus/_lib/include/morpheus/messages/multi_inference.hpp index 13c3b9ce37..a36209d5c0 100644 --- a/morpheus/_lib/include/morpheus/messages/multi_inference.hpp +++ b/morpheus/_lib/include/morpheus/messages/multi_inference.hpp @@ -20,6 +20,7 @@ #include "morpheus/messages/memory/inference_memory.hpp" #include "morpheus/messages/meta.hpp" #include "morpheus/messages/multi.hpp" +#include "morpheus/messages/multi_tensor.hpp" #include "morpheus/objects/tensor_object.hpp" #include @@ -38,7 +39,7 @@ namespace morpheus { * TODO(Documentation) */ #pragma GCC visibility push(default) -class MultiInferenceMessage : public DerivedMultiMessage +class MultiInferenceMessage : public DerivedMultiMessage { public: MultiInferenceMessage(const MultiInferenceMessage &other) = default; @@ -49,32 +50,26 @@ class MultiInferenceMessage : public DerivedMultiMessage memory; - std::size_t offset{0}; - std::size_t count{0}; - /** - * TODO(Documentation) + * @brief Return the input tensor for the given `name`. Will halt on a fatal error if the tensor does not exist. + * + * @param name + * @return const TensorObject */ const TensorObject get_input(const std::string &name) const; /** - * TODO(Documentation) + * @brief Return the input tensor for the given `name`. Will halt on a fatal error if the tensor does not exist. + * + * @param name + * @return TensorObject */ - const void set_input(const std::string &name, const TensorObject &value); + TensorObject get_input(const std::string &name); - protected: /** - * TODO(Documentation) + * Update the value of ain input tensor. The tensor must already exist, otherwise this will halt on a fatal error. */ - void get_slice_impl(std::shared_ptr new_message, std::size_t start, std::size_t stop) const override; - - void copy_ranges_impl(std::shared_ptr new_message, - const std::vector> &ranges, - size_t num_selected_rows) const override; - - std::shared_ptr copy_input_ranges(const std::vector> &ranges, - size_t num_selected_rows) const; + void set_input(const std::string &name, const TensorObject &value); }; /****** MultiInferenceMessageInterfaceProxy****************/ diff --git a/morpheus/_lib/include/morpheus/messages/multi_inference_fil.hpp b/morpheus/_lib/include/morpheus/messages/multi_inference_fil.hpp index 73bb8b5068..711df6614c 100644 --- a/morpheus/_lib/include/morpheus/messages/multi_inference_fil.hpp +++ b/morpheus/_lib/include/morpheus/messages/multi_inference_fil.hpp @@ -45,22 +45,32 @@ class MultiInferenceFILMessage : public MultiInferenceMessage size_t count); /** - * TODO(Documentation) + * @brief Return the 'input__0' tensor, throws a `std::runtime_error` if it does not exist. + * + * @param name + * @return const TensorObject */ const TensorObject get_input__0() const; /** - * TODO(Documentation) + * @brief Sets a tensor named 'input__0'. + * + * @param input__0 */ void set_input__0(const TensorObject& input__0); /** - * TODO(Documentation) + * @brief Return the 'seq_ids' tensor, throws a `std::runtime_error` if it does not exist. + * + * @param name + * @return const TensorObject */ const TensorObject get_seq_ids() const; /** - * TODO(Documentation) + * @brief Sets a tensor named 'seq_ids'. + * + * @param seq_ids */ void set_seq_ids(const TensorObject& seq_ids); }; diff --git a/morpheus/_lib/include/morpheus/messages/multi_inference_nlp.hpp b/morpheus/_lib/include/morpheus/messages/multi_inference_nlp.hpp index debbd6b384..cfa7ec3fef 100644 --- a/morpheus/_lib/include/morpheus/messages/multi_inference_nlp.hpp +++ b/morpheus/_lib/include/morpheus/messages/multi_inference_nlp.hpp @@ -46,32 +46,47 @@ class MultiInferenceNLPMessage : public MultiInferenceMessage std::size_t count); /** - * TODO(Documentation) + * @brief Return the 'input_ids' tensor, throws a `std::runtime_error` if it does not exist. + * + * @param name + * @return const TensorObject */ const TensorObject get_input_ids() const; /** - * TODO(Documentation) + * @brief Sets a tensor named 'input_ids'. + * + * @param input_ids */ void set_input_ids(const TensorObject& input_ids); /** - * TODO(Documentation) + * @brief Return the 'input_mask' tensor, throws a `std::runtime_error` if it does not exist. + * + * @param name + * @return const TensorObject */ const TensorObject get_input_mask() const; /** - * TODO(Documentation) + * @brief Sets a tensor named 'input_mask'. + * + * @param input_mask */ void set_input_mask(const TensorObject& input_mask); /** - * TODO(Documentation) + * @brief Return the 'seq_ids' tensor, throws a `std::runtime_error` if it does not exist. + * + * @param name + * @return const TensorObject */ const TensorObject get_seq_ids() const; /** - * TODO(Documentation) + * @brief Sets a tensor named 'seq_ids'. + * + * @param seq_ids */ void set_seq_ids(const TensorObject& seq_ids); }; diff --git a/morpheus/_lib/include/morpheus/messages/multi_response.hpp b/morpheus/_lib/include/morpheus/messages/multi_response.hpp index 1730b09718..a2e2a5a42c 100644 --- a/morpheus/_lib/include/morpheus/messages/multi_response.hpp +++ b/morpheus/_lib/include/morpheus/messages/multi_response.hpp @@ -20,6 +20,7 @@ #include "morpheus/messages/memory/response_memory.hpp" #include "morpheus/messages/meta.hpp" #include "morpheus/messages/multi.hpp" +#include "morpheus/messages/multi_tensor.hpp" #include "morpheus/objects/tensor_object.hpp" #include @@ -38,7 +39,7 @@ namespace morpheus { * TODO(Documentation) */ #pragma GCC visibility push(default) -class MultiResponseMessage : public DerivedMultiMessage +class MultiResponseMessage : public DerivedMultiMessage { public: MultiResponseMessage(const MultiResponseMessage &other) = default; @@ -49,37 +50,30 @@ class MultiResponseMessage : public DerivedMultiMessage memory; - std::size_t offset{0}; - std::size_t count{0}; - - /** - * TODO(Documentation) - */ - TensorObject get_output(const std::string &name); - /** - * TODO(Documentation) + * @brief Returns the output tensor with the given name. Will halt on a fatal error if the tensor does not exist. + * + * @param name + * @return const TensorObject */ const TensorObject get_output(const std::string &name) const; /** - * TODO(Documentation) + * @brief Returns the output tensor with the given name. Will halt on a fatal error if the tensor does not exist. + * + * @param name + * @return TensorObject */ - const void set_output(const std::string &name, const TensorObject &value); + TensorObject get_output(const std::string &name); - protected: /** - * TODO(Documentation) + * @brief Update the value of a given output tensor. The tensor must already exist, otherwise this will halt on a + * fatal error. + * + * @param name + * @param value */ - void get_slice_impl(std::shared_ptr new_message, std::size_t start, std::size_t stop) const override; - - void copy_ranges_impl(std::shared_ptr new_message, - const std::vector> &ranges, - size_t num_selected_rows) const override; - - std::shared_ptr copy_output_ranges(const std::vector> &ranges, - size_t num_selected_rows) const; + void set_output(const std::string &name, const TensorObject &value); }; /****** MultiResponseMessageInterfaceProxy *************************/ diff --git a/morpheus/_lib/include/morpheus/messages/multi_response_probs.hpp b/morpheus/_lib/include/morpheus/messages/multi_response_probs.hpp index 67e4164259..a454f89f06 100644 --- a/morpheus/_lib/include/morpheus/messages/multi_response_probs.hpp +++ b/morpheus/_lib/include/morpheus/messages/multi_response_probs.hpp @@ -17,7 +17,7 @@ #pragma once -#include "morpheus/messages/memory/response_memory.hpp" +#include "morpheus/messages/memory/response_memory_probs.hpp" #include "morpheus/messages/meta.hpp" #include "morpheus/messages/multi.hpp" #include "morpheus/messages/multi_response.hpp" @@ -43,17 +43,21 @@ class MultiResponseProbsMessage : public DerivedMultiMessage meta, size_t mess_offset, size_t mess_count, - std::shared_ptr memory, + std::shared_ptr memory, size_t offset, size_t count); /** - * TODO(Documentation) + * @brief Return the `probs` (probabilities) output tensor + * + * @return const TensorObject */ const TensorObject get_probs() const; /** - * TODO(Documentation) + * @brief Update the `probs` output tensor. Will halt on a fatal error if the `probs` output tensor does not exist. + * + * @param probs */ void set_probs(const TensorObject &probs); }; @@ -67,14 +71,14 @@ struct MultiResponseProbsMessageInterfaceProxy static std::shared_ptr init(std::shared_ptr meta, cudf::size_type mess_offset, cudf::size_type mess_count, - std::shared_ptr memory, + std::shared_ptr memory, cudf::size_type offset, cudf::size_type count); /** * TODO(Documentation) */ - static std::shared_ptr memory(MultiResponseProbsMessage &self); + static std::shared_ptr memory(MultiResponseProbsMessage &self); /** * TODO(Documentation) diff --git a/morpheus/_lib/include/morpheus/messages/multi_tensor.hpp b/morpheus/_lib/include/morpheus/messages/multi_tensor.hpp new file mode 100644 index 0000000000..03f6d733ca --- /dev/null +++ b/morpheus/_lib/include/morpheus/messages/multi_tensor.hpp @@ -0,0 +1,101 @@ +/** + * SPDX-FileCopyrightText: Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "morpheus/messages/memory/tensor_memory.hpp" +#include "morpheus/messages/meta.hpp" +#include "morpheus/messages/multi.hpp" +#include "morpheus/objects/tensor_object.hpp" + +#include +#include +#include +#include // for pair +#include + +namespace morpheus { +#pragma GCC visibility push(default) + +/****** MultiTensorMessage*******************************/ +/** + * Base class for MultiInferenceMessage & MultiResponseMessage + * Contains a pointer to an instance of TensorMemory along with an + * offset & count to those tensors. + * + * mess_offset & mess_count refer to the range of records in meta. + * offset & count refer to the range of records in TensorMemory. + * + * While TensorMemory can contain multiple tensors, it is a requirement that + * they are all of the same length and that element N in each tensor refers + * to the same record. + */ +class MultiTensorMessage : public DerivedMultiMessage +{ + public: + MultiTensorMessage(const MultiTensorMessage &other) = default; + MultiTensorMessage(std::shared_ptr meta, + std::size_t mess_offset, + std::size_t mess_count, + std::shared_ptr memory, + std::size_t offset, + std::size_t count); + + std::shared_ptr memory; + std::size_t offset{0}; + std::size_t count{0}; + + /** + * @brief Returns a tensor with the given name. Will halt on a fatal error if the tensor does not exist. + * + * @param name + * @return const TensorObject + */ + const TensorObject get_tensor(const std::string &name) const; + + /** + * @brief Returns a tensor with the given name. Will halt on a fatal error if the tensor does not exist. + * + * @param name + * @return TensorObject + */ + TensorObject get_tensor(const std::string &name); + + /** + * @brief Update the value of a given tensor. The tensor must already exist, otherwise this will halt on a fatal + * error. + * + * @param name + * @param value + */ + void set_tensor(const std::string &name, const TensorObject &value); + + protected: + void get_slice_impl(std::shared_ptr new_message, std::size_t start, std::size_t stop) const override; + + void copy_ranges_impl(std::shared_ptr new_message, + const std::vector> &ranges, + size_t num_selected_rows) const override; + + std::shared_ptr copy_input_ranges( + const std::vector> &ranges, std::size_t num_selected_rows) const; + + TensorObject get_tensor_impl(const std::string &name) const; +}; + +#pragma GCC visibility pop +} // namespace morpheus diff --git a/morpheus/_lib/include/morpheus/objects/tensor.hpp b/morpheus/_lib/include/morpheus/objects/tensor.hpp index e66752c7b7..a84fecff5c 100644 --- a/morpheus/_lib/include/morpheus/objects/tensor.hpp +++ b/morpheus/_lib/include/morpheus/objects/tensor.hpp @@ -34,6 +34,8 @@ namespace morpheus { /** * TODO(Documentation) */ + +#pragma GCC visibility push(default) class Tensor { public: @@ -80,4 +82,6 @@ class Tensor size_t m_offset; std::shared_ptr m_device_buffer; }; + +#pragma GCC visibility pop } // namespace morpheus diff --git a/morpheus/_lib/include/morpheus/stages/file_source.hpp b/morpheus/_lib/include/morpheus/stages/file_source.hpp index 7dc4c54f15..a3e8ff72c1 100644 --- a/morpheus/_lib/include/morpheus/stages/file_source.hpp +++ b/morpheus/_lib/include/morpheus/stages/file_source.hpp @@ -49,10 +49,6 @@ class FileSourceStage : public srf::pysrf::PythonSource +#include +#include // for string_scalar +#include +#include // IWYU pragma: keep +#include // for cudf::type_id +#include + +#include +#include // needed for logging +#include + +namespace morpheus { + +cudf::io::table_with_metadata load_json_table(cudf::io::json_reader_options&& json_options) +{ + auto tbl = cudf::io::read_json(json_options); + + auto found = std::find(tbl.metadata.column_names.begin(), tbl.metadata.column_names.end(), "data"); + + if (found == tbl.metadata.column_names.end()) + return tbl; + + // Super ugly but cudf cant handle newlines and add extra escapes. So we need to convert + // \\n -> \n + // \\/ -> \/ + auto columns = tbl.tbl->release(); + + size_t idx = found - tbl.metadata.column_names.begin(); + + auto updated_data = cudf::strings::replace( + cudf::strings_column_view{columns[idx]->view()}, cudf::string_scalar("\\n"), cudf::string_scalar("\n")); + + updated_data = cudf::strings::replace( + cudf::strings_column_view{updated_data->view()}, cudf::string_scalar("\\/"), cudf::string_scalar("/")); + + columns[idx] = std::move(updated_data); + + tbl.tbl = std::move(std::make_unique(std::move(columns))); + + return tbl; +} + +cudf::io::table_with_metadata load_table_from_file(const std::string& filename) +{ + auto file_path = std::filesystem::path(filename); + + if (file_path.extension() == ".json" || file_path.extension() == ".jsonlines") + { + // First, load the file into json + auto options = cudf::io::json_reader_options::builder(cudf::io::source_info{filename}).lines(true); + return load_json_table(options.build()); + } + else if (file_path.extension() == ".csv") + { + auto options = cudf::io::csv_reader_options::builder(cudf::io::source_info{filename}); + return cudf::io::read_csv(options.build()); + } + else + { + LOG(FATAL) << "Unknown extension for file: " << filename; + throw std::runtime_error("Unknown extension"); + } +} + +int get_index_col_count(cudf::io::table_with_metadata& data_table) +{ + int index_col_count = 0; + + // Check if we have a first column with INT64 data type + if (data_table.metadata.column_names.size() >= 1 && + data_table.tbl->get_column(0).type().id() == cudf::type_id::INT64) + { + std::regex index_regex(R"((unnamed: 0|id))", std::regex_constants::ECMAScript | std::regex_constants::icase); + + // Get the column name + auto col_name = data_table.metadata.column_names[0]; + + // Check it against some common terms + if (std::regex_search(col_name, index_regex)) + { + // Also, if its the hideous 'Unnamed: 0', then just use an empty string + if (col_name == "Unnamed: 0") + { + data_table.metadata.column_names[0] = ""; + } + + index_col_count = 1; + } + } + + return index_col_count; +} + +} // namespace morpheus diff --git a/morpheus/_lib/src/messages/memory/response_memory_probs.cpp b/morpheus/_lib/src/messages/memory/response_memory_probs.cpp index 575182788e..fe24d41cc0 100644 --- a/morpheus/_lib/src/messages/memory/response_memory_probs.cpp +++ b/morpheus/_lib/src/messages/memory/response_memory_probs.cpp @@ -17,8 +17,6 @@ #include "morpheus/messages/memory/response_memory_probs.hpp" -#include "morpheus/messages/memory/response_memory.hpp" -#include "morpheus/messages/memory/tensor_memory.hpp" #include "morpheus/utilities/cupy_util.hpp" #include @@ -38,6 +36,12 @@ ResponseMemoryProbs::ResponseMemoryProbs(size_t count, TensorObject probs) : Res this->tensors["probs"] = std::move(probs); } +ResponseMemoryProbs::ResponseMemoryProbs(size_t count, tensor_map_t &&tensors) : + ResponseMemory(count, std::move(tensors)) +{ + CHECK(has_tensor("probs")) << "Tensor: 'probs' not found in memory"; +} + const TensorObject &ResponseMemoryProbs::get_probs() const { auto found = this->tensors.find("probs"); diff --git a/morpheus/_lib/src/messages/multi_inference.cpp b/morpheus/_lib/src/messages/multi_inference.cpp index c5fb6379e8..48944f4bb4 100644 --- a/morpheus/_lib/src/messages/multi_inference.cpp +++ b/morpheus/_lib/src/messages/multi_inference.cpp @@ -42,78 +42,22 @@ MultiInferenceMessage::MultiInferenceMessage(std::shared_ptr memory, std::size_t offset, std::size_t count) : - DerivedMultiMessage(meta, mess_offset, mess_count), - memory(std::move(memory)), - offset(offset), - count(count) + DerivedMultiMessage(meta, mess_offset, mess_count, memory, offset, count) {} const TensorObject MultiInferenceMessage::get_input(const std::string &name) const { - CHECK(this->memory->has_input(name)) << "Cound not find input: " << name; - - // check if we are getting the entire input - if (this->offset == 0 && this->count == this->memory->count) - { - return this->memory->tensors[name]; - } - - // TODO(MDD): This really needs to return the slice of the tensor - return this->memory->tensors[name].slice({static_cast(this->offset), 0}, - {static_cast(this->offset + this->count), -1}); -} - -const void MultiInferenceMessage::set_input(const std::string &name, const TensorObject &value) -{ - // Get the input slice first - auto slice = this->get_input(name); - - // Set the value to use assignment - slice = value; -} - -void MultiInferenceMessage::get_slice_impl(std::shared_ptr new_message, - std::size_t start, - std::size_t stop) const -{ - auto sliced_message = DCHECK_NOTNULL(std::dynamic_pointer_cast(new_message)); - - sliced_message->offset = start; - sliced_message->count = stop - start; - - // If we have more inference rows than message rows, we need to use the seq_ids to figure out the slicing. This - // will be slow and should be avoided at all costs - if (this->count != this->mess_count && this->memory->has_input("seq_ids")) - { - auto seq_ids = this->get_input("seq_ids"); - - // Determine the new start and stop before passing onto the base - start = seq_ids.read_element({(TensorIndex)start, 0}); - stop = seq_ids.read_element({(TensorIndex)stop - 1, 0}) + 1; - } - - // Pass onto the base - DerivedMultiMessage::get_slice_impl(new_message, start, stop); + return get_tensor(name); } -void MultiInferenceMessage::copy_ranges_impl(std::shared_ptr new_message, - const std::vector> &ranges, - size_t num_selected_rows) const +TensorObject MultiInferenceMessage::get_input(const std::string &name) { - auto copied_message = DCHECK_NOTNULL(std::dynamic_pointer_cast(new_message)); - DerivedMultiMessage::copy_ranges_impl(copied_message, ranges, num_selected_rows); - - copied_message->offset = 0; - copied_message->count = num_selected_rows; - copied_message->memory = copy_input_ranges(ranges, num_selected_rows); + return get_tensor(name); } -std::shared_ptr MultiInferenceMessage::copy_input_ranges( - const std::vector> &ranges, size_t num_selected_rows) const +void MultiInferenceMessage::set_input(const std::string &name, const TensorObject &value) { - auto offset_ranges = apply_offset_to_ranges(offset, ranges); - auto tensors = memory->copy_tensor_ranges(offset_ranges, num_selected_rows); - return std::make_shared(num_selected_rows, std::move(tensors)); + set_tensor(name, value); } /****** InterfaceProxy *************************/ @@ -131,7 +75,8 @@ std::shared_ptr MultiInferenceMessageInterfaceProxy::init std::shared_ptr MultiInferenceMessageInterfaceProxy::memory(MultiInferenceMessage &self) { - return self.memory; + DCHECK(std::dynamic_pointer_cast(self.memory) != nullptr); + return std::static_pointer_cast(self.memory); } std::size_t MultiInferenceMessageInterfaceProxy::offset(MultiInferenceMessage &self) diff --git a/morpheus/_lib/src/messages/multi_inference_fil.cpp b/morpheus/_lib/src/messages/multi_inference_fil.cpp index e22a1650a3..39deecf52f 100644 --- a/morpheus/_lib/src/messages/multi_inference_fil.cpp +++ b/morpheus/_lib/src/messages/multi_inference_fil.cpp @@ -73,7 +73,8 @@ std::shared_ptr MultiInferenceFILMessageInterfaceProxy std::shared_ptr MultiInferenceFILMessageInterfaceProxy::memory( MultiInferenceFILMessage &self) { - return self.memory; + DCHECK(std::dynamic_pointer_cast(self.memory) != nullptr); + return std::static_pointer_cast(self.memory); } std::size_t MultiInferenceFILMessageInterfaceProxy::offset(MultiInferenceFILMessage &self) diff --git a/morpheus/_lib/src/messages/multi_inference_nlp.cpp b/morpheus/_lib/src/messages/multi_inference_nlp.cpp index 16945b6702..fa60a60bae 100644 --- a/morpheus/_lib/src/messages/multi_inference_nlp.cpp +++ b/morpheus/_lib/src/messages/multi_inference_nlp.cpp @@ -86,7 +86,8 @@ std::shared_ptr MultiInferenceNLPMessageInterfaceProxy std::shared_ptr MultiInferenceNLPMessageInterfaceProxy::memory( MultiInferenceNLPMessage &self) { - return self.memory; + DCHECK(std::dynamic_pointer_cast(self.memory) != nullptr); + return std::static_pointer_cast(self.memory); } std::size_t MultiInferenceNLPMessageInterfaceProxy::offset(MultiInferenceNLPMessage &self) diff --git a/morpheus/_lib/src/messages/multi_response.cpp b/morpheus/_lib/src/messages/multi_response.cpp index 7ffbf59469..90912ecfd6 100644 --- a/morpheus/_lib/src/messages/multi_response.cpp +++ b/morpheus/_lib/src/messages/multi_response.cpp @@ -36,95 +36,28 @@ namespace morpheus { /****** Component public implementations *******************/ -/****** MultiResponseMessage****************************************/ -MultiResponseMessage::MultiResponseMessage(std::shared_ptr meta, +MultiResponseMessage::MultiResponseMessage(std::shared_ptr meta, std::size_t mess_offset, std::size_t mess_count, std::shared_ptr memory, std::size_t offset, std::size_t count) : - DerivedMultiMessage(meta, mess_offset, mess_count), - memory(std::move(memory)), - offset(offset), - count(count) + DerivedMultiMessage(meta, mess_offset, mess_count, memory, offset, count) {} -TensorObject MultiResponseMessage::get_output(const std::string &name) -{ - CHECK(this->memory->has_output(name)) << "Could not find output: " << name; - - // check if we are getting the entire input - if (this->offset == 0 && this->count == this->memory->count) - { - return this->memory->tensors[name]; - } - - // TODO(MDD): This really needs to return the slice of the tensor - return this->memory->tensors[name].slice({static_cast(this->offset), 0}, - {static_cast(this->offset + this->count), -1}); -} - const TensorObject MultiResponseMessage::get_output(const std::string &name) const { - CHECK(this->memory->has_output(name)) << "Could not find output: " << name; - - // check if we are getting the entire input - if (this->offset == 0 && this->count == this->memory->count) - { - return this->memory->tensors[name]; - } - - // TODO(MDD): This really needs to return the slice of the tensor - return this->memory->tensors[name].slice({static_cast(this->offset), 0}, - {static_cast(this->offset + this->count), -1}); + return get_tensor(name); } -const void MultiResponseMessage::set_output(const std::string &name, const TensorObject &value) -{ - // Get the input slice first - auto slice = this->get_output(name); - - // Set the value to use assignment - slice = value; -} - -void MultiResponseMessage::get_slice_impl(std::shared_ptr new_message, - std::size_t start, - std::size_t stop) const -{ - auto sliced_message = DCHECK_NOTNULL(std::dynamic_pointer_cast(new_message)); - - sliced_message->offset = start; - sliced_message->count = stop - start; - - // Currently our output lengths should always match mess_count, and even if they didn't we wouldn't have any way - // to associate rows in the output with rows in the dataframe. Note on the input side we have the seq_ids array - // to but we don't have any equivelant for the output. - DCHECK(this->count == this->mess_count) - << "Number of rows in response output does not match number of messages in DF"; - - // Pass onto the base - DerivedMultiMessage::get_slice_impl(new_message, start, stop); -} - -void MultiResponseMessage::copy_ranges_impl(std::shared_ptr new_message, - const std::vector> &ranges, - size_t num_selected_rows) const +TensorObject MultiResponseMessage::get_output(const std::string &name) { - auto copied_message = DCHECK_NOTNULL(std::dynamic_pointer_cast(new_message)); - DerivedMultiMessage::copy_ranges_impl(copied_message, ranges, num_selected_rows); - - copied_message->offset = 0; - copied_message->count = num_selected_rows; - copied_message->memory = copy_output_ranges(ranges, num_selected_rows); + return get_tensor(name); } -std::shared_ptr MultiResponseMessage::copy_output_ranges( - const std::vector> &ranges, size_t num_selected_rows) const +void MultiResponseMessage::set_output(const std::string &name, const TensorObject &value) { - auto offset_ranges = apply_offset_to_ranges(offset, ranges); - auto tensors = memory->copy_tensor_ranges(offset_ranges, num_selected_rows); - return std::make_shared(num_selected_rows, std::move(tensors)); + set_tensor(name, value); } /****** MultiResponseMessageInterfaceProxy *************************/ @@ -141,7 +74,9 @@ std::shared_ptr MultiResponseMessageInterfaceProxy::init(s std::shared_ptr MultiResponseMessageInterfaceProxy::memory(MultiResponseMessage &self) { - return self.memory; + DCHECK(std::dynamic_pointer_cast(self.memory) != nullptr); + + return std::static_pointer_cast(self.memory); } std::size_t MultiResponseMessageInterfaceProxy::offset(MultiResponseMessage &self) diff --git a/morpheus/_lib/src/messages/multi_response_probs.cpp b/morpheus/_lib/src/messages/multi_response_probs.cpp index cdc1171f02..fb854dce2f 100644 --- a/morpheus/_lib/src/messages/multi_response_probs.cpp +++ b/morpheus/_lib/src/messages/multi_response_probs.cpp @@ -32,7 +32,7 @@ namespace morpheus { MultiResponseProbsMessage::MultiResponseProbsMessage(std::shared_ptr meta, size_t mess_offset, size_t mess_count, - std::shared_ptr memory, + std::shared_ptr memory, size_t offset, size_t count) : DerivedMultiMessage(meta, mess_offset, mess_count, memory, offset, count) @@ -56,7 +56,7 @@ std::shared_ptr MultiResponseProbsMessageInterfacePro std::shared_ptr meta, cudf::size_type mess_offset, cudf::size_type mess_count, - std::shared_ptr memory, + std::shared_ptr memory, cudf::size_type offset, cudf::size_type count) { @@ -64,10 +64,12 @@ std::shared_ptr MultiResponseProbsMessageInterfacePro std::move(meta), mess_offset, mess_count, std::move(memory), offset, count); } -std::shared_ptr MultiResponseProbsMessageInterfaceProxy::memory( +std::shared_ptr MultiResponseProbsMessageInterfaceProxy::memory( MultiResponseProbsMessage &self) { - return self.memory; + DCHECK(std::dynamic_pointer_cast(self.memory) != nullptr); + + return std::static_pointer_cast(self.memory); } std::size_t MultiResponseProbsMessageInterfaceProxy::offset(MultiResponseProbsMessage &self) diff --git a/morpheus/_lib/src/messages/multi_tensor.cpp b/morpheus/_lib/src/messages/multi_tensor.cpp new file mode 100644 index 0000000000..ffdf93be03 --- /dev/null +++ b/morpheus/_lib/src/messages/multi_tensor.cpp @@ -0,0 +1,122 @@ +/** + * SPDX-FileCopyrightText: Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "morpheus/messages/multi_tensor.hpp" + +#include "morpheus/utilities/cupy_util.hpp" + +#include // for cudf::size_type> +#include + +#include // for int32_t +#include // needed for logging + +namespace morpheus { +/****** Component public implementations *******************/ +/****** ****************************************/ +MultiTensorMessage::MultiTensorMessage(std::shared_ptr meta, + std::size_t mess_offset, + std::size_t mess_count, + std::shared_ptr memory, + std::size_t offset, + std::size_t count) : + DerivedMultiMessage(meta, mess_offset, mess_count), + memory(std::move(memory)), + offset(offset), + count(count) +{} + +const TensorObject MultiTensorMessage::get_tensor(const std::string &name) const +{ + return get_tensor_impl(name); +} + +TensorObject MultiTensorMessage::get_tensor(const std::string &name) +{ + return get_tensor_impl(name); +} + +TensorObject MultiTensorMessage::get_tensor_impl(const std::string &name) const +{ + CHECK(this->memory->has_tensor(name)) << "Cound not find tensor: " << name; + + // check if we are getting the entire input + if (this->offset == 0 && this->count == this->memory->count) + { + return this->memory->tensors[name]; + } + + return this->memory->tensors[name].slice({static_cast(this->offset), 0}, + {static_cast(this->offset + this->count), -1}); +} + +void MultiTensorMessage::set_tensor(const std::string &name, const TensorObject &value) +{ + // Get the input slice first + auto slice = this->get_tensor(name); + + // Set the value to use assignment + slice = value; +} + +void MultiTensorMessage::get_slice_impl(std::shared_ptr new_message, + std::size_t start, + std::size_t stop) const +{ + DCHECK(std::dynamic_pointer_cast(new_message) != nullptr); + auto sliced_message = std::static_pointer_cast(new_message); + + sliced_message->offset = start; + sliced_message->count = stop - start; + + // If we have more inference rows than message rows, we need to use the seq_ids to figure out the slicing. This + // will be slow and should be avoided at all costs + if (this->count != this->mess_count && this->memory->has_tensor("seq_ids")) + { + auto seq_ids = this->get_tensor("seq_ids"); + + // Determine the new start and stop before passing onto the base + start = seq_ids.read_element({(TensorIndex)start, 0}); + stop = seq_ids.read_element({(TensorIndex)stop - 1, 0}) + 1; + } + + // Pass onto the base + DerivedMultiMessage::get_slice_impl(new_message, start, stop); +} + +void MultiTensorMessage::copy_ranges_impl(std::shared_ptr new_message, + const std::vector> &ranges, + size_t num_selected_rows) const +{ + DCHECK(std::dynamic_pointer_cast(new_message) != nullptr); + auto copied_message = std::static_pointer_cast(new_message); + DerivedMultiMessage::copy_ranges_impl(copied_message, ranges, num_selected_rows); + + copied_message->offset = 0; + copied_message->count = num_selected_rows; + copied_message->memory = copy_input_ranges(ranges, num_selected_rows); +} + +std::shared_ptr MultiTensorMessage::copy_input_ranges( + const std::vector> &ranges, size_t num_selected_rows) const +{ + auto offset_ranges = apply_offset_to_ranges(offset, ranges); + auto tensors = memory->copy_tensor_ranges(offset_ranges, num_selected_rows); + return std::make_shared(num_selected_rows, std::move(tensors)); +} + +} // namespace morpheus diff --git a/morpheus/_lib/src/stages/file_source.cpp b/morpheus/_lib/src/stages/file_source.cpp index 9db0dd895f..428d29f03f 100644 --- a/morpheus/_lib/src/stages/file_source.cpp +++ b/morpheus/_lib/src/stages/file_source.cpp @@ -17,6 +17,8 @@ #include "morpheus/stages/file_source.hpp" +#include "morpheus/io/deserializers.hpp" + #include // for column #include #include @@ -55,35 +57,11 @@ FileSourceStage::FileSourceStage(std::string filename, int repeat) : FileSourceStage::subscriber_fn_t FileSourceStage::build() { return [this](rxcpp::subscriber output) { - auto data_table = this->load_table(); - - // Using 0 will default to creating a new range index - int index_col_count = 0; - - // Check if we have a first column with INT64 data type - if (data_table.metadata.column_names.size() >= 1 && - data_table.tbl->get_column(0).type().id() == cudf::type_id::INT64) - { - std::regex index_regex(R"((unnamed: 0|id))", - std::regex_constants::ECMAScript | std::regex_constants::icase); - - // Get the column name - auto col_name = data_table.metadata.column_names[0]; - - // Check it against some common terms - if (std::regex_search(col_name, index_regex)) - { - // Also, if its the hideous 'Unnamed: 0', then just use an empty string - if (col_name == "Unnamed: 0") - { - data_table.metadata.column_names[0] = ""; - } - - index_col_count = 1; - } - } + auto data_table = load_table_from_file(m_filename); + int index_col_count = get_index_col_count(data_table); // Next, create the message metadata. This gets reused for repeats + // When index_col_count is 0 this will cause a new range index to be created auto meta = MessageMeta::create_from_cpp(std::move(data_table), index_col_count); // Always push at least 1 @@ -114,54 +92,6 @@ FileSourceStage::subscriber_fn_t FileSourceStage::build() }; } -cudf::io::table_with_metadata FileSourceStage::load_table() -{ - auto file_path = std::filesystem::path(m_filename); - - if (file_path.extension() == ".json" || file_path.extension() == ".jsonlines") - { - // First, load the file into json - auto options = cudf::io::json_reader_options::builder(cudf::io::source_info{m_filename}).lines(true); - - auto tbl = cudf::io::read_json(options.build()); - - auto found = std::find(tbl.metadata.column_names.begin(), tbl.metadata.column_names.end(), "data"); - - if (found == tbl.metadata.column_names.end()) - return tbl; - - // Super ugly but cudf cant handle newlines and add extra escapes. So we need to convert - // \\n -> \n - // \\/ -> \/ - auto columns = tbl.tbl->release(); - - size_t idx = found - tbl.metadata.column_names.begin(); - - auto updated_data = cudf::strings::replace( - cudf::strings_column_view{columns[idx]->view()}, cudf::string_scalar("\\n"), cudf::string_scalar("\n")); - - updated_data = cudf::strings::replace( - cudf::strings_column_view{updated_data->view()}, cudf::string_scalar("\\/"), cudf::string_scalar("/")); - - columns[idx] = std::move(updated_data); - - tbl.tbl = std::move(std::make_unique(std::move(columns))); - - return tbl; - } - else if (file_path.extension() == ".csv") - { - auto options = cudf::io::csv_reader_options::builder(cudf::io::source_info{m_filename}); - - return cudf::io::read_csv(options.build()); - } - else - { - LOG(FATAL) << "Unknown extension for file: " << m_filename; - throw std::runtime_error("Unknown extension"); - } -} - // ************ FileSourceStageInterfaceProxy ************ // std::shared_ptr> FileSourceStageInterfaceProxy::init( srf::segment::Builder &builder, const std::string &name, std::string filename, int repeat) diff --git a/morpheus/_lib/src/stages/kafka_source.cpp b/morpheus/_lib/src/stages/kafka_source.cpp index a4eeaecfe1..801e0151e4 100644 --- a/morpheus/_lib/src/stages/kafka_source.cpp +++ b/morpheus/_lib/src/stages/kafka_source.cpp @@ -17,6 +17,7 @@ #include "morpheus/stages/kafka_source.hpp" +#include "morpheus/io/deserializers.hpp" #include "morpheus/messages/meta.hpp" #include "morpheus/utilities/stage_util.hpp" #include "morpheus/utilities/string_util.hpp" @@ -519,31 +520,7 @@ cudf::io::table_with_metadata KafkaSourceStage::load_table(const std::string &bu auto options = cudf::io::json_reader_options::builder(cudf::io::source_info(buffer.c_str(), buffer.size())).lines(true); - auto tbl = cudf::io::read_json(options.build()); - - auto found = std::find(tbl.metadata.column_names.begin(), tbl.metadata.column_names.end(), "data"); - - if (found == tbl.metadata.column_names.end()) - return tbl; - - // Super ugly but cudf cant handle newlines and add extra escapes. So we need to convert - // \\n -> \n - // \\/ -> \/ - auto columns = tbl.tbl->release(); - - size_t idx = found - tbl.metadata.column_names.begin(); - - auto updated_data = cudf::strings::replace( - cudf::strings_column_view{columns[idx]->view()}, cudf::string_scalar("\\n"), cudf::string_scalar("\n")); - - updated_data = cudf::strings::replace( - cudf::strings_column_view{updated_data->view()}, cudf::string_scalar("\\/"), cudf::string_scalar("/")); - - columns[idx] = std::move(updated_data); - - tbl.tbl = std::move(std::make_unique(std::move(columns))); - - return tbl; + return load_json_table(options.build()); } template diff --git a/morpheus/_lib/src/stages/triton_inference.cpp b/morpheus/_lib/src/stages/triton_inference.cpp index da58a5e063..434035f16f 100644 --- a/morpheus/_lib/src/stages/triton_inference.cpp +++ b/morpheus/_lib/src/stages/triton_inference.cpp @@ -17,9 +17,9 @@ #include "morpheus/stages/triton_inference.hpp" -#include "morpheus/messages/memory/inference_memory.hpp" // for InferenceMemory -#include "morpheus/messages/memory/response_memory.hpp" // for ResponseMemory -#include "morpheus/messages/memory/tensor_memory.hpp" // for TensorMemory::tensor_map_t +#include "morpheus/messages/memory/inference_memory.hpp" // for InferenceMemory +#include "morpheus/messages/memory/response_memory_probs.hpp" // for ResponseMemoryProbs +#include "morpheus/messages/memory/tensor_memory.hpp" // for TensorMemory::tensor_map_t #include "morpheus/messages/multi_response_probs.hpp" #include "morpheus/objects/dev_mem_info.hpp" // for DevMemInfo #include "morpheus/objects/tensor.hpp" @@ -113,7 +113,7 @@ InferenceClientStage::subscribe_fn_t InferenceClientStage::build_operator() // When our tensor lengths are longer than our dataframe we will need to use the seq_ids // array to lookup how the values should map back into the dataframe const bool needs_seq_ids = x->mess_count != x->count; - auto reponse_memory = std::make_shared(x->mess_count); + std::map response_outputs; // Create the output memory blocks for (auto &model_output : m_model_outputs) @@ -128,13 +128,19 @@ InferenceClientStage::subscribe_fn_t InferenceClientStage::build_operator() auto output_buffer = std::make_shared( elem_count * model_output.datatype.item_size(), rmm::cuda_stream_per_thread); - reponse_memory->tensors[model_output.mapped_name] = Tensor::create( + response_outputs[model_output.mapped_name] = Tensor::create( std::move(output_buffer), model_output.datatype, total_shape, std::vector{}, 0); } // This will be the final output of all mini-batches - auto response = std::make_shared( - x->meta, x->mess_offset, x->mess_count, std::move(reponse_memory), 0, reponse_memory->count); + auto response_mem_probs = + std::make_shared(x->mess_count, std::move(response_outputs)); + auto response = std::make_shared(x->meta, + x->mess_offset, + x->mess_count, + std::move(response_mem_probs), + 0, + response_mem_probs->count); std::unique_ptr> host_seq_ids{nullptr}; if (needs_seq_ids) @@ -184,7 +190,7 @@ InferenceClientStage::subscribe_fn_t InferenceClientStage::build_operator() // Iterate on the model inputs in case the model takes less than what tensors are available std::vector, std::vector>> saved_inputs = foreach_map(m_model_inputs, [this, &mini_batch_input](auto const &model_input) { - DCHECK(mini_batch_input->memory->has_input(model_input.mapped_name)) + DCHECK(mini_batch_input->memory->has_tensor(model_input.mapped_name)) << "Model input '" << model_input.mapped_name << "' not found in InferenceMemory"; auto const &inp_tensor = mini_batch_input->get_input(model_input.mapped_name); diff --git a/morpheus/_lib/tests/CMakeLists.txt b/morpheus/_lib/tests/CMakeLists.txt index 4e5217574e..b2d4beb9ed 100644 --- a/morpheus/_lib/tests/CMakeLists.txt +++ b/morpheus/_lib/tests/CMakeLists.txt @@ -20,7 +20,6 @@ add_executable(test_libmorpheus # test_cuda.cu test_main.cpp test_matx_util.cpp - test_morpheus.cpp test_multi_slices.cpp test_tensor.cpp test_type_util_detail.cpp @@ -29,6 +28,7 @@ add_executable(test_libmorpheus target_link_libraries(test_libmorpheus PRIVATE cuda_utils + morpheus srf::pysrf GTest::gtest matx::matx diff --git a/morpheus/_lib/tests/test_matx_util.cpp b/morpheus/_lib/tests/test_matx_util.cpp index 53d076085c..417b3e856f 100644 --- a/morpheus/_lib/tests/test_matx_util.cpp +++ b/morpheus/_lib/tests/test_matx_util.cpp @@ -17,6 +17,7 @@ #include "./test_morpheus.hpp" // IWYU pragma: associated +#include "morpheus/io/deserializers.hpp" #include "morpheus/objects/dev_mem_info.hpp" #include "morpheus/utilities/matx_util.hpp" #include "morpheus/utilities/type_util.hpp" @@ -30,7 +31,7 @@ #include #include // for cuda_stream_per_thread #include -#include +#include // for SRF_CHECK_CUDA #include // for int64_t, int32_t, uint8_t #include // for std::getenv @@ -135,7 +136,7 @@ TEST_F(TestMatxUtil, ReduceMax2dColMajor) std::filesystem::path morpheus_root{std::getenv("MORPHEUS_ROOT")}; auto input_file = morpheus_root / "tests/tests_data/filter_probs.csv"; - auto table_m = load_table_from_csv(input_file); + auto table_m = morpheus::load_table_from_file(input_file); auto num_rows = table_m.tbl->num_rows(); auto num_cols = table_m.tbl->num_columns(); diff --git a/morpheus/_lib/tests/test_morpheus.cpp b/morpheus/_lib/tests/test_morpheus.cpp deleted file mode 100644 index 73da7bbca1..0000000000 --- a/morpheus/_lib/tests/test_morpheus.cpp +++ /dev/null @@ -1,27 +0,0 @@ -/** - * SPDX-FileCopyrightText: Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "test_morpheus.hpp" - -#include -#include // IWYU pragma: keep - -cudf::io::table_with_metadata load_table_from_csv(std::string filename) -{ - auto options = cudf::io::csv_reader_options::builder(cudf::io::source_info{filename}); - return cudf::io::read_csv(options.build()); -} diff --git a/morpheus/_lib/tests/test_morpheus.hpp b/morpheus/_lib/tests/test_morpheus.hpp index 50c97c8cba..1b8c9d2667 100644 --- a/morpheus/_lib/tests/test_morpheus.hpp +++ b/morpheus/_lib/tests/test_morpheus.hpp @@ -17,14 +17,9 @@ #pragma once -#include #include // IWYU pragma: keep #include // IWYU pragma: keep -#include - -cudf::io::table_with_metadata load_table_from_csv(std::string filename); - #define TEST_CLASS(name) \ class Test##name : public ::testing::Test \ {} diff --git a/morpheus/_lib/tests/test_multi_slices.cpp b/morpheus/_lib/tests/test_multi_slices.cpp index ee94b80701..6bb4600dc0 100644 --- a/morpheus/_lib/tests/test_multi_slices.cpp +++ b/morpheus/_lib/tests/test_multi_slices.cpp @@ -17,26 +17,56 @@ #include "./test_morpheus.hpp" // IWYU pragma: associated +#include "morpheus/io/deserializers.hpp" +#include "morpheus/messages/meta.hpp" +#include "morpheus/messages/multi_inference.hpp" +#include "morpheus/messages/multi_response.hpp" +#include "morpheus/objects/tensor.hpp" +#include "morpheus/utilities/matx_util.hpp" // for MatxUtil::create_seg_ids +#include "morpheus/utilities/type_util.hpp" // for TypeId + +#include // for cudaMemcpy, cudaMemcpyHostToDevice #include #include #include #include #include #include +#include +#include +#include // for cuda_stream_per_thread +#include +#include // for SRF_CHECK_CUDA +#include #include #include #include // for unique_ptr +#include +#include //for typeid #include -TEST_CLASS(Masking); +using namespace morpheus; +namespace py = pybind11; + +namespace { +int random_int(int lower, int upper) +{ + std::random_device r; + std::default_random_engine e1(r()); + std::uniform_int_distribution uniform_dist(lower, upper); + return uniform_dist(e1); +} +} // namespace + +TEST_CLASS(MultiSlices); -TEST_F(TestMasking, Ranges) +TEST_F(TestMultiSlices, Ranges) { std::filesystem::path morpheus_root{std::getenv("MORPHEUS_ROOT")}; auto input_file = morpheus_root / "tests/tests_data/filter_probs.csv"; - auto table_m = load_table_from_csv(input_file); + auto table_m = load_table_from_file(input_file); EXPECT_EQ(table_m.tbl->num_rows(), 20); auto table_v = table_m.tbl->view();