Skip to content

Commit

Permalink
Update Rapids version to 22.10 (#617)
Browse files Browse the repository at this point in the history
Closes #486
Closes #251

Authors:
  - Eli Fajardo (https://github.com/efajardo-nv)
  - Michael Demoret (https://github.com/mdemoret-nv)

Approvers:
  - Devin Robison (https://github.com/drobison00)
  - David Gardner (https://github.com/dagardner-nv)
  - Michael Demoret (https://github.com/mdemoret-nv)

URL: #617
  • Loading branch information
efajardo-nv authored Jan 30, 2023
1 parent 41b182e commit 312bfba
Show file tree
Hide file tree
Showing 30 changed files with 260 additions and 136 deletions.
2 changes: 1 addition & 1 deletion .devcontainer/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ services:
triton:
container_name: morpheus-triton
runtime: nvidia
image: nvcr.io/nvidia/tritonserver:22.08-py3
image: nvcr.io/nvidia/tritonserver:22.10-py3
command: tritonserver --model-repository=/models --exit-on-error=false ${TRITON_MODEL_ARGS}
ports:
- 8000:8000
Expand Down
4 changes: 2 additions & 2 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@
branch = branch-23.01
[submodule "morpheus_utils"]
path = external/utilities
url = ../../nv-morpheus/utilities.git
branch = branch-23.01
url = https://github.com/nv-morpheus/utilities.git
branch = branch-23.01
6 changes: 4 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ option(MORPHEUS_USE_IWYU "Enable running include-what-you-use as part of the bui
option(MORPHEUS_BUILD_DOCS "Enable building of API documentation" OFF)
set(MORPHEUS_PY_INSTALL_DIR "${CMAKE_CURRENT_BINARY_DIR}/wheel" CACHE STRING "Location to install the python directory")

set(MORPHEUS_RAPIDS_CMAKE_VERSION "22.08" CACHE STRING "Sets default versions for RAPIDS CMake.")
set(MORPHEUS_RAPIDS_VERSION "22.10" CACHE STRING "Sets default versions for RAPIDS libraries.")
set(MORPHEUS_CACHE_DIR "${CMAKE_SOURCE_DIR}/.cache" CACHE PATH "Directory to contain all CPM and CCache data")
mark_as_advanced(MORPHEUS_CACHE_DIR)

Expand Down Expand Up @@ -68,8 +68,10 @@ set(MORPHEUS_CMAKE_PREFIX_PATH_EXTENSIONS
list(PREPEND CMAKE_MODULE_PATH "${MORPHEUS_CMAKE_MODULE_PATH_EXTENSIONS}")
list(PREPEND CMAKE_PREFIX_PATH "${MORPHEUS_CMAKE_PREFIX_PATH_EXTENSIONS}")

# Force the MORPHEUS_UTILS_RAPIDS_VERSION to match our value
set(MORPHEUS_UTILS_RAPIDS_VERSION ${MORPHEUS_RAPIDS_VERSION} CACHE STRING "" FORCE)

# Load morpheus utils and update CMake paths
set(MORPHEUS_UTILS_RAPIDS_CMAKE_VERSION ${MORPHEUS_RAPIDS_CMAKE_VERSION})
include(morpheus_utils/load)

# Configure project package manager
Expand Down
4 changes: 1 addition & 3 deletions ci/conda/recipes/morpheus/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
{% set py_version=environ.get('CONDA_PY', '3.8') %}
{% set cuda_version='.'.join(environ.get('CUDA', '11.5').split('.')[:2]) %}
{% set cuda_major=cuda_version.split('.')[0] %}
{% set rapids_version = "22.08" %}
{% set rapids_version = "22.10" %}

package:
name: morpheus-split
Expand Down Expand Up @@ -55,7 +55,6 @@ outputs:
- ccache
- ninja
host:
- cuda-python <=11.7.0 # Remove when Issue #251 is closed
- cudf {{ rapids_version }}
- cython >=0.29,<0.30
- libcudf {{ rapids_version }}
Expand All @@ -71,7 +70,6 @@ outputs:
# Runtime only requirements. This + setup.py is the definitive runtime requirement list
- click >=8
- configargparse 1.5
- cuda-python <=11.7.0 # Remove when Issue #251 is closed
- cudf {{ rapids_version }}
- cudf_kafka {{ rapids_version }}
- cupy # Version determined from cudf
Expand Down
2 changes: 1 addition & 1 deletion docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ FROM ${FROM_IMAGE}:${CUDA_VER}-devel-${LINUX_DISTRO}${LINUX_VER} AS base
# Required arguments
ARG IMAGE_TYPE=base
ARG RAPIDS_CHANNEL=rapidsai-nightly
ARG RAPIDS_VER=22.08
ARG RAPIDS_VER=22.10
ARG PYTHON_VER=3.8
ARG CONDA_CHANNEL=rapidsai

Expand Down
2 changes: 1 addition & 1 deletion docker/build_container.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ FROM_IMAGE=${FROM_IMAGE:-"gpuci/miniforge-cuda"}
CUDA_VER=${CUDA_VER:-11.5}
LINUX_DISTRO=${LINUX_DISTRO:-ubuntu}
LINUX_VER=${LINUX_VER:-20.04}
RAPIDS_VER=${RAPIDS_VER:-22.08}
RAPIDS_VER=${RAPIDS_VER:-22.10}
PYTHON_VER=${PYTHON_VER:-3.8}
TENSORRT_VERSION=${TENSORRT_VERSION:-8.2.1.3}

Expand Down
5 changes: 2 additions & 3 deletions docker/conda/environments/cuda11.5_dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,8 @@ dependencies:
- cmake=3.24
- configargparse=1.5
- cuda-nvml-dev=11.5
- cuda-python<=11.5.0 # Remove when Issue #251 is closed
- cudatoolkit=11.5
- cudf 22.08
- cudf 22.10
- cupy=9.5.0
- cython=0.29.24
- datacompy=0.8
Expand Down Expand Up @@ -63,7 +62,7 @@ dependencies:
- networkx=2.8
- ninja=1.10
- nodejs=17.4.0
- numba==0.55
- numba>=0.56.2
- numpydoc=1.4
- nvcc_linux-64=11.5
- pandas=1.3
Expand Down
2 changes: 2 additions & 0 deletions morpheus/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ list(APPEND CMAKE_MESSAGE_CONTEXT "morpheus")

option(MORPHEUS_PYTHON_INPLACE_BUILD
"Whether or not to copy built python modules back to the source tree for debug purposes." OFF)
option(MORPHEUS_PYTHON_PERFORM_INSTALL
"Whether or not to automatically `pip install` any built python library. WARNING: This may overwrite any existing installation of the same name." OFF)
option(MORPHEUS_BUILD_PYTHON_STUBS
"Whether or not to generate stubs for python files." ON)

Expand Down
21 changes: 18 additions & 3 deletions morpheus/_lib/cudf_helpers.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ from libcpp.vector cimport vector

from cudf._lib.column cimport Column
from cudf._lib.cpp.column.column_view cimport column_view
from cudf._lib.cpp.io.types cimport column_name_info
from cudf._lib.cpp.io.types cimport table_metadata
from cudf._lib.cpp.io.types cimport table_with_metadata
from cudf._lib.cpp.table.table_view cimport table_view
Expand Down Expand Up @@ -60,9 +61,23 @@ cdef public api:
index_names = None

if (index_col_count > 0):
index_names = [x.decode() for x in table.metadata.column_names[0:index_col_count]]

column_names = [x.decode() for x in table.metadata.column_names[index_col_count:]]
index_names = []

# Need to support both column_names and schema_info
if (table.metadata.column_names.size() > 0):
index_names = [x.decode() for x in table.metadata.column_names[0:index_col_count]]
elif (table.metadata.schema_info.size() > 0):
for i in range(min(index_col_count, table.metadata.schema_info.size())):
index_names.append(table.metadata.schema_info[i].name.decode())

column_names = []

# Need to support both column_names and schema_info
if (table.metadata.column_names.size() > 0):
column_names = [x.decode() for x in table.metadata.column_names[index_col_count:]]
elif (table.metadata.schema_info.size() > 0):
for i in range(index_col_count, table.metadata.schema_info.size()):
column_names.append(table.metadata.schema_info[i].name.decode())

data, index = data_from_unique_ptr(move(table.tbl), column_names=column_names, index_names=index_names)

Expand Down
9 changes: 9 additions & 0 deletions morpheus/_lib/include/morpheus/io/deserializers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,15 @@ namespace morpheus {
* @file
*/

/**
 * @brief Get the column names from a table object. Looks at both `column_names` and `schema_info` in the table's
 * metadata and returns whichever one is populated; `column_names` takes precedence when it is non-empty.
 *
 * @param table The table to pull the column names from
 * @return std::vector<std::string> The column names, or an empty vector if neither `column_names` nor `schema_info`
 * is populated
 */
std::vector<std::string> get_column_names_from_table(const cudf::io::table_with_metadata& table);

/**
* @brief Loads a cudf table from either CSV or JSON file
*
Expand Down
6 changes: 0 additions & 6 deletions morpheus/_lib/include/morpheus/utilities/type_util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,5 @@ struct DType : DataType // NOLINT
static DType from_triton(const std::string& type_str);
};

template <typename T>
DType type_to_dtype()
{
return DType::from_triton(cudf::type_to_id<T>);
}

/** @} */ // end of group
} // namespace morpheus
38 changes: 32 additions & 6 deletions morpheus/_lib/src/io/deserializers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

#include "morpheus/io/deserializers.hpp"

#include "morpheus/utilities/stage_util.hpp"

#include <cudf/io/csv.hpp>
#include <cudf/io/json.hpp>
#include <cudf/scalar/scalar.hpp> // for string_scalar
Expand All @@ -25,27 +27,51 @@
#include <cudf/types.hpp> // for cudf::type_id
#include <glog/logging.h>

#include <algorithm>
#include <filesystem>
#include <ostream> // needed for logging
#include <regex>

namespace morpheus {

/**
 * Returns the column names stored in the metadata of `table`.
 *
 * The names are taken from `metadata.column_names` when that vector is non-empty; otherwise they are built from the
 * `name` member of each entry in `metadata.schema_info`. An empty vector is returned when both are empty.
 */
std::vector<std::string> get_column_names_from_table(const cudf::io::table_with_metadata& table)
{
    const auto& metadata = table.metadata;

    // Only one of the two sources is expected to be populated; flag the ambiguous case in debug builds
    DCHECK(!(!metadata.column_names.empty() && !metadata.schema_info.empty()))
        << "Both column_names and schema_info were set on the table_with_metadata object. Defaulting to column_names";

    // If column_names is populated, use that
    if (!metadata.column_names.empty())
    {
        return metadata.column_names;
    }

    // Otherwise, use schema_info. Each entry is taken by const reference so that only the name string is copied,
    // rather than copying the whole schema entry just to read one member.
    if (!metadata.schema_info.empty())
    {
        return foreach_map(metadata.schema_info, [](const auto& schema) { return schema.name; });
    }

    // Neither source has any names
    return {};
}

cudf::io::table_with_metadata load_json_table(cudf::io::json_reader_options&& json_options)
{
auto tbl = cudf::io::read_json(json_options);

auto found = std::find(tbl.metadata.column_names.begin(), tbl.metadata.column_names.end(), "data");
auto column_names = get_column_names_from_table(tbl);

auto found = std::find(column_names.begin(), column_names.end(), "data");

if (found == tbl.metadata.column_names.end())
if (found == column_names.end())
return tbl;

// Super ugly, but cuDF can't handle newlines and adds extra escapes, so we need to convert
// \\n -> \n
// \\/ -> \/
auto columns = tbl.tbl->release();

size_t idx = found - tbl.metadata.column_names.begin();
size_t idx = found - column_names.begin();

auto updated_data = cudf::strings::replace(
cudf::strings_column_view{columns[idx]->view()}, cudf::string_scalar("\\n"), cudf::string_scalar("\n"));
Expand Down Expand Up @@ -87,21 +113,21 @@ int get_index_col_count(cudf::io::table_with_metadata& data_table)
int index_col_count = 0;

// Check if we have a first column with INT64 data type
if (data_table.metadata.column_names.size() >= 1 &&
if (data_table.metadata.schema_info.size() >= 1 &&
data_table.tbl->get_column(0).type().id() == cudf::type_id::INT64)
{
std::regex index_regex(R"((unnamed: 0|id))", std::regex_constants::ECMAScript | std::regex_constants::icase);

// Get the column name
auto col_name = data_table.metadata.column_names[0];
auto col_name = data_table.metadata.schema_info[0].name;

// Check it against some common terms
if (std::regex_search(col_name, index_regex))
{
// Also, if it's the hideous 'Unnamed: 0', then just use an empty string
if (col_name == "Unnamed: 0")
{
data_table.metadata.column_names[0] = "";
data_table.metadata.schema_info[0].name = "";
}

index_col_count = 1;
Expand Down
12 changes: 11 additions & 1 deletion morpheus/_lib/src/io/serializers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,10 +115,20 @@ void df_to_csv(const TableInfo& tbl, std::ostream& out_stream, bool include_head
.false_value("False"s);

cudf::io::table_metadata metadata{};

if (include_header)
{
metadata.column_names = column_names;
options_builder = options_builder.metadata(&metadata);

// After cuDF PR #11364, use schema_info instead of column_names (actually just set both)
metadata.schema_info = std::vector<cudf::io::column_name_info>();

for (auto& name : column_names)
{
metadata.schema_info.emplace_back(cudf::io::column_name_info{name});
}

options_builder = options_builder.metadata(&metadata);
}

cudf::io::write_csv(options_builder.build(), rmm::mr::get_current_device_resource());
Expand Down
14 changes: 7 additions & 7 deletions morpheus/_lib/src/messages/meta.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,14 +106,14 @@ TableInfo MessageMeta::get_info() const
return this->m_data->get_info();
}

std::shared_ptr<MessageMeta> MessageMeta::create_from_python(pybind11::object &&data_table)
std::shared_ptr<MessageMeta> MessageMeta::create_from_python(pybind11::object&& data_table)
{
auto data = std::make_unique<PyDataTable>(std::move(data_table));

return std::shared_ptr<MessageMeta>(new MessageMeta(std::move(data)));
}

std::shared_ptr<MessageMeta> MessageMeta::create_from_cpp(cudf::io::table_with_metadata &&data_table,
std::shared_ptr<MessageMeta> MessageMeta::create_from_cpp(cudf::io::table_with_metadata&& data_table,
int index_col_count)
{
// Convert to py first
Expand All @@ -126,7 +126,7 @@ std::shared_ptr<MessageMeta> MessageMeta::create_from_cpp(cudf::io::table_with_m

MessageMeta::MessageMeta(std::shared_ptr<IDataTable> data) : m_data(std::move(data)) {}

pybind11::object MessageMeta::cpp_to_py(cudf::io::table_with_metadata &&table, int index_col_count)
pybind11::object MessageMeta::cpp_to_py(cudf::io::table_with_metadata&& table, int index_col_count)
{
pybind11::gil_scoped_acquire gil;

Expand All @@ -145,17 +145,17 @@ pybind11::object MessageMeta::cpp_to_py(cudf::io::table_with_metadata &&table, i
}

/********** MessageMetaInterfaceProxy **********/
std::shared_ptr<MessageMeta> MessageMetaInterfaceProxy::init_python(pybind11::object &&data_frame)
std::shared_ptr<MessageMeta> MessageMetaInterfaceProxy::init_python(pybind11::object&& data_frame)
{
return MessageMeta::create_from_python(std::move(data_frame));
}

cudf::size_type MessageMetaInterfaceProxy::count(MessageMeta &self)
cudf::size_type MessageMetaInterfaceProxy::count(MessageMeta& self)
{
return self.count();
}

pybind11::object MessageMetaInterfaceProxy::get_data_frame(MessageMeta &self)
pybind11::object MessageMetaInterfaceProxy::get_data_frame(MessageMeta& self)
{
// // Get the column and convert to cudf
// auto py_table_struct = make_table_from_view_and_meta(self.m_pydf;.tbl->view(),
Expand All @@ -168,7 +168,7 @@ pybind11::object MessageMetaInterfaceProxy::get_data_frame(MessageMeta &self)
return self.get_py_table();
}

std::shared_ptr<MessageMeta> MessageMetaInterfaceProxy::init_cpp(const std::string &filename)
std::shared_ptr<MessageMeta> MessageMetaInterfaceProxy::init_cpp(const std::string& filename)
{
// Load the file
auto df_with_meta = CuDFTableUtil::load_table(filename);
Expand Down
Loading

0 comments on commit 312bfba

Please sign in to comment.