Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update Triton to v2.11 #7142

Merged
merged 8 commits into from
Jul 19, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmake.spec
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
### RPM external cmake 3.17.2
### RPM external cmake 3.18.2
%define downloaddir %(echo %realversion | cut -d. -f1,2)
Source: http://www.cmake.org/files/v%{downloaddir}/%n-%realversion.tar.gz
Requires: bz2lib curl expat zlib
Expand Down
2 changes: 1 addition & 1 deletion cmssw-tool-conf.spec
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ Requires: hls-toolfile
Requires: opencv-toolfile
Requires: grpc-toolfile
Requires: onnxruntime-toolfile
Requires: triton-inference-server-toolfile
Requires: triton-inference-client-toolfile
Requires: hdf5-toolfile
Requires: rivet-toolfile
Requires: cascade-toolfile
Expand Down
25 changes: 25 additions & 0 deletions triton-inference-client-toolfile.spec
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
### RPM external triton-inference-client-toolfile 1.0
Requires: triton-inference-client

%prep

%build

%install
# Create the directory that receives the tool description XML.
mkdir -p %i/etc/scram.d
# The heredoc delimiter is backslash-quoted, so the shell copies the XML
# verbatim: $TRITON_INFERENCE_CLIENT_BASE and the @TOOL_VERSION@ / @TOOL_ROOT@
# placeholders are written out literally rather than expanded here
# (presumably they are substituted later by scram-tools-post; confirm).
cat << \EOF_TOOLFILE >%i/etc/scram.d/triton-inference-client.xml
<tool name="triton-inference-client" version="@TOOL_VERSION@">
<info url="https://github.com/triton-inference-server/client"/>
<lib name="grpcclient"/>
<client>
<environment name="TRITON_INFERENCE_CLIENT_BASE" default="@TOOL_ROOT@"/>
<environment name="INCLUDE" default="$TRITON_INFERENCE_CLIENT_BASE/include"/>
<environment name="LIBDIR" default="$TRITON_INFERENCE_CLIENT_BASE/lib"/>
</client>
<use name="protobuf"/>
<use name="grpc"/>
<use name="cuda"/>
</tool>
EOF_TOOLFILE

## IMPORT scram-tools-post
106 changes: 106 additions & 0 deletions triton-inference-client.spec
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
### RPM external triton-inference-client 2.11.0
%define branch main
%define github_user triton-inference-server
%define tag_2_11_0 36cd3b3c839288c85b15e4df82cfe8fca3fff21b

Source: git+https://github.com/%{github_user}/client.git?obj=%{branch}/%{tag_2_11_0}&export=%{n}-%{realversion}&output=/%{n}-%{realversion}.tgz
Source1: triton-inference-client/model_config.h
Source2: triton-inference-client/model_config.cc
BuildRequires: cmake git
Requires: protobuf grpc cuda

%prep

%setup -n %{n}-%{realversion}

%build

# Locations of the client CMakeLists.txt files that get patched below.
# The paths go through the parent directory so that they still resolve after
# the later "cd ../build".
PROJ_DIR=../%{n}-%{realversion}/src/c++
CML_CPP=${PROJ_DIR}/CMakeLists.txt
CML_LIB=${PROJ_DIR}/library/CMakeLists.txt

# Remove the RapidJSON dependency: delete the line matching the find_package
# text plus the 13 lines after it, then every line mentioning triton-common-json.
sed -i '/RapidJSON CONFIG REQUIRED/,+13d;' ${CML_LIB}
sed -i '/triton-common-json/d' ${CML_LIB}
# The core repo is not needed for a grpc-client-only install.
sed -i '/FetchContent_MakeAvailable(repo-core)/d' ${CML_CPP}
# Remove attempts to install external libs (any line referencing the
# third-party build or source directories).
sed -i '\~/../../_deps/repo-third-party-build/~d' ${CML_LIB}
sed -i '\~/../../third-party/~d' ${CML_LIB}
# Keep typeinfo in the .so by removing the ldscript from the target properties
# (each set_target_properties line and the 5 lines after it are deleted).
sed -i '/set_target_properties/,+5d' ${CML_LIB}
# Change the warning flag because of a gcc10 bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95148
# The pipeline takes the third space-separated field of the first line of
# "gcc --version", strips the dots and compares the result numerically with
# 1000 (assumes that field is a three-part version such as 10.2.0; confirm).
if [[ `gcc --version | head -1 | cut -d' ' -f3 | cut -d. -f1,2,3 | tr -d .` -gt 1000 ]] ; then
sed -i -e "s|Werror|Wtype-limits|g" ${CML_LIB}
fi

# These two files were extracted from the Triton server repository:
# https://github.com/triton-inference-server/server/blob/v2.11.0/src/core/model_config.h
# https://github.com/triton-inference-server/server/blob/v2.11.0/src/core/model_config.cc
# They are copied next to the client library sources so they build with it.
cp %{_sourcedir}/model_config.h ${PROJ_DIR}/library/
cp %{_sourcedir}/model_config.cc ${PROJ_DIR}/library/

# Add the extra source and header to the cmake build: append them after the
# existing "grpc_client.cc common.cc" and "grpc_client.h common.h" lists, and
# add a model_config.h line after each line that names common.h under
# ${CMAKE_CURRENT_SOURCE_DIR} (single quotes keep that text literal for sed).
sed -i 's/grpc_client.cc common.cc/& model_config.cc/' ${CML_LIB}
sed -i 's/grpc_client.h common.h/& model_config.h/' ${CML_LIB}
sed -i '\~${CMAKE_CURRENT_SOURCE_DIR}/common.h~a ${CMAKE_CURRENT_SOURCE_DIR}/model_config.h' ${CML_LIB}

# Start from an empty out-of-source build directory.
rm -rf ../build
mkdir ../build
cd ../build

# Commit of the "common" repository to download (presumably the one matching
# the 2.11.0 release; confirm). The tarball is unpacked into repo-common with
# its top-level directory stripped.
# NOTE(review): "curl -k" disables TLS certificate verification for this
# download; consider dropping -k if the build hosts have a usable CA bundle.
common_tag_2_11_0=249232758855cc764c78a12964c2a5c09c388d87
mkdir repo-common && pushd repo-common && curl -k -L https://github.com/%{github_user}/common/archive/${common_tag_2_11_0}.tar.gz | tar -xz --strip=1 && popd

# Modifications to the common repo (loaded by cmake through FetchContent_MakeAvailable).
COMMON_DIR=$PWD/repo-common
CML_TOP=${COMMON_DIR}/CMakeLists.txt
CML_PRB=${COMMON_DIR}/protobuf/CMakeLists.txt

# Remove the RapidJSON dependency from the top-level CMakeLists.txt: the
# find_package line plus 1 following line, the "JSON utilities" section
# (18 lines in total), and every line mentioning triton-common-json.
sed -i '/RapidJSON CONFIG REQUIRED/,+1d;' ${CML_TOP}
sed -i '/JSON utilities/,+17d' ${CML_TOP}
sed -i '/triton-common-json/d' ${CML_TOP}
# Remove the Python dependency (matching line plus the 10 lines after it).
sed -i '/Python REQUIRED COMPONENTS Interpreter/,+10d;' ${CML_PRB}
# Change the warning flag because of a gcc10 bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95148
# Same dotted-version-to-integer comparison against 1000 as used for the
# client library CMakeLists.txt.
if [[ `gcc --version | head -1 | cut -d' ' -f3 | cut -d. -f1,2,3 | tr -d .` -gt 1000 ]] ; then
sed -i -e "s|Werror|Wtype-limits|g" ${CML_PRB}
fi

# Pick the value passed to cmake as TRITON_ENABLE_GPU: ON only when the
# cuda_gcc_support macro command prints exactly "true", OFF otherwise.
# The substitution is quoted so that empty or multi-word output is compared as
# a single string instead of producing a malformed test expression.
if [ "$(%{cuda_gcc_support})" = true ]; then
TRITON_ENABLE_GPU_VALUE=ON
else
TRITON_ENABLE_GPU_VALUE=OFF
fi

# Configure a gRPC-only C++ client library build: the HTTP client, both Python
# clients, the perf analyzer, examples and tests are switched off.
# TRITON_ENABLE_GPU takes the ON/OFF value stored in TRITON_ENABLE_GPU_VALUE.
# FETCHCONTENT_SOURCE_DIR_REPO-COMMON makes FetchContent use the local
# repo-common directory in COMMON_DIR instead of downloading the common repo.
# The final argument has no trailing backslash, so the command ends on that
# line and cannot absorb whatever statement follows it.
cmake ${PROJ_DIR} \
-DCMAKE_INSTALL_PREFIX="%{i}" \
-DCMAKE_INSTALL_LIBDIR=lib \
-DCMAKE_BUILD_TYPE=Release \
-DTRITON_ENABLE_CC_HTTP=OFF \
-DTRITON_ENABLE_CC_GRPC=ON \
-DTRITON_ENABLE_PYTHON_HTTP=OFF \
-DTRITON_ENABLE_PYTHON_GRPC=OFF \
-DTRITON_ENABLE_PERF_ANALYZER=OFF \
-DTRITON_ENABLE_EXAMPLES=OFF \
-DTRITON_ENABLE_TESTS=OFF \
-DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU_VALUE} \
-DTRITON_VERSION=%{realversion} \
-DCMAKE_CXX_FLAGS="-Wno-error -fPIC" \
-DFETCHCONTENT_SOURCE_DIR_REPO-COMMON=${COMMON_DIR}

make %{makeprocesses}

%install
# Install from the out-of-source build directory.
cd ../build
make install

# When the cuda_gcc_support macro command prints "true", insert a
# "#define TRITON_ENABLE_GPU" line before each "#ifdef TRITON_ENABLE_GPU" in
# the installed ipc.h, so the header has a consistent definition of GPU support.
# The substitution is quoted so that empty output cannot produce a malformed
# test expression.
if [ "$(%{cuda_gcc_support})" = true ] ; then
sed -i '/^#ifdef TRITON_ENABLE_GPU/i #define TRITON_ENABLE_GPU' %{i}/include/ipc.h
fi

# Remove the unneeded JSON header that the common repo installs.
rm %{i}/include/triton/common/triton_json.h
100 changes: 100 additions & 0 deletions triton-inference-client/model_config.cc.file
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
#include "model_config.h"

#include <cstring>

namespace nvidia { namespace inferenceserver {

// Returns the number of bytes occupied by one element of 'dtype'.
// TYPE_STRING and every value without a fixed size listed here yield 0.
size_t
GetDataTypeByteSize(const inference::DataType dtype)
{
  size_t byte_size = 0;

  switch (dtype) {
    // 8-bit types
    case inference::DataType::TYPE_BOOL:
    case inference::DataType::TYPE_UINT8:
    case inference::DataType::TYPE_INT8:
      byte_size = 1;
      break;

    // 16-bit types
    case inference::DataType::TYPE_UINT16:
    case inference::DataType::TYPE_INT16:
    case inference::DataType::TYPE_FP16:
      byte_size = 2;
      break;

    // 32-bit types
    case inference::DataType::TYPE_UINT32:
    case inference::DataType::TYPE_INT32:
    case inference::DataType::TYPE_FP32:
      byte_size = 4;
      break;

    // 64-bit types
    case inference::DataType::TYPE_UINT64:
    case inference::DataType::TYPE_INT64:
    case inference::DataType::TYPE_FP64:
      byte_size = 8;
      break;

    // Strings have no fixed element size; anything else is reported as 0 too.
    case inference::DataType::TYPE_STRING:
    default:
      break;
  }

  return byte_size;
}

// Convenience overload: converts a protocol data-type name held in a
// std::string by forwarding its character buffer and size to the
// (const char*, size_t) overload.
inference::DataType
ProtocolStringToDataType(const std::string& dtype)
{
  const char* const name = dtype.c_str();
  const size_t name_len = dtype.size();
  return ProtocolStringToDataType(name, name_len);
}

// Converts a protocol data-type name to the corresponding inference::DataType.
//
// dtype: pointer to the first character of the name. It does not have to be
//        NUL-terminated; only the first 'len' bytes are ever read.
// len:   number of characters in the name.
//
// Returns the matching data type ("BYTES" maps to TYPE_STRING), or
// TYPE_INVALID when 'dtype' is null, 'len' is outside 4..6, or the 'len'
// bytes do not spell exactly one of the known names.
inference::DataType
ProtocolStringToDataType(const char* dtype, size_t len)
{
  struct NamedType {
    const char* name;
    inference::DataType type;
  };
  static const NamedType kNamedTypes[] = {
      {"BOOL", inference::DataType::TYPE_BOOL},
      {"UINT8", inference::DataType::TYPE_UINT8},
      {"UINT16", inference::DataType::TYPE_UINT16},
      {"UINT32", inference::DataType::TYPE_UINT32},
      {"UINT64", inference::DataType::TYPE_UINT64},
      {"INT8", inference::DataType::TYPE_INT8},
      {"INT16", inference::DataType::TYPE_INT16},
      {"INT32", inference::DataType::TYPE_INT32},
      {"INT64", inference::DataType::TYPE_INT64},
      {"FP16", inference::DataType::TYPE_FP16},
      {"FP32", inference::DataType::TYPE_FP32},
      {"FP64", inference::DataType::TYPE_FP64},
      {"BYTES", inference::DataType::TYPE_STRING},
  };

  // Every known name is 4 to 6 characters long, so anything else (or a null
  // pointer) can be rejected before touching the buffer.
  if ((dtype == nullptr) || (len < 4) || (len > 6)) {
    return inference::DataType::TYPE_INVALID;
  }

  // Compare exactly 'len' bytes so the lookup never reads past the caller's
  // buffer and never depends on a terminating NUL.
  for (const NamedType& candidate : kNamedTypes) {
    if ((std::strlen(candidate.name) == len) &&
        (std::memcmp(dtype, candidate.name, len) == 0)) {
      return candidate.type;
    }
  }

  return inference::DataType::TYPE_INVALID;
}

}} // namespace nvidia::inferenceserver
14 changes: 14 additions & 0 deletions triton-inference-client/model_config.h.file
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#pragma once

#include <stdint.h>
#include "model_config.pb.h"

namespace nvidia { namespace inferenceserver {

// Returns the size in bytes of a single element of 'dtype'.
// Returns 0 for TYPE_STRING and for any value that has no fixed size.
size_t GetDataTypeByteSize(const inference::DataType dtype);

// Converts a protocol data-type name (for example "INT8", "FP32" or "BYTES")
// to the corresponding inference::DataType. "BYTES" maps to TYPE_STRING.
// Returns TYPE_INVALID when the name is not recognized.
inference::DataType ProtocolStringToDataType(const std::string& dtype);

// Same conversion for a character buffer. 'len' is the number of characters
// in the name; a length outside 4..6 always yields TYPE_INVALID.
inference::DataType ProtocolStringToDataType(const char* dtype, size_t len);

}} // namespace nvidia::inferenceserver
26 changes: 0 additions & 26 deletions triton-inference-server-toolfile.spec

This file was deleted.

70 changes: 0 additions & 70 deletions triton-inference-server.spec

This file was deleted.