Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Triton data converter (CMSSW_11_2_0_pre9) #2

Open
wants to merge 21 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions HeterogeneousCore/SonicTriton/interface/TritonConverterBase.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#ifndef __TritonConverterBase_H__
drankincms marked this conversation as resolved.
Show resolved Hide resolved
#define __TritonConverterBase_H__

#include "FWCore/ParameterSet/interface/ParameterSet.h"
#include "DataFormats/Common/interface/Handle.h"

#include <string>

template <typename DT>
class TritonConverterBase {
drankincms marked this conversation as resolved.
Show resolved Hide resolved
public:
TritonConverterBase(const edm::ParameterSet& conf)
: converterName_(conf.getParameter<std::string>("converterName")), byteSize_(sizeof(DT)) {}
TritonConverterBase(const edm::ParameterSet& conf, size_t byteSize)
: converterName_(conf.getParameter<std::string>("converterName")), byteSize_(byteSize) {}
TritonConverterBase(const TritonConverterBase&) = delete;
virtual ~TritonConverterBase() = default;
TritonConverterBase& operator=(const TritonConverterBase&) = delete;

virtual const uint8_t* convertIn(const DT* in) = 0;
drankincms marked this conversation as resolved.
Show resolved Hide resolved
virtual const DT* convertOut(const uint8_t* in) = 0;

virtual const int64_t getByteSize() const { return byteSize_; }
drankincms marked this conversation as resolved.
Show resolved Hide resolved

const std::string& name() const { return converterName_; }

private:
const std::string converterName_;
const int64_t byteSize_;
};

#include "FWCore/PluginManager/interface/PluginFactory.h"

// Plugin factory type for converters of element type DT: a factory entry
// takes the converter's ParameterSet and yields a TritonConverterBase<DT>.
template <typename DT>
using TritonConverterFactory = edmplugin::PluginFactory<TritonConverterBase<DT>*(const edm::ParameterSet&)>;
drankincms marked this conversation as resolved.
Show resolved Hide resolved

#endif
17 changes: 17 additions & 0 deletions HeterogeneousCore/SonicTriton/interface/TritonData.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
#include "FWCore/Utilities/interface/Exception.h"
#include "FWCore/Utilities/interface/Span.h"

#include "HeterogeneousCore/SonicTriton/interface/TritonConverterBase.h"

#include <vector>
#include <string>
#include <unordered_map>
Expand Down Expand Up @@ -40,6 +42,13 @@ class TritonData {
bool setShape(const ShapeType& newShape) { return setShape(newShape, true); }
bool setShape(unsigned loc, int64_t val) { return setShape(loc, val, true); }

// Remember the converter configuration for later converter creation.
// A copy of the full ParameterSet is kept, and its "converterName" string is
// cached separately so the plugin can be looked up by name.
void setConverterParams(const edm::ParameterSet& conf) {
  converterConf_ = conf;
  converterName_ = converterConf_.getParameter<std::string>("converterName");
}
// Creates a converter for element type DT; the caller owns the returned object.
// The definitions are provided as specializations for the input and output
// data classes.
template <typename DT>
std::unique_ptr<TritonConverterBase<DT>> createConverter() const;

//io accessors
template <typename DT>
void toServer(std::shared_ptr<TritonInput<DT>> ptr);
Expand Down Expand Up @@ -93,6 +102,8 @@ class TritonData {
int64_t byteSize_;
std::any holder_;
std::shared_ptr<Result> result_;
edm::ParameterSet converterConf_;
std::string converterName_;
};

using TritonInputData = TritonData<nvidia::inferenceserver::client::InferInput>;
Expand All @@ -108,6 +119,12 @@ template <>
template <typename DT>
TritonOutput<DT> TritonOutputData::fromServer() const;
template <>
template <typename DT>
std::unique_ptr<TritonConverterBase<DT>> TritonOutputData::createConverter() const;
template <>
template <typename DT>
std::unique_ptr<TritonConverterBase<DT>> TritonInputData::createConverter() const;
template <>
void TritonInputData::reset();
template <>
void TritonOutputData::reset();
Expand Down
11 changes: 11 additions & 0 deletions HeterogeneousCore/SonicTriton/plugins/BuildFile.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<library name="HeterogeneousCoreSonicTritonPlugins_converters" file="converters/*.cc">
drankincms marked this conversation as resolved.
Show resolved Hide resolved
<use name="FWCore/Utilities"/>
<use name="FWCore/ParameterSet"/>
<use name="FWCore/PluginManager"/>
<use name="DataFormats/Common"/>
drankincms marked this conversation as resolved.
Show resolved Hide resolved
<use name="HeterogeneousCore/SonicTriton"/>
drankincms marked this conversation as resolved.
Show resolved Hide resolved
<use name="triton-inference-server"/>
<use name="protobuf"/>
<use name="hls"/>
drankincms marked this conversation as resolved.
Show resolved Hide resolved
<flags EDM_PLUGIN="1"/>
</library>
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#include "HeterogeneousCore/SonicTriton/interface/TritonConverterBase.h"

#include "ap_fixed.h"

/// Converter between float (user side) and ap_fixed<16, I> (wire side).
///
/// The base class is told that each serialized element occupies 2 bytes.
///
/// Buffer lifetime: the converted data is kept in member buffers, so the
/// pointer returned by convertIn()/convertOut() stays valid until the next
/// call of the same method on this object, or until the object is destroyed.
/// (Previously the pointer referred to a temporary vector that was destroyed
/// before the caller could read it.)
/// NOTE(review): callers that hand the returned pointer to an API which keeps
/// it without copying must keep this converter alive, and must not call the
/// same method again, until that API is done with the data - confirm usage.
template <int I>
class FloatApFixed16Converter : public TritonConverterBase<float> {
public:
  FloatApFixed16Converter(const edm::ParameterSet& conf) : TritonConverterBase<float>(conf, 2) {}

  /// Converts floats at @p in to ap_fixed<16, I>; returns the raw bytes of the
  /// internal input buffer.
  const uint8_t* convertIn(const float* in) override;
  /// Interprets @p in as ap_fixed<16, I> values and converts them to float;
  /// returns the internal output buffer.
  const float* convertOut(const uint8_t* in) override;

private:
  // Refills the member input buffer from `in` and returns a reference to it.
  const std::vector<ap_fixed<16, I>>& makeVecIn(const float* in) {
    // NOTE(review): `in` is a pointer, so sizeof(in) is the size of the
    // pointer itself, not of the array it points to. The element count derived
    // here is therefore a small platform-dependent constant. The real count is
    // not available through convertIn(const float*); the interface needs a
    // length argument to fix this properly.
    unsigned int nfeat = sizeof(in) / sizeof(float);
    // Same range constructor as before; the result now lives in a member.
    inBuffer_ = std::vector<ap_fixed<16, I>>(in, in + nfeat);
    return inBuffer_;
  }

  // Refills the member output buffer from `in` and returns a reference to it.
  const std::vector<float>& makeVecOut(const ap_fixed<16, I>* in) {
    // NOTE(review): same pointer-size issue as in makeVecIn().
    unsigned int nfeat = sizeof(in) / sizeof(ap_fixed<16, I>);
    outBuffer_ = std::vector<float>(in, in + nfeat);
    return outBuffer_;
  }

  // Storage backing the pointers handed out by convertIn()/convertOut().
  std::vector<ap_fixed<16, I>> inBuffer_;
  std::vector<float> outBuffer_;
};

DEFINE_EDM_PLUGIN(TritonConverterFactory<float>, FloatApFixed16Converter<6>, "FloatApFixed16F6Converter");

template <int I>
const uint8_t* FloatApFixed16Converter<I>::convertIn(const float* in) {
  // makeVecIn() returns a reference to a member, so data() outlives this call.
  return reinterpret_cast<const uint8_t*>(this->makeVecIn(in).data());
}

template <int I>
const float* FloatApFixed16Converter<I>::convertOut(const uint8_t* in) {
  // makeVecOut() returns a reference to a member, so data() outlives this call.
  return this->makeVecOut(reinterpret_cast<const ap_fixed<16, I>*>(in)).data();
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#include "HeterogeneousCore/SonicTriton/interface/TritonConverterBase.h"

/// Pass-through converter for float data: both directions only reinterpret the
/// incoming pointer, so no data is copied and the returned pointer aliases the
/// caller's buffer. The element byte size is the base-class default,
/// sizeof(float).
class FloatStandardConverter : public TritonConverterBase<float> {
public:
  FloatStandardConverter(const edm::ParameterSet& conf) : TritonConverterBase<float>(conf) {}

  /// View the float buffer as raw bytes.
  const uint8_t* convertIn(const float* in) override { return reinterpret_cast<const uint8_t*>(in); }

  /// View the raw byte buffer as floats.
  const float* convertOut(const uint8_t* in) override { return reinterpret_cast<const float*>(in); }
};

DEFINE_EDM_PLUGIN(TritonConverterFactory<float>, FloatStandardConverter, "FloatStandardConverter");
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#include "HeterogeneousCore/SonicTriton/interface/TritonConverterBase.h"

/// Pass-through converter for int64_t data: both directions only reinterpret
/// the incoming pointer, so no data is copied and the returned pointer aliases
/// the caller's buffer. The element byte size is the base-class default,
/// sizeof(int64_t).
class Int64StandardConverter : public TritonConverterBase<int64_t> {
public:
  Int64StandardConverter(const edm::ParameterSet& conf) : TritonConverterBase<int64_t>(conf) {}

  /// View the int64_t buffer as raw bytes.
  const uint8_t* convertIn(const int64_t* in) override { return reinterpret_cast<const uint8_t*>(in); }

  /// View the raw byte buffer as int64_t values.
  const int64_t* convertOut(const uint8_t* in) override { return reinterpret_cast<const int64_t*>(in); }
};

DEFINE_EDM_PLUGIN(TritonConverterFactory<int64_t>, Int64StandardConverter, "Int64StandardConverter");
7 changes: 7 additions & 0 deletions HeterogeneousCore/SonicTriton/src/TritonClient.cc
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ TritonClient::TritonClient(const edm::ParameterSet& params)
if (!msg_str.empty())
throw cms::Exception("ModelErrors") << msg_str;

const edm::ParameterSet& converterDefs = params.getParameterSet("converterDefinition");
//setup input map
std::stringstream io_msg;
if (verbose_)
Expand All @@ -90,6 +91,7 @@ TritonClient::TritonClient(const edm::ParameterSet& params)
auto [curr_itr, success] = input_.emplace(
std::piecewise_construct, std::forward_as_tuple(iname), std::forward_as_tuple(iname, nicInput, noBatch_));
auto& curr_input = curr_itr->second;
curr_input.setConverterParams(converterDefs);
inputsTriton_.push_back(curr_input.data());
if (verbose_) {
io_msg << " " << iname << " (" << curr_input.dname() << ", " << curr_input.byteSize()
Expand All @@ -113,6 +115,7 @@ TritonClient::TritonClient(const edm::ParameterSet& params)
auto [curr_itr, success] = output_.emplace(
std::piecewise_construct, std::forward_as_tuple(oname), std::forward_as_tuple(oname, nicOutput, noBatch_));
auto& curr_output = curr_itr->second;
curr_output.setConverterParams(converterDefs);
outputsTriton_.push_back(curr_output.data());
if (verbose_) {
io_msg << " " << oname << " (" << curr_output.dname() << ", " << curr_output.byteSize()
Expand Down Expand Up @@ -336,10 +339,14 @@ inference::ModelStatistics TritonClient::getServerSideStatus() const {

//for fillDescriptions
void TritonClient::fillPSetDescription(edm::ParameterSetDescription& iDesc) {
edm::ParameterSetDescription descConverter;
fillBasePSetDescription(descConverter);
drankincms marked this conversation as resolved.
Show resolved Hide resolved
descConverter.add<std::string>("converterName");
edm::ParameterSetDescription descClient;
fillBasePSetDescription(descClient);
descClient.add<std::string>("modelName");
descClient.add<std::string>("modelVersion", "");
descClient.add<edm::ParameterSetDescription>("converterDefinition", descConverter);
drankincms marked this conversation as resolved.
Show resolved Hide resolved
//server parameters should not affect the physics results
descClient.addUntracked<unsigned>("batchSize");
descClient.addUntracked<std::string>("address");
Expand Down
34 changes: 29 additions & 5 deletions HeterogeneousCore/SonicTriton/src/TritonData.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#include "HeterogeneousCore/SonicTriton/interface/TritonData.h"
#include "HeterogeneousCore/SonicTriton/interface/triton_utils.h"
#include "HeterogeneousCore/SonicTriton/interface/TritonConverterBase.h"
#include "FWCore/MessageLogger/interface/MessageLogger.h"
#include "FWCore/PluginManager/interface/PluginFactory.h"

#include "model_config.pb.h"

Expand Down Expand Up @@ -101,6 +103,18 @@ void TritonData<IO>::setBatchSize(unsigned bsize) {
fullShape_[0] = batchSize_;
}

// Build the input-side converter: look up the plugin registered under
// converterName_ in the factory for DT and construct it from converterConf_.
template <>
template <typename DT>
std::unique_ptr<TritonConverterBase<DT>> TritonInputData::createConverter() const {
  auto* const factory = TritonConverterFactory<DT>::get();
  return factory->create(converterName_, converterConf_);
}

// Build the output-side converter: look up the plugin registered under
// converterName_ in the factory for DT and construct it from converterConf_.
template <>
template <typename DT>
std::unique_ptr<TritonConverterBase<DT>> TritonOutputData::createConverter() const {
  auto* const factory = TritonConverterFactory<DT>::get();
  return factory->create(converterName_, converterConf_);
}

//io accessors
template <>
template <typename DT>
Expand All @@ -116,14 +130,16 @@ void TritonInputData::toServer(std::shared_ptr<TritonInput<DT>> ptr) {
//shape must be specified for variable dims or if batch size changes
data_->SetShape(fullShape_);

if (byteSize_ != sizeof(DT))
throw cms::Exception("TritonDataError") << name_ << " input(): inconsistent byte size " << sizeof(DT)
std::unique_ptr<TritonConverterBase<DT>> converter = createConverter<DT>();
drankincms marked this conversation as resolved.
Show resolved Hide resolved

if (byteSize_ != converter->getByteSize())

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure if this change is correct. The original purpose here was a runtime check that the type in the user's code (DT) corresponds to the model specification on the server (byteSize_). The converter can have a different byte size in the FPGA case, but is that what the server will return from the model specification?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is necessary. I think the model specification on the server should use a type with the appropriate bitwidth (i.e. for the ap_fixed<16,6> FPGA setup the server returns UINT16 for the data type, and for a FP16 GPU model I think something similar would get done). But the user wants to just load the data as float32s in these cases.

throw cms::Exception("TritonDataError") << name_ << " input(): inconsistent byte size " << converter->getByteSize()
<< " (should be " << byteSize_ << " for " << dname_ << ")";

int64_t nInput = sizeShape();
for (unsigned i0 = 0; i0 < batchSize_; ++i0) {
const DT* arr = data_in[i0].data();
triton_utils::throwIfError(data_->AppendRaw(reinterpret_cast<const uint8_t*>(arr), nInput * byteSize_),
triton_utils::throwIfError(data_->AppendRaw(converter->convertIn(arr), nInput * byteSize_),
name_ + " input(): unable to set data for batch entry " + std::to_string(i0));
}

Expand All @@ -138,6 +154,8 @@ TritonOutput<DT> TritonOutputData::fromServer() const {
throw cms::Exception("TritonDataError") << name_ << " output(): missing result";
}

std::unique_ptr<TritonConverterBase<DT>> converter = createConverter<DT>();

if (byteSize_ != sizeof(DT)) {
throw cms::Exception("TritonDataError") << name_ << " output(): inconsistent byte size " << sizeof(DT)
<< " (should be " << byteSize_ << " for " << dname_ << ")";
Expand All @@ -147,14 +165,14 @@ TritonOutput<DT> TritonOutputData::fromServer() const {
TritonOutput<DT> dataOut;
const uint8_t* r0;
size_t contentByteSize;
size_t expectedContentByteSize = nOutput * byteSize_ * batchSize_;
size_t expectedContentByteSize = nOutput * converter->getByteSize() * batchSize_;
triton_utils::throwIfError(result_->RawData(name_, &r0, &contentByteSize), "output(): unable to get raw");
if (contentByteSize != expectedContentByteSize) {
throw cms::Exception("TritonDataError") << name_ << " output(): unexpected content byte size " << contentByteSize
<< " (expected " << expectedContentByteSize << ")";
}

const DT* r1 = reinterpret_cast<const DT*>(r0);
const DT* r1 = converter->convertOut(r0);
dataOut.reserve(batchSize_);
for (unsigned i0 = 0; i0 < batchSize_; ++i0) {
auto offset = i0 * nOutput;
Expand Down Expand Up @@ -183,3 +201,9 @@ template void TritonInputData::toServer(std::shared_ptr<TritonInput<float>> data
template void TritonInputData::toServer(std::shared_ptr<TritonInput<int64_t>> data_in);

template TritonOutput<float> TritonOutputData::fromServer() const;

// Explicit instantiations of createConverter() for the float and int64_t
// element types, on both the input and the output data classes.
template std::unique_ptr<TritonConverterBase<float>> TritonInputData::createConverter() const;
template std::unique_ptr<TritonConverterBase<int64_t>> TritonInputData::createConverter() const;

template std::unique_ptr<TritonConverterBase<float>> TritonOutputData::createConverter() const;
template std::unique_ptr<TritonConverterBase<int64_t>> TritonOutputData::createConverter() const;
4 changes: 4 additions & 0 deletions HeterogeneousCore/SonicTriton/src/pluginFactories.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#include "HeterogeneousCore/SonicTriton/interface/TritonConverterBase.h"

// Register one converter plugin factory per supported element type (float and
// int64_t), each under its own factory name.
EDM_REGISTER_PLUGINFACTORY(TritonConverterFactory<float>, "TritonConverterFloatFactory");
EDM_REGISTER_PLUGINFACTORY(TritonConverterFactory<int64_t>, "TritonConverterInt64Factory");
3 changes: 3 additions & 0 deletions HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@
modelVersion = cms.string(""),
verbose = cms.untracked.bool(options.verbose),
allowedTries = cms.untracked.uint32(0),
converterDefinition = cms.PSet(
converterName = cms.string("FloatStandardConverter"),
),
)
)
if options.producer=="TritonImageProducer":
Expand Down