Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Triton data converter (CMSSW_11_2_0_pre9) #2

Open
wants to merge 21 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 19 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions HeterogeneousCore/SonicTriton/BuildFile.xml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
<use name="FWCore/Utilities"/>
<use name="FWCore/ParameterSet"/>
<use name="FWCore/MessageLogger"/>
<use name="DataFormats/Common"/>
<use name="HeterogeneousCore/SonicCore"/>
<use name="triton-inference-server"/>
<use name="protobuf"/>
Expand Down
43 changes: 43 additions & 0 deletions HeterogeneousCore/SonicTriton/interface/TritonConverterBase.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#ifndef HeterogeneousCore_SonicTriton_TritonConverterBase
#define HeterogeneousCore_SonicTriton_TritonConverterBase

#include "FWCore/ParameterSet/interface/ParameterSet.h"
#include "DataFormats/Common/interface/Handle.h"

#include <string>

template <typename DT>
class TritonConverterBase {
drankincms marked this conversation as resolved.
Show resolved Hide resolved
//class needs to be templated since the convert functions require the data type, but need to also be virtual, and virtual member function templates are not allowed in C++
public:
TritonConverterBase(const std::string convName)
: converterName_(convName), byteSize_(sizeof(DT)) {}
TritonConverterBase(const std::string convName, size_t byteSize)
: converterName_(convName), byteSize_(byteSize) {}
TritonConverterBase(const TritonConverterBase&) = delete;
virtual ~TritonConverterBase() = default;
TritonConverterBase& operator=(const TritonConverterBase&) = delete;

virtual const uint8_t* convertIn (const DT* in) const = 0;
virtual const DT* convertOut (const uint8_t* in) const = 0;

const int64_t byteSize() const { return byteSize_; }

const std::string& name() const { return converterName_; }

virtual void clear() const {}

private:
const std::string converterName_;
const int64_t byteSize_;
};

#include "FWCore/PluginManager/interface/PluginFactory.h"

template <typename DT>
using TritonConverterFactory = edmplugin::PluginFactory<TritonConverterBase<DT>*()>;

#define DEFINE_TRITON_CONVERTER(input, type, name) DEFINE_EDM_PLUGIN(TritonConverterFactory<input>, type, name)
#define DEFINE_TRITON_CONVERTER_SIMPLE(input, type) DEFINE_EDM_PLUGIN(TritonConverterFactory<input>, type, #type)

#endif
40 changes: 40 additions & 0 deletions HeterogeneousCore/SonicTriton/interface/TritonData.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
#include "FWCore/Utilities/interface/Exception.h"
#include "FWCore/Utilities/interface/Span.h"

#include "FWCore/PluginManager/interface/PluginFactory.h"
#include "HeterogeneousCore/SonicTriton/interface/TritonConverterBase.h"

#include <vector>
#include <string>
#include <unordered_map>
Expand Down Expand Up @@ -69,6 +72,40 @@ class TritonData {
void setResult(std::shared_ptr<Result> result) { result_ = result; }
IO* data() { return data_.get(); }

std::string defaultConverter(const std::string name) const {
size_t dcpos = name.find("_DataConverter:");
if (dcpos != std::string::npos) {
return name.substr(dcpos+15);
}
else {
std::string base = "StandardConverter";
if (dtype_ == inference::DataType::TYPE_INT64) {
return "Int64"+base;
}
else if (dtype_ == inference::DataType::TYPE_FP32) {
return "Float"+base;
} else {
throw cms::Exception("ConverterErrors") << "Unable to create default converter for " << name_ << " of " << dname_ << " type\n";
}
}
}

void setConverterParams(const std::string& convName) {
converterName_ = convName;
}

template <typename DT>
std::shared_ptr<TritonConverterBase<DT>> createConverter() const {
using ConverterType = std::shared_ptr<TritonConverterBase<DT>>;
//this construction catches bad any_cast without throwing std exception
if (auto ptr = std::any_cast<ConverterType>(&converter_)) {
} else {
converter_ = ConverterType(TritonConverterFactory<DT>::get()->create(converterName_));
converter_clear_ = std::bind(&TritonConverterBase<DT>::clear, std::any_cast<ConverterType>(converter_).get());
}
return std::any_cast<ConverterType>(converter_);
}

//helpers
bool anyNeg(const ShapeView& vec) const {
return std::any_of(vec.begin(), vec.end(), [](int64_t i) { return i < 0; });
Expand All @@ -93,6 +130,9 @@ class TritonData {
int64_t byteSize_;
std::any holder_;
std::shared_ptr<Result> result_;
mutable std::any converter_;
std::string converterName_;
mutable std::function<void()> converter_clear_;
};

using TritonInputData = TritonData<nvidia::inferenceserver::client::InferInput>;
Expand Down
5 changes: 5 additions & 0 deletions HeterogeneousCore/SonicTriton/plugins/BuildFile.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<library name="HeterogeneousCoreSonicTritonPlugins_converters" file="converters/*.cc">
<use name="HeterogeneousCore/SonicTriton"/>
<use name="hls"/>
<flags EDM_PLUGIN="1"/>
</library>
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#include "HeterogeneousCore/SonicTriton/interface/TritonConverterBase.h"

#include <string>
#include "ap_fixed.h"

template <int I>
class FloatApFixed16Converter : public TritonConverterBase<float> {
public:
FloatApFixed16Converter() : TritonConverterBase<float>("FloatApFixed16F"+std::to_string(I)+"Converter", 2) {}

const uint8_t* convertIn(const float* in) const override {
auto temp_vec = std::make_shared<std::vector<ap_fixed<16, I>>>(std::move(this->makeVecIn(in)));
inputHolder_.push_back(temp_vec);
return reinterpret_cast<const uint8_t*>(temp_vec->data());
}
const float* convertOut(const uint8_t* in) const override {
auto temp_vec = std::make_shared<std::vector<float>>(std::move(this->makeVecOut(reinterpret_cast<const ap_fixed<16, I>*>(in))));
outputHolder_.push_back(temp_vec);
return temp_vec->data();
}

void clear() const override {
inputHolder_.clear();
outputHolder_.clear();
}

private:
std::vector<ap_fixed<16, I>> makeVecIn(const float* in) const {
unsigned int nfeat = sizeof(in) / sizeof(float);
std::vector<ap_fixed<16, I>> temp_storage(in, in + nfeat);
return temp_storage;
}

std::vector<float> makeVecOut(const ap_fixed<16, I>* in) const {
unsigned int nfeat = sizeof(in) / sizeof(ap_fixed<16, I>);
std::vector<float> temp_storage(in, in + nfeat);
return temp_storage;
}

mutable std::vector<std::shared_ptr<std::vector<ap_fixed<16, I>>>> inputHolder_;
mutable std::vector<std::shared_ptr<std::vector<float>>> outputHolder_;
};

DEFINE_TRITON_CONVERTER(float, FloatApFixed16Converter<6>, "FloatApFixed16F6Converter");
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#include "HeterogeneousCore/SonicTriton/interface/TritonConverterBase.h"

class FloatStandardConverter : public TritonConverterBase<float> {
public:
FloatStandardConverter() : TritonConverterBase<float>("FloatStandardConverter") {}

const uint8_t* convertIn(const float* in) const override { return reinterpret_cast<const uint8_t*>(in); }
const float* convertOut(const uint8_t* in) const override { return reinterpret_cast<const float*>(in); }
};

DEFINE_TRITON_CONVERTER_SIMPLE(float, FloatStandardConverter);
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#include "HeterogeneousCore/SonicTriton/interface/TritonConverterBase.h"

class Int64StandardConverter : public TritonConverterBase<int64_t> {
public:
Int64StandardConverter() : TritonConverterBase<int64_t>("Int64StandardConverter") {}

const uint8_t* convertIn(const int64_t* in) const override { return reinterpret_cast<const uint8_t*>(in); }
const int64_t* convertOut(const uint8_t* in) const override { return reinterpret_cast<const int64_t*>(in); }
};

DEFINE_TRITON_CONVERTER_SIMPLE(int64_t, Int64StandardConverter);
37 changes: 35 additions & 2 deletions HeterogeneousCore/SonicTriton/src/TritonClient.cc
Original file line number Diff line number Diff line change
Expand Up @@ -79,17 +79,29 @@ TritonClient::TritonClient(const edm::ParameterSet& params)
if (!msg_str.empty())
throw cms::Exception("ModelErrors") << msg_str;

const std::vector<edm::ParameterSet>& inputConverterDefs = params.getParameterSetVector("inputConverters");
std::unordered_map<std::string,std::string> inConvMap;
for (const auto& converterDef : inputConverterDefs) {
inConvMap[converterDef.getParameter<std::string>("inputName")] = converterDef.getParameter<std::string>("converterName");
}

//setup input map
std::stringstream io_msg;
if (verbose_)
io_msg << "Model inputs: "
<< "\n";
inputsTriton_.reserve(nicInputs.size());
for (const auto& nicInput : nicInputs) {
const auto& iname = nicInput.name();
const std::string iname_full = nicInput.name();
const auto& iname = iname_full.find("_DataConverter:") != std::string::npos ? iname_full.substr(0,iname_full.find("_DataConverter:")) : iname_full;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

here's how I would reduce duplication:

  1. define a static function of TritonClient:
    static std::pair<std::string, std::string> splitNameConverter(const std::string& fullname) {
      static const std::string indicator("_DataConverter:");
      size_t dcpos = fullname.find(indicator);
      if (dcpos != std::string::npos)
        return {fullname.substr(0,dcpos), fullname.substr(dcpos+indicator.size())};
      else
        return {fullname, ""};
    }
  2. use the static function here:
    const auto& [iname, iconverter] = TritonClient::splitNameConverter(iname_full);
  3. pass iconverter to curr_input.defaultConverter() below (& do the same for the outputs further down)
  4. defaultConverter() just has to check if the input parameter is non-empty, otherwise it tries to get a standard converter (as currently implemented)
  5. inConvMap can be removed entirely (maybe not right away if you still want to test things)

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, yes, this is cleaner, Ill make this change now, thanks. Regarding 5), I think we still want to let the user define their own converter. For example, in the case of FACILE the server would be configured to request FloatToApFixed16F6 as the converter, but if a dedicated FACILE for FPGA producer is written, it can use the same server and would need to use a different converter.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can imagine there might be cases where the user wants to provide an alternate converter, but I'm confused about your example. Why would a dedicated producer need to use a different converter for the same server?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Im envisioning a dedicated FPGA producer that is aware of the ap_fixed<16,6> type that is needed by the server, so it places each input in the data vector already converted, so the converter is basically just a default UINT16 converter.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah okay, I understand now.

auto [curr_itr, success] = input_.emplace(
std::piecewise_construct, std::forward_as_tuple(iname), std::forward_as_tuple(iname, nicInput, noBatch_));
auto& curr_input = curr_itr->second;
if ( inConvMap.find(iname) == inConvMap.end() ) {
curr_input.setConverterParams(curr_input.defaultConverter(iname_full));
} else {
curr_input.setConverterParams(inConvMap[iname]);
}
inputsTriton_.push_back(curr_input.data());
if (verbose_) {
io_msg << " " << iname << " (" << curr_input.dname() << ", " << curr_input.byteSize()
Expand All @@ -101,18 +113,30 @@ TritonClient::TritonClient(const edm::ParameterSet& params)
const auto& v_outputs = params.getUntrackedParameter<std::vector<std::string>>("outputs");
std::unordered_set s_outputs(v_outputs.begin(), v_outputs.end());

const std::vector<edm::ParameterSet>& outputConverterDefs = params.getParameterSetVector("outputConverters");
std::unordered_map<std::string,std::string> outConvMap;
for (const auto& converterDef : outputConverterDefs) {
outConvMap[converterDef.getParameter<std::string>("outputName")] = converterDef.getParameter<std::string>("converterName");
}

//setup output map
if (verbose_)
io_msg << "Model outputs: "
<< "\n";
outputsTriton_.reserve(nicOutputs.size());
for (const auto& nicOutput : nicOutputs) {
const auto& oname = nicOutput.name();
const std::string oname_full = nicOutput.name();
const auto& oname = oname_full.find("_DataConverter:") != std::string::npos ? oname_full.substr(0,oname_full.find("_DataConverter:")) : oname_full;
if (!s_outputs.empty() and s_outputs.find(oname) == s_outputs.end())
continue;
auto [curr_itr, success] = output_.emplace(
std::piecewise_construct, std::forward_as_tuple(oname), std::forward_as_tuple(oname, nicOutput, noBatch_));
auto& curr_output = curr_itr->second;
if ( outConvMap.find(oname) == outConvMap.end() ) {
curr_output.setConverterParams(curr_output.defaultConverter(oname_full));
} else {
curr_output.setConverterParams(outConvMap[oname]);
}
outputsTriton_.push_back(curr_output.data());
if (verbose_) {
io_msg << " " << oname << " (" << curr_output.dname() << ", " << curr_output.byteSize()
Expand Down Expand Up @@ -336,10 +360,19 @@ inference::ModelStatistics TritonClient::getServerSideStatus() const {

//for fillDescriptions
void TritonClient::fillPSetDescription(edm::ParameterSetDescription& iDesc) {
edm::ParameterSetDescription descInConverter;
descInConverter.add<std::string>("converterName");
descInConverter.add<std::string>("inputName");
edm::ParameterSetDescription descOutConverter;
descOutConverter.add<std::string>("converterName");
descOutConverter.add<std::string>("outputName");
std::vector<edm::ParameterSet> blankVPSet;
edm::ParameterSetDescription descClient;
fillBasePSetDescription(descClient);
descClient.add<std::string>("modelName");
descClient.add<std::string>("modelVersion", "");
descClient.addVPSet("inputConverters", descInConverter, blankVPSet);
descClient.addVPSet("outputConverters", descOutConverter, blankVPSet);
//server parameters should not affect the physics results
descClient.addUntracked<unsigned>("batchSize");
descClient.addUntracked<std::string>("address");
Expand Down
19 changes: 13 additions & 6 deletions HeterogeneousCore/SonicTriton/src/TritonData.cc
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "HeterogeneousCore/SonicTriton/interface/TritonData.h"
#include "HeterogeneousCore/SonicTriton/interface/triton_utils.h"
#include "HeterogeneousCore/SonicTriton/interface/TritonConverterBase.h"
#include "FWCore/MessageLogger/interface/MessageLogger.h"

#include "model_config.pb.h"
Expand Down Expand Up @@ -116,14 +117,16 @@ void TritonInputData::toServer(std::shared_ptr<TritonInput<DT>> ptr) {
//shape must be specified for variable dims or if batch size changes
data_->SetShape(fullShape_);

if (byteSize_ != sizeof(DT))
throw cms::Exception("TritonDataError") << name_ << " input(): inconsistent byte size " << sizeof(DT)
auto converter = createConverter<DT>();

if (byteSize_ != converter->byteSize())
throw cms::Exception("TritonDataError") << name_ << " input(): inconsistent byte size " << converter->byteSize()
<< " (should be " << byteSize_ << " for " << dname_ << ")";

int64_t nInput = sizeShape();
for (unsigned i0 = 0; i0 < batchSize_; ++i0) {
const DT* arr = data_in[i0].data();
triton_utils::throwIfError(data_->AppendRaw(reinterpret_cast<const uint8_t*>(arr), nInput * byteSize_),
triton_utils::throwIfError(data_->AppendRaw(converter->convertIn(arr), nInput * byteSize_),
name_ + " input(): unable to set data for batch entry " + std::to_string(i0));
}

Expand All @@ -138,7 +141,9 @@ TritonOutput<DT> TritonOutputData::fromServer() const {
throw cms::Exception("TritonDataError") << name_ << " output(): missing result";
}

if (byteSize_ != sizeof(DT)) {
auto converter = createConverter<DT>();

if (byteSize_ != converter->byteSize()) {
throw cms::Exception("TritonDataError") << name_ << " output(): inconsistent byte size " << sizeof(DT)
<< " (should be " << byteSize_ << " for " << dname_ << ")";
}
Expand All @@ -147,14 +152,14 @@ TritonOutput<DT> TritonOutputData::fromServer() const {
TritonOutput<DT> dataOut;
const uint8_t* r0;
size_t contentByteSize;
size_t expectedContentByteSize = nOutput * byteSize_ * batchSize_;
size_t expectedContentByteSize = nOutput * converter->byteSize() * batchSize_;
triton_utils::throwIfError(result_->RawData(name_, &r0, &contentByteSize), "output(): unable to get raw");
if (contentByteSize != expectedContentByteSize) {
throw cms::Exception("TritonDataError") << name_ << " output(): unexpected content byte size " << contentByteSize
<< " (expected " << expectedContentByteSize << ")";
}

const DT* r1 = reinterpret_cast<const DT*>(r0);
const DT* r1 = converter->convertOut(r0);
dataOut.reserve(batchSize_);
for (unsigned i0 = 0; i0 < batchSize_; ++i0) {
auto offset = i0 * nOutput;
Expand All @@ -168,11 +173,13 @@ template <>
void TritonInputData::reset() {
data_->Reset();
holder_.reset();
converter_clear_();
}

template <>
void TritonOutputData::reset() {
result_.reset();
converter_clear_();
}

//explicit template instantiation declarations
Expand Down
4 changes: 4 additions & 0 deletions HeterogeneousCore/SonicTriton/src/pluginFactories.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#include "HeterogeneousCore/SonicTriton/interface/TritonConverterBase.h"

EDM_REGISTER_PLUGINFACTORY(TritonConverterFactory<float>, "TritonConverterFloatFactory");
EDM_REGISTER_PLUGINFACTORY(TritonConverterFactory<int64_t>, "TritonConverterInt64Factory");