Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Triton data converter (CMSSW_11_2_0_pre9) #2

Open
wants to merge 21 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions HeterogeneousCore/SonicTriton/interface/TritonData.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ class TritonData {
if (auto ptr = std::any_cast<ConverterType>(&converter_)) {
} else {
converter_ = ConverterType(TritonConverterFactory<DT>::get()->create(converterName_));
converter_clear = std::bind(&TritonConverterBase<DT>::clear, std::any_cast<ConverterType>(converter_).get());
converter_clear_ = std::bind(&TritonConverterBase<DT>::clear, std::any_cast<ConverterType>(converter_).get());
}
return std::any_cast<ConverterType>(converter_);
}
Expand Down Expand Up @@ -114,7 +114,7 @@ class TritonData {
std::shared_ptr<Result> result_;
mutable std::any converter_;
std::string converterName_;
mutable std::function<void()> converter_clear;
mutable std::function<void()> converter_clear_;
};

using TritonInputData = TritonData<nvidia::inferenceserver::client::InferInput>;
Expand Down
33 changes: 27 additions & 6 deletions HeterogeneousCore/SonicTriton/src/TritonClient.cc
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ TritonClient::TritonClient(const edm::ParameterSet& params)
if (!msg_str.empty())
throw cms::Exception("ModelErrors") << msg_str;

const edm::ParameterSet& converterDefs = params.getParameterSet("converterDefinition");
const std::vector<edm::ParameterSet>& inputConverterDefs = params.getParameter<std::vector<edm::ParameterSet>>("inputConverters");
drankincms marked this conversation as resolved.
Show resolved Hide resolved
//setup input map
std::stringstream io_msg;
if (verbose_)
Expand All @@ -91,7 +91,14 @@ TritonClient::TritonClient(const edm::ParameterSet& params)
auto [curr_itr, success] = input_.emplace(
std::piecewise_construct, std::forward_as_tuple(iname), std::forward_as_tuple(iname, nicInput, noBatch_));
auto& curr_input = curr_itr->second;
curr_input.setConverterParams(converterDefs);
bool foundConverter = false;
for (const auto converterDef : inputConverterDefs) {
drankincms marked this conversation as resolved.
Show resolved Hide resolved
if (converterDef.getParameter<std::string>("inputName") == iname) {
curr_input.setConverterParams(converterDef);
foundConverter = true;
}
}
if (!foundConverter) throw cms::Exception("ModelErrors") << "No matching converter definition for input: " << iname;
inputsTriton_.push_back(curr_input.data());
if (verbose_) {
io_msg << " " << iname << " (" << curr_input.dname() << ", " << curr_input.byteSize()
Expand All @@ -103,6 +110,8 @@ TritonClient::TritonClient(const edm::ParameterSet& params)
const auto& v_outputs = params.getUntrackedParameter<std::vector<std::string>>("outputs");
std::unordered_set s_outputs(v_outputs.begin(), v_outputs.end());

const std::vector<edm::ParameterSet>& outputConverterDefs = params.getParameter<std::vector<edm::ParameterSet>>("outputConverters");

//setup output map
if (verbose_)
io_msg << "Model outputs: "
Expand All @@ -115,7 +124,14 @@ TritonClient::TritonClient(const edm::ParameterSet& params)
auto [curr_itr, success] = output_.emplace(
std::piecewise_construct, std::forward_as_tuple(oname), std::forward_as_tuple(oname, nicOutput, noBatch_));
auto& curr_output = curr_itr->second;
curr_output.setConverterParams(converterDefs);
bool foundConverter = false;
for (const auto converterDef : outputConverterDefs) {
if (converterDef.getParameter<std::string>("outputName") == oname) {
curr_output.setConverterParams(converterDef);
foundConverter = true;
}
}
if (!foundConverter) throw cms::Exception("ModelErrors") << "No matching converter definition for output: " << oname;
outputsTriton_.push_back(curr_output.data());
if (verbose_) {
io_msg << " " << oname << " (" << curr_output.dname() << ", " << curr_output.byteSize()
Expand Down Expand Up @@ -339,13 +355,18 @@ inference::ModelStatistics TritonClient::getServerSideStatus() const {

//for fillDescriptions
void TritonClient::fillPSetDescription(edm::ParameterSetDescription& iDesc) {
edm::ParameterSetDescription descConverter;
descConverter.add<std::string>("converterName");
edm::ParameterSetDescription descInConverter;
descInConverter.add<std::string>("converterName");
descInConverter.add<std::string>("inputName");
edm::ParameterSetDescription descOutConverter;
descOutConverter.add<std::string>("converterName");
descOutConverter.add<std::string>("outputName");
edm::ParameterSetDescription descClient;
fillBasePSetDescription(descClient);
descClient.add<std::string>("modelName");
descClient.add<std::string>("modelVersion", "");
descClient.add<edm::ParameterSetDescription>("converterDefinition", descConverter);
descClient.addVPSet("inputConverters", descInConverter);
descClient.addVPSet("outputConverters", descOutConverter);
//server parameters should not affect the physics results
descClient.addUntracked<unsigned>("batchSize");
descClient.addUntracked<std::string>("address");
Expand Down
4 changes: 2 additions & 2 deletions HeterogeneousCore/SonicTriton/src/TritonData.cc
Original file line number Diff line number Diff line change
Expand Up @@ -173,13 +173,13 @@ template <>
void TritonInputData::reset() {
data_->Reset();
holder_.reset();
converter_clear();
converter_clear_();
}

template <>
void TritonOutputData::reset() {
result_.reset();
converter_clear();
converter_clear_();
}

//explicit template instantiation declarations
Expand Down
39 changes: 36 additions & 3 deletions HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,40 @@
"TritonGraphProducer": "gat_test",
}

inConvs = {
"TritonImageProducer": cms.VPSet(

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To simplify this for users, we might want to add some additional infrastructure:

  1. To handle default cases, we could map the standard converters to the appropriate Triton data type names/enums (see the TritonData constructor). Then these wouldn't need to be specified in the python config explicitly.
  2. We might want to investigate if there's a way to augment the model config and protocols so that, when a FaaST server is used, it could automatically specify (probably via some gRPC message) when an alternate converter needs to be used (and which one).

Item 2 assumes that there would not be a case where the client wants to use a different converter than specified by the model in the repository/on the server.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One idea to avoid a) overloading the meaning of some existing protobuf field (potentially dangerous) or b) extending the proto definitions (maintenance burden):
we could come up with a policy whereby the input and output names in the config.pbtxt are extended to indicate which non-standard converter (if any) is needed. Then these extended names could be handled by the client.
We could try to have the FaaST server perform this extension automatically (in order to keep the same config.pbtxt files for GPU and FPGA versions of a model). But since other model files might need to be different for FPGA usage, changing the config.pbtxt might be okay.

Even with this, it could still be useful to have the option introduced here to specify a converter manually (for testing).

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. Ok yeah, this is a good suggestion, Ill set this up.

  2. Hmm, perhaps we could use something existing like the platform field in ModelMetadataResponse, and fill it with the server's desired converter. This seems simple enough for FaaST, but I'm not sure how easy this would be with a standard GPU Triton server.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's still only one platform per model, so we would have to pack all the information for every input and output into that one string, which isn't ideal.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh yeah, I keep forgetting that other models have multiple inputs/outputs.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So this now uses defaults successfully when they are available, so 1) is addressed.

For 2), Ok, I missed your suggestion of the model name field. I think the version field might be even more accurate but it's a minor point. But can we enforce a policy whereby we use the field to encode the necessary converter? I'm wondering if you think this is actually feasible or if you were only thinking out loud a bit here.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To be a bit more explicit, the most automatic implementation would be:

  1. config.pbtxt retains the normal input/output names (same for GPU and FPGA)
  2. FaaST appends information to the input/output names when returning the ModelMetadataResponse, e.g. "input1_FPGAConverter:foo"
  3. TritonClient knows to check for the presence of "_FPGAConverter:foo"

The question here would be how "foo" is provided for each model input/output. Maybe there could be a separate section of the config.pbtxt; we have to learn more about how Triton parses it and what (if anything) it ignores.

The alternative would be a bit more manual on the user side:

  1. make a separate FPGA copy of the GPU config.pbtxt, and add "_FPGAConverter:foo" to the input/output names.
  2. FaaST uses the FPGA copy of the config.pbtxt (maybe this could also be automated...)

cms.PSet(
converterName = cms.string("FloatStandardConverter"),
inputName = cms.string("gpu_0/data"),
),
),
"TritonGraphProducer": cms.VPSet(
cms.PSet(
converterName = cms.string("FloatStandardConverter"),
inputName = cms.string("x__0"),
),
cms.PSet(
converterName = cms.string("Int64StandardConverter"),
inputName = cms.string("edgeindex__1"),
),
),
}

outConvs = {
"TritonImageProducer": cms.VPSet(
cms.PSet(
converterName = cms.string("FloatStandardConverter"),
outputName = cms.string("gpu_0/softmax"),
),
),
"TritonGraphProducer": cms.VPSet(
cms.PSet(
converterName = cms.string("FloatStandardConverter"),
outputName = cms.string("logits__0"),
),
),
}

if options.producer not in models:
raise ValueError("Unknown producer: "+options.producer)

Expand All @@ -49,9 +83,8 @@
modelVersion = cms.string(""),
verbose = cms.untracked.bool(options.verbose),
allowedTries = cms.untracked.uint32(0),
converterDefinition = cms.PSet(
converterName = cms.string("FloatStandardConverter"),
),
inputConverters = inConvs[options.producer],
outputConverters = outConvs[options.producer],
)
)
if options.producer=="TritonImageProducer":
Expand Down