From 6abef2c97bf0176177fda617c995ca67afeb808a Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Tue, 23 Jan 2024 15:50:49 -0600 Subject: [PATCH 01/19] extend default server startup timeout to account for non-cached case --- HeterogeneousCore/SonicTriton/scripts/cmsTriton | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HeterogeneousCore/SonicTriton/scripts/cmsTriton b/HeterogeneousCore/SonicTriton/scripts/cmsTriton index 9c84be2b62616..48d24cb5a8747 100755 --- a/HeterogeneousCore/SonicTriton/scripts/cmsTriton +++ b/HeterogeneousCore/SonicTriton/scripts/cmsTriton @@ -5,7 +5,7 @@ USEDOCKER="" GPU="" VERBOSE="" VERBOSE_ARGS="--log-verbose=1 --log-error=1 --log-warning=1 --log-info=1" -WTIME=300 +WTIME=600 SERVER=triton_server_instance RETRIES=3 REPOS=() From eab88918848d2a6ae1d085b25003da0679d3acd5 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Tue, 23 Jan 2024 16:17:06 -0600 Subject: [PATCH 02/19] do not apply thread control settings if GPU use is requested --- HeterogeneousCore/SonicTriton/scripts/cmsTriton | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/scripts/cmsTriton b/HeterogeneousCore/SonicTriton/scripts/cmsTriton index 48d24cb5a8747..29be94c161559 100755 --- a/HeterogeneousCore/SonicTriton/scripts/cmsTriton +++ b/HeterogeneousCore/SonicTriton/scripts/cmsTriton @@ -160,6 +160,12 @@ SEGFAULT_INDICATOR="Address already in use" EXTRA="" COMPAT_SCRIPT=/etc/shinit_v2 +THREADCONTROL="" +# do not apply thread control settings if GPU use is requested +if [ "$INSTANCES" -gt 0 ] && [ -z "$GPU" ]; then + THREADCONTROL=true +fi + compute_ports(){ # compute derived port numbers export HTTPPORT=$BASEPORT @@ -341,7 +347,7 @@ wait_server(){ list_models(){ # make list of model repositories LOCALMODELREPO="local_model_repo" - if [ "$INSTANCES" -gt 0 ]; then + if [ -n "$THREADCONTROL" ]; then if [ -d "$TMPDIR/$LOCALMODELREPO" ]; then #Want to start with a fresh copy of model files in case this directory already exists with local edits rm -rf $TMPDIR/$LOCALMODELREPO @@ -359,7 +365,7 @@ list_models(){ if [ -f "$MODEL" ]; then MODEL="$(dirname "$MODEL")" fi - if [ "$INSTANCES" -gt 0 ]; then + if [ -n "$THREADCONTROL" ]; then $DRYRUN cmsTritonConfigTool threadcontrol -c ${MODEL}/config.pbtxt --copy $TMPDIR/$LOCALMODELREPO --nThreads $INSTANCES TOOL_EXIT=$? if [ "$TOOL_EXIT" -ne 0 ]; then @@ -370,7 +376,7 @@ list_models(){ REPOS+=("$(dirname "$MODEL")") fi done - if [ "$INSTANCES" -gt 0 ]; then + if [ -n "$THREADCONTROL" ]; then REPOS=$TMPDIR/$LOCALMODELREPO else for ((r=0; r < ${#REPOS[@]}; r++)); do @@ -570,7 +576,7 @@ elif [ "$OP" == start ]; then for ((counter=0; counter < ${RETRIES}; counter++)); do make_tmp #If we plan on editing model configs, must repull files into /tmp/local_model_repo, which is deleted upon retry - if [ "$counter" -eq 0 ] || [ "$INSTANCES" -gt 0 ]; then list_models; fi + if [ "$counter" -eq 0 ] || [ -n "$THREADCONTROL" ]; then list_models; fi check_drivers DRIVER_EXIT=$? if [ "$DRIVER_EXIT" -ne 0 ]; then exit $DRIVER_EXIT; fi From 86001328a18186c96e94f1406d7b4c7638345f97 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Tue, 23 Jan 2024 17:00:35 -0600 Subject: [PATCH 03/19] use OSG apptainer by default (if available) --- HeterogeneousCore/SonicTriton/README.md | 1 + HeterogeneousCore/SonicTriton/scripts/cmsTriton | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/HeterogeneousCore/SonicTriton/README.md b/HeterogeneousCore/SonicTriton/README.md index 7eed0f67989fa..2e29bee23cacd 100644 --- a/HeterogeneousCore/SonicTriton/README.md +++ b/HeterogeneousCore/SonicTriton/README.md @@ -132,6 +132,7 @@ The script has three operations (`start`, `stop`, `check`) and the following opt * `-C [dir]`: directory containing Nvidia compatibility drivers (checks CMSSW_BASE by default if available) * `-D`: dry run: print container commands rather than executing them * `-d`: use Docker instead of Apptainer +* `-E [path]`: include extra path(s) for executables (default: /cvmfs/oasis.opensciencegrid.org/mis/apptainer/current/bin) * `-f`: force reuse of (possibly) existing container instance * `-g`: use GPU instead of CPU * `-i` [name]`: server image name (default: fastml/triton-torchgeo:22.07-py3-geometric) diff --git a/HeterogeneousCore/SonicTriton/scripts/cmsTriton b/HeterogeneousCore/SonicTriton/scripts/cmsTriton index 29be94c161559..e6ede404877a4 100755 --- a/HeterogeneousCore/SonicTriton/scripts/cmsTriton +++ b/HeterogeneousCore/SonicTriton/scripts/cmsTriton @@ -23,6 +23,7 @@ NPORTS=3 IMAGE=fastml/triton-torchgeo:22.07-py3-geometric SANDBOX="" COMPAT_USR="" +EXTRAPATH=/cvmfs/oasis.opensciencegrid.org/mis/apptainer/current/bin get_sandbox(){ if [ -z "$SANDBOX" ]; then @@ -41,6 +42,7 @@ usage() { $ECHO "-C [dir] \t directory containing Nvidia compatibility drivers (checks CMSSW_BASE by default if available)" $ECHO "-D \t dry run: print container commands rather than executing them" $ECHO "-d \t use Docker instead of Apptainer" + $ECHO "-E [path] \t include extra path(s) for executables (default: ${EXTRAPATH})" $ECHO "-f \t force reuse of (possibly) existing container instance" $ECHO "-g \t use GPU instead of CPU" $ECHO "-i [name] \t server image name (default: ${IMAGE})" @@ -131,6 +133,11 @@ else TMPDIR=$(readlink -f $TMPDIR) fi +# update path +if [ -n "$EXTRAPATH" ]; then + export PATH="${EXTRAPATH}:${PATH}" +fi + # find executables if [ -n "$USEDOCKER" ]; then if [ -z "$DOCKER" ]; then @@ -149,7 +156,6 @@ else fi fi - SANDBOX=$(get_sandbox) SANDBOX=$(readlink -f ${SANDBOX}) LOG="log_${SERVER}.log" From 6efa3a06f2ef8cb6b01bb89dd2e959166bf7dcad Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Thu, 25 Jan 2024 11:05:47 -0600 Subject: [PATCH 04/19] cleanup auto_stop --- HeterogeneousCore/SonicTriton/scripts/cmsTriton | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/scripts/cmsTriton b/HeterogeneousCore/SonicTriton/scripts/cmsTriton index e6ede404877a4..5f24aec9a1f07 100755 --- a/HeterogeneousCore/SonicTriton/scripts/cmsTriton +++ b/HeterogeneousCore/SonicTriton/scripts/cmsTriton @@ -406,6 +406,7 @@ auto_stop(){ fi PCOUNTER=0 PMAX=5 + # builtin wait is not used here because it can only monitor a child process, not a parent process while [ "$PCOUNTER" -le "$PMAX" ]; do if ! kill -0 $PARENTPID >& /dev/null; then PCOUNTER=$((PCOUNTER+1)) @@ -427,13 +428,11 @@ auto_stop(){ $STOP_FN # move logs out of tmp dir - if [ -z "$DRYRUN" ]; then - if [ -n "$VERBOSE" ]; then - mv "$LOG" "$TOPDIR" - # only keep non-empty log - if [ -s "$STOPLOG" ]; then - mv "$STOPLOG" "$TOPDIR" - fi + if [ -z "$DRYRUN" ] && [ -n "$VERBOSE" ]; then + mv "$LOG" "$TOPDIR" + # only keep non-empty log + if [ -s "$STOPLOG" ]; then + mv "$STOPLOG" "$TOPDIR" fi fi From d0d4882b9b7ca1e10494bb21a8bd149a1e792867 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Thu, 25 Jan 2024 11:09:37 -0600 Subject: [PATCH 05/19] extend sonic Run3 workflows to 14 TeV --- .../PyReleaseValidation/python/upgradeWorkflowComponents.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py b/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py index d6dd761188b14..e180616080563 100644 --- a/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py +++ b/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py @@ -2501,7 +2501,7 @@ class UpgradeWorkflow_SonicTriton(UpgradeWorkflow): def setup_(self, step, stepName, stepDict, k, properties): stepDict[stepName][k] = merge([{'--procModifiers': 'allSonicTriton'}, stepDict[step][k]]) def condition(self, fragment, stepList, key, hasHarvest): - return (fragment=='TTbar_13' and '2021' in key) \ + return ((fragment=='TTbar_13' or fragment=='TTbar_14TeV') and '2021' in key) \ or (fragment=='TTbar_14TeV' and '2026' in key) upgradeWFs['SonicTriton'] = UpgradeWorkflow_SonicTriton( steps = [ From 9ada2b0899ae0b82de448aa0eae9e0fce94731bf Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Thu, 25 Jan 2024 11:09:48 -0600 Subject: [PATCH 06/19] fix typo --- Configuration/PyReleaseValidation/scripts/runTheMatrix.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Configuration/PyReleaseValidation/scripts/runTheMatrix.py b/Configuration/PyReleaseValidation/scripts/runTheMatrix.py index 610460b31f02d..05bad56169e65 100755 --- a/Configuration/PyReleaseValidation/scripts/runTheMatrix.py +++ b/Configuration/PyReleaseValidation/scripts/runTheMatrix.py @@ -35,7 +35,7 @@ def runSelected(opt): ret = 0 if opt.show: mrd.show(opt.testList, opt.extended, opt.cafVeto) - if opt.testList : print('testListected items:', opt.testList) + if opt.testList : print('selected items:', opt.testList) else: mRunnerHi = MatrixRunner(mrd.workFlows, opt.nProcs, opt.nThreads) ret = mRunnerHi.runTests(opt) From d24a6f4e02c8d5ec3129b535603a2f188f5c0368 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Thu, 25 Jan 2024 11:11:03 -0600 Subject: [PATCH 07/19] notify TritonService when a TritonException is issued (in order to save logs) --- .../SonicTriton/interface/TritonException.h | 5 ++++- .../SonicTriton/interface/TritonService.h | 3 +++ .../SonicTriton/interface/triton_utils.h | 10 +++++----- .../SonicTriton/src/TritonException.cc | 20 +++++++++++++++++-- .../SonicTriton/src/TritonService.cc | 14 ++++++++++++- 5 files changed, 43 insertions(+), 9 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/interface/TritonException.h b/HeterogeneousCore/SonicTriton/interface/TritonException.h index 418a98fc1bd8e..e78a4acebff36 100644 --- a/HeterogeneousCore/SonicTriton/interface/TritonException.h +++ b/HeterogeneousCore/SonicTriton/interface/TritonException.h @@ -7,8 +7,11 @@ class TritonException : public cms::Exception { public: - explicit TritonException(std::string const& aCategory); + explicit TritonException(std::string const& aCategory, bool signal = false); void convertToWarning() const; + +protected: + bool signal_; }; #endif diff --git a/HeterogeneousCore/SonicTriton/interface/TritonService.h b/HeterogeneousCore/SonicTriton/interface/TritonService.h index 98d0d410924b8..b9d7a3d024b64 100644 --- a/HeterogeneousCore/SonicTriton/interface/TritonService.h +++ b/HeterogeneousCore/SonicTriton/interface/TritonService.h @@ -10,6 +10,7 @@ #include #include #include +#include #include "grpc_client.h" @@ -112,6 +113,7 @@ class TritonService { void addModel(const std::string& modelName, const std::string& path); Server serverInfo(const std::string& model, const std::string& preferred = "") const; const std::string& pid() const { return pid_; } + void notifyCallStatus(bool status) const; static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); @@ -132,6 +134,7 @@ class TritonService { unsigned currentModuleId_; bool allowAddModel_; bool startedFallback_; + CMS_SA_ALLOW mutable std::atomic callFails_; std::string pid_; std::unordered_map unservedModels_; //this represents a many:many:many map diff --git a/HeterogeneousCore/SonicTriton/interface/triton_utils.h b/HeterogeneousCore/SonicTriton/interface/triton_utils.h index d6c7612a5159c..9f04d84ef71da 100644 --- a/HeterogeneousCore/SonicTriton/interface/triton_utils.h +++ b/HeterogeneousCore/SonicTriton/interface/triton_utils.h @@ -72,11 +72,11 @@ inline bool triton_utils::checkType(inference::DataType dtype) { //helper to turn triton error into exception //implemented as a macro to avoid constructing the MSG string for successful function calls -#define TRITON_THROW_IF_ERROR(X, MSG) \ - { \ - triton::client::Error err = (X); \ - if (!err.IsOk()) \ - throw TritonException("TritonFailure") << (MSG) << (err.Message().empty() ? "" : ": " + err.Message()); \ +#define TRITON_THROW_IF_ERROR(X, MSG) \ + { \ + triton::client::Error err = (X); \ + if (!err.IsOk()) \ + throw TritonException("TritonFailure", true) << (MSG) << (err.Message().empty() ? "" : ": " + err.Message()); \ } extern template std::string triton_utils::printColl(const edm::Span::const_iterator>& coll, diff --git a/HeterogeneousCore/SonicTriton/src/TritonException.cc b/HeterogeneousCore/SonicTriton/src/TritonException.cc index ee160ffe957c2..71d0910867f25 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonException.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonException.cc @@ -1,6 +1,22 @@ #include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ServiceRegistry/interface/Service.h" #include "HeterogeneousCore/SonicTriton/interface/TritonException.h" +#include "HeterogeneousCore/SonicTriton/interface/TritonService.h" -TritonException::TritonException(std::string const& aCategory) : cms::Exception(aCategory) {} +TritonException::TritonException(std::string const& aCategory, bool signal) + : cms::Exception(aCategory), signal_(signal) { + if (signal_) { + edm::Service ts; + ts->notifyCallStatus(false); + } +} -void TritonException::convertToWarning() const { edm::LogWarning(category()) << explainSelf(); } +void TritonException::convertToWarning() const { + //undo the previous signal + if (signal_) { + edm::Service ts; + ts->notifyCallStatus(true); + } + + edm::LogWarning(category()) << explainSelf(); +} diff --git a/HeterogeneousCore/SonicTriton/src/TritonService.cc b/HeterogeneousCore/SonicTriton/src/TritonService.cc index 8a34b6197130c..fd1c9729fabb2 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonService.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonService.cc @@ -61,6 +61,7 @@ TritonService::TritonService(const edm::ParameterSet& pset, edm::ActivityRegistr currentModuleId_(0), allowAddModel_(false), startedFallback_(false), + callFails_(0), pid_(std::to_string(::getpid())) { //module construction is assumed to be serial (correct at the time this code was written) @@ -297,11 +298,22 @@ void TritonService::preBeginJob(edm::PathsAndConsumesOfModulesBase const&, edm:: << output; } +void TritonService::notifyCallStatus(bool status) const { + if (status) + --callFails_; + else + ++callFails_; +} + void TritonService::postEndJob() { if (!startedFallback_) return; - std::string command = fallbackOpts_.command + " stop"; + std::string command = fallbackOpts_.command; + //print logs if cmsRun is currently exiting because of a TritonException + if (callFails_ > 0 and !fallbackOpts_.verbose) + command += " -v"; + command += " stop"; if (verbose_) edm::LogInfo("TritonService") << command; From f6a5b7de52df737e6c54d3c9b9bfb25fee98790e Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Thu, 25 Jan 2024 12:32:52 -0600 Subject: [PATCH 08/19] move signal undo --- .../SonicTriton/interface/TritonException.h | 3 --- HeterogeneousCore/SonicTriton/src/TritonClient.cc | 6 ++++++ .../SonicTriton/src/TritonException.cc | 15 +++------------ 3 files changed, 9 insertions(+), 15 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/interface/TritonException.h b/HeterogeneousCore/SonicTriton/interface/TritonException.h index e78a4acebff36..3066c969a8e14 100644 --- a/HeterogeneousCore/SonicTriton/interface/TritonException.h +++ b/HeterogeneousCore/SonicTriton/interface/TritonException.h @@ -9,9 +9,6 @@ class TritonException : public cms::Exception { public: explicit TritonException(std::string const& aCategory, bool signal = false); void convertToWarning() const; - -protected: - bool signal_; }; #endif diff --git a/HeterogeneousCore/SonicTriton/src/TritonClient.cc b/HeterogeneousCore/SonicTriton/src/TritonClient.cc index 201ad40d35a0e..f0b37a3a381b6 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonClient.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonClient.cc @@ -346,6 +346,12 @@ void TritonClient::getResults(const std::vector //default case for sync and pseudo async void TritonClient::evaluate() { + //undo previous signal from TritonException + if (tries_ > 0) { + edm::Service ts; + ts->notifyCallStatus(true); + } + //in case there is nothing to process if (batchSize() == 0) { //call getResults on an empty vector diff --git a/HeterogeneousCore/SonicTriton/src/TritonException.cc b/HeterogeneousCore/SonicTriton/src/TritonException.cc index 71d0910867f25..d1a08dd653fee 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonException.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonException.cc @@ -3,20 +3,11 @@ #include "HeterogeneousCore/SonicTriton/interface/TritonException.h" #include "HeterogeneousCore/SonicTriton/interface/TritonService.h" -TritonException::TritonException(std::string const& aCategory, bool signal) - : cms::Exception(aCategory), signal_(signal) { - if (signal_) { +TritonException::TritonException(std::string const& aCategory, bool signal) : cms::Exception(aCategory) { + if (signal) { edm::Service ts; ts->notifyCallStatus(false); } } -void TritonException::convertToWarning() const { - //undo the previous signal - if (signal_) { - edm::Service ts; - ts->notifyCallStatus(true); - } - - edm::LogWarning(category()) << explainSelf(); -} +void TritonException::convertToWarning() const { edm::LogWarning(category()) << explainSelf(); } From 3ba5109cf05f8a9055bb0d46dfbe8dfe93fd083e Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Thu, 25 Jan 2024 15:38:18 -0600 Subject: [PATCH 09/19] print log instead of copying --- .../SonicTriton/src/TritonService.cc | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/src/TritonService.cc b/HeterogeneousCore/SonicTriton/src/TritonService.cc index fd1c9729fabb2..3a728770db313 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonService.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonService.cc @@ -310,19 +310,27 @@ void TritonService::postEndJob() { return; std::string command = fallbackOpts_.command; - //print logs if cmsRun is currently exiting because of a TritonException - if (callFails_ > 0 and !fallbackOpts_.verbose) - command += " -v"; + //prevent log cleanup during server stop + if (callFails_ > 0) + command += " -c"; command += " stop"; if (verbose_) edm::LogInfo("TritonService") << command; const auto& [output, rv] = execSys(command); - if (rv != 0) { + if (rv != 0 or callFails_ > 0) { + //print logs if cmsRun is currently exiting because of a TritonException edm::LogError("TritonService") << output; printFallbackServerLog(); - throw cms::Exception("FallbackFailed") - << "TritonService: Stopping the fallback server failed with exit code " << rv; + if (rv != 0) { + cms::Exception stopException("FallbackFailed"); + stopException << "TritonService: Stopping the fallback server failed with exit code " << rv; + //avoid throwing if the stack is already unwinding + if (callFails_ > 0) + edm::LogWarning(stopException.category()) << stopException.explainSelf(); + else + throw stopException; + } } else if (verbose_) { edm::LogInfo("TritonService") << output; printFallbackServerLog(); From 87e2dd482c6edf1ca145c0d72eab6d05810a573c Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Fri, 26 Jan 2024 09:50:41 -0600 Subject: [PATCH 10/19] only notify when error is related to local server --- .../SonicTriton/interface/TritonClient.h | 2 + .../SonicTriton/interface/triton_utils.h | 10 +- .../SonicTriton/src/TritonClient.cc | 96 ++++++++++--------- .../SonicTriton/src/TritonMemResource.cc | 21 ++-- .../SonicTriton/src/TritonService.cc | 12 ++- 5 files changed, 80 insertions(+), 61 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/interface/TritonClient.h b/HeterogeneousCore/SonicTriton/interface/TritonClient.h index 833d329417d18..4661c3e57c54c 100644 --- a/HeterogeneousCore/SonicTriton/interface/TritonClient.h +++ b/HeterogeneousCore/SonicTriton/interface/TritonClient.h @@ -48,6 +48,7 @@ class TritonClient : public SonicClient { void resetBatchMode(); void reset() override; TritonServerType serverType() const { return serverType_; } + bool isLocal() const { return isLocal_; } //for fillDescriptions static void fillPSetDescription(edm::ParameterSetDescription& iDesc); @@ -78,6 +79,7 @@ class TritonClient : public SonicClient { bool verbose_; bool useSharedMemory_; TritonServerType serverType_; + bool isLocal_; grpc_compression_algorithm compressionAlgo_; triton::client::Headers headers_; diff --git a/HeterogeneousCore/SonicTriton/interface/triton_utils.h b/HeterogeneousCore/SonicTriton/interface/triton_utils.h index 9f04d84ef71da..edfb829a48a2c 100644 --- a/HeterogeneousCore/SonicTriton/interface/triton_utils.h +++ b/HeterogeneousCore/SonicTriton/interface/triton_utils.h @@ -72,11 +72,11 @@ inline bool triton_utils::checkType(inference::DataType dtype) { //helper to turn triton error into exception //implemented as a macro to avoid constructing the MSG string for successful function calls -#define TRITON_THROW_IF_ERROR(X, MSG) \ - { \ - triton::client::Error err = (X); \ - if (!err.IsOk()) \ - throw TritonException("TritonFailure", true) << (MSG) << (err.Message().empty() ? "" : ": " + err.Message()); \ +#define TRITON_THROW_IF_ERROR(X, MSG, NOTIFY) \ + { \ + triton::client::Error err = (X); \ + if (!err.IsOk()) \ + throw TritonException("TritonFailure", NOTIFY) << (MSG) << (err.Message().empty() ? "" : ": " + err.Message()); \ } extern template std::string triton_utils::printColl(const edm::Span::const_iterator>& coll, diff --git a/HeterogeneousCore/SonicTriton/src/TritonClient.cc b/HeterogeneousCore/SonicTriton/src/TritonClient.cc index f0b37a3a381b6..c0ada361da98d 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonClient.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonClient.cc @@ -71,11 +71,13 @@ TritonClient::TritonClient(const edm::ParameterSet& params, const std::string& d //todo: could enforce async mode otherwise (unless mode was specified by user?) if (serverType_ == TritonServerType::LocalCPU) setMode(SonicMode::Sync); + isLocal_ = serverType_ == TritonServerType::LocalCPU or serverType_ == TritonServerType::LocalGPU; //connect to the server TRITON_THROW_IF_ERROR( tc::InferenceServerGrpcClient::Create(&client_, server.url, false, server.useSsl, server.sslOptions), - "TritonClient(): unable to create inference context"); + "TritonClient(): unable to create inference context", + isLocal_); //set options options_[0].model_version_ = params.getParameter("modelVersion"); @@ -110,7 +112,8 @@ TritonClient::TritonClient(const edm::ParameterSet& params, const std::string& d //compare model checksums to remote config to enforce versioning inference::ModelConfigResponse modelConfigResponse; TRITON_THROW_IF_ERROR(client_->ModelConfig(&modelConfigResponse, options_[0].model_name_, options_[0].model_version_), - "TritonClient(): unable to get model config"); + "TritonClient(): unable to get model config", + isLocal_); inference::ModelConfig remoteModelConfig(modelConfigResponse.config()); std::map> checksums; @@ -140,7 +143,8 @@ TritonClient::TritonClient(const edm::ParameterSet& params, const std::string& d //get model info inference::ModelMetadataResponse modelMetadata; TRITON_THROW_IF_ERROR(client_->ModelMetadata(&modelMetadata, options_[0].model_name_, options_[0].model_version_), - "TritonClient(): unable to get model metadata"); + "TritonClient(): unable to get model metadata", + isLocal_); //get input and output (which know their sizes) const auto& nicInputs = modelMetadata.inputs(); @@ -329,8 +333,8 @@ void TritonClient::getResults(const std::vector //set shape here before output becomes const if (output.variableDims()) { std::vector tmp_shape; - TRITON_THROW_IF_ERROR(result->Shape(oname, &tmp_shape), - "getResults(): unable to get output shape for " + oname); + TRITON_THROW_IF_ERROR( + result->Shape(oname, &tmp_shape), "getResults(): unable to get output shape for " + oname, false); if (!noOuterDim_) tmp_shape.erase(tmp_shape.begin()); output.setShape(tmp_shape, i); @@ -406,43 +410,45 @@ void TritonClient::evaluate() { if (mode_ == SonicMode::Async) { //non-blocking call success = handle_exception([&]() { - TRITON_THROW_IF_ERROR( - client_->AsyncInferMulti( - [start_status, this](std::vector resultsTmp) { - //immediately convert to shared_ptr - const auto& results = convertToShared(resultsTmp); - //check results - for (auto ptr : results) { - auto success = handle_exception( - [&]() { TRITON_THROW_IF_ERROR(ptr->RequestStatus(), "evaluate(): unable to get result(s)"); }); - if (!success) - return; - } - - if (verbose()) { - inference::ModelStatistics end_status; - auto success = handle_exception([&]() { end_status = getServerSideStatus(); }); - if (!success) - return; - - const auto& stats = summarizeServerStats(start_status, end_status); - reportServerSideStats(stats); - } - - //check result - auto success = handle_exception([&]() { getResults(results); }); - if (!success) - return; - - //finish - finish(true); - }, - options_, - inputsTriton, - outputsTriton, - headers_, - compressionAlgo_), - "evaluate(): unable to launch async run"); + TRITON_THROW_IF_ERROR(client_->AsyncInferMulti( + [start_status, this](std::vector resultsTmp) { + //immediately convert to shared_ptr + const auto& results = convertToShared(resultsTmp); + //check results + for (auto ptr : results) { + auto success = handle_exception([&]() { + TRITON_THROW_IF_ERROR( + ptr->RequestStatus(), "evaluate(): unable to get result(s)", isLocal_); + }); + if (!success) + return; + } + + if (verbose()) { + inference::ModelStatistics end_status; + auto success = handle_exception([&]() { end_status = getServerSideStatus(); }); + if (!success) + return; + + const auto& stats = summarizeServerStats(start_status, end_status); + reportServerSideStats(stats); + } + + //check result + auto success = handle_exception([&]() { getResults(results); }); + if (!success) + return; + + //finish + finish(true); + }, + options_, + inputsTriton, + outputsTriton, + headers_, + compressionAlgo_), + "evaluate(): unable to launch async run", + isLocal_); }); if (!success) return; @@ -452,7 +458,8 @@ void TritonClient::evaluate() { success = handle_exception([&]() { TRITON_THROW_IF_ERROR( client_->InferMulti(&resultsTmp, options_, inputsTriton, outputsTriton, headers_, compressionAlgo_), - "evaluate(): unable to run and/or get result"); + "evaluate(): unable to run and/or get result", + isLocal_); }); //immediately convert to shared_ptr const auto& results = convertToShared(resultsTmp); @@ -539,7 +546,8 @@ inference::ModelStatistics TritonClient::getServerSideStatus() const { if (verbose_) { inference::ModelStatisticsResponse resp; TRITON_THROW_IF_ERROR(client_->ModelInferenceStatistics(&resp, options_[0].model_name_, options_[0].model_version_), - "getServerSideStatus(): unable to get model statistics"); + "getServerSideStatus(): unable to get model statistics", + isLocal_); return *(resp.model_stats().begin()); } return inference::ModelStatistics{}; diff --git a/HeterogeneousCore/SonicTriton/src/TritonMemResource.cc b/HeterogeneousCore/SonicTriton/src/TritonMemResource.cc index 102f28eda6601..2e35932f7f9a9 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonMemResource.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonMemResource.cc @@ -19,7 +19,8 @@ template void TritonMemResource::set() { for (auto& entry : data_->entries_) { TRITON_THROW_IF_ERROR(entry.data_->SetSharedMemory(name_, entry.totalByteSize_, entry.offset_), - "unable to set shared memory (" + name_ + ")"); + "unable to set shared memory (" + name_ + ")", + true); } } @@ -35,7 +36,8 @@ void TritonInputHeapResource::copyInput(const void* values, size_t offset, unsig (data_->entries_.size() > 1 ? std::to_string(entry) : data_->entries_[entry].byteSizePerBatch_ ? std::to_string(offset / data_->entries_[entry].byteSizePerBatch_) - : "")); + : ""), + false); } template <> @@ -45,7 +47,8 @@ void TritonOutputHeapResource::copyOutput() { size_t contentByteSizeEntry(0); if (entry.totalByteSize_ > 0) TRITON_THROW_IF_ERROR(entry.result_->RawData(data_->name_, &entry.output_, &contentByteSizeEntry), - data_->name_ + " fromServer(): unable to get raw"); + data_->name_ + " fromServer(): unable to get raw", + false); contentByteSize += contentByteSizeEntry; } if (contentByteSize != data_->totalByteSize_) { @@ -87,7 +90,8 @@ TritonCpuShmResource::TritonCpuShmResource(TritonData* data, const std:: throw cms::Exception("TritonError") << "unable to close descriptor for shared memory key: " + this->name_; TRITON_THROW_IF_ERROR(this->data_->client()->RegisterSystemSharedMemory(this->name_, this->name_, this->size_), - "unable to register shared memory region: " + this->name_); + "unable to register shared memory region: " + this->name_, + true); } template @@ -101,7 +105,8 @@ void TritonCpuShmResource::close() { return; TRITON_THROW_IF_ERROR(this->data_->client()->UnregisterSystemSharedMemory(this->name_), - "unable to unregister shared memory region: " + this->name_); + "unable to unregister shared memory region: " + this->name_, + true); //unmap int tmp_fd = munmap(this->addr_, this->size_); @@ -143,7 +148,8 @@ TritonGpuShmResource::TritonGpuShmResource(TritonData* data, const std:: cudaCheck(cudaMalloc((void**)&this->addr_, this->size_), "unable to allocate GPU memory for key: " + this->name_); cudaCheck(cudaIpcGetMemHandle(handle_.get(), this->addr_), "unable to get IPC handle for key: " + this->name_); TRITON_THROW_IF_ERROR(this->data_->client()->RegisterCudaSharedMemory(this->name_, *handle_, deviceId_, this->size_), - "unable to register CUDA shared memory region: " + this->name_); + "unable to register CUDA shared memory region: " + this->name_, + true); } template @@ -156,7 +162,8 @@ void TritonGpuShmResource::close() { if (this->closed_) return; TRITON_THROW_IF_ERROR(this->data_->client()->UnregisterCudaSharedMemory(this->name_), - "unable to unregister CUDA shared memory region: " + this->name_); + "unable to unregister CUDA shared memory region: " + this->name_, + true); cudaCheck(cudaFree(this->addr_), "unable to free GPU memory for key: " + this->name_); this->closed_ = true; } diff --git a/HeterogeneousCore/SonicTriton/src/TritonService.cc b/HeterogeneousCore/SonicTriton/src/TritonService.cc index 3a728770db313..b64e8ad2a6754 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonService.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonService.cc @@ -105,20 +105,22 @@ TritonService::TritonService(const edm::ParameterSet& pset, edm::ActivityRegistr std::unique_ptr client; TRITON_THROW_IF_ERROR( tc::InferenceServerGrpcClient::Create(&client, server.url, false, server.useSsl, server.sslOptions), - "TritonService(): unable to create inference context for " + serverName + " (" + server.url + ")"); + "TritonService(): unable to create inference context for " + serverName + " (" + server.url + ")", + false); if (verbose_) { inference::ServerMetadataResponse serverMetaResponse; TRITON_THROW_IF_ERROR(client->ServerMetadata(&serverMetaResponse), - "TritonService(): unable to get metadata for " + serverName + " (" + server.url + ")"); + "TritonService(): unable to get metadata for " + serverName + " (" + server.url + ")", + false); edm::LogInfo("TritonService") << "Server " << serverName << ": url = " << server.url << ", version = " << serverMetaResponse.version(); } inference::RepositoryIndexResponse repoIndexResponse; - TRITON_THROW_IF_ERROR( - client->ModelRepositoryIndex(&repoIndexResponse), - "TritonService(): unable to get repository index for " + serverName + " (" + server.url + ")"); + TRITON_THROW_IF_ERROR(client->ModelRepositoryIndex(&repoIndexResponse), + "TritonService(): unable to get repository index for " + serverName + " (" + server.url + ")", + false); //servers keep track of models and vice versa if (verbose_) From af890f6d4f0fa683e667336ddaef558f71e30a55 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Fri, 26 Jan 2024 10:39:13 -0600 Subject: [PATCH 11/19] select timeout unit --- HeterogeneousCore/SonicTriton/README.md | 1 + .../SonicTriton/src/TritonClient.cc | 18 ++++++++++++++++-- .../SonicTriton/test/tritonTest_cfg.py | 2 ++ 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/README.md b/HeterogeneousCore/SonicTriton/README.md index 2e29bee23cacd..4050f36beaf0a 100644 --- a/HeterogeneousCore/SonicTriton/README.md +++ b/HeterogeneousCore/SonicTriton/README.md @@ -29,6 +29,7 @@ The model information from the server can be printed by enabling `verbose` outpu * `modelConfigPath`: path to `config.pbtxt` file for the model (using `edm::FileInPath`) * `preferredServer`: name of preferred server, for testing (see [Services](#services) below) * `timeout`: maximum allowed time for a request (disabled with 0) +* `timeoutUnit`: seconds, milliseconds, or microseconds (default: seconds) * `outputs`: optional, specify which output(s) the server should send * `verbose`: enable verbose printouts (default: false) * `useSharedMemory`: enable use of shared memory (see [below](#shared-memory)) with local servers (default: true) diff --git a/HeterogeneousCore/SonicTriton/src/TritonClient.cc b/HeterogeneousCore/SonicTriton/src/TritonClient.cc index c0ada361da98d..76fd670bb66bc 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonClient.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonClient.cc @@ -1,5 +1,6 @@ #include "FWCore/MessageLogger/interface/MessageLogger.h" #include "FWCore/ParameterSet/interface/FileInPath.h" +#include "FWCore/ParameterSet/interface/allowedValues.h" #include "FWCore/ServiceRegistry/interface/Service.h" #include "FWCore/Utilities/interface/Exception.h" #include "HeterogeneousCore/SonicTriton/interface/TritonClient.h" @@ -81,8 +82,19 @@ TritonClient::TritonClient(const edm::ParameterSet& params, const std::string& d //set options options_[0].model_version_ = params.getParameter("modelVersion"); - //convert seconds to microseconds - options_[0].client_timeout_ = params.getUntrackedParameter("timeout") * 1e6; + options_[0].client_timeout_ = params.getUntrackedParameter("timeout"); + //convert to microseconds + const auto& timeoutUnit = params.getUntrackedParameter("timeoutUnit"); + unsigned conversion = 1; + if (timeoutUnit == "seconds") + conversion = 1e6; + else if (timeoutUnit == "milliseconds") + conversion = 1e3; + else if (timeoutUnit == "microseconds") + conversion = 1; + else + throw cms::Exception("Configuration") << "Unknown timeout unit: " << timeoutUnit; + options_[0].client_timeout_ *= conversion; //get fixed parameters from local config inference::ModelConfig localModelConfig; @@ -563,6 +575,8 @@ void TritonClient::fillPSetDescription(edm::ParameterSetDescription& iDesc) { //server parameters should not affect the physics results descClient.addUntracked("preferredServer", ""); descClient.addUntracked("timeout"); + descClient.ifValue(edm::ParameterDescription("timeoutUnit", "seconds", false), + edm::allowedValues("seconds", "milliseconds", "microseconds")); descClient.addUntracked("useSharedMemory", true); descClient.addUntracked("compression", ""); descClient.addUntracked>("outputs", {}); diff --git a/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py b/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py index 1773e252cfae2..2006296614e98 100644 --- a/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py +++ b/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py @@ -22,6 +22,7 @@ options.register("address", "", VarParsing.multiplicity.singleton, VarParsing.varType.string, "server address") options.register("port", 8001, VarParsing.multiplicity.singleton, VarParsing.varType.int, "server port") options.register("timeout", 30, VarParsing.multiplicity.singleton, VarParsing.varType.int, "timeout for requests") +options.register("timeoutUnit", "seconds", VarParsing.multiplicity.singleton, VarParsing.varType.string, "unit for timeout") options.register("params", "", VarParsing.multiplicity.singleton, VarParsing.varType.string, "json file containing server address/port") options.register("threads", 1, VarParsing.multiplicity.singleton, VarParsing.varType.int, "number of threads") options.register("streams", 0, VarParsing.multiplicity.singleton, VarParsing.varType.int, "number of streams") @@ -122,6 +123,7 @@ mode = cms.string(options.mode), preferredServer = cms.untracked.string(""), timeout = cms.untracked.uint32(options.timeout), + timeoutUnit = cms.untracked.string(options.timeoutUnit), modelName = cms.string(model), modelVersion = cms.string(""), modelConfigPath = cms.FileInPath("HeterogeneousCore/SonicTriton/data/models/{}/config.pbtxt".format(model)), From 39f289894cf9848cd80cb25abec169b3fb88cb22 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Fri, 26 Jan 2024 11:16:53 -0600 Subject: [PATCH 12/19] more granular verbosity control --- HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py b/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py index 2006296614e98..93423a812c072 100644 --- a/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py +++ b/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py @@ -29,7 +29,10 @@ options.register("modules", "TritonGraphProducer", VarParsing.multiplicity.list, VarParsing.varType.string, "list of modules to run (choices: {})".format(', '.join(models))) options.register("models","gat_test", VarParsing.multiplicity.list, VarParsing.varType.string, "list of models (same length as modules, or just 1 entry if all modules use same model)") options.register("mode","Async", VarParsing.multiplicity.singleton, VarParsing.varType.string, "mode for client (choices: {})".format(', '.join(allowed_modes))) -options.register("verbose", False, VarParsing.multiplicity.singleton, VarParsing.varType.bool, "enable verbose output") +options.register("verbose", False, VarParsing.multiplicity.singleton, VarParsing.varType.bool, "enable all verbose output") +options.register("verboseClient", False, VarParsing.multiplicity.singleton, VarParsing.varType.bool, "enable verbose output for clients") +options.register("verboseServer", False, VarParsing.multiplicity.singleton, VarParsing.varType.bool, "enable verbose output for server") +options.register("verboseService", False, VarParsing.multiplicity.singleton, VarParsing.varType.bool, "enable verbose output for TritonService") options.register("brief", False, VarParsing.multiplicity.singleton, VarParsing.varType.bool, "briefer output for graph modules") options.register("fallbackName", "", VarParsing.multiplicity.singleton, VarParsing.varType.string, "name for fallback server") options.register("unittest", False, VarParsing.multiplicity.singleton, VarParsing.varType.bool, "unit test mode: reduce input sizes") @@ -83,8 +86,8 @@ process.source = cms.Source("EmptySource") -process.TritonService.verbose = options.verbose -process.TritonService.fallback.verbose = options.verbose +process.TritonService.verbose = options.verbose or options.verboseService +process.TritonService.fallback.verbose = options.verbose or options.verboseServer process.TritonService.fallback.useDocker = options.docker if len(options.fallbackName)>0: process.TritonService.fallback.instanceBaseName = options.fallbackName @@ -127,7 +130,7 @@ modelName = cms.string(model), modelVersion = cms.string(""), modelConfigPath = cms.FileInPath("HeterogeneousCore/SonicTriton/data/models/{}/config.pbtxt".format(model)), - verbose = cms.untracked.bool(options.verbose), + verbose = cms.untracked.bool(options.verbose or options.verboseClient), allowedTries = cms.untracked.uint32(options.tries), useSharedMemory = cms.untracked.bool(options.shm), compression = cms.untracked.string(options.compression), From ec33ffd33f08eb3de847a56b5aff616271b57a61 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Fri, 26 Jan 2024 14:23:35 -0600 Subject: [PATCH 13/19] argparse migration for tritonTest_cfg --- HeterogeneousCore/SonicTriton/test/README.md | 6 +- .../SonicTriton/test/tritonTest_cfg.py | 77 ++++++++----------- .../SonicTriton/test/unittest.sh | 2 +- 3 files changed, 35 insertions(+), 50 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/test/README.md b/HeterogeneousCore/SonicTriton/test/README.md index 2249a3ebd6ac1..5ace852ba0153 100644 --- a/HeterogeneousCore/SonicTriton/test/README.md +++ b/HeterogeneousCore/SonicTriton/test/README.md @@ -16,17 +16,17 @@ The local server will use Apptainer with CPU by default; if a local Nvidia GPU i Run the image test: ``` -cmsRun tritonTest_cfg.py maxEvents=1 modules=TritonImageProducer,TritonImageProducer models=inception_graphdef,densenet_onnx +cmsRun tritonTest_cfg.py --maxEvents 1 --modules TritonImageProducer TritonImageProducer --models inception_graphdef densenet_onnx ``` Run the identity test with ragged batching: ``` -cmsRun tritonTest_cfg.py maxEvents=1 modules=TritonIdentityProducer models=ragged_io +cmsRun tritonTest_cfg.py --maxEvents 1 --modules TritonIdentityProducer --models ragged_io ``` Run the graph test: ``` -cmsRun tritonTest_cfg.py maxEvents=1 modules=TritonGraphProducer +cmsRun tritonTest_cfg.py --maxEvents 1 --modules TritonGraphProducer ``` ## Caveats diff --git a/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py b/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py index 93423a812c072..fa891adb88721 100644 --- a/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py +++ b/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py @@ -1,6 +1,6 @@ -from FWCore.ParameterSet.VarParsing import VarParsing import FWCore.ParameterSet.Config as cms import os, sys, json +from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter # module/model correspondence models = { @@ -16,34 +16,34 @@ allowed_compression = ["none","deflate","gzip"] allowed_devices = ["auto","cpu","gpu"] -options = VarParsing() -options.register("maxEvents", -1, VarParsing.multiplicity.singleton, VarParsing.varType.int, "Number of events to process (-1 for all)") -options.register("serverName", "default", VarParsing.multiplicity.singleton, VarParsing.varType.string, "name for server (used internally)") -options.register("address", "", VarParsing.multiplicity.singleton, VarParsing.varType.string, "server address") -options.register("port", 8001, VarParsing.multiplicity.singleton, VarParsing.varType.int, "server port") -options.register("timeout", 30, VarParsing.multiplicity.singleton, VarParsing.varType.int, "timeout for requests") -options.register("timeoutUnit", "seconds", VarParsing.multiplicity.singleton, VarParsing.varType.string, "unit for timeout") -options.register("params", "", VarParsing.multiplicity.singleton, VarParsing.varType.string, "json file containing server address/port") -options.register("threads", 1, VarParsing.multiplicity.singleton, VarParsing.varType.int, "number of threads") -options.register("streams", 0, VarParsing.multiplicity.singleton, VarParsing.varType.int, "number of streams") -options.register("modules", "TritonGraphProducer", VarParsing.multiplicity.list, VarParsing.varType.string, "list of modules to run (choices: {})".format(', '.join(models))) -options.register("models","gat_test", VarParsing.multiplicity.list, VarParsing.varType.string, "list of models (same length as modules, or just 1 entry if all modules use same model)") -options.register("mode","Async", VarParsing.multiplicity.singleton, VarParsing.varType.string, "mode for client (choices: {})".format(', '.join(allowed_modes))) -options.register("verbose", False, VarParsing.multiplicity.singleton, VarParsing.varType.bool, "enable all verbose output") -options.register("verboseClient", False, VarParsing.multiplicity.singleton, VarParsing.varType.bool, "enable verbose output for clients") -options.register("verboseServer", False, VarParsing.multiplicity.singleton, VarParsing.varType.bool, "enable verbose output for server") -options.register("verboseService", False, VarParsing.multiplicity.singleton, VarParsing.varType.bool, "enable verbose output for TritonService") -options.register("brief", False, VarParsing.multiplicity.singleton, VarParsing.varType.bool, "briefer output for graph modules") -options.register("fallbackName", "", VarParsing.multiplicity.singleton, VarParsing.varType.string, "name for fallback server") -options.register("unittest", False, VarParsing.multiplicity.singleton, VarParsing.varType.bool, "unit test mode: reduce input sizes") -options.register("testother", False, VarParsing.multiplicity.singleton, VarParsing.varType.bool, "also test gRPC communication if shared memory enabled, or vice versa") -options.register("shm", True, VarParsing.multiplicity.singleton, VarParsing.varType.bool, "enable shared memory") -options.register("compression", "", VarParsing.multiplicity.singleton, VarParsing.varType.string, "enable I/O compression (choices: {})".format(', '.join(allowed_compression))) -options.register("ssl", False, VarParsing.multiplicity.singleton, VarParsing.varType.bool, "enable SSL authentication for server communication") -options.register("device","auto", VarParsing.multiplicity.singleton, VarParsing.varType.string, "specify device for fallback server (choices: {})".format(', '.join(allowed_devices))) -options.register("docker", False, VarParsing.multiplicity.singleton, VarParsing.varType.bool, "use Docker for fallback server") -options.register("tries", 0, VarParsing.multiplicity.singleton, VarParsing.varType.int, "number of retries for failed request") -options.parseArguments() +parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter) +parser.add_argument("--maxEvents", default=-1, type=int, help="Number of events to process (-1 for all)") +parser.add_argument("--serverName", default="default", type=str, help="name for server (used internally)") +parser.add_argument("--address", default="", type=str, help="server address") +parser.add_argument("--port", default=8001, type=int, help="server port") +parser.add_argument("--timeout", default=30, type=int, help="timeout for requests") +parser.add_argument("--timeoutUnit", default="seconds", type=str, help="unit for timeout") +parser.add_argument("--params", default="", type=str, help="json file containing server address/port") +parser.add_argument("--threads", default=1, type=int, help="number of threads") +parser.add_argument("--streams", default=0, type=int, help="number of streams") +parser.add_argument("--modules", metavar=("MODULES"), default=["TritonGraphProducer"], nargs='+', type=str, choices=list(models), help="list of modules to run (choices: %(choices)s)") +parser.add_argument("--models", default=["gat_test"], nargs='+', type=str, help="list of models (same length as modules, or just 1 entry if all modules use same model)") +parser.add_argument("--mode", default="Async", type=str, choices=allowed_modes, help="mode for client") +parser.add_argument("--verbose", default=False, action="store_true", help="enable all verbose output") +parser.add_argument("--verboseClient", default=False, action="store_true", help="enable verbose output for clients") +parser.add_argument("--verboseServer", default=False, action="store_true", help="enable verbose output for server") +parser.add_argument("--verboseService", default=False, action="store_true", help="enable verbose output for TritonService") +parser.add_argument("--brief", default=False, action="store_true", help="briefer output for graph modules") +parser.add_argument("--fallbackName", default="", type=str, help="name for fallback server") +parser.add_argument("--unittest", default=False, action="store_true", help="unit test mode: reduce input sizes") +parser.add_argument("--testother", default=False, action="store_true", help="also test gRPC communication if shared memory enabled, or vice versa") +parser.add_argument("--noShm", default=False, action="store_true", help="disable shared memory") +parser.add_argument("--compression", default="", type=str, choices=allowed_compression, help="enable I/O compression") +parser.add_argument("--ssl", default=False, action="store_true", help="enable SSL authentication for server communication") +parser.add_argument("--device", default="auto", type=str.lower, choices=allowed_devices, help="specify device for fallback server") +parser.add_argument("--docker", default=False, action="store_true", help="use Docker for fallback server") +parser.add_argument("--tries", default=0, type=int, help="number of retries for failed request") +options = parser.parse_args() if len(options.params)>0: with open(options.params,'r') as pfile: @@ -55,28 +55,13 @@ # check models and modules if len(options.modules)!=len(options.models): # assigning to VarParsing.multiplicity.list actually appends to existing value(s) - if len(options.models)==1: options.models = [options.models[0]]*(len(options.modules)-1) + if len(options.models)==1: options.models = [options.models[0]]*(len(options.modules)) else: raise ValueError("Arguments for modules and models must have same length") for im,module in enumerate(options.modules): - if module not in models: - raise ValueError("Unknown module: {}".format(module)) model = options.models[im] if model not in models[module]: raise ValueError("Unsupported model {} for module {}".format(model,module)) -# check modes -if options.mode not in allowed_modes: - raise ValueError("Unknown mode: {}".format(options.mode)) - -# check compression -if len(options.compression)>0 and options.compression not in allowed_compression: - raise ValueError("Unknown compression setting: {}".format(options.compression)) - -# check devices -options.device = options.device.lower() -if options.device not in allowed_devices: - raise ValueError("Unknown device: {}".format(options.device)) - from Configuration.ProcessModifiers.enableSonicTriton_cff import enableSonicTriton process = cms.Process('tritonTest',enableSonicTriton) @@ -132,7 +117,7 @@ modelConfigPath = cms.FileInPath("HeterogeneousCore/SonicTriton/data/models/{}/config.pbtxt".format(model)), verbose = cms.untracked.bool(options.verbose or options.verboseClient), allowedTries = cms.untracked.uint32(options.tries), - useSharedMemory = cms.untracked.bool(options.shm), + useSharedMemory = cms.untracked.bool(not options.noShm), compression = cms.untracked.string(options.compression), ) ) diff --git a/HeterogeneousCore/SonicTriton/test/unittest.sh b/HeterogeneousCore/SonicTriton/test/unittest.sh index aacb1b699df33..87cab09bf4969 100755 --- a/HeterogeneousCore/SonicTriton/test/unittest.sh +++ b/HeterogeneousCore/SonicTriton/test/unittest.sh @@ -50,7 +50,7 @@ fi fallbackName=triton_server_instance_${DEVICE} tmpFile=$(mktemp -p ${LOCALTOP} SonicTritonTestXXXXXXXX.log) -cmsRun ${LOCALTOP}/src/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py modules=TritonGraphProducer,TritonGraphFilter,TritonGraphAnalyzer maxEvents=2 unittest=1 verbose=1 device=${DEVICE} testother=1 fallbackName=${fallbackName} >& $tmpFile +cmsRun ${LOCALTOP}/src/HeterogeneousCore/SonicTriton/test/tritonTest_cfg.py --modules TritonGraphProducer TritonGraphFilter TritonGraphAnalyzer --maxEvents 2 --unittest --verbose --device ${DEVICE} --testother --fallbackName ${fallbackName} >& $tmpFile CMSEXIT=$? cat $tmpFile From 6a7f03a615c95d428f9a64e6a49759b6350ecdb4 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Fri, 26 Jan 2024 15:05:37 -0600 Subject: [PATCH 14/19] skip driver check if not using GPU --- HeterogeneousCore/SonicTriton/scripts/cmsTriton | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/scripts/cmsTriton b/HeterogeneousCore/SonicTriton/scripts/cmsTriton index 5f24aec9a1f07..3949d6f21826b 100755 --- a/HeterogeneousCore/SonicTriton/scripts/cmsTriton +++ b/HeterogeneousCore/SonicTriton/scripts/cmsTriton @@ -580,11 +580,16 @@ elif [ "$OP" == start ]; then START_EXIT=0 for ((counter=0; counter < ${RETRIES}; counter++)); do make_tmp - #If we plan on editing model configs, must repull files into /tmp/local_model_repo, which is deleted upon retry + + # if we plan on editing model configs, must copy files into /tmp/local_model_repo, which is deleted upon retry if [ "$counter" -eq 0 ] || [ -n "$THREADCONTROL" ]; then list_models; fi - check_drivers - DRIVER_EXIT=$? - if [ "$DRIVER_EXIT" -ne 0 ]; then exit $DRIVER_EXIT; fi + + # only need to check drivers if using GPU + if [ -n "$GPU" ]; then + check_drivers + DRIVER_EXIT=$? + if [ "$DRIVER_EXIT" -ne 0 ]; then exit $DRIVER_EXIT; fi + fi $START_FN START_EXIT=$? From 91ef5b0812f6d3d6cafa510e77212347a3478c8c Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Wed, 31 Jan 2024 11:43:35 -0600 Subject: [PATCH 15/19] fix handling of symbolic links --- HeterogeneousCore/SonicTriton/scripts/cmsTritonConfigTool | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/scripts/cmsTritonConfigTool b/HeterogeneousCore/SonicTriton/scripts/cmsTritonConfigTool index 9a1ef54c57da6..4782eabcd976a 100755 --- a/HeterogeneousCore/SonicTriton/scripts/cmsTritonConfigTool +++ b/HeterogeneousCore/SonicTriton/scripts/cmsTritonConfigTool @@ -263,7 +263,8 @@ def cfg_checksum(args): from glob import glob config_dir = os.path.dirname(args.config) for filename in glob(os.path.join(config_dir,"*/*")): - if os.path.islink(os.path.dirname(filename)): continue + # evaluate symbolic links + filename = os.path.realpath(filename) checksum = get_checksum(filename) # key = algorithm:[filename relative to config.pbtxt dir] filename = os.path.relpath(filename, config_dir) @@ -324,10 +325,12 @@ def cfg_versioncheck(args): missing = [] for path in os.environ['CMSSW_SEARCH_PATH'].split(':'): - for dirpath, dirnames, filenames in os.walk(path): + if args.verbose: print("Checking: "+path) + for dirpath, dirnames, filenames in os.walk(path, followlinks=True): for filename in filenames: if filename=="config.pbtxt": filepath = os.path.join(dirpath,filename) + if args.verbose: print(filepath) checksum_args = Namespace( config=filepath, should_return=True, copy=False, json=False, defaults=False, view=False, @@ -435,6 +438,7 @@ if __name__=="__main__": parser_checksum.set_defaults(func=cfg_checksum) parser_versioncheck = subparsers.add_parser("versioncheck", parents=[_parser_checksum_update], help="check all model checksums") + parser_versioncheck.add_argument("--verbose", default=False, action="store_true", help="verbose output (show all files checked)") parser_versioncheck.set_defaults(func=cfg_versioncheck) _parser_copy_req = ArgumentParser(add_help=False, parents=[_parser_copy_view]) From d28aea39a6748fb07b11c15d65338210bd43a1a6 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Wed, 31 Jan 2024 15:20:14 -0600 Subject: [PATCH 16/19] common message elements as string(stream)s --- HeterogeneousCore/SonicTriton/src/TritonService.cc | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/src/TritonService.cc b/HeterogeneousCore/SonicTriton/src/TritonService.cc index b64e8ad2a6754..a688f8456b097 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonService.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonService.cc @@ -325,13 +325,14 @@ void TritonService::postEndJob() { edm::LogError("TritonService") << output; printFallbackServerLog(); if (rv != 0) { - cms::Exception stopException("FallbackFailed"); - stopException << "TritonService: Stopping the fallback server failed with exit code " << rv; + std::string stopCat("FallbackFailed"); + std::stringstream stopMsg; + stopMsg << "TritonService: Stopping the fallback server failed with exit code " << rv; //avoid throwing if the stack is already unwinding if (callFails_ > 0) - edm::LogWarning(stopException.category()) << stopException.explainSelf(); + edm::LogWarning(stopCat) << stopMsg.str(); else - throw stopException; + throw cms::Exception(stopCat) << stopMsg.str(); } } else if (verbose_) { edm::LogInfo("TritonService") << output; From ccb9dee9010c6c86e2980bb489d0b6612537bd23 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Wed, 31 Jan 2024 15:57:37 -0600 Subject: [PATCH 17/19] avoid throwing multiple exceptions in TritonMemResource destructors --- .../SonicTriton/interface/TritonMemResource.h | 1 + .../SonicTriton/interface/triton_utils.h | 3 +++ .../SonicTriton/src/TritonMemResource.cc | 17 +++++++++++++++-- .../SonicTriton/src/triton_utils.cc | 2 ++ 4 files changed, 21 insertions(+), 2 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/interface/TritonMemResource.h b/HeterogeneousCore/SonicTriton/interface/TritonMemResource.h index 9ccd27fd0c0cf..a1352fc25b7ee 100644 --- a/HeterogeneousCore/SonicTriton/interface/TritonMemResource.h +++ b/HeterogeneousCore/SonicTriton/interface/TritonMemResource.h @@ -19,6 +19,7 @@ class TritonMemResource { uint8_t* addr() { return addr_; } size_t size() const { return size_; } virtual void close() {} + void closeSafe(); //used for input virtual void copyInput(const void* values, size_t offset, unsigned entry) {} //used for output diff --git a/HeterogeneousCore/SonicTriton/interface/triton_utils.h b/HeterogeneousCore/SonicTriton/interface/triton_utils.h index edfb829a48a2c..01ac1ef0f0b0f 100644 --- a/HeterogeneousCore/SonicTriton/interface/triton_utils.h +++ b/HeterogeneousCore/SonicTriton/interface/triton_utils.h @@ -1,6 +1,7 @@ #ifndef HeterogeneousCore_SonicTriton_triton_utils #define HeterogeneousCore_SonicTriton_triton_utils +#include "FWCore/Utilities/interface/Exception.h" #include "FWCore/Utilities/interface/Span.h" #include "HeterogeneousCore/SonicTriton/interface/TritonException.h" @@ -19,6 +20,8 @@ namespace triton_utils { bool checkType(inference::DataType dtype) { return false; } + //turn CMS exceptions into warnings + void convertToWarning(const cms::Exception& e); } // namespace triton_utils //explicit specializations (inlined) diff --git a/HeterogeneousCore/SonicTriton/src/TritonMemResource.cc b/HeterogeneousCore/SonicTriton/src/TritonMemResource.cc index 2e35932f7f9a9..3f6ef96681309 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonMemResource.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonMemResource.cc @@ -24,6 +24,19 @@ void TritonMemResource::set() { } } +template +void TritonMemResource::closeSafe() { + CMS_SA_ALLOW try { close(); } catch (TritonException& e) { + e.convertToWarning(); + } catch (cms::Exception& e) { + triton_utils::convertToWarning(e); + } catch (std::exception& e) { + edm::LogWarning("UnknownFailure") << e.what(); + } catch (...) { + edm::LogWarning("UnknownFailure") << "An unknown exception was thrown"; + } +} + template TritonHeapResource::TritonHeapResource(TritonData* data, const std::string& name, size_t size) : TritonMemResource(data, name, size) {} @@ -96,7 +109,7 @@ TritonCpuShmResource::TritonCpuShmResource(TritonData* data, const std:: template TritonCpuShmResource::~TritonCpuShmResource() { - close(); + this->closeSafe(); } template @@ -154,7 +167,7 @@ TritonGpuShmResource::TritonGpuShmResource(TritonData* data, const std:: template TritonGpuShmResource::~TritonGpuShmResource() { - close(); + this->closeSafe(); } template diff --git a/HeterogeneousCore/SonicTriton/src/triton_utils.cc b/HeterogeneousCore/SonicTriton/src/triton_utils.cc index a71190d951e46..322dddf133381 100644 --- a/HeterogeneousCore/SonicTriton/src/triton_utils.cc +++ b/HeterogeneousCore/SonicTriton/src/triton_utils.cc @@ -1,3 +1,4 @@ +#include "FWCore/MessageLogger/interface/MessageLogger.h" #include "HeterogeneousCore/SonicTriton/interface/triton_utils.h" #include @@ -15,6 +16,7 @@ namespace triton_utils { return msg.str(); } + void convertToWarning(const cms::Exception& e) { edm::LogWarning(e.category()) << e.explainSelf(); } } // namespace triton_utils template std::string triton_utils::printColl(const edm::Span::const_iterator>& coll, From 1c86d910aa7b620332f32b1f24a481478c14bd67 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Thu, 1 Feb 2024 15:52:46 -0600 Subject: [PATCH 18/19] another fix for symbolic link handling --- HeterogeneousCore/SonicTriton/scripts/cmsTritonConfigTool | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/scripts/cmsTritonConfigTool b/HeterogeneousCore/SonicTriton/scripts/cmsTritonConfigTool index 4782eabcd976a..00c08742dd5f9 100755 --- a/HeterogeneousCore/SonicTriton/scripts/cmsTritonConfigTool +++ b/HeterogeneousCore/SonicTriton/scripts/cmsTritonConfigTool @@ -261,9 +261,10 @@ def cfg_checksum(args): missing = [] from glob import glob - config_dir = os.path.dirname(args.config) + # evaluate symbolic links + config_dir = os.path.realpath(os.path.dirname(args.config)) for filename in glob(os.path.join(config_dir,"*/*")): - # evaluate symbolic links + # evaluate symbolic links again filename = os.path.realpath(filename) checksum = get_checksum(filename) # key = algorithm:[filename relative to config.pbtxt dir] From ec8dda148e52b0e963f525a8638ea5c238f21602 Mon Sep 17 00:00:00 2001 From: Kevin Pedro Date: Tue, 6 Feb 2024 17:47:16 -0600 Subject: [PATCH 19/19] code review --- HeterogeneousCore/SonicTriton/interface/TritonService.h | 2 +- HeterogeneousCore/SonicTriton/src/TritonService.cc | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/HeterogeneousCore/SonicTriton/interface/TritonService.h b/HeterogeneousCore/SonicTriton/interface/TritonService.h index b9d7a3d024b64..f4d6093695dad 100644 --- a/HeterogeneousCore/SonicTriton/interface/TritonService.h +++ b/HeterogeneousCore/SonicTriton/interface/TritonService.h @@ -134,7 +134,7 @@ class TritonService { unsigned currentModuleId_; bool allowAddModel_; bool startedFallback_; - CMS_SA_ALLOW mutable std::atomic callFails_; + mutable std::atomic callFails_; std::string pid_; std::unordered_map unservedModels_; //this represents a many:many:many map diff --git a/HeterogeneousCore/SonicTriton/src/TritonService.cc b/HeterogeneousCore/SonicTriton/src/TritonService.cc index a688f8456b097..53b94f767062b 100644 --- a/HeterogeneousCore/SonicTriton/src/TritonService.cc +++ b/HeterogeneousCore/SonicTriton/src/TritonService.cc @@ -326,13 +326,12 @@ void TritonService::postEndJob() { printFallbackServerLog(); if (rv != 0) { std::string stopCat("FallbackFailed"); - std::stringstream stopMsg; - stopMsg << "TritonService: Stopping the fallback server failed with exit code " << rv; + std::string stopMsg = fmt::format("TritonService: Stopping the fallback server failed with exit code {}", rv); //avoid throwing if the stack is already unwinding if (callFails_ > 0) - edm::LogWarning(stopCat) << stopMsg.str(); + edm::LogWarning(stopCat) << stopMsg; else - throw cms::Exception(stopCat) << stopMsg.str(); + throw cms::Exception(stopCat) << stopMsg; } } else if (verbose_) { edm::LogInfo("TritonService") << output;