From a330678239241b85f361845f873dd051ff9bb26c Mon Sep 17 00:00:00 2001 From: jackhe Date: Fri, 17 Sep 2021 22:43:00 +0800 Subject: [PATCH 01/21] add presets for Tritonserver --- pom.xml | 8 +- tritonserver/README.md | 306 ++ tritonserver/cppbuild.sh | 32 + tritonserver/platform/pom.xml | 122 + tritonserver/platform/redist/pom.xml | 118 + tritonserver/pom.xml | 138 + .../org/bytedeco/tensorrt/global/nvinfer.java | 3248 ++++++++++++ .../tensorrt/global/nvinfer_plugin.java | 391 ++ .../tensorrt/global/nvonnxparser.java | 169 + .../bytedeco/tensorrt/global/nvparsers.java | 320 ++ .../org/bytedeco/tensorrt/nvinfer/Dims2.java | 59 + .../org/bytedeco/tensorrt/nvinfer/Dims3.java | 61 + .../org/bytedeco/tensorrt/nvinfer/Dims32.java | 58 + .../org/bytedeco/tensorrt/nvinfer/Dims4.java | 62 + .../bytedeco/tensorrt/nvinfer/DimsExprs.java | 50 + .../org/bytedeco/tensorrt/nvinfer/DimsHW.java | 101 + .../nvinfer/DynamicPluginTensorDesc.java | 53 + .../tensorrt/nvinfer/EnumMaxImpl.java | 43 + .../tensorrt/nvinfer/IActivationLayer.java | 115 + .../bytedeco/tensorrt/nvinfer/IAlgorithm.java | 90 + .../tensorrt/nvinfer/IAlgorithmContext.java | 72 + .../tensorrt/nvinfer/IAlgorithmIOInfo.java | 60 + .../tensorrt/nvinfer/IAlgorithmSelector.java | 76 + .../tensorrt/nvinfer/IAlgorithmVariant.java | 52 + .../bytedeco/tensorrt/nvinfer/IBuilder.java | 325 ++ .../tensorrt/nvinfer/IBuilderConfig.java | 769 +++ .../tensorrt/nvinfer/IConcatenationLayer.java | 63 + .../tensorrt/nvinfer/IConstantLayer.java | 87 + .../tensorrt/nvinfer/IConvolutionLayer.java | 556 +++ .../tensorrt/nvinfer/ICudaEngine.java | 651 +++ .../tensorrt/nvinfer/IDeconvolutionLayer.java | 517 ++ .../tensorrt/nvinfer/IDequantizeLayer.java | 104 + .../tensorrt/nvinfer/IDimensionExpr.java | 46 + .../tensorrt/nvinfer/IElementWiseLayer.java | 73 + .../tensorrt/nvinfer/IErrorRecorder.java | 231 + .../tensorrt/nvinfer/IExecutionContext.java | 654 +++ .../tensorrt/nvinfer/IExprBuilder.java | 54 + 
.../bytedeco/tensorrt/nvinfer/IFillLayer.java | 229 + .../nvinfer/IFullyConnectedLayer.java | 152 + .../tensorrt/nvinfer/IGatherLayer.java | 76 + .../tensorrt/nvinfer/IGpuAllocator.java | 110 + .../tensorrt/nvinfer/IHostMemory.java | 72 + .../tensorrt/nvinfer/IIdentityLayer.java | 39 + .../tensorrt/nvinfer/IInt8Calibrator.java | 130 + .../nvinfer/IInt8EntropyCalibrator.java | 48 + .../nvinfer/IInt8EntropyCalibrator2.java | 48 + .../nvinfer/IInt8LegacyCalibrator.java | 111 + .../nvinfer/IInt8MinMaxCalibrator.java | 47 + .../tensorrt/nvinfer/IIteratorLayer.java | 43 + .../bytedeco/tensorrt/nvinfer/ILRNLayer.java | 134 + .../org/bytedeco/tensorrt/nvinfer/ILayer.java | 291 ++ .../bytedeco/tensorrt/nvinfer/ILogger.java | 80 + .../org/bytedeco/tensorrt/nvinfer/ILoop.java | 127 + .../tensorrt/nvinfer/ILoopBoundaryLayer.java | 30 + .../tensorrt/nvinfer/ILoopOutputLayer.java | 97 + .../nvinfer/IMatrixMultiplyLayer.java | 72 + .../tensorrt/nvinfer/INetworkDefinition.java | 1480 ++++++ .../bytedeco/tensorrt/nvinfer/INoCopy.java | 37 + .../nvinfer/IOptimizationProfile.java | 231 + .../tensorrt/nvinfer/IPaddingLayer.java | 162 + .../nvinfer/IParametricReLULayer.java | 35 + .../bytedeco/tensorrt/nvinfer/IPlugin.java | 27 + .../tensorrt/nvinfer/IPluginCreator.java | 111 + .../bytedeco/tensorrt/nvinfer/IPluginExt.java | 27 + .../tensorrt/nvinfer/IPluginFactory.java | 33 + .../tensorrt/nvinfer/IPluginLayer.java | 27 + .../tensorrt/nvinfer/IPluginRegistry.java | 135 + .../bytedeco/tensorrt/nvinfer/IPluginV2.java | 297 ++ .../tensorrt/nvinfer/IPluginV2DynamicExt.java | 233 + .../tensorrt/nvinfer/IPluginV2Ext.java | 193 + .../tensorrt/nvinfer/IPluginV2IOExt.java | 99 + .../tensorrt/nvinfer/IPluginV2Layer.java | 43 + .../tensorrt/nvinfer/IPoolingLayer.java | 433 ++ .../bytedeco/tensorrt/nvinfer/IProfiler.java | 57 + .../tensorrt/nvinfer/IQuantizeLayer.java | 106 + .../tensorrt/nvinfer/IRNNv2Layer.java | 339 ++ .../tensorrt/nvinfer/IRaggedSoftMaxLayer.java | 42 + 
.../tensorrt/nvinfer/IRecurrenceLayer.java | 46 + .../tensorrt/nvinfer/IReduceLayer.java | 102 + .../bytedeco/tensorrt/nvinfer/IRefitter.java | 336 ++ .../tensorrt/nvinfer/IResizeLayer.java | 323 ++ .../bytedeco/tensorrt/nvinfer/IRuntime.java | 191 + .../tensorrt/nvinfer/IScaleLayer.java | 190 + .../tensorrt/nvinfer/ISelectLayer.java | 31 + .../tensorrt/nvinfer/IShapeLayer.java | 40 + .../tensorrt/nvinfer/IShuffleLayer.java | 225 + .../tensorrt/nvinfer/ISliceLayer.java | 213 + .../tensorrt/nvinfer/ISoftMaxLayer.java | 82 + .../bytedeco/tensorrt/nvinfer/ITensor.java | 414 ++ .../tensorrt/nvinfer/ITimingCache.java | 123 + .../bytedeco/tensorrt/nvinfer/ITopKLayer.java | 105 + .../tensorrt/nvinfer/ITripLimitLayer.java | 29 + .../tensorrt/nvinfer/IUnaryLayer.java | 54 + .../tensorrt/nvinfer/Permutation.java | 48 + .../tensorrt/nvinfer/PluginField.java | 81 + .../nvinfer/PluginFieldCollection.java | 45 + .../tensorrt/nvinfer/PluginTensorDesc.java | 58 + .../tensorrt/nvinfer/VActivationLayer.java | 35 + .../bytedeco/tensorrt/nvinfer/VAlgorithm.java | 33 + .../tensorrt/nvinfer/VAlgorithmContext.java | 33 + .../tensorrt/nvinfer/VAlgorithmIOInfo.java | 31 + .../tensorrt/nvinfer/VAlgorithmVariant.java | 30 + .../bytedeco/tensorrt/nvinfer/VBuilder.java | 45 + .../tensorrt/nvinfer/VBuilderConfig.java | 83 + .../tensorrt/nvinfer/VConcatenationLayer.java | 30 + .../tensorrt/nvinfer/VConstantLayer.java | 32 + .../tensorrt/nvinfer/VConvolutionLayer.java | 59 + .../tensorrt/nvinfer/VCudaEngine.java | 63 + .../tensorrt/nvinfer/VDeconvolutionLayer.java | 57 + .../tensorrt/nvinfer/VDequantizeLayer.java | 30 + .../tensorrt/nvinfer/VDimensionExpr.java | 30 + .../tensorrt/nvinfer/VElementWiseLayer.java | 31 + .../tensorrt/nvinfer/VExecutionContext.java | 63 + .../tensorrt/nvinfer/VExprBuilder.java | 33 + .../bytedeco/tensorrt/nvinfer/VFillLayer.java | 37 + .../nvinfer/VFullyConnectedLayer.java | 34 + .../tensorrt/nvinfer/VGatherLayer.java | 32 + .../tensorrt/nvinfer/VHostMemory.java | 31 
+ .../tensorrt/nvinfer/VIdentityLayer.java | 40 + .../tensorrt/nvinfer/VIteratorLayer.java | 32 + .../bytedeco/tensorrt/nvinfer/VLRNLayer.java | 36 + .../org/bytedeco/tensorrt/nvinfer/VLayer.java | 46 + .../org/bytedeco/tensorrt/nvinfer/VLoop.java | 39 + .../tensorrt/nvinfer/VLoopBoundaryLayer.java | 29 + .../tensorrt/nvinfer/VLoopOutputLayer.java | 31 + .../nvinfer/VMatrixMultiplyLayer.java | 31 + .../tensorrt/nvinfer/VNetworkDefinition.java | 113 + .../nvinfer/VOptimizationProfile.java | 51 + .../tensorrt/nvinfer/VPaddingLayer.java | 36 + .../nvinfer/VParametricReLULayer.java | 40 + .../tensorrt/nvinfer/VPluginLayer.java | 29 + .../tensorrt/nvinfer/VPluginV2Layer.java | 29 + .../tensorrt/nvinfer/VPoolingLayer.java | 54 + .../tensorrt/nvinfer/VQuantizeLayer.java | 30 + .../tensorrt/nvinfer/VRNNv2Layer.java | 55 + .../tensorrt/nvinfer/VRaggedSoftMaxLayer.java | 40 + .../tensorrt/nvinfer/VRecurrenceLayer.java | 40 + .../tensorrt/nvinfer/VReduceLayer.java | 35 + .../bytedeco/tensorrt/nvinfer/VRefitter.java | 61 + .../tensorrt/nvinfer/VResizeLayer.java | 50 + .../org/bytedeco/tensorrt/nvinfer/VRoot.java | 48 + .../bytedeco/tensorrt/nvinfer/VRuntime.java | 36 + .../tensorrt/nvinfer/VScaleLayer.java | 39 + .../tensorrt/nvinfer/VSelectLayer.java | 27 + .../tensorrt/nvinfer/VShapeLayer.java | 40 + .../tensorrt/nvinfer/VShuffleLayer.java | 36 + .../tensorrt/nvinfer/VSliceLayer.java | 37 + .../tensorrt/nvinfer/VSoftMaxLayer.java | 30 + .../bytedeco/tensorrt/nvinfer/VTensor.java | 52 + .../tensorrt/nvinfer/VTimingCache.java | 31 + .../bytedeco/tensorrt/nvinfer/VTopKLayer.java | 35 + .../tensorrt/nvinfer/VTripLimitLayer.java | 29 + .../tensorrt/nvinfer/VUnaryLayer.java | 31 + .../bytedeco/tensorrt/nvinfer/Weights.java | 58 + .../tensorrt/nvinfer/cublasContext.java | 28 + .../tensorrt/nvinfer/cudnnContext.java | 28 + .../DetectionOutputParameters.java | 75 + .../nvinfer_plugin/GridAnchorParameters.java | 63 + .../nvinfer_plugin/NMSParameters.java | 67 + 
.../nvinfer_plugin/PriorBoxParameters.java | 77 + .../tensorrt/nvinfer_plugin/Quadruple.java | 49 + .../tensorrt/nvinfer_plugin/RPROIParams.java | 67 + .../nvinfer_plugin/RegionParameters.java | 56 + .../tensorrt/nvinfer_plugin/softmaxTree.java | 56 + .../tensorrt/nvonnxparser/IParser.java | 134 + .../tensorrt/nvonnxparser/IParserError.java | 54 + .../nvonnxparser/SubGraphCollection_t.java | 93 + .../tensorrt/nvonnxparser/SubGraph_t.java | 45 + .../tensorrt/nvparsers/FieldCollection.java | 46 + .../bytedeco/tensorrt/nvparsers/FieldMap.java | 54 + .../tensorrt/nvparsers/IBinaryProtoBlob.java | 54 + .../tensorrt/nvparsers/IBlobNameToTensor.java | 51 + .../tensorrt/nvparsers/ICaffeParser.java | 195 + .../tensorrt/nvparsers/IPluginFactoryV2.java | 66 + .../tensorrt/nvparsers/IUffParser.java | 180 + .../tritonserver/global/tritonserver.java | 4400 +++++++++++++++++ .../tritonserver/TRITONBACKEND_Backend.java | 35 + .../tritonserver/TRITONBACKEND_Input.java | 35 + .../TRITONBACKEND_MemoryManager.java | 38 + .../tritonserver/TRITONBACKEND_Model.java | 35 + .../TRITONBACKEND_ModelInstance.java | 41 + .../tritonserver/TRITONBACKEND_Output.java | 35 + .../tritonserver/TRITONBACKEND_Request.java | 35 + .../tritonserver/TRITONBACKEND_Response.java | 35 + .../TRITONBACKEND_ResponseFactory.java | 35 + .../tritonserver/TRITONREPOAGENT_Agent.java | 38 + .../TRITONREPOAGENT_AgentModel.java | 41 + .../tritonserver/TRITONSERVER_Error.java | 38 + .../TRITONSERVER_InferenceRequest.java | 35 + ...TONSERVER_InferenceRequestReleaseFn_t.java | 64 + .../TRITONSERVER_InferenceResponse.java | 35 + ...NSERVER_InferenceResponseCompleteFn_t.java | 59 + .../TRITONSERVER_InferenceTrace.java | 35 + ...ITONSERVER_InferenceTraceActivityFn_t.java | 47 + ...RITONSERVER_InferenceTraceReleaseFn_t.java | 48 + .../tritonserver/TRITONSERVER_Message.java | 35 + .../tritonserver/TRITONSERVER_Metrics.java | 35 + .../TRITONSERVER_ResponseAllocator.java | 35 + ...ITONSERVER_ResponseAllocatorAllocFn_t.java | 81 + 
...ONSERVER_ResponseAllocatorReleaseFn_t.java | 60 + ...ITONSERVER_ResponseAllocatorStartFn_t.java | 55 + .../tritonserver/TRITONSERVER_Server.java | 35 + .../TRITONSERVER_ServerOptions.java | 41 + .../tritonserver/presets/tritonserver.java | 127 + tritonserver/src/main/java9/module-info.java | 8 + 205 files changed, 28292 insertions(+), 3 deletions(-) create mode 100644 tritonserver/README.md create mode 100644 tritonserver/cppbuild.sh create mode 100644 tritonserver/platform/pom.xml create mode 100644 tritonserver/platform/redist/pom.xml create mode 100644 tritonserver/pom.xml create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/global/nvinfer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/global/nvinfer_plugin.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/global/nvonnxparser.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/global/nvparsers.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Dims2.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Dims3.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Dims32.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Dims4.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/DimsExprs.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/DimsHW.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/DynamicPluginTensorDesc.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/EnumMaxImpl.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IActivationLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IAlgorithm.java create mode 100644 
tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IAlgorithmContext.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IAlgorithmIOInfo.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IAlgorithmSelector.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IAlgorithmVariant.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IBuilder.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IBuilderConfig.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IConcatenationLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IConstantLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IConvolutionLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ICudaEngine.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IDeconvolutionLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IDequantizeLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IDimensionExpr.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IElementWiseLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IErrorRecorder.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IExecutionContext.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IExprBuilder.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IFillLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IFullyConnectedLayer.java create mode 100644 
tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IGatherLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IGpuAllocator.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IHostMemory.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IIdentityLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IInt8Calibrator.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IInt8EntropyCalibrator.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IInt8EntropyCalibrator2.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IInt8LegacyCalibrator.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IInt8MinMaxCalibrator.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IIteratorLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILRNLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILogger.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILoop.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILoopBoundaryLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILoopOutputLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IMatrixMultiplyLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/INetworkDefinition.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/INoCopy.java create mode 100644 
tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IOptimizationProfile.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPaddingLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IParametricReLULayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPlugin.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginCreator.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginExt.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginFactory.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginRegistry.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginV2.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginV2DynamicExt.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginV2Ext.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginV2IOExt.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginV2Layer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPoolingLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IProfiler.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IQuantizeLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IRNNv2Layer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IRaggedSoftMaxLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IRecurrenceLayer.java 
create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IReduceLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IRefitter.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IResizeLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IRuntime.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IScaleLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ISelectLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IShapeLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IShuffleLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ISliceLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ISoftMaxLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ITensor.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ITimingCache.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ITopKLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ITripLimitLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IUnaryLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Permutation.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/PluginField.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/PluginFieldCollection.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/PluginTensorDesc.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VActivationLayer.java create mode 
100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VAlgorithm.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VAlgorithmContext.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VAlgorithmIOInfo.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VAlgorithmVariant.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VBuilder.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VBuilderConfig.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VConcatenationLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VConstantLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VConvolutionLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VCudaEngine.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VDeconvolutionLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VDequantizeLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VDimensionExpr.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VElementWiseLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VExecutionContext.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VExprBuilder.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VFillLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VFullyConnectedLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VGatherLayer.java create mode 100644 
tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VHostMemory.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VIdentityLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VIteratorLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VLRNLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VLoop.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VLoopBoundaryLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VLoopOutputLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VMatrixMultiplyLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VNetworkDefinition.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VOptimizationProfile.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VPaddingLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VParametricReLULayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VPluginLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VPluginV2Layer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VPoolingLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VQuantizeLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRNNv2Layer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRaggedSoftMaxLayer.java create mode 100644 
tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRecurrenceLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VReduceLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRefitter.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VResizeLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRoot.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRuntime.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VScaleLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VSelectLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VShapeLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VShuffleLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VSliceLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VSoftMaxLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VTensor.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VTimingCache.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VTopKLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VTripLimitLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VUnaryLayer.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Weights.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/cublasContext.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/cudnnContext.java create mode 100644 
tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/DetectionOutputParameters.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/GridAnchorParameters.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/NMSParameters.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/PriorBoxParameters.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/Quadruple.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/RPROIParams.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/RegionParameters.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/softmaxTree.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvonnxparser/IParser.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvonnxparser/IParserError.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvonnxparser/SubGraphCollection_t.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvonnxparser/SubGraph_t.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/FieldCollection.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/FieldMap.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/IBinaryProtoBlob.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/IBlobNameToTensor.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/ICaffeParser.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/IPluginFactoryV2.java create mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/IUffParser.java create 
mode 100644 tritonserver/src/gen/java/org/bytedeco/tritonserver/global/tritonserver.java create mode 100644 tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Backend.java create mode 100644 tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Input.java create mode 100644 tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_MemoryManager.java create mode 100644 tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Model.java create mode 100644 tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_ModelInstance.java create mode 100644 tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Output.java create mode 100644 tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Request.java create mode 100644 tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Response.java create mode 100644 tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_ResponseFactory.java create mode 100644 tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONREPOAGENT_Agent.java create mode 100644 tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONREPOAGENT_AgentModel.java create mode 100644 tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_Error.java create mode 100644 tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceRequest.java create mode 100644 tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceRequestReleaseFn_t.java create mode 100644 tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceResponse.java create mode 100644 tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceResponseCompleteFn_t.java create mode 100644 
tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceTrace.java create mode 100644 tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceTraceActivityFn_t.java create mode 100644 tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceTraceReleaseFn_t.java create mode 100644 tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_Message.java create mode 100644 tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_Metrics.java create mode 100644 tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocator.java create mode 100644 tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocatorAllocFn_t.java create mode 100644 tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocatorReleaseFn_t.java create mode 100644 tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocatorStartFn_t.java create mode 100644 tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_Server.java create mode 100644 tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ServerOptions.java create mode 100644 tritonserver/src/main/java/org/bytedeco/tritonserver/presets/tritonserver.java create mode 100644 tritonserver/src/main/java9/module-info.java diff --git a/pom.xml b/pom.xml index 8577272a06f..04afed7e31c 100644 --- a/pom.xml +++ b/pom.xml @@ -617,7 +617,8 @@ pytorch tensorflow tensorflow-lite - tensorrt + tensorrt + tritonserver ale depthai onnx @@ -1367,7 +1368,8 @@ pytorch tensorflow tensorflow-lite - tensorrt + tensorrt + tritonserver ale depthai onnx @@ -1592,7 +1594,7 @@ pytorch tensorflow tensorflow-lite - tensorrt + tensorrt ale onnx onnxruntime diff --git a/tritonserver/README.md b/tritonserver/README.md new file mode 100644 index 
00000000000..f157200b1be --- /dev/null +++ b/tritonserver/README.md @@ -0,0 +1,306 @@ +JavaCPP Presets for Triton Inference Server +=========================================== + +[![Gitter](https://badges.gitter.im/bytedeco/javacpp.svg)](https://gitter.im/bytedeco/javacpp) [![Maven Central](https://maven-badges.herokuapp.com/maven-central/org.bytedeco/tritonserver/badge.svg)](https://maven-badges.herokuapp.com/maven-central/org.bytedeco/tritonserver) [![Sonatype Nexus (Snapshots)](https://img.shields.io/nexus/s/https/oss.sonatype.org/org.bytedeco/tritonserver.svg)](http://bytedeco.org/builds/) +Build status for all platforms: [![tritonserver](https://github.com/bytedeco/javacpp-presets/workflows/tritonserver/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Atritonserver) Commercial support: [![xscode](https://img.shields.io/badge/Available%20on-xs%3Acode-blue?style=?style=plastic&logo=appveyor&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAEAAAABACAMAAACdt4HsAAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJbWFnZVJlYWR5ccllPAAAAAZQTFRF////////VXz1bAAAAAJ0Uk5T/wDltzBKAAAAlUlEQVR42uzXSwqAMAwE0Mn9L+3Ggtgkk35QwcnSJo9S+yGwM9DCooCbgn4YrJ4CIPUcQF7/XSBbx2TEz4sAZ2q1RAECBAiYBlCtvwN+KiYAlG7UDGj59MViT9hOwEqAhYCtAsUZvL6I6W8c2wcbd+LIWSCHSTeSAAECngN4xxIDSK9f4B9t377Wd7H5Nt7/Xz8eAgwAvesLRjYYPuUAAAAASUVORK5CYII=)](https://xscode.com/bytedeco/javacpp-presets) + + +License Agreements +------------------ +By downloading these archives, you agree to the terms of the license agreements for NVIDIA software included in the archives. + +### Triton Inference Server +To view the license for Triton Inference Server included in these archives, click [here](https://github.com/triton-inference-server/server/blob/main/LICENSE) + + +Introduction +------------ +This directory contains the JavaCPP Presets module for: + + * Triton Inference Server https://developer.nvidia.com/nvidia-triton-inference-server + +Please refer to the parent README.md file for more detailed information about the JavaCPP Presets.
+ + +Documentation +------------- +Java API documentation is available here: + + * http://bytedeco.org/javacpp-presets/tensorrt/apidocs/ + + +Sample Usage +------------ +Here is a simple example of TensorRT ported to Java from the `sampleGoogleNet.cpp` sample file included in `TensorRT-4.0.0.3.Ubuntu-16.04.4.x86_64-gnu.cuda-9.0.cudnn7.0.tar.gz` available at: + + * https://developer.nvidia.com/nvidia-tensorrt-download + +We can use [Maven 3](http://maven.apache.org/) to download and install automatically all the class files as well as the native binaries. To run this sample code, after creating the `pom.xml` and `SampleGoogleNet.java` source files below, simply execute on the command line: +```bash + $ mvn compile exec:java +``` + +### The `pom.xml` build file +```xml + + 4.0.0 + org.bytedeco.tensorrt + samplegooglenet + 1.5.6 + + SampleGoogleNet + + + + org.bytedeco + tensorrt-platform + 8.0-1.5.6 + + + + + org.bytedeco + cuda-platform-redist + 11.4-8.2-1.5.6 + + + org.bytedeco + tensorrt-platform-redist + 8.0-1.5.6 + + + + + . 
+ + +``` + +### The `SampleGoogleNet.java` source file +```java +import java.io.*; +import java.util.*; +import org.bytedeco.javacpp.*; + +import org.bytedeco.cuda.cudart.*; +import org.bytedeco.tensorrt.nvinfer.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.cuda.global.cudart.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +public class SampleGoogleNet { + static void CHECK(int status) + { + if (status != 0) + { + System.out.println("Cuda failure: " + status); + System.exit(6); + } + } + + // Logger for GIE info/warning/errors + static class Logger extends ILogger + { + @Override public void log(Severity severity, String msg) + { + severity = severity.intern(); + + // suppress info-level messages + if (severity == Severity.kINFO) return; + + switch (severity) + { + case kINTERNAL_ERROR: System.err.print("INTERNAL_ERROR: "); break; + case kERROR: System.err.print("ERROR: "); break; + case kWARNING: System.err.print("WARNING: "); break; + case kINFO: System.err.print("INFO: "); break; + default: System.err.print("UNKNOWN: "); break; + } + System.err.println(msg); + } + } + static Logger gLogger = new Logger(); + + static String locateFile(String input, String[] directories) + { + String file = ""; + int MAX_DEPTH = 10; + boolean found = false; + for (String dir : directories) + { + file = dir + input; + for (int i = 0; i < MAX_DEPTH && !found; i++) + { + File checkFile = new File(file); + found = checkFile.exists(); + if (found) break; + file = "../" + file; + } + if (found) break; + file = ""; + } + + if (file.isEmpty()) + System.err.println("Could not find a file due to it not existing in the data directory."); + return file; + } + + // stuff we know about the network and the caffe input/output blobs + + static int BATCH_SIZE = 4; + static int TIMING_ITERATIONS = 1000; + + static String INPUT_BLOB_NAME = "data"; + static String OUTPUT_BLOB_NAME = "prob"; + + + static 
String locateFile(String input) + { + String[] dirs = {"data/samples/googlenet/", "data/googlenet/"}; + return locateFile(input, dirs); + } + + static class Profiler extends IProfiler + { + LinkedHashMap mProfile = new LinkedHashMap(); + + @Override public void reportLayerTime(String layerName, float ms) + { + Float time = mProfile.get(layerName); + mProfile.put(layerName, (time != null ? time : 0) + ms); + } + + public void printLayerTimes() + { + float totalTime = 0; + for (Map.Entry e : mProfile.entrySet()) + { + System.out.printf("%-40.40s %4.3fms\n", e.getKey(), e.getValue() / TIMING_ITERATIONS); + totalTime += e.getValue(); + } + System.out.printf("Time over all layers: %4.3f\n", totalTime / TIMING_ITERATIONS); + } + + } + static Profiler gProfiler = new Profiler(); + + static void caffeToGIEModel(String deployFile, // name for caffe prototxt + String modelFile, // name for model + String[] outputs, // network outputs + int maxBatchSize, // batch size - NB must be at least as large as the batch we want to run with) + IHostMemory[] gieModelStream) + { + // create API root class - must span the lifetime of the engine usage + IBuilder builder = createInferBuilder(gLogger); + INetworkDefinition network = builder.createNetwork(); + + // parse the caffe model to populate the network, then set the outputs + ICaffeParser parser = createCaffeParser(); + + boolean useFp16 = builder.platformHasFastFp16(); + + DataType modelDataType = useFp16 ? 
DataType.kHALF : DataType.kFLOAT; // create a 16-bit model if it's natively supported + IBlobNameToTensor blobNameToTensor = + parser.parse(locateFile(deployFile), // caffe deploy file + locateFile(modelFile), // caffe model file + network, // network definition that the parser will populate + modelDataType); + + assert blobNameToTensor != null; + // the caffe file has no notion of outputs, so we need to manually say which tensors the engine should generate + for (String s : outputs) + network.markOutput(blobNameToTensor.find(s)); + + // Build the engine + builder.setMaxBatchSize(maxBatchSize); + builder.setMaxWorkspaceSize(16 << 20); + + // set up the network for paired-fp16 format if available + if(useFp16) + builder.setHalf2Mode(true); + + ICudaEngine engine = builder.buildCudaEngine(network); + assert engine != null; + + // we don't need the network any more, and we can destroy the parser + network.destroy(); + parser.destroy(); + + // serialize the engine, then close everything down + gieModelStream[0] = engine.serialize(); + engine.destroy(); + builder.destroy(); + shutdownProtobufLibrary(); + } + + static void timeInference(ICudaEngine engine, int batchSize) + { + // input and output buffer pointers that we pass to the engine - the engine requires exactly ICudaEngine::getNbBindings(), + // of these, but in this case we know that there is exactly one input and one output. + assert engine.getNbBindings() == 2; + PointerPointer buffers = new PointerPointer(2); + + // In order to bind the buffers, we need to know the names of the input and output tensors. 
+ // note that indices are guaranteed to be less than ICudaEngine::getNbBindings() + int inputIndex = engine.getBindingIndex(INPUT_BLOB_NAME), outputIndex = engine.getBindingIndex(OUTPUT_BLOB_NAME); + + // allocate GPU buffers + DimsCHW inputDims = new DimsCHW(engine.getBindingDimensions(inputIndex)), outputDims = new DimsCHW(engine.getBindingDimensions(outputIndex)); + long inputSize = batchSize * inputDims.c().get() * inputDims.h().get() * inputDims.w().get() * Float.SIZE / 8; + long outputSize = batchSize * outputDims.c().get() * outputDims.h().get() * outputDims.w().get() * Float.SIZE / 8; + + CHECK(cudaMalloc(buffers.position(inputIndex), inputSize)); + CHECK(cudaMalloc(buffers.position(outputIndex), outputSize)); + + IExecutionContext context = engine.createExecutionContext(); + context.setProfiler(gProfiler); + + // zero the input buffer + CHECK(cudaMemset(buffers.position(inputIndex).get(), 0, inputSize)); + + for (int i = 0; i < TIMING_ITERATIONS;i++) + context.execute(batchSize, buffers.position(0)); + + // release the context and buffers + context.destroy(); + CHECK(cudaFree(buffers.position(inputIndex).get())); + CHECK(cudaFree(buffers.position(outputIndex).get())); + } + + + public static void main(String[] args) + { + System.out.println("Building and running a GPU inference engine for GoogleNet, N=4..."); + + // parse the caffe model and the mean file + IHostMemory[] gieModelStream = { null }; + caffeToGIEModel("googlenet.prototxt", "googlenet.caffemodel", new String[] { OUTPUT_BLOB_NAME }, BATCH_SIZE, gieModelStream); + + // create an engine + IRuntime infer = createInferRuntime(gLogger); + ICudaEngine engine = infer.deserializeCudaEngine(gieModelStream[0].data(), gieModelStream[0].size(), null); + + System.out.println("Bindings after deserializing:"); + for (int bi = 0; bi < engine.getNbBindings(); bi++) { + if (engine.bindingIsInput(bi)) { + System.out.printf("Binding %d (%s): Input.\n", bi, engine.getBindingName(bi)); + } else { + 
System.out.printf("Binding %d (%s): Output.\n", bi, engine.getBindingName(bi)); + } + } + + // run inference with null data to time network performance + timeInference(engine, BATCH_SIZE); + + engine.destroy(); + infer.destroy(); + + gProfiler.printLayerTimes(); + + System.out.println("Done."); + + System.exit(0); + } +} +``` diff --git a/tritonserver/cppbuild.sh b/tritonserver/cppbuild.sh new file mode 100644 index 00000000000..01261c2e45f --- /dev/null +++ b/tritonserver/cppbuild.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# This file is meant to be included by the parent cppbuild.sh script +if [[ -z "$PLATFORM" ]]; then + pushd .. + bash cppbuild.sh "$@" tritonserver + popd + exit +fi + +case $PLATFORM in + linux-arm64) + if [[ ! -f "/opt/tritonserver/include/triton/core/tritonserver.h" ]] && [[ ! -d "/opt/tritonserver/lib/" ]]; then + echo "Please make sure library and include files exist" + exit 1 + fi + ;; + linux-x86_64) + if [[ ! -f "/opt/tritonserver/include/triton/core/tritonserver.h" ]] && [[ ! -d "/opt/tritonserver/lib/" ]]; then + echo "Please make sure library and include files exist" + exit 1 + fi + ;; + windows-x86_64) + if [[ ! 
-f "C:/Program Files/NVIDIA GPU Computing Toolkit/TensorRT/include/NvInfer.h" ]]; then + echo "Please install TensorRT in C:/Program Files/NVIDIA GPU Computing Toolkit/TensorRT/" + exit 1 + fi + ;; + *) + echo "Error: Platform \"$PLATFORM\" is not supported" + ;; +esac diff --git a/tritonserver/platform/pom.xml b/tritonserver/platform/pom.xml new file mode 100644 index 00000000000..9ef9770c490 --- /dev/null +++ b/tritonserver/platform/pom.xml @@ -0,0 +1,122 @@ + + + 4.0.0 + + + org.bytedeco + javacpp-presets + 1.5.6 + ../../ + + + org.bytedeco + tritonserver-platform + 8.0-${project.parent.version} + JavaCPP Presets Platform for Tritonserver + + + tritonserver + + + + + org.bytedeco + cuda-platform + 11.4-8.2-${project.parent.version} + + + ${project.groupId} + ${javacpp.moduleId} + ${project.version} + + + ${project.groupId} + ${javacpp.moduleId} + ${project.version} + ${javacpp.platform.linux-arm64} + + + ${project.groupId} + ${javacpp.moduleId} + ${project.version} + ${javacpp.platform.linux-x86_64} + + + ${project.groupId} + ${javacpp.moduleId} + ${project.version} + ${javacpp.platform.windows-x86_64} + + + + + + + maven-jar-plugin + + + default-jar + + + + ${javacpp.moduleId}.jar ${javacpp.moduleId}-linux-arm64.jar ${javacpp.moduleId}-linux-x86_64.jar ${javacpp.moduleId}-windows-x86_64.jar + + + + + + empty-javadoc-jar + + jar + + + javadoc + + + + empty-sources-jar + + jar + + + sources + + + + + + org.moditect + moditect-maven-plugin + + + add-module-infos + none + + + add-platform-module-info + package + + add-module-info + + + + + ${project.build.directory}/${project.artifactId}.jar + + module org.bytedeco.${javacpp.moduleId}.platform { + requires static org.bytedeco.${javacpp.moduleId}.linux.arm64; + requires static org.bytedeco.${javacpp.moduleId}.linux.x86_64; + requires static org.bytedeco.${javacpp.moduleId}.windows.x86_64; + } + + + + + + + + + + + diff --git a/tritonserver/platform/redist/pom.xml b/tritonserver/platform/redist/pom.xml new file mode 
100644 index 00000000000..cf514cb4214 --- /dev/null +++ b/tritonserver/platform/redist/pom.xml @@ -0,0 +1,118 @@ + + + 4.0.0 + + + org.bytedeco + javacpp-presets + 1.5.6 + ../../../ + + + org.bytedeco + tritonserver-platform-redist + 8.0-${project.parent.version} + JavaCPP Presets Platform Redist for Tritonserver + + + tritonserver + -redist + + + + + ${project.groupId} + ${javacpp.moduleId}-platform + ${project.version} + + + ${project.groupId} + ${javacpp.moduleId} + ${project.version} + ${javacpp.platform.linux-arm64} + + + ${project.groupId} + ${javacpp.moduleId} + ${project.version} + ${javacpp.platform.linux-x86_64} + + + ${project.groupId} + ${javacpp.moduleId} + ${project.version} + ${javacpp.platform.windows-x86_64} + + + + + + + maven-jar-plugin + + + default-jar + + + + ${javacpp.moduleId}.jar ${javacpp.moduleId}-linux-arm64-redist.jar ${javacpp.moduleId}-linux-x86_64-redist.jar ${javacpp.moduleId}-windows-x86_64-redist.jar + + + + + + empty-javadoc-jar + + jar + + + javadoc + + + + empty-sources-jar + + jar + + + sources + + + + + + org.moditect + moditect-maven-plugin + + + add-module-infos + none + + + add-platform-module-info + package + + add-module-info + + + + + ${project.build.directory}/${project.artifactId}.jar + + module org.bytedeco.${javacpp.moduleId}.platform.redist { + requires static org.bytedeco.${javacpp.moduleId}.linux.arm64.redist; + requires static org.bytedeco.${javacpp.moduleId}.linux.x86_64.redist; + requires static org.bytedeco.${javacpp.moduleId}.windows.x86_64.redist; + } + + + + + + + + + + + diff --git a/tritonserver/pom.xml b/tritonserver/pom.xml new file mode 100644 index 00000000000..cc647a9e871 --- /dev/null +++ b/tritonserver/pom.xml @@ -0,0 +1,138 @@ + + + 4.0.0 + + + org.bytedeco + javacpp-presets + 1.5.6 + + + org.bytedeco + tritonserver + 8.0-${project.parent.version} + JavaCPP Presets for Tritonserver + + + + org.bytedeco + cuda + 11.4-8.2-${project.parent.version} + + + org.bytedeco + tensorrt + 
8.0-${project.parent.version} + + + org.bytedeco + javacpp + + + + + + + maven-resources-plugin + + + maven-compiler-plugin + + + org.bytedeco + javacpp + + ISO-8859-1 + + + + org.bytedeco + cuda + 11.4-8.2-${project.parent.version} + + + org.bytedeco + tensorrt + 8.0-${project.parent.version} + + + + + maven-jar-plugin + + + javacpp-${javacpp.platform} + package + + jar + + + ${javacpp.platform} + + org/bytedeco/tritonserver/${javacpp.platform}/*jni* + META-INF/native-image/${javacpp.platform}/ + + + + + javacpp-${javacpp.platform}-redist + package + + jar + + + ${javacpp.platform}-redist + ${project.build.directory}/native + + org/bytedeco/tritonserver/${javacpp.platform}/ + META-INF/native-image/${javacpp.platform}/ + + + org/bytedeco/tritonserver/${javacpp.platform}/*jni* + + + + + + + org.moditect + moditect-maven-plugin + + + add-module-info-redist + package + + add-module-info + + + + + ${project.build.directory}/${project.artifactId}-${javacpp.platform}-redist.jar + + open module org.bytedeco.${javacpp.packageName}.${javacpp.platform.module}.redist { + requires transitive org.bytedeco.${javacpp.packageName}; + } + + + + + + + + + maven-dependency-plugin + + + maven-source-plugin + + + maven-javadoc-plugin + + ISO-8859-1 + + + + + + diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/global/nvinfer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/global/nvinfer.java new file mode 100644 index 00000000000..66234e0c548 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/global/nvinfer.java @@ -0,0 +1,3248 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.global; + +import org.bytedeco.tensorrt.nvinfer.*; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import 
org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +public class nvinfer extends org.bytedeco.tensorrt.presets.nvinfer { + static { Loader.load(); } + +// Parsed from NvInferVersion.h + +/* + * Copyright 1993-2021 NVIDIA Corporation. All rights reserved. + * + * NOTICE TO LICENSEE: + * + * This source code and/or documentation ("Licensed Deliverables") are + * subject to NVIDIA intellectual property rights under U.S. and + * international Copyright laws. + * + * These Licensed Deliverables contained herein is PROPRIETARY and + * CONFIDENTIAL to NVIDIA and is being provided under the terms and + * conditions of a form of NVIDIA software license agreement by and + * between NVIDIA and Licensee ("License Agreement") or electronically + * accepted by Licensee. Notwithstanding any terms or conditions to + * the contrary in the License Agreement, reproduction or disclosure + * of the Licensed Deliverables to any third party without the express + * written consent of NVIDIA is prohibited. + * + * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE + * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE + * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS + * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. + * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED + * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, + * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. 
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE + * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY + * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY + * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, + * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS + * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THESE LICENSED DELIVERABLES. + * + * U.S. Government End Users. These Licensed Deliverables are a + * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT + * 1995), consisting of "commercial computer software" and "commercial + * computer software documentation" as such terms are used in 48 + * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government + * only as a commercial end item. Consistent with 48 C.F.R.12.212 and + * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all + * U.S. Government End Users acquire the Licensed Deliverables with + * only those rights set forth herein. + * + * Any use of the Licensed Deliverables in individual and commercial + * software must include, in the user documentation and internal + * comments to the code, the above Disclaimer and U.S. Government End + * Users Notice. + */ + + /** + /** \file NvInferVersion.h + /** + /** Defines the TensorRT version + /** */ + +// #ifndef NV_INFER_VERSION_H +// #define NV_INFER_VERSION_H + +/** TensorRT major version. */ +public static final int NV_TENSORRT_MAJOR = 8; +/** TensorRT minor version. */ +public static final int NV_TENSORRT_MINOR = 0; +/** TensorRT patch version. */ +public static final int NV_TENSORRT_PATCH = 1; +/** TensorRT build number. */ +public static final int NV_TENSORRT_BUILD = 6; + +/** Shared object library major version number. */ +public static final int NV_TENSORRT_SONAME_MAJOR = 8; +/** Shared object library minor version number. 
*/ +public static final int NV_TENSORRT_SONAME_MINOR = 0; +/** Shared object library patch version number. */ +public static final int NV_TENSORRT_SONAME_PATCH = 1; + +// #endif // NV_INFER_VERSION_H + + +// Parsed from NvInferRuntimeCommon.h + +/* + * Copyright (c) 1993-2021 NVIDIA Corporation. All rights reserved. + * + * NOTICE TO LICENSEE: + * + * This source code and/or documentation ("Licensed Deliverables") are + * subject to NVIDIA intellectual property rights under U.S. and + * international Copyright laws. + * + * These Licensed Deliverables contained herein is PROPRIETARY and + * CONFIDENTIAL to NVIDIA and is being provided under the terms and + * conditions of a form of NVIDIA software license agreement by and + * between NVIDIA and Licensee ("License Agreement") or electronically + * accepted by Licensee. Notwithstanding any terms or conditions to + * the contrary in the License Agreement, reproduction or disclosure + * of the Licensed Deliverables to any third party without the express + * written consent of NVIDIA is prohibited. + * + * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE + * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE + * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS + * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. + * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED + * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, + * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. + * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE + * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY + * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY + * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, + * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS + * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THESE LICENSED DELIVERABLES. + * + * U.S. 
Government End Users. These Licensed Deliverables are a + * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT + * 1995), consisting of "commercial computer software" and "commercial + * computer software documentation" as such terms are used in 48 + * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government + * only as a commercial end item. Consistent with 48 C.F.R.12.212 and + * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all + * U.S. Government End Users acquire the Licensed Deliverables with + * only those rights set forth herein. + * + * Any use of the Licensed Deliverables in individual and commercial + * software must include, in the user documentation and internal + * comments to the code, the above Disclaimer and U.S. Government End + * Users Notice. + */ + +// #ifndef NV_INFER_RUNTIME_COMMON_H +// #define NV_INFER_RUNTIME_COMMON_H + +// #include "NvInferVersion.h" +// #include + + +/** Items that are marked as deprecated will be removed in a future release. 
*/ +// #include +// #if __cplusplus >= 201402L +// #define TRT_DEPRECATED [[deprecated]] +// #if __GNUC__ < 6 +// #define TRT_DEPRECATED_ENUM +// #else +// #define TRT_DEPRECATED_ENUM TRT_DEPRECATED +// #endif +// #ifdef _MSC_VER +// #define TRT_DEPRECATED_API __declspec(dllexport) +// #else +// #define TRT_DEPRECATED_API [[deprecated]] __attribute__((visibility("default"))) +// #endif +// #else +// #ifdef _MSC_VER +// #define TRT_DEPRECATED +// #define TRT_DEPRECATED_ENUM +// #define TRT_DEPRECATED_API __declspec(dllexport) +// #else +// #define TRT_DEPRECATED __attribute__((deprecated)) +// #define TRT_DEPRECATED_ENUM + + +/** Defines which symbols are exported */ +// #define TRT_DEPRECATED_API __attribute__((deprecated, visibility("default"))) +// #endif +// #endif +// #ifdef TENSORRT_BUILD_LIB +// #ifdef _MSC_VER +// #define TENSORRTAPI __declspec(dllexport) +// #else +// #define TENSORRTAPI __attribute__((visibility("default"))) +// #endif +// #else +// #define TENSORRTAPI +// #endif + +//! +//! +//! +// #define TRTNOEXCEPT +/** + * \file NvInferRuntimeCommon.h + * + * This is the top-level API file for TensorRT core runtime library. + * */ + +// forward declare some CUDA types to avoid an include dependency +// Targeting ../nvinfer/cublasContext.java + + +// Targeting ../nvinfer/cudnnContext.java + + + + /** Forward declaration of cudaStream_t. */ + + /** Forward declaration of cudaEvent_t. */ + + +//! +//! +//! +public static native @MemberGetter int NV_TENSORRT_VERSION(); +public static final int NV_TENSORRT_VERSION = NV_TENSORRT_VERSION(); +/** + * \namespace nvinfer1 + * + * \brief The TensorRT API version 1 namespace. + * */ + +@Namespace("nvinfer1") @MemberGetter public static native int kNV_TENSORRT_VERSION_IMPL(); +public static final int kNV_TENSORRT_VERSION_IMPL = kNV_TENSORRT_VERSION_IMPL(); // major, minor, patch + +/** char_t is the type used by TensorRT to represent all valid characters. 
*/ +/** AsciiChar is the type used by TensorRT to represent valid ASCII characters. */ + +/** Forward declare IErrorRecorder for use in other interfaces. */ +/** Forward declare IGpuAllocator for use in other interfaces. */ +/** Declaration of EnumMaxImpl struct to store maximum number of elements in an enumeration type. */ + // namespace impl + +/** Maximum number of elements in an enumeration type. */ + + +/** + * \enum DataType + * \brief The type of weights and tensors. + * */ +@Namespace("nvinfer1") public enum DataType { + /** 32-bit floating point format. */ + kFLOAT(0), + + /** IEEE 16-bit floating-point format. */ + kHALF(1), + + /** 8-bit integer representing a quantized floating-point value. */ + kINT8(2), + + /** Signed 32-bit integer format. */ + kINT32(3), + + /** 8-bit boolean. 0 = false, 1 = true, other values undefined. */ + kBOOL(4); + + public final int value; + private DataType(int v) { this.value = v; } + private DataType(DataType e) { this.value = e.value; } + public DataType intern() { for (DataType e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} +// Targeting ../nvinfer/EnumMaxImpl.java + + + +// Targeting ../nvinfer/Dims32.java + + + +/** + * Alias for Dims32. + * + * \warning: This alias might change in the future. + * */ + + +//! +//! +//! +//! +//! +//! + +/** + * \enum TensorFormat + * + * \brief Format of the input/output tensors. + * + * This enum is extended to be used by both plugins and reformat-free network + * I/O tensors. + * + * @see IPluginV2::supportsFormat(), safe::ICudaEngine::getBindingFormat() + * + * For more information about data formats, see the topic "Data Format Description" located in the + * TensorRT Developer Guide. + * */ +@Namespace("nvinfer1") public enum TensorFormat { + /** Row major linear format. 
+ * For a tensor with dimensions {N, C, H, W} or {numbers, channels, + * columns, rows}, the dimensional index corresponds to {3, 2, 1, 0} + * and thus the order is W minor. + * + * For DLA usage, the tensor sizes are limited to C,H,W in the range [1,8192]. + * */ + kLINEAR(0), + + /** Two wide channel vectorized row major format. This format is bound to + * FP16. It is only available for dimensions >= 3. + * For a tensor with dimensions {N, C, H, W}, + * the memory layout is equivalent to a C array with dimensions + * [N][(C+1)/2][H][W][2], with the tensor coordinates (n, c, h, w) + * mapping to array subscript [n][c/2][h][w][c%2]. */ + kCHW2(1), + + /** Eight channel format where C is padded to a multiple of 8. This format + * is bound to FP16. It is only available for dimensions >= 3. + * For a tensor with dimensions {N, C, H, W}, + * the memory layout is equivalent to the array with dimensions + * [N][H][W][(C+7)/8*8], with the tensor coordinates (n, c, h, w) + * mapping to array subscript [n][h][w][c]. */ + +//! +//! + kHWC8(2), + + /** Four wide channel vectorized row major format. This format is bound to + * INT8 or FP16. It is only available for dimensions >= 3. + * For INT8, the C dimension must be a build-time constant. + * For a tensor with dimensions {N, C, H, W}, + * the memory layout is equivalent to a C array with dimensions + * [N][(C+3)/4][H][W][4], with the tensor coordinates (n, c, h, w) + * mapping to array subscript [n][c/4][h][w][c%4]. + * + * Deprecated usage: + * + * If running on the DLA, this format can be used for acceleration + * with the caveat that C must be equal or lesser than 4. + * If used as DLA input with allowGPUFallback disable, it needs to meet + * line stride requirement of DLA format. Column stride in bytes should + * be multiple of 32. */ + +//! +//! + kCHW4(3), + + /** Sixteen wide channel vectorized row major format. This format is bound + * to FP16. It is only available for dimensions >= 3. 
+ * For a tensor with dimensions {N, C, H, W}, + * the memory layout is equivalent to a C array with dimensions + * [N][(C+15)/16][H][W][16], with the tensor coordinates (n, c, h, w) + * mapping to array subscript [n][c/16][h][w][c%16]. + * + * For DLA usage, this format maps to the native image format for FP16, + * and the tensor sizes are limited to C,H,W in the range [1,8192]. + * */ + +//! + kCHW16(4), + + /** Thirty-two wide channel vectorized row major format. This format is + * only available for dimensions >= 3. + * For a tensor with dimensions {N, C, H, W}, + * the memory layout is equivalent to a C array with dimensions + * [N][(C+31)/32][H][W][32], with the tensor coordinates (n, c, h, w) + * mapping to array subscript [n][c/32][h][w][c%32]. + * + * For DLA usage, this format maps to the native image format for INT8, + * and the tensor sizes are limited to C,H,W in the range [1,8192]. */ + kCHW32(5), + + /** Eight channel format where C is padded to a multiple of 8. This format + * is bound to FP16, and it is only available for dimensions >= 4. + * For a tensor with dimensions {N, C, D, H, W}, + * the memory layout is equivalent to an array with dimensions + * [N][D][H][W][(C+7)/8*8], with the tensor coordinates (n, c, d, h, w) + * mapping to array subscript [n][d][h][w][c]. */ + kDHWC8(6), + + /** Thirty-two wide channel vectorized row major format. This format is + * bound to FP16 and INT8 and is only available for dimensions >= 4. + * For a tensor with dimensions {N, C, D, H, W}, + * the memory layout is equivalent to a C array with dimensions + * [N][(C+31)/32][D][H][W][32], with the tensor coordinates (n, c, d, h, w) + * mapping to array subscript [n][c/32][d][h][w][c%32]. */ + kCDHW32(7), + + /** Non-vectorized channel-last format. This format is bound to FP32 + * and is only available for dimensions >= 3. */ + +//! + kHWC(8), + + /** DLA planar format. For a tensor with dimension {N, C, H, W}, the W axis + * always has unit stride. 
The stride for stepping along the H axis is + * rounded up to 64 bytes. + * + * The memory layout is equivalent to a C array with dimensions + * [N][C][H][roundUp(W, 64/elementSize)] where elementSize is + * 2 for FP16 and 1 for Int8, with the tensor coordinates (n, c, h, w) + * mapping to array subscript [n][c][h][w]. */ + +//! + kDLA_LINEAR(9), + + /** DLA image format. For a tensor with dimension {N, C, H, W} the C axis + * always has unit stride. The stride for stepping along the H axis is rounded up + * to 32 bytes. C can only be 1, 3 or 4. + * If C == 1, it will map to grayscale format. + * If C == 3 or C == 4, it will map to color image format. And if C == 3, + * the stride for stepping along the W axis needs to be padded to 4 in elements. + * + * When C is {1, 3, 4}, then C' is {1, 4, 4} respectively, + * the memory layout is equivalent to a C array with dimensions + * [N][H][roundUp(W, 32/C'/elementSize)][C'] where elementSize is 2 for FP16 + * and 1 for Int8. The tensor coordinates (n, c, h, w) mapping to array + * subscript [n][h][w][c]. */ + kDLA_HWC4(10), + + /** Sixteen channel format where C is padded to a multiple of 16. This format + * is bound to FP16. It is only available for dimensions >= 3. + * For a tensor with dimensions {N, C, H, W}, + * the memory layout is equivalent to the array with dimensions + * [N][H][W][(C+15)/16*16], with the tensor coordinates (n, c, h, w) + * mapping to array subscript [n][h][w][c]. */ + kHWC16(11); + + public final int value; + private TensorFormat(int v) { this.value = v; } + private TensorFormat(TensorFormat e) { this.value = e.value; } + public TensorFormat intern() { for (TensorFormat e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + +/** + * \brief PluginFormat is reserved for backward compatibility. + * + * @see IPluginV2::supportsFormat() + * */ +/** Maximum number of elements in TensorFormat enum. 
@see TensorFormat */ + +// Targeting ../nvinfer/PluginTensorDesc.java + + + +/** \struct PluginVersion + * + * \brief Definition of plugin versions. + * + * Tag for plug-in versions. Used in upper byte of getTensorRTVersion(). + * */ +@Namespace("nvinfer1") public enum PluginVersion { + /** IPluginV2 */ + kV2((byte)(0)), + /** IPluginV2Ext */ + kV2_EXT((byte)(1)), + /** IPluginV2IOExt */ + kV2_IOEXT((byte)(2)), + /** IPluginV2DynamicExt */ + kV2_DYNAMICEXT((byte)(3)); + + public final byte value; + private PluginVersion(byte v) { this.value = v; } + private PluginVersion(PluginVersion e) { this.value = e.value; } + public PluginVersion intern() { for (PluginVersion e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} +// Targeting ../nvinfer/IPluginV2.java + + +// Targeting ../nvinfer/IPluginV2Ext.java + + +// Targeting ../nvinfer/IPluginV2IOExt.java + + + +/** + * \enum FieldType + * \brief The possible field types for custom layer. + * */ + +@Namespace("nvinfer1") public enum PluginFieldType { + /** FP16 field type. */ + kFLOAT16(0), + /** FP32 field type. */ + kFLOAT32(1), + /** FP64 field type. */ + kFLOAT64(2), + /** INT8 field type. */ + kINT8(3), + /** INT16 field type. */ + kINT16(4), + /** INT32 field type. */ + kINT32(5), + /** char field type. */ + kCHAR(6), + /** nvinfer1::Dims field type. */ + kDIMS(7), + /** Unknown field type. 
*/ + kUNKNOWN(8); + + public final int value; + private PluginFieldType(int v) { this.value = v; } + private PluginFieldType(PluginFieldType e) { this.value = e.value; } + public PluginFieldType intern() { for (PluginFieldType e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} +// Targeting ../nvinfer/PluginField.java + + +// Targeting ../nvinfer/PluginFieldCollection.java + + +// Targeting ../nvinfer/IPluginCreator.java + + +// Targeting ../nvinfer/IPluginRegistry.java + + + +@Namespace("nvinfer1") public enum AllocatorFlag { + /** TensorRT may call realloc() on this allocation */ + kRESIZABLE(0); + + public final int value; + private AllocatorFlag(int v) { this.value = v; } + private AllocatorFlag(AllocatorFlag e) { this.value = e.value; } + public AllocatorFlag intern() { for (AllocatorFlag e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} +/** Maximum number of elements in AllocatorFlag enum. @see AllocatorFlag */ + // namespace impl + + + +//! +//! +//! +// Targeting ../nvinfer/IGpuAllocator.java + + +// Targeting ../nvinfer/ILogger.java + + +/** Maximum number of elements in ILogger::Severity enum. @see ILogger::Severity */ + // namespace impl + +/** + * \enum ErrorCode + * + * \brief Error codes that can be returned by TensorRT during execution. + * */ +@Namespace("nvinfer1") public enum ErrorCode { + /** + * Execution completed successfully. + * */ + + +//! +//! + kSUCCESS(0), + + /** + * An error that does not fall into any other category. This error is included for forward compatibility. + * */ + + +//! +//! + kUNSPECIFIED_ERROR(1), + + /** + * A non-recoverable TensorRT error occurred. TensorRT is in an invalid internal state when this error is + * emitted and any further calls to TensorRT will result in undefined behavior. + * */ + + +//! +//! 
+ kINTERNAL_ERROR(2), + + /** + * An argument passed to the function is invalid in isolation. + * This is a violation of the API contract. + * */ + + +//! +//! + kINVALID_ARGUMENT(3), + + /** + * An error occurred when comparing the state of an argument relative to other arguments. For example, the + * dimensions for concat differ between two tensors outside of the channel dimension. This error is triggered + * when an argument is correct in isolation, but not relative to other arguments. This is to help to distinguish + * from the simple errors from the more complex errors. + * This is a violation of the API contract. + * */ + + +//! +//! + kINVALID_CONFIG(4), + + /** + * An error occurred when performing an allocation of memory on the host or the device. + * A memory allocation error is normally fatal, but in the case where the application provided its own memory + * allocation routine, it is possible to increase the pool of available memory and resume execution. + * */ + + +//! +//! + kFAILED_ALLOCATION(5), + + /** + * One, or more, of the components that TensorRT relies on did not initialize correctly. + * This is a system setup issue. + * */ + + +//! +//! + kFAILED_INITIALIZATION(6), + + /** + * An error occurred during execution that caused TensorRT to end prematurely, either an asynchronous error or + * other execution errors reported by CUDA/DLA. In a dynamic system, the + * data can be thrown away and the next frame can be processed or execution can be retried. + * This is either an execution error or a memory error. + * */ + + +//! +//! + kFAILED_EXECUTION(7), + + /** + * An error occurred during execution that caused the data to become corrupted, but execution finished. Examples + * of this error are NaN squashing or integer overflow. In a dynamic system, the data can be thrown away and the + * next frame can be processed or execution can be retried. + * This is either a data corruption error, an input error, or a range error. 
+ * This is not used in safety but may be used in standard. + * */ + + +//! +//! +//! + kFAILED_COMPUTATION(8), + + /** + * TensorRT was put into a bad state by incorrect sequence of function calls. An example of an invalid state is + * specifying a layer to be DLA only without GPU fallback, and that layer is not supported by DLA. This can occur + * in situations where a service is optimistically executing networks for multiple different configurations + * without checking proper error configurations, and instead throwing away bad configurations caught by TensorRT. + * This is a violation of the API contract, but can be recoverable. + * + * Example of a recovery: + * GPU fallback is disabled and conv layer with large filter(63x63) is specified to run on DLA. This will fail due + * to DLA not supporting the large kernel size. This can be recovered by either turning on GPU fallback + * or setting the layer to run on the GPU. + * */ + + +//! +//! + kINVALID_STATE(9), + + /** + * An error occurred due to the network not being supported on the device due to constraints of the hardware or + * system. An example is running a unsafe layer in a safety certified context, or a resource requirement for the + * current network is greater than the capabilities of the target device. The network is otherwise correct, but + * the network and hardware combination is problematic. This can be recoverable. + * Examples: + * * Scratch space requests larger than available device memory and can be recovered by increasing allowed + * workspace size. + * * Tensor size exceeds the maximum element count and can be recovered by reducing the maximum batch size. 
+ * */ + kUNSUPPORTED_STATE(10); + + public final int value; + private ErrorCode(int v) { this.value = v; } + private ErrorCode(ErrorCode e) { this.value = e.value; } + public ErrorCode intern() { for (ErrorCode e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} +/** Maximum number of elements in ErrorCode enum. @see ErrorCode */ + +// Targeting ../nvinfer/IErrorRecorder.java + + // class IErrorRecorder + + // namespace nvinfer1 + +/** + * \brief Return the library version number. + * + * The format is as for TENSORRT_VERSION: (TENSORRT_MAJOR * 1000) + (TENSORRT_MINOR * 100) + TENSOR_PATCH. + * */ +public static native @NoException(true) int getInferLibVersion(); + +// #endif // NV_INFER_RUNTIME_COMMON_H + + +// Parsed from NvInferLegacyDims.h + +/* + * Copyright 1993-2021 NVIDIA Corporation. All rights reserved. + * + * NOTICE TO LICENSEE: + * + * This source code and/or documentation ("Licensed Deliverables") are + * subject to NVIDIA intellectual property rights under U.S. and + * international Copyright laws. + * + * These Licensed Deliverables contained herein is PROPRIETARY and + * CONFIDENTIAL to NVIDIA and is being provided under the terms and + * conditions of a form of NVIDIA software license agreement by and + * between NVIDIA and Licensee ("License Agreement") or electronically + * accepted by Licensee. Notwithstanding any terms or conditions to + * the contrary in the License Agreement, reproduction or disclosure + * of the Licensed Deliverables to any third party without the express + * written consent of NVIDIA is prohibited. + * + * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE + * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE + * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS + * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. 
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED + * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, + * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. + * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE + * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY + * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY + * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, + * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS + * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THESE LICENSED DELIVERABLES. + * + * U.S. Government End Users. These Licensed Deliverables are a + * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT + * 1995), consisting of "commercial computer software" and "commercial + * computer software documentation" as such terms are used in 48 + * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government + * only as a commercial end item. Consistent with 48 C.F.R.12.212 and + * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all + * U.S. Government End Users acquire the Licensed Deliverables with + * only those rights set forth herein. + * + * Any use of the Licensed Deliverables in individual and commercial + * software must include, in the user documentation and internal + * comments to the code, the above Disclaimer and U.S. Government End + * Users Notice. + */ + +// #ifndef NV_INFER_LEGACY_DIMS_H +// #define NV_INFER_LEGACY_DIMS_H + + + +//! +//! +//! + +//! +//! +//! +// #include "NvInferRuntimeCommon.h" + +/** + * \file NvInferLegacyDims.h + * + * This file contains declarations of legacy dimensions types which use channel + * semantics in their names, and declarations on which those types rely. + * +

+ * + * \namespace nvinfer1 + * + * \brief The TensorRT API version 1 namespace. + * */ +// Targeting ../nvinfer/Dims2.java + + +// Targeting ../nvinfer/DimsHW.java + + +// Targeting ../nvinfer/Dims3.java + + +// Targeting ../nvinfer/Dims4.java + + + + // namespace nvinfer1 + +// #endif // NV_INFER_LEGCY_DIMS_H + + +// Parsed from NvInferRuntime.h + +/* + * Copyright 1993-2021 NVIDIA Corporation. All rights reserved. + * + * NOTICE TO LICENSEE: + * + * This source code and/or documentation ("Licensed Deliverables") are + * subject to NVIDIA intellectual property rights under U.S. and + * international Copyright laws. + * + * These Licensed Deliverables contained herein is PROPRIETARY and + * CONFIDENTIAL to NVIDIA and is being provided under the terms and + * conditions of a form of NVIDIA software license agreement by and + * between NVIDIA and Licensee ("License Agreement") or electronically + * accepted by Licensee. Notwithstanding any terms or conditions to + * the contrary in the License Agreement, reproduction or disclosure + * of the Licensed Deliverables to any third party without the express + * written consent of NVIDIA is prohibited. + * + * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE + * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE + * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS + * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. + * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED + * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, + * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. 
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE + * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY + * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY + * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, + * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS + * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THESE LICENSED DELIVERABLES. + * + * U.S. Government End Users. These Licensed Deliverables are a + * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT + * 1995), consisting of "commercial computer software" and "commercial + * computer software documentation" as such terms are used in 48 + * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government + * only as a commercial end item. Consistent with 48 C.F.R.12.212 and + * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all + * U.S. Government End Users acquire the Licensed Deliverables with + * only those rights set forth herein. + * + * Any use of the Licensed Deliverables in individual and commercial + * software must include, in the user documentation and internal + * comments to the code, the above Disclaimer and U.S. Government End + * Users Notice. + */ + +// #ifndef NV_INFER_RUNTIME_H + + +//! +//! +//! +// #define NV_INFER_RUNTIME_H + +/** + * \file NvInferRuntime.h + * + * This is the top-level API file for TensorRT extended runtime library. + * */ + +// #include "NvInferImpl.h" +// #include "NvInferRuntimeCommon.h" +// Targeting ../nvinfer/IPluginFactory.java + + +// Targeting ../nvinfer/INoCopy.java + + + +/** + * \enum EngineCapability + * + * \brief List of supported engine capability flows. + * + * \details The EngineCapability determines the restrictions of a network during build time and what runtime + * it targets. 
When BuilderFlag::kSAFETY_SCOPE is not set (by default), EngineCapability::kSTANDARD does not provide + * any restrictions on functionality and the resulting serialized engine can be executed with TensorRT's standard + * runtime APIs in the nvinfer1 namespace. EngineCapability::kSAFETY provides a restricted subset of network + * operations that are safety certified and the resulting serialized engine can be executed with TensorRT's safe + * runtime APIs in the nvinfer1::safe namespace. EngineCapability::kDLA_STANDALONE provides a restricted subset of + * network operations that are DLA compatible and the resulting serialized engine can be executed using standalone + * DLA runtime APIs. See sampleNvmedia for an example of integrating NvMediaDLA APIs with TensorRT APIs. + * */ + +@Namespace("nvinfer1") public enum EngineCapability { + /** + * Standard: TensorRT flow without targeting the safety runtime. + * This flow supports both DeviceType::kGPU and DeviceType::kDLA. + * */ + kSTANDARD(0), + + +//! +//! + kDEFAULT(kSTANDARD.value), + + /** + * Safety: TensorRT flow with restrictions targeting the safety runtime. + * See safety documentation for list of supported layers and formats. + * This flow supports only DeviceType::kGPU. + * */ + kSAFETY(1), + + +//! +//! + kSAFE_GPU(kSAFETY.value), + + /** + * DLA Standalone: TensorRT flow with restrictions targeting external, to TensorRT, DLA runtimes. + * See DLA documentation for list of supported layers and formats. + * This flow supports only DeviceType::kDLA. + * */ + kDLA_STANDALONE(2), + kSAFE_DLA(kDLA_STANDALONE.value); + + public final int value; + private EngineCapability(int v) { this.value = v; } + private EngineCapability(EngineCapability e) { this.value = e.value; } + public EngineCapability intern() { for (EngineCapability e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} +/** Maximum number of elements in EngineCapability enum. 
@see EngineCapability */ + +// Targeting ../nvinfer/Weights.java + + +// Targeting ../nvinfer/IHostMemory.java + + + +/** + * \enum DimensionOperation + * + * \brief An operation on two IDimensionExpr, which represent integer expressions used in dimension computations. + * + * For example, given two IDimensionExpr x and y and an IExprBuilder& eb, + * eb.operation(DimensionOperation::kSUM, x, y) creates a representation of x+y. + * + * @see IDimensionExpr, IExprBuilder + * */ +@Namespace("nvinfer1") public enum DimensionOperation { + /** Sum of the two operands. */ + kSUM(0), + /** Product of the two operands. */ + kPROD(1), + /** Maximum of the two operands. */ + kMAX(2), + /** Minimum of the two operands. */ + kMIN(3), + /** Subtract the second element from the first. */ + kSUB(4), + /** 1 if operands are equal, 0 otherwise. */ + kEQUAL(5), + /** 1 if first operand is less than second operand, 0 otherwise. */ + kLESS(6), + /** Floor division of the first element by the second. */ + kFLOOR_DIV(7), + /** Division rounding up */ + kCEIL_DIV(8); + + public final int value; + private DimensionOperation(int v) { this.value = v; } + private DimensionOperation(DimensionOperation e) { this.value = e.value; } + public DimensionOperation intern() { for (DimensionOperation e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + +/** Maximum number of elements in DimensionOperation enum. @see DimensionOperation */ + + +/** + * \enum TensorLocation + * \brief The location for tensor data storage, device or host. + * */ +@Namespace("nvinfer1") public enum TensorLocation { + /** Data stored on device. */ + kDEVICE(0), + /** Data stored on host. 
*/ + kHOST(1); + + public final int value; + private TensorLocation(int v) { this.value = v; } + private TensorLocation(TensorLocation e) { this.value = e.value; } + public TensorLocation intern() { for (TensorLocation e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} +/** Maximum number of elements in TensorLocation enum. @see TensorLocation */ + +// Targeting ../nvinfer/IDimensionExpr.java + + +// Targeting ../nvinfer/IExprBuilder.java + + +// Targeting ../nvinfer/DimsExprs.java + + +// Targeting ../nvinfer/DynamicPluginTensorDesc.java + + +// Targeting ../nvinfer/IPluginV2DynamicExt.java + + +// Targeting ../nvinfer/IProfiler.java + + + +/** + * \enum WeightsRole + * \brief How a layer uses particular Weights. + * + * The power weights of an IScaleLayer are omitted. Refitting those is not supported. + * */ +@Namespace("nvinfer1") public enum WeightsRole { + /** kernel for IConvolutionLayer, IDeconvolutionLayer, or IFullyConnectedLayer */ + kKERNEL(0), + /** bias for IConvolutionLayer, IDeconvolutionLayer, or IFullyConnectedLayer */ + kBIAS(1), + /** shift part of IScaleLayer */ + kSHIFT(2), + /** scale part of IScaleLayer */ + kSCALE(3), + /** weights for IConstantLayer */ + kCONSTANT(4), + /** Any other weights role */ + kANY(5); + + public final int value; + private WeightsRole(int v) { this.value = v; } + private WeightsRole(WeightsRole e) { this.value = e.value; } + public WeightsRole intern() { for (WeightsRole e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + +/** Maximum number of elements in WeightsRole enum. @see WeightsRole */ + + +/** + * \enum DeviceType + * \brief The device that this layer/network will execute on. 
+ * + * */ +@Namespace("nvinfer1") public enum DeviceType { + /** GPU Device */ + kGPU(0), + /** DLA Core */ + kDLA(1); + + public final int value; + private DeviceType(int v) { this.value = v; } + private DeviceType(DeviceType e) { this.value = e.value; } + public DeviceType intern() { for (DeviceType e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + +/** Maximum number of elements in DeviceType enum. @see DeviceType */ + +// Targeting ../nvinfer/IRuntime.java + + +// Targeting ../nvinfer/IRefitter.java + + + +/** + * \enum OptProfileSelector + * + * \brief When setting or querying optimization profile parameters (such as shape tensor inputs or dynamic dimensions), + * select whether we are interested in the minimum, optimum, or maximum values for these parameters. + * The minimum and maximum specify the permitted range that is supported at runtime, while the optimum value + * is used for the kernel selection. This should be the "typical" value that is expected to occur at runtime. + * + * @see IOptimizationProfile::setDimensions(), IOptimizationProfile::setShapeValues() + * */ +@Namespace("nvinfer1") public enum OptProfileSelector { + /** This is used to set or get the minimum permitted value for dynamic dimensions etc. */ + kMIN(0), + /** This is used to set or get the value that is used in the optimization (kernel selection). */ + kOPT(1), + /** This is used to set or get the maximum permitted value for dynamic dimensions etc. 
*/ + kMAX(2); + + public final int value; + private OptProfileSelector(int v) { this.value = v; } + private OptProfileSelector(OptProfileSelector e) { this.value = e.value; } + public OptProfileSelector intern() { for (OptProfileSelector e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + +// Targeting ../nvinfer/IOptimizationProfile.java + + + +/** + * \enum TacticSource + * + * \brief List of tactic sources for TensorRT. + * + * @see TacticSources, IBuilderConfig::setTacticSources(), IBuilderConfig::getTacticSources() + * */ +@Namespace("nvinfer1") public enum TacticSource { + /** \note Disabling kCUBLAS will cause the cublas handle passed to plugins in attachToContext to be null. */ + /** cuBLAS tactics. */ + kCUBLAS(0), + /** cuBLAS LT tactics */ + kCUBLAS_LT(1), + /** cuDNN tactics */ + kCUDNN(2); + + public final int value; + private TacticSource(int v) { this.value = v; } + private TacticSource(TacticSource e) { this.value = e.value; } + public TacticSource intern() { for (TacticSource e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + + + +/** + * \brief Represents a collection of one or more TacticSource values + * combine using bitwise-OR operations. + * + * @see IBuilderConfig::setTacticSources(), IBuilderConfig::getTacticSources() + * */ + + +//! +//! +//! +//! +// Targeting ../nvinfer/ICudaEngine.java + + +// Targeting ../nvinfer/IExecutionContext.java + + // class IExecutionContext + // namespace nvinfer1 + +/** + * Internal C entry point for creating IRuntime. + * \private + * */ + + +//! +//! +public static native @NoException(true) Pointer createInferRuntime_INTERNAL(Pointer logger, int version); + +/** + * Internal C entry point for creating IRefitter. + * \private + * */ + + +//! +//! 
+public static native @NoException(true) Pointer createInferRefitter_INTERNAL(Pointer engine, Pointer logger, int version); + +/** + * \brief Return the plugin registry + * */ + + +//! +//! +public static native @NoException(true) IPluginRegistry getPluginRegistry(); + +/** + * \brief Return the logger object. + * */ +public static native @NoException(true) ILogger getLogger(); +/** + * \brief Create an instance of an IRuntime class. + * + * This class is the logging class for the runtime. + * */ + + +//! +//! +//! +@Namespace("nvinfer1") public static native @NoException(true) IRuntime createInferRuntime(@ByRef ILogger logger); + +/** + * \brief Create an instance of an IRefitter class. + * + * This is the logging class for the refitter. + * */ +@Namespace("nvinfer1") public static native @NoException(true) IRefitter createInferRefitter(@ByRef ICudaEngine engine, @ByRef ILogger logger); + + // namespace + +/** + * \brief Register the plugin creator to the registry + * The static registry object will be instantiated when the plugin library is + * loaded. This static object will register all creators available in the + * library to the registry. + * + * \warning Statically registering plugins should be avoided in the automotive + * safety context as the application developer should first register an error recorder + * with the plugin registry via IPluginRegistry::setErrorRecorder() before using + * IPluginRegistry::registerCreator() or other methods. + * */ + + // namespace nvinfer1 + +// #define REGISTER_TENSORRT_PLUGIN(name) +// static nvinfer1::PluginRegistrar pluginRegistrar##name {} +// #endif // NV_INFER_RUNTIME_H + + +// Parsed from NvInfer.h + +/* + * Copyright 1993-2021 NVIDIA Corporation. All rights reserved. + * + * NOTICE TO LICENSEE: + * + * This source code and/or documentation ("Licensed Deliverables") are + * subject to NVIDIA intellectual property rights under U.S. and + * international Copyright laws. 
+ * + * These Licensed Deliverables contained herein is PROPRIETARY and + * CONFIDENTIAL to NVIDIA and is being provided under the terms and + * conditions of a form of NVIDIA software license agreement by and + * between NVIDIA and Licensee ("License Agreement") or electronically + * accepted by Licensee. Notwithstanding any terms or conditions to + * the contrary in the License Agreement, reproduction or disclosure + * of the Licensed Deliverables to any third party without the express + * written consent of NVIDIA is prohibited. + * + * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE + * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE + * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS + * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. + * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED + * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, + * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. + * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE + * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY + * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY + * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, + * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS + * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THESE LICENSED DELIVERABLES. + * + * U.S. Government End Users. These Licensed Deliverables are a + * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT + * 1995), consisting of "commercial computer software" and "commercial + * computer software documentation" as such terms are used in 48 + * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government + * only as a commercial end item. Consistent with 48 C.F.R.12.212 and + * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all + * U.S. 
Government End Users acquire the Licensed Deliverables with + * only those rights set forth herein. + * + * Any use of the Licensed Deliverables in individual and commercial + * software must include, in the user documentation and internal + * comments to the code, the above Disclaimer and U.S. Government End + * Users Notice. + */ + +// #ifndef NV_INFER_H +// #define NV_INFER_H + +// #include "NvInferLegacyDims.h" + + +//! +//! +//! +//! + +//! +//! +//! + +//! +//! +//! +// #include "NvInferRuntime.h" + +/** + * \mainpage + * + * This is the API documentation for the NVIDIA TensorRT library. It provides information on individual + * functions, classes and methods. Use the index on the left to navigate the documentation. + * + * Please see the accompanying user guide and samples for higher-level information and general advice on + * using TensorRT. */ +// +/** TensorRT Versioning follows Semantic Versioning Guidelines specified here: https://semver.org/ +/** +

+/** +/** \file NvInfer.h +/** +/** This is the top-level API file for TensorRT. +/** +

+/** +/** \namespace nvinfer1 +/** +/** \brief The TensorRT API version 1 namespace. +/** */ + +/** + * \enum LayerType + * + * \brief The type values of layer classes. + * + * @see ILayer::getType() + * */ +@Namespace("nvinfer1") public enum LayerType { + /** Convolution layer. */ + kCONVOLUTION(0), + /** Fully connected layer. */ + kFULLY_CONNECTED(1), + /** Activation layer. */ + kACTIVATION(2), + /** Pooling layer. */ + kPOOLING(3), + /** LRN layer. */ + kLRN(4), + /** Scale layer. */ + kSCALE(5), + /** SoftMax layer. */ + kSOFTMAX(6), + /** Deconvolution layer. */ + kDECONVOLUTION(7), + /** Concatenation layer. */ + kCONCATENATION(8), + /** Elementwise layer. */ + kELEMENTWISE(9), + /** Plugin layer. */ + kPLUGIN(10), + /** UnaryOp operation Layer. */ + kUNARY(11), + /** Padding layer. */ + kPADDING(12), + /** Shuffle layer. */ + kSHUFFLE(13), + /** Reduce layer. */ + kREDUCE(14), + /** TopK layer. */ + kTOPK(15), + /** Gather layer. */ + kGATHER(16), + /** Matrix multiply layer. */ + kMATRIX_MULTIPLY(17), + /** Ragged softmax layer. */ + kRAGGED_SOFTMAX(18), + /** Constant layer. */ + kCONSTANT(19), + /** RNNv2 layer. */ + kRNN_V2(20), + /** Identity layer. */ + kIDENTITY(21), + /** PluginV2 layer. */ + kPLUGIN_V2(22), + /** Slice layer. */ + kSLICE(23), + /** Shape layer. */ + kSHAPE(24), + /** Parametric ReLU layer. */ + kPARAMETRIC_RELU(25), + /** Resize Layer. */ + kRESIZE(26), + /** Loop Trip limit layer */ + kTRIP_LIMIT(27), + /** Loop Recurrence layer */ + kRECURRENCE(28), + /** Loop Iterator layer */ + kITERATOR(29), + /** Loop output layer */ + kLOOP_OUTPUT(30), + /** Select layer. 
*/ + kSELECT(31), + /** Fill layer */ + kFILL(32), + /** Quantize layer */ + kQUANTIZE(33), + /** Dequantize layer */ + kDEQUANTIZE(34); + + public final int value; + private LayerType(int v) { this.value = v; } + private LayerType(LayerType e) { this.value = e.value; } + public LayerType intern() { for (LayerType e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + +/** Maximum number of elements in LayerType enum. @see LayerType */ + + +/** + * \brief It is capable of representing one or more TensorFormat by binary OR + * operations, e.g., 1U << TensorFormat::kCHW4 | 1U << TensorFormat::kCHW32. + * + * @see ITensor::getAllowedFormats(), ITensor::setAllowedFormats(), + * */ + + +//! +//! +//! + +/** + * \enum ActivationType + * + * \brief Enumerates the types of activation to perform in an activation layer. + * */ +@Namespace("nvinfer1") public enum ActivationType { + /** Rectified linear activation. */ + kRELU(0), + /** Sigmoid activation. */ + kSIGMOID(1), + /** TanH activation. */ + kTANH(2), + /** LeakyRelu activation: x>=0 ? x : alpha * x. */ + kLEAKY_RELU(3), + /** Elu activation: x>=0 ? x : alpha * (exp(x) - 1). */ + kELU(4), + /** Selu activation: x>0 ? beta * x : beta * (alpha*exp(x) - alpha) */ + kSELU(5), + /** Softsign activation: x / (1+|x|) */ + kSOFTSIGN(6), + /** Parametric softplus activation: alpha*log(exp(beta*x)+1) */ + kSOFTPLUS(7), + /** Clip activation: max(alpha, min(beta, x)) */ + kCLIP(8), + /** Hard sigmoid activation: max(0, min(1, alpha*x+beta)) */ + kHARD_SIGMOID(9), + /** Scaled tanh activation: alpha*tanh(beta*x) */ + kSCALED_TANH(10), + /** Thresholded ReLU activation: x>alpha ? 
x : 0 */ + kTHRESHOLDED_RELU(11); + + public final int value; + private ActivationType(int v) { this.value = v; } + private ActivationType(ActivationType e) { this.value = e.value; } + public ActivationType intern() { for (ActivationType e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} +/** Maximum number of elements in ActivationType enum. @see ActivationType */ + +// Targeting ../nvinfer/ITensor.java + + +// Targeting ../nvinfer/ILayer.java + + + +/** + * \enum PaddingMode + * + * \brief Enumerates the modes of padding to perform in convolution, deconvolution and pooling layer, + * padding mode takes precedence if setPaddingMode() and setPrePadding() are also used. + * + * There are three padding styles, EXPLICIT, SAME, and CAFFE, with each style having two variants. + * The EXPLICIT and CAFFE styles determine if the final sampling location is used or not. + * The SAME style determine if the asymmetry in the padding is on the pre or post padding. + * + *

{@code
+ *  Shorthand:
+ *      I = dimensions of input image.
+ *      B = prePadding, before the image data. For deconvolution, prePadding is set before output.
+ *      A = postPadding, after the image data. For deconvolution, postPadding is set after output.
+ *      P = delta between input and output
+ *      S = stride
+ *      F = filter
+ *      O = output
+ *      D = dilation
+ *      M = I + B + A ; The image data plus any padding
+ *      DK = 1 + D * (F - 1)
+ *  }
+ * + * Formulas for Convolution: + * - EXPLICIT_ROUND_DOWN: + *
{@code
+ *          O = floor((M - DK) / S) + 1
+ *  }
+ * - CAFFE_ROUND_DOWN: + *
{@code
+ *          O = floor((I + B * 2 - DK) / S)
+ *  }
+ * - EXPLICIT_ROUND_UP: + *
{@code
+ *          O = ceil((M - DK) / S) + 1
+ *  }
+ * - CAFFE_ROUND_UP: + *
{@code
+ *          O = ceil((I + B * 2 - DK) / S)
+ *  }
+ * - SAME_UPPER: + *
{@code
+ *          O = ceil(I / S)
+ *          P = floor((I - 1) / S) * S + DK - I;
+ *          B = floor(P / 2)
+ *          A = P - B
+ *  }
+ * - SAME_LOWER: + *
{@code
+ *          O = ceil(I / S)
+ *          P = floor((I - 1) / S) * S + DK - I;
+ *          A = floor(P / 2)
+ *          B = P - A
+ *  }
+ * + * Formulas for Deconvolution: + * - EXPLICIT_ROUND_DOWN: + * - CAFFE_ROUND_DOWN: + * - EXPLICIT_ROUND_UP: + * - CAFFE_ROUND_UP: + *
{@code
+ *          O = (I - 1) * S + DK - (B + A)
+ *  }
+ * - SAME_UPPER: + *
{@code
+ *          O = min(I * S, (I - 1) * S + DK)
+ *          P = max(DK - S, 0)
+ *          B = floor(P / 2)
+ *          A = P - B
+ *  }
+ * - SAME_LOWER: + *
{@code
+ *          O = min(I * S, (I - 1) * S + DK)
+ *          P = max(DK - S, 0)
+ *          A = floor(P / 2)
+ *          B = P - A
+ *  }
+ * + * Formulas for Pooling: + * - EXPLICIT_ROUND_DOWN: + *
{@code
+ *          O = floor((M - F) / S) + 1
+ *  }
+ * - EXPLICIT_ROUND_UP: + *
{@code
+ *          O = ceil((M - F) / S) + 1
+ *  }
+ * - SAME_UPPER: + *
{@code
+ *          O = ceil(I / S)
+ *          P = floor((I - 1) / S) * S + F - I;
+ *          B = floor(P / 2)
+ *          A = P - B
+ *  }
+ * - SAME_LOWER: + *
{@code
+ *          O = ceil(I / S)
+ *          P = floor((I - 1) / S) * S + F - I;
+ *          A = floor(P / 2)
+ *          B = P - A
+ *  }
+ * - CAFFE_ROUND_DOWN: + *
{@code
+ *          EXPLICIT_ROUND_DOWN - ((EXPLICIT_ROUND_DOWN - 1) * S >= I + B)
+ *  }
+ * - CAFFE_ROUND_UP: + *
{@code
+ *          EXPLICIT_ROUND_UP - ((EXPLICIT_ROUND_UP - 1) * S >= I + B)
+ *  }
+ * + * Pooling Example 1: + *
{@code
+ *      Given I = {6, 6}, B = {3, 3}, A = {2, 2}, S = {2, 2}, F = {3, 3}. What is O?
+ *      (B, A can be calculated for SAME_UPPER and SAME_LOWER mode)
+ *  }
+ * + * - EXPLICIT_ROUND_DOWN: + *
{@code
+ *      Computation:
+ *          M = {6, 6} + {3, 3} + {2, 2} ==> {11, 11}
+ *          O ==> floor((M - F) / S) + 1
+ *            ==> floor(({11, 11} - {3, 3}) / {2, 2}) + {1, 1}
+ *            ==> floor({8, 8} / {2, 2}) + {1, 1}
+ *            ==> {5, 5}
+ *  }
+ * - EXPLICIT_ROUND_UP: + *
{@code
+ *      Computation:
+ *          M = {6, 6} + {3, 3} + {2, 2} ==> {11, 11}
+ *          O ==> ceil((M - F) / S) + 1
+ *            ==> ceil(({11, 11} - {3, 3}) / {2, 2}) + {1, 1}
+ *            ==> ceil({8, 8} / {2, 2}) + {1, 1}
+ *            ==> {5, 5}
+ *  }
+ * The sample points are {0, 2, 4, 6, 8} in each dimension. + * + * - SAME_UPPER: + *
{@code
+ *      Computation:
+ *          I = {6, 6}
+ *          S = {2, 2}
+ *          O = ceil(I / S) = {3, 3}
+ *          P = floor((I - 1) / S) * S + F - I
+ *              ==> floor(({6, 6} - {1, 1}) / {2, 2}) * {2, 2} + {3, 3} - {6, 6}
+ *              ==> {4, 4} + {3, 3} - {6, 6}
+ *              ==> {1, 1}
+ *          B = floor({1, 1} / {2, 2})
+ *              ==> {0, 0}
+ *          A = {1, 1} - {0, 0}
+ *              ==> {1, 1}
+ *  }
+ * - SAME_LOWER: + *
{@code
+ *      Computation:
+ *          I = {6, 6}
+ *          S = {2, 2}
+ *          O = ceil(I / S) = {3, 3}
+ *          P = floor((I - 1) / S) * S + F - I
+ *            ==> {1, 1}
+ *          A = floor({1, 1} / {2, 2})
+ *            ==> {0, 0}
+ *          B = {1, 1} - {0, 0}
+ *            ==> {1, 1}
+ *  }
+ * The sample points are {0, 2, 4} in each dimension.
+ * SAME_UPPER has {O0, O1, O2, pad} in output in each dimension.
+ * SAME_LOWER has {pad, O0, O1, O2} in output in each dimension.
+ *
+ * Pooling Example 2:
+ *
{@code
+ *      Given I = {6, 6}, B = {3, 3}, A = {3, 3}, S = {2, 2}, F = {3, 3}. What is O?
+ *  }
+ * + * - CAFFE_ROUND_DOWN: + *
{@code
+ *      Computation:
+ *          M = {6, 6} + {3, 3} + {3, 3} ==> {12, 12}
+ *          EXPLICIT_ROUND_DOWN ==> floor((M - F) / S) + 1
+ *                              ==> floor(({12, 12} - {3, 3}) / {2, 2}) + {1, 1}
+ *                              ==> {5, 5}
+ *          DIFF = (((EXPLICIT_ROUND_DOWN - 1) * S >= I + B) ? {1, 1} : {0, 0})
+ *            ==> ({5, 5} - {1, 1}) * {2, 2} >= {6, 6} + {3, 3} ? {1, 1} : {0,0}
+ *            ==> {0, 0}
+ *          O ==> EXPLICIT_ROUND_DOWN - DIFF
+ *            ==> {5, 5} - {0, 0}
+ *            ==> {5, 5}
+ *  }
+ * - CAFFE_ROUND_UP: + *
{@code
+ *      Computation:
+ *          M = {6, 6} + {3, 3} + {3, 3} ==> {12, 12}
+ *          EXPLICIT_ROUND_UP ==> ceil((M - F) / S) + 1
+ *                            ==> ceil(({12, 12} - {3, 3}) / {2, 2}) + {1, 1}
+ *                            ==> {6, 6}
+ *          DIFF = (((EXPLICIT_ROUND_UP - 1) * S >= I + B) ? {1, 1} : {0, 0})
+ *            ==> ({6, 6} - {1, 1}) * {2, 2} >= {6, 6} + {3, 3} ? {1, 1} : {0,0}
+ *            ==> {1, 1}
+ *          O ==> EXPLICIT_ROUND_UP - DIFF
+ *            ==> {6, 6} - {1, 1}
+ *            ==> {5, 5}
+ *  }
+ * + * The sample points are {0, 2, 4, 6, 8} in each dimension.
+ * CAFFE_ROUND_DOWN and CAFFE_ROUND_UP have two restrictions each on usage with pooling operations.
+ * Violating these restrictions will cause getDimensions to return an empty dimension and also to reject the network
+ * at validation time.
+ * For more information on original reference code, see + * https://github.com/BVLC/caffe/blob/master/src/caffe/layers/pooling_layer.cpp + * + * - Restriction 1: + *
{@code
+ *      CAFFE_ROUND_DOWN: B >= F is an error if (B - S) < F
+ *      CAFFE_ROUND_UP: (B + S) >= (F + 1) is an error if B < (F + 1)
+ *  }
+ * + * - Restriction 2: + *
{@code
+ *      CAFFE_ROUND_DOWN: (B - S) >= F is an error if B >= F
+ *      CAFFE_ROUND_UP: B >= (F + 1) is an error if (B + S) >= (F + 1)
+ *  }
+ * */ +@Namespace("nvinfer1") public enum PaddingMode { + /** Use explicit padding, rounding output size down. */ + kEXPLICIT_ROUND_DOWN(0), + /** Use explicit padding, rounding output size up. */ + kEXPLICIT_ROUND_UP(1), + /** Use SAME padding, with prePadding <= postPadding. */ + kSAME_UPPER(2), + /** Use SAME padding, with prePadding >= postPadding. */ + kSAME_LOWER(3), + /** Use CAFFE padding, rounding output size down, uses prePadding value. */ + kCAFFE_ROUND_DOWN(4), + /** Use CAFFE padding, rounding output size up, uses prePadding value. */ + kCAFFE_ROUND_UP(5); + + public final int value; + private PaddingMode(int v) { this.value = v; } + private PaddingMode(PaddingMode e) { this.value = e.value; } + public PaddingMode intern() { for (PaddingMode e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} +/** Maximum number of elements in PaddingMode enum. @see PaddingMode */ + +// Targeting ../nvinfer/IConvolutionLayer.java + + +// Targeting ../nvinfer/IFullyConnectedLayer.java + + +// Targeting ../nvinfer/IActivationLayer.java + + + +/** + * \enum PoolingType + * + * \brief The type of pooling to perform in a pooling layer. + * */ +@Namespace("nvinfer1") public enum PoolingType { + kMAX(0), // Maximum over elements + kAVERAGE(1), // Average over elements. If the tensor is padded, the count includes the padding + kMAX_AVERAGE_BLEND(2);// Blending between max and average pooling: (1-blendFactor)*maxPool + blendFactor*avgPool + + public final int value; + private PoolingType(int v) { this.value = v; } + private PoolingType(PoolingType e) { this.value = e.value; } + public PoolingType intern() { for (PoolingType e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} +/** Maximum number of elements in PoolingType enum. 
@see PoolingType */ + +// Targeting ../nvinfer/IPoolingLayer.java + + +// Targeting ../nvinfer/ILRNLayer.java + + + +/** + * \brief Controls how shift, scale and power are applied in a Scale layer. + * + * @see IScaleLayer + * */ +@Namespace("nvinfer1") public enum ScaleMode { + /** Identical coefficients across all elements of the tensor. */ + kUNIFORM(0), + /** Per-channel coefficients. */ + kCHANNEL(1), + /** Elementwise coefficients. */ + kELEMENTWISE(2); + + public final int value; + private ScaleMode(int v) { this.value = v; } + private ScaleMode(ScaleMode e) { this.value = e.value; } + public ScaleMode intern() { for (ScaleMode e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + +/** Maximum number of elements in ScaleMode enum. @see ScaleMode */ + +// Targeting ../nvinfer/IScaleLayer.java + + +// Targeting ../nvinfer/ISoftMaxLayer.java + + +// Targeting ../nvinfer/IConcatenationLayer.java + + +// Targeting ../nvinfer/IDeconvolutionLayer.java + + + +/** + * \enum ElementWiseOperation + * + * \brief Enumerates the binary operations that may be performed by an ElementWise layer. + * + * @see IElementWiseLayer + * */ +@Namespace("nvinfer1") public enum ElementWiseOperation { + /** Sum of the two elements. */ + kSUM(0), + /** Product of the two elements. */ + kPROD(1), + /** Maximum of the two elements. */ + kMAX(2), + /** Minimum of the two elements. */ + kMIN(3), + /** Substract the second element from the first. */ + kSUB(4), + /** Divide the first element by the second. */ + kDIV(5), + /** The first element to the power of the second element. */ + kPOW(6), + /** Floor division of the first element by the second. */ + kFLOOR_DIV(7), + /** Logical AND of two elements. */ + kAND(8), + /** Logical OR of two elements. */ + kOR(9), + /** Logical XOR of two elements. */ + kXOR(10), + /** Check if two elements are equal. 
*/ + kEQUAL(11), + /** Check if element in first tensor is greater than corresponding element in second tensor. */ + kGREATER(12), + /** Check if element in first tensor is less than corresponding element in second tensor. */ + kLESS(13); + + public final int value; + private ElementWiseOperation(int v) { this.value = v; } + private ElementWiseOperation(ElementWiseOperation e) { this.value = e.value; } + public ElementWiseOperation intern() { for (ElementWiseOperation e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} +/** Maximum number of elements in ElementWiseOperation enum. @see ElementWiseOperation */ + +// Targeting ../nvinfer/IElementWiseLayer.java + + +// Targeting ../nvinfer/IGatherLayer.java + + + +/** + * \enum RNNOperation + * + * \brief Enumerates the RNN operations that may be performed by an RNN layer. + * + * __Equation definitions__ + * + * In the equations below, we use the following naming convention: + * + * ~~~ + * t := current time step + * + * i := input gate + * o := output gate + * f := forget gate + * z := update gate + * r := reset gate + * c := cell gate + * h := hidden gate + * + * g[t] denotes the output of gate g at timestep t, e.g. + * f[t] is the output of the forget gate f. + * + * X[t] := input tensor for timestep t + * C[t] := cell state for timestep t + * H[t] := hidden state for timestep t + * + * W[g] := W (input) parameter weight matrix for gate g + * R[g] := U (recurrent) parameter weight matrix for gate g + * Wb[g] := W (input) parameter bias vector for gate g + * Rb[g] := U (recurrent) parameter bias vector for gate g + * + * Unless otherwise specified, all operations apply pointwise + * to elements of each operand tensor. + * + * ReLU(X) := max(X, 0) + * tanh(X) := hyperbolic tangent of X + * sigmoid(X) := 1 / (1 + exp(-X)) + * exp(X) := e^X + * + * A.B denotes matrix multiplication of A and B. 
+ * A*B denotes pointwise multiplication of A and B. + * ~~~ + * + * __Equations__ + * + * Depending on the value of RNNOperation chosen, each sub-layer of the RNN + * layer will perform one of the following operations: + * + * ~~~ + * ::kRELU + * + * H[t] := ReLU(W[i].X[t] + R[i].H[t-1] + Wb[i] + Rb[i]) + * + * ::kTANH + * + * H[t] := tanh(W[i].X[t] + R[i].H[t-1] + Wb[i] + Rb[i]) + * + * ::kLSTM + * + * i[t] := sigmoid(W[i].X[t] + R[i].H[t-1] + Wb[i] + Rb[i]) + * f[t] := sigmoid(W[f].X[t] + R[f].H[t-1] + Wb[f] + Rb[f]) + * o[t] := sigmoid(W[o].X[t] + R[o].H[t-1] + Wb[o] + Rb[o]) + * c[t] := tanh(W[c].X[t] + R[c].H[t-1] + Wb[c] + Rb[c]) + * + * C[t] := f[t]*C[t-1] + i[t]*c[t] + * H[t] := o[t]*tanh(C[t]) + * + * ::kGRU + * + * z[t] := sigmoid(W[z].X[t] + R[z].H[t-1] + Wb[z] + Rb[z]) + * r[t] := sigmoid(W[r].X[t] + R[r].H[t-1] + Wb[r] + Rb[r]) + * h[t] := tanh(W[h].X[t] + r[t]*(R[h].H[t-1] + Rb[h]) + Wb[h]) + * + * H[t] := (1 - z[t])*h[t] + z[t]*H[t-1] + * ~~~ + * + * @see IRNNv2Layer + * */ +@Namespace("nvinfer1") public enum RNNOperation { + /** Single gate RNN w/ ReLU activation function. */ + kRELU(0), + /** Single gate RNN w/ TANH activation function. */ + kTANH(1), + /** Four-gate LSTM network w/o peephole connections. */ + kLSTM(2), + /** Three-gate network consisting of Gated Recurrent Units. */ + kGRU(3); + + public final int value; + private RNNOperation(int v) { this.value = v; } + private RNNOperation(RNNOperation e) { this.value = e.value; } + public RNNOperation intern() { for (RNNOperation e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + +/** Maximum number of elements in RNNOperation enum. @see RNNOperation */ + + +/** + * \enum RNNDirection + * + * \brief Enumerates the RNN direction that may be performed by an RNN layer. + * + * @see IRNNv2Layer + * */ +@Namespace("nvinfer1") public enum RNNDirection { + /** Network iterations from first input to last input. 
*/ + kUNIDIRECTION(0), + /** Network iterates from first to last and vice versa and outputs concatenated. */ + kBIDIRECTION(1); + + public final int value; + private RNNDirection(int v) { this.value = v; } + private RNNDirection(RNNDirection e) { this.value = e.value; } + public RNNDirection intern() { for (RNNDirection e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + +/** Maximum number of elements in RNNDirection enum. @see RNNDirection */ + + +/** + * \enum RNNInputMode + * + * \brief Enumerates the RNN input modes that may occur with an RNN layer. + * + * If the RNN is configured with RNNInputMode::kLINEAR, then for each gate {@code g} in the first layer of the RNN, + * the input vector {@code X[t]} (length {@code E}) is left-multiplied by the gate's corresponding weight matrix {@code W[g]} + * (dimensions {@code HxE}) as usual, before being used to compute the gate output as described by \ref RNNOperation. + * + * If the RNN is configured with RNNInputMode::kSKIP, then this initial matrix multiplication is "skipped" + * and {@code W[g]} is conceptually an identity matrix. In this case, the input vector {@code X[t]} must have length {@code H} + * (the size of the hidden state). + * + * @see IRNNv2Layer + * */ +@Namespace("nvinfer1") public enum RNNInputMode { + /** Perform the normal matrix multiplication in the first recurrent layer. */ + kLINEAR(0), + /** No operation is performed on the first recurrent layer. */ + kSKIP(1); + + public final int value; + private RNNInputMode(int v) { this.value = v; } + private RNNInputMode(RNNInputMode e) { this.value = e.value; } + public RNNInputMode intern() { for (RNNInputMode e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + +/** Maximum number of elements in RNNInputMode enum. 
@see RNNInputMode */ + + +/** + * \enum RNNGateType + * + * \brief Identifies an individual gate within an RNN cell. + * + * @see RNNOperation + * */ +@Namespace("nvinfer1") public enum RNNGateType { + /** Input gate (i). */ + kINPUT(0), + /** Output gate (o). */ + kOUTPUT(1), + /** Forget gate (f). */ + kFORGET(2), + /** Update gate (z). */ + kUPDATE(3), + /** Reset gate (r). */ + kRESET(4), + /** Cell gate (c). */ + kCELL(5), + /** Hidden gate (h). */ + kHIDDEN(6); + + public final int value; + private RNNGateType(int v) { this.value = v; } + private RNNGateType(RNNGateType e) { this.value = e.value; } + public RNNGateType intern() { for (RNNGateType e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + + +// Targeting ../nvinfer/IRNNv2Layer.java + + +// Targeting ../nvinfer/IPluginV2Layer.java + + + +/** + * \enum UnaryOperation + * + * \brief Enumerates the unary operations that may be performed by a Unary layer. + * + * @see IUnaryLayer + * */ +@Namespace("nvinfer1") public enum UnaryOperation { + /** Exponentiation. */ + kEXP(0), + /** Log (base e). */ + kLOG(1), + /** Square root. */ + kSQRT(2), + /** Reciprocal. */ + kRECIP(3), + /** Absolute value. */ + kABS(4), + /** Negation. */ + kNEG(5), + /** Sine. */ + kSIN(6), + /** Cosine. */ + kCOS(7), + /** Tangent. */ + kTAN(8), + /** Hyperbolic sine. */ + kSINH(9), + /** Hyperbolic cosine. */ + kCOSH(10), + /** Inverse sine. */ + kASIN(11), + /** Inverse cosine. */ + kACOS(12), + /** Inverse tangent. */ + kATAN(13), + /** Inverse hyperbolic sine. */ + kASINH(14), + /** Inverse hyperbolic cosine. */ + kACOSH(15), + /** Inverse hyperbolic tangent. */ + kATANH(16), + /** Ceiling. */ + kCEIL(17), + /** Floor. */ + kFLOOR(18), + /** Gauss error function. */ + kERF(19), + /** Logical NOT. 
*/ + kNOT(20); + + public final int value; + private UnaryOperation(int v) { this.value = v; } + private UnaryOperation(UnaryOperation e) { this.value = e.value; } + public UnaryOperation intern() { for (UnaryOperation e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + +/** Maximum number of elements in UnaryOperation enum. @see UnaryOperation */ + +// Targeting ../nvinfer/IUnaryLayer.java + + + +/** + * \enum ReduceOperation + * + * \brief Enumerates the reduce operations that may be performed by a Reduce layer. + * + * The table shows the result of reducing across an empty volume of a given type. + * + * Operation | kFLOAT and kHALF | kINT32 | kINT8 + * --------- | ----------------- | ------- | ----- + * kSUM | 0 | 0 | 0 + * kPROD | 1 | 1 | 1 + * kMAX | negative infinity | INT_MIN | -128 + * kMIN | positive infinity | INT_MAX | 127 + * kAVG | NaN | 0 | -128 + * + * The current version of TensorRT usually performs reduction for kINT8 via kFLOAT or kHALF. + * The kINT8 values show the quantized representations of the floating-point values. + * */ +@Namespace("nvinfer1") public enum ReduceOperation { + kSUM(0), + kPROD(1), + kMAX(2), + kMIN(3), + kAVG(4); + + public final int value; + private ReduceOperation(int v) { this.value = v; } + private ReduceOperation(ReduceOperation e) { this.value = e.value; } + public ReduceOperation intern() { for (ReduceOperation e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + +/** Maximum number of elements in ReduceOperation enum. @see ReduceOperation */ + +// Targeting ../nvinfer/IReduceLayer.java + + +// Targeting ../nvinfer/IPaddingLayer.java + + +// Targeting ../nvinfer/Permutation.java + + +// Targeting ../nvinfer/IShuffleLayer.java + + + +/** + * \brief Controls how ISliceLayer handles out of bounds coordinates. 
+ * + * @see ISliceLayer + * */ +@Namespace("nvinfer1") public enum SliceMode { + /** Fail with error when the coordinates are out of bounds. This is the default. */ + kDEFAULT(0), + /** Coordinates wrap around periodically. */ + kWRAP(1); + + public final int value; + private SliceMode(int v) { this.value = v; } + private SliceMode(SliceMode e) { this.value = e.value; } + public SliceMode intern() { for (SliceMode e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + +/** Maximum number of elements in SliceMode enum. @see SliceMode */ + +// Targeting ../nvinfer/ISliceLayer.java + + +// Targeting ../nvinfer/IShapeLayer.java + + + +/** + * \enum TopKOperation + * + * \brief Enumerates the operations that may be performed by a TopK layer. + * */ +@Namespace("nvinfer1") public enum TopKOperation { + /** Maximum of the elements. */ + kMAX(0), + /** Minimum of the elements. */ + kMIN(1); + + public final int value; + private TopKOperation(int v) { this.value = v; } + private TopKOperation(TopKOperation e) { this.value = e.value; } + public TopKOperation intern() { for (TopKOperation e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + +/** Maximum number of elements in TopKOperation enum. @see TopKOperation */ + +// Targeting ../nvinfer/ITopKLayer.java + + + +/** + * \enum MatrixOperation + * + * \brief Enumerates the operations that may be performed on a tensor + * by IMatrixMultiplyLayer before multiplication. + * */ +@Namespace("nvinfer1") public enum MatrixOperation { + /** Treat x as a matrix if it has two dimensions, or as a collection of + * matrices if x has more than two dimensions, where the last two dimensions + * are the matrix dimensions. x must have at least two dimensions. */ + kNONE(0), + + /** Like kNONE, but transpose the matrix dimensions. */ + +//! 
+ kTRANSPOSE(1), + + /** Treat x as a vector if it has one dimension, or as a collection of + * vectors if x has more than one dimension. x must have at least one dimension. + * The first input tensor with dimensions [M,K] used with MatrixOperation::kVECTOR is equivalent to a tensor + * with dimensions [M, 1, K] with MatrixOperation::kNONE, i.e. is treated as M row vectors of length K. + * If MatrixOperation::kTRANSPOSE is specified, then the dimensions are [M, K, 1]. + * + * The second input tensor with dimensions [M,K] used with MatrixOperation::kVECTOR is equivalent to a tensor + * with dimensions [M, K, 1] with MatrixOperation::kNONE, i.e. is treated as M column vectors of length K. + * If MatrixOperation::kTRANSPOSE is specified, then the dimensions are [M, 1, K]. */ + kVECTOR(2); + + public final int value; + private MatrixOperation(int v) { this.value = v; } + private MatrixOperation(MatrixOperation e) { this.value = e.value; } + public MatrixOperation intern() { for (MatrixOperation e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + +/** Maximum number of elements in MatrixOperation enum. @see DataType */ + +// Targeting ../nvinfer/IMatrixMultiplyLayer.java + + +// Targeting ../nvinfer/IRaggedSoftMaxLayer.java + + +// Targeting ../nvinfer/IIdentityLayer.java + + +// Targeting ../nvinfer/IConstantLayer.java + + +// Targeting ../nvinfer/IParametricReLULayer.java + + + +/** \enum ResizeMode + * + * \brief Enumerates various modes of resize in the resize layer. + * Resize mode set using setResizeMode(). + * */ +@Namespace("nvinfer1") public enum ResizeMode { + /** ND (0 < N <= 8) nearest neighbor resizing. */ + kNEAREST(0), + /** Can handle linear (1D), bilinear (2D), and trilinear (3D) resizing. 
*/ + kLINEAR(1); + + public final int value; + private ResizeMode(int v) { this.value = v; } + private ResizeMode(ResizeMode e) { this.value = e.value; } + public ResizeMode intern() { for (ResizeMode e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} +/** Maximum number of elements in ResizeMode enum. @see ResizeMode */ + // namespace impl + +/** + * \enum ResizeCoordinateTransformation + * + * \brief The resize coordinate transformation function. + * + * @see IResizeLayer::setCoordinateTransformation() + * */ +@Namespace("nvinfer1") public enum ResizeCoordinateTransformation { + /** We can think each value in tensor has a volume, and the coordinate is a point inside this volume. + * The coordinate point is drawn as star(*) in below diagram, and multiple values range has a length. + * Let's use x_origin as the coordinate of axis x in the input tensor, x_resized as the coordinate of axis x in the + * output tensor, length_origin as length of the input tensor in axis x, and length_resize as length of the output + * tensor in axis x. + * + * |<--------------length---------->| + * | 0 | 1 | 2 | 3 | + * * * * * + * + * x_origin = x_resized * (length_origin - 1) / (length_resize - 1) + * */ + +//! +//! + kALIGN_CORNERS(0), + + /** |<--------------length--------------------->| + * | 0 | 1 | 2 | 3 | + * * * * * + * + * x_origin = x_resized * (length_origin / length_resize) + * */ + +//! +//! 
+ kASYMMETRIC(1), + + /** |<--------------length--------------------->| + * | 0 | 1 | 2 | 3 | + * * * * * + * + * x_origin = (x_resized + 0.5) * (length_origin / length_resize) - 0.5 + * */ + kHALF_PIXEL(2); + + public final int value; + private ResizeCoordinateTransformation(int v) { this.value = v; } + private ResizeCoordinateTransformation(ResizeCoordinateTransformation e) { this.value = e.value; } + public ResizeCoordinateTransformation intern() { for (ResizeCoordinateTransformation e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} +/** Maximum number of elements in ResizeCoordinateTransformation enum. @see ResizeCoordinateTransformation */ + // namespace impl + +/** + * \enum ResizeSelector + * + * \brief The coordinate selector when resize to single pixel output. + * + * @see IResizeLayer::setSelectorForSinglePixel() + * */ +@Namespace("nvinfer1") public enum ResizeSelector { + /** Use formula to map the original index. */ + kFORMULA(0), + + /** Select the upper left pixel. */ + kUPPER(1); + + public final int value; + private ResizeSelector(int v) { this.value = v; } + private ResizeSelector(ResizeSelector e) { this.value = e.value; } + public ResizeSelector intern() { for (ResizeSelector e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} +/** Maximum number of elements in ResizeSelector enum. @see ResizeSelector */ + // namespace impl + +/** + * \enum ResizeRoundMode + * + * \brief The rounding mode for nearest neighbor resize. + * + * + * @see IResizeLayer::setNearestRounding() + * */ +@Namespace("nvinfer1") public enum ResizeRoundMode { + /** Round half up. */ + kHALF_UP(0), + + /** Round half down. */ + kHALF_DOWN(1), + + /** Round to floor. */ + kFLOOR(2), + + /** Round to ceil. 
*/ + kCEIL(3); + + public final int value; + private ResizeRoundMode(int v) { this.value = v; } + private ResizeRoundMode(ResizeRoundMode e) { this.value = e.value; } + public ResizeRoundMode intern() { for (ResizeRoundMode e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} +/** Maximum number of elements in ResizeRoundMode enum. @see ResizeRoundMode */ + +// Targeting ../nvinfer/IResizeLayer.java + + + +/** Enum that describes kinds of loop outputs. */ +@Namespace("nvinfer1") public enum LoopOutput { + /** Output value is value of tensor for last iteration. */ + kLAST_VALUE(0), + + /** Output value is concatenation of values of tensor for each iteration, in forward order. */ + kCONCATENATE(1), + + /** Output value is concatenation of values of tensor for each iteration, in reverse order. */ + kREVERSE(2); + + public final int value; + private LoopOutput(int v) { this.value = v; } + private LoopOutput(LoopOutput e) { this.value = e.value; } + public LoopOutput intern() { for (LoopOutput e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + +/** Maximum number of elements in LoopOutput enum. @see DataType */ + + +/** Enum that describes kinds of trip limits. */ +@Namespace("nvinfer1") public enum TripLimit { + + /** Tensor is scalar of type kINT32 that contains the trip count. */ + kCOUNT(0), + /** Tensor is a scalar of type kBOOL. Loop terminates when value is false. */ + kWHILE(1); + + public final int value; + private TripLimit(int v) { this.value = v; } + private TripLimit(TripLimit e) { this.value = e.value; } + public TripLimit intern() { for (TripLimit e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + +/** Maximum number of elements in TripLimit enum. 
@see DataType */ + +// Targeting ../nvinfer/ILoopBoundaryLayer.java + + +// Targeting ../nvinfer/IRecurrenceLayer.java + + +// Targeting ../nvinfer/ILoopOutputLayer.java + + +// Targeting ../nvinfer/ITripLimitLayer.java + + +// Targeting ../nvinfer/IIteratorLayer.java + + +// Targeting ../nvinfer/ILoop.java + + +// Targeting ../nvinfer/ISelectLayer.java + + + +/** + * \enum FillOperation + * + * \brief Enumerates the tensor fill operations that may performed by a fill layer. + * + * @see IFillLayer + * */ +@Namespace("nvinfer1") public enum FillOperation { + /** Generate evenly spaced numbers over a specified interval. */ + kLINSPACE(0), + /** Generate a tensor with random values drawn from a uniform distribution. */ + kRANDOM_UNIFORM(1); + + public final int value; + private FillOperation(int v) { this.value = v; } + private FillOperation(FillOperation e) { this.value = e.value; } + public FillOperation intern() { for (FillOperation e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + +/** Maximum number of elements in FillOperation enum. @see FillOperation */ + +// Targeting ../nvinfer/IFillLayer.java + + +// Targeting ../nvinfer/IQuantizeLayer.java + + +// Targeting ../nvinfer/IDequantizeLayer.java + + +// Targeting ../nvinfer/INetworkDefinition.java + + + +/** + * enum CalibrationAlgoType + * + * \brief Version of calibration algorithm to use. 
+ * */ +@Namespace("nvinfer1") public enum CalibrationAlgoType { + kLEGACY_CALIBRATION(0), + kENTROPY_CALIBRATION(1), + kENTROPY_CALIBRATION_2(2), + kMINMAX_CALIBRATION(3); + + public final int value; + private CalibrationAlgoType(int v) { this.value = v; } + private CalibrationAlgoType(CalibrationAlgoType e) { this.value = e.value; } + public CalibrationAlgoType intern() { for (CalibrationAlgoType e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + +/** Maximum number of elements in CalibrationAlgoType enum. @see DataType */ + +// Targeting ../nvinfer/IInt8Calibrator.java + + +// Targeting ../nvinfer/IInt8EntropyCalibrator.java + + +// Targeting ../nvinfer/IInt8EntropyCalibrator2.java + + +// Targeting ../nvinfer/IInt8MinMaxCalibrator.java + + +// Targeting ../nvinfer/IInt8LegacyCalibrator.java + + +// Targeting ../nvinfer/IAlgorithmIOInfo.java + + +// Targeting ../nvinfer/IAlgorithmVariant.java + + +// Targeting ../nvinfer/IAlgorithmContext.java + + +// Targeting ../nvinfer/IAlgorithm.java + + +// Targeting ../nvinfer/IAlgorithmSelector.java + + + +/** + * \brief Represents one or more QuantizationFlag values using binary OR + * operations. + * + * @see IBuilderConfig::getQuantizationFlags(), IBuilderConfig::setQuantizationFlags() + * */ + + +//! +//! +//! +//! + +/** + * \enum QuantizationFlag + * + * \brief List of valid flags for quantizing the network to int8 + * + * @see IBuilderConfig::setQuantizationFlag(), IBuilderConfig::getQuantizationFlag() + * */ +@Namespace("nvinfer1") public enum QuantizationFlag { + /** IInt8EntropyCalibrator. We always run int8 calibration pass before layer fusion for + * IInt8MinMaxCalibrator and IInt8EntropyCalibrator2. Disabled by default. 
*/ + kCALIBRATE_BEFORE_FUSION(0); + + public final int value; + private QuantizationFlag(int v) { this.value = v; } + private QuantizationFlag(QuantizationFlag e) { this.value = e.value; } + public QuantizationFlag intern() { for (QuantizationFlag e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + +/** Maximum number of quantization flags in QuantizationFlag enum. @see QuantizationFlag */ + + +/** + * \brief Represents one or more QuantizationFlag values using binary OR + * operations, e.g., 1U << BuilderFlag::kFP16 | 1U << BuilderFlag::kDEBUG. + * + * @see IBuilderConfig::getFlags(), ITensor::setFlags(), + * */ + + +//! +//! +//! +//! + +/** + * \enum BuilderFlag + * + * \brief List of valid modes that the builder can enable when creating an engine from a network definition. + * + * @see IBuilderConfig::setFlag(), IBuilderConfig::getFlag() + * */ +@Namespace("nvinfer1") public enum BuilderFlag { + /** Enable FP16 layer selection, with FP32 fallback. */ + kFP16(0), + /** Enable Int8 layer selection, with FP32 fallback with FP16 fallback if kFP16 also specified. */ + kINT8(1), + /** Enable debugging of layers via synchronizing after every layer. */ + kDEBUG(2), + /** Enable layers marked to execute on GPU if layer cannot execute on DLA. */ + kGPU_FALLBACK(3), + /** Enables strict type constraints. */ + kSTRICT_TYPES(4), + /** Enable building a refittable engine. */ + kREFIT(5), + /** Disable reuse of timing information across identical layers. */ + kDISABLE_TIMING_CACHE(6), + + /** Allow (but not require) computations on tensors of type DataType::kFLOAT to use TF32. + * TF32 computes inner products by rounding the inputs to 10-bit mantissas before + * multiplying, but accumulates the sum using 23-bit mantissas. Enabled by default. */ + kTF32(7), + + /** Allow the builder to examine weights and use optimized functions when weights have suitable sparsity. */ + +//! 
+ kSPARSE_WEIGHTS(8), + + /** Change the allowed parameters in the EngineCapability::kSTANDARD flow to + * match the restrictions that EngineCapability::kSAFETY check against for DeviceType::kGPU + * and EngineCapability::kDLA_STANDALONE check against the DeviceType::kDLA case. This flag + * is forced to true if EngineCapability::kSAFETY at build time if it is unset. + * */ + kSAFETY_SCOPE(9); + + public final int value; + private BuilderFlag(int v) { this.value = v; } + private BuilderFlag(BuilderFlag e) { this.value = e.value; } + public BuilderFlag intern() { for (BuilderFlag e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + +/** Maximum number of builder flags in BuilderFlag enum. @see BuilderFlag */ + + +/** + * \enum ProfilingVerbosity + * + * \brief List of verbosity levels of layer information exposed in NVTX annotations. + * + * @see IBuilderConfig::setProfilingVerbosity(), + * IBuilderConfig::getProfilingVerbosity() + * */ +@Namespace("nvinfer1") public enum ProfilingVerbosity { + /** Register layer names in NVTX message field. */ + kDEFAULT(0), + /** Turn off NVTX traces. */ + kNONE(1), + /** Register layer names in NVTX message field and register layer detail in NVTX JSON payload field. */ + kVERBOSE(2); + + public final int value; + private ProfilingVerbosity(int v) { this.value = v; } + private ProfilingVerbosity(ProfilingVerbosity e) { this.value = e.value; } + public ProfilingVerbosity intern() { for (ProfilingVerbosity e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + +/** Maximum number of profile verbosity levels in ProfilingVerbosity enum. @see ProfilingVerbosity */ + +// Targeting ../nvinfer/ITimingCache.java + + +// Targeting ../nvinfer/IBuilderConfig.java + + + +/** \brief Represents one or more NetworkDefinitionCreationFlag flags + * using binary OR operations. 
+ * e.g., 1U << NetworkDefinitionCreationFlag::kEXPLICIT_BATCH + * + * @see IBuilder::createNetworkV2 + * */ + +//! +//! +//! + +/** \enum NetworkDefinitionCreationFlag + * + * \brief List of immutable network properties expressed at network creation time. + * NetworkDefinitionCreationFlag is used with createNetworkV2 to specify immutable properties of the network. + * The createNetwork() function always had an implicit batch dimension being specified by the + * maxBatchSize builder parameter. createNetworkV2 with kDEFAULT flag mimics that behaviour. + * + * @see IBuilder::createNetworkV2 + * */ +@Namespace("nvinfer1") public enum NetworkDefinitionCreationFlag { + /** Dynamic shape support requires that the kEXPLICIT_BATCH flag is set. + * With dynamic shapes, any of the input dimensions can vary at run-time, + * and there are no implicit dimensions in the network specification. This is specified by using the + * wildcard dimension value -1. */ + /** Mark the network to be an explicit batch network */ + kEXPLICIT_BATCH(0), + + /** Setting the network to be an explicit precision network has the following implications: + * 1) Precision of all input tensors to the network have to be specified with ITensor::setType() function + * 2) Precision of all layer output tensors in the network have to be specified using ILayer::setOutputType() + * function + * 3) The builder will not quantize the weights of any layer including those running in lower precision(INT8). It + * will + * simply cast the weights into the required precision. + * 4) Dynamic ranges must not be provided to run the network in int8 mode. Dynamic ranges of each tensor in the + * explicit + * precision network is [-127,127]. + * 5) Quantizing and dequantizing activation values between higher (FP32) and lower (INT8) precision + * will be performed using explicit Scale layers with input/output precision set appropriately. 
*/ + kEXPLICIT_PRECISION(1);/** <-- Deprecated, used for backward compatibility */ + + public final int value; + private NetworkDefinitionCreationFlag(int v) { this.value = v; } + private NetworkDefinitionCreationFlag(NetworkDefinitionCreationFlag e) { this.value = e.value; } + public NetworkDefinitionCreationFlag intern() { for (NetworkDefinitionCreationFlag e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + +/** Maximum number of elements in NetworkDefinitionCreationFlag enum. @see NetworkDefinitionCreationFlag */ + +// Targeting ../nvinfer/IBuilder.java + + + + // namespace nvinfer1 + +/** + * Internal C entry point for creating IBuilder. + * \private + * */ +public static native @NoException(true) Pointer createInferBuilder_INTERNAL(Pointer logger, int version); + +/** + * \brief Create an instance of an IBuilder class. + * + * This is the logging class for the builder. + * + * unnamed namespace avoids linkage surprises when linking objects built with different versions of this header. + * */ +@Namespace("nvinfer1") public static native @NoException(true) IBuilder createInferBuilder(@ByRef ILogger logger); + + // namespace + // namespace nvinfer1 + +// #endif // NV_INFER_H + + +// Parsed from NvInferImpl.h + +/* + * Copyright 1993-2021 NVIDIA Corporation. All rights reserved. + * + * NOTICE TO LICENSEE: + * + * This source code and/or documentation ("Licensed Deliverables") are + * subject to NVIDIA intellectual property rights under U.S. and + * international Copyright laws. + * + * These Licensed Deliverables contained herein is PROPRIETARY and + * CONFIDENTIAL to NVIDIA and is being provided under the terms and + * conditions of a form of NVIDIA software license agreement by and + * between NVIDIA and Licensee ("License Agreement") or electronically + * accepted by Licensee. 
Notwithstanding any terms or conditions to + * the contrary in the License Agreement, reproduction or disclosure + * of the Licensed Deliverables to any third party without the express + * written consent of NVIDIA is prohibited. + * + * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE + * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE + * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS + * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. + * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED + * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, + * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. + * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE + * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY + * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY + * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, + * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS + * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THESE LICENSED DELIVERABLES. + * + * U.S. Government End Users. These Licensed Deliverables are a + * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT + * 1995), consisting of "commercial computer software" and "commercial + * computer software documentation" as such terms are used in 48 + * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government + * only as a commercial end item. Consistent with 48 C.F.R.12.212 and + * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all + * U.S. Government End Users acquire the Licensed Deliverables with + * only those rights set forth herein. + * + * Any use of the Licensed Deliverables in individual and commercial + * software must include, in the user documentation and internal + * comments to the code, the above Disclaimer and U.S. Government End + * Users Notice. 
+ */ + +// #ifndef NV_INFER_IMPL_H +// #define NV_INFER_IMPL_H + +// #include "NvInferLegacyDims.h" +// #include "NvInferRuntimeCommon.h" +// Targeting ../nvinfer/IPlugin.java + + +// Targeting ../nvinfer/IPluginExt.java + + +// Targeting ../nvinfer/IPluginLayer.java + + + +/** enum class nvinfer1::ActivationType */ +; +/** enum class nvinfer1::BuilderFlag */ +; +/** enum class nvinfer1::CalibrationAlgoType */ +; +/** enum class nvinfer1::DeviceType */ +; +/** enum class nvinfer1::DimensionOperation */ +; +/** enum class nvinfer1::ElementWiseOperation */ +; +/** enum class nvinfer1::EngineCapability */ +; +/** enum class nvinfer1::FillOperation */ +; +/** enum class nvinfer1::LayerType */ +; +/** enum class nvinfer1::LoopOutput */ +; +/** enum class nvinfer1::MatrixOperation */ +; +/** enum class nvinfer1::NetworkDefinitionCreationFlag */ +; +/** enum class nvinfer1::OptProfileSelector */ +; +/** enum class nvinfer1::PaddingMode */ +; +/** enum class nvinfer1::PoolingType */ +; +/** enum class nvinfer1::ProfilingVerbosity */ +; +/** enum class nvinfer1::QuantizationFlag */ +; +/** enum class nvinfer1::ReduceOperation */ +; +/** enum class nvinfer1::ResizeCoordinateTransformation */ +; +/** enum class nvinfer1::ResizeMode */ +; +/** enum class nvinfer1::ResizeRoundMode */ +; +/** enum class nvinfer1::ResizeSelector */ +; +/** enum class nvinfer1::RNNDirection */ +; +/** enum class nvinfer1::RNNGateType */ +; +/** enum class nvinfer1::RNNInputMode */ +; +/** enum class nvinfer1::RNNOperation */ +; +/** enum class nvinfer1::ScaleMode */ +; +/** enum class nvinfer1::SliceMode */ +; +/** enum class nvinfer1::TensorLocation */ +; +/** enum class nvinfer1::TopKOperation */ +; +/** enum class nvinfer1::TripLimit */ +; +/** enum class nvinfer1::UnaryOperation */ +; +/** enum class nvinfer1::WeightsRole */ +; + + +//! +//! +//! 
+// Targeting ../nvinfer/VRoot.java + + +// Targeting ../nvinfer/VHostMemory.java + + +// Targeting ../nvinfer/VDimensionExpr.java + + +// Targeting ../nvinfer/VExprBuilder.java + + +// Targeting ../nvinfer/VRuntime.java + + +// Targeting ../nvinfer/VRefitter.java + + +// Targeting ../nvinfer/VOptimizationProfile.java + + +// Targeting ../nvinfer/VCudaEngine.java + + +// Targeting ../nvinfer/VExecutionContext.java + + +// Targeting ../nvinfer/VTensor.java + + +// Targeting ../nvinfer/VLayer.java + + +// Targeting ../nvinfer/VConvolutionLayer.java + + +// Targeting ../nvinfer/VFullyConnectedLayer.java + + +// Targeting ../nvinfer/VActivationLayer.java + + +// Targeting ../nvinfer/VPoolingLayer.java + + +// Targeting ../nvinfer/VLRNLayer.java + + +// Targeting ../nvinfer/VScaleLayer.java + + +// Targeting ../nvinfer/VSoftMaxLayer.java + + +// Targeting ../nvinfer/VConcatenationLayer.java + + +// Targeting ../nvinfer/VDeconvolutionLayer.java + + +// Targeting ../nvinfer/VElementWiseLayer.java + + +// Targeting ../nvinfer/VGatherLayer.java + + +// Targeting ../nvinfer/VRNNv2Layer.java + + +// Targeting ../nvinfer/VPluginLayer.java + + +// Targeting ../nvinfer/VPluginV2Layer.java + + +// Targeting ../nvinfer/VUnaryLayer.java + + +// Targeting ../nvinfer/VReduceLayer.java + + +// Targeting ../nvinfer/VPaddingLayer.java + + +// Targeting ../nvinfer/VShuffleLayer.java + + +// Targeting ../nvinfer/VSliceLayer.java + + +// Targeting ../nvinfer/VShapeLayer.java + + +// Targeting ../nvinfer/VTopKLayer.java + + +// Targeting ../nvinfer/VMatrixMultiplyLayer.java + + +// Targeting ../nvinfer/VRaggedSoftMaxLayer.java + + +// Targeting ../nvinfer/VIdentityLayer.java + + +// Targeting ../nvinfer/VConstantLayer.java + + +// Targeting ../nvinfer/VParametricReLULayer.java + + +// Targeting ../nvinfer/VResizeLayer.java + + +// Targeting ../nvinfer/VLoopBoundaryLayer.java + + +// Targeting ../nvinfer/VRecurrenceLayer.java + + +// Targeting ../nvinfer/VLoopOutputLayer.java + + +// 
Targeting ../nvinfer/VTripLimitLayer.java + + +// Targeting ../nvinfer/VIteratorLayer.java + + +// Targeting ../nvinfer/VLoop.java + + +// Targeting ../nvinfer/VSelectLayer.java + + +// Targeting ../nvinfer/VFillLayer.java + + +// Targeting ../nvinfer/VQuantizeLayer.java + + +// Targeting ../nvinfer/VDequantizeLayer.java + + +// Targeting ../nvinfer/VNetworkDefinition.java + + +// Targeting ../nvinfer/VAlgorithmIOInfo.java + + +// Targeting ../nvinfer/VAlgorithmVariant.java + + +// Targeting ../nvinfer/VAlgorithmContext.java + + +// Targeting ../nvinfer/VAlgorithm.java + + +// Targeting ../nvinfer/VTimingCache.java + + +// Targeting ../nvinfer/VBuilderConfig.java + + +// Targeting ../nvinfer/VBuilder.java + + + + // namespace apiv + // namespace nvinfer1 + +// #endif // NV_INFER_RUNTIME_IMPL_H + + +// Parsed from NvUtils.h + +/* + * Copyright 1993-2021 NVIDIA Corporation. All rights reserved. + * + * NOTICE TO LICENSEE: + * + * This source code and/or documentation ("Licensed Deliverables") are + * subject to NVIDIA intellectual property rights under U.S. and + * international Copyright laws. + * + * These Licensed Deliverables contained herein is PROPRIETARY and + * CONFIDENTIAL to NVIDIA and is being provided under the terms and + * conditions of a form of NVIDIA software license agreement by and + * between NVIDIA and Licensee ("License Agreement") or electronically + * accepted by Licensee. Notwithstanding any terms or conditions to + * the contrary in the License Agreement, reproduction or disclosure + * of the Licensed Deliverables to any third party without the express + * written consent of NVIDIA is prohibited. + * + * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE + * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE + * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS + * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. 
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED + * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, + * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. + * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE + * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY + * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY + * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, + * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS + * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THESE LICENSED DELIVERABLES. + * + * U.S. Government End Users. These Licensed Deliverables are a + * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT + * 1995), consisting of "commercial computer software" and "commercial + * computer software documentation" as such terms are used in 48 + * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government + * only as a commercial end item. Consistent with 48 C.F.R.12.212 and + * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all + * U.S. Government End Users acquire the Licensed Deliverables with + * only those rights set forth herein. + * + * Any use of the Licensed Deliverables in individual and commercial + * software must include, in the user documentation and internal + * comments to the code, the above Disclaimer and U.S. Government End + * Users Notice. + */ + +// #ifndef NV_UTILS_H +// #define NV_UTILS_H + + + +//! +//! +//! +// #include "NvInfer.h" + +/** + * \file NvUtils.h + * + * This file includes various utility functions + * */ + +/** + * @param input The input weights to reshape. + * @param shape The shape of the weights. + * @param shapeOrder The order of the dimensions to process for the output. + * @param data The location where the output data is placed. + * @param nbDims The number of dimensions to process. 
+ * + * \brief Reformat the input weights of the given shape based on the new + * order of dimensions. + * + * Take the weights specified by \p input with the dimensions specified by + * \p shape and re-order the weights based on the new dimensions specified + * by \p shapeOrder. The size of each dimension and the input data is not + * modified. The output volume pointed to by \p data must be the same as + * he \p input volume. + * + * Example usage: + * float *out = new float[N*C*H*W]; + * Weights input{DataType::kFLOAT, {0 ... N*C*H*W-1}, N*C*H*W size}; + * int32_t order[4]{1, 0, 3, 2}; + * int32_t shape[4]{C, N, W, H}; + * reshapeWeights(input, shape, order, out, 4); + * Weights reshaped{input.type, out, input.count}; + * + * Input Matrix{3, 2, 3, 2}: + * { 0 1}, { 2 3}, { 4 5} <-- {0, 0, *, *} + * { 6 7}, { 8 9}, {10 11} <-- {0, 1, *, *} + * {12 13}, {14 15}, {16 17} <-- {1, 0, *, *} + * {18 19}, {20 21}, {22 23} <-- {1, 1, *, *} + * {24 25}, {26 27}, {28 29} <-- {2, 0, *, *} + * {30 31}, {32 33}, {34 35} <-- {2, 1, *, *} + * + * Output Matrix{2, 3, 2, 3}: + * { 0 2 4}, { 1 3 5} <-- {0, 0, *, *} + * {12 14 16}, {13 15 17} <-- {0, 1, *, *} + * {24 26 28}, {25 27 29} <-- {0, 2, *, *} + * { 6 8 10}, { 7 9 11} <-- {1, 0, *, *} + * {18 20 22}, {19 21 23} <-- {1, 1, *, *} + * {30 32 34}, {31 33 35} <-- {1, 2, *, *} + * + * @return True on success, false on failure. + * + * \warning This file will be removed in TensorRT 10.0. + * */ + + +//! +//! +//! +//! +//! +//! +//! +//! +//! +//! 
+@Namespace("nvinfer1::utils") public static native @Cast("bool") @Deprecated @NoException(true) boolean reshapeWeights( + @Const @ByRef Weights input, @Const IntPointer shape, @Const IntPointer shapeOrder, Pointer data, int nbDims); +@Namespace("nvinfer1::utils") public static native @Cast("bool") @Deprecated @NoException(true) boolean reshapeWeights( + @Const @ByRef Weights input, @Const IntBuffer shape, @Const IntBuffer shapeOrder, Pointer data, int nbDims); +@Namespace("nvinfer1::utils") public static native @Cast("bool") @Deprecated @NoException(true) boolean reshapeWeights( + @Const @ByRef Weights input, @Const int[] shape, @Const int[] shapeOrder, Pointer data, int nbDims); + +/** + * @param input The input data to re-order. + * @param order The new order of the data sub-buffers. + * @param num The number of data sub-buffers to re-order. + * @param size The size of each data sub-buffer in bytes. + * + * \brief Takes an input stream and re-orders \p num chunks of the data + * given the \p size and \p order. + * + * In some frameworks, the ordering of the sub-buffers within a dimension + * is different than the way that TensorRT expects them. + * TensorRT expects the gate/bias sub-buffers for LSTM's to be in fico order. + * TensorFlow however formats the sub-buffers in icfo order. + * This helper function solves this in a generic fashion. 
+ * + * Example usage output of reshapeWeights above: + * int32_t indir[1]{1, 0} + * int32_t stride = W*H; + * for (int32_t x = 0, y = N*C; x < y; ++x) + * reorderSubBuffers(out + x * stride, indir, H, W); + * + * Input Matrix{2, 3, 2, 3}: + * { 0 2 4}, { 1 3 5} <-- {0, 0, *, *} + * {12 14 16}, {13 15 17} <-- {0, 1, *, *} + * {24 26 28}, {25 27 29} <-- {0, 2, *, *} + * { 6 8 10}, { 7 9 11} <-- {1, 0, *, *} + * {18 20 22}, {19 21 23} <-- {1, 1, *, *} + * {30 32 34}, {31 33 35} <-- {1, 2, *, *} + * + * Output Matrix{2, 3, 2, 3}: + * { 1 3 5}, { 0 2 4} <-- {0, 0, *, *} + * {13 15 17}, {12 14 16} <-- {0, 1, *, *} + * {25 27 29}, {24 26 28} <-- {0, 2, *, *} + * { 7 9 11}, { 6 8 10} <-- {1, 0, *, *} + * {19 21 23}, {18 20 22} <-- {1, 1, *, *} + * {31 33 35}, {30 32 34} <-- {1, 2, *, *} + * + * @return True on success, false on failure. + * + * @see reshapeWeights() + * + * \warning This file will be removed in TensorRT 10.0. + * */ + + +//! +//! +//! +//! +//! +@Namespace("nvinfer1::utils") public static native @Cast("bool") @Deprecated @NoException(true) boolean reorderSubBuffers( + Pointer input, @Const IntPointer order, int num, int size); +@Namespace("nvinfer1::utils") public static native @Cast("bool") @Deprecated @NoException(true) boolean reorderSubBuffers( + Pointer input, @Const IntBuffer order, int num, int size); +@Namespace("nvinfer1::utils") public static native @Cast("bool") @Deprecated @NoException(true) boolean reorderSubBuffers( + Pointer input, @Const int[] order, int num, int size); + +/** + * @param input The input data to transpose. + * @param type The type of the data to transpose. + * @param num The number of data sub-buffers to transpose. + * @param height The size of the height dimension to transpose. + * @param width The size of the width dimension to transpose. + * + * \brief Transpose \p num sub-buffers of \p height * \p width. + * + * @return True on success, false on failure. + * + * \warning This file will be removed in TensorRT 10.0. 
+ * */ +@Namespace("nvinfer1::utils") public static native @Cast("bool") @Deprecated @NoException(true) boolean transposeSubBuffers( + Pointer input, DataType type, int num, int height, int width); +@Namespace("nvinfer1::utils") public static native @Cast("bool") @Deprecated @NoException(true) boolean transposeSubBuffers( + Pointer input, @Cast("nvinfer1::DataType") int type, int num, int height, int width); + + // namespace utils + // namespace nvinfer1 +// #endif // NV_UTILS_H + + +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/global/nvinfer_plugin.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/global/nvinfer_plugin.java new file mode 100644 index 00000000000..4fed32f0113 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/global/nvinfer_plugin.java @@ -0,0 +1,391 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.global; + +import org.bytedeco.tensorrt.nvinfer_plugin.*; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; + +public class nvinfer_plugin extends org.bytedeco.tensorrt.presets.nvinfer_plugin { + static { Loader.load(); } + +// Parsed from NvInferPlugin.h + +/* + * Copyright 1993-2021 NVIDIA Corporation. All rights reserved. + * + * NOTICE TO LICENSEE: + * + * This source code and/or documentation ("Licensed Deliverables") are + * subject to NVIDIA intellectual property rights under U.S. 
and + * international Copyright laws. + * + * These Licensed Deliverables contained herein is PROPRIETARY and + * CONFIDENTIAL to NVIDIA and is being provided under the terms and + * conditions of a form of NVIDIA software license agreement by and + * between NVIDIA and Licensee ("License Agreement") or electronically + * accepted by Licensee. Notwithstanding any terms or conditions to + * the contrary in the License Agreement, reproduction or disclosure + * of the Licensed Deliverables to any third party without the express + * written consent of NVIDIA is prohibited. + * + * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE + * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE + * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS + * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. + * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED + * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, + * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. + * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE + * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY + * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY + * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, + * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS + * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THESE LICENSED DELIVERABLES. + * + * U.S. Government End Users. These Licensed Deliverables are a + * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT + * 1995), consisting of "commercial computer software" and "commercial + * computer software documentation" as such terms are used in 48 + * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government + * only as a commercial end item. Consistent with 48 C.F.R.12.212 and + * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all + * U.S. 
Government End Users acquire the Licensed Deliverables with + * only those rights set forth herein. + * + * Any use of the Licensed Deliverables in individual and commercial + * software must include, in the user documentation and internal + * comments to the code, the above Disclaimer and U.S. Government End + * Users Notice. + */ + +// #ifndef NV_INFER_PLUGIN_H +// #define NV_INFER_PLUGIN_H + +// #include "NvInfer.h" + +//! +//! +//! +// #include "NvInferPluginUtils.h" +/** + * \file NvInferPlugin.h + * + * This is the API for the Nvidia provided TensorRT plugins. + * */ + /** + * \brief Create a plugin layer that fuses the RPN and ROI pooling using user-defined parameters. + * Registered plugin type "RPROI_TRT". Registered plugin version "1". + * @param featureStride Feature stride. + * @param preNmsTop Number of proposals to keep before applying NMS. + * @param nmsMaxOut Number of remaining proposals after applying NMS. + * @param iouThreshold IoU threshold. + * @param minBoxSize Minimum allowed bounding box size before scaling. + * @param spatialScale Spatial scale between the input image and the last feature map. + * @param pooling Spatial dimensions of pooled ROIs. + * @param anchorRatios Aspect ratios for generating anchor windows. + * @param anchorScales Scales for generating anchor windows. + * + * @return Returns a FasterRCNN fused RPN+ROI pooling plugin. Returns nullptr on invalid inputs. + * */ + + + //! + //! + public static native IPluginV2 createRPNROIPlugin(int featureStride, int preNmsTop, int nmsMaxOut, + float iouThreshold, float minBoxSize, float spatialScale, @ByVal DimsHW pooling, + @ByVal Weights anchorRatios, @ByVal Weights anchorScales); + + /** + * \brief The Normalize plugin layer normalizes the input to have L2 norm of 1 with scale learnable. + * Registered plugin type "Normalize_TRT". Registered plugin version "1". + * @param scales Scale weights that are applied to the output tensor. 
+ * @param acrossSpatial Whether to compute the norm over adjacent channels (acrossSpatial is true) or nearby + * spatial locations (within channel in which case acrossSpatial is false). + * @param channelShared Whether the scale weight(s) is shared across channels. + * @param eps Epsilon for not dividing by zero. + * */ + + + //! + //! + public static native IPluginV2 createNormalizePlugin( + @Const Weights scales, @Cast("bool") boolean acrossSpatial, @Cast("bool") boolean channelShared, float eps); + + /** + * \brief The PriorBox plugin layer generates the prior boxes of designated sizes and aspect ratios across all + * dimensions (H x W). PriorBoxParameters defines a set of parameters for creating the PriorBox plugin layer. + * Registered plugin type "PriorBox_TRT". Registered plugin version "1". + * */ + + + //! + //! + public static native IPluginV2 createPriorBoxPlugin(@ByVal PriorBoxParameters param); + + /** + * \brief The Grid Anchor Generator plugin layer generates the prior boxes of + * designated sizes and aspect ratios across all dimensions (H x W) for all feature maps. + * GridAnchorParameters defines a set of parameters for creating the GridAnchorGenerator plugin layer. + * Registered plugin type "GridAnchor_TRT". Registered plugin version "1". + * */ + + + //! + //! + public static native IPluginV2 createAnchorGeneratorPlugin( + GridAnchorParameters param, int numLayers); + + /** + * \brief The DetectionOutput plugin layer generates the detection output based on location and confidence + * predictions by doing non maximum suppression. DetectionOutputParameters defines a set of parameters for creating + * the DetectionOutput plugin layer. Registered plugin type "NMS_TRT". Registered plugin version "1". + * */ + + + //! + //! + public static native IPluginV2 createNMSPlugin(@ByVal DetectionOutputParameters param); + + /** + * \brief The Reorg plugin reshapes input of shape CxHxW into a (C*stride*stride)x(H/stride)x(W/stride) shape, used + * in YOLOv2. 
It does that by taking 1 x stride x stride slices from tensor and flattening them into + * (stride x stride) x 1 x 1 shape. Registered plugin type "Reorg_TRT". Registered plugin version "1". + * @param stride Strides in H and W, it should divide both H and W. Also stride * stride should be less than or equal to C. + * */ + + + //! + //! + public static native IPluginV2 createReorgPlugin(int stride); + + /** + * \brief The Region plugin layer performs region proposal calculation: generate 5 bounding boxes per cell (for + * yolo9000, generate 3 bounding boxes per cell). For each box, calculating its probablities of objects detections + * from 80 pre-defined classifications (yolo9000 has 9416 pre-defined classifications, and these 9416 items are + * organized as work-tree structure). RegionParameters defines a set of parameters for creating the Region plugin + * layer. Registered plugin type "Region_TRT". Registered plugin version "1". + * */ + + + //! + //! + //! + //! + public static native IPluginV2 createRegionPlugin(@ByVal RegionParameters params); + + /** + * \brief The BatchedNMS Plugin performs non_max_suppression on the input boxes, per batch, across all classes. + * It greedily selects a subset of bounding boxes in descending order of + * score. Prunes away boxes that have a high intersection-over-union (IOU) + * overlap with previously selected boxes. Bounding boxes are supplied as [y1, x1, y2, x2], + * where (y1, x1) and (y2, x2) are the coordinates of any + * diagonal pair of box corners and the coordinates can be provided as normalized + * (i.e., lying in the interval [0, 1]) or absolute. + * The plugin expects two inputs. + * Input0 is expected to be 4-D float boxes tensor of shape [batch_size, num_boxes, + * q, 4], where q can be either 1 (if shareLocation is true) or num_classes. + * Input1 is expected to be a 3-D float scores tensor of shape [batch_size, num_boxes, num_classes] + * representing a single score corresponding to each box. 
+ * The plugin returns four outputs. + * num_detections : A [batch_size] int32 tensor indicating the number of valid + * detections per batch item. Can be less than keepTopK. Only the top num_detections[i] entries in + * nmsed_boxes[i], nmsed_scores[i] and nmsed_classes[i] are valid. + * nmsed_boxes : A [batch_size, max_detections, 4] float32 tensor containing + * the co-ordinates of non-max suppressed boxes. + * nmsed_scores : A [batch_size, max_detections] float32 tensor containing the + * scores for the boxes. + * nmsed_classes : A [batch_size, max_detections] float32 tensor containing the + * classes for the boxes. + * + * Registered plugin type "BatchedNMS_TRT". Registered plugin version "1". + * + * The batched NMS plugin can require a lot of workspace due to intermediate buffer usage. To get the + * estimated workspace size for the plugin for a batch size, use the API {@code plugin->getWorkspaceSize(batchSize)}. + * */ + + + //! + //! + public static native IPluginV2 createBatchedNMSPlugin(@ByVal NMSParameters param); + + /** + * \brief The Split Plugin performs a split operation on the input tensor. It + * splits the input tensor into several output tensors, each of a length corresponding to output_lengths. + * The split occurs along the axis specified by axis. + * @param axis The axis to split on. + * @param output_lengths The lengths of the output tensors. + * @param noutput The number of output tensors. + * */ + + + //! + //! + public static native IPluginV2 createSplitPlugin(int axis, IntPointer output_lengths, int noutput); + public static native IPluginV2 createSplitPlugin(int axis, IntBuffer output_lengths, int noutput); + public static native IPluginV2 createSplitPlugin(int axis, int[] output_lengths, int noutput); + + /** + * \brief The Instance Normalization Plugin computes the instance normalization of an input tensor. + * The instance normalization is calculated as found in the paper https://arxiv.org/abs/1607.08022. 
+ * The calculation is y = scale * (x - mean) / sqrt(variance + epsilon) + bias where mean and variance + * are computed per instance per channel. + * @param epsilon The epsilon value to use to avoid division by zero. + * @param scale_weights The input 1-dimensional scale weights of size C to scale. + * @param bias_weights The input 1-dimensional bias weights of size C to offset. + * */ + + + //! + //! + public static native IPluginV2 createInstanceNormalizationPlugin( + float epsilon, @ByVal Weights scale_weights, @ByVal Weights bias_weights); + + /** + * \brief Initialize and register all the existing TensorRT plugins to the Plugin Registry with an optional + * namespace. The plugin library author should ensure that this function name is unique to the library. This + * function should be called once before accessing the Plugin Registry. + * @param logger Logger object to print plugin registration information + * @param libNamespace Namespace used to register all the plugins in this library + * */ + public static native @Cast("bool") boolean initLibNvInferPlugins(Pointer logger, String libNamespace); + public static native @Cast("bool") boolean initLibNvInferPlugins(Pointer logger, @Cast("const char*") BytePointer libNamespace); // extern "C" + +// #endif // NV_INFER_PLUGIN_H + + +// Parsed from NvInferPluginUtils.h + +/* + * Copyright 1993-2021 NVIDIA Corporation. All rights reserved. + * + * NOTICE TO LICENSEE: + * + * This source code and/or documentation ("Licensed Deliverables") are + * subject to NVIDIA intellectual property rights under U.S. and + * international Copyright laws. + * + * These Licensed Deliverables contained herein is PROPRIETARY and + * CONFIDENTIAL to NVIDIA and is being provided under the terms and + * conditions of a form of NVIDIA software license agreement by and + * between NVIDIA and Licensee ("License Agreement") or electronically + * accepted by Licensee. 
Notwithstanding any terms or conditions to + * the contrary in the License Agreement, reproduction or disclosure + * of the Licensed Deliverables to any third party without the express + * written consent of NVIDIA is prohibited. + * + * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE + * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE + * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS + * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. + * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED + * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, + * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. + * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE + * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY + * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY + * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, + * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS + * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THESE LICENSED DELIVERABLES. + * + * U.S. Government End Users. These Licensed Deliverables are a + * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT + * 1995), consisting of "commercial computer software" and "commercial + * computer software documentation" as such terms are used in 48 + * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government + * only as a commercial end item. Consistent with 48 C.F.R.12.212 and + * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all + * U.S. Government End Users acquire the Licensed Deliverables with + * only those rights set forth herein. + * + * Any use of the Licensed Deliverables in individual and commercial + * software must include, in the user documentation and internal + * comments to the code, the above Disclaimer and U.S. Government End + * Users Notice. 
+ */ + +// #ifndef NV_INFER_PLUGIN_UTILS_H +// #define NV_INFER_PLUGIN_UTILS_H + + + +//! +//! +//! +// #include "NvInferRuntimeCommon.h" + +/** + * \file NvInferPluginUtils.h + * + * This is the API for the Nvidia provided TensorRT plugin utilities. + * It lists all the parameters utilized by the TensorRT plugins. + * */ +// Targeting ../nvinfer_plugin/Quadruple.java + + +// Targeting ../nvinfer_plugin/PriorBoxParameters.java + + +// Targeting ../nvinfer_plugin/RPROIParams.java + + +// Targeting ../nvinfer_plugin/GridAnchorParameters.java + + + +/** + * \enum CodeTypeSSD + * \brief The type of encoding used for decoding the bounding boxes and loc_data. + * */ +@Namespace("nvinfer1::plugin") public enum CodeTypeSSD { + /** Use box corners. */ + CORNER(0), + /** Use box centers and size. */ + CENTER_SIZE(1), + /** Use box centers and size. */ + CORNER_SIZE(2), + /** Use box centers and size but flip x and y coordinates. */ + TF_CENTER(3); + + public final int value; + private CodeTypeSSD(int v) { this.value = v; } + private CodeTypeSSD(CodeTypeSSD e) { this.value = e.value; } + public CodeTypeSSD intern() { for (CodeTypeSSD e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} +// Targeting ../nvinfer_plugin/DetectionOutputParameters.java + + +// Targeting ../nvinfer_plugin/softmaxTree.java + + +// Targeting ../nvinfer_plugin/RegionParameters.java + + +// Targeting ../nvinfer_plugin/NMSParameters.java + + + + // namespace plugin + // namespace nvinfer1 + +// #endif // NV_INFER_PLUGIN_UTILS_H + + +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/global/nvonnxparser.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/global/nvonnxparser.java new file mode 100644 index 00000000000..10b0296119f --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/global/nvonnxparser.java @@ -0,0 +1,169 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS 
FILE + +package org.bytedeco.tensorrt.global; + +import org.bytedeco.tensorrt.nvonnxparser.*; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; + +public class nvonnxparser extends org.bytedeco.tensorrt.presets.nvonnxparser { + static { Loader.load(); } + +// Targeting ../nvonnxparser/SubGraphCollection_t.java + + +// Targeting ../nvonnxparser/SubGraph_t.java + + +// Parsed from NvOnnxParser.h + +/* + * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +// #ifndef NV_ONNX_PARSER_H +// #define NV_ONNX_PARSER_H + +// #include "NvInfer.h" +// #include + + +//! +//! +//! +// #include + +/** + * \file NvOnnxParser.h + * + * This is the API for the ONNX Parser + * */ + +public static final int NV_ONNX_PARSER_MAJOR = 0; +public static final int NV_ONNX_PARSER_MINOR = 1; +public static final int NV_ONNX_PARSER_PATCH = 0; + + +//! +//! +@MemberGetter public static native int NV_ONNX_PARSER_VERSION(); +public static final int NV_ONNX_PARSER_VERSION = NV_ONNX_PARSER_VERSION(); + +/** \typedef SubGraph_t + * + * \brief The data structure containing the parsing capability of + * a set of nodes in an ONNX graph. + * */ + +//! +//! + +/** \typedef SubGraphCollection_t + * + * \brief The data structure containing all SubGraph_t partitioned + * out of an ONNX graph. + * */ + + +//! +//! +//! 
+ +/** + * \namespace nvonnxparser + * + * \brief The TensorRT ONNX parser API namespace + * */ + +@Namespace("nvonnxparser") public static native @Name("EnumMax") int ErrorCodeEnumMax(); + +/** \enum ErrorCode + * + * \brief the type of parser error + */ +@Namespace("nvonnxparser") public enum ErrorCode { + kSUCCESS(0), + kINTERNAL_ERROR(1), + kMEM_ALLOC_FAILED(2), + kMODEL_DESERIALIZE_FAILED(3), + kINVALID_VALUE(4), + kINVALID_GRAPH(5), + kINVALID_NODE(6), + kUNSUPPORTED_GRAPH(7), + kUNSUPPORTED_NODE(8); + + public final int value; + private ErrorCode(int v) { this.value = v; } + private ErrorCode(ErrorCode e) { this.value = e.value; } + public ErrorCode intern() { for (ErrorCode e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} +// Targeting ../nvonnxparser/IParserError.java + + +// Targeting ../nvonnxparser/IParser.java + + + + // namespace nvonnxparser + +public static native Pointer createNvOnnxParser_INTERNAL(Pointer network, Pointer logger, int version); +public static native int getNvOnnxParserVersion(); + +/** \brief Create a new parser object + * + * @param network The network definition that the parser will write to + * @param logger The logger to use + * @return a new parser object or NULL if an error occurred + * + * Any input dimensions that are constant should not be changed after parsing, + * because correctness of the translation may rely on those constants. + * Changing a dynamic input dimension, i.e. one that translates to -1 in + * TensorRT, to a constant is okay if the constant is consistent with the model. 
+ * + * @see IParser + */ +@Namespace("nvonnxparser") public static native IParser createParser(@ByRef INetworkDefinition network, @ByRef ILogger logger); + + // namespace + + // namespace nvonnxparser + +// #endif // NV_ONNX_PARSER_H + + +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/global/nvparsers.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/global/nvparsers.java new file mode 100644 index 00000000000..2dcb64d38ed --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/global/nvparsers.java @@ -0,0 +1,320 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.global; + +import org.bytedeco.tensorrt.nvparsers.*; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; + +public class nvparsers extends org.bytedeco.tensorrt.presets.nvparsers { + static { Loader.load(); } + +// Parsed from NvCaffeParser.h + +/* + * Copyright 1993-2021 NVIDIA Corporation. All rights reserved. + * + * NOTICE TO LICENSEE: + * + * This source code and/or documentation ("Licensed Deliverables") are + * subject to NVIDIA intellectual property rights under U.S. and + * international Copyright laws. 
+ * + * These Licensed Deliverables contained herein is PROPRIETARY and + * CONFIDENTIAL to NVIDIA and is being provided under the terms and + * conditions of a form of NVIDIA software license agreement by and + * between NVIDIA and Licensee ("License Agreement") or electronically + * accepted by Licensee. Notwithstanding any terms or conditions to + * the contrary in the License Agreement, reproduction or disclosure + * of the Licensed Deliverables to any third party without the express + * written consent of NVIDIA is prohibited. + * + * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE + * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE + * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS + * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. + * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED + * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, + * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. + * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE + * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY + * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY + * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, + * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS + * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THESE LICENSED DELIVERABLES. + * + * U.S. Government End Users. These Licensed Deliverables are a + * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT + * 1995), consisting of "commercial computer software" and "commercial + * computer software documentation" as such terms are used in 48 + * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government + * only as a commercial end item. Consistent with 48 C.F.R.12.212 and + * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all + * U.S. 
Government End Users acquire the Licensed Deliverables with + * only those rights set forth herein. + * + * Any use of the Licensed Deliverables in individual and commercial + * software must include, in the user documentation and internal + * comments to the code, the above Disclaimer and U.S. Government End + * Users Notice. + */ + +// #ifndef NV_CAFFE_PARSER_H +// #define NV_CAFFE_PARSER_H + + + +//! +//! +//! + +//! +//! +//! +// #include "NvInfer.h" + +/** + * \file NvCaffeParser.h + * + * This is the API for the Caffe Parser + * +

+ * + * \namespace nvcaffeparser1 + * + * \brief The TensorRT Caffe parser API namespace. + * */ +// Targeting ../nvparsers/IBlobNameToTensor.java + + +// Targeting ../nvparsers/IBinaryProtoBlob.java + + +// Targeting ../nvparsers/IPluginFactoryV2.java + + +// Targeting ../nvparsers/ICaffeParser.java + + + +/** + * \brief Creates a ICaffeParser object. + * + * @return A pointer to the ICaffeParser object is returned. + * + * @see nvcaffeparser1::ICaffeParser + * + * @deprecated ICaffeParser will be removed in TensorRT 9.0. Plan to migrate your workflow to + * use nvonnxparser::IParser for deployment. + * */ + + +//! +//! +//! +@Namespace("nvcaffeparser1") public static native @NoException(true) ICaffeParser createCaffeParser(); + +/** + * \brief Shuts down protocol buffers library. + * + * \note No part of the protocol buffers library can be used after this function is called. + * */ +@Namespace("nvcaffeparser1") public static native @NoException(true) void shutdownProtobufLibrary(); + // namespace nvcaffeparser1 + +/** + * Internal C entry point for creating ICaffeParser. + * \private + * */ +public static native @NoException(true) Pointer createNvCaffeParser_INTERNAL(); +// #endif + + +// Parsed from NvUffParser.h + +/* + * Copyright 1993-2021 NVIDIA Corporation. All rights reserved. + * + * NOTICE TO LICENSEE: + * + * This source code and/or documentation ("Licensed Deliverables") are + * subject to NVIDIA intellectual property rights under U.S. and + * international Copyright laws. + * + * These Licensed Deliverables contained herein is PROPRIETARY and + * CONFIDENTIAL to NVIDIA and is being provided under the terms and + * conditions of a form of NVIDIA software license agreement by and + * between NVIDIA and Licensee ("License Agreement") or electronically + * accepted by Licensee. 
Notwithstanding any terms or conditions to + * the contrary in the License Agreement, reproduction or disclosure + * of the Licensed Deliverables to any third party without the express + * written consent of NVIDIA is prohibited. + * + * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE + * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE + * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS + * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. + * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED + * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, + * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. + * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE + * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY + * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY + * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, + * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS + * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THESE LICENSED DELIVERABLES. + * + * U.S. Government End Users. These Licensed Deliverables are a + * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT + * 1995), consisting of "commercial computer software" and "commercial + * computer software documentation" as such terms are used in 48 + * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government + * only as a commercial end item. Consistent with 48 C.F.R.12.212 and + * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all + * U.S. Government End Users acquire the Licensed Deliverables with + * only those rights set forth herein. + * + * Any use of the Licensed Deliverables in individual and commercial + * software must include, in the user documentation and internal + * comments to the code, the above Disclaimer and U.S. Government End + * Users Notice. 
+ */ + +// #ifndef NV_UFF_PARSER_H +// #define NV_UFF_PARSER_H + + + +//! +//! +//! +// #include "NvInfer.h" + +/** + * \file NvUffParser.h + * + * This is the API for the UFF Parser + * */ + +// Current supported Universal Framework Format (UFF) version for the parser. +public static final int UFF_REQUIRED_VERSION_MAJOR = 0; +public static final int UFF_REQUIRED_VERSION_MINOR = 6; + + +//! +//! +//! +public static final int UFF_REQUIRED_VERSION_PATCH = 9; + +/** + * \namespace nvuffparser + * + * \brief The TensorRT UFF parser API namespace. + * */ + +/** + * \enum UffInputOrder + * \brief The different possible supported input order. + * */ +@Namespace("nvuffparser") public enum UffInputOrder { + /** NCHW order. */ + kNCHW(0), + /** NHWC order. */ + kNHWC(1), + /** NC order. */ + kNC(2); + + public final int value; + private UffInputOrder(int v) { this.value = v; } + private UffInputOrder(UffInputOrder e) { this.value = e.value; } + public UffInputOrder intern() { for (UffInputOrder e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + +/** + * \enum FieldType + * \brief The possible field types for custom layer. + * */ + +@Namespace("nvuffparser") public enum FieldType { + /** FP32 field type. */ + kFLOAT(0), + /** INT32 field type. */ + kINT32(1), + /** char field type. String for length>1. */ + kCHAR(2), + /** nvinfer1::Dims field type. */ + kDIMS(4), + /** nvinfer1::DataType field type. 
*/ + kDATATYPE(5), + kUNKNOWN(6); + + public final int value; + private FieldType(int v) { this.value = v; } + private FieldType(FieldType e) { this.value = e.value; } + public FieldType intern() { for (FieldType e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} +// Targeting ../nvparsers/FieldMap.java + + +// Targeting ../nvparsers/FieldCollection.java + + +// Targeting ../nvparsers/IUffParser.java + + + +/** + * \brief Creates a IUffParser object. + * + * @return A pointer to the IUffParser object is returned. + * + * @see nvuffparser::IUffParser + * + * @deprecated IUffParser will be removed in TensorRT 9.0. Plan to migrate your workflow to + * use nvonnxparser::IParser for deployment. + * */ + + +//! +//! +//! +@Namespace("nvuffparser") public static native @NoException(true) IUffParser createUffParser(); + +/** + * \brief Shuts down protocol buffers library. + * + * \note No part of the protocol buffers library can be used after this function is called. 
+ * */ + + // namespace nvuffparser + +/** + * Internal C entry point for creating IUffParser + * \private + * */ +public static native @NoException(true) Pointer createNvUffParser_INTERNAL(); + +// #endif /* !NV_UFF_PARSER_H */ + + +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Dims2.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Dims2.java new file mode 100644 index 00000000000..5a9a79efcf6 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Dims2.java @@ -0,0 +1,59 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + +/** + * \class Dims2 + * \brief Descriptor for two-dimensional data. + * */ +@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class Dims2 extends Dims32 { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public Dims2(Pointer p) { super(p); } + /** Native array allocator. Access with {@link Pointer#position(long)}. 
*/ + public Dims2(long size) { super((Pointer)null); allocateArray(size); } + private native void allocateArray(long size); + @Override public Dims2 position(long position) { + return (Dims2)super.position(position); + } + @Override public Dims2 getPointer(long i) { + return new Dims2((Pointer)this).offsetAddress(i); + } + + /** + * \brief Construct an empty Dims2 object. + * */ + + + //! + //! + //! + public Dims2() { super((Pointer)null); allocate(); } + private native void allocate(); + + /** + * \brief Construct a Dims2 from 2 elements. + * + * @param d0 The first element. + * @param d1 The second element. + * */ + public Dims2(int d0, int d1) { super((Pointer)null); allocate(d0, d1); } + private native void allocate(int d0, int d1); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Dims3.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Dims3.java new file mode 100644 index 00000000000..65319fcf60e --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Dims3.java @@ -0,0 +1,61 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \class Dims3 + * \brief Descriptor for three-dimensional data. + * */ +@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class Dims3 extends Dims32 { + static { Loader.load(); } + /** Pointer cast constructor. 
Invokes {@link Pointer#Pointer(Pointer)}. */ + public Dims3(Pointer p) { super(p); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public Dims3(long size) { super((Pointer)null); allocateArray(size); } + private native void allocateArray(long size); + @Override public Dims3 position(long position) { + return (Dims3)super.position(position); + } + @Override public Dims3 getPointer(long i) { + return new Dims3((Pointer)this).offsetAddress(i); + } + + /** + * \brief Construct an empty Dims3 object. + * */ + + + //! + //! + //! + public Dims3() { super((Pointer)null); allocate(); } + private native void allocate(); + + /** + * \brief Construct a Dims3 from 3 elements. + * + * @param d0 The first element. + * @param d1 The second element. + * @param d2 The third element. + * */ + public Dims3(int d0, int d1, int d2) { super((Pointer)null); allocate(d0, d1, d2); } + private native void allocate(int d0, int d1, int d2); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Dims32.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Dims32.java new file mode 100644 index 00000000000..5a6532071af --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Dims32.java @@ -0,0 +1,58 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + // namespace impl + +/** + * \class Dims + * \brief 
Structure to define the dimensions of a tensor. + * + * TensorRT can also return an invalid dims structure. This structure is represented by nbDims == -1 + * and d[i] == 0 for all d. + * + * TensorRT can also return an "unknown rank" dims structure. This structure is represented by nbDims == -1 + * and d[i] == -1 for all d. + * */ +@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class Dims32 extends Pointer { + static { Loader.load(); } + /** Default native constructor. */ + public Dims32() { super((Pointer)null); allocate(); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public Dims32(long size) { super((Pointer)null); allocateArray(size); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public Dims32(Pointer p) { super(p); } + private native void allocate(); + private native void allocateArray(long size); + @Override public Dims32 position(long position) { + return (Dims32)super.position(position); + } + @Override public Dims32 getPointer(long i) { + return new Dims32((Pointer)this).offsetAddress(i); + } + + /** The maximum number of dimensions supported for a tensor. */ + @MemberGetter public static native int MAX_DIMS(); + public static final int MAX_DIMS = MAX_DIMS(); + /** The number of dimensions. */ + public native int nbDims(); public native Dims32 nbDims(int setter); + /** The extent of each dimension. 
*/ + public native int d(int i); public native Dims32 d(int i, int setter); + @MemberGetter public native IntPointer d(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Dims4.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Dims4.java new file mode 100644 index 00000000000..368ff810bb6 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Dims4.java @@ -0,0 +1,62 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \class Dims4 + * \brief Descriptor for four-dimensional data. + * */ +@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class Dims4 extends Dims32 { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public Dims4(Pointer p) { super(p); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public Dims4(long size) { super((Pointer)null); allocateArray(size); } + private native void allocateArray(long size); + @Override public Dims4 position(long position) { + return (Dims4)super.position(position); + } + @Override public Dims4 getPointer(long i) { + return new Dims4((Pointer)this).offsetAddress(i); + } + + /** + * \brief Construct an empty Dims4 object. + * */ + + + //! + //! + //! 
+ public Dims4() { super((Pointer)null); allocate(); } + private native void allocate(); + + /** + * \brief Construct a Dims4 from 4 elements. + * + * @param d0 The first element. + * @param d1 The second element. + * @param d2 The third element. + * @param d3 The fourth element. + * */ + public Dims4(int d0, int d1, int d2, int d3) { super((Pointer)null); allocate(d0, d1, d2, d3); } + private native void allocate(int d0, int d1, int d2, int d3); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/DimsExprs.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/DimsExprs.java new file mode 100644 index 00000000000..76cd6e7f3ec --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/DimsExprs.java @@ -0,0 +1,50 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \class DimsExprs + * + * Analog of class Dims with expressions instead of constants for the dimensions. + * */ +@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class DimsExprs extends Pointer { + static { Loader.load(); } + /** Default native constructor. */ + public DimsExprs() { super((Pointer)null); allocate(); } + /** Native array allocator. Access with {@link Pointer#position(long)}. 
*/ + public DimsExprs(long size) { super((Pointer)null); allocateArray(size); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public DimsExprs(Pointer p) { super(p); } + private native void allocate(); + private native void allocateArray(long size); + @Override public DimsExprs position(long position) { + return (DimsExprs)super.position(position); + } + @Override public DimsExprs getPointer(long i) { + return new DimsExprs((Pointer)this).offsetAddress(i); + } + + /** The number of dimensions. */ + public native int nbDims(); public native DimsExprs nbDims(int setter); + /** The extent of each dimension. */ + public native @Const IDimensionExpr d(int i); public native DimsExprs d(int i, IDimensionExpr setter); + @MemberGetter public native @Cast("const nvinfer1::IDimensionExpr**") PointerPointer d(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/DimsHW.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/DimsHW.java new file mode 100644 index 00000000000..ddcc05fffb7 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/DimsHW.java @@ -0,0 +1,101 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \class DimsHW + * \brief Descriptor for two-dimensional spatial data. 
+ * */ +@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class DimsHW extends Dims2 { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public DimsHW(Pointer p) { super(p); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public DimsHW(long size) { super((Pointer)null); allocateArray(size); } + private native void allocateArray(long size); + @Override public DimsHW position(long position) { + return (DimsHW)super.position(position); + } + @Override public DimsHW getPointer(long i) { + return new DimsHW((Pointer)this).offsetAddress(i); + } + + /** + * \brief Construct an empty DimsHW object. + * */ + + + //! + //! + //! + public DimsHW() { super((Pointer)null); allocate(); } + private native void allocate(); + + /** + * \brief Construct a DimsHW given height and width. + * + * @param height the height of the data + * @param width the width of the data + * */ + + + //! + //! + //! + public DimsHW(int height, int width) { super((Pointer)null); allocate(height, width); } + private native void allocate(int height, int width); + + /** + * \brief Get the height. + * + * @return The height. + * */ + + + //! + //! + //! + public native @ByRef IntPointer h(); + + /** + * \brief Get the height. + * + * @return The height. + * */ + + /** + * \brief Get the width. + * + * @return The width. + * */ + + + //! + //! + //! + public native @ByRef IntPointer w(); + + /** + * \brief Get the width. + * + * @return The width. 
+ * */ +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/DynamicPluginTensorDesc.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/DynamicPluginTensorDesc.java new file mode 100644 index 00000000000..c3728e51cd4 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/DynamicPluginTensorDesc.java @@ -0,0 +1,53 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \class DynamicPluginTensorDesc + * + * Summarizes tensors that a plugin might see for an input or output. + * */ +@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class DynamicPluginTensorDesc extends Pointer { + static { Loader.load(); } + /** Default native constructor. */ + public DynamicPluginTensorDesc() { super((Pointer)null); allocate(); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public DynamicPluginTensorDesc(long size) { super((Pointer)null); allocateArray(size); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public DynamicPluginTensorDesc(Pointer p) { super(p); } + private native void allocate(); + private native void allocateArray(long size); + @Override public DynamicPluginTensorDesc position(long position) { + return (DynamicPluginTensorDesc)super.position(position); + } + @Override public DynamicPluginTensorDesc getPointer(long i) { + return new DynamicPluginTensorDesc((Pointer)this).offsetAddress(i); + } + + /** Information required to interpret a pointer to tensor data, except that desc.dims has -1 in place of any runtime dimension. */ + public native @ByRef PluginTensorDesc desc(); public native DynamicPluginTensorDesc desc(PluginTensorDesc setter); + + /** Lower bounds on tensor’s dimensions */ + public native @ByRef @Cast("nvinfer1::Dims*") Dims32 min(); public native DynamicPluginTensorDesc min(Dims32 setter); + + /** Upper bounds on tensor’s dimensions */ + public native @ByRef @Cast("nvinfer1::Dims*") Dims32 max(); public native DynamicPluginTensorDesc max(Dims32 setter); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/EnumMaxImpl.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/EnumMaxImpl.java new file mode 100644 index 00000000000..95f399a78bf --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/EnumMaxImpl.java @@ -0,0 +1,43 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + 
+/** Maximum number of elements in DataType enum. @see DataType */ +@Name("nvinfer1::impl::EnumMaxImpl") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class EnumMaxImpl extends Pointer { + static { Loader.load(); } + /** Default native constructor. */ + public EnumMaxImpl() { super((Pointer)null); allocate(); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public EnumMaxImpl(long size) { super((Pointer)null); allocateArray(size); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public EnumMaxImpl(Pointer p) { super(p); } + private native void allocate(); + private native void allocateArray(long size); + @Override public EnumMaxImpl position(long position) { + return (EnumMaxImpl)super.position(position); + } + @Override public EnumMaxImpl getPointer(long i) { + return new EnumMaxImpl((Pointer)this).offsetAddress(i); + } + + // Declaration of kVALUE that represents maximum number of elements in DataType enum + @MemberGetter public static native int kVALUE(); + public static final int kVALUE = kVALUE(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IActivationLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IActivationLayer.java new file mode 100644 index 00000000000..f6a05189adc --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IActivationLayer.java @@ -0,0 +1,115 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import 
org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \class IActivationLayer + * + * \brief An Activation layer in a network definition. + * + * This layer applies a per-element activation function to its input. + * + * The output has the same shape as the input. + * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. + * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IActivationLayer extends ILayer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IActivationLayer(Pointer p) { super(p); } + + /** + * \brief Set the type of activation to be performed. + * + * On the DLA, the valid activation types are kRELU, kSIGMOID, kTANH, and kCLIP. + * + * @see getActivationType(), ActivationType + * */ + + + //! + //! + //! + public native @NoException(true) void setActivationType(ActivationType type); + public native @NoException(true) void setActivationType(@Cast("nvinfer1::ActivationType") int type); + + /** + * \brief Get the type of activation to be performed. + * + * @see setActivationType(), ActivationType + * */ + + + //! + //! + //! + //! + public native @NoException(true) ActivationType getActivationType(); + + /** + * \brief Set the alpha parameter (must be finite). + * + * This parameter is used by the following activations: + * LeakyRelu, Elu, Selu, Softplus, Clip, HardSigmoid, ScaledTanh, + * ThresholdedRelu. + * + * It is ignored by the other activations. + * + * @see getAlpha(), setBeta() */ + + + //! + //! + //! + //! + public native @NoException(true) void setAlpha(float alpha); + + /** + * \brief Set the beta parameter (must be finite). + * + * This parameter is used by the following activations: + * Selu, Softplus, Clip, HardSigmoid, ScaledTanh. 
+ * + * It is ignored by the other activations. + * + * @see getBeta(), setAlpha() */ + + + //! + //! + public native @NoException(true) void setBeta(float beta); + + /** + * \brief Get the alpha parameter. + * + * @see getBeta(), setAlpha() */ + + + //! + //! + public native @NoException(true) float getAlpha(); + + /** + * \brief Get the beta parameter. + * + * @see getAlpha(), setBeta() */ + public native @NoException(true) float getBeta(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IAlgorithm.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IAlgorithm.java new file mode 100644 index 00000000000..51ee6acea48 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IAlgorithm.java @@ -0,0 +1,90 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \class IAlgorithm + * \brief Describes a variation of execution of a layer. + * An algorithm is represented by IAlgorithmVariant and the IAlgorithmIOInfo for each of its inputs and outputs. + * An algorithm can be selected or reproduced using AlgorithmSelector::selectAlgorithms()." + * @see IAlgorithmIOInfo, IAlgorithmVariant, IAlgorithmSelector::selectAlgorithms() + * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. 
+ * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IAlgorithm extends INoCopy { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IAlgorithm(Pointer p) { super(p); } + + /** + * \brief Returns the format of an Algorithm input or output. Algorithm inputs are incrementally numbered first, + * followed by algorithm outputs. + * @param index Index of the input or output of the algorithm. Incremental numbers assigned to indices of inputs + * and the outputs. + * + * @return a reference to IAlgorithmIOInfo specified by index or the first algorithm if index is out of range. + * + * @deprecated API will be removed in TensorRT 10.0, use IAlgorithm::getAlgorithmIOInfoByIndex instead. + * */ + + + //! + //! + public native @Const @Deprecated @ByRef @NoException(true) IAlgorithmIOInfo getAlgorithmIOInfo(int index); + + /** + * \brief Returns the algorithm variant. + * */ + + + //! + //! + public native @Const @ByRef @NoException(true) IAlgorithmVariant getAlgorithmVariant(); + + /** + * \brief The time in milliseconds to execute the algorithm. + * */ + + + //! + //! + public native @NoException(true) float getTimingMSec(); + + /** + * \brief The size of the GPU temporary memory in bytes which the algorithm uses at execution time. + * */ + + + //! + //! + //! + public native @Cast("std::size_t") @NoException(true) long getWorkspaceSize(); + + /** + * \brief Returns the format of an Algorithm input or output. Algorithm inputs are incrementally numbered first, + * followed by algorithm outputs. + * @param index Index of the input or output of the algorithm. Incremental numbers assigned to indices of inputs + * and the outputs. + * + * @return a pointer to a IAlgorithmIOInfo interface or nullptr if index is out of range. 
+ * */ + public native @Const @NoException(true) IAlgorithmIOInfo getAlgorithmIOInfoByIndex(int index); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IAlgorithmContext.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IAlgorithmContext.java new file mode 100644 index 00000000000..66ccf379bba --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IAlgorithmContext.java @@ -0,0 +1,72 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \class IAlgorithmContext + * + * \brief Describes the context and requirements, that could be fulfilled by one or more instances of IAlgorithm. + * @see IAlgorithm + * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. + * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IAlgorithmContext extends INoCopy { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IAlgorithmContext(Pointer p) { super(p); } + + /** + * \brief Return name of the algorithm node. + * This is a unique identifier for the IAlgorithmContext. + * */ + + + //! + //! 
+ public native @NoException(true) String getName(); + + /** + * \brief Get the minimum / optimum / maximum dimensions for input or output tensor. + * @param index Index of the input or output of the algorithm. Incremental numbers assigned to indices of inputs + * and the outputs. + * @param select Which of the minimum, optimum, or maximum dimensions to be queried. + * */ + + + //! + //! + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getDimensions(int index, OptProfileSelector select); + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getDimensions(int index, @Cast("nvinfer1::OptProfileSelector") int select); + + /** + * \brief Return number of inputs of the algorithm. + * */ + + + //! + //! + public native @NoException(true) int getNbInputs(); + + /** + * \brief Return number of outputs of the algorithm. + * */ + public native @NoException(true) int getNbOutputs(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IAlgorithmIOInfo.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IAlgorithmIOInfo.java new file mode 100644 index 00000000000..d095cc4a020 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IAlgorithmIOInfo.java @@ -0,0 +1,60 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \class IAlgorithmIOInfo + * + * \brief 
Carries information about input or output of the algorithm. + * IAlgorithmIOInfo for all the input and output along with IAlgorithmVariant denotes the variation of algorithm + * and can be used to select or reproduce an algorithm using IAlgorithmSelector::selectAlgorithms(). + * @see IAlgorithmVariant, IAlgorithm, IAlgorithmSelector::selectAlgorithms() + * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. + * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IAlgorithmIOInfo extends INoCopy { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IAlgorithmIOInfo(Pointer p) { super(p); } + + /** + * \brief Return TensorFormat of the input/output of algorithm. + * */ + + + //! + //! + public native @NoException(true) TensorFormat getTensorFormat(); + + /** + * \brief Return DataType of the input/output of algorithm. + * */ + + + //! + //! + public native @NoException(true) DataType getDataType(); + + /** + * \brief Return strides of the input/output tensor of algorithm. 
+ * */ + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getStrides(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IAlgorithmSelector.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IAlgorithmSelector.java new file mode 100644 index 00000000000..3bf2098255b --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IAlgorithmSelector.java @@ -0,0 +1,76 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + // IAlgorithm + +/** + * \class IAlgorithmSelector + * + * \brief Interface implemented by application for selecting and reporting algorithms of a layer provided by the + * builder. + * \note A layer in context of algorithm selection may be different from ILayer in INetworkDefiniton. + * For example, an algorithm might be implementing a conglomeration of multiple ILayers in INetworkDefinition. + * */ +@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IAlgorithmSelector extends Pointer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IAlgorithmSelector(Pointer p) { super(p); } + + /** + * \brief Select Algorithms for a layer from the given list of algorithm choices. + * + * @return The number of choices selected from [0, nbChoices-1]. 
+ * @param context The context for which the algorithm choices are valid. + * @param choices The list of algorithm choices to select for implementation of this layer. + * @param nbChoices Number of algorithm choices. + * @param selection The user writes indices of selected choices in to selection buffer which is of size nbChoices. + * + * \note TensorRT uses its default algorithm selection to choose from the list provided. + * If return value is 0, TensorRT’s default algorithm selection is used unless strict type constraints are + * set. The list of choices is valid only for this specific algorithm context. + * */ + + //! + //! + //! + //! + public native @NoException(true) int selectAlgorithms(@Const @ByRef IAlgorithmContext context, @Cast("const nvinfer1::IAlgorithm*const*") PointerPointer choices, + int nbChoices, IntPointer selection); + public native @NoException(true) int selectAlgorithms(@Const @ByRef IAlgorithmContext context, @Const @ByPtrPtr IAlgorithm choices, + int nbChoices, IntPointer selection); + public native @NoException(true) int selectAlgorithms(@Const @ByRef IAlgorithmContext context, @Const @ByPtrPtr IAlgorithm choices, + int nbChoices, IntBuffer selection); + public native @NoException(true) int selectAlgorithms(@Const @ByRef IAlgorithmContext context, @Const @ByPtrPtr IAlgorithm choices, + int nbChoices, int[] selection); + /** + * \brief Called by TensorRT to report choices it made. + * + * \note For a given optimization profile, this call comes after all calls to selectAlgorithms. + * algoChoices[i] is the choice that TensorRT made for algoContexts[i], for i in [0, nbAlgorithms-1] + * + * @param algoContexts The list of all algorithm contexts. + * @param algoChoices The list of algorithm choices made by TensorRT + * @param nbAlgorithms The size of algoContexts as well as algoChoices. 
+ * */ + public native @NoException(true) void reportAlgorithms(@Cast("const nvinfer1::IAlgorithmContext*const*") PointerPointer algoContexts, @Cast("const nvinfer1::IAlgorithm*const*") PointerPointer algoChoices, + int nbAlgorithms); + public native @NoException(true) void reportAlgorithms(@Const @ByPtrPtr IAlgorithmContext algoContexts, @Const @ByPtrPtr IAlgorithm algoChoices, + int nbAlgorithms); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IAlgorithmVariant.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IAlgorithmVariant.java new file mode 100644 index 00000000000..b3cd1c32538 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IAlgorithmVariant.java @@ -0,0 +1,52 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \class IAlgorithmVariant + * + * \brief provides a unique 128-bit identifier, which along with the input and output information + * denotes the variation of algorithm and can be used to select or reproduce an algorithm, + * using IAlgorithmSelector::selectAlgorithms() + * @see IAlgorithmIOInfo, IAlgorithm, IAlgorithmSelector::selectAlgorithms() + * \note A single implementation can have multiple tactics. + * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. 
+ * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IAlgorithmVariant extends INoCopy { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IAlgorithmVariant(Pointer p) { super(p); } + + /** + * \brief Return implementation of the algorithm. + * */ + + + //! + //! + public native @Cast("int64_t") @NoException(true) long getImplementation(); + + /** + * \brief Return tactic of the algorithm. + * */ + public native @Cast("int64_t") @NoException(true) long getTactic(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IBuilder.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IBuilder.java new file mode 100644 index 00000000000..29aeb415fd7 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IBuilder.java @@ -0,0 +1,325 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \class IBuilder + * + * \brief Builds an engine from a network definition. + * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. 
+ * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IBuilder extends INoCopy { + static { Loader.load(); } + /** Default native constructor. */ + public IBuilder() { super((Pointer)null); allocate(); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public IBuilder(long size) { super((Pointer)null); allocateArray(size); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IBuilder(Pointer p) { super(p); } + private native void allocate(); + private native void allocateArray(long size); + @Override public IBuilder position(long position) { + return (IBuilder)super.position(position); + } + @Override public IBuilder getPointer(long i) { + return new IBuilder((Pointer)this).offsetAddress(i); + } + + + /** + * \brief Set the maximum batch size. + * + * @param batchSize The maximum batch size which can be used at execution time, and also the batch size for which + * the engine will be optimized. + * + * @see getMaxBatchSize() + * */ + + + //! + //! + //! + //! + public native @NoException(true) void setMaxBatchSize(int batchSize); + + /** + * \brief Get the maximum batch size. + * + * @return The maximum batch size. + * + * @see setMaxBatchSize() + * @see getMaxDLABatchSize() + * */ + + + //! + //! + public native @NoException(true) int getMaxBatchSize(); + + /** + * \brief Determine whether the platform has fast native fp16. + * */ + + + //! + //! + public native @Cast("bool") @NoException(true) boolean platformHasFastFp16(); + + /** + * \brief Determine whether the platform has fast native int8. + * */ + + + //! + //! + //! + //! + public native @Cast("bool") @NoException(true) boolean platformHasFastInt8(); + + /** + * \brief Destroy this object. + * + * @deprecated Deprecated interface will be removed in TensorRT 10.0. + * + * \warning Calling destroy on a managed pointer will result in a double-free error. + * */ + + + //! + //! 
+ //! + public native @Deprecated @NoException(true) void destroy(); + + /** + * \brief Get the maximum batch size DLA can support. + * For any tensor the total volume of index dimensions combined(dimensions other than CHW) with the requested + * batch size should not exceed the value returned by this function. + * + * \warning getMaxDLABatchSize does not work with dynamic shapes. + * */ + + + //! + //! + public native @NoException(true) int getMaxDLABatchSize(); + + /** + * \brief Return the number of DLA engines available to this builder. + * */ + + + //! + //! + //! + //! + public native @NoException(true) int getNbDLACores(); + + /** + * \brief Set the GPU allocator. + * @param allocator Set the GPU allocator to be used by the builder. All GPU memory acquired will use this + * allocator. If NULL is passed, the default allocator will be used. + * + * Default: uses cudaMalloc/cudaFree. + * + * \note This allocator will be passed to any engines created via the builder; thus the lifetime of the allocator + * must span the lifetime of those engines as + * well as that of the builder. If nullptr is passed, the default allocator will be used. + * */ + + + //! + //! + //! + public native @NoException(true) void setGpuAllocator(IGpuAllocator allocator); + + /** + * \brief Create a builder configuration object. + * + * @see IBuilderConfig + * */ + + + //! + //! + //! + //! + //! + public native @NoException(true) IBuilderConfig createBuilderConfig(); + + /** + * \brief Builds an engine for the given INetworkDefinition and given IBuilderConfig. + * + * It enables the builder to build multiple engines based on the same network definition, but with different + * builder configurations. + * + * \note This function will synchronize the cuda stream returned by \p config.getProfileStream() before returning. + * + * @deprecated API will be removed in TensorRT 10.0, use IBuilder::buildSerializedNetwork instead. + * */ + + //! + //! + //! + //! 
+ public native @Deprecated @NoException(true) ICudaEngine buildEngineWithConfig( + @ByRef INetworkDefinition network, @ByRef IBuilderConfig config); + + /** \brief Create a network definition object + * + * Creates a network definition object with immutable properties specified using the flags parameter. Providing + * the kDEFAULT flag as parameter mimics the behaviour of createNetwork(). CreateNetworkV2 supports dynamic shapes + * and explicit batch dimensions when used with NetworkDefinitionCreationFlag::kEXPLICIT_BATCH flag. + * + * @param flags Bitset of NetworkDefinitionCreationFlags specifying network properties combined with bitwise OR. + * e.g., 1U << NetworkDefinitionCreationFlag::kEXPLICIT_BATCH + * + * @see INetworkDefinition, NetworkDefinitionCreationFlags + * */ + + //! + //! + //! + public native @NoException(true) INetworkDefinition createNetworkV2(@Cast("nvinfer1::NetworkDefinitionCreationFlags") int flags); + + /** \brief Create a new optimization profile. + * + * If the network has any dynamic input tensors, the appropriate calls to setDimensions() must be made. + * Likewise, if there are any shape input tensors, the appropriate calls to setShapeValues() are required. + * The builder retains ownership of the created optimization profile and returns a raw pointer, i.e. the users + * must not attempt to delete the returned pointer. + * + * @see IOptimizationProfile + * */ + + + //! + //! + //! + //! + //! + public native @NoException(true) IOptimizationProfile createOptimizationProfile(); + + /** + * \brief Set the ErrorRecorder for this interface + * + * Assigns the ErrorRecorder to this interface. The ErrorRecorder will track all errors during execution. + * This function will call incRefCount of the registered ErrorRecorder at least once. Setting + * recorder to nullptr unregisters the recorder with the interface, resulting in a call to decRefCount if + * a recorder has been registered. 
+ * + * If an error recorder is not set, messages will be sent to the global log stream. + * + * @param recorder The error recorder to register with this interface. */ + // + /** @see getErrorRecorder() + /** */ + + + //! + //! + //! + //! + //! + public native @NoException(true) void setErrorRecorder(IErrorRecorder recorder); + + /** + * \brief get the ErrorRecorder assigned to this interface. + * + * Retrieves the assigned error recorder object for the given class. + * A nullptr will be returned if setErrorRecorder has not been called. + * + * @return A pointer to the IErrorRecorder object that has been registered. + * + * @see setErrorRecorder() + * */ + + + //! + //! + public native @NoException(true) IErrorRecorder getErrorRecorder(); + + /** + * \brief Resets the builder state to default values. + * */ + + + //! + //! + public native @NoException(true) void reset(); + + /** + * \brief Determine whether the platform has TF32 support. + * */ + + + //! + //! + //! + //! + //! + //! + //! + public native @Cast("bool") @NoException(true) boolean platformHasTf32(); + + /** + * \brief Builds and serializes a network for the given INetworkDefinition and IBuilderConfig. + * + * This function allows building and serialization of a network without creating an engine. + * + * @param network Network definition. + * @param config Builder configuration. + * + * @return A pointer to a IHostMemory object that contains a serialized network. + * + * \note This function will synchronize the cuda stream returned by \p config.getProfileStream() before returning. + * + * @see INetworkDefinition, IBuilderConfig, IHostMemory + * */ + + + //! + //! + //! + //! + //! + //! + public native @NoException(true) IHostMemory buildSerializedNetwork(@ByRef INetworkDefinition network, @ByRef IBuilderConfig config); + + /** + * \brief Checks that a network is within the scope of the IBuilderConfig settings. + * + * @param network The network definition to check for configuration compliance. 
+ * @param config The configuration of the builder to use when checking \p network. + * + * Given an INetworkDefinition, \p network, and an IBuilderConfig, \p config, check if + * the network falls within the constraints of the builder configuration based on the + * EngineCapability, BuilderFlag, and DeviceType. If the network is within the constraints, + * then the function returns true, and false if a violation occurs. This function reports + * the conditions that are violated to the registered ErrorRecorder. + * + * @return True if network is within the scope of the restrictions specified by the builder config, + * false otherwise. + * + * \note This function will synchronize the cuda stream returned by \p config.getProfileStream() before returning. + * */ + public native @Cast("bool") @NoException(true) boolean isNetworkSupported(@Const @ByRef INetworkDefinition network, @Const @ByRef IBuilderConfig config); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IBuilderConfig.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IBuilderConfig.java new file mode 100644 index 00000000000..18193a4675a --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IBuilderConfig.java @@ -0,0 +1,769 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \class IBuilderConfig + * + * \brief Holds 
properties for configuring a builder to produce an engine. @see BuilderFlags + * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IBuilderConfig extends INoCopy { + static { Loader.load(); } + /** Default native constructor. */ + public IBuilderConfig() { super((Pointer)null); allocate(); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public IBuilderConfig(long size) { super((Pointer)null); allocateArray(size); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IBuilderConfig(Pointer p) { super(p); } + private native void allocate(); + private native void allocateArray(long size); + @Override public IBuilderConfig position(long position) { + return (IBuilderConfig)super.position(position); + } + @Override public IBuilderConfig getPointer(long i) { + return new IBuilderConfig((Pointer)this).offsetAddress(i); + } + + + /** + * \brief Set the number of minimization iterations used when timing layers. + * + * When timing layers, the builder minimizes over a set of average times for layer execution. This parameter + * controls the number of iterations used in minimization. The builder may sometimes run layers for more + * iterations to improve timing accuracy if this parameter is set to a small value and the runtime of the + * layer is short. + * + * @see getMinTimingIterations() + * */ + + + //! + //! + //! + //! + public native @NoException(true) void setMinTimingIterations(int minTiming); + + /** + * \brief Query the number of minimization iterations. + * + * By default the minimum number of iterations is 2. + * + * @see setMinTimingIterations() + * */ + + + //! + //! + //! + //! + public native @NoException(true) int getMinTimingIterations(); + + /** + * \brief Set the number of averaging iterations used when timing layers. + * + * When timing layers, the builder minimizes over a set of average times for layer execution. 
This parameter + * controls the number of iterations used in averaging. + * + * @see getAvgTimingIterations() + * */ + + + //! + //! + //! + //! + public native @NoException(true) void setAvgTimingIterations(int avgTiming); + + /** + * \brief Query the number of averaging iterations. + * + * By default the number of averaging iterations is 1. + * + * @see setAvgTimingIterations() + * */ + + + //! + //! + //! + //! + public native @NoException(true) int getAvgTimingIterations(); + + /** + * \brief Configure the builder to target specified EngineCapability flow. + * + * The flow means a sequence of API calls that allow an application to set up a runtime, engine, + * and execution context in order to run inference. + * + * The supported flows are specified in the EngineCapability enum. + * */ + + + //! + //! + //! + //! + public native @NoException(true) void setEngineCapability(EngineCapability capability); + public native @NoException(true) void setEngineCapability(@Cast("nvinfer1::EngineCapability") int capability); + + /** + * \brief Query EngineCapability flow configured for the builder. + * + * By default it returns EngineCapability::kSTANDARD. + * + * @see setEngineCapability() + * */ + + + //! + //! + //! + public native @NoException(true) EngineCapability getEngineCapability(); + + /** + * \brief Set Int8 Calibration interface. + * + * The calibrator is to minimize the information loss during the INT8 quantization process. + * */ + + + //! + //! + public native @NoException(true) void setInt8Calibrator(IInt8Calibrator calibrator); + + /** + * \brief Get Int8 Calibration interface. + * */ + + + //! + //! + //! + //! + public native @NoException(true) IInt8Calibrator getInt8Calibrator(); + + /** + * \brief Set the maximum workspace size. + * + * @param workspaceSize The maximum GPU temporary memory which the engine can use at execution time. + * + * @see getMaxWorkspaceSize() + * */ + + + //! + //! + //! + //! + //! 
+ public native @NoException(true) void setMaxWorkspaceSize(@Cast("std::size_t") long workspaceSize); + + /** + * \brief Get the maximum workspace size. + * + * By default the workspace size is 0, which means there is no temporary memory. + * + * @return The maximum workspace size. + * + * @see setMaxWorkspaceSize() + * */ + + + //! + //! + //! + //! + //! + //! + public native @Cast("std::size_t") @NoException(true) long getMaxWorkspaceSize(); + + /** + * \brief Set the build mode flags to turn on builder options for this network. + * + * The flags are listed in the BuilderFlags enum. + * The flags set configuration options to build the network. + * + * @param builderFlags The build option for an engine. + * + * \note This function will override the previous set flags, rather than bitwise ORing the new flag. + * + * @see getFlags() + * */ + + + //! + //! + //! + //! + public native @NoException(true) void setFlags(@Cast("nvinfer1::BuilderFlags") int builderFlags); + + /** + * \brief Get the build mode flags for this builder config. Defaults to 0. + * + * @return The build options as a bitmask. + * + * @see setFlags() + * */ + + + //! + //! + //! + //! + public native @Cast("nvinfer1::BuilderFlags") @NoException(true) int getFlags(); + + /** + * \brief clear a single build mode flag. + * + * clears the builder mode flag from the enabled flags. + * + * @see setFlags() + * */ + + + //! + //! + //! + //! + public native @NoException(true) void clearFlag(BuilderFlag builderFlag); + public native @NoException(true) void clearFlag(@Cast("nvinfer1::BuilderFlag") int builderFlag); + + /** + * \brief Set a single build mode flag. + * + * Add the input builder mode flag to the already enabled flags. + * + * @see setFlags() + * */ + + + //! + //! + //! + //! 
+ public native @NoException(true) void setFlag(BuilderFlag builderFlag); + public native @NoException(true) void setFlag(@Cast("nvinfer1::BuilderFlag") int builderFlag); + + /** + * \brief Returns true if the build mode flag is set + * + * @see getFlags() + * + * @return True if flag is set, false if unset. + * */ + + + //! + //! + //! + //! + public native @Cast("bool") @NoException(true) boolean getFlag(BuilderFlag builderFlag); + public native @Cast("bool") @NoException(true) boolean getFlag(@Cast("nvinfer1::BuilderFlag") int builderFlag); + + /** + * \brief Set the device that this layer must execute on. + * @param deviceType that this layer must execute on. + * If DeviceType is not set or is reset, TensorRT will use the default DeviceType set in the builder. + * + * \note The device type for a layer must be compatible with the safety flow (if specified). + * For example a layer cannot be marked for DLA execution while the builder is configured for kSAFETY. + * + * @see getDeviceType() + * */ + + + //! + //! + public native @NoException(true) void setDeviceType(@Const ILayer layer, DeviceType deviceType); + public native @NoException(true) void setDeviceType(@Const ILayer layer, @Cast("nvinfer1::DeviceType") int deviceType); + + /** + * \brief Get the device that this layer executes on. + * @return Returns DeviceType of the layer. + * */ + + + //! + //! + public native @NoException(true) DeviceType getDeviceType(@Const ILayer layer); + + /** + * \brief whether the DeviceType has been explicitly set for this layer + * @return true if device type is not default + * @see setDeviceType() getDeviceType() resetDeviceType() + * */ + + + //! + //! + //! + public native @Cast("bool") @NoException(true) boolean isDeviceTypeSet(@Const ILayer layer); + + /** + * \brief reset the DeviceType for this layer + * + * @see setDeviceType() getDeviceType() isDeviceTypeSet() + * */ + + + //! + //! 
+ public native @NoException(true) void resetDeviceType(@Const ILayer layer); + + /** + * \brief Checks if a layer can run on DLA. + * @return status true if the layer can on DLA else returns false. + * */ + + + //! + //! + //! + //! + //! + public native @Cast("bool") @NoException(true) boolean canRunOnDLA(@Const ILayer layer); + + /** + * \brief Sets the DLA core used by the network. + * @param dlaCore The DLA core to execute the engine on (0 to N-1). Default value is 0. + * + * It can be used to specify which DLA core to use via indexing, if multiple DLA cores are available. + * + * @see IRuntime::setDLACore() getDLACore() + * + * \warning Starting with TensorRT 8, the default value will be -1 if the DLA is not specified or unused. + * */ + + + //! + //! + //! + public native @NoException(true) void setDLACore(int dlaCore); + + /** + * \brief Get the DLA core that the engine executes on. + * @return If setDLACore is called, returns DLA core from 0 to N-1, else returns 0. + * + * \warning Starting with TensorRT 8, the default value will be -1 if the DLA is not specified or unused. + * */ + + + //! + //! + public native @NoException(true) int getDLACore(); + + /** + * \brief Sets the default DeviceType to be used by the builder. It ensures that all the layers that can run on + * this device will run on it, unless setDeviceType is used to override the default DeviceType for a layer. + * @see getDefaultDeviceType() + * */ + + + //! + //! + //! + public native @NoException(true) void setDefaultDeviceType(DeviceType deviceType); + public native @NoException(true) void setDefaultDeviceType(@Cast("nvinfer1::DeviceType") int deviceType); + + /** + * \brief Get the default DeviceType which was set by setDefaultDeviceType. + * + * By default it returns DeviceType::kGPU. + * */ + + + //! + //! + //! + public native @NoException(true) DeviceType getDefaultDeviceType(); + + /** + * \brief Resets the builder configuration to defaults. 
+ * + * When initializing a builder config object, we can call this function. + * */ + + + //! + //! + //! + //! + //! + public native @NoException(true) void reset(); + + /** + * \brief De-allocates any internally allocated memory. + * + * When destroying a builder config object, we can call this function. + * + * @deprecated Deprecated interface will be removed in TensorRT 10.0. + * + * \warning Calling destroy on a managed pointer will result in a double-free error. + * */ + + + //! + //! + //! + //! + public native @Deprecated @NoException(true) void destroy(); + + /** + * \brief Set the cuda stream that is used to profile this network. + * + * @param stream The cuda stream used for profiling by the builder. + * + * @see getProfileStream() + * */ + + + //! + //! + //! + //! + public native @NoException(true) void setProfileStream(CUstream_st stream); + + /** + * \brief Get the cuda stream that is used to profile this network. + * + * @return The cuda stream set by setProfileStream, nullptr if setProfileStream has not been called. + * + * @see setProfileStream() + * */ + + + //! + //! + //! + //! + public native @NoException(true) CUstream_st getProfileStream(); + + /** + * \brief Add an optimization profile. + * + * This function must be called at least once if the network has dynamic or shape input tensors. + * This function may be called at most once when building a refittable engine, as more than + * a single optimization profile are not supported for refittable engines. + * + * @param profile The new optimization profile, which must satisfy profile->isValid() == true + * @return The index of the optimization profile (starting from 0) if the input is valid, or -1 if the input is + * not valid. + * */ + + + //! + //! + //! + //! + public native @NoException(true) int addOptimizationProfile(@Const IOptimizationProfile profile); + + /** + * \brief Get number of optimization profiles. 
+ * + * This is one higher than the index of the last optimization profile that has be defined (or + * zero, if none has been defined yet). + * + * @return The number of the optimization profiles. + * */ + + + //! + //! + //! + //! + public native @NoException(true) int getNbOptimizationProfiles(); + + /** + * \brief Set verbosity level of layer information exposed in NVTX annotations. + * + * Control how much layer information will be exposed in NVTX annotations. + * + * @see ProfilingVerbosity, getProfilingVerbosity() + * */ + + + //! + //! + //! + //! + public native @NoException(true) void setProfilingVerbosity(ProfilingVerbosity verbosity); + public native @NoException(true) void setProfilingVerbosity(@Cast("nvinfer1::ProfilingVerbosity") int verbosity); + + /** + * \brief Get verbosity level of layer information exposed in NVTX annotations. + * + * Get the current setting of verbosity level of layer information exposed in + * NVTX annotations. Default value is ProfilingVerbosity::kDEFAULT. + * + * @see ProfilingVerbosity, setProfilingVerbosity() + * */ + + + //! + //! + public native @NoException(true) ProfilingVerbosity getProfilingVerbosity(); + + /** + * \brief Set Algorithm Selector. + * + * @param selector The algorithm selector to be set in the build config. */ + + + //! + //! + public native @NoException(true) void setAlgorithmSelector(IAlgorithmSelector selector); + + /** + * \brief Get Algorithm Selector. + * */ + + + //! + //! + //! + //! + public native @NoException(true) IAlgorithmSelector getAlgorithmSelector(); + + /** + * \brief Add a calibration profile. + * + * Calibration optimization profile must be set if int8 calibration is used to set scales for a network with + * runtime dimensions. + * + * @param profile The new calibration profile, which must satisfy profile->isValid() == true or be nullptr. + * MIN and MAX values will be overwritten by kOPT. + * @return True if the calibration profile was set correctly. + * */ + + + //! + //! + //! 
+ public native @Cast("bool") @NoException(true) boolean setCalibrationProfile(@Const IOptimizationProfile profile); + + /** + * \brief Get the current calibration profile. + * + * @return A pointer to the current calibration profile or nullptr if calibration profile is unset. + * */ + + + //! + //! + //! + //! + //! + //! + public native @Const @NoException(true) IOptimizationProfile getCalibrationProfile(); + + /** + * \brief Set the quantization flags. + * + * The flags are listed in the QuantizationFlag enum. + * The flags set configuration options to quantize the network in int8. + * + * @param flags The quantization flags. + * + * \note This function will override the previous set flags, rather than bitwise ORing the new flag. + * + * @see getQuantizationFlags() + * */ + + + //! + //! + //! + //! + public native @NoException(true) void setQuantizationFlags(@Cast("nvinfer1::QuantizationFlags") int flags); + + /** + * \brief Get the quantization flags. + * + * @return The quantization flags as a bitmask. + * + * @see setQuantizationFlag() + * */ + + + //! + //! + //! + //! + public native @Cast("nvinfer1::QuantizationFlags") @NoException(true) int getQuantizationFlags(); + + /** + * \brief clear a quantization flag. + * + * Clears the quantization flag from the enabled quantization flags. + * + * @see setQuantizationFlags() + * */ + + + //! + //! + //! + //! + public native @NoException(true) void clearQuantizationFlag(QuantizationFlag flag); + public native @NoException(true) void clearQuantizationFlag(@Cast("nvinfer1::QuantizationFlag") int flag); + + /** + * \brief Set a single quantization flag. + * + * Add the input quantization flag to the already enabled quantization flags. + * + * @see setQuantizationFlags() + * */ + + + //! + //! + //! + //! 
+ public native @NoException(true) void setQuantizationFlag(QuantizationFlag flag); + public native @NoException(true) void setQuantizationFlag(@Cast("nvinfer1::QuantizationFlag") int flag); + + /** + * \brief Returns true if the quantization flag is set. + * + * @see getQuantizationFlags() + * + * @return True if quantization flag is set, false if unset. + * */ + + + //! + //! + //! + //! + //! + //! + //! + //! + public native @Cast("bool") @NoException(true) boolean getQuantizationFlag(QuantizationFlag flag); + public native @Cast("bool") @NoException(true) boolean getQuantizationFlag(@Cast("nvinfer1::QuantizationFlag") int flag); + + /** + * \brief Set tactic sources. + * + * This bitset controls which tactic sources TensorRT is allowed to use for tactic + * selection. + * + * By default, kCUBLAS and kCUDNN are always enabled. kCUBLAS_LT is enabled for x86 + * platforms as well as non-x86 platforms when CUDA >= 11.0. + * + * Multiple tactic sources may be combined with a bitwise OR operation. For example, + * to enable cublas and cublasLt as tactic sources, use a value of: + * + * 1U << static_cast(TacticSource::kCUBLAS) | 1U << + * static_cast(TacticSource::kCUBLAS_LT) + * + * @see getTacticSources + * + * @return true if the tactic sources in the build configuration were updated. + * The tactic sources in the build configuration will not be updated if the provided value is invalid. + * */ + + + //! + //! + //! + //! + //! + public native @Cast("bool") @NoException(true) boolean setTacticSources(@Cast("nvinfer1::TacticSources") int tacticSources); + + /** + * \brief Get tactic sources. + * + * Get the tactic sources currently set in the engine build + * configuration. + * + * @see setTacticSources + * + * @return tactic sources + * */ + + + //! + //! + //! + //! + //! + //! 
+ public native @Cast("nvinfer1::TacticSources") @NoException(true) int getTacticSources(); + + /** + * \brief Create timing cache + * + * Create ITimingCache instance from serialized raw data. The created timing cache doesn’t belong to + * a specific IBuilderConfig. It can be shared by multiple builder instances. Call setTimingCache() + * before launching a builder to attach cache to builder instance. + * + * @param blob A pointer to the raw data that contains serialized timing cache + * @param size The size in bytes of the serialized timing cache. Size 0 means create a new cache from scratch + * + * @see setTimingCache + * + * @return the pointer to ITimingCache created + * */ + + + //! + //! + //! + //! + //! + //! + //! + public native @NoException(true) ITimingCache createTimingCache(@Const Pointer blob, @Cast("std::size_t") long size); + + /** + * \brief Attach a timing cache to IBuilderConfig + * + * The timing cache has verification header to make sure the provided cache can be used in current environment. + * A failure will be reported if the CUDA device property in the provided cache is different from current + * environment. ignoreMismatch = true skips strict verification and allows loading cache created from a different + * device. + * + * The cache must not be destroyed until after the engine is built. + * + * @param cache the timing cache to be used + * @param ignoreMismatch whether or not allow using a cache that contains different CUDA device property + * + * @return true if set successfully, false otherwise + * + * \warning Using cache generated from devices with different CUDA device properties may lead to + * functional/performance bugs. + * */ + + + //! + //! + //! 
+ public native @Cast("bool") @NoException(true) boolean setTimingCache(@Const @ByRef ITimingCache cache, @Cast("bool") boolean ignoreMismatch); + + /** + * \brief Get the pointer to the timing cache from current IBuilderConfig + * + * @return pointer to the timing cache used in current IBuilderConfig + * */ + public native @Const @NoException(true) ITimingCache getTimingCache(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IConcatenationLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IConcatenationLayer.java new file mode 100644 index 00000000000..f578ba089af --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IConcatenationLayer.java @@ -0,0 +1,63 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \class IConcatenationLayer + * + * \brief A concatenation layer in a network definition. + * + * The output dimension along the concatenation axis is the sum of the corresponding input dimensions. + * Every other output dimension is the same as the corresponding dimension of the inputs. + * + * \warning All tensors must have the same dimensions except along the concatenation axis. + * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. 
+ * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IConcatenationLayer extends ILayer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IConcatenationLayer(Pointer p) { super(p); } + + /** + * \brief Set the axis along which concatenation occurs. + * + * 0 is the major axis (excluding the batch dimension). The default is the number of non-batch axes in the tensor + * minus three (e.g. for an NCHW input it would be 0), or 0 if there are fewer than 3 non-batch axes. + * + * When running this layer on the DLA, only concat across the Channel axis is valid. + * + * @param axis The axis along which concatenation occurs. + * */ + + + //! + //! + //! + public native @NoException(true) void setAxis(int axis); + + /** + * \brief Get the axis along which concatenation occurs. + * + * @see setAxis() + * */ + public native @NoException(true) int getAxis(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IConstantLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IConstantLayer.java new file mode 100644 index 00000000000..e4da5b8331c --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IConstantLayer.java @@ -0,0 +1,87 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; 
+ + +/** \class IConstantLayer + * + * \brief Layer that represents a constant value. + * \note This layer does not support boolean types. + * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. + * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IConstantLayer extends ILayer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IConstantLayer(Pointer p) { super(p); } + + /** + * \brief Set the weights for the layer. + * + * If weights.type is DataType::kINT32, the output is a tensor of 32-bit indices. + * Otherwise the output is a tensor of real values and the output type will be + * follow TensorRT's normal precision rules. + * + * @see getWeights() + * */ + + + //! + //! + //! + public native @NoException(true) void setWeights(@ByVal Weights weights); + + /** + * \brief Get the weights for the layer. + * + * @see setWeights + * */ + + + //! + //! + //! + //! + public native @ByVal @NoException(true) Weights getWeights(); + + /** + * \brief Set the dimensions for the layer. + * + * @param dimensions The dimensions of the layer + * + * @see setDimensions + * */ + + + //! + //! + //! + //! + public native @NoException(true) void setDimensions(@ByVal @Cast("nvinfer1::Dims*") Dims32 dimensions); + + /** + * \brief Get the dimensions for the layer. 
+ * + * @return the dimensions for the layer + * + * @see getDimensions + * */ + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getDimensions(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IConvolutionLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IConvolutionLayer.java new file mode 100644 index 00000000000..fae11cf4fae --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IConvolutionLayer.java @@ -0,0 +1,556 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + // namespace impl + +/** + * \class IConvolutionLayer + * + * \brief A convolution layer in a network definition. + * + * This layer performs a correlation operation between 3-dimensional filter with a 4-dimensional tensor to produce + * another 4-dimensional tensor. + * + * An optional bias argument is supported, which adds a per-channel constant to each value in the output. + * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. + * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IConvolutionLayer extends ILayer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public IConvolutionLayer(Pointer p) { super(p); } + + /** + * \brief Set the HW kernel size of the convolution. + * + * If executing this layer on DLA, both height and width of kernel size must be in the range [1,32]. + * + * @see getKernelSize() + * + * @deprecated Superseded by setKernelSizeNd and will be removed in TensorRT 9.0. + * */ + + + //! + //! + //! + //! + public native @Deprecated @NoException(true) void setKernelSize(@ByVal DimsHW kernelSize); + + /** + * \brief Get the HW kernel size of the convolution. + * + * @see setKernelSize() + * + * @deprecated Superseded by getKernelSizeNd and will be removed in TensorRT 9.0. + * */ + + + //! + //! + //! + //! + public native @Deprecated @ByVal @NoException(true) DimsHW getKernelSize(); + + /** + * \brief Set the number of output maps for the convolution. + * + * If executing this layer on DLA, the number of output maps must be in the range [1,8192]. + * + * @see getNbOutputMaps() + * */ + + + //! + //! + //! + public native @NoException(true) void setNbOutputMaps(int nbOutputMaps); + + /** + * \brief Get the number of output maps for the convolution. + * + * @see setNbOutputMaps() + * */ + + + //! + //! + //! + //! + //! + //! + public native @NoException(true) int getNbOutputMaps(); + + /** + * \brief Get the stride of the convolution. + * + * Default: (1,1) + * + * If executing this layer on DLA, both height and width of stride must be in the range [1,8]. + * + * @see getStride() + * + * @deprecated Superseded by setStrideNd and will be removed in TensorRT 9.0. + * */ + + + //! + //! + //! + public native @Deprecated @NoException(true) void setStride(@ByVal DimsHW stride); + + /** + * \brief Get the stride of the convolution. + * + * @deprecated Superseded by getStrideNd and will be removed in TensorRT 9.0. + * */ + + + //! + //! + //! + //! + //! + //! + //! + public native @Deprecated @ByVal @NoException(true) DimsHW getStride(); + + /** + * \brief Set the padding of the convolution. 
+ * + * The input will be zero-padded by this number of elements in the height and width directions. + * Padding is symmetric. + * + * Default: (0,0) + * + * If executing this layer on DLA, both height and width of padding must be in the range [0,31], + * and the padding size must be less than the kernel size. + * + * @see getPadding() + * + * @deprecated Superseded by setPaddingNd and will be removed in TensorRT 9.0. + * */ + + + //! + //! + //! + //! + public native @Deprecated @NoException(true) void setPadding(@ByVal DimsHW padding); + + /** + * \brief Get the padding of the convolution. If the padding is asymmetric, the pre-padding is returned. + * + * @see setPadding() + * + * @deprecated Superseded by getPaddingNd and will be removed in TensorRT 9.0. + * */ + + + //! + //! + //! + //! + //! + //! + //! + public native @Deprecated @ByVal @NoException(true) DimsHW getPadding(); + + /** + * \brief Set the number of groups for a convolution. + * + * The input tensor channels are divided into \p nbGroups groups, and a convolution is executed for each group, + * using a filter per group. The results of the group convolutions are concatenated to form the output. + * + * \note When using groups in int8 mode, the size of the groups (i.e. the channel count divided by the group + * count) must be a multiple of 4 for both input and output. + * + * Default: 1 + * + * If executing this layer on DLA, the max number of groups is 8192. + * + * @see getNbGroups() + * */ + + + //! + //! + //! + public native @NoException(true) void setNbGroups(int nbGroups); + + /** + * \brief Get the number of groups of the convolution. + * + * @see setNbGroups() + * */ + + + //! + //! + //! + //! + public native @NoException(true) int getNbGroups(); + + /** + * \brief Set the kernel weights for the convolution. 
+ * + * The weights are specified as a contiguous array in \p GKCRS order, where \p G is the number of groups, \p K + * the number of output feature maps, \p C the number of input channels, and \p R and \p S are the height and + * width of the filter. + * + * @see getKernelWeights() + * */ + + + //! + //! + //! + public native @NoException(true) void setKernelWeights(@ByVal Weights weights); + + /** + * \brief Get the kernel weights of the convolution. + * + * @see setKernelWeights() + * */ + + + //! + //! + //! + //! + //! + public native @ByVal @NoException(true) Weights getKernelWeights(); + + /** + * \brief Set the bias weights for the convolution. + * + * Bias is optional. To omit bias, set the count value of the weights structure to zero. + * + * The bias is applied per-channel, so the number of weights (if non-zero) must be equal to the number of output + * feature maps. + * + * @see getBiasWeights() + * */ + + + //! + //! + //! + public native @NoException(true) void setBiasWeights(@ByVal Weights weights); + + /** + * \brief Get the bias weights for the convolution. + * + * @see setBiasWeights() + * */ + + + //! + //! + //! + //! + //! + //! + public native @ByVal @NoException(true) Weights getBiasWeights(); + + /** + * \brief Set the dilation for a convolution. + * + * Default: (1,1) + * + * If executing this layer on DLA, both height and width must be in the range [1,32]. + * + * @see getDilation() + * + * @deprecated Superseded by setDilationNd and will be removed in TensorRT 9.0. + * */ + + + //! + //! + //! + //! + public native @Deprecated @NoException(true) void setDilation(@ByVal DimsHW dilation); + + /** + * \brief Get the dilation for a convolution. + * + * @see setDilation() + * + * @deprecated Superseded by getDilationNd and will be removed in TensorRT 9.0. + * */ + + + //! + //! + //! + //! + //! + //! 
+ public native @Deprecated @ByVal @NoException(true) DimsHW getDilation(); + + /** + * \brief Set the multi-dimension pre-padding of the convolution. + * + * The start of the input will be zero-padded by this number of elements in each dimension. + * + * Default: (0, 0, ..., 0) + * + * If executing this layer on DLA, only support 2D padding, both height and width of padding must be in the range + * [0,31], and the padding must be less than the kernel size. + * + * @see getPrePadding() + * */ + + + //! + //! + //! + public native @NoException(true) void setPrePadding(@ByVal @Cast("nvinfer1::Dims*") Dims32 padding); + + /** + * \brief Get the pre-padding. + * + * @see setPrePadding() + * */ + + + //! + //! + //! + //! + //! + //! + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getPrePadding(); + + /** + * \brief Set the multi-dimension post-padding of the convolution. + * + * The end of the input will be zero-padded by this number of elements in each dimension. + * + * Default: (0, 0, ..., 0) + * + * If executing this layer on DLA, only support 2D padding, both height and width of padding must be in the range + * [0,31], and the padding must be less than the kernel size. + * + * @see getPostPadding() + * */ + + + //! + //! + //! + public native @NoException(true) void setPostPadding(@ByVal @Cast("nvinfer1::Dims*") Dims32 padding); + + /** + * \brief Get the post-padding. + * + * @see setPostPadding() + * */ + + + //! + //! + //! + //! + //! + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getPostPadding(); + + /** + * \brief Set the padding mode. + * + * Padding mode takes precedence if both setPaddingMode and setPre/PostPadding are used. + * + * Default: kEXPLICIT_ROUND_DOWN + * + * @see getPaddingMode() + * */ + + + //! + //! + //! + //! 
+ public native @NoException(true) void setPaddingMode(PaddingMode paddingMode); + public native @NoException(true) void setPaddingMode(@Cast("nvinfer1::PaddingMode") int paddingMode); + + /** + * \brief Get the padding mode. + * + * Default: kEXPLICIT_ROUND_DOWN + * + * @see setPaddingMode() + * */ + + + //! + //! + //! + //! + public native @NoException(true) PaddingMode getPaddingMode(); + + /** + * \brief Set the multi-dimension kernel size of the convolution. + * + * If executing this layer on DLA, only support 2D kernel size, both height and width of kernel size must be in the + * range [1,32]. + * + * @see getKernelSizeNd() + * */ + + + //! + //! + //! + public native @NoException(true) void setKernelSizeNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 kernelSize); + + /** + * \brief Get the multi-dimension kernel size of the convolution. + * + * @see setKernelSizeNd() + * */ + + + //! + //! + //! + //! + //! + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getKernelSizeNd(); + + /** + * \brief Set the multi-dimension stride of the convolution. + * + * Default: (1, 1, ..., 1) + * + * If executing this layer on DLA, only support 2D stride, both height and width of stride must be in the range + * [1,8]. + * + * @see getStrideNd() setStride() getStride() + * */ + + + //! + //! + //! + public native @NoException(true) void setStrideNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 stride); + + /** + * \brief Get the multi-dimension stride of the convolution. + * + * @see setStrideNd() + * */ + + + //! + //! + //! + //! + //! + //! + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getStrideNd(); + + /** + * \brief Set the multi-dimension padding of the convolution. + * + * The input will be zero-padded by this number of elements in each dimension. + * Padding is symmetric. 
+ * + * Default: (0, 0, ..., 0) + * + * If executing this layer on DLA, only support 2D padding, both height and width of padding must be in the range + * [0,31], and the padding must be less than the kernel size. + * + * @see getPaddingNd() setPadding() getPadding() + * */ + + + //! + //! + //! + //! + public native @NoException(true) void setPaddingNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 padding); + + /** + * \brief Get the multi-dimension padding of the convolution. + * + * If the padding is asymmetric, the pre-padding is returned. + * + * @see setPaddingNd() + * */ + + + //! + //! + //! + //! + //! + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getPaddingNd(); + + /** + * \brief Set the multi-dimension dilation of the convolution. + * + * Default: (1, 1, ..., 1) + * + * If executing this layer on DLA, only support 2D padding, both height and width must be in the range [1,32]. + * + * @see getDilation() + * */ + + + //! + //! + //! + public native @NoException(true) void setDilationNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 dilation); + + /** + * \brief Get the multi-dimension dilation of the convolution. + * + * @see setDilation() + * */ + + + //! + //! + //! + //! + //! + //! + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getDilationNd(); + + /** + * \brief Append or replace an input of this layer with a specific tensor + * + * @param index the index of the input to modify. + * @param tensor the new input tensor + * + * For a IConvolutionLayer, only index 0 is valid unless explicit precision mode is enabled. + * With explicit precision mode, values 0-1 are valid where value 1 overrides kernel weights. + * Kernel weights tensor (computed at build-time) must be an output of dequantize scale layer (i.e. a scale layer + * with int8 input and float output) in explicit precision network. Conversely, this input tensor can be overridden + * via appropriate set call. 
+ * + * The indices are as follows: + * + * - 0: The input activation tensor. + * - 1: The kernel weights tensor (a constant tensor). + * + * If this function is called with a value greater than 0, then the function getNbInputs() changes */ +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ICudaEngine.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ICudaEngine.java new file mode 100644 index 00000000000..1a7c8ea978a --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ICudaEngine.java @@ -0,0 +1,651 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \class ICudaEngine + * + * \brief An engine for executing inference on a built network, with functionally unsafe features. + * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. + * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class ICudaEngine extends INoCopy { + static { Loader.load(); } + /** Default native constructor. */ + public ICudaEngine() { super((Pointer)null); allocate(); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public ICudaEngine(long size) { super((Pointer)null); allocateArray(size); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public ICudaEngine(Pointer p) { super(p); } + private native void allocate(); + private native void allocateArray(long size); + @Override public ICudaEngine position(long position) { + return (ICudaEngine)super.position(position); + } + @Override public ICudaEngine getPointer(long i) { + return new ICudaEngine((Pointer)this).offsetAddress(i); + } + + + /** + * \brief Get the number of binding indices. + * + * There are separate binding indices for each optimization profile. + * This method returns the total over all profiles. + * If the engine has been built for K profiles, the first getNbBindings() / K bindings are used by profile + * number 0, the following getNbBindings() / K bindings are used by profile number 1 etc. + * + * @see getBindingIndex(); + * */ + + + //! + //! + //! + //! + //! + //! + //! + public native @NoException(true) int getNbBindings(); + + /** + * \brief Retrieve the binding index for a named tensor. + * + * IExecutionContext::enqueue() and IExecutionContext::execute() require an array of buffers. + * + * Engine bindings map from tensor names to indices in this array. + * Binding indices are assigned at engine build time, and take values in the range [0 ... n-1] where n is the total + * number of inputs and outputs. + * + * To get the binding index of the name in an optimization profile with index k > 0, + * mangle the name by appending " [profile k]", as described for method getBindingName(). + * + * @param name The tensor name. + * @return The binding index for the named tensor, or -1 if the name is not found. + * + * @see getNbBindings() getBindingName() + * */ + + + //! + //! + //! + //! + //! + //! + public native @NoException(true) int getBindingIndex(String name); + public native @NoException(true) int getBindingIndex(@Cast("const char*") BytePointer name); + + /** + * \brief Retrieve the name corresponding to a binding index. + * + * This is the reverse mapping to that provided by getBindingIndex(). 
+ * + * For optimization profiles with an index k > 0, the name is mangled by appending + * " [profile k]", with k written in decimal. For example, if the tensor in the + * INetworkDefinition had the name "foo", and bindingIndex refers to that tensor in the + * optimization profile with index 3, getBindingName returns "foo [profile 3]". + * + * @param bindingIndex The binding index. + * @return The name corresponding to the index, or nullptr if the index is out of range. + * + * @see getBindingIndex() + * */ + + + //! + //! + //! + //! + public native @NoException(true) String getBindingName(int bindingIndex); + + /** + * \brief Determine whether a binding is an input binding. + * + * @param bindingIndex The binding index. + * @return True if the index corresponds to an input binding and the index is in range. + * + * @see getBindingIndex() + * */ + + + //! + //! + //! + //! + //! + //! + public native @Cast("bool") @NoException(true) boolean bindingIsInput(int bindingIndex); + + /** + * \brief Get the dimensions of a binding. + * + * @param bindingIndex The binding index. + * @return The dimensions of the binding if the index is in range, otherwise Dims(). + * Has -1 for any dimension that varies within the optimization profile. + * + * For example, suppose an INetworkDefinition has an input with shape [-1,-1] + * that becomes a binding b in the engine. If the associated optimization profile + * specifies that b has minimum dimensions as [6,9] and maximum dimensions [7,9], + * getBindingDimensions(b) returns [-1,9], despite the second dimension being + * dynamic in the INetworkDefinition. + * + * Because each optimization profile has separate bindings, the returned value can + * differ across profiles. Consider another binding b' for the same network input, + * but for another optimization profile. If that other profile specifies minimum + * dimensions [5,8] and maximum dimensions [5,9], getBindingDimensions(b') returns [5,-1]. 
+ * + * @see getBindingIndex() + * */ + + + //! + //! + //! + //! + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getBindingDimensions(int bindingIndex); + + /** + * \brief Determine the required data type for a buffer from its binding index. + * + * @param bindingIndex The binding index. + * @return The type of the data in the buffer. + * + * @see getBindingIndex() + * */ + + + //! + //! + //! + //! + public native @NoException(true) DataType getBindingDataType(int bindingIndex); + + /** + * \brief Get the maximum batch size which can be used for inference. + * + * For an engine built from an INetworkDefinition without an implicit batch dimension, this will always return 1. + * + * @return The maximum batch size for this engine. + * */ + + + //! + //! + //! + //! + public native @NoException(true) int getMaxBatchSize(); + + /** + * \brief Get the number of layers in the network. + * + * The number of layers in the network is not necessarily the number in the original network definition, as layers + * may be combined or eliminated as the engine is optimized. This value can be useful when building per-layer + * tables, such as when aggregating profiling data over a number of executions. + * + * @return The number of layers in the network. + * */ + + + //! + //! + //! + //! + //! + public native @NoException(true) int getNbLayers(); + + /** + * \brief Serialize the network to a stream. + * + * @return A IHostMemory object that contains the serialized engine. + * + * The network may be deserialized with IRuntime::deserializeCudaEngine(). + * + * @see IRuntime::deserializeCudaEngine() + * */ + + + //! + //! + //! + //! + public native @NoException(true) IHostMemory serialize(); + + /** + * \brief Create an execution context. + * + * If the engine supports dynamic shapes, each execution context in concurrent use must use a separate optimization + * profile. The first execution context created will call setOptimizationProfile(0) implicitly. 
For other execution + * contexts, setOptimizationProfile() must be called with unique profile index before calling execute or enqueue. + * If an error recorder has been set for the engine, it will also be passed to the execution context. + * + * @see IExecutionContext. + * @see IExecutionContext::setOptimizationProfile() + * */ + + + //! + //! + //! + //! + public native @NoException(true) IExecutionContext createExecutionContext(); + + /** + * \brief Destroy this object; + * + * @deprecated Deprecated interface will be removed in TensorRT 10.0. + * + * \warning Calling destroy on a managed pointer will result in a double-free error. + * */ + + + //! + //! + //! + //! + //! + public native @Deprecated @NoException(true) void destroy(); + + /** + * \brief Get location of binding + * + * This lets you know whether the binding should be a pointer to device or host memory. + * + * @see ITensor::setLocation() ITensor::getLocation() + * + * @param bindingIndex The binding index. + * @return The location of the bound tensor with given index. + * */ + + //! + //! + public native @NoException(true) TensorLocation getLocation(int bindingIndex); + + /** \brief create an execution context without any device memory allocated + * + * The memory for execution of this device context must be supplied by the application. + * */ + + + //! + //! + //! + public native @NoException(true) IExecutionContext createExecutionContextWithoutDeviceMemory(); + + /** + * \brief Return the amount of device memory required by an execution context. + * + * @see IExecutionContext::setDeviceMemory() + * */ + + + //! + //! + //! + public native @Cast("size_t") @NoException(true) long getDeviceMemorySize(); + + /** + * \brief Return true if an engine can be refit. + * + * @see nvinfer1::createInferRefitter() + * */ + + + //! + //! + //! + //! + //! + public native @Cast("bool") @NoException(true) boolean isRefittable(); + + /** + * \brief Return the number of bytes per component of an element. 
+ * + * The vector component size is returned if getBindingVectorizedDim() != -1. + * + * @param bindingIndex The binding Index. + * + * @see ICudaEngine::getBindingVectorizedDim() + * */ + + + //! + //! + //! + //! + //! + public native @NoException(true) int getBindingBytesPerComponent(int bindingIndex); + + /** + * \brief Return the number of components included in one element. + * + * The number of elements in the vectors is returned if getBindingVectorizedDim() != -1. + * + * @param bindingIndex The binding Index. + * + * @see ICudaEngine::getBindingVectorizedDim() + * */ + + + //! + //! + //! + public native @NoException(true) int getBindingComponentsPerElement(int bindingIndex); + + /** + * \brief Return the binding format. + * + * @param bindingIndex The binding Index. + * */ + + + //! + //! + //! + //! + public native @NoException(true) TensorFormat getBindingFormat(int bindingIndex); + + /** + * \brief Return the human readable description of the tensor format. + * + * The description includes the order, vectorization, data type, strides, + * and etc. Examples are shown as follows: + * Example 1: kCHW + FP32 + * "Row major linear FP32 format" + * Example 2: kCHW2 + FP16 + * "Two wide channel vectorized row major FP16 format" + * Example 3: kHWC8 + FP16 + Line Stride = 32 + * "Channel major FP16 format where C % 8 == 0 and H Stride % 32 == 0" + * + * @param bindingIndex The binding Index. + * */ + + + //! + //! + //! + //! + public native @NoException(true) String getBindingFormatDesc(int bindingIndex); + + /** + * \brief Return the dimension index that the buffer is vectorized. + * + * Specifically -1 is returned if scalars per vector is 1. + * + * @param bindingIndex The binding Index. + * */ + + + //! + //! + //! + //! + //! + public native @NoException(true) int getBindingVectorizedDim(int bindingIndex); + + /** + * \brief Returns the name of the network associated with the engine. 
+ * + * The name is set during network creation and is retrieved after + * building or deserialization. + * + * @see INetworkDefinition::setName(), INetworkDefinition::getName() + * + * @return A zero delimited C-style string representing the name of the network. + * */ + + + //! + //! + //! + public native @NoException(true) String getName(); + + /** + * \brief Get the number of optimization profiles defined for this engine. + * + * @return Number of optimization profiles. It is always at least 1. + * + * @see IExecutionContext::setOptimizationProfile() */ + + + //! + //! + //! + //! + //! + //! + //! + //! + //! + public native @NoException(true) int getNbOptimizationProfiles(); + + /** + * \brief Get the minimum / optimum / maximum dimensions for a particular binding under an optimization profile. + * + * @param bindingIndex The binding index, which must belong to the given profile, + * or be between 0 and bindingsPerProfile-1 as described below. + * + * @param profileIndex The profile index, which must be between 0 and getNbOptimizationProfiles()-1. + * + * @param select Whether to query the minimum, optimum, or maximum dimensions for this binding. + * + * @return The minimum / optimum / maximum dimensions for this binding in this profile. + * If the profileIndex or bindingIndex are invalid, return Dims with nbDims=-1. + * + * For backwards compatibility with earlier versions of TensorRT, if the bindingIndex + * does not belong to the current optimization profile, but is between 0 and bindingsPerProfile-1, + * where bindingsPerProfile = getNbBindings()/getNbOptimizationProfiles, + * then a corrected bindingIndex is used instead, computed by: + * + * profileIndex * bindingsPerProfile + bindingIndex % bindingsPerProfile + * + * Otherwise the bindingIndex is considered invalid. + * */ + + + //! + //! + //! + //! + //! + //! + //! + //! 
+ public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getProfileDimensions(int bindingIndex, int profileIndex, OptProfileSelector select); + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getProfileDimensions(int bindingIndex, int profileIndex, @Cast("nvinfer1::OptProfileSelector") int select); + + /** + * \brief Get minimum / optimum / maximum values for an input shape binding under an optimization profile. + * + * @param profileIndex The profile index (must be between 0 and getNbOptimizationProfiles()-1) + * + * @param inputIndex The input index (must be between 0 and getNbBindings() - 1) + * + * @param select Whether to query the minimum, optimum, or maximum shape values for this binding. + * + * @return If the binding is an input shape binding, return a pointer to an array that has + * the same number of elements as the corresponding tensor, i.e. 1 if dims.nbDims == 0, or dims.d[0] + * if dims.nbDims == 1, where dims = getBindingDimensions(inputIndex). The array contains + * the elementwise minimum / optimum / maximum values for this shape binding under the profile. + * If either of the indices is out of range, or if the binding is not an input shape binding, return + * nullptr. + * + * For backwards compatibility with earlier versions of TensorRT, a bindingIndex that does not belong + * to the profile is corrected as described for getProfileDimensions. + * + * @see ICudaEngine::getProfileDimensions + * */ + + + //! + //! + //! + //! + //! + //! + //! + //! + //! + //! + //! + public native @Const @NoException(true) IntPointer getProfileShapeValues(int profileIndex, int inputIndex, OptProfileSelector select); + public native @Const @NoException(true) IntBuffer getProfileShapeValues(int profileIndex, int inputIndex, @Cast("nvinfer1::OptProfileSelector") int select); + + /** + * \brief True if tensor is required as input for shape calculations or output from them. 
+ * + * TensorRT evaluates a network in two phases: + * + * 1. Compute shape information required to determine memory allocation requirements + * and validate that runtime sizes make sense. + * + * 2. Process tensors on the device. + * + * Some tensors are required in phase 1. These tensors are called "shape tensors", and always + * have type Int32 and no more than one dimension. These tensors are not always shapes + * themselves, but might be used to calculate tensor shapes for phase 2. + * + * isShapeBinding(i) returns true if the tensor is a required input or an output computed in phase 1. + * isExecutionBinding(i) returns true if the tensor is a required input or an output computed in phase 2. + * + * For example, if a network uses an input tensor with binding i as an addend + * to an IElementWiseLayer that computes the "reshape dimensions" for IShuffleLayer, + * then isShapeBinding(i) == true. + * + * It's possible to have a tensor be required by both phases. For instance, a tensor + * can be used for the "reshape dimensions" and as the indices for an IGatherLayer + * collecting floating-point data. + * + * It's also possible to have a tensor be required by neither phase, but nonetheless + * shows up in the engine's inputs. For example, if an input tensor is used only + * as an input to IShapeLayer, only its shape matters and its values are irrelevant. + * + * @see isExecutionBinding() + * */ + + + //! + //! + //! + //! + public native @Cast("bool") @NoException(true) boolean isShapeBinding(int bindingIndex); + + /** + * \brief True if pointer to tensor data is required for execution phase, false if nullptr can be supplied. + * + * For example, if a network uses an input tensor with binding i ONLY as the "reshape dimensions" + * input of IShuffleLayer, then isExecutionBinding(i) is false, and a nullptr can be + * supplied for it when calling IExecutionContext::execute or IExecutionContext::enqueue. + * + * @see isShapeBinding() + * */ + + + //! + //! + //! 
+ //! + public native @Cast("bool") @NoException(true) boolean isExecutionBinding(int bindingIndex); + + /** + * \brief Determine what execution capability this engine has. + * + * If the engine has EngineCapability::kSTANDARD, then all engine functionality is valid. + * If the engine has EngineCapability::kSAFETY, then only the functionality in safe engine is valid. + * If the engine has EngineCapability::kDLA_STANDALONE, then only serialize, destroy, and const-accessor functions are + * valid. + * + * @return The EngineCapability flag that the engine was built for. + * */ + + //! + //! + //! + //! + public native @NoException(true) EngineCapability getEngineCapability(); + + /** \brief Set the ErrorRecorder for this interface + * + * Assigns the ErrorRecorder to this interface. The ErrorRecorder will track all errors during execution. + * This function will call incRefCount of the registered ErrorRecorder at least once. Setting + * recorder to nullptr unregisters the recorder with the interface, resulting in a call to decRefCount if + * a recorder has been registered. + * + * If an error recorder is not set, messages will be sent to the global log stream. + * + * @param recorder The error recorder to register with this interface. */ + // + /** @see getErrorRecorder() + /** */ + + + //! + //! + //! + //! + //! + public native @NoException(true) void setErrorRecorder(IErrorRecorder recorder); + + /** + * \brief Get the ErrorRecorder assigned to this interface. + * + * Retrieves the assigned error recorder object for the given class. A nullptr will be returned if + * an error handler has not been set. + * + * @return A pointer to the IErrorRecorder object that has been registered. + * + * @see setErrorRecorder() + * */ + + + //! + //! + //! + //! + //! + //! + public native @NoException(true) IErrorRecorder getErrorRecorder(); + + /** + * \brief Query whether the engine was built with an implicit batch dimension. 
+ * + * @return True if tensors have implicit batch dimension, false otherwise. + * + * This is an engine-wide property. Either all tensors in the engine + * have an implicit batch dimension or none of them do. + * + * hasImplicitBatchDimension() is true if and only if the INetworkDefinition + * from which this engine was built was created with createNetwork() or + * createNetworkV2() without NetworkDefinitionCreationFlag::kEXPLICIT_BATCH flag. + * + * @see createNetworkV2 + * */ + + //! + //! + public native @Cast("bool") @NoException(true) boolean hasImplicitBatchDimension(); + + /** \brief return the tactic sources required by this engine + * + * @see IBuilderConfig::setTacticSources() + * */ + public native @Cast("nvinfer1::TacticSources") @NoException(true) int getTacticSources(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IDeconvolutionLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IDeconvolutionLayer.java new file mode 100644 index 00000000000..3f7e3a76abb --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IDeconvolutionLayer.java @@ -0,0 +1,517 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \class IDeconvolutionLayer + * + * \brief A deconvolution layer in a network definition. 
+ * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. + * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IDeconvolutionLayer extends ILayer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IDeconvolutionLayer(Pointer p) { super(p); } + + /** + * \brief Set the HW kernel size of the convolution. + * + * If executing this layer on DLA, both height and width of kernel size must be in the range [1,32], or the + * combinations of [64, 96, 128] in one dimension and 1 in the other dimensions, i.e. [1x64] or [64x1] are valid, + * but not [64x64]. + * + * @see getKernelSize() + * + * @deprecated Superseded by setKernelSizeNd and will be removed in TensorRT 9.0. + * */ + + + //! + //! + //! + //! + public native @Deprecated @NoException(true) void setKernelSize(@ByVal DimsHW kernelSize); + + /** + * \brief Get the HW kernel size of the deconvolution. + * + * @see setKernelSize() + * + * @deprecated Superseded by getKernelSizeNd and will be removed in TensorRT 9.0. + * */ + + + //! + //! + //! + //! + public native @Deprecated @ByVal @NoException(true) DimsHW getKernelSize(); + + /** + * \brief Set the number of output feature maps for the deconvolution. + * + * If executing this layer on DLA, the number of output maps must be in the range [1,8192]. + * + * @see getNbOutputMaps() + * */ + + + //! + //! + //! + public native @NoException(true) void setNbOutputMaps(int nbOutputMaps); + + /** + * \brief Get the number of output feature maps for the deconvolution. + * + * @see setNbOutputMaps() + * */ + + + //! + //! + //! + //! + //! + public native @NoException(true) int getNbOutputMaps(); + + /** + * \brief Get the stride of the deconvolution. 
+ * + * If executing this layer on DLA, both height and width of stride must be in the range [1,32] or the combinations + * of [64, 96, 128] in one dimension and 1 in the other dimensions, i.e. [1x64] or [64x1] are valid, but not + * [64x64]. + * + * @see setStride() + * + * @deprecated Superseded by setStrideNd and will be removed in TensorRT 9.0. + * */ + + + //! + //! + //! + //! + public native @Deprecated @NoException(true) void setStride(@ByVal DimsHW stride); + + /** + * \brief Get the stride of the deconvolution. + * + * Default: (1,1) + * + * @deprecated Superseded by getStrideNd and will be removed in TensorRT 9.0. + * */ + + + //! + //! + //! + //! + //! + //! + //! + public native @Deprecated @ByVal @NoException(true) DimsHW getStride(); + + /** + * \brief Set the padding of the deconvolution. + * + * The output will be trimmed by this number of elements on each side in the height and width directions. + * In other words, it resembles the inverse of a convolution layer with this padding size. + * Padding is symmetric, and negative padding is not supported. + * + * Default: (0,0) + * + * If executing this layer on DLA, both height and width of padding must be 0. + * + * @see getPadding() + * + * @deprecated Superseded by setPaddingNd and will be removed in TensorRT 9.0. + * */ + + + //! + //! + //! + //! + //! + public native @Deprecated @NoException(true) void setPadding(@ByVal DimsHW padding); + + /** + * \brief Get the padding of the deconvolution. + * + * Default: (0, 0) + * + * @see setPadding() + * + * @deprecated Superseded by getPaddingNd and will be removed in TensorRT 9.0. + * */ + + + //! + //! + //! + //! + //! + //! + //! + public native @Deprecated @ByVal @NoException(true) DimsHW getPadding(); + + /** + * \brief Set the number of groups for a deconvolution. + * + * The input tensor channels are divided into \p nbGroups groups, and a deconvolution is executed for each group, + * using a filter per group. 
The results of the group convolutions are concatenated to form the output. + * + * If executing this layer on DLA, nbGroups must be one + * + * \note When using groups in int8 mode, the size of the groups (i.e. the channel count divided by the group count) + * must be a multiple of 4 for both input and output. + * + * Default: 1 + * + * @see getNbGroups() + * */ + + + //! + //! + //! + public native @NoException(true) void setNbGroups(int nbGroups); + + /** + * \brief Get the number of groups for a deconvolution. + * + * @see setNbGroups() + * */ + + + //! + //! + //! + //! + public native @NoException(true) int getNbGroups(); + + /** + * \brief Set the kernel weights for the deconvolution. + * + * The weights are specified as a contiguous array in \p CKRS order, where \p C the number of + * input channels, \p K the number of output feature maps, and \p R and \p S are the height and width + * of the filter. + * + * @see getWeights() + * */ + + + //! + //! + //! + public native @NoException(true) void setKernelWeights(@ByVal Weights weights); + + /** + * \brief Get the kernel weights for the deconvolution. + * + * @see setNbGroups() + * */ + + + //! + //! + //! + //! + //! + public native @ByVal @NoException(true) Weights getKernelWeights(); + + /** + * \brief Set the bias weights for the deconvolution. + * + * Bias is optional. To omit bias, set the count value of the weights structure to zero. + * + * The bias is applied per-feature-map, so the number of weights (if non-zero) must be equal to the number of + * output feature maps. + * + * @see getBiasWeights() + * */ + + + //! + //! + //! + public native @NoException(true) void setBiasWeights(@ByVal Weights weights); + + /** + * \brief Get the bias weights for the deconvolution. + * + * @see getBiasWeights() + * */ + + + //! + //! + //! + //! + //! + //! + public native @ByVal @NoException(true) Weights getBiasWeights(); + + /** + * \brief Set the multi-dimension pre-padding of the deconvolution. 
+ * + * The output will be trimmed by this number of elements on the start of every dimension. + * In other words, it resembles the inverse of a convolution layer with this padding size. + * Negative padding is not supported. + * + * Default: (0, 0, ..., 0) + * + * If executing this layer on DLA, padding must be 0. + * + * @see getPrePadding() + * */ + + + //! + //! + //! + public native @NoException(true) void setPrePadding(@ByVal @Cast("nvinfer1::Dims*") Dims32 padding); + + /** + * \brief Get the pre-padding. + * + * @see setPrePadding() + * */ + + + //! + //! + //! + //! + //! + //! + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getPrePadding(); + + /** + * \brief Set the multi-dimension post-padding of the deconvolution. + * + * The output will be trimmed by this number of elements on the end of every dimension. + * In other words, it resembles the inverse of a convolution layer with this padding size. + * Negative padding is not supported. + * + * Default: (0, 0, ..., 0) + * + * If executing this layer on DLA, padding must be 0. + * + * @see getPostPadding() + * */ + + + //! + //! + //! + public native @NoException(true) void setPostPadding(@ByVal @Cast("nvinfer1::Dims*") Dims32 padding); + + /** + * \brief Get the padding. + * + * @see setPostPadding() + * */ + + + //! + //! + //! + //! + //! + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getPostPadding(); + + /** + * \brief Set the padding mode. + * + * Padding mode takes precedence if both setPaddingMode and setPre/PostPadding are used. + * + * Default: kEXPLICIT_ROUND_DOWN + * + * @see getPaddingMode() + * */ + + + //! + //! + //! + //! + public native @NoException(true) void setPaddingMode(PaddingMode paddingMode); + public native @NoException(true) void setPaddingMode(@Cast("nvinfer1::PaddingMode") int paddingMode); + + /** + * \brief Get the padding mode. + * + * Default: kEXPLICIT_ROUND_DOWN + * + * @see setPaddingMode() + * */ + + + //! + //! 
+ //! + //! + public native @NoException(true) PaddingMode getPaddingMode(); + + /** + * \brief Set the multi-dimension kernel size of the deconvolution. + * + * If executing this layer on DLA, only support 2D kernel size, both height and width of kernel size must be in + * the range [1-32]. + * + * @see getKernelSizeNd() setKernelSize() getKernelSize() + * */ + + + //! + //! + //! + public native @NoException(true) void setKernelSizeNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 kernelSize); + + /** + * \brief Get the multi-dimension kernel size of the deconvolution. + * + * @see setKernelSizeNd() + * */ + + + //! + //! + //! + //! + //! + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getKernelSizeNd(); + + /** + * \brief Set the multi-dimension stride of the deconvolution. + * + * Default: (1, 1, ..., 1) + * + * If executing this layer on DLA, only support 2D stride, both height and width of stride must be in the range + * [1-32]. + * + * @see getStrideNd() setStride() getStride() + * */ + + + //! + //! + //! + public native @NoException(true) void setStrideNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 stride); + + /** + * \brief Get the multi-dimension stride of the deconvolution. + * + * @see setStrideNd() + * */ + + + //! + //! + //! + //! + //! + //! + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getStrideNd(); + + /** + * \brief Set the multi-dimension padding of the deconvolution. + * + * The output will be trimmed by this number of elements on both sides of every dimension. + * In other words, it resembles the inverse of a convolution layer with this padding size. + * Padding is symmetric, and negative padding is not supported. + * + * Default: (0, 0, ..., 0) + * + * If executing this layer on DLA, padding must be 0. + * + * @see getPaddingNd() setPadding() getPadding() + * */ + + + //! + //! + //! + //! 
+ public native @NoException(true) void setPaddingNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 padding); + + /** + * \brief Get the multi-dimension padding of the deconvolution. + * + * If the padding is asymmetric, the pre-padding is returned. + * + * @see setPaddingNd() + * */ + + + //! + //! + //! + //! + //! + //! + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getPaddingNd(); + + /** + * \brief Append or replace an input of this layer with a specific tensor + * + * @param index the index of the input to modify. + * @param tensor the new input tensor + * + * For a IDeconvolutionLayer, only index 0 is valid unless explicit precision mode is enabled. + * With explicit precision mode, values 0-1 are valid where value 1 overrides kernel weights. + * Kernel weights tensor (computed at build-time) must be an output of dequantize scale layer (i.e. a scale layer + * with int8 input and float output) in explicit precision network. Conversely, this input tensor can be overridden + * via appropriate set call. The indices are as follows: + * + * - 0: The input activation tensor. + * - 1: The kernel weights tensor (a constant tensor). + * + * If this function is called with a value greater than 0, then the function getNbInputs() changes + * */ + + //! + //! + //! + + /** \brief Set the multi-dimension dilation of the deconvolution. + * + * Default: (1, 1, ..., 1) + * + * @see getDilationNd() + * */ + + + //! + //! + //! + public native @NoException(true) void setDilationNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 dilation); + + /** + * \brief Get the multi-dimension dilation of the deconvolution. 
+ * + * @see setDilationNd() + * */ + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getDilationNd(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IDequantizeLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IDequantizeLayer.java new file mode 100644 index 00000000000..d8a4f22979d --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IDequantizeLayer.java @@ -0,0 +1,104 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \class IDequantizeLayer + * + * \brief A Dequantize layer in a network definition. + * + * This layer accepts a signed 8-bit integer input tensor, and uses the configured scale and zeroPt inputs to + * dequantize the input according to: + * \p output = (\p input - \p zeroPt) * \p scale + * + * The first input (index 0) is the tensor to be quantized. + * The second (index 1) and third (index 2) are the scale and zero point respectively. + * Each of \p scale and \p zeroPt must be either a scalar, or a 1D tensor. + * + * The \p zeroPt tensor is optional, and if not set, will be assumed to be zero. Its data type must be + * DataType::kINT8. \p zeroPt must only contain zero-valued coefficients, because only symmetric quantization is + * supported. 
+ * The \p scale value must be either a scalar for per-tensor quantization, or a 1D tensor for per-channel + * quantization. All \p scale coefficients must have positive values. The size of the 1-D \p scale tensor must match + * the size of the quantization axis. The size of the \p scale must match the size of the \p zeroPt. + * + * The subgraph which terminates with the \p scale tensor must be a build-time constant. The same restrictions apply + * to the \p zeroPt. + * The output type, if constrained, must be constrained to DataType::kINT8. The input type, if constrained, must be + * constrained to DataType::kFLOAT (FP16 input is not supported). + * The output size is the same as the input size. The quantization axis is in reference to the input tensor's + * dimensions. + * + * IDequantizeLayer only supports DataType::kINT8 precision and will default to this precision during instantiation. + * IDequantizeLayer only supports DataType::kFLOAT output. + * + * As an example of the operation of this layer, imagine a 4D NCHW activation input which can be quantized using a + * single scale coefficient (referred to as per-tensor quantization): + * For each n in N: + * For each c in C: + * For each h in H: + * For each w in W: + * output[n,c,h,w] = (\p input[n,c,h,w] - \p zeroPt) * \p scale + * + * Per-channel dequantization is supported only for input that is rooted at an IConstantLayer (i.e. weights). + * Activations cannot be quantized per-channel. As an example of per-channel operation, imagine a 4D KCRS weights input + * and K (dimension 0) as the quantization axis. The scale is an array of coefficients, which is the same size as the + * quantization axis. + * For each k in K: + * For each c in C: + * For each r in R: + * For each s in S: + * output[k,c,r,s] = (\p input[k,c,r,s] - \p zeroPt[k]) * \p scale[k] + * + * \note Only symmetric quantization is supported. + * \note Currently the only allowed build-time constant \p scale and \zeroPt subgraphs are: + * 1. 
Constant -> Quantize + * 2. Constant -> Cast -> Quantize + * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. + * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IDequantizeLayer extends ILayer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IDequantizeLayer(Pointer p) { super(p); } + + /** + * \brief Get the quantization axis. + * + * @return axis parameter set by setAxis(). + * The return value is the index of the quantization axis in the input tensor's dimensions. + * A value of -1 indicates per-tensor quantization. + * The default value is -1. + * */ + + //! + //! + //! + public native @NoException(true) int getAxis(); + /** + * \brief Set the quantization axis. + * + * Set the index of the quantization axis (with reference to the input tensor's dimensions). + * The axis must be a valid axis if the scale tensor has more than one coefficient. + * The axis value will be ignored if the scale tensor has exactly one coefficient (per-tensor quantization). 
+ * */ + public native @NoException(true) void setAxis(int axis); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IDimensionExpr.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IDimensionExpr.java new file mode 100644 index 00000000000..036a9c3875f --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IDimensionExpr.java @@ -0,0 +1,46 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + // namespace impl + +/** + * \class IDimensionExpr + * + * An IDimensionExpr represents an integer expression constructed from constants, + * input dimensions, and binary operations. These expressions are can be used + * in overrides of IPluginV2DynamicExt::getOutputDimensions to define output + * dimensions in terms of input dimensions. + * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. + * + * @see DimensionOperation, IPluginV2DynamicExt::getOutputDimensions + * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IDimensionExpr extends INoCopy { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IDimensionExpr(Pointer p) { super(p); } + + /** Return true if expression is a build-time constant. 
*/ + public native @Cast("bool") @NoException(true) boolean isConstant(); + + /** If isConstant(), returns value of the constant. + * If !isConstant(), return std::numeric_limits::min(). */ + public native @NoException(true) int getConstantValue(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IElementWiseLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IElementWiseLayer.java new file mode 100644 index 00000000000..56810205721 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IElementWiseLayer.java @@ -0,0 +1,73 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + // namespace impl + +/** + * \class IElementWiseLayer + * + * \brief A elementwise layer in a network definition. + * + * This layer applies a per-element binary operation between corresponding elements of two tensors. + * + * The input tensors must have the same number of dimensions. For each dimension, their lengths must + * match, or one of them must be one. In the latter case, the tensor is broadcast along that axis. + * + * The output tensor has the same number of dimensions as the inputs. For each output dimension, + * its length is equal to the lengths of the corresponding input dimensions if they match, + * otherwise it is equal to the length that is not one. */ +//! 
+/** \warning When running this layer on the DLA with Int8 data type, the dynamic ranges of two input tensors shall be +/** equal. If the dynamic ranges are generated using calibrator, the largest value shall be used. +/** +/** \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. +/** */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IElementWiseLayer extends ILayer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IElementWiseLayer(Pointer p) { super(p); } + + /** + * \brief Set the binary operation for the layer. + * + * DLA supports only kSUM, kPROD, kMAX and kMIN. + * + * @see getOperation(), ElementWiseOperation + * + * @see getBiasWeights() + * */ + + + //! + //! + //! + //! + public native @NoException(true) void setOperation(ElementWiseOperation op); + public native @NoException(true) void setOperation(@Cast("nvinfer1::ElementWiseOperation") int op); + + /** + * \brief Get the binary operation for the layer. 
+ * @see setOperation(), ElementWiseOperation
+ * */
For example, assigning an ErrorRecorder to an IBuilder allows all INetwork's, ILayer's, and + * ITensor's to use the same error recorder. For functions that have their own ErrorRecorder accessor functions. + * This allows registering a different error recorder or de-registering of the error recorder for that specific + * object. + * + * The ErrorRecorder object implementation must be thread safe if the same ErrorRecorder is passed to different + * interface objects being executed in parallel in different threads. All locking and synchronization is + * pushed to the interface implementation and TensorRT does not hold any synchronization primitives when accessing + * the interface functions. + * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IErrorRecorder extends Pointer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IErrorRecorder(Pointer p) { super(p); } + + /** + * A typedef of a c-style string for reporting error descriptions. + * */ + + + //! + //! + + /** + * The length limit for an error description, excluding the '\0' string terminator. + * */ + + + //! + //! + @MemberGetter public static native @Cast("const size_t") long kMAX_DESC_LENGTH(); + public static final long kMAX_DESC_LENGTH = kMAX_DESC_LENGTH(); + + /** + * A typedef of a 32bit integer for reference counting. + * */ + + // Public API used to retrieve information from the error recorder. + + /** + * \brief Return the number of errors + * + * Determines the number of errors that occurred between the current point in execution + * and the last time that the clear() was executed. Due to the possibility of asynchronous + * errors occuring, a TensorRT API can return correct results, but still register errors + * with the Error Recorder. The value of getNbErrors must monotonically increases until clear() + * is called. 
+ * + * @return Returns the number of errors detected, or 0 if there are no errors. + * + * @see clear + * */ + + + //! + //! + //! + //! + //! + //! + public native @NoException(true) int getNbErrors(); + + /** + * \brief Returns the ErrorCode enumeration. + * + * @param errorIdx A 32bit integer that indexes into the error array. + * + * The errorIdx specifies what error code from 0 to getNbErrors()-1 that the application + * wants to analyze and return the error code enum. + * + * @return Returns the enum corresponding to errorIdx. + * + * @see getErrorDesc, ErrorCode + * */ + + + //! + //! + //! + //! + //! + //! + public native @NoException(true) ErrorCode getErrorCode(int errorIdx); + + /** + * \brief Returns the c-style string description of the error. + * + * @param errorIdx A 32bit integer that indexes into the error array. + * + * For the error specified by the idx value, return the string description of the error. The + * error string is a c-style string that is zero delimited. In the safety context there is a + * constant length requirement to remove any dynamic memory allocations and the error message + * may be truncated. The format of the string is " - ". + * + * @return Returns a string representation of the error along with a description of the error. + * + * @see getErrorCode + * */ + + + //! + //! + //! + //! + public native @NoException(true) String getErrorDesc(int errorIdx); + + /** + * \brief Determine if the error stack has overflowed. + * + * In the case when the number of errors is large, this function is used to query if one or more + * errors have been dropped due to lack of storage capacity. This is especially important in the + * automotive safety case where the internal error handling mechanisms cannot allocate memory. + * + * @return true if errors have been dropped due to overflowing the error stack. + * */ + + + //! + //! + //! + //! 
+ public native @Cast("bool") @NoException(true) boolean hasOverflowed(); + + /** + * \brief Clear the error stack on the error recorder. + * + * Removes all the tracked errors by the error recorder. This function must guarantee that after + * this function is called, and as long as no error occurs, the next call to getNbErrors will return + * zero. + * + * @see getNbErrors + * */ + + + //! + //! + //! + //! + //! + public native @NoException(true) void clear(); + + // API used by TensorRT to report Error information to the application. + + /** + * \brief Report an error to the error recorder with the corresponding enum and description. + * + * @param val The error code enum that is being reported. + * @param desc The string description of the error. + * + * Report an error to the user that has a given value and human readable description. The function returns false + * if processing can continue, which implies that the reported error is not fatal. This does not guarantee that + * processing continues, but provides a hint to TensorRT. + * + * @return True if the error is determined to be fatal and processing of the current function must end. + * */ + + + //! + //! + //! + //! + public native @Cast("bool") @NoException(true) boolean reportError(ErrorCode val, String desc); + public native @Cast("bool") @NoException(true) boolean reportError(@Cast("nvinfer1::ErrorCode") int val, @Cast("const char*") BytePointer desc); + + /** + * \brief Increments the refcount for the current ErrorRecorder. + * + * Increments the reference count for the object by one and returns the current value. + * This reference count allows the application to know that an object inside of TensorRT has + * taken a reference to the ErrorRecorder. If the ErrorRecorder is released before the + * reference count hits zero, then behavior in TensorRT is undefined. It is strongly recommended + * that the increment is an atomic operation. 
TensorRT guarantees that each incRefCount called on + * an objects construction is paired with a decRefCount call when an object is destructed. + * + * @return The current reference counted value. + * */ + + + //! + //! + //! + //! + public native @Cast("nvinfer1::IErrorRecorder::RefCount") @NoException(true) int incRefCount(); + + /** + * \brief Decrements the refcount for the current ErrorRecorder. + * + * Decrements the reference count for the object by one and returns the current value. It is undefined behavior + * to call decRefCount when RefCount is zero. If the ErrorRecorder is destroyed before the reference count + * hits zero, then behavior in TensorRT is undefined. It is strongly recommended that the decrement is an + * atomic operation. TensorRT guarantees that each decRefCount called when an object is destructed is + * paired with a incRefCount call when that object was constructed. + * + * @return The current reference counted value. + * */ + public native @Cast("nvinfer1::IErrorRecorder::RefCount") @NoException(true) int decRefCount(); + +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IExecutionContext.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IExecutionContext.java new file mode 100644 index 00000000000..873202f0c03 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IExecutionContext.java @@ -0,0 +1,654 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static 
org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \class IExecutionContext + * + * \brief Context for executing inference using an engine, with functionally unsafe features. + * + * Multiple execution contexts may exist for one ICudaEngine instance, allowing the same + * engine to be used for the execution of multiple batches simultaneously. If the engine supports + * dynamic shapes, each execution context in concurrent use must use a separate optimization profile. + * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IExecutionContext extends INoCopy { + static { Loader.load(); } + /** Default native constructor. */ + public IExecutionContext() { super((Pointer)null); allocate(); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public IExecutionContext(long size) { super((Pointer)null); allocateArray(size); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IExecutionContext(Pointer p) { super(p); } + private native void allocate(); + private native void allocateArray(long size); + @Override public IExecutionContext position(long position) { + return (IExecutionContext)super.position(position); + } + @Override public IExecutionContext getPointer(long i) { + return new IExecutionContext((Pointer)this).offsetAddress(i); + } + + + /** + * \brief Synchronously execute inference on a batch. + * + * This method requires an array of input and output buffers. The mapping from tensor names to indices + * can be queried using ICudaEngine::getBindingIndex() + * + * @param batchSize The batch size. This is at most the value supplied when the engine was built. + * @param bindings An array of pointers to input and output buffers for the network. 
+ * + * @return True if execution succeeded. + * + * \warning This function will trigger layer resource updates if hasImplicitBatchDimension() + * returns true and batchSize changes between subsequent calls, possibly resulting + * in performance bottlenecks. + * + * @see ICudaEngine::getBindingIndex() ICudaEngine::getMaxBatchSize() + * */ + + + //! + //! + //! + //! + //! + //! + //! + //! + public native @Cast("bool") @NoException(true) boolean execute(int batchSize, @Cast("void*const*") PointerPointer bindings); + public native @Cast("bool") @NoException(true) boolean execute(int batchSize, @Cast("void*const*") @ByPtrPtr Pointer bindings); + + /** + * \brief Asynchronously execute inference on a batch. + * + * This method requires an array of input and output buffers. The mapping from tensor names to indices can be + * queried using ICudaEngine::getBindingIndex() @param batchSize The batch size. This is at most the value supplied + * when the engine was built. + * + * @param bindings An array of pointers to input and output buffers for the network. + * @param stream A cuda stream on which the inference kernels will be enqueued. + * @param inputConsumed An optional event which will be signaled when the input buffers can be refilled with new + * data. + * + * @return True if the kernels were enqueued successfully. + * + * @see ICudaEngine::getBindingIndex() ICudaEngine::getMaxBatchSize() + * + * \warning Calling enqueue() in from the same IExecutionContext object with different CUDA streams concurrently + * results in undefined behavior. To perform inference concurrently in multiple streams, use one execution + * context per stream. + * + * \warning This function will trigger layer resource updates if hasImplicitBatchDimension() + * returns true and batchSize changes between subsequent calls, possibly resulting in performance + * bottlenecks. + * */ + + + //! + //! + //! + //! 
+ public native @Cast("bool") @NoException(true) boolean enqueue(int batchSize, @Cast("void*const*") PointerPointer bindings, CUstream_st stream, @ByPtrPtr CUevent_st inputConsumed); + public native @Cast("bool") @NoException(true) boolean enqueue(int batchSize, @Cast("void*const*") @ByPtrPtr Pointer bindings, CUstream_st stream, @ByPtrPtr CUevent_st inputConsumed); + + /** + * \brief Set the debug sync flag. + * + * If this flag is set to true, the engine will log the successful execution for each kernel during execute(). It + * has no effect when using enqueue(). + * + * @see getDebugSync() + * */ + + + //! + //! + //! + public native @NoException(true) void setDebugSync(@Cast("bool") boolean sync); + + /** + * \brief Get the debug sync flag. + * + * @see setDebugSync() + * */ + + + //! + //! + //! + public native @Cast("bool") @NoException(true) boolean getDebugSync(); + + /** + * \brief Set the profiler. + * + * @see IProfiler getProfiler() + * */ + + + //! + //! + //! + public native @NoException(true) void setProfiler(IProfiler profiler); + + /** + * \brief Get the profiler. + * + * @see IProfiler setProfiler() + * */ + + + //! + //! + //! + public native @NoException(true) IProfiler getProfiler(); + + /** + * \brief Get the associated engine. + * + * @see ICudaEngine + * */ + + + //! + //! + //! + //! + public native @Const @ByRef @NoException(true) ICudaEngine getEngine(); + + /** + * \brief Destroy this object. + * + * @deprecated Deprecated interface will be removed in TensorRT 10.0. + * + * \warning Calling destroy on a managed pointer will result in a double-free error. + * */ + + + //! + //! + //! + //! + public native @Deprecated @NoException(true) void destroy(); + + /** + * \brief Set the name of the execution context. + * + * This method copies the name string. + * + * @see getName() + * */ + + + //! + //! + //! 
+ public native @NoException(true) void setName(String name); + public native @NoException(true) void setName(@Cast("const char*") BytePointer name); + + /** + * \brief Return the name of the execution context. + * + * @see setName() + * */ + + + //! + //! + //! + //! + public native @NoException(true) String getName(); + + /** + * \brief Set the device memory for use by this execution context. + * + * The memory must be aligned with cuda memory alignment property (using cudaGetDeviceProperties()), and its size + * must be at least that returned by getDeviceMemorySize(). Setting memory to nullptr is acceptable if + * getDeviceMemorySize() returns 0. If using enqueue() to run the network, the memory is in use from the invocation + * of enqueue() until network execution is complete. If using execute(), it is in use until execute() returns. + * Releasing or otherwise using the memory for other purposes during this time will result in undefined behavior. + * + * @see ICudaEngine::getDeviceMemorySize() ICudaEngine::createExecutionContextWithoutDeviceMemory() + * */ + + + + //! + //! + //! + //! + //! + //! + //! + public native @Deprecated @NoException(true) void setDeviceMemory(Pointer memory); + + /** + * \brief Return the strides of the buffer for the given binding. + * + * The strides are in units of elements, not components or bytes. + * For example, for TensorFormat::kHWC8, a stride of one spans 8 scalars. + * + * Note that strides can be different for different execution contexts + * with dynamic shapes. + * + * If the bindingIndex is invalid or there are dynamic dimensions that have not been + * set yet, returns Dims with Dims::nbDims = -1. + * + * @param bindingIndex The binding index. + * + * @see getBindingComponentsPerElement + * */ + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getStrides(int bindingIndex); + /** + * \brief Select an optimization profile for the current context. + * + * @param profileIndex Index of the profile. 
It must lie between 0 and + * getEngine().getNbOptimizationProfiles() - 1 + * + * The selected profile will be used in subsequent calls to execute() or enqueue(). + * + * When an optimization profile is switched via this API, TensorRT may + * enqueue GPU memory copy operations required to set up the new profile during the subsequent enqueue() + * operations. To avoid these calls during enqueue(), use setOptimizationProfileAsync() instead. + * + * If the associated CUDA engine has dynamic inputs, this method must be called at least once + * with a unique profileIndex before calling execute or enqueue (i.e. the profile index + * may not be in use by another execution context that has not been destroyed yet). + * For the first execution context that is created for an engine, setOptimizationProfile(0) + * is called implicitly. + * + * If the associated CUDA engine does not have inputs with dynamic shapes, this method need not be + * called, in which case the default profile index of 0 will be used (this is particularly + * the case for all safe engines). + * + * setOptimizationProfile() must be called before calling setBindingDimensions() and + * setInputShapeBinding() for all dynamic input tensors or input shape tensors, which in + * turn must be called before either execute() or enqueue(). + * + * \warning This function will trigger layer resource updates on the next + * call of enqueue[V2]()/execute[V2](), possibly resulting in performance bottlenecks. + * + * @return true if the call succeeded, else false (e.g. input out of range) + * + * @deprecated This API is superseded by setOptimizationProfileAsync and will be removed in TensorRT 9.0. + * + * @see ICudaEngine::getNbOptimizationProfiles() IExecutionContext::setOptimizationProfileAsync() + * */ + + + //! + //! + //! + public native @Cast("bool") @Deprecated @NoException(true) boolean setOptimizationProfile(int profileIndex); + + /** + * \brief Get the index of the currently selected optimization profile. 
+ * + * If the profile index has not been set yet (implicitly to 0 for the first execution context + * to be created, or explicitly for all subsequent contexts), an invalid value of -1 will be returned + * and all calls to enqueue() or execute() will fail until a valid profile index has been set. + * */ + + + //! + //! + //! + //! + //! + //! + //! + //! + //! + public native @NoException(true) int getOptimizationProfile(); + + /** + * \brief Set the dynamic dimensions of a binding + * + * @param bindingIndex index of an input tensor whose dimensions must be compatible with + * the network definition (i.e. only the wildcard dimension -1 can be replaced with a + * new dimension >= 0). + * + * @param dimensions specifies the dimensions of the input tensor. It must be in the valid + * range for the currently selected optimization profile, and the corresponding engine must + * not be safety-certified. + * + * This method requires the engine to be built without an implicit batch dimension. + * This method will fail unless a valid optimization profile is defined for the current + * execution context (getOptimizationProfile() must not be -1). + * + * For all dynamic non-output bindings (which have at least one wildcard dimension of -1), + * this method needs to be called before either enqueue() or execute() may be called. + * This can be checked using the method allInputDimensionsSpecified(). + * + * \warning This function will trigger layer resource updates on the next + * call of enqueue[V2]()/execute[V2](), possibly resulting in performance bottlenecks, + * if the dimensions are different than the previous set dimensions. + * + * @return false if an error occurs (e.g. bindingIndex is out of range for the currently selected + * optimization profile or binding dimension is inconsistent with min-max range of the + * optimization profile), else true. Note that the network can still be invalid for certain + * combinations of input shapes that lead to invalid output shapes. 
To confirm the correctness + * of the network input shapes, check whether the output binding has valid + * dimensions using getBindingDimensions() on the output bindingIndex. + * + * @see ICudaEngine::getBindingIndex + * */ + + + //! + //! + //! + //! + //! + //! + //! + //! + //! + public native @Cast("bool") @NoException(true) boolean setBindingDimensions(int bindingIndex, @ByVal @Cast("nvinfer1::Dims*") Dims32 dimensions); + + /** + * \brief Get the dynamic dimensions of a binding + * + * If the engine was built with an implicit batch dimension, same as ICudaEngine::getBindingDimensions. + * + * If setBindingDimensions() has been called on this binding (or if there are no + * dynamic dimensions), all dimensions will be positive. Otherwise, it is necessary to + * call setBindingDimensions() before enqueue() or execute() may be called. + * + * If the bindingIndex is out of range, an invalid Dims with nbDims == -1 is returned. + * The same invalid Dims will be returned if the engine was not built with an implicit + * batch dimension and if the execution context is not currently associated with a valid + * optimization profile (i.e. if getOptimizationProfile() returns -1). + * + * If ICudaEngine::bindingIsInput(bindingIndex) is false, then both + * allInputDimensionsSpecified() and allInputShapesSpecified() must be true + * before calling this method. + * + * @return Currently selected binding dimensions + * + * For backwards compatibility with earlier versions of TensorRT, a bindingIndex that does not belong + * to the current profile is corrected as described for ICudaEngine::getProfileDimensions. + * + * @see ICudaEngine::getProfileDimensions + * */ + + + //! + //! + //! + //! + //! + //! + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getBindingDimensions(int bindingIndex); + + /** + * \brief Set values of input tensor required by shape calculations. 
+ * + * @param bindingIndex index of an input tensor for which + * ICudaEngine::isShapeBinding(bindingIndex) and ICudaEngine::bindingIsInput(bindingIndex) + * are both true. + * + * @param data pointer to values of the input tensor. The number of values should be + * the product of the dimensions returned by getBindingDimensions(bindingIndex). + * + * If ICudaEngine::isShapeBinding(bindingIndex) and ICudaEngine::bindingIsInput(bindingIndex) + * are both true, this method must be called before enqueue() or execute() may be called. + * This method will fail unless a valid optimization profile is defined for the current + * execution context (getOptimizationProfile() must not be -1). + * + * \warning This function will trigger layer resource updates on the next call of + * enqueue[V2]()/execute[V2](), possibly resulting in performance bottlenecks, if the + * shapes are different than the previous set shapes. + * + * @return false if an error occurs (e.g. bindingIndex is out of range for the currently selected + * optimization profile or shape data is inconsistent with min-max range of the + * optimization profile), else true. Note that the network can still be invalid for certain + * combinations of input shapes that lead to invalid output shapes. To confirm the correctness + * of the network input shapes, check whether the output binding has valid + * dimensions using getBindingDimensions() on the output bindingIndex. */ + + + //! + //! + //! + //! + //! + //! + public native @Cast("bool") @NoException(true) boolean setInputShapeBinding(int bindingIndex, @Const IntPointer data); + public native @Cast("bool") @NoException(true) boolean setInputShapeBinding(int bindingIndex, @Const IntBuffer data); + public native @Cast("bool") @NoException(true) boolean setInputShapeBinding(int bindingIndex, @Const int[] data); + + /** + * \brief Get values of an input tensor required for shape calculations or an output tensor produced by shape + * calculations. 
+ * + * @param bindingIndex index of an input or output tensor for which + * ICudaEngine::isShapeBinding(bindingIndex) is true. + * + * @param data pointer to where values will be written. The number of values written is + * the product of the dimensions returned by getBindingDimensions(bindingIndex). + * + * If ICudaEngine::bindingIsInput(bindingIndex) is false, then both + * allInputDimensionsSpecified() and allInputShapesSpecified() must be true + * before calling this method. The method will also fail if no valid optimization profile + * has been set for the current execution context, i.e. if getOptimizationProfile() returns -1. + * + * @see isShapeBinding(bindingIndex) + * */ + + + //! + //! + //! + //! + //! + public native @Cast("bool") @NoException(true) boolean getShapeBinding(int bindingIndex, IntPointer data); + public native @Cast("bool") @NoException(true) boolean getShapeBinding(int bindingIndex, IntBuffer data); + public native @Cast("bool") @NoException(true) boolean getShapeBinding(int bindingIndex, int[] data); + + /** + * \brief Whether all dynamic dimensions of input tensors have been specified + * + * @return True if all dynamic dimensions of input tensors have been specified + * by calling setBindingDimensions(). + * + * Trivially true if network has no dynamically shaped input tensors. + * + * @see setBindingDimensions(bindingIndex,dimensions) + * */ + + + //! + //! + //! + //! + //! + public native @Cast("bool") @NoException(true) boolean allInputDimensionsSpecified(); + + /** + * \brief Whether all input shape bindings have been specified + * + * @return True if all input shape bindings have been specified by setInputShapeBinding(). + * + * Trivially true if network has no input shape bindings. + * + * @see isShapeBinding(bindingIndex) + * */ + + + //! + //! + //! + //! + //! 
+ public native @Cast("bool") @NoException(true) boolean allInputShapesSpecified(); + + /** + * \brief Set the ErrorRecorder for this interface + * + * Assigns the ErrorRecorder to this interface. The ErrorRecorder will track all errors during execution. + * This function will call incRefCount of the registered ErrorRecorder at least once. Setting + * recorder to nullptr unregisters the recorder with the interface, resulting in a call to decRefCount if + * a recorder has been registered. + * + * If an error recorder is not set, messages will be sent to the global log stream. + * + * @param recorder The error recorder to register with this interface. */ + // + /** @see getErrorRecorder() + /** */ + + + //! + //! + //! + //! + //! + public native @NoException(true) void setErrorRecorder(IErrorRecorder recorder); + + /** + * \brief Get the ErrorRecorder assigned to this interface. + * + * Retrieves the assigned error recorder object for the given class. A nullptr will be returned if + * an error handler has not been set. + * + * @return A pointer to the IErrorRecorder object that has been registered. + * + * @see setErrorRecorder() + * */ + + + //! + //! + //! + //! + //! + public native @NoException(true) IErrorRecorder getErrorRecorder(); + + /** + * \brief Synchronously execute inference a network. + * + * This method requires an array of input and output buffers. The mapping from tensor names to indices can be + * queried using ICudaEngine::getBindingIndex(). + * This method only works for execution contexts built with full dimension networks. + * @param bindings An array of pointers to input and output buffers for the network. + * + * @return True if execution succeeded. + * + * @see ICudaEngine::getBindingIndex() ICudaEngine::getMaxBatchSize() + * */ + + + //! + //! + //! + //! + //! + //! + //! 
+ public native @Cast("bool") @NoException(true) boolean executeV2(@Cast("void*const*") PointerPointer bindings); + public native @Cast("bool") @NoException(true) boolean executeV2(@Cast("void*const*") @ByPtrPtr Pointer bindings); + + /** + * \brief Asynchronously execute inference. + * + * This method requires an array of input and output buffers. The mapping from tensor names to indices can be + * queried using ICudaEngine::getBindingIndex(). + * This method only works for execution contexts built with full dimension networks. + * @param bindings An array of pointers to input and output buffers for the network. + * @param stream A cuda stream on which the inference kernels will be enqueued + * @param inputConsumed An optional event which will be signaled when the input buffers can be refilled with new + * data + * + * @return True if the kernels were enqueued successfully. + * + * @see ICudaEngine::getBindingIndex() ICudaEngine::getMaxBatchSize() + * + * \note Calling enqueueV2() with a stream in CUDA graph capture mode has a known issue. If dynamic shapes are + * used, the first enqueueV2() call after a setInputShapeBinding() call will cause failure in stream capture + * due to resource allocation. Please call enqueueV2() once before capturing the graph. + * + * \warning Calling enqueueV2() in from the same IExecutionContext object with different CUDA streams concurrently + * results in undefined behavior. To perform inference concurrently in multiple streams, use one execution + * context per stream. + * */ + + + //! + //! + //! + //! + //! + //! + //! + //! + //! + //! + //! 
+ public native @Cast("bool") @NoException(true) boolean enqueueV2(@Cast("void*const*") PointerPointer bindings, CUstream_st stream, @ByPtrPtr CUevent_st inputConsumed); + public native @Cast("bool") @NoException(true) boolean enqueueV2(@Cast("void*const*") @ByPtrPtr Pointer bindings, CUstream_st stream, @ByPtrPtr CUevent_st inputConsumed); + + /** + * \brief Select an optimization profile for the current context with async + * semantics. + * + * @param profileIndex Index of the profile. The value must lie between 0 and + * getEngine().getNbOptimizationProfiles() - 1 + * + * @param stream A cuda stream on which the cudaMemcpyAsyncs may be + * enqueued + * + * When an optimization profile is switched via this API, TensorRT may + * require that data is copied via cudaMemcpyAsync. It is the + * application’s responsibility to guarantee that synchronization between + * the profile sync stream and the enqueue stream occurs. + * + * The selected profile will be used in subsequent calls to execute() or + * enqueue(). + * If the associated CUDA engine has inputs with dynamic shapes, the + * optimization profile must be set with a unique profileIndex before + * calling execute or enqueue. + * For the first execution context that is created for an engine, + * setOptimizationProfile(0) is called implicitly. + * + * If the associated CUDA engine does not have inputs with dynamic shapes, + * this method need not be called, in which case the default profile index + * of 0 will be used. + * + * setOptimizationProfileAsync() must be called before calling + * setBindingDimensions() and setInputShapeBinding() for all dynamic input + * tensors or input shape tensors, which in turn must be called before + * either execute() or enqueue(). + * + * \warning This function will trigger layer resource updates on the next call of + * enqueue[V2]()/execute[V2](), possibly resulting in performance bottlenecks. 
+ * + * \warning Not synchronizing the stream used at enqueue with the stream + * used to set optimization profile asynchronously using this API will + * result in undefined behavior. + * + * @return true if the call succeeded, else false (e.g. input out of range) + * + * @see ICudaEngine::getNbOptimizationProfiles() + * @see IExecutionContext::setOptimizationProfile() */ + public native @Cast("bool") @NoException(true) boolean setOptimizationProfileAsync(int profileIndex, CUstream_st stream); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IExprBuilder.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IExprBuilder.java new file mode 100644 index 00000000000..b9095e8392d --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IExprBuilder.java @@ -0,0 +1,54 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \class IExprBuilder + * + * Object for constructing IDimensionExpr. + * + * There is no public way to construct an IExprBuilder. It appears as an argument to + * method IPluginV2DynamicExt::getOutputDimensions(). Overrides of that method can use + * that IExprBuilder argument to construct expressions that define output dimensions + * in terms of input dimensions. 
+ * + * Clients should assume that any values constructed by the IExprBuilder are destroyed + * after IPluginV2DynamicExt::getOutputDimensions() returns. + * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. + * + * @see IDimensionExpr + * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IExprBuilder extends INoCopy { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IExprBuilder(Pointer p) { super(p); } + + /** Return pointer to IDimensionExp for given value. */ + public native @Const @NoException(true) IDimensionExpr constant(int value); + + /** Return pointer to IDimensionExp that represents the given operation applied to first and second. + * Returns nullptr if op is not a valid DimensionOperation. */ + public native @Const @NoException(true) IDimensionExpr operation( + DimensionOperation op, @Const @ByRef IDimensionExpr first, @Const @ByRef IDimensionExpr second); + public native @Const @NoException(true) IDimensionExpr operation( + @Cast("nvinfer1::DimensionOperation") int op, @Const @ByRef IDimensionExpr first, @Const @ByRef IDimensionExpr second); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IFillLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IFillLayer.java new file mode 100644 index 00000000000..7dac15c60b4 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IFillLayer.java @@ -0,0 +1,229 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import 
static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \brief Generate an output tensor with specified mode. + * + * The fill layer has two variants, static and dynamic. Static fill specifies its parameters + * at layer creation time via Dims and the get/set accessor functions of the IFillLayer. + * Dynamic fill specifies one or more of its parameters as ITensors, by using ILayer::setTensor to add + * a corresponding input. The corresponding static parameter is used if an input is missing or null. + * + * The shape of the output is specified by the parameter \p Dimension, or if non-null and present, + * the first input, which must be a 1D Int32 shape tensor. Thus an application can determine if the + * IFillLayer has a dynamic output shape based on whether it has a non-null first input. + * + * Alpha and Beta are treated differently based on the Fill Operation specified. See details in + * IFillLayer::setAlpha(), IFillLayer::setBeta(), and IFillLayer::setInput(). + * + * A fill layer can produce a shape tensor if the following restrictions are met: + * + * * The FillOperation is kLINSPACE. + * * The output is a 1D Int32 tensor with length not exceeding 2*Dims::MAX_DIMS. + * * There is at most one input, and if so, that input is input 0. + * * If input 0 exists, the length of the output tensor must be computable by constant folding. + * + * @see FillOperation + * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IFillLayer extends ILayer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public IFillLayer(Pointer p) { super(p); } + + /** + * \brief Set the output tensor's dimensions. + * + * @param dimensions The output tensor's dimensions. + * + * If the first input had been used to create this layer, that input is reset to null by this method. + * + * @see getDimensions */ + // + + + //! + //! + //! + //! + //! + public native @NoException(true) void setDimensions(@ByVal @Cast("nvinfer1::Dims*") Dims32 dimensions); + + /** + * \brief Get the output tensor's dimensions. + * + * @return The output tensor's dimensions, or an invalid Dims structure. + * + * If the first input is present and non-null, + * this function returns a Dims with nbDims = -1. + * + * @see setDimensions + * */ + + + //! + //! + //! + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getDimensions(); + + /** + * \brief Set the fill operation for the layer. + * + * @see getOperation(), FillOperation + * */ + + + //! + //! + //! + public native @NoException(true) void setOperation(FillOperation op); + public native @NoException(true) void setOperation(@Cast("nvinfer1::FillOperation") int op); + + /** + * \brief Get the fill operation for the layer. + * + * @see setOperation(), FillOperation + * */ + + + //! + //! + //! + //! + //! + public native @NoException(true) FillOperation getOperation(); + + /** + * \brief Set the alpha parameter. + * + * @param alpha has different meanings for each operator: + * + * Operation | Usage + * kLINSPACE | the start value; + * kRANDOMUNIFORM | the minimum value; + * + * If a second input had been used to create this layer, that input is reset to null by this method. + * + * @see getAlpha */ + // + + + //! + //! + //! + //! + //! + public native @NoException(true) void setAlpha(double alpha); + + /** + * \brief Get the value of alpha parameter. + * + * @return A double value of alpha. + * + * If the second input is present and non-null, + * this function returns a Dims with nbDims = -1. 
+ * + * @see setAlpha + * */ + + + //! + //! + //! + //! + //! + //! + public native @NoException(true) double getAlpha(); + + /** + * \brief Set the beta parameter. + * + * @param beta has different meanings for each operator: + * + * Operation | Usage + * kLINSPACE | the delta value; + * kRANDOMUNIFORM | the maximal value; + * + * If a third input had been used to create this layer, that input is reset to null by this method. + * + * @see getBeta + * */ + + + //! + //! + //! + //! + //! + public native @NoException(true) void setBeta(double beta); + + /** + * \brief Get the value of beta parameter. + * + * @return A double value of beta. + * + * If the third input is present and non-null, + * this function returns a Dims with nbDims = -1. + * + * @see setBeta + * */ + + + //! + //! + //! + //! + //! + //! + //! + //! + //! + //! + public native @NoException(true) double getBeta(); + + /** + * \brief replace an input of this layer with a specific tensor. + * + * @param index the index of the input to set. + * @param tensor the new input tensor + * + * Indices for kLINSPACE are described as: + * + * - 0: Shape tensor, represents the output tensor's dimensions. + * - 1: Start, a scalar, represents the start value. + * - 2: Delta, a 1D tensor, length equals to shape tensor's nbDims, represents the delta value for each dimension. + * + * Indices for kRANDOM_UNIFORM are described as: + * + * - 0: Shape tensor, represents the output tensor's dimensions. + * - 1: Minimum, a scalar, represents the minimum random value. + * - 2: Maximum, a scalar, represents the maximal random value. + * + * Using the corresponding setter resets the input to null. + * + * If either inputs 1 or 2, is non-null, then both must be non-null and have the same data type. + * + * If this function is called for an index greater or equal to getNbInputs(), + * then afterwards getNbInputs() returns index + 1, and any missing intervening + * inputs are set to null. 
+ * */ +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IFullyConnectedLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IFullyConnectedLayer.java new file mode 100644 index 00000000000..06c60520e83 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IFullyConnectedLayer.java @@ -0,0 +1,152 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** \class IFullyConnectedLayer + * + * \brief A fully connected layer in a network definition. + * This layer expects an input tensor of three or more non-batch dimensions. The input is automatically + * reshaped into an {@code MxV} tensor {@code X}, where {@code V} is a product of the last three dimensions and {@code M} + * is a product of the remaining dimensions (where the product over 0 dimensions is defined as 1). For example: + * + * - If the input tensor has shape {@code {C, H, W}}, then the tensor is reshaped into {@code {1, C*H*W}}. + * - If the input tensor has shape {@code {P, C, H, W}}, then the tensor is reshaped into {@code {P, C*H*W}}. 
+ * + * The layer then performs the following operation: + * + * ~~~ + * Y := matmul(X, W^T) + bias + * ~~~ + * + * Where {@code X} is the {@code MxV} tensor defined above, {@code W} is the {@code KxV} weight tensor + * of the layer, and {@code bias} is a row vector size {@code K} that is broadcasted to + * {@code MxK}. {@code K} is the number of output channels, and configurable via + * setNbOutputChannels(). If {@code bias} is not specified, it is implicitly {@code 0}. + * + * The {@code MxK} result {@code Y} is then reshaped such that the last three dimensions are {@code {K, 1, 1}} and + * the remaining dimensions match the dimensions of the input tensor. For example: + * + * - If the input tensor has shape {@code {C, H, W}}, then the output tensor will have shape {@code {K, 1, 1}}. + * - If the input tensor has shape {@code {P, C, H, W}}, then the output tensor will have shape {@code {P, K, 1, 1}}. + * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. + * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IFullyConnectedLayer extends ILayer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IFullyConnectedLayer(Pointer p) { super(p); } + + /** + * \brief Set the number of output channels {@code K} from the fully connected layer. + * + * If executing this layer on DLA, number of output channels must in the range [1,8192]. + * + * @see getNbOutputChannels() + * */ + + + //! + //! + //! + public native @NoException(true) void setNbOutputChannels(int nbOutputs); + + /** + * \brief Get the number of output channels {@code K} from the fully connected layer. + * + * @see setNbOutputChannels() + * */ + + + //! + //! + //! + public native @NoException(true) int getNbOutputChannels(); + + /** + * \brief Set the kernel weights, given as a {@code KxC} matrix in row-major order. 
+ * + * @see getKernelWeights() + * */ + + + //! + //! + //! + public native @NoException(true) void setKernelWeights(@ByVal Weights weights); + + /** + * \brief Get the kernel weights. + * + * @see setKernelWeights() + * */ + + + //! + //! + //! + //! + public native @ByVal @NoException(true) Weights getKernelWeights(); + + /** + * \brief Set the bias weights. + * + * Bias is optional. To omit bias, set the count value in the weights structure to zero. + * + * @see getBiasWeightsWeights() + * */ + + + //! + //! + //! + public native @NoException(true) void setBiasWeights(@ByVal Weights weights); + + /** + * \brief Get the bias weights. + * + * @see setBiasWeightsWeights() + * */ + + + //! + //! + //! + //! + //! + public native @ByVal @NoException(true) Weights getBiasWeights(); + + /** + * \brief Append or replace an input of this layer with a specific tensor + * + * @param index the index of the input to modify. + * @param tensor the new input tensor + * + * For a IFullyConnectedLayer, only index 0 is valid unless explicit precision mode is enabled. + * With explicit precision mode, values 0-1 are valid where value 1 overrides kernel weights. + * Kernel weights tensor (computed at build-time) must be an output of dequantize scale layer (i.e. a scale layer + * with int8 input and float output) in explicit precision network. Conversely, this input tensor can be overridden + * via appropriate set call. The indices are as follows: + * + * - 0: The input activation tensor. + * - 1: The kernel weights tensor (a constant tensor). 
+ * + * If this function is called with a value greater than 0, then the function getNbInputs() changes */ +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IGatherLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IGatherLayer.java new file mode 100644 index 00000000000..4bab3e1c1ba --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IGatherLayer.java @@ -0,0 +1,76 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. + * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IGatherLayer extends ILayer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IGatherLayer(Pointer p) { super(p); } + + /** + * \brief Set the axis to gather on. + * The axis must be less than the number of dimensions in the data input. + * + * @see getGatherAxis() + * */ + + + //! + //! + //! + public native @NoException(true) void setGatherAxis(int axis); + + /** + * \brief Get the axis to gather on. + * + * @see setGatherAxis() + * */ + + + //! + //! + //! 
+ public native @NoException(true) int getGatherAxis(); + + /** + * \brief Set the number of leading dimensions of indices tensor to be handled elementwise. + * k must be 0 if there is an implicit batch dimension. It can be 0 or 1 if there is not an implicit batch + * dimension. + * + * @see getNbElementWiseDims() + * */ + + + //! + //! + //! + public native @NoException(true) void setNbElementWiseDims(int k); + + /** + * \brief Get the number of leading dimensions of indices tensor to be handled elementwise. + * + * @see setNbElementWiseDims() + * */ + public native @NoException(true) int getNbElementWiseDims(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IGpuAllocator.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IGpuAllocator.java new file mode 100644 index 00000000000..8881309101f --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IGpuAllocator.java @@ -0,0 +1,110 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \class IGpuAllocator + * + * \brief Application-implemented class for controlling allocation on the GPU. + * */ +@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IGpuAllocator extends Pointer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public IGpuAllocator(Pointer p) { super(p); } + + /** + * A thread-safe callback implemented by the application to handle acquisition of GPU memory. + * + * @param size The size of the memory required. + * @param alignment The required alignment of memory. Alignment will be zero + * or a power of 2 not exceeding the alignment guaranteed by cudaMalloc. + * Thus this allocator can be safely implemented with cudaMalloc/cudaFree. + * An alignment value of zero indicates any alignment is acceptable. + * @param flags Reserved for future use. In the current release, 0 will be passed. + * + * If an allocation request of size 0 is made, nullptr should be returned. + * + * If an allocation request cannot be satisfied, nullptr should be returned. + * + * \note The implementation must guarantee thread safety for concurrent allocate/free/reallocate + * requests. + * */ + + + //! + //! + //! + //! + //! + public native @Name("allocate") @NoException(true) Pointer _allocate(@Cast("const uint64_t") long size, @Cast("const uint64_t") long alignment, @Cast("const nvinfer1::AllocatorFlags") int flags); + + /** + * A thread-safe callback implemented by the application to handle release of GPU memory. + * + * TensorRT may pass a nullptr to this function if it was previously returned by allocate(). + * + * @param memory The acquired memory. + * + * \note The implementation must guarantee thread safety for concurrent allocate/free/reallocate + * requests. + * */ + + + //! + //! + public native @Name("free") @NoException(true) void _free(Pointer memory); + + /** + * Destructor declared virtual as general good practice for a class with virtual methods. + * TensorRT never calls the destructor for an IGpuAllocator defined by the application. + * */ + + /** + * A thread-safe callback implemented by the application to resize an existing allocation. + * + * Only allocations which were allocated with AllocatorFlag::kRESIZABLE will be resized. 
+ * + * Options are one of: + * * resize in place leaving min(oldSize, newSize) bytes unchanged and return the original address + * * move min(oldSize, newSize) bytes to a new location of sufficient size and return its address + * * return nullptr, to indicate that the request could not be fulfilled. + * + * If nullptr is returned, TensorRT will assume that resize() is not implemented, and that the + * allocation at baseAddr is still valid. + * + * This method is made available for use cases where delegating the resize + * strategy to the application provides an opportunity to improve memory management. + * One possible implementation is to allocate a large virtual device buffer and + * progressively commit physical memory with cuMemMap. CU_MEM_ALLOC_GRANULARITY_RECOMMENDED + * is suggested in this case. + * + * TensorRT may call realloc to increase the buffer by relatively small amounts. + * + * @param baseAddr the address of the original allocation. + * @param alignment The alignment used by the original allocation. + * @param newSize The new memory size required. + * @return the address of the reallocated memory + * + * \note The implementation must guarantee thread safety for concurrent allocate/free/reallocate + * requests. 
+ * */ + public native @NoException(true) Pointer reallocate(Pointer baseAddr, @Cast("uint64_t") long alignment, @Cast("uint64_t") long newSize); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IHostMemory.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IHostMemory.java new file mode 100644 index 00000000000..309f48751ec --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IHostMemory.java @@ -0,0 +1,72 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \class IHostMemory + * + * \brief Class to handle library allocated memory that is accessible to the user. + * + * The memory allocated via the host memory object is owned by the library and will + * be de-allocated when the destroy method is called. + * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. + * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IHostMemory extends INoCopy { + static { Loader.load(); } + /** Default native constructor. */ + public IHostMemory() { super((Pointer)null); allocate(); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public IHostMemory(long size) { super((Pointer)null); allocateArray(size); } + /** Pointer cast constructor. 
Invokes {@link Pointer#Pointer(Pointer)}. */ + public IHostMemory(Pointer p) { super(p); } + private native void allocate(); + private native void allocateArray(long size); + @Override public IHostMemory position(long position) { + return (IHostMemory)super.position(position); + } + @Override public IHostMemory getPointer(long i) { + return new IHostMemory((Pointer)this).offsetAddress(i); + } + + + /** A pointer to the raw data that is owned by the library. */ + public native @NoException(true) Pointer data(); + + /** The size in bytes of the data that was allocated. */ + public native @Cast("std::size_t") @NoException(true) long size(); + + /** The type of the memory that was allocated. */ + + //! + //! + //! + //! + public native @NoException(true) DataType type(); + /** + * Destroy the allocated memory. + * + * @deprecated Deprecated interface will be removed in TensorRT 10.0. + * + * \warning Calling destroy on a managed pointer will result in a double-free error. + * */ + public native @Deprecated @NoException(true) void destroy(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IIdentityLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IIdentityLayer.java new file mode 100644 index 00000000000..f6e78ae79e0 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IIdentityLayer.java @@ -0,0 +1,39 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static 
org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** \class IIdentityLayer + * + * \brief A layer that represents the identity function. + * + * If tensor precision is being explicitly specified, it can be used to convert from one precision to another. + * Other than conversion between the same precision (kFLOAT -> kFLOAT for example), the only valid + * tranformations supported are: (kHALF -> kINT32), (kHALF -> kFLOAT), (kFLOAT -> kINT32), (kINT32 -> kHALF), + * (kINT32 -> kFLOAT), (kBOOL -> kBOOL), (kBOOL -> kHALF), (kBOOL -> kFLOAT). + * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. + * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IIdentityLayer extends ILayer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IIdentityLayer(Pointer p) { super(p); } + +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IInt8Calibrator.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IInt8Calibrator.java new file mode 100644 index 00000000000..ed922916d2d --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IInt8Calibrator.java @@ -0,0 +1,130 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static 
org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \class IInt8Calibrator + * + * \brief Application-implemented interface for calibration. + * + * Calibration is a step performed by the builder when deciding suitable scale factors for 8-bit inference. + * + * It must also provide a method for retrieving representative images which the calibration process can use to examine + * the distribution of activations. It may optionally implement a method for caching the calibration result for reuse + * on subsequent runs. + * */ +@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IInt8Calibrator extends Pointer { + static { Loader.load(); } + /** Default native constructor. */ + public IInt8Calibrator() { super((Pointer)null); allocate(); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public IInt8Calibrator(long size) { super((Pointer)null); allocateArray(size); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IInt8Calibrator(Pointer p) { super(p); } + private native void allocate(); + private native void allocateArray(long size); + @Override public IInt8Calibrator position(long position) { + return (IInt8Calibrator)super.position(position); + } + @Override public IInt8Calibrator getPointer(long i) { + return new IInt8Calibrator((Pointer)this).offsetAddress(i); + } + + /** + * \brief Get the batch size used for calibration batches. + * + * @return The batch size. + * */ + + + //! + //! + //! + //! + //! + @Virtual(true) public native @NoException(true) @Const({false, false, true}) int getBatchSize(); + + /** + * \brief Get a batch of input for calibration. + * + * The batch size of the input must match the batch size returned by getBatchSize(). + * + * @param bindings An array of pointers to device memory that must be updated to point to device memory + * containing each network input data. 
+ * @param names The names of the network input for each pointer in the binding array. + * @param nbBindings The number of pointers in the bindings array. + * @return False if there are no more batches for calibration. + * + * @see getBatchSize() + * */ + + + //! + //! + //! + //! + //! + @Virtual(true) public native @Cast("bool") @NoException(true) boolean getBatch(@Cast("void**") PointerPointer bindings, @Cast("const char**") PointerPointer names, int nbBindings); + + /** + * \brief Load a calibration cache. + * + * Calibration is potentially expensive, so it can be useful to generate the calibration data once, then use it on + * subsequent builds of the network. The cache includes the regression cutoff and quantile values used to generate + * it, and will not be used if these do not batch the settings of the current calibrator. However, the network + * should also be recalibrated if its structure changes, or the input data set changes, and it is the + * responsibility of the application to ensure this. + * + * @param length The length of the cached data, that should be set by the called function. If there is no data, + * this should be zero. + * + * @return A pointer to the cache, or nullptr if there is no data. + * */ + + + //! + //! + //! + //! + @Virtual(true) public native @Const @NoException(true) Pointer readCalibrationCache(@Cast("std::size_t*") @ByRef LongPointer length); + + /** + * \brief Save a calibration cache. + * + * @param ptr A pointer to the data to cache. + * @param length The length in bytes of the data to cache. + * + * @see readCalibrationCache() + * */ + + + //! + //! + //! + @Virtual(true) public native @NoException(true) void writeCalibrationCache(@Const Pointer ptr, @Cast("std::size_t") long length); + + /** + * \brief Get the algorithm used by this calibrator. + * + * @return The algorithm used by the calibrator. 
+ * */ + @Virtual(true) public native @NoException(true) CalibrationAlgoType getAlgorithm(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IInt8EntropyCalibrator.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IInt8EntropyCalibrator.java new file mode 100644 index 00000000000..7664b69b916 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IInt8EntropyCalibrator.java @@ -0,0 +1,48 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * Entropy calibrator. This is the Legacy Entropy calibrator. It is less complicated than the legacy calibrator and + * produces better results. + * */ +@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IInt8EntropyCalibrator extends IInt8Calibrator { + static { Loader.load(); } + /** Default native constructor. */ + public IInt8EntropyCalibrator() { super((Pointer)null); allocate(); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public IInt8EntropyCalibrator(long size) { super((Pointer)null); allocateArray(size); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public IInt8EntropyCalibrator(Pointer p) { super(p); } + private native void allocate(); + private native void allocateArray(long size); + @Override public IInt8EntropyCalibrator position(long position) { + return (IInt8EntropyCalibrator)super.position(position); + } + @Override public IInt8EntropyCalibrator getPointer(long i) { + return new IInt8EntropyCalibrator((Pointer)this).offsetAddress(i); + } + + /** + * Signal that this is the entropy calibrator. + * */ + @Virtual public native @NoException(true) CalibrationAlgoType getAlgorithm(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IInt8EntropyCalibrator2.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IInt8EntropyCalibrator2.java new file mode 100644 index 00000000000..706e4366305 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IInt8EntropyCalibrator2.java @@ -0,0 +1,48 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * Entropy calibrator 2. This is the preferred calibrator. This is the required calibrator for DLA, as it supports per + * activation tensor scaling. + * */ +@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IInt8EntropyCalibrator2 extends IInt8Calibrator { + static { Loader.load(); } + /** Default native constructor. 
*/ + public IInt8EntropyCalibrator2() { super((Pointer)null); allocate(); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public IInt8EntropyCalibrator2(long size) { super((Pointer)null); allocateArray(size); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IInt8EntropyCalibrator2(Pointer p) { super(p); } + private native void allocate(); + private native void allocateArray(long size); + @Override public IInt8EntropyCalibrator2 position(long position) { + return (IInt8EntropyCalibrator2)super.position(position); + } + @Override public IInt8EntropyCalibrator2 getPointer(long i) { + return new IInt8EntropyCalibrator2((Pointer)this).offsetAddress(i); + } + + /** + * Signal that this is the entropy calibrator 2. + * */ + @Virtual public native @NoException(true) CalibrationAlgoType getAlgorithm(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IInt8LegacyCalibrator.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IInt8LegacyCalibrator.java new file mode 100644 index 00000000000..3894da826f7 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IInt8LegacyCalibrator.java @@ -0,0 +1,111 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * Legacy calibrator left for backward compatibility with TensorRT 2.0. 
This calibrator requires user parameterization, + * and is provided as a fallback option if the other calibrators yield poor results. + * */ +@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IInt8LegacyCalibrator extends IInt8Calibrator { + static { Loader.load(); } + /** Default native constructor. */ + public IInt8LegacyCalibrator() { super((Pointer)null); allocate(); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public IInt8LegacyCalibrator(long size) { super((Pointer)null); allocateArray(size); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IInt8LegacyCalibrator(Pointer p) { super(p); } + private native void allocate(); + private native void allocateArray(long size); + @Override public IInt8LegacyCalibrator position(long position) { + return (IInt8LegacyCalibrator)super.position(position); + } + @Override public IInt8LegacyCalibrator getPointer(long i) { + return new IInt8LegacyCalibrator((Pointer)this).offsetAddress(i); + } + + /** + * Signal that this is the legacy calibrator. + * */ + + + //! + //! + //! + @Virtual public native @NoException(true) CalibrationAlgoType getAlgorithm(); + + /** + * \brief The quantile (between 0 and 1) that will be used to select the region maximum when the quantile method + * is in use. + * + * See the user guide for more details on how the quantile is used. + * */ + + + //! + //! + //! + @Virtual(true) public native @NoException(true) @Const({false, false, true}) double getQuantile(); + + /** + * \brief The fraction (between 0 and 1) of the maximum used to define the regression cutoff when using regression + * to determine the region maximum. + * + * See the user guide for more details on how the regression cutoff is used + * */ + + + //! + //! + //! + //! + //! 
+ @Virtual(true) public native @NoException(true) @Const({false, false, true}) double getRegressionCutoff(); + + /** + * \brief Load a histogram. + * + * Histogram generation is potentially expensive, so it can be useful to generate the histograms once, then use + * them when exploring the space of calibrations. The histograms should be regenerated if the network structure + * changes, or the input data set changes, and it is the responsibility of the application to ensure this. + * + * @param length The length of the cached data, that should be set by the called function. If there is no data, + * this should be zero. + * + * @return A pointer to the cache, or nullptr if there is no data. + * */ + + + //! + //! + //! + //! + @Virtual(true) public native @Const @NoException(true) Pointer readHistogramCache(@Cast("std::size_t*") @ByRef LongPointer length); + + /** + * \brief Save a histogram cache. + * + * @param ptr A pointer to the data to cache. + * @param length The length in bytes of the data to cache. 
+ * + * @see readHistogramCache() + * */ + @Virtual(true) public native @NoException(true) void writeHistogramCache(@Const Pointer ptr, @Cast("std::size_t") long length); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IInt8MinMaxCalibrator.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IInt8MinMaxCalibrator.java new file mode 100644 index 00000000000..ba98a49e13f --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IInt8MinMaxCalibrator.java @@ -0,0 +1,47 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * MinMax Calibrator. It supports per activation tensor scaling. + * */ +@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IInt8MinMaxCalibrator extends IInt8Calibrator { + static { Loader.load(); } + /** Default native constructor. */ + public IInt8MinMaxCalibrator() { super((Pointer)null); allocate(); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public IInt8MinMaxCalibrator(long size) { super((Pointer)null); allocateArray(size); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public IInt8MinMaxCalibrator(Pointer p) { super(p); } + private native void allocate(); + private native void allocateArray(long size); + @Override public IInt8MinMaxCalibrator position(long position) { + return (IInt8MinMaxCalibrator)super.position(position); + } + @Override public IInt8MinMaxCalibrator getPointer(long i) { + return new IInt8MinMaxCalibrator((Pointer)this).offsetAddress(i); + } + + /** + * Signal that this is the MinMax Calibrator. + * */ + @Virtual public native @NoException(true) CalibrationAlgoType getAlgorithm(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IIteratorLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IIteratorLayer.java new file mode 100644 index 00000000000..0bbcad6bc6e --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IIteratorLayer.java @@ -0,0 +1,43 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IIteratorLayer extends ILoopBoundaryLayer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IIteratorLayer(Pointer p) { super(p); } + + /** Set axis to iterate over. 
*/ + public native @NoException(true) void setAxis(int axis); + + /** Get axis being iterated over. */ + public native @NoException(true) int getAxis(); + + /** For reverse=false, the layer is equivalent to addGather(tensor, I, 0) where I is a + * scalar tensor containing the loop iteration number. + * For reverse=true, the layer is equivalent to addGather(tensor, M-1-I, 0) where M is the trip count + * computed from TripLimits of kind kCOUNT. + * The default is reverse=false. */ + public native @NoException(true) void setReverse(@Cast("bool") boolean reverse); + + /** True if and only if reversing input. */ + public native @Cast("bool") @NoException(true) boolean getReverse(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILRNLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILRNLayer.java new file mode 100644 index 00000000000..244ab953ad0 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILRNLayer.java @@ -0,0 +1,134 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \class ILRNLayer + * + * \brief A LRN layer in a network definition. + * + * The output size is the same as the input size. + * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. 
+ * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class ILRNLayer extends ILayer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public ILRNLayer(Pointer p) { super(p); } + + /** + * \brief Set the LRN window size. + * + * The window size must be odd and in the range of [1, 15]. + * + * If executing this layer on the DLA, only values in the set, [3, 5, 7, 9], are valid. + * + * @see setWindowStride() + * */ + + + //! + //! + //! + public native @NoException(true) void setWindowSize(int windowSize); + + /** + * \brief Get the LRN window size. + * + * @see getWindowStride() + * */ + + + //! + //! + //! + public native @NoException(true) int getWindowSize(); + + /** + * \brief Set the LRN alpha value. + * + * The valid range is [-1e20, 1e20]. + * @see getAlpha() + * */ + + + //! + //! + //! + public native @NoException(true) void setAlpha(float alpha); + + /** + * \brief Get the LRN alpha value. + * + * @see setAlpha() + * */ + + + //! + //! + //! + public native @NoException(true) float getAlpha(); + + /** + * \brief Set the LRN beta value. + * + * The valid range is [0.01, 1e5f]. + * @see getBeta() + * */ + + + //! + //! + //! + public native @NoException(true) void setBeta(float beta); + + /** + * \brief Get the LRN beta value. + * + * @see setBeta() + * */ + + + //! + //! + //! + public native @NoException(true) float getBeta(); + + /** + * \brief Set the LRN K value. + * + * The valid range is [1e-5, 1e10]. + * @see getK() + * */ + + + //! + //! + //! + public native @NoException(true) void setK(float k); + + /** + * \brief Get the LRN K value. 
+ * + * @see setK() + * */ + public native @NoException(true) float getK(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILayer.java new file mode 100644 index 00000000000..c5ffeef0cf3 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILayer.java @@ -0,0 +1,291 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \class ILayer + * + * \brief Base class for all layer classes in a network definition. + * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. + * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class ILayer extends INoCopy { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public ILayer(Pointer p) { super(p); } + + /** + * \brief Return the type of a layer. + * + * @see LayerType + * */ + + + //! + //! + //! + //! + public native @NoException(true) LayerType getType(); + + /** + * \brief Set the name of a layer. + * + * This method copies the name string. + * + * @see getName() + * */ + + + //! + //! + //! 
+ public native @NoException(true) void setName(String name); + public native @NoException(true) void setName(@Cast("const char*") BytePointer name); + + /** + * \brief Return the name of a layer. + * +

+ * @see setName() + * */ + + + //! + //! + public native @NoException(true) String getName(); + + /** + * \brief Get the number of inputs of a layer. + * */ + + + //! + //! + //! + //! + public native @NoException(true) int getNbInputs(); + + /** + * \brief Get the layer input corresponding to the given index. + * + * @param index The index of the input tensor. + * + * @return The input tensor, or nullptr if the index is out of range or the tensor is optional + * (\ref ISliceLayer and \ref IRNNv2Layer). + * */ + + + //! + //! + public native @NoException(true) ITensor getInput(int index); + + /** + * \brief Get the number of outputs of a layer. + * */ + + + //! + //! + //! + public native @NoException(true) int getNbOutputs(); + + /** + * \brief Get the layer output corresponding to the given index. + * + * @return The indexed output tensor, or nullptr if the index is out of range or the tensor is optional + * (\ref IRNNv2Layer). + * */ + + + //! + //! + //! + //! + //! + public native @NoException(true) ITensor getOutput(int index); + + /** + * \brief Replace an input of this layer with a specific tensor. + * + * @param index the index of the input to modify. + * @param tensor the new input tensor + * + * Except for IFillLayer, ILoopOutputLayer, IResizeLayer, IShuffleLayer, and ISliceLayer, + * this method cannot change the number of inputs to a layer. The index argument must be + * less than the value of getNbInputs(). + * + * See comments for overloads of setInput() for layers with special behavior. + * */ + + + //! + //! + //! + //! + //! + public native @NoException(true) void setInput(int index, @ByRef ITensor tensor); + + /** + * \brief Set the computational precision of this layer + * + * Setting the precision allows TensorRT to choose implementation which run at this computational precision. + * Layer input type would also get inferred from layer computational precision. 
TensorRT could still choose a + * non-conforming fastest implementation ignoring set layer precision. Use BuilderFlag::kSTRICT_TYPES to force + * choose implementations with requested precision. In case no implementation is found with requested precision, + * TensorRT would choose available fastest implementation. If precision is not set, TensorRT will select the layer + * computational precision and layer input type based on performance considerations and the flags specified to the + * builder. + * + * @param dataType the computational precision. + * + * @see getPrecision() precisionIsSet() resetPrecision() + * */ + + + //! + //! + //! + //! + public native @NoException(true) void setPrecision(DataType dataType); + public native @NoException(true) void setPrecision(@Cast("nvinfer1::DataType") int dataType); + + /** + * \brief get the computational precision of this layer + * + * @return the computational precision + * + * @see setPrecision() precisionIsSet() resetPrecision() + * */ + + + //! + //! + //! + //! + public native @NoException(true) DataType getPrecision(); + + /** + * \brief whether the computational precision has been set for this layer + * + * @return whether the computational precision has been explicitly set + * + * @see setPrecision() getPrecision() resetPrecision() + * */ + + + //! + //! + //! + public native @Cast("bool") @NoException(true) boolean precisionIsSet(); + + /** + * \brief reset the computational precision for this layer + * + * @see setPrecision() getPrecision() precisionIsSet() + * */ + + + //! + //! + //! + //! + //! + //! + //! + public native @NoException(true) void resetPrecision(); + + /** + * \brief Set the output type of this layer + * + * Setting the output type constrains TensorRT to choose implementations which generate output data with the + * given type. If it is not set, TensorRT will select output type based on layer computational precision. 
TensorRT + * could still choose non-conforming output type based on fastest implementation. Use BuilderFlag::kSTRICT_TYPES to + * force choose requested output type. In case layer precision is not specified, output type would depend on + * chosen implementation based on performance considerations and the flags specified to the builder. + * + * This method cannot be used to set the data type of the second output tensor of the TopK layer. The data type of + * the second output tensor of the topK layer is always Int32. Also the output type of all layers that are shape + * operations must be DataType::kINT32, and all attempts to set the output type to some other data type will be + * ignored except for issuing an error message. + * + * Note that the layer output type is generally not identical to the data type of the output tensor, as TensorRT + * may insert implicit reformatting operations to convert the former to the latter. Calling layer->setOutputType(i, + * type) has no effect on the data type of the i-th output tensor of layer, and users need to call + * layer->getOutput(i)->setType(type) to change the tensor data type. This is particularly relevant if the tensor + * is marked as a network output, since only setType() [but not setOutputType()] will affect the data + * representation in the corresponding output binding. + * + * @param index the index of the output to set + * @param dataType the type of the output + * + * @see getOutputType() outputTypeIsSet() resetOutputType() + * */ + + + //! + //! + //! + //! + public native @NoException(true) void setOutputType(int index, DataType dataType); + public native @NoException(true) void setOutputType(int index, @Cast("nvinfer1::DataType") int dataType); + + /** + * \brief get the output type of this layer + * + * @param index the index of the output + * @return the output precision. If no precision has been set, DataType::kFLOAT will be returned, + * unless the output type is inherently DataType::kINT32. 
+ * + * @see getOutputType() outputTypeIsSet() resetOutputType() + * */ + + + //! + //! + //! + //! + public native @NoException(true) DataType getOutputType(int index); + + /** + * \brief whether the output type has been set for this layer + * + * @param index the index of the output + * @return whether the output type has been explicitly set + * + * @see setOutputType() getOutputType() resetOutputType() + * */ + + + //! + //! + //! + //! + public native @Cast("bool") @NoException(true) boolean outputTypeIsSet(int index); + + /** + * \brief reset the output type for this layer + * + * @param index the index of the output + * + * @see setOutputType() getOutputType() outputTypeIsSet() + * */ + public native @NoException(true) void resetOutputType(int index); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILogger.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILogger.java new file mode 100644 index 00000000000..7a03125b900 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILogger.java @@ -0,0 +1,80 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \class ILogger + * + * \brief Application-implemented logging interface for the builder, engine and runtime. 
+ * + * Note that although a logger is passed on creation to each instance of a IBuilder or IRuntime interfaces, the logger + * is internally considered a singleton, and thus multiple instances of IRuntime and/or IBuilder must all use the same + * logger. + * */ +@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class ILogger extends Pointer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public ILogger(Pointer p) { super(p); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public ILogger(long size) { super((Pointer)null); allocateArray(size); } + private native void allocateArray(long size); + @Override public ILogger position(long position) { + return (ILogger)super.position(position); + } + @Override public ILogger getPointer(long i) { + return new ILogger((Pointer)this).offsetAddress(i); + } + + /** + * \enum Severity + * + * The severity corresponding to a log message. + * */ + public enum Severity { + /** An internal error has occurred. Execution is unrecoverable. */ + kINTERNAL_ERROR(0), + /** An application error has occurred. */ + kERROR(1), + /** An application error has been discovered, but TensorRT has recovered or fallen back to a default. */ + kWARNING(2), + /** Informational messages with instructional information. */ + kINFO(3), + /** Verbose messages with debugging information. */ + kVERBOSE(4); + + public final int value; + private Severity(int v) { this.value = v; } + private Severity(Severity e) { this.value = e.value; } + public Severity intern() { for (Severity e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } + } + + /** + * A callback implemented by the application to handle logging messages; + * + * @param severity The severity of the message. + * @param msg The log message, null terminated. 
+ * */ + @Virtual(true) public native @NoException(true) void log(Severity severity, String msg); + + public ILogger() { super((Pointer)null); allocate(); } + private native void allocate(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILoop.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILoop.java new file mode 100644 index 00000000000..df4cdee446d --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILoop.java @@ -0,0 +1,127 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * Helper for creating a recurrent subgraph. + * + * An ILoop cannot be added to an INetworkDefinition where hasImplicitBatchDimensions() returns true. + * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class ILoop extends INoCopy { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public ILoop(Pointer p) { super(p); } + + /** + * \brief Create a recurrence layer for this loop with initialValue as its first input. + * + * IRecurrenceLayer requires exactly two inputs. The 2nd input must be added, via method + * IRecurrenceLayer::setInput(1,...) before an Engine can be built. + * */ + + + //! + //! + //! + //! + //! 
+ public native @NoException(true) IRecurrenceLayer addRecurrence(@ByRef ITensor initialValue); + + /** + * \brief Add a trip-count limiter, based on the given tensor. + * + * There may be at most one kCOUNT and one kWHILE limiter for a loop. + * When both trip limits exist, the loop exits when the + * count is reached or condition is falsified. + * It is an error to not add at least one trip limiter. + * + * For kCOUNT, the input tensor must be available before the loop starts. + * + * For kWHILE, the input tensor must be the output of a subgraph that contains + * only layers that are not ITripLimitLayer, IIteratorLayer or ILoopOutputLayer. + * Any IRecurrenceLayers in the subgraph must belong to the same loop as the + * ITripLimitLayer. A trivial example of this rule is that the input to the kWHILE + * is the output of an IRecurrenceLayer for the same loop. + * */ + + + //! + //! + //! + public native @NoException(true) ITripLimitLayer addTripLimit(@ByRef ITensor tensor, TripLimit _limit); + public native @NoException(true) ITripLimitLayer addTripLimit(@ByRef ITensor tensor, @Cast("nvinfer1::TripLimit") int _limit); + + /** + * \brief Return layer that subscripts tensor by loop iteration. + * + * For reverse=false, this is equivalent to addGather(tensor, I, 0) where I is a + * scalar tensor containing the loop iteration number. + * For reverse=true, this is equivalent to addGather(tensor, M-1-I, 0) where M is the trip count + * computed from TripLimits of kind kCOUNT. + * */ + + //! + //! + //! + public native @NoException(true) IIteratorLayer addIterator(@ByRef ITensor tensor, int axis/*=0*/, @Cast("bool") boolean reverse/*=false*/); + public native @NoException(true) IIteratorLayer addIterator(@ByRef ITensor tensor); + + /** \brief Make an output for this loop, based on the given tensor. + * + * axis is the axis for concatenation (if using outputKind of kCONCATENATE or kREVERSE). 
+ * + * If outputKind is kCONCATENATE or kREVERSE, a second input specifying the + * concatenation dimension must be added via method ILoopOutputLayer::setInput. + * */ + + + //! + //! + //! + //! + public native @NoException(true) ILoopOutputLayer addLoopOutput(@ByRef ITensor tensor, LoopOutput outputKind, int axis/*=0*/); + public native @NoException(true) ILoopOutputLayer addLoopOutput(@ByRef ITensor tensor, LoopOutput outputKind); + public native @NoException(true) ILoopOutputLayer addLoopOutput(@ByRef ITensor tensor, @Cast("nvinfer1::LoopOutput") int outputKind, int axis/*=0*/); + public native @NoException(true) ILoopOutputLayer addLoopOutput(@ByRef ITensor tensor, @Cast("nvinfer1::LoopOutput") int outputKind); + + /** + * \brief Set the name of the loop. + * + * The name is used in error diagnostics. + * This method copies the name string. + * + * @see getName() + * */ + + + //! + //! + //! + public native @NoException(true) void setName(String name); + public native @NoException(true) void setName(@Cast("const char*") BytePointer name); + + /** + * \brief Return the name of the loop. 
+ * + * @see setName() + * */ + public native @NoException(true) String getName(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILoopBoundaryLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILoopBoundaryLayer.java new file mode 100644 index 00000000000..e88e0245b37 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILoopBoundaryLayer.java @@ -0,0 +1,30 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class ILoopBoundaryLayer extends ILayer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public ILoopBoundaryLayer(Pointer p) { super(p); } + + /** Return pointer to ILoop associated with this boundary layer. 
*/ + public native @NoException(true) ILoop getLoop(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILoopOutputLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILoopOutputLayer.java new file mode 100644 index 00000000000..8a27f7357b3 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILoopOutputLayer.java @@ -0,0 +1,97 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * An ILoopOutputLayer is the sole way to get output from a loop. + * + * The first input tensor must be defined inside the loop; the output tensor is outside the loop. + * The second input tensor, if present, must be defined outside the loop. + * + * If getLoopOutput() is kLAST_VALUE, a single input must be provided, + * and that input must from a IRecurrenceLayer in the same loop. + * + * If getLoopOutput() is kCONCATENATE or kREVERSE, a second input must be provided. + * The second input must be a scalar “shape tensor”, defined before the loop commences, + * that specifies the concatenation length of the output. + * + * The output tensor has j more dimensions than the input tensor, where + * j == 0 if getLoopOutput() is kLAST_VALUE + * j == 1 if getLoopOutput() is kCONCATENATE or kREVERSE. 
+ * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class ILoopOutputLayer extends ILoopBoundaryLayer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public ILoopOutputLayer(Pointer p) { super(p); } + + + + //! + //! + //! + public native @NoException(true) LoopOutput getLoopOutput(); + + /** + * \brief Set where to insert the concatenation axis. Ignored if getLoopOutput() is kLAST_VALUE. + * + * For example, if the input tensor has dimensions [b,c,d], + * and getLoopOutput() is kCONCATENATE, the output has four dimensions. + * Let a be the value of the second input. + * setAxis(0) causes the output to have dimensions [a,b,c,d]. + * setAxis(1) causes the output to have dimensions [b,a,c,d]. + * setAxis(2) causes the output to have dimensions [b,c,a,d]. + * setAxis(3) causes the output to have dimensions [b,c,d,a]. + * The default axis is 0. + * */ + public native @NoException(true) void setAxis(int axis); + + /** Get axis being concatenated over. */ + + + //! + //! + //! + //! + //! + //! + public native @NoException(true) int getAxis(); + + /** + * \brief Append or replace an input of this layer with a specific tensor + * + * @param index the index of the input to modify. + * @param tensor the new input tensor */ + // + /** Sets the input tensor for the given index. The index must be 0 for a kLAST_VALUE loop output layer. + /** Loop output layer is converted to a kCONCATENATE or kREVERSE loop output layer by calling setInput with an + /** index 1. A kCONCATENATE or kREVERSE loop output layer cannot be converted back to a kLAST_VALUE loop output + /** layer. + /** + /** For a kCONCATENATE or kREVERSE loop output layer, the values 0 and 1 are valid. + /** The indices in the kCONCATENATE or kREVERSE cases are as follows: + /** + /** - 0: Contribution to the output tensor. The contribution must come from inside the loop. 
+ /** - 1: The concatenation length scalar value, must come from outside the loop, as a 0D Int32 shape tensor. + /** + /** If this function is called with a value 1, then the function getNbInputs() changes + /** from returning 1 to 2. + /** */ +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IMatrixMultiplyLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IMatrixMultiplyLayer.java new file mode 100644 index 00000000000..4a14443dcdf --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IMatrixMultiplyLayer.java @@ -0,0 +1,72 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \class IMatrixMultiplyLayer + * + * \brief Layer that represents a Matrix Multiplication. + * + * Let A be op(getInput(0)) and B be op(getInput(1)) where + * op(x) denotes the corresponding MatrixOperation. + * + * When A and B are matrices or vectors, computes the inner product A * B: + * + * matrix * matrix -> matrix + * matrix * vector -> vector + * vector * matrix -> vector + * vector * vector -> scalar + * + * Inputs of higher rank are treated as collections of matrices or vectors. + * The output will be a corresponding collection of matrices, vectors, or scalars. 
+ * + * For a dimension that is not one of the matrix or vector dimensions: + * If the dimension is 1 for one of the tensors but not the other tensor, + * the former tensor is broadcast along that dimension to match the dimension of the latter tensor. + * The number of these extra dimensions for A and B must match. + * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. + * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IMatrixMultiplyLayer extends ILayer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IMatrixMultiplyLayer(Pointer p) { super(p); } + + /** + * \brief Set the operation for an input tensor. + * @param index Input tensor number (0 or 1). + * @param op New operation. + * @see getOperation() + * */ + + + //! + //! + public native @NoException(true) void setOperation(int index, MatrixOperation op); + public native @NoException(true) void setOperation(int index, @Cast("nvinfer1::MatrixOperation") int op); + + /** + * \brief Get the operation for an input tensor. + * @param index Input tensor number (0 or 1). 
+ * @see setOperation() + * */ + public native @NoException(true) MatrixOperation getOperation(int index); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/INetworkDefinition.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/INetworkDefinition.java new file mode 100644 index 00000000000..75e9db3a9a0 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/INetworkDefinition.java @@ -0,0 +1,1480 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \class INetworkDefinition + * + * \brief A network definition for input to the builder. + * + * A network definition defines the structure of the network, and combined with a IBuilderConfig, is built + * into an engine using an IBuilder. An INetworkDefinition can either have an implicit batch dimensions, specified + * at runtime, or all dimensions explicit, full dims mode, in the network definition. When a network has been + * created using createNetwork(), only implicit batch size mode is supported. The function hasImplicitBatchDimension() + * is used to query the mode of the network. + * + * A network with implicit batch dimensions returns the dimensions of a layer without the implicit dimension, + * and instead the batch is specified at execute/enqueue time. 
If the network has all dimensions specified, then + * the first dimension follows elementwise broadcast rules: if it is 1 for some inputs and is some value N for all + * other inputs, then the first dimension of each outut is N, and the inputs with 1 for the first dimension are + * broadcast. Having divergent batch sizes across inputs to a layer is not supported. + * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. + * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class INetworkDefinition extends INoCopy { + static { Loader.load(); } + /** Default native constructor. */ + public INetworkDefinition() { super((Pointer)null); allocate(); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public INetworkDefinition(long size) { super((Pointer)null); allocateArray(size); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public INetworkDefinition(Pointer p) { super(p); } + private native void allocate(); + private native void allocateArray(long size); + @Override public INetworkDefinition position(long position) { + return (INetworkDefinition)super.position(position); + } + @Override public INetworkDefinition getPointer(long i) { + return new INetworkDefinition((Pointer)this).offsetAddress(i); + } + + + /** + * \brief Add an input tensor to the network. + * + * The name of the input tensor is used to find the index into the buffer array for an engine built from + * the network. The volume of the dimensions must be less than 2^30 elements. + * + * For networks with an implicit batch dimension, this volume includes the batch dimension with its length set + * to the maximum batch size. For networks with all explicit dimensions and with wildcard dimensions, the volume + * is based on the maxima specified by an IOptimizationProfile.Dimensions are normally non-negative integers. 
The + * exception is that in networks with all explicit dimensions, -1 can be used as a wildcard for a dimension to + * be specified at runtime. Input tensors with such a wildcard must have a corresponding entry in the + * IOptimizationProfiles indicating the permitted extrema, and the input dimensions must be set by + * IExecutionContext::setBindingDimensions. Different IExecutionContext instances can have different dimensions. + * Wildcard dimensions are only supported for EngineCapability::kSTANDARD. They are not + * supported in safety contexts. DLA does not support Wildcard dimensions. + * + * Tensor dimensions are specified independent of format. For example, if a + * tensor is formatted in "NHWC" or a vectorized format, the dimensions are + * still specified in the order{N, C, H, W}. For 2D images with a channel + * dimension, the last three dimensions are always {C,H,W}. For 3D images + * with a channel dimension, the last four dimensions are always {C,D,H,W}. + * + * @param name The name of the tensor. + * @param type The type of the data held in the tensor. + * @param dimensions The dimensions of the tensor. + * + * \warning It is an error to specify a wildcard value on a dimension that is determined by trained parameters. + * + * \warning If run on DLA with explicit dimensions, only leading dimension can be a wildcard. And provided profile + * must have same minimum, optimum, and maximum dimensions. + * + * @see ITensor + * + * @return The new tensor or nullptr if there is an error. + * */ + + + //! + //! + //! + //! + public native @NoException(true) ITensor addInput(String name, DataType type, @ByVal @Cast("nvinfer1::Dims*") Dims32 dimensions); + public native @NoException(true) ITensor addInput(@Cast("const char*") BytePointer name, @Cast("nvinfer1::DataType") int type, @ByVal @Cast("nvinfer1::Dims*") Dims32 dimensions); + + /** + * \brief Mark a tensor as a network output. + * + * @param tensor The tensor to mark as an output tensor. 
+ * + * \warning It is an error to mark a network input as an output. + * */ + + + //! + //! + //! + //! + //! + //! + //! + public native @NoException(true) void markOutput(@ByRef ITensor tensor); + + /** + * \brief Add a convolution layer to the network. + * + * @param input The input tensor to the convolution. + * @param nbOutputMaps The number of output feature maps for the convolution. + * @param kernelSize The HW-dimensions of the convolution kernel. + * @param kernelWeights The kernel weights for the convolution. + * @param biasWeights The optional bias weights for the convolution. + * + * @see IConvolutionLayer + * + * \warning It is an error to specify a wildcard value for the 'C' dimension of the input tensor. + * \warning Int32 tensors are not valid input tensors. + * + * @return The new convolution layer, or nullptr if it could not be created. + * + * @deprecated Superseded by addConvolutionNd and will be removed in TensorRT 9.0. + * */ + + + //! + //! + //! + //! + //! + //! + public native @Deprecated @NoException(true) IConvolutionLayer addConvolution( + @ByRef ITensor input, int nbOutputMaps, @ByVal DimsHW kernelSize, @ByVal Weights kernelWeights, @ByVal Weights biasWeights); + + /** + * \brief Add a fully connected layer to the network. + * + * @param input The input tensor to the layer. + * @param nbOutputs The number of outputs of the layer. + * @param kernelWeights The kernel weights for the fully connected layer. + * @param biasWeights The optional bias weights for the fully connected layer. + * + * @see IFullyConnectedLayer + * + * \warning It is an error to specify a wildcard value for the 'C' dimension of the input tensor. + * \warning Int32 tensors are not valid input tensors. + * + * @return The new fully connected layer, or nullptr if it could not be created. + * */ + + + //! + //! + //! + //! + //! + //! 
+ public native @NoException(true) IFullyConnectedLayer addFullyConnected( + @ByRef ITensor input, int nbOutputs, @ByVal Weights kernelWeights, @ByVal Weights biasWeights); + + /** + * \brief Add an activation layer to the network. + * + * @param input The input tensor to the layer. + * @param type The type of activation function to apply. + * + * Note that the setAlpha() and setBeta() methods must be used on the + * output for activations that require these parameters. + * + * @see IActivationLayer ActivationType + * \warning Int32 tensors are not valid input tensors. + * + * @return The new activation layer, or nullptr if it could not be created. + * */ + + + //! + //! + //! + //! + //! + //! + public native @NoException(true) IActivationLayer addActivation(@ByRef ITensor input, ActivationType type); + public native @NoException(true) IActivationLayer addActivation(@ByRef ITensor input, @Cast("nvinfer1::ActivationType") int type); + + /** + * \brief Add a pooling layer to the network. + * + * @param input The input tensor to the layer. + * @param type The type of pooling to apply. + * @param windowSize The size of the pooling window. + * + * @see IPoolingLayer PoolingType + * \warning Int32 tensors are not valid input tensors. + * + * @return The new pooling layer, or nullptr if it could not be created. + * + * @deprecated Superseded by addPoolingNd and will be removed in TensorRT 9.0. + * */ + + + //! + //! + //! + //! + //! + public native @Deprecated @NoException(true) IPoolingLayer addPooling(@ByRef ITensor input, PoolingType type, @ByVal DimsHW windowSize); + public native @Deprecated @NoException(true) IPoolingLayer addPooling(@ByRef ITensor input, @Cast("nvinfer1::PoolingType") int type, @ByVal DimsHW windowSize); + + /** + * \brief Add a LRN layer to the network. + * + * @param input The input tensor to the layer. + * @param window The size of the window. + * @param alpha The alpha value for the LRN computation. 
+ * @param beta The beta value for the LRN computation. + * @param k The k value for the LRN computation. + * + * @see ILRNLayer + * \warning Int32 tensors are not valid input tensors. + * + * @return The new LRN layer, or nullptr if it could not be created. + * */ + + + //! + //! + //! + //! + //! + //! + public native @NoException(true) ILRNLayer addLRN(@ByRef ITensor input, int window, float alpha, float beta, float k); + + /** + * \brief Add a Scale layer to the network. + * + * @param input The input tensor to the layer. + * This tensor is required to have a minimum of 3 dimensions in implicit batch mode + * and a minimum of 4 dimensions in explicit batch mode. + * @param mode The scaling mode. + * @param shift The shift value. + * @param scale The scale value. + * @param power The power value. + * + * If the weights are available, then the size of weights are dependent on the ScaleMode. + * For ::kUNIFORM, the number of weights equals 1. + * For ::kCHANNEL, the number of weights equals the channel dimension. + * For ::kELEMENTWISE, the number of weights equals the product of the last three dimensions of the input. + * + * @see addScaleNd + * @see IScaleLayer + * \warning Int32 tensors are not valid input tensors. + * + * @return The new Scale layer, or nullptr if it could not be created. + * */ + + + //! + //! + //! + //! + public native @NoException(true) IScaleLayer addScale(@ByRef ITensor input, ScaleMode mode, @ByVal Weights shift, @ByVal Weights scale, @ByVal Weights power); + public native @NoException(true) IScaleLayer addScale(@ByRef ITensor input, @Cast("nvinfer1::ScaleMode") int mode, @ByVal Weights shift, @ByVal Weights scale, @ByVal Weights power); + + /** + * \brief Add a SoftMax layer to the network. + * + * @see ISoftMaxLayer + * \warning Int32 tensors are not valid input tensors. + * + * @return The new SoftMax layer, or nullptr if it could not be created. + * */ + + + //! + //! + //! + //! + //! + //! 
+ public native @NoException(true) ISoftMaxLayer addSoftMax(@ByRef ITensor input); + + /** + * \brief Add a concatenation layer to the network. + * + * @param inputs The input tensors to the layer. + * @param nbInputs The number of input tensors. + * + * @see IConcatenationLayer + * + * @return The new concatenation layer, or nullptr if it could not be created. + * + * \warning All tensors must have the same dimensions except along the concatenation axis. + * */ + + + //! + //! + //! + //! + //! + //! + //! + public native @NoException(true) IConcatenationLayer addConcatenation(@Cast("nvinfer1::ITensor*const*") PointerPointer inputs, int nbInputs); + public native @NoException(true) IConcatenationLayer addConcatenation(@ByPtrPtr ITensor inputs, int nbInputs); + + /** + * \brief Add a deconvolution layer to the network. + * + * @param input The input tensor to the layer. + * @param nbOutputMaps The number of output feature maps. + * @param kernelSize The HW-dimensions of the deconvolution kernel. + * @param kernelWeights The kernel weights for the deconvolution. + * @param biasWeights The optional bias weights for the deconvolution. + * + * @see IDeconvolutionLayer + * + * \warning It is an error to specify a wildcard value for the 'C' dimension of the input tensor. + * \warning Int32 tensors are not valid input tensors. + * + * @return The new deconvolution layer, or nullptr if it could not be created. + * + * @deprecated Superseded by addDeconvolutionNd and will be removed in TensorRT 9.0. + * */ + + + //! + //! + //! + //! + //! + //! + //! + public native @Deprecated @NoException(true) IDeconvolutionLayer addDeconvolution( + @ByRef ITensor input, int nbOutputMaps, @ByVal DimsHW kernelSize, @ByVal Weights kernelWeights, @ByVal Weights biasWeights); + + /** + * \brief Add an elementwise layer to the network. + * + * @param input1 The first input tensor to the layer. + * @param input2 The second input tensor to the layer. 
+ * @param op The binary operation that the layer applies. + * + * The input tensors must have the same number of dimensions. + * For each dimension, their lengths must match, or one of them must be one. + * In the latter case, the tensor is broadcast along that axis. + * + * The output tensor has the same number of dimensions as the inputs. + * For each dimension, its length is the maximum of the lengths of the + * corresponding input dimension. + * + * @see IElementWiseLayer + * \warning For shape tensors, ElementWiseOperation::kPOW is not a valid op. + * + * @return The new elementwise layer, or nullptr if it could not be created. + * */ + + + //! + //! + //! + //! + //! + //! + //! + public native @NoException(true) IElementWiseLayer addElementWise(@ByRef ITensor input1, @ByRef ITensor input2, ElementWiseOperation op); + public native @NoException(true) IElementWiseLayer addElementWise(@ByRef ITensor input1, @ByRef ITensor input2, @Cast("nvinfer1::ElementWiseOperation") int op); + + /** + * \brief Add a unary layer to the network. + * + * @param input The input tensor to the layer. + * @param operation The operation to apply. + * + * @see IUnaryLayer + * + * \warning Int32 tensors are not valid input tensors. + * + * \warning Shape tensors are not supported as outputs. + * + * @return The new unary layer, or nullptr if it could not be created + * */ + + //! + //! + //! + //! + //! + public native @NoException(true) IUnaryLayer addUnary(@ByRef ITensor input, UnaryOperation operation); + public native @NoException(true) IUnaryLayer addUnary(@ByRef ITensor input, @Cast("nvinfer1::UnaryOperation") int operation); + + /** \brief Add a padding layer to the network. + * + * @param input The input tensor to the layer. + * @param prePadding The padding to apply to the start of the tensor. + * @param postPadding The padding to apply to the end of the tensor. + * + * @see IPaddingLayer + * + * @return The new padding layer, or nullptr if it could not be created. 
+ * + * @deprecated Superseded by addPaddingNd and will be removed in TensorRT 9.0. + * */ + + + //! + //! + //! + //! + //! + public native @Deprecated @NoException(true) IPaddingLayer addPadding(@ByRef ITensor input, @ByVal DimsHW prePadding, @ByVal DimsHW postPadding); + + /** + * \brief Add a shuffle layer to the network. + * + * @param input The input tensor to the layer. + * + * @see IShuffleLayer + * + * @return The new shuffle layer, or nullptr if it could not be created. + * */ + + + //! + //! + //! + //! + public native @NoException(true) IShuffleLayer addShuffle(@ByRef ITensor input); + + /** + * \brief Get the number of layers in the network. + * + * @return The number of layers in the network. + * + * @see getLayer() + * */ + + + //! + //! + //! + //! + //! + public native @NoException(true) int getNbLayers(); + + /** + * \brief Get the layer specified by the given index. + * + * @param index The index of the layer. + * + * @return The layer, or nullptr if the index is out of range. + * + * @see getNbLayers() + * */ + + + //! + //! + //! + //! + public native @NoException(true) ILayer getLayer(int index); + + /** + * \brief Get the number of inputs in the network. + * + * @return The number of inputs in the network. + * + * @see getInput() + * */ + + + //! + //! + //! + //! + //! + //! + public native @NoException(true) int getNbInputs(); + + /** + * \brief Get the input tensor specified by the given index. + * + * @param index The index of the input tensor. + * + * @return The input tensor, or nullptr if the index is out of range. + * + * \note adding inputs invalidates indexing here + * + * @see getNbInputs() + * */ + + + //! + //! + //! + //! + //! + public native @NoException(true) ITensor getInput(int index); + + /** + * \brief Get the number of outputs in the network. + * + * The outputs include those marked by markOutput or markOutputForShapes. + * + * @return The number of outputs in the network. + * + * @see getOutput() + * */ + + + //! + //! 
+ //! + //! + //! + //! + public native @NoException(true) int getNbOutputs(); + + /** + * \brief Get the output tensor specified by the given index. + * + * @param index The index of the output tensor. + * + * @return The output tensor, or nullptr if the index is out of range. + * + * \note adding inputs invalidates indexing here + * + * @see getNbOutputs() + * */ + + + //! + //! + //! + //! + public native @NoException(true) ITensor getOutput(int index); + + /** + * \brief Destroy this INetworkDefinition object. + * + * @deprecated Deprecated interface will be removed in TensorRT 10.0. + * + * \warning Calling destroy on a managed pointer will result in a double-free error. + * */ + + + //! + //! + //! + //! + //! + //! + //! + //! + public native @Deprecated @NoException(true) void destroy(); + + /** + * \brief Add a reduce layer to the network. + * + * @param input The input tensor to the layer. + * @param operation The reduction operation to perform. + * @param reduceAxes The reduction dimensions. + * The bit in position i of bitmask reduceAxes corresponds to explicit dimension i if result. + * E.g., the least significant bit corresponds to the first explicit dimension and the next to least + * significant bit corresponds to the second explicit dimension. + * + * @param keepDimensions The boolean that specifies whether or not to keep the reduced dimensions in the + * output of the layer. + * + * The reduce layer works by performing an operation specified by \p operation to reduce the tensor \p input across + * the + * axes specified by \p reduceAxes. + * + * @see IReduceLayer + * + * \warning If output is a shape tensor, ReduceOperation::kAVG is unsupported. + * + * @return The new reduce layer, or nullptr if it could not be created. + * */ + + + //! + //! + //! + //! + //! + //! + //! + //! + //! + //! + //! + //! 
+ public native @NoException(true) IReduceLayer addReduce( + @ByRef ITensor input, ReduceOperation operation, @Cast("uint32_t") int reduceAxes, @Cast("bool") boolean keepDimensions); + public native @NoException(true) IReduceLayer addReduce( + @ByRef ITensor input, @Cast("nvinfer1::ReduceOperation") int operation, @Cast("uint32_t") int reduceAxes, @Cast("bool") boolean keepDimensions); + + /** + * \brief Add a TopK layer to the network. + * + * The TopK layer has two outputs of the same dimensions. The first contains data values, + * the second contains index positions for the values. Output values are sorted, largest first + * for operation kMAX and smallest first for operation kMIN. + * + * Currently only values of K up to 1024 are supported. + * + * @param input The input tensor to the layer. + * + * @param op Operation to perform. + * + * @param k Number of elements to keep. + * + * @param reduceAxes The reduction dimensions. + * The bit in position i of bitmask reduceAxes corresponds to explicit dimension i of the result. + * E.g., the least significant bit corresponds to the first explicit dimension and the next to least + * significant bit corresponds to the second explicit dimension. + * + * Currently reduceAxes must specify exactly one dimension, and it must be one of the last four dimensions. + * + * @see ITopKLayer + * + * \warning Int32 tensors are not valid input tensors. + * + * @return The new TopK layer, or nullptr if it could not be created. + * */ + + + //! + //! + //! + //! + //! + public native @NoException(true) ITopKLayer addTopK(@ByRef ITensor input, TopKOperation op, int k, @Cast("uint32_t") int reduceAxes); + public native @NoException(true) ITopKLayer addTopK(@ByRef ITensor input, @Cast("nvinfer1::TopKOperation") int op, int k, @Cast("uint32_t") int reduceAxes); + + /** + * \brief Add a gather layer to the network. + * + * @param data The tensor to gather values from. 
+ * @param indices The tensor to get indices from to populate the output tensor. + * @param axis The axis in the data tensor to gather on. + * + * @see IGatherLayer + * + * @return The new gather layer, or nullptr if it could not be created. + * */ + + + //! + //! + //! + //! + //! + //! + public native @NoException(true) IGatherLayer addGather(@ByRef ITensor data, @ByRef ITensor indices, int axis); + + /** + * \brief Add a RaggedSoftMax layer to the network. + * + * @param input The ZxS input tensor. + * @param bounds The Zx1 bounds tensor. + * + * @see IRaggedSoftMaxLayer + * + * \warning The bounds tensor cannot have the last dimension be the wildcard character. + * \warning Int32 tensors are not valid input tensors. + * + * @return The new RaggedSoftMax layer, or nullptr if it could not be created. + * */ + + + //! + //! + //! + //! + //! + //! + public native @NoException(true) IRaggedSoftMaxLayer addRaggedSoftMax(@ByRef ITensor input, @ByRef ITensor bounds); + + /** + * \brief Add a MatrixMultiply layer to the network. + * + * @param input0 The first input tensor (commonly A). + * @param op0 The operation to apply to input0. + * @param input1 The second input tensor (commonly B). + * @param op1 The operation to apply to input1. + * + * @see IMatrixMultiplyLayer + * + * \warning Int32 tensors are not valid input tensors. + * + * @return The new matrix multiply layer, or nullptr if it could not be created. + * */ + + + //! + //! + //! + //! + //! + //! + //! + //! + public native @NoException(true) IMatrixMultiplyLayer addMatrixMultiply( + @ByRef ITensor input0, MatrixOperation op0, @ByRef ITensor input1, MatrixOperation op1); + public native @NoException(true) IMatrixMultiplyLayer addMatrixMultiply( + @ByRef ITensor input0, @Cast("nvinfer1::MatrixOperation") int op0, @ByRef ITensor input1, @Cast("nvinfer1::MatrixOperation") int op1); + + /** + * \brief Add a constant layer to the network. + * + * @param dimensions The dimensions of the constant. 
+ * @param weights The constant value, represented as weights. + * + * @see IConstantLayer + * + * @return The new constant layer, or nullptr if it could not be created. + * + * If weights.type is DataType::kINT32, the output is a tensor of 32-bit indices. + * Otherwise the output is a tensor of real values and the output type will be + * follow TensorRT's normal precision rules. + * + * If tensors in the network have an implicit batch dimension, the constant + * is broadcast over that dimension. + * + * If a wildcard dimension is used, the volume of the runtime dimensions must equal + * the number of weights specified. + * */ + + + //! + //! + //! + //! + //! + //! + //! + //! + //! + //! + //! + //! + //! + //! + //! + //! + //! + //! + public native @NoException(true) IConstantLayer addConstant(@ByVal @Cast("nvinfer1::Dims*") Dims32 dimensions, @ByVal Weights weights); + + /** + * \brief Add an \p layerCount deep RNN layer to the network with \p hiddenSize internal states that can + * take a batch with fixed or variable sequence lengths. + * + * @param input The input tensor to the layer (see below). + * @param layerCount The number of layers in the RNN. + * @param hiddenSize Size of the internal hidden state for each layer. + * @param maxSeqLen Maximum sequence length for the input. + * @param op The type of RNN to execute. + * + * By default, the layer is configured with RNNDirection::kUNIDIRECTION and RNNInputMode::kLINEAR. + * To change these settings, use IRNNv2Layer::setDirection() and IRNNv2Layer::setInputMode(). + * + * %Weights and biases for the added layer should be set using + * IRNNv2Layer::setWeightsForGate() and IRNNv2Layer::setBiasForGate() prior + * to building an engine using this network. + * + * The input tensors must be of the type DataType::kFLOAT or DataType::kHALF. + * The layout of the weights is row major and must be the same datatype as the input tensor. + * \p weights contain 8 matrices and \p bias contains 8 vectors. 
+ * + * See IRNNv2Layer::setWeightsForGate() and IRNNv2Layer::setBiasForGate() for details on the required input + * format for \p weights and \p bias. + * + * The \p input ITensor should contain zero or more index dimensions {@code {N1, ..., Np}}, followed by + * two dimensions, defined as follows: + * - {@code S_max} is the maximum allowed sequence length (number of RNN iterations) + * - {@code E} specifies the embedding length (unless ::kSKIP is set, in which case it should match + * getHiddenSize()). + * + * By default, all sequences in the input are assumed to be size \p maxSeqLen. To provide explicit sequence + * lengths for each input sequence in the batch, use IRNNv2Layer::setSequenceLengths(). + * + * The RNN layer outputs up to three tensors. + * + * The first output tensor is the output of the final RNN layer across all timesteps, with dimensions + * {@code {N1, ..., Np, S_max, H}}: + * + * - {@code N1..Np} are the index dimensions specified by the input tensor + * - {@code S_max} is the maximum allowed sequence length (number of RNN iterations) + * - {@code H} is an output hidden state (equal to getHiddenSize() or 2x getHiddenSize()) + * + * The second tensor is the final hidden state of the RNN across all layers, and if the RNN + * is an LSTM (i.e. getOperation() is ::kLSTM), then the third tensor is the final cell state + * of the RNN across all layers. Both the second and third output tensors have dimensions + * {@code {N1, ..., Np, L, H}}: + * + * - {@code N1..Np} are the index dimensions specified by the input tensor + * - {@code L} is the number of layers in the RNN, equal to getLayerCount() if getDirection is ::kUNIDIRECTION, + * and 2x getLayerCount() if getDirection is ::kBIDIRECTION. In the bi-directional + * case, layer {@code l}'s final forward hidden state is stored in {@code L = 2*l}, and + * final backward hidden state is stored in {@code L= 2*l + 1}. + * - {@code H} is the hidden state for each layer, equal to getHiddenSize(). 
+ * + * @see IRNNv2Layer + * + * @deprecated Superseded by INetworkDefinition::addLoop and will be removed in TensorRT 9.0. + * + * \warning RNN inputs do not support wildcard dimensions or explicit batch size networks. + * \warning Int32 tensors are not valid input tensors, only for sequence lengths. + * + * @return The new RNN layer, or nullptr if it could not be created. + * */ + + + //! + //! + //! + //! + //! + //! + public native @Deprecated @NoException(true) IRNNv2Layer addRNNv2( + @ByRef ITensor input, int layerCount, int hiddenSize, int maxSeqLen, RNNOperation op); + public native @Deprecated @NoException(true) IRNNv2Layer addRNNv2( + @ByRef ITensor input, int layerCount, int hiddenSize, int maxSeqLen, @Cast("nvinfer1::RNNOperation") int op); + + /** + * \brief Add an identity layer. + * + * @param input The input tensor to the layer. + * + * @see IIdentityLayer + * + * \warning Int32 tensors are not valid input tensors. + * + * @return The new identity layer, or nullptr if it could not be created. + * */ + + + //! + //! + //! + //! + public native @NoException(true) IIdentityLayer addIdentity(@ByRef ITensor input); + + /** + * \brief remove a tensor from the network definition. + * + * @param tensor the tensor to remove + * + * It is illegal to remove a tensor that is the input or output of a layer. + * if this method is called with such a tensor, a warning will be emitted on the log + * and the call will be ignored. Its intended use is to remove detached tensors after + * e.g. concatenating two networks with Layer::setInput(). + * */ + + + //! + //! + //! + //! + public native @NoException(true) void removeTensor(@ByRef ITensor tensor); + + /** + * \brief unmark a tensor as a network output. + * + * @param tensor The tensor to unmark as an output tensor. + * + * see markOutput() + * */ + + + //! + //! + //! + //! + //! + //! 
+ public native @NoException(true) void unmarkOutput(@ByRef ITensor tensor); + + /** + * \brief Add a plugin layer to the network using the IPluginV2 interface. + * + * @param inputs The input tensors to the layer. + * @param nbInputs The number of input tensors. + * @param plugin The layer plugin. + * + * @see IPluginV2Layer + * + * \warning Dimension wildcard are only supported with IPluginV2DynamicExt or IPluginV2IOExt plugins. + * \warning Int32 tensors are not valid input tensors. + * + * @return The new plugin layer, or nullptr if it could not be created. + * */ + + + //! + //! + //! + //! + //! + //! + public native @NoException(true) IPluginV2Layer addPluginV2(@Cast("nvinfer1::ITensor*const*") PointerPointer inputs, int nbInputs, @ByRef IPluginV2 plugin); + public native @NoException(true) IPluginV2Layer addPluginV2(@ByPtrPtr ITensor inputs, int nbInputs, @ByRef IPluginV2 plugin); + + /** + * \brief Add a slice layer to the network. + * + * @param input The input tensor to the layer. + * @param start The start offset + * @param size The output dimension + * @param stride The slicing stride + * + * Positive, negative, zero stride values, and combinations of them in different dimensions are allowed. + * + * @see ISliceLayer + * + * @return The new slice layer, or nullptr if it could not be created. + * */ + + + //! + //! + //! + //! + //! + //! + //! + public native @NoException(true) ISliceLayer addSlice(@ByRef ITensor input, @ByVal @Cast("nvinfer1::Dims*") Dims32 start, @ByVal @Cast("nvinfer1::Dims*") Dims32 size, @ByVal @Cast("nvinfer1::Dims*") Dims32 stride); + + /** + * \brief Sets the name of the network. + * + * @param name The name to assign to this network. + * + * Set the name of the network so that it can be associated with a built + * engine. The \p name must be a zero delimited C-style string of length + * no greater than 128 characters. 
TensorRT makes no use of this string + * except storing it as part of the engine so that it may be retrieved at + * runtime. A name unique to the builder will be generated by default. + * + * This method copies the name string. + * + * @see INetworkDefinition::getName(), ISafeCudaEngine::getName() + * + * @return none + * */ + + + //! + //! + //! + //! + //! + public native @NoException(true) void setName(String name); + public native @NoException(true) void setName(@Cast("const char*") BytePointer name); + + /** + * \brief Returns the name associated with the network. + * + * The memory pointed to by getName() is owned by the INetworkDefinition object. + * + * @see INetworkDefinition::setName() + * + * @return A zero delimited C-style string representing the name of the network. + * */ + + + //! + //! + //! + //! + //! + //! + //! + public native @NoException(true) String getName(); + + /** + * \brief Add a shape layer to the network. + * + * @param input The input tensor to the layer. + * + * @see IShapeLayer + * + * \warning addShape is only supported when hasImplicitBatchDimensions is false. + * + * \warning input to addShape cannot contain wildcard dimension values. + * + * @return The new shape layer, or nullptr if it could not be created. + * */ + + + //! + //! + //! + //! + //! + //! + public native @NoException(true) IShapeLayer addShape(@ByRef ITensor input); + + /** + * \brief Query whether the network was created with an implicit batch dimension. + * + * @return True if tensors have implicit batch dimension, false otherwise. + * + * This is a network-wide property. Either all tensors in the network + * have an implicit batch dimension or none of them do. + * + * hasImplicitBatchDimension() is true if and only if this INetworkDefinition + * was created with createNetwork() or createNetworkV2() without + * NetworkDefinitionCreationFlag::kEXPLICIT_BATCH flag. + * + * @see createNetworkV2 + * */ + + + //! + //! + //! + //! + //! + //! + //! 
+ public native @Cast("bool") @NoException(true) boolean hasImplicitBatchDimension(); + + /** + * \brief Enable tensor's value to be computed by IExecutionContext::getShapeBinding. + * + * @return True if successful, false if tensor is already marked as an output. + * + * The tensor must be of type DataType::kINT32 and have no more than one dimension. + * + * \warning The tensor must have dimensions that can be determined to be constants at build time. + * + * \warning It is an error to mark a network input as a shape output. + * + * @see isShapeBinding(), getShapeBinding() + * */ + + + //! + //! + //! + //! + public native @Cast("bool") @NoException(true) boolean markOutputForShapes(@ByRef ITensor tensor); + + /** + * \brief Undo markOutputForShapes. + * + * \warning inputs to addShape cannot contain wildcard dimension values. + * + * @return True if successful, false if tensor is not marked as an output. + * */ + + + //! + //! + //! + //! + //! + //! + public native @Cast("bool") @NoException(true) boolean unmarkOutputForShapes(@ByRef ITensor tensor); + + /** + * \brief Add a parametric ReLU layer to the network. + * + * @param input The input tensor to the layer. + * @param slope The slope tensor to the layer. This tensor should be unidirectionally broadcastable + * to the input tensor. + * + * @see IParametricReLULayer + * + * \warning Int32 tensors are not valid input tensors. + * + * @return The new parametric ReLU layer, or nullptr if it could not be created. + * */ + + + //! + //! + //! + //! + //! + //! + public native @NoException(true) IParametricReLULayer addParametricReLU(@ByRef ITensor input, @ByRef ITensor slope); + + /** + * \brief Add a multi-dimension convolution layer to the network. + * + * @param input The input tensor to the convolution. + * @param nbOutputMaps The number of output feature maps for the convolution. + * @param kernelSize The multi-dimensions of the convolution kernel. 
+ * @param kernelWeights The kernel weights for the convolution. + * @param biasWeights The optional bias weights for the convolution. + * + * @see IConvolutionLayer + * + * \warning It is an error to specify a wildcard value for the 'C' dimension of the input tensor. + * \warning Int32 tensors are not valid input tensors. + * \warning Only 2D or 3D convolution is supported. + * + * @return The new convolution layer, or nullptr if it could not be created. + * */ + + + //! + //! + //! + //! + //! + //! + public native @NoException(true) IConvolutionLayer addConvolutionNd( + @ByRef ITensor input, int nbOutputMaps, @ByVal @Cast("nvinfer1::Dims*") Dims32 kernelSize, @ByVal Weights kernelWeights, @ByVal Weights biasWeights); + + /** + * \brief Add a multi-dimension pooling layer to the network. + * + * @param input The input tensor to the layer. + * @param type The type of pooling to apply. + * @param windowSize The size of the pooling window. + * + * @see IPoolingLayer PoolingType + * + * \warning Int32 tensors are not valid input tensors. + * \warning Only 2D or 3D pooling is supported. + * + * @return The new pooling layer, or nullptr if it could not be created. + * */ + + + //! + //! + //! + //! + //! + public native @NoException(true) IPoolingLayer addPoolingNd(@ByRef ITensor input, PoolingType type, @ByVal @Cast("nvinfer1::Dims*") Dims32 windowSize); + public native @NoException(true) IPoolingLayer addPoolingNd(@ByRef ITensor input, @Cast("nvinfer1::PoolingType") int type, @ByVal @Cast("nvinfer1::Dims*") Dims32 windowSize); + + /** + * \brief Add a multi-dimension deconvolution layer to the network. + * + * @param input The input tensor to the layer. + * @param nbOutputMaps The number of output feature maps. + * @param kernelSize The multi-dimensions of the deconvolution kernel. + * @param kernelWeights The kernel weights for the deconvolution. + * @param biasWeights The optional bias weights for the deconvolution. 
+ * + * @see IDeconvolutionLayer + * + * \warning It is an error to specify a wildcard value for the 'C' dimension of the input tensor. + * \warning Int32 tensors are not valid input tensors. + * \warning Only 2D or 3D deconvolution is supported. */ + // + /** @return The new deconvolution layer, or nullptr if it could not be created. + /** */ + + + //! + //! + //! + //! + //! + //! + //! + //! + //! + public native @NoException(true) IDeconvolutionLayer addDeconvolutionNd( + @ByRef ITensor input, int nbOutputMaps, @ByVal @Cast("nvinfer1::Dims*") Dims32 kernelSize, @ByVal Weights kernelWeights, @ByVal Weights biasWeights); + + /** + * \brief Add a multi-dimension scale layer to the network. + * + * @param input The input tensor to the layer. + * @param mode The scaling mode. + * @param shift The shift value. + * @param scale The scale value. + * @param power The power value. + * @param channelAxis The channel axis. + * + * If the weights are available, then the size of weights are dependent on the ScaleMode. + * For ::kUNIFORM, the number of weights equals 1. + * For ::kCHANNEL, the number of weights equals the channel dimension. + * For ::kELEMENTWISE, the number of weights equals the product of all input dimensions at channelAxis and beyond. + * + * For example, if the inputs dimensions are [A,B,C,D,E,F], and channelAxis=2: + * For ::kUNIFORM, the number of weights is equal to 1. + * For ::kCHANNEL, the number of weights is C. + * For ::kELEMENTWISE, the number of weights is C*D*E*F. + * + * channelAxis can also be set explicitly using setChannelAxis(). + * + * @see IScaleLayer + * @see setChannelAxis() + * + * \warning Int32 tensors are not valid input tensors. + * \warning Only 2D or 3D scale is supported. + * + * @return The new Scale layer, or nullptr if it could not be created. + * */ + + //! + //! + //! + //! + //! 
+ public native @NoException(true) IScaleLayer addScaleNd( + @ByRef ITensor input, ScaleMode mode, @ByVal Weights shift, @ByVal Weights scale, @ByVal Weights power, int channelAxis); + public native @NoException(true) IScaleLayer addScaleNd( + @ByRef ITensor input, @Cast("nvinfer1::ScaleMode") int mode, @ByVal Weights shift, @ByVal Weights scale, @ByVal Weights power, int channelAxis); + + /** \brief Add a resize layer to the network. + * + * @param input The input tensor to the layer. + * + * @see IResizeLayer + * + * \warning Int32 tensors are not valid input tensors. + * + * @return The new resize layer, or nullptr if it could not be created. + * */ + + + //! + //! + //! + //! + //! + //! + public native @NoException(true) IResizeLayer addResize(@ByRef ITensor input); + + /** + * \brief True if network is an explicit precision network + * + * @deprecated Will be removed in TensorRT 10.0. + * + * hasExplicitPrecision() is true if and only if this INetworkDefinition + * was created with createNetworkV2() with NetworkDefinitionCreationFlag::kEXPLICIT_PRECISION set. + * + * @see createNetworkV2 + * + * @return True if network has explicit precision, false otherwise. + * */ + + + //! + //! + //! + //! + //! + public native @Cast("bool") @Deprecated @NoException(true) boolean hasExplicitPrecision(); + + /** + * \brief Add a loop to the network. + * + * An ILoop provides a way to specify a recurrent subgraph. + * + * @return Pointer to ILoop that can be used to add loop boundary layers for the loop, + * or nullptr if network has an implicit batch dimension or this version + * of TensorRT does not support loops. + * + * The network must not have an implicit batch dimension. + * */ + + //! + //! + //! + //! + //! + //! + //! + //! + //! + //! + //! + public native @NoException(true) ILoop addLoop(); + + /** \brief Add a select layer to the network. + * + * @param condition The condition tensor to the layer. Must have type DataType::kBOOL. 
+ * @param thenInput The "then" input tensor to the layer. + * @param elseInput The "else" input tensor to the layer. + * + * All three input tensors must have the same number of dimensions, and along each axis + * must have the same length or a length of one. If the length is one, the tensor + * is broadcast along that axis. The output tensor has the dimensions of the inputs AFTER + * the broadcast rule is applied. For example, given: + * + * dimensions of condition: [1,1,5,9] + * dimensions of thenInput: [1,1,5,9] + * dimensions of elseInput: [1,3,1,9] + * + * the output dimensions are [1,3,5,9], and the output contents are defined by: + * + * output[0,i,j,k] = condition[0,0,j,k] ? thenInput[0,0,j,k] : elseInput[0,i,0,k] + * + * The output dimensions are not necessarily the max of the input dimensions if any input + * is an empty tensor. For example, if in the preceding example, 5 is changed to 0: + * + * dimensions of condition: [1,1,0,9] + * dimensions of thenInput: [1,1,0,9] + * dimensions of elseInput: [1,3,1,9] + * + * then the output dimensions are [1,3,0,9]. + * + * The network must not have an implicit batch dimension. + * + * @see ISelectLayer + * + * @return The new select layer, or nullptr if it could not be created. */ + + //! + //! + //! + //! + //! + //! + public native @NoException(true) ISelectLayer addSelect(@ByRef ITensor condition, @ByRef ITensor thenInput, @ByRef ITensor elseInput); + + /** \brief Add a fill layer to the network. + * + * @param dimensions The output tensor dimensions. + * @param op The fill operation that the layer applies. + * + * \warning For FillOperation::kLINSPACE, dimensions.nbDims must be 1. + * + * The network must not have an implicit batch dimension. + * + * @see IFillLayer + * + * @return The new fill layer, or nullptr if it could not be created. + * */ + + //! + //! + //! + //! 
+ public native @NoException(true) IFillLayer addFill(@ByVal @Cast("nvinfer1::Dims*") Dims32 dimensions, FillOperation op); + public native @NoException(true) IFillLayer addFill(@ByVal @Cast("nvinfer1::Dims*") Dims32 dimensions, @Cast("nvinfer1::FillOperation") int op); + + /** \brief Add a padding layer to the network. Only 2D padding is currently supported. + * + * @param input The input tensor to the layer. + * @param prePadding The padding to apply to the start of the tensor. + * @param postPadding The padding to apply to the end of the tensor. + * + * @see IPaddingLayer + * + * @return The new padding layer, or nullptr if it could not be created. + * */ + + //! + //! + //! + public native @NoException(true) IPaddingLayer addPaddingNd(@ByRef ITensor input, @ByVal @Cast("nvinfer1::Dims*") Dims32 prePadding, @ByVal @Cast("nvinfer1::Dims*") Dims32 postPadding); + + /** \brief Associate a name with all current uses of the given weights. + * + * The name must be set after the Weights are used in the network. + * Lookup is associative. The name applies to all Weights with matching + * type, value pointer, and count. If Weights with a matching value + * pointer, but different type or count exists in the network, an + * error message is issued, the name is rejected, and return false. + * If the name has already been used for other weights, + * return false. A nullptr causes the weights to become unnamed, + * i.e. clears any previous name. + * + * @param weights The weights to be named. + * @param name The name to associate with the weights. + * + * @return true on success. */ + + + //! + //! + //! + //! + //! + public native @Cast("bool") @NoException(true) boolean setWeightsName(@ByVal Weights weights, String name); + public native @Cast("bool") @NoException(true) boolean setWeightsName(@ByVal Weights weights, @Cast("const char*") BytePointer name); + + /** + * \brief Set the ErrorRecorder for this interface + * + * Assigns the ErrorRecorder to this interface. 
The ErrorRecorder will track all errors during execution. + * This function will call incRefCount of the registered ErrorRecorder at least once. Setting + * recorder to nullptr unregisters the recorder with the interface, resulting in a call to decRefCount if + * a recorder has been registered. + * + * If an error recorder is not set, messages will be sent to the global log stream. + * + * @param recorder The error recorder to register with this interface. */ + // + /** @see getErrorRecorder() + /** */ + + + //! + //! + //! + //! + //! + public native @NoException(true) void setErrorRecorder(IErrorRecorder recorder); + + /** + * \brief get the ErrorRecorder assigned to this interface. + * + * Retrieves the assigned error recorder object for the given class. + * A nullptr will be returned if setErrorRecorder has not been called. + * + * @return A pointer to the IErrorRecorder object that has been registered. + * + * @see setErrorRecorder() + * */ + + + //! + //! + //! + //! + //! + //! + public native @NoException(true) IErrorRecorder getErrorRecorder(); + + /** + * \brief Add a dequantization layer to the network. + * + * @param input The input tensor to be quantized. + * @param scale A tensor with the scale value. + * + * @see IDequantizeLayer + * + * \p input tensor data type must be DataType::kFLOAT. + * \p scale tensor data type must be DataType::kFLOAT. The subgraph which terminates with the \p scale tensor must + * be a build-time constant. + * + * @return The new quantization layer, or nullptr if it could not be created. + * */ + + + //! + //! + //! + //! + //! + //! + public native @NoException(true) IDequantizeLayer addDequantize(@ByRef ITensor input, @ByRef ITensor scale); + + /** + * \brief Add a quantization layer to the network. + * + * @param input The input tensor to be quantized. + * @param scale A tensor with the scale value. + * + * @see IQuantizeLayer + * + * \p input tensor data type must be DataType::kFLOAT. 
+ * \p scale tensor data type must be DataType::kFLOAT. The subgraph which terminates with the \p scale tensor must + * be a build-time constant. + * + * @return The new quantization layer, or nullptr if it could not be created. + * */ + public native @NoException(true) IQuantizeLayer addQuantize(@ByRef ITensor input, @ByRef ITensor scale); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/INoCopy.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/INoCopy.java new file mode 100644 index 00000000000..2580639b841 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/INoCopy.java @@ -0,0 +1,37 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \class INoCopy + * + * \brief Base class for all TensorRT interfaces that are implemented by the TensorRT libraries + * + * Objects of such classes are not movable or copyable, and should only be manipulated + * via pointers. + * */ + +@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class INoCopy extends Pointer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public INoCopy(Pointer p) { super(p); } + +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IOptimizationProfile.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IOptimizationProfile.java new file mode 100644 index 00000000000..c76e084040c --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IOptimizationProfile.java @@ -0,0 +1,231 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \class IOptimizationProfile + * \brief Optimization profile for dynamic input dimensions and shape tensors. + * + * When building an ICudaEngine from an INetworkDefinition that has dynamically resizable inputs (at least + * one input tensor has one or more of its dimensions specified as -1) or shape input tensors, users need to specify + * at least one optimization profile. Optimization profiles are numbered 0, 1, ... + * The first optimization profile that has been defined (with index 0) will be used by the ICudaEngine whenever no + * optimization profile has been selected explicitly. If none of the inputs are dynamic, the default optimization + * profile will be generated automatically unless it is explicitly provided by the user (this is possible but not + * required in this case). 
If more than a single optimization profile is defined, users may set a target how + * much additional weight space should be maximally allocated to each additional profile (as a fraction of the + * maximum, unconstrained memory). + * + * Users set optimum input tensor dimensions, as well as minimum and maximum input tensor dimensions. The builder + * selects the kernels that result in the lowest runtime for the optimum input tensor dimensions, and are valid for + * all input tensor sizes in the valid range between minimum and maximum dimensions. A runtime error will be raised + * if the input tensor dimensions fall outside the valid range for this profile. Likewise, users provide minimum, + * optimum, and maximum values for all shape tensor input values. + * + * @see IBuilderConfig::addOptimizationProfile() + * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IOptimizationProfile extends INoCopy { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IOptimizationProfile(Pointer p) { super(p); } + + /** + * \brief Set the minimum / optimum / maximum dimensions for a dynamic input tensor. + * + * This function must be called three times (for the minimum, optimum, and maximum) for any network input tensor + * that has dynamic dimensions. 
 If minDims, optDims, and maxDims are the minimum, optimum, and maximum dimensions,
+ * and networkDims are the dimensions for this input tensor that are provided to the INetworkDefinition object,
+ * then the following conditions must all hold:
+ *
+ * (1) minDims.nbDims == optDims.nbDims == maxDims.nbDims == networkDims.nbDims
+ * (2) 0 <= minDims.d[i] <= optDims.d[i] <= maxDims.d[i] for i = 0, ..., networkDims.nbDims-1
+ * (3) if networkDims.d[i] != -1, then minDims.d[i] == optDims.d[i] == maxDims.d[i] == networkDims.d[i]
+ *
+ * This function may (but need not) be called for an input tensor that does not have dynamic dimensions. In this
+ * case, the third argument must always equal networkDims.
+ *
+ * @param inputName The input tensor name
+ * @param select Whether to set the minimum, optimum, or maximum dimensions
+ * @param dims The minimum, optimum, or maximum dimensions for this input tensor
+ *
+ * @return false if an inconsistency was detected (e.g. the rank does not match another dimension that was
+ * previously set for the same input), true if no inconsistency was detected. Note that inputs can be
+ * validated only partially; a full validation is performed at engine build time.
+ *
+ * \warning If run on DLA, minimum, optimum, and maximum dimensions must be the same.
+ * */
+
+
+ //!
+ //!
+ //!
+ public native @Cast("bool") @NoException(true) boolean setDimensions(String inputName, OptProfileSelector select, @ByVal @Cast("nvinfer1::Dims*") Dims32 dims);
+ public native @Cast("bool") @NoException(true) boolean setDimensions(@Cast("const char*") BytePointer inputName, @Cast("nvinfer1::OptProfileSelector") int select, @ByVal @Cast("nvinfer1::Dims*") Dims32 dims);
+
+ /**
+ * \brief Get the minimum / optimum / maximum dimensions for a dynamic input tensor.
+ *
+ * If the dimensions have not been previously set via setDimensions(), return an invalid Dims with nbDims == -1.
+ * */
+
+
+ //!
+ //!
+ //!
+ //!
+ //!
+ //!
+ //!
+ //!
+ //!
+ //!
+ //!
+ //! + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getDimensions(String inputName, OptProfileSelector select); + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getDimensions(@Cast("const char*") BytePointer inputName, @Cast("nvinfer1::OptProfileSelector") int select); + + /** + * \brief Set the minimum / optimum / maximum values for an input shape tensor. + * + * This function must be called three times for every input tensor t that is a shape tensor (t.isShape() == true). + * This implies that the datatype of t is DataType::kINT32, the rank is either 0 or 1, and the dimensions of t + * are fixed at network definition time. This function must not be called for any input tensor that is not a + * shape tensor. + * + * Each time this function is called for the same input tensor, the same nbValues must be supplied (either 1 + * if the tensor rank is 0, or dims.d[0] if the rank is 1). Furthermore, if minVals, optVals, maxVals are the + * minimum, optimum, and maximum values, it must be true that minVals[i] <= optVals[i] <= maxVals[i] for + * i = 0, ..., nbValues - 1. Execution of the network must be valid for the optVals. + * + * Shape tensors are tensors that contribute to shape calculations in some way, and can contain + * any int32_t values appropriate for the network. Examples: + * + * * A shape tensor used as the second input to IShuffleLayer can contain a -1 wildcard. + * The corresponding minVal[i] should be -1. + * + * * A shape tensor used as the stride input to ISliceLayer can contain any valid strides. + * The values could be positive, negative, or zero. + * + * * A shape tensor subtracted from zero to compute the size input of an ISliceLayer can + * contain any non-positive values that yield a valid slice operation. + * + * Tightening the minVals and maxVals bounds to cover only values that are necessary may help optimization. 
+ *
+ * @param inputName The input tensor name
+ * @param select Whether to set the minimum, optimum, or maximum input values.
+ * @param values An array of length nbValues containing the minimum, optimum, or maximum shape tensor elements.
+ * @param nbValues The length of the value array, which must equal the number of shape tensor elements (>= 1)
+ *
+ * @return false if an inconsistency was detected (e.g. nbValues does not match a previous call for the same
+ * tensor), else true. As for setDimensions(), a full validation can only be performed at engine build
+ * time.
+ *
+ * \warning If run on DLA, minimum, optimum, and maximum shape values must be the same.
+ * */
+
+
+ //!
+ //!
+ //!
+ public native @Cast("bool") @NoException(true) boolean setShapeValues(
+ String inputName, OptProfileSelector select, @Const IntPointer values, int nbValues);
+ public native @Cast("bool") @NoException(true) boolean setShapeValues(
+ @Cast("const char*") BytePointer inputName, @Cast("nvinfer1::OptProfileSelector") int select, @Const IntBuffer values, int nbValues);
+ public native @Cast("bool") @NoException(true) boolean setShapeValues(
+ String inputName, OptProfileSelector select, @Const int[] values, int nbValues);
+ public native @Cast("bool") @NoException(true) boolean setShapeValues(
+ @Cast("const char*") BytePointer inputName, @Cast("nvinfer1::OptProfileSelector") int select, @Const IntPointer values, int nbValues);
+ public native @Cast("bool") @NoException(true) boolean setShapeValues(
+ String inputName, OptProfileSelector select, @Const IntBuffer values, int nbValues);
+ public native @Cast("bool") @NoException(true) boolean setShapeValues(
+ @Cast("const char*") BytePointer inputName, @Cast("nvinfer1::OptProfileSelector") int select, @Const int[] values, int nbValues);
+
+ /**
+ * \brief Get the number of values for an input shape tensor.
+ *
+ * This will return the number of shape values if setShapeValues() has been called before for this input tensor.
+ * Otherwise, return -1. + * */ + + + //! + //! + //! + public native @NoException(true) int getNbShapeValues(String inputName); + public native @NoException(true) int getNbShapeValues(@Cast("const char*") BytePointer inputName); + + /** + * \brief Get the minimum / optimum / maximum values for an input shape tensor. + * + * If the shape values have not been set previously with setShapeValues(), this returns nullptr. + * */ + + + //! + //! + //! + //! + public native @Const @NoException(true) IntPointer getShapeValues(String inputName, OptProfileSelector select); + public native @Const @NoException(true) IntBuffer getShapeValues(@Cast("const char*") BytePointer inputName, @Cast("nvinfer1::OptProfileSelector") int select); + + /** + * \brief Set a target for extra GPU memory that may be used by this profile. + * + * @param target Additional memory that the builder should aim to maximally allocate for this profile, as a + * fraction of the memory it would use if the user did not impose any constraints on memory. This + * unconstrained case is the default; it corresponds to target == 1.0. If target == 0.0, the builder + * aims to create the new optimization profile without allocating any additional weight memory. + * Valid inputs lie between 0.0 and 1.0. This parameter is only a hint, and TensorRT does not guarantee + * that the target will be reached. This parameter is ignored for the first (default) optimization profile + * that is defined. + * + * @return true if the input is in the valid range (between 0 and 1 inclusive), else false + * */ + + + //! + //! + public native @Cast("bool") @NoException(true) boolean setExtraMemoryTarget(float target); + + /** + * \brief Get the extra memory target that has been defined for this profile. + * */ + + + //! + //! + //! + //! + public native @NoException(true) float getExtraMemoryTarget(); + + /** + * \brief Check whether the optimization profile can be passed to an IBuilderConfig object. 
+ * + * This function performs partial validation, by e.g. checking that whenever one of the minimum, optimum, or + * maximum dimensions of a tensor have been set, the others have also been set and have the same rank, as + * well as checking that the optimum dimensions are always as least as large as the minimum dimensions, and + * that the maximum dimensions are at least as large as the optimum dimensions. Some validation steps require + * knowledge of the network definition and are deferred to engine build time. + * + * @return true if the optimization profile is valid and may be passed to an IBuilderConfig, else false + * */ + public native @Cast("bool") @NoException(true) boolean isValid(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPaddingLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPaddingLayer.java new file mode 100644 index 00000000000..d98a844963b --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPaddingLayer.java @@ -0,0 +1,162 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \class IPaddingLayer + * + * \brief Layer that represents a padding operation. + * + * The padding layer adds zero-padding at the start and end of the input tensor. It only supports padding along the two + * innermost dimensions. 
Applying negative padding results in cropping of the input. + * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. + * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IPaddingLayer extends ILayer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IPaddingLayer(Pointer p) { super(p); } + + /** + * \brief Set the padding that is applied at the start of the tensor. + * + * Negative padding results in trimming the edge by the specified amount + * + * @see getPrePadding + * + * @deprecated Superseded by setPrePaddingNd and will be removed in TensorRT 9.0. + * */ + + + //! + //! + //! + //! + public native @Deprecated @NoException(true) void setPrePadding(@ByVal DimsHW padding); + + /** + * \brief Get the padding that is applied at the start of the tensor. + * + * @see setPrePadding + * + * @deprecated Superseded by getPrePaddingNd and will be removed in TensorRT 9.0. + * */ + + + //! + //! + //! + //! + //! + public native @Deprecated @ByVal @NoException(true) DimsHW getPrePadding(); + + /** + * \brief Set the padding that is applied at the end of the tensor. + * + * Negative padding results in trimming the edge by the specified amount + * + * @see getPostPadding + * + * @deprecated Superseded by setPostPaddingNd and will be removed in TensorRT 9.0. + * */ + + + //! + //! + //! + //! + public native @Deprecated @NoException(true) void setPostPadding(@ByVal DimsHW padding); + + /** + * \brief Get the padding that is applied at the end of the tensor. + * + * @see setPostPadding + * + * @deprecated Superseded by getPostPaddingNd and will be removed in TensorRT 9.0. + * */ + + + //! + //! + //! + //! + //! + public native @Deprecated @ByVal @NoException(true) DimsHW getPostPadding(); + + /** + * \brief Set the padding that is applied at the start of the tensor. 
+ * + * Negative padding results in trimming the edge by the specified amount. + * + * \warning Only 2 dimensional padding is currently supported. + * + * @see getPrePaddingNd + * */ + + + //! + //! + //! + //! + public native @NoException(true) void setPrePaddingNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 padding); + + /** + * \brief Get the padding that is applied at the start of the tensor. + * + * \warning Only 2 dimensional padding is currently supported. + * + * @see setPrePaddingNd + * */ + + + //! + //! + //! + //! + //! + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getPrePaddingNd(); + + /** + * \brief Set the padding that is applied at the end of the tensor. + * + * Negative padding results in trimming the edge by the specified amount + * + * \warning Only 2 dimensional padding is currently supported. + * + * @see getPostPaddingNd + * */ + + + //! + //! + //! + //! + public native @NoException(true) void setPostPaddingNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 padding); + + /** + * \brief Get the padding that is applied at the end of the tensor. + * + * \warning Only 2 dimensional padding is currently supported. 
+ * + * @see setPostPaddingNd + * */ + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getPostPaddingNd(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IParametricReLULayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IParametricReLULayer.java new file mode 100644 index 00000000000..8ecf2f593a9 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IParametricReLULayer.java @@ -0,0 +1,35 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \class IParametricReLULayer + * + * \brief Layer that represents a parametric ReLU operation. + * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. + * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IParametricReLULayer extends ILayer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public IParametricReLULayer(Pointer p) { super(p); } + +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPlugin.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPlugin.java new file mode 100644 index 00000000000..9371e3db003 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPlugin.java @@ -0,0 +1,27 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + +@Namespace("nvinfer1") @Opaque @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IPlugin extends Pointer { + /** Empty constructor. Calls {@code super((Pointer)null)}. */ + public IPlugin() { super((Pointer)null); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public IPlugin(Pointer p) { super(p); } +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginCreator.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginCreator.java new file mode 100644 index 00000000000..5e639796f44 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginCreator.java @@ -0,0 +1,111 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \class IPluginCreator + * + * \brief Plugin creator class for user implemented layers. + * + * @see IPlugin and IPluginFactory + * */ + +@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IPluginCreator extends Pointer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IPluginCreator(Pointer p) { super(p); } + + /** + * \brief Return the version of the API the plugin creator was compiled with. + * */ + + + //! + //! + public native @NoException(true) int getTensorRTVersion(); + + /** + * \brief Return the plugin name. + * */ + + + //! + //! + public native @NoException(true) String getPluginName(); + + /** + * \brief Return the plugin version. + * */ + + + //! + //! 
+ public native @NoException(true) String getPluginVersion(); + + /** + * \brief Return a list of fields that needs to be passed to createPlugin. + * @see PluginFieldCollection + * */ + + + //! + //! + public native @Const @NoException(true) PluginFieldCollection getFieldNames(); + + /** + * \brief Return a plugin object. Return nullptr in case of error. + * */ + + + //! + //! + public native @NoException(true) IPluginV2 createPlugin(String name, @Const PluginFieldCollection fc); + public native @NoException(true) IPluginV2 createPlugin(@Cast("const char*") BytePointer name, @Const PluginFieldCollection fc); + + /** + * \brief Called during deserialization of plugin layer. Return a plugin object. + * */ + + + //! + //! + //! + public native @NoException(true) IPluginV2 deserializePlugin(String name, @Const Pointer serialData, @Cast("size_t") long serialLength); + public native @NoException(true) IPluginV2 deserializePlugin(@Cast("const char*") BytePointer name, @Const Pointer serialData, @Cast("size_t") long serialLength); + + /** + * \brief Set the namespace of the plugin creator based on the plugin + * library it belongs to. This can be set while registering the plugin creator. + * + * @see IPluginRegistry::registerCreator() + * */ + + + //! + //! + public native @NoException(true) void setPluginNamespace(String pluginNamespace); + public native @NoException(true) void setPluginNamespace(@Cast("const char*") BytePointer pluginNamespace); + + /** + * \brief Return the namespace of the plugin creator object. 
+ * */ + public native @NoException(true) String getPluginNamespace(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginExt.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginExt.java new file mode 100644 index 00000000000..1d1aac43b71 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginExt.java @@ -0,0 +1,27 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + +@Namespace("nvinfer1") @Opaque @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IPluginExt extends Pointer { + /** Empty constructor. Calls {@code super((Pointer)null)}. */ + public IPluginExt() { super((Pointer)null); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public IPluginExt(Pointer p) { super(p); } +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginFactory.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginFactory.java new file mode 100644 index 00000000000..ff7c9c0c161 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginFactory.java @@ -0,0 +1,33 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + +/** Forward declaration of IPluginFactory for use by other interfaces. +

+//! +//! +//! +//! */ +@Namespace("nvinfer1") @Opaque @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IPluginFactory extends Pointer { + /** Empty constructor. Calls {@code super((Pointer)null)}. */ + public IPluginFactory() { super((Pointer)null); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IPluginFactory(Pointer p) { super(p); } +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginLayer.java new file mode 100644 index 00000000000..3c1caa8ca54 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginLayer.java @@ -0,0 +1,27 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + +@Namespace("nvinfer1") @Opaque @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IPluginLayer extends Pointer { + /** Empty constructor. Calls {@code super((Pointer)null)}. */ + public IPluginLayer() { super((Pointer)null); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public IPluginLayer(Pointer p) { super(p); } +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginRegistry.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginRegistry.java new file mode 100644 index 00000000000..ce49f026e24 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginRegistry.java @@ -0,0 +1,135 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \class IPluginRegistry + * + * \brief Single registration point for all plugins in an application. It is + * used to find plugin implementations during engine deserialization. + * Internally, the plugin registry is considered to be a singleton so all + * plugins in an application are part of the same global registry. + * Note that the plugin registry is only supported for plugins of type + * IPluginV2 and should also have a corresponding IPluginCreator implementation. + * + * @see IPluginV2 and IPluginCreator + * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. + * + * \warning In the automotive safety context, be sure to call IPluginRegistry::setErrorRecorder() to register + * an error recorder with the registry before using other methods in the registry. 
+ * */ + +@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IPluginRegistry extends Pointer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IPluginRegistry(Pointer p) { super(p); } + + /** + * \brief Register a plugin creator. Returns false if one with same type + * is already registered. + * */ + + + //! + //! + public native @Cast("bool") @NoException(true) boolean registerCreator(@ByRef IPluginCreator creator, String pluginNamespace); + public native @Cast("bool") @NoException(true) boolean registerCreator(@ByRef IPluginCreator creator, @Cast("const char*") BytePointer pluginNamespace); + + /** + * \brief Return all the registered plugin creators and the number of + * registered plugin creators. Returns nullptr if none found. + * */ + + + //! + //! + public native @Cast("nvinfer1::IPluginCreator*const*") PointerPointer getPluginCreatorList(IntPointer numCreators); + + /** + * \brief Return plugin creator based on plugin name, version, and + * namespace associated with plugin during network creation. + * */ + public native @NoException(true) IPluginCreator getPluginCreator( + String pluginName, String pluginVersion, String pluginNamespace/*=""*/); + public native @NoException(true) IPluginCreator getPluginCreator( + String pluginName, String pluginVersion); + public native @NoException(true) IPluginCreator getPluginCreator( + @Cast("const char*") BytePointer pluginName, @Cast("const char*") BytePointer pluginVersion, @Cast("const char*") BytePointer pluginNamespace/*=""*/); + public native @NoException(true) IPluginCreator getPluginCreator( + @Cast("const char*") BytePointer pluginName, @Cast("const char*") BytePointer pluginVersion); + + + + + /** + * \brief Set the ErrorRecorder for this interface + * + * Assigns the ErrorRecorder to this interface. The ErrorRecorder will track all errors during execution. 
+ * This function will call incRefCount of the registered ErrorRecorder at least once. Setting
+ * recorder to nullptr unregisters the recorder with the interface, resulting in a call to decRefCount if
+ * a recorder has been registered.
+ *
+ * @param recorder The error recorder to register with this interface. */
+ //
+ /** @see getErrorRecorder()
+ /** */
+
+
+ //!
+ //!
+ //!
+ //!
+ //!
+ public native @NoException(true) void setErrorRecorder(IErrorRecorder recorder);
+
+ /**
+ * \brief Get the ErrorRecorder assigned to this interface.
+ *
+ * Retrieves the assigned error recorder object for the given class. A default error recorder does not exist,
+ * so a nullptr will be returned if setErrorRecorder has not been called, or an ErrorRecorder has not been
+ * inherited.
+ *
+ * @return A pointer to the IErrorRecorder object that has been registered.
+ *
+ * @see setErrorRecorder()
+ * */
+
+
+ //!
+ //!
+ //!
+ //!
+ public native @NoException(true) IErrorRecorder getErrorRecorder();
+
+ /**
+ * \brief Deregister a previously registered plugin creator.
+ *
+ * Since there may be a desire to limit the number of plugins,
+ * this function provides a mechanism for removing plugin creators registered in TensorRT.
+ * The plugin creator that is specified by \p creator is removed from TensorRT and no longer tracked.
+ *
+ * @return True if the plugin creator was deregistered, false if it was not found in the registry or otherwise
+ * could
+ * not be deregistered.
+ * */ + public native @Cast("bool") @NoException(true) boolean deregisterCreator(@Const @ByRef IPluginCreator creator); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginV2.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginV2.java new file mode 100644 index 00000000000..a4ea26eb3bd --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginV2.java @@ -0,0 +1,297 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** \class IPluginV2 + * + * \brief Plugin class for user-implemented layers. + * + * Plugins are a mechanism for applications to implement custom layers. When + * combined with IPluginCreator it provides a mechanism to register plugins and + * look up the Plugin Registry during de-serialization. + * + * @see IPluginCreator + * @see IPluginRegistry + * */ +@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IPluginV2 extends Pointer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IPluginV2(Pointer p) { super(p); } + + /** + * \brief Return the API version with which this plugin was built. + * + * Do not override this method as it is used by the TensorRT library to maintain backwards-compatibility with plugins. + * */ + + + //! + //! 
+ public native @NoException(true) int getTensorRTVersion(); + + /** + * \brief Return the plugin type. Should match the plugin name returned by the corresponding plugin creator + * @see IPluginCreator::getPluginName() + * */ + + + //! + //! + public native @NoException(true) String getPluginType(); + + /** + * \brief Return the plugin version. Should match the plugin version returned by the corresponding plugin creator + * @see IPluginCreator::getPluginVersion() + * */ + + + //! + //! + //! + //! + public native @NoException(true) String getPluginVersion(); + + /** + * \brief Get the number of outputs from the layer. + * + * @return The number of outputs. + * + * This function is called by the implementations of INetworkDefinition and IBuilder. In particular, it is called + * prior to any call to initialize(). + * */ + + + //! + //! + //! + //! + public native @NoException(true) int getNbOutputs(); + + /** + * \brief Get the dimension of an output tensor. + * + * @param index The index of the output tensor. + * @param inputs The input tensors. + * @param nbInputDims The number of input tensors. + * + * This function is called by the implementations of INetworkDefinition and IBuilder. In particular, it is called + * prior to any call to initialize(). + * */ + + + //! + //! + //! + //! + //! + //! + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getOutputDimensions(int index, @Cast("const nvinfer1::Dims*") Dims32 inputs, int nbInputDims); + + /** + * \brief Check format support. + * + * @param type DataType requested. + * @param format PluginFormat requested. + * @return true if the plugin supports the type-format combination. + * + * This function is called by the implementations of INetworkDefinition, IBuilder, and + * safe::ICudaEngine/ICudaEngine. In particular, it is called when creating an engine and when deserializing an + * engine. 
+ * + * \warning for the format field, the values PluginFormat::kCHW4, PluginFormat::kCHW16, and PluginFormat::kCHW32 + * will not be passed in, this is to keep backward compatibility with TensorRT 5.x series. Use PluginV2IOExt + * or PluginV2DynamicExt for other PluginFormats. + * + * \warning DataType:kBOOL not supported. + * */ + + + //! + //! + //! + //! + //! + //! + //! + public native @Cast("bool") @NoException(true) boolean supportsFormat(DataType type, @Cast("nvinfer1::PluginFormat") TensorFormat format); + public native @Cast("bool") @NoException(true) boolean supportsFormat(@Cast("nvinfer1::DataType") int type, @Cast("nvinfer1::PluginFormat") int format); + + /** + * \brief Configure the layer. + * + * This function is called by the builder prior to initialize(). It provides an opportunity for the layer to make + * algorithm choices on the basis of its weights, dimensions, and maximum batch size. + * + * @param inputDims The input tensor dimensions. + * @param nbInputs The number of inputs. + * @param outputDims The output tensor dimensions. + * @param nbOutputs The number of outputs. + * @param type The data type selected for the engine. + * @param format The format selected for the engine. + * @param maxBatchSize The maximum batch size. + * + * The dimensions passed here do not include the outermost batch size (i.e. for 2-D image networks, they will be + * 3-dimensional CHW dimensions). + * + * \warning for the format field, the values PluginFormat::kCHW4, PluginFormat::kCHW16, and PluginFormat::kCHW32 + * will not be passed in, this is to keep backward compatibility with TensorRT 5.x series. Use PluginV2IOExt + * or PluginV2DynamicExt for other PluginFormats. + * + * \warning DataType:kBOOL not supported. + * */ + + + //! + //! + //! 
+ public native @NoException(true) void configureWithFormat(@Cast("const nvinfer1::Dims*") Dims32 inputDims, int nbInputs, @Cast("const nvinfer1::Dims*") Dims32 outputDims, int nbOutputs, + DataType type, @Cast("nvinfer1::PluginFormat") TensorFormat format, int maxBatchSize); + public native @NoException(true) void configureWithFormat(@Cast("const nvinfer1::Dims*") Dims32 inputDims, int nbInputs, @Cast("const nvinfer1::Dims*") Dims32 outputDims, int nbOutputs, + @Cast("nvinfer1::DataType") int type, @Cast("nvinfer1::PluginFormat") int format, int maxBatchSize); + + /** + * \brief Initialize the layer for execution. This is called when the engine is created. + * + * @return 0 for success, else non-zero (which will cause engine termination). + * */ + + + //! + //! + public native @NoException(true) int initialize(); + + /** + * \brief Release resources acquired during plugin layer initialization. This is called when the engine is + * destroyed. + * @see initialize() + * */ + + + //! + //! + //! + //! + public native @NoException(true) void terminate(); + + /** + * \brief Find the workspace size required by the layer. + * + * This function is called during engine startup, after initialize(). The workspace size returned should be + * sufficient for any batch size up to the maximum. + * + * @return The workspace size. + * */ + + + //! + //! + //! + //! + public native @Cast("size_t") @NoException(true) long getWorkspaceSize(int maxBatchSize); + + /** + * \brief Execute the layer. + * + * @param batchSize The number of inputs in the batch. + * @param inputs The memory for the input tensors. + * @param outputs The memory for the output tensors. + * @param workspace Workspace for execution. + * @param stream The stream in which to execute the kernels. + * + * @return 0 for success, else non-zero (which will cause engine termination). + * */ + + + //! + //! + //! 
+ public native @NoException(true) int enqueue(int batchSize, @Cast("const void*const*") PointerPointer inputs, @Cast("void*const*") PointerPointer outputs, Pointer workspace, + CUstream_st stream); + public native @NoException(true) int enqueue(int batchSize, @Cast("const void*const*") @ByPtrPtr Pointer inputs, @Cast("void*const*") @ByPtrPtr Pointer outputs, Pointer workspace, + CUstream_st stream); + + /** + * \brief Find the size of the serialization buffer required. + * + * @return The size of the serialization buffer. + * */ + + + //! + //! + //! + //! + public native @Cast("size_t") @NoException(true) long getSerializationSize(); + + /** + * \brief Serialize the layer. + * + * @param buffer A pointer to a buffer to serialize data. Size of buffer must be equal to value returned by + * getSerializationSize. + * + * @see getSerializationSize() + * */ + + + //! + //! + public native @NoException(true) void serialize(Pointer buffer); + + /** + * \brief Destroy the plugin object. This will be called when the network, builder or engine is destroyed. + * */ + + + //! + //! + //! + public native @NoException(true) void destroy(); + + /** + * \brief Clone the plugin object. This copies over internal plugin parameters and returns a new plugin object with + * these parameters. + * + * The TensorRT runtime calls clone() to clone the plugin when an execution context is created for an engine, + * after the engine has been created. The runtime does not call initialize() on the cloned plugin, + * so the cloned plugin should be created in an initialized state. + * */ + + + //! + //! + public native @NoException(true) IPluginV2 clone(); + + /** + * \brief Set the namespace that this plugin object belongs to. Ideally, all plugin + * objects from the same plugin library should have the same namespace. + * */ + + + //! + //! 
+ public native @NoException(true) void setPluginNamespace(String pluginNamespace); + public native @NoException(true) void setPluginNamespace(@Cast("const char*") BytePointer pluginNamespace); + + /** + * \brief Return the namespace of the plugin object. + * */ + public native @NoException(true) String getPluginNamespace(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginV2DynamicExt.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginV2DynamicExt.java new file mode 100644 index 00000000000..3ec342c80fd --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginV2DynamicExt.java @@ -0,0 +1,233 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \class IPluginV2DynamicExt + * + * Similar to IPluginV2Ext, but with support for dynamic shapes. 
+ * + * Clients should override the public methods, including the following inherited methods: + * + * virtual int32_t getNbOutputs() const noexcept = 0; + * virtual nvinfer1::DataType getOutputDataType(int32_t index, const nvinfer1::DataType* inputTypes, int32_t + * nbInputs) const noexcept = 0; virtual size_t getSerializationSize() const noexcept = 0; virtual void + * serialize(void* buffer) const noexcept = 0; virtual void destroy() noexcept = 0; virtual void + * setPluginNamespace(const char* pluginNamespace) noexcept = 0; virtual const char* getPluginNamespace() const + * noexcept = 0; + * + * For getOutputDataType, the inputTypes will always be DataType::kFLOAT or DataType::kINT32, + * and the returned type is canonicalized to DataType::kFLOAT if it is DataType::kHALF or DataType:kINT8. + * Details about the floating-point precision are elicited later by method supportsFormatCombination. + * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IPluginV2DynamicExt extends IPluginV2Ext { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IPluginV2DynamicExt(Pointer p) { super(p); } + + + + //! + //! + //! + //! + //! + //! + //! + //! + public native @NoException(true) IPluginV2DynamicExt clone(); + + /** + * \brief Get expressions for computing dimensions of an output tensor from dimensions of the input tensors. + * + * @param outputIndex The index of the output tensor + * @param inputs Expressions for dimensions of the input tensors + * @param nbInputs The number of input tensors + * @param exprBuilder Object for generating new expressions + * + * This function is called by the implementations of IBuilder during analysis of the network. + * + * Example #1: A plugin has a single output that transposes the last two dimensions of the plugin's single input. 
+ * The body of the override of getOutputDimensions can be: + * + * DimsExprs output(inputs[0]); + * std::swap(output.d[output.nbDims-1], output.d[output.nbDims-2]); + * return output; + * + * Example #2: A plugin concatenates its two inputs along the first dimension. + * The body of the override of getOutputDimensions can be: + * + * DimsExprs output(inputs[0]); + * output.d[0] = exprBuilder.operation(DimensionOperation::kSUM, *inputs[0].d[0], *inputs[1].d[0]); + * return output; + * */ + + + //! + //! + public native @ByVal @NoException(true) DimsExprs getOutputDimensions( + int outputIndex, @Const DimsExprs inputs, int nbInputs, @ByRef IExprBuilder exprBuilder); + + /** + * Limit on number of format combinations accepted. + * */ + + + //! + //! + //! + //! + //! + //! + //! + //! + //! + //! + //! + //! + @MemberGetter public static native int kFORMAT_COMBINATION_LIMIT(); + public static final int kFORMAT_COMBINATION_LIMIT = kFORMAT_COMBINATION_LIMIT(); + + /** + * \brief Return true if plugin supports the format and datatype for the input/output indexed by pos. + * + * For this method inputs are numbered 0..(nbInputs-1) and outputs are numbered nbInputs..(nbInputs+nbOutputs-1). + * Using this numbering, pos is an index into InOut, where 0 <= pos < nbInputs+nbOutputs-1. + * + * TensorRT invokes this method to ask if the input/output indexed by pos supports the format/datatype specified + * by inOut[pos].format and inOut[pos].type. The override should return true if that format/datatype at inOut[pos] + * are supported by the plugin. If support is conditional on other input/output formats/datatypes, the plugin can + * make its result conditional on the formats/datatypes in inOut[0..pos-1], which will be set to values + * that the plugin supports. The override should not inspect inOut[pos+1..nbInputs+nbOutputs-1], + * which will have invalid values. In other words, the decision for pos must be based on inOut[0..pos] only. 
+ * + * Some examples: + * + * * A definition for a plugin that supports only FP16 NCHW: + * + * return inOut.format[pos] == TensorFormat::kLINEAR && inOut.type[pos] == DataType::kHALF; + * + * * A definition for a plugin that supports only FP16 NCHW for its two inputs, + * and FP32 NCHW for its single output: + * + * return inOut.format[pos] == TensorFormat::kLINEAR && (inOut.type[pos] == pos < 2 ? DataType::kHALF : + * DataType::kFLOAT); + * + * * A definition for a "polymorphic" plugin with two inputs and one output that supports + * any format or type, but the inputs and output must have the same format and type: + * + * return pos == 0 || (inOut.format[pos] == inOut.format[0] && inOut.type[pos] == inOut.type[0]); + * + * Warning: TensorRT will stop asking for formats once it finds kFORMAT_COMBINATION_LIMIT on combinations. + * */ + + + //! + //! + //! + //! + //! + //! + public native @Cast("bool") @NoException(true) boolean supportsFormatCombination( + int pos, @Const PluginTensorDesc inOut, int nbInputs, int nbOutputs); + + /** + * \brief Configure the plugin. + * + * configurePlugin() can be called multiple times in both the build and execution phases. The build phase happens + * before initialize() is called and only occurs during creation of an engine by IBuilder. The execution phase + * happens after initialize() is called and occurs during both creation of an engine by IBuilder and execution + * of an engine by IExecutionContext. + * + * Build phase: + * IPluginV2DynamicExt->configurePlugin is called when a plugin is being prepared for profiling but not for any + * specific input size. This provides an opportunity for the plugin to make algorithmic choices on the basis of + * input and output formats, along with the bound of possible dimensions. 
The min and max value of the
+ * DynamicPluginTensorDesc correspond to the kMIN and kMAX value of the current profile that the plugin is being
+ * profiled for, with the desc.dims field corresponding to the dimensions of plugin specified at network creation.
+ * Wildcard dimensions will exist during this phase in the desc.dims field.
+ *
+ * Execution phase:
+ * IPluginV2DynamicExt->configurePlugin is called when a plugin is being prepared for executing the plugin for
+ * specific dimensions. This provides an opportunity for the plugin to change algorithmic choices based on the
+ * explicit input dimensions stored in desc.dims field.
+ * * IBuilder will call this function once per profile, with desc.dims resolved to the values specified by the
+ * kOPT
+ * field of the current profile. Wildcard dimensions will not exist during this phase.
+ * * IExecutionContext will call this during the next subsequent instance enqueue[V2]() or execute[V2]() if:
+ * - The batch size is changed from previous call of execute()/enqueue() if hasImplicitBatchDimension() returns
+ * true.
+ * - The optimization profile is changed via setOptimizationProfile() or setOptimizationProfileAsync().
+ * - An input shape binding is changed via setInputShapeBinding().
+ * - An input execution binding is changed via setBindingDimensions().
+ * \warning The execution phase is timing critical during IExecutionContext but is not part of the timing loop when
+ * called from IBuilder. Performance bottlenecks of configurePlugin won't show up during engine building but will
+ * be visible during execution after calling functions that trigger layer resource updates.
+ *
+ * @param in The input tensors attributes that are used for configuration.
+ * @param nbInputs Number of input tensors.
+ * @param out The output tensors attributes that are used for configuration.
+ * @param nbOutputs Number of output tensors.
+ * */
+
+
+ //!
+ //!
+ //!
+ //!
+ public native @NoException(true) void configurePlugin(@Const DynamicPluginTensorDesc in, int nbInputs, + @Const DynamicPluginTensorDesc out, int nbOutputs); + + /** + * \brief Find the workspace size required by the layer. + * + * This function is called after the plugin is configured, and possibly during execution. + * The result should be a sufficient workspace size to deal with inputs and outputs of the given size + * or any smaller problem. + * + * @return The workspace size. + * */ + + + //! + //! + //! + //! + public native @Cast("size_t") @NoException(true) long getWorkspaceSize(@Const PluginTensorDesc inputs, int nbInputs, @Const PluginTensorDesc outputs, + int nbOutputs); + + /** + * \brief Execute the layer. + * + * @param inputDesc how to interpret the memory for the input tensors. + * @param outputDesc how to interpret the memory for the output tensors. + * @param inputs The memory for the input tensors. + * @param outputs The memory for the output tensors. + * @param workspace Workspace for execution. + * @param stream The stream in which to execute the kernels. + * + * @return 0 for success, else non-zero (which will cause engine termination). 
+ * */ + public native @NoException(true) int enqueue(@Const PluginTensorDesc inputDesc, @Const PluginTensorDesc outputDesc, + @Cast("const void*const*") PointerPointer inputs, @Cast("void*const*") PointerPointer outputs, Pointer workspace, CUstream_st stream); + public native @NoException(true) int enqueue(@Const PluginTensorDesc inputDesc, @Const PluginTensorDesc outputDesc, + @Cast("const void*const*") @ByPtrPtr Pointer inputs, @Cast("void*const*") @ByPtrPtr Pointer outputs, Pointer workspace, CUstream_st stream); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginV2Ext.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginV2Ext.java new file mode 100644 index 00000000000..d9330ddd2e2 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginV2Ext.java @@ -0,0 +1,193 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** \class IPluginV2Ext + * + * \brief Plugin class for user-implemented layers. + * + * Plugins are a mechanism for applications to implement custom layers. This + * interface provides additional capabilities to the IPluginV2 interface by + * supporting different output data types and broadcast across batch. 
+ * + * @see IPluginV2 + * */ +@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IPluginV2Ext extends IPluginV2 { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IPluginV2Ext(Pointer p) { super(p); } + + /** + * \brief Return the DataType of the plugin output at the requested index. + * The default behavior should be to return the type of the first input, or DataType::kFLOAT if the layer has no inputs. + * The returned data type must have a format that is supported by the plugin. + * @see supportsFormat() + * + * \warning DataType:kBOOL not supported. + * */ + + //! + //! + //! + public native @NoException(true) DataType getOutputDataType( + int index, @Cast("nvinfer1::DataType*") IntPointer inputTypes, int nbInputs); + public native @NoException(true) @Cast("nvinfer1::DataType") int getOutputDataType( + int index, @Cast("nvinfer1::DataType*") IntBuffer inputTypes, int nbInputs); + public native @NoException(true) DataType getOutputDataType( + int index, @Cast("nvinfer1::DataType*") int[] inputTypes, int nbInputs); + + /** \brief Return true if output tensor is broadcast across a batch. + * + * @param outputIndex The index of the output + * @param inputIsBroadcasted The ith element is true if the tensor for the ith input is broadcast across a batch. + * @param nbInputs The number of inputs + * + * The values in inputIsBroadcasted refer to broadcasting at the semantic level, + * i.e. are unaffected by whether method canBroadcastInputAcrossBatch requests + * physical replication of the values. + * */ + + //! + //! + //! + //! 
+ public native @Cast("bool") @NoException(true) boolean isOutputBroadcastAcrossBatch( + int outputIndex, @Cast("const bool*") BoolPointer inputIsBroadcasted, int nbInputs); + public native @Cast("bool") @NoException(true) boolean isOutputBroadcastAcrossBatch( + int outputIndex, @Cast("const bool*") boolean[] inputIsBroadcasted, int nbInputs); + + /** \brief Return true if plugin can use input that is broadcast across batch without replication. + * + * @param inputIndex Index of input that could be broadcast. + * + * For each input whose tensor is semantically broadcast across a batch, + * TensorRT calls this method before calling configurePlugin. + * If canBroadcastInputAcrossBatch returns true, TensorRT will not replicate the input tensor; + * i.e., there will be a single copy that the plugin should share across the batch. + * If it returns false, TensorRT will replicate the input tensor + * so that it appears like a non-broadcasted tensor. + * + * This method is called only for inputs that can be broadcast. + * */ + + + //! + //! + //! + //! + //! + //! + public native @Cast("bool") @NoException(true) boolean canBroadcastInputAcrossBatch(int inputIndex); + + /** + * \brief Configure the layer with input and output data types. + * + * This function is called by the builder prior to initialize(). It provides an opportunity for the layer to make + * algorithm choices on the basis of its weights, dimensions, data types and maximum batch size. + * + * @param inputDims The input tensor dimensions. + * @param nbInputs The number of inputs. + * @param outputDims The output tensor dimensions. + * @param nbOutputs The number of outputs. + * @param inputTypes The data types selected for the plugin inputs. + * @param outputTypes The data types selected for the plugin outputs. + * @param inputIsBroadcast True for each input that the plugin must broadcast across the batch. + * @param outputIsBroadcast True for each output that TensorRT will broadcast across the batch. 
+ * @param floatFormat The format selected for the engine for the floating point inputs/outputs. + * @param maxBatchSize The maximum batch size. + * + * The dimensions passed here do not include the outermost batch size (i.e. for 2-D image networks, they will be + * 3-dimensional CHW dimensions). When inputIsBroadcast or outputIsBroadcast is true, the outermost batch size for + * that input or output should be treated as if it is one. + * \ref inputIsBroadcast[i] is true only if the input is semantically broadcast across the batch and + * \ref canBroadcastInputAcrossBatch(i) returned true. + * \ref outputIsBroadcast[i] is true only if \ref isOutputBroadcastAcrossBatch(i) returns true. + * + * \warning for the floatFormat field, the values PluginFormat::kCHW4, PluginFormat::kCHW16, and + * PluginFormat::kCHW32 will not be passed in, this is to keep backward compatibility with TensorRT 5.x series. Use + * PluginV2IOExt or PluginV2DynamicExt for other PluginFormats. + * */ + + public native @NoException(true) void configurePlugin(@Cast("const nvinfer1::Dims*") Dims32 inputDims, int nbInputs, @Cast("const nvinfer1::Dims*") Dims32 outputDims, int nbOutputs, + @Cast("nvinfer1::DataType*") IntPointer inputTypes, @Cast("nvinfer1::DataType*") IntPointer outputTypes, @Cast("const bool*") BoolPointer inputIsBroadcast, + @Cast("const bool*") BoolPointer outputIsBroadcast, @Cast("nvinfer1::PluginFormat") TensorFormat floatFormat, int maxBatchSize); + public native @NoException(true) void configurePlugin(@Cast("const nvinfer1::Dims*") Dims32 inputDims, int nbInputs, @Cast("const nvinfer1::Dims*") Dims32 outputDims, int nbOutputs, + @Cast("nvinfer1::DataType*") IntBuffer inputTypes, @Cast("nvinfer1::DataType*") IntBuffer outputTypes, @Cast("const bool*") boolean[] inputIsBroadcast, + @Cast("const bool*") boolean[] outputIsBroadcast, @Cast("nvinfer1::PluginFormat") int floatFormat, int maxBatchSize); + public native @NoException(true) void configurePlugin(@Cast("const 
nvinfer1::Dims*") Dims32 inputDims, int nbInputs, @Cast("const nvinfer1::Dims*") Dims32 outputDims, int nbOutputs, + @Cast("nvinfer1::DataType*") int[] inputTypes, @Cast("nvinfer1::DataType*") int[] outputTypes, @Cast("const bool*") BoolPointer inputIsBroadcast, + @Cast("const bool*") BoolPointer outputIsBroadcast, @Cast("nvinfer1::PluginFormat") TensorFormat floatFormat, int maxBatchSize); + public native @NoException(true) void configurePlugin(@Cast("const nvinfer1::Dims*") Dims32 inputDims, int nbInputs, @Cast("const nvinfer1::Dims*") Dims32 outputDims, int nbOutputs, + @Cast("nvinfer1::DataType*") IntPointer inputTypes, @Cast("nvinfer1::DataType*") IntPointer outputTypes, @Cast("const bool*") boolean[] inputIsBroadcast, + @Cast("const bool*") boolean[] outputIsBroadcast, @Cast("nvinfer1::PluginFormat") int floatFormat, int maxBatchSize); + public native @NoException(true) void configurePlugin(@Cast("const nvinfer1::Dims*") Dims32 inputDims, int nbInputs, @Cast("const nvinfer1::Dims*") Dims32 outputDims, int nbOutputs, + @Cast("nvinfer1::DataType*") IntBuffer inputTypes, @Cast("nvinfer1::DataType*") IntBuffer outputTypes, @Cast("const bool*") BoolPointer inputIsBroadcast, + @Cast("const bool*") BoolPointer outputIsBroadcast, @Cast("nvinfer1::PluginFormat") TensorFormat floatFormat, int maxBatchSize); + public native @NoException(true) void configurePlugin(@Cast("const nvinfer1::Dims*") Dims32 inputDims, int nbInputs, @Cast("const nvinfer1::Dims*") Dims32 outputDims, int nbOutputs, + @Cast("nvinfer1::DataType*") int[] inputTypes, @Cast("nvinfer1::DataType*") int[] outputTypes, @Cast("const bool*") boolean[] inputIsBroadcast, + @Cast("const bool*") boolean[] outputIsBroadcast, @Cast("nvinfer1::PluginFormat") int floatFormat, int maxBatchSize); + + /** + * \brief Attach the plugin object to an execution context and grant the plugin the access to some context resource. 
+ * + * @param cudnn The CUDNN context handle of the execution context + * @param cublas The cublas context handle of the execution context + * @param allocator The allocator used by the execution context + * + * This function is called automatically for each plugin when a new execution context is created. If the context + * was created without resources, this method is not called until the resources are assigned. It is also called if + * new resources are assigned to the context. + * + * If the plugin needs per-context resource, it can be allocated here. + * The plugin can also get context-owned CUDNN and CUBLAS context here. + * + * \note In the automotive safety context, the CUDNN and CUBLAS parameters will be nullptr because CUDNN and CUBLAS + * is not used by the safe runtime. + * */ + + + //! + //! + //! + //! + public native @NoException(true) void attachToContext(cudnnContext arg0, cublasContext arg1, IGpuAllocator arg2); + + /** + * \brief Detach the plugin object from its execution context. + * + * This function is called automatically for each plugin when a execution context is destroyed or the context + * resources are unassigned from the context. + * + * If the plugin owns per-context resource, it can be released here. + * */ + + + //! + //! + public native @NoException(true) void detachFromContext(); + + /** + * \brief Clone the plugin object. This copies over internal plugin parameters as well and returns a new plugin object with these parameters. + * If the source plugin is pre-configured with configurePlugin(), the returned object should also be pre-configured. The returned object should allow attachToContext() with a new execution context. + * Cloned plugin objects can share the same per-engine immutable resource (e.g. weights) with the source object (e.g. via ref-counting) to avoid duplication. 
+ * */ + public native @NoException(true) IPluginV2Ext clone(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginV2IOExt.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginV2IOExt.java new file mode 100644 index 00000000000..4024615c5c1 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginV2IOExt.java @@ -0,0 +1,99 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** \class IPluginV2IOExt + * + * \brief Plugin class for user-implemented layers. + * + * Plugins are a mechanism for applications to implement custom layers. This interface provides additional + * capabilities to the IPluginV2Ext interface by extending different I/O data types and tensor formats. + * + * @see IPluginV2Ext + * */ +@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IPluginV2IOExt extends IPluginV2Ext { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IPluginV2IOExt(Pointer p) { super(p); } + + /** + * \brief Configure the layer. + * + * This function is called by the builder prior to initialize(). It provides an opportunity for the layer to make + * algorithm choices on the basis of I/O PluginTensorDesc and the maximum batch size. 
+ * + * @param in The input tensors attributes that are used for configuration. + * @param nbInput Number of input tensors. + * @param out The output tensors attributes that are used for configuration. + * @param nbOutput Number of output tensors. + * */ + + + //! + //! + //! + //! + //! + //! + //! + //! + //! + //! + //! + //! + public native @NoException(true) void configurePlugin( + @Const PluginTensorDesc in, int nbInput, @Const PluginTensorDesc out, int nbOutput); + + /** + * \brief Return true if plugin supports the format and datatype for the input/output indexed by pos. + * + * For this method inputs are numbered 0..(nbInputs-1) and outputs are numbered nbInputs..(nbInputs+nbOutputs-1). + * Using this numbering, pos is an index into InOut, where 0 <= pos < nbInputs+nbOutputs-1. + * + * TensorRT invokes this method to ask if the input/output indexed by pos supports the format/datatype specified + * by inOut[pos].format and inOut[pos].type. The override should return true if that format/datatype at inOut[pos] + * are supported by the plugin. If support is conditional on other input/output formats/datatypes, the plugin can + * make its result conditional on the formats/datatypes in inOut[0..pos-1], which will be set to values + * that the plugin supports. The override should not inspect inOut[pos+1..nbInputs+nbOutputs-1], + * which will have invalid values. In other words, the decision for pos must be based on inOut[0..pos] only. + * + * Some examples: + * + * * A definition for a plugin that supports only FP16 NCHW: + * + * return inOut.format[pos] == TensorFormat::kLINEAR && inOut.type[pos] == DataType::kHALF; + * + * * A definition for a plugin that supports only FP16 NCHW for its two inputs, + * and FP32 NCHW for its single output: + * + * return inOut.format[pos] == TensorFormat::kLINEAR && + * (inOut.type[pos] == pos < 2 ? 
DataType::kHALF : DataType::kFLOAT); + * + * * A definition for a "polymorphic" plugin with two inputs and one output that supports + * any format or type, but the inputs and output must have the same format and type: + * + * return pos == 0 || (inOut.format[pos] == inOut.format[0] && inOut.type[pos] == inOut.type[0]); + * + * Warning: TensorRT will stop asking for formats once it finds kFORMAT_COMBINATION_LIMIT on combinations. + * */ + public native @Cast("bool") @NoException(true) boolean supportsFormatCombination( + int pos, @Const PluginTensorDesc inOut, int nbInputs, int nbOutputs); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginV2Layer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginV2Layer.java new file mode 100644 index 00000000000..52aa6126a81 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginV2Layer.java @@ -0,0 +1,43 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \class IPluginV2Layer + * + * \brief Layer type for pluginV2 + * + * @see IPluginV2 + * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. 
+ * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IPluginV2Layer extends ILayer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IPluginV2Layer(Pointer p) { super(p); } + + /** + * \brief Get the plugin for the layer. + * + * @see IPluginV2 + * */ + public native @ByRef @NoException(true) IPluginV2 getPlugin(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPoolingLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPoolingLayer.java new file mode 100644 index 00000000000..c3c82b8929e --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPoolingLayer.java @@ -0,0 +1,433 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + // namespace impl + +/** \class IPoolingLayer + * + * \brief A Pooling layer in a network definition. + * + * The layer applies a reduction operation within a window over the input. + * + * \warning When running pooling layer with DeviceType::kDLA in Int8 mode, the dynamic ranges + * for input and output tensors must be equal. + * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. 
+ * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IPoolingLayer extends ILayer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IPoolingLayer(Pointer p) { super(p); } + + /** + * \brief Set the type of activation to be performed. + * + * DLA only supports kMAX and kAVERAGE pooling types. + * + * @see getPoolingType(), PoolingType + * */ + + + //! + //! + //! + public native @NoException(true) void setPoolingType(PoolingType type); + public native @NoException(true) void setPoolingType(@Cast("nvinfer1::PoolingType") int type); + + /** + * \brief Get the type of activation to be performed. + * + * @see setPoolingType(), PoolingType + * */ + + + //! + //! + //! + //! + //! + public native @NoException(true) PoolingType getPoolingType(); + + /** + * \brief Set the window size for pooling. + * + * If executing this layer on DLA, both height and width of window size must be in the range [1,8]. + * + * @see getWindowSize() + * + * @deprecated Superseded by setWindowSizeNd and will be removed in TensorRT 9.0. + * */ + + + //! + //! + //! + //! + public native @Deprecated @NoException(true) void setWindowSize(@ByVal DimsHW windowSize); + + /** + * \brief Get the window size for pooling. + * + * @see setWindowSize() + * + * @deprecated Superseded by getWindowSizeNd and will be removed in TensorRT 9.0. + * */ + + + //! + //! + //! + //! + //! + //! + public native @Deprecated @ByVal @NoException(true) DimsHW getWindowSize(); + + /** + * \brief Set the stride for pooling. + * + * Default: 1 + * + * If executing this layer on DLA, both height and width of stride must be in the range [1,16]. + * + * @see getStride() + * + * @deprecated Superseded by setStrideNd and will be removed in TensorRT 9.0. + * */ + + + //! + //! + //! + //! 
+ public native @Deprecated @NoException(true) void setStride(@ByVal DimsHW stride); + + /** + * \brief Get the stride for pooling. + * + * @see setStride() + * + * @deprecated Superseded by getStrideNd and will be removed in TensorRT 9.0. + * */ + + + //! + //! + //! + //! + //! + //! + public native @Deprecated @ByVal @NoException(true) DimsHW getStride(); + + /** + * \brief Set the padding for pooling. + * + * Default: 0 + * + * If executing this layer on DLA, both height and width of padding must be in the range [0,7]. + * + * @see getPadding() + * + * @deprecated Superseded by setPaddingNd and will be removed in TensorRT 9.0. + * */ + + + //! + //! + //! + //! + //! + public native @Deprecated @NoException(true) void setPadding(@ByVal DimsHW padding); + + /** + * \brief Get the padding for pooling. + * + * Default: 0 + * + * @see setPadding() + * + * @deprecated Superseded by getPaddingNd and will be removed in TensorRT 9.0. + * */ + + + //! + //! + //! + //! + public native @Deprecated @ByVal @NoException(true) DimsHW getPadding(); + + /** + * \brief Set the blending factor for the max_average_blend mode: + * max_average_blendPool = (1-blendFactor)*maxPool + blendFactor*avgPool + * blendFactor is a user value in [0,1] with the default value of 0.0 + * This value only applies for the kMAX_AVERAGE_BLEND mode. + * + * Since DLA does not support kMAX_AVERAGE_BLEND, blendFactor is ignored on the DLA. + * + * @see getBlendFactor() + * */ + + + //! + //! + //! + public native @NoException(true) void setBlendFactor(float blendFactor); + + /** + * \brief Get the blending factor for the max_average_blend mode: + * max_average_blendPool = (1-blendFactor)*maxPool + blendFactor*avgPool + * blendFactor is a user value in [0,1] with the default value of 0.0 + * In modes other than kMAX_AVERAGE_BLEND, blendFactor is ignored. + * + * @see setBlendFactor() + * */ + + + //! + //! + //! + //! + //! 
+ public native @NoException(true) float getBlendFactor(); + + /** + * \brief Set whether average pooling uses as a denominator the overlap area between the window + * and the unpadded input. + * If this is not set, the denominator is the overlap between the pooling window and the padded input. + * + * Default: true + * + * \note DLA supports only inclusive padding, and thus when executing this layer on DLA, this must be explicitly + * set to false. + * + * @see getAverageCountExcludesPadding() + * */ + + + //! + //! + //! + public native @NoException(true) void setAverageCountExcludesPadding(@Cast("bool") boolean exclusive); + + /** + * \brief Get whether average pooling uses as a denominator the overlap area between the window + * and the unpadded input. + * + * @see setAverageCountExcludesPadding() + * */ + + + //! + //! + //! + //! + //! + //! + public native @Cast("bool") @NoException(true) boolean getAverageCountExcludesPadding(); + + /** + * \brief Set the multi-dimension pre-padding for pooling. + * + * The start of the input will be padded by this number of elements in each dimension. + * Padding value depends on pooling type, -inf is used for max pooling and zero padding for average pooling. + * + * Default: (0, 0, ..., 0) + * + * If executing this layer on DLA, only support 2D padding, both height and width of padding must be in the range + * [0,7]. + * + * @see getPrePadding() + * */ + + + //! + //! + //! + public native @NoException(true) void setPrePadding(@ByVal @Cast("nvinfer1::Dims*") Dims32 padding); + + /** + * \brief Get the pre-padding. + * + * @see setPrePadding() + * */ + + + //! + //! + //! + //! + //! + //! + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getPrePadding(); + + /** + * \brief Set the multi-dimension post-padding for pooling. + * + * The end of the input will be padded by this number of elements in each dimension. 
+ * Padding value depends on pooling type, -inf is used for max pooling and zero padding for average pooling. + * + * Default: (0, 0, ..., 0) + * + * If executing this layer on DLA, only support 2D padding, both height and width of padding must be in the range + * [0,7]. + * + * @see getPostPadding() + * */ + + + //! + //! + //! + public native @NoException(true) void setPostPadding(@ByVal @Cast("nvinfer1::Dims*") Dims32 padding); + + /** + * \brief Get the padding. + * + * @see setPostPadding() + * */ + + + //! + //! + //! + //! + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getPostPadding(); + + /** + * \brief Set the padding mode. + * + * Padding mode takes precedence if both setPaddingMode and setPre/PostPadding are used. + * + * Default: kEXPLICIT_ROUND_DOWN + * + * @see getPaddingMode() */ + + + //! + //! + //! + public native @NoException(true) void setPaddingMode(PaddingMode paddingMode); + public native @NoException(true) void setPaddingMode(@Cast("nvinfer1::PaddingMode") int paddingMode); + + /** + * \brief Get the padding mode. + * + * Default: kEXPLICIT_ROUND_DOWN + * + * @see setPaddingMode() */ + + + //! + //! + //! + //! + public native @NoException(true) PaddingMode getPaddingMode(); + + /** + * \brief Set the multi-dimension window size for pooling. + * + * If executing this layer on DLA, only support 2D window size, both height and width of window size must be in the + * range [1,8]. + * + * @see getWindowSizeNd() setWindowSize() getWindowSize() + * */ + + + //! + //! + //! + public native @NoException(true) void setWindowSizeNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 windowSize); + + /** + * \brief Get the multi-dimension window size for pooling. + * + * @see setWindowSizeNd() + * */ + + + //! + //! + //! + //! + //! + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getWindowSizeNd(); + + /** + * \brief Set the multi-dimension stride for pooling. 
+ * + * Default: (1, 1, ..., 1) + * + * If executing this layer on DLA, only support 2D stride, both height and width of stride must be in the range + * [1,16]. + * + * @see getStrideNd() setStride() getStride() + * */ + + + //! + //! + //! + public native @NoException(true) void setStrideNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 stride); + + /** + * \brief Get the multi-dimension stride for pooling. + * + * @see setStrideNd() + * */ + + + //! + //! + //! + //! + //! + //! + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getStrideNd(); + + /** + * \brief Set the multi-dimension padding for pooling. + * + * The input will be padded by this number of elements in each dimension. + * Padding is symmetric. + * Padding value depends on pooling type, -inf is used for max pooling and zero padding for average pooling. + * + * Default: (0, 0, ..., 0) + * + * If executing this layer on DLA, only support 2D padding, both height and width of padding must be in the range + * [0,7]. + * + * @see getPaddingNd() setPadding() getPadding() + * */ + + + //! + //! + //! + //! + public native @NoException(true) void setPaddingNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 padding); + + /** + * \brief Get the multi-dimension padding for pooling. + * + * If the padding is asymmetric, the pre-padding is returned. 
+ * + * @see setPaddingNd() + * */ + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getPaddingNd(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IProfiler.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IProfiler.java new file mode 100644 index 00000000000..29ad4ca232a --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IProfiler.java @@ -0,0 +1,57 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \class IProfiler + * + * \brief Application-implemented interface for profiling. + * + * When this class is added to an execution context, the profiler will be called once per layer for each invocation of execute(). + * Note that enqueue() does not currently support profiling. + * + * The profiler will only be called after execution is complete. It has a small impact on execution time. + * */ +@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IProfiler extends Pointer { + static { Loader.load(); } + /** Default native constructor. */ + public IProfiler() { super((Pointer)null); allocate(); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public IProfiler(long size) { super((Pointer)null); allocateArray(size); } + /** Pointer cast constructor. 
Invokes {@link Pointer#Pointer(Pointer)}. */ + public IProfiler(Pointer p) { super(p); } + private native void allocate(); + private native void allocateArray(long size); + @Override public IProfiler position(long position) { + return (IProfiler)super.position(position); + } + @Override public IProfiler getPointer(long i) { + return new IProfiler((Pointer)this).offsetAddress(i); + } + + /** + * \brief Layer time reporting callback. + * + * @param layerName The name of the layer, set when constructing the network definition. + * @param ms The time in milliseconds to execute the layer. + * */ + @Virtual(true) public native @NoException(true) void reportLayerTime(String layerName, float ms); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IQuantizeLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IQuantizeLayer.java new file mode 100644 index 00000000000..8cbb7639197 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IQuantizeLayer.java @@ -0,0 +1,106 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \class IQuantizeLayer + * + * \brief A Quantize layer in a network definition. 
+ * + * This layer accepts a floating-point data input tensor, and uses the scale and zeroPt inputs to + * quantize the data to an 8-bit signed integer according to: + * \p output = clamp(round(\p input / \p scale) + \p zeroPt) + * + * Rounding type is rounding-to-nearest ties-to-even (https://en.wikipedia.org/wiki/Rounding#Round_half_to_even). + * Clamping is in the range [-128, 127]. + * + * The first input (index 0) is the tensor to be quantized. + * The second (index 1) and third (index 2) are the scale and zero point respectively. + * Each of \p scale and \p zeroPt must be either a scalar, or a 1D tensor. + * + * The \p zeroPt tensor is optional, and if not set, will be assumed to be zero. Its data type must be + * DataType::kINT8. \p zeroPt must only contain zero-valued coefficients, because only symmetric quantization is + * supported. + * The \p scale value must be either a scalar for per-tensor quantization, or a 1D tensor for per-channel + * quantization. All \p scale coefficients must have positive values. The size of the 1-D \p scale tensor must match + * the size of the quantization axis. The size of the \p scale must match the size of the \p zeroPt. + * + * The subgraph which terminates with the \p scale tensor must be a build-time constant. The same restrictions apply + * to the \p zeroPt. + * The output type, if constrained, must be constrained to DataType::kINT8. The input type, if constrained, must be + * constrained to DataType::kFLOAT (FP16 input is not supported). + * The output size is the same as the input size. The quantization axis is in reference to the input tensor's + * dimensions. + * + * IQuantizeLayer only supports DataType::kFLOAT precision and will default to this precision during instantiation. + * IQuantizeLayer only supports DataType::kINT8 output. 
+ * + * As an example of the operation of this layer, imagine a 4D NCHW activation input which can be quantized using a + * single scale coefficient (referred to as per-tensor quantization): + * For each n in N: + * For each c in C: + * For each h in H: + * For each w in W: + * output[n,c,h,w] = clamp(round(\p input[n,c,h,w] / \p scale) + \p zeroPt) + * + * Per-channel quantization is supported only for weight inputs. Thus, Activations cannot be quantized per-channel. + * As an example of per-channel operation, imagine a 4D KCRS weights input and K (dimension 0) as the quantization + * axis. The scale is an array of coefficients, and must have the same size as the quantization axis. + * For each k in K: + * For each c in C: + * For each r in R: + * For each s in S: + * output[k,c,r,s] = clamp(round(\p input[k,c,r,s] / \p scale[k]) + \p zeroPt[k]) + * + * \note Only symmetric quantization is supported. + * \note Currently the only allowed build-time constant \p scale and \zeroPt subgraphs are: + * 1. Constant -> Quantize + * 2. Constant -> Cast -> Quantize + * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. + * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IQuantizeLayer extends ILayer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IQuantizeLayer(Pointer p) { super(p); } + + /** + * \brief Get the quantization axis. + * + * @return axis parameter set by setAxis(). + * The return value is the index of the quantization axis in the input tensor's dimensions. + * A value of -1 indicates per-tensor quantization. + * The default value is -1. + * */ + + //! + //! + //! + public native @NoException(true) int getAxis(); + /** + * \brief Set the quantization axis. + * + * Set the index of the quantization axis (with reference to the input tensor's dimensions). 
+ * The axis must be a valid axis if the scale tensor has more than one coefficient. + * The axis value will be ignored if the scale tensor has exactly one coefficient (per-tensor quantization). + * */ + public native @NoException(true) void setAxis(int axis); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IRNNv2Layer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IRNNv2Layer.java new file mode 100644 index 00000000000..5a29e8edb16 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IRNNv2Layer.java @@ -0,0 +1,339 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \class IRNNv2Layer + * + * \brief An RNN layer in a network definition, version 2. + * + * This layer supersedes IRNNLayer. + * + * @deprecated IRNNv2Layer will be removed in TensorRT 9.0, use INetworkDefinition::addLoop instead. + * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. + * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IRNNv2Layer extends ILayer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IRNNv2Layer(Pointer p) { super(p); } + + /** Get the layer count of the RNN. 
*/ + public native @NoException(true) int getLayerCount(); + /** Get the hidden size of the RNN. */ + public native @NoException(true) int getHiddenSize(); + /** Get the maximum sequence length of the RNN. */ + public native @NoException(true) int getMaxSeqLength(); + /** Get the maximum data length of the RNN. +

+ //! + //! + //! + //! + //! + //! */ + public native @NoException(true) int getDataLength(); + + /** + * \brief Specify individual sequence lengths in the batch with the ITensor pointed to by + * \p seqLengths. + * + * The \p seqLengths ITensor should be a {N1, ..., Np} tensor, where N1..Np are the index dimensions + * of the input tensor to the RNN. + * + * If this is not specified, then the RNN layer assumes all sequences are size getMaxSeqLength(). + * + * All sequence lengths in \p seqLengths should be in the range [1, getMaxSeqLength()]. Zero-length + * sequences are not supported. + * + * This tensor must be of type DataType::kINT32. + * */ + + + //! + //! + //! + //! + public native @NoException(true) void setSequenceLengths(@ByRef ITensor seqLengths); + + /** + * \brief Get the sequence lengths specified for the RNN. + * + * @return nullptr if no sequence lengths were specified, the sequence length data otherwise. + * + * @see setSequenceLengths() + * */ + + + //! + //! + public native @NoException(true) ITensor getSequenceLengths(); + + /** + * \brief Set the operation of the RNN layer. + * @see getOperation(), RNNOperation + * */ + + + //! + //! + public native @NoException(true) void setOperation(RNNOperation op); + public native @NoException(true) void setOperation(@Cast("nvinfer1::RNNOperation") int op); + + /** + * \brief Get the operation of the RNN layer. + * @see setOperation(), RNNOperation + * */ + + + //! + //! + public native @NoException(true) RNNOperation getOperation(); + + /** + * \brief Set the input mode of the RNN layer. + * @see getInputMode(), RNNInputMode + * */ + + + //! + //! + public native @NoException(true) void setInputMode(RNNInputMode op); + public native @NoException(true) void setInputMode(@Cast("nvinfer1::RNNInputMode") int op); + + /** + * \brief Get the input mode of the RNN layer. + * @see setInputMode(), RNNInputMode + * */ + + + //! + //! + //! + //! 
+ public native @NoException(true) RNNInputMode getInputMode(); + + /** + * \brief Set the direction of the RNN layer. + * + * The direction determines if the RNN is run as a unidirectional(left to right) or + * bidirectional(left to right and right to left). + * In the ::kBIDIRECTION case the output is concatenated together, resulting + * in output size of 2x getHiddenSize(). + * + * @see getDirection(), RNNDirection + * */ + + + //! + //! + public native @NoException(true) void setDirection(RNNDirection op); + public native @NoException(true) void setDirection(@Cast("nvinfer1::RNNDirection") int op); + + /** + * \brief Get the direction of the RNN layer. + * @see setDirection(), RNNDirection + * */ + + + //! + //! + //! + //! + //! + //! + //! + //! + //! + public native @NoException(true) RNNDirection getDirection(); + + /** + * \brief Set the weight parameters for an individual gate in the RNN. + * + * The #DataType for this structure must be ::kFLOAT or ::kHALF, and must be the same + * datatype as the input tensor. + * + * Each parameter matrix is row-major in memory, and has the following dimensions: + * + * ~~~ + * Let K := { ::kUNIDIRECTION => 1 + * { ::kBIDIRECTION => 2 + * l := layer index (as described above) + * H := getHiddenSize() + * E := getDataLength() (the embedding length) + * isW := true if the matrix is an input (W) matrix, and false if + * the matrix is a recurrent input (R) matrix. + * + * if isW: + * if l < K and ::kSKIP: + * (numRows, numCols) := (0, 0) # input matrix is skipped + * elif l < K and ::kLINEAR: + * (numRows, numCols) := (H, E) # input matrix acts on input data size E + * elif l >= K: + * (numRows, numCols) := (H, K * H) # input matrix acts on previous hidden state + * else: # not isW + * (numRows, numCols) := (H, H) + * ~~~ + * + * In other words, the input weights of the first layer of the RNN (if + * not skipped) transform a {@code getDataLength()}-size column + * vector into a {@code getHiddenSize()}-size column vector. 
The input + * weights of subsequent layers transform a {@code K*getHiddenSize()}-size + * column vector into a {@code getHiddenSize()}-size column vector. {@code K=2} in + * the bidirectional case to account for the full hidden state being + * the concatenation of the forward and backward RNN hidden states. + * + * The recurrent weight matrices for all layers all have shape {@code (H, H)}, + * both in the unidirectional and bidirectional cases. (In the + * bidirectional case, each recurrent weight matrix for the (forward or + * backward) RNN cell operates on the previous (forward or + * backward) RNN cell's hidden state, which is size {@code H}). + * + * @param layerIndex The index of the layer that contains this gate. See the section + * @param gate The name of the gate within the RNN layer. The gate name must correspond + * to one of the gates used by this layer's #RNNOperation. + * @param isW True if the weight parameters are for the input matrix W[g] + * and false if they are for the recurrent input matrix R[g]. See + * #RNNOperation for equations showing how these matrices are used + * in the RNN gate. + * @param weights The weight structure holding the weight parameters, which are stored + * as a row-major 2D matrix. See \ref setRNNWeightsLayout "the layout of elements within a weight matrix" + * in IRNNLayer::setWeights() for documentation on the expected + * dimensions of this matrix. + * */ + + + //! + //! + public native @NoException(true) void setWeightsForGate(int layerIndex, RNNGateType gate, @Cast("bool") boolean isW, @ByVal Weights weights); + public native @NoException(true) void setWeightsForGate(int layerIndex, @Cast("nvinfer1::RNNGateType") int gate, @Cast("bool") boolean isW, @ByVal Weights weights); + + /** + * \brief Get the weight parameters for an individual gate in the RNN. + * @see setWeightsForGate() + * */ + + + //! + //! + //! + //! + //! 
+ public native @ByVal @NoException(true) Weights getWeightsForGate(int layerIndex, RNNGateType gate, @Cast("bool") boolean isW); + public native @ByVal @NoException(true) Weights getWeightsForGate(int layerIndex, @Cast("nvinfer1::RNNGateType") int gate, @Cast("bool") boolean isW); + + /** + * \brief Set the bias parameters for an individual gate in the RNN. + * + * The #DataType for this structure must be ::kFLOAT or ::kHALF, and must be the same + * datatype as the input tensor. + * + * Each bias vector has a fixed size, getHiddenSize(). + * + * @param layerIndex The index of the layer that contains this gate. See the section + * \ref setRNNWeightsOrder "Order of weight matrices" in IRNNLayer::setWeights() + * for a description of the layer index. + * @param gate The name of the gate within the RNN layer. The gate name must correspond + * to one of the gates used by this layer's #RNNOperation. + * @param isW True if the bias parameters are for the input bias Wb[g] + * and false if they are for the recurrent input bias Rb[g]. See + * #RNNOperation for equations showing how these bias vectors are used + * in the RNN gate. + * @param bias The weight structure holding the bias parameters, which should be an + * array of size getHiddenSize(). + * */ + + + //! + //! + public native @NoException(true) void setBiasForGate(int layerIndex, RNNGateType gate, @Cast("bool") boolean isW, @ByVal Weights bias); + public native @NoException(true) void setBiasForGate(int layerIndex, @Cast("nvinfer1::RNNGateType") int gate, @Cast("bool") boolean isW, @ByVal Weights bias); + + /** + * \brief Get the bias parameters for an individual gate in the RNN. + * @see setBiasForGate() + * */ + + + //! + //! + //! + //! 
+ public native @ByVal @NoException(true) Weights getBiasForGate(int layerIndex, RNNGateType gate, @Cast("bool") boolean isW); + public native @ByVal @NoException(true) Weights getBiasForGate(int layerIndex, @Cast("nvinfer1::RNNGateType") int gate, @Cast("bool") boolean isW); + + /** + * \brief Set the initial hidden state of the RNN with the provided \p hidden ITensor. + * + * The \p hidden ITensor should have the dimensions {@code {N1, ..., Np, L, H}}, where: + * + * - {@code N1..Np} are the index dimensions specified by the input tensor + * - {@code L} is the number of layers in the RNN, equal to getLayerCount() if getDirection is ::kUNIDIRECTION, + * and 2x getLayerCount() if getDirection is ::kBIDIRECTION. In the bi-directional + * case, layer {@code l}'s final forward hidden state is stored in {@code L = 2*l}, and + * final backward hidden state is stored in {@code L= 2*l + 1}. + * - {@code H} is the hidden state for each layer, equal to getHiddenSize(). + * */ + + + //! + //! + public native @NoException(true) void setHiddenState(@ByRef ITensor hidden); + + /** + * \brief Get the initial hidden state of the RNN. + * @see setHiddenState() + * */ + + + //! + //! + //! + //! + //! + public native @NoException(true) ITensor getHiddenState(); + + /** + * \brief Set the initial cell state of the LSTM with the provided \p cell ITensor. + * + * The \p cell ITensor should have the dimensions {@code {N1, ..., Np, L, H}}, where: + * + * - {@code N1..Np} are the index dimensions specified by the input tensor + * - {@code L} is the number of layers in the RNN, equal to getLayerCount() if getDirection is ::kUNIDIRECTION, + * and 2x getLayerCount() if getDirection is ::kBIDIRECTION. In the bi-directional + * case, layer {@code l}'s final forward hidden state is stored in {@code L = 2*l}, and + * final backward hidden state is stored in {@code L= 2*l + 1}. + * - {@code H} is the hidden state for each layer, equal to getHiddenSize(). 
+ * + * It is an error to call setCellState() on an RNN layer that is not configured with RNNOperation::kLSTM. + * */ + + + //! + //! + public native @NoException(true) void setCellState(@ByRef ITensor cell); + + /** + * \brief Get the initial cell state of the RNN. + * @see setCellState() + * */ + public native @NoException(true) ITensor getCellState(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IRaggedSoftMaxLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IRaggedSoftMaxLayer.java new file mode 100644 index 00000000000..a6ad767ac64 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IRaggedSoftMaxLayer.java @@ -0,0 +1,42 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \class IRaggedSoftMaxLayer + * + * \brief A RaggedSoftmax layer in a network definition. + * + * This layer takes a ZxS input tensor and an additional Zx1 bounds tensor + * holding the lengths of the Z sequences. + * + * This layer computes a softmax across each of the Z sequences. + * + * The output tensor is of the same size as the input tensor. + * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. 
+ * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IRaggedSoftMaxLayer extends ILayer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IRaggedSoftMaxLayer(Pointer p) { super(p); } + +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IRecurrenceLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IRecurrenceLayer.java new file mode 100644 index 00000000000..b5f5b10c35d --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IRecurrenceLayer.java @@ -0,0 +1,46 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IRecurrenceLayer extends ILoopBoundaryLayer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IRecurrenceLayer(Pointer p) { super(p); } + + /** + * \brief Append or replace an input of this layer with a specific tensor + * + * @param index the index of the input to modify. + * @param tensor the new input tensor */ + // + /** Sets the input tensor for the given index. + /** + /** For a recurrence layer, the values 0 and 1 are valid. 
+ /** The indices are as follows: + /** + /** - 0: The initial value of the output tensor. The value must come from outside the loop. + /** - 1: The next value of the output tensor. The value usually comes from inside the loop, and must have the same + /** dimensions as input 0. + /** + /** If this function is called with a value 1, then the function getNbInputs() changes + /** from returning 1 to 2. + /** */ +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IReduceLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IReduceLayer.java new file mode 100644 index 00000000000..e3984032189 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IReduceLayer.java @@ -0,0 +1,102 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \class IReduceLayer + * + * \brief Layer that represents a reduction operator across Shape, Int32, Float, and Half tensors. + * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. + * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IReduceLayer extends ILayer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public IReduceLayer(Pointer p) { super(p); } + + /** + * \brief Set the reduce operation for the layer. + * + * @see getOperation(), ReduceOperation + * */ + + + //! + //! + //! + public native @NoException(true) void setOperation(ReduceOperation op); + public native @NoException(true) void setOperation(@Cast("nvinfer1::ReduceOperation") int op); + + /** + * \brief Get the reduce operation for the layer. + * + * @see setOperation(), ReduceOperation + * */ + + + //! + //! + //! + public native @NoException(true) ReduceOperation getOperation(); + + /** + * \brief Set the axes over which to reduce. + * + * @see getReduceAxes + * */ + + + //! + //! + //! + public native @NoException(true) void setReduceAxes(@Cast("uint32_t") int reduceAxes); + + /** + * \brief Get the axes over which to reduce for the layer. + * + * @see setReduceAxes + * */ + + + //! + //! + //! + public native @Cast("uint32_t") @NoException(true) int getReduceAxes(); + + /** + * \brief Set the boolean that specifies whether or not to keep the reduced dimensions for the layer. + * + * @see getKeepDimensions + * */ + + + //! + //! + //! + public native @NoException(true) void setKeepDimensions(@Cast("bool") boolean keepDimensions); + + /** + * \brief Get the boolean that specifies whether or not to keep the reduced dimensions for the layer. 
+ * + * @see setKeepDimensions + * */ + public native @Cast("bool") @NoException(true) boolean getKeepDimensions(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IRefitter.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IRefitter.java new file mode 100644 index 00000000000..ae46eb33e9c --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IRefitter.java @@ -0,0 +1,336 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \class IRefitter + * + * \brief Updates weights in an engine. + * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. + * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IRefitter extends INoCopy { + static { Loader.load(); } + /** Default native constructor. */ + public IRefitter() { super((Pointer)null); allocate(); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public IRefitter(long size) { super((Pointer)null); allocateArray(size); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public IRefitter(Pointer p) { super(p); } + private native void allocate(); + private native void allocateArray(long size); + @Override public IRefitter position(long position) { + return (IRefitter)super.position(position); + } + @Override public IRefitter getPointer(long i) { + return new IRefitter((Pointer)this).offsetAddress(i); + } + + + /** + * \brief Specify new weights for a layer of given name. + * Returns true on success, or false if new weights are rejected. + * Possible reasons for rejection are: + * + * * There is no such layer by that name. + * * The layer does not have weights with the specified role. + * * The number of weights is inconsistent with the layer’s original specification. + * + * Modifying the weights before method refit() completes will result in undefined behavior. */ + + + //! + //! + //! + //! + //! + public native @Cast("bool") @NoException(true) boolean setWeights(String layerName, WeightsRole role, @ByVal Weights weights); + public native @Cast("bool") @NoException(true) boolean setWeights(@Cast("const char*") BytePointer layerName, @Cast("nvinfer1::WeightsRole") int role, @ByVal Weights weights); + + /** + * \brief Updates associated engine. Return true if successful. + * + * Failure occurs if getMissing() != 0 before the call. + * + * The behavior is undefined if the engine has pending enqueued work. + * + * Extant IExecutionContexts associated with the engine should not be used afterwards. + * Instead, create new IExecutionContexts after refitting. + * */ + + + //! + //! + //! + //! + //! + //! + public native @Cast("bool") @NoException(true) boolean refitCudaEngine(); + + /** + * \brief Get description of missing weights. + * + * For example, if some Weights have been set, but the engine was optimized + * in a way that combines weights, any unsupplied Weights in the combination + * are considered missing. + * + * @param size The number of items that can be safely written to a non-null layerNames or roles. 
+ * @param layerNames Where to write the layer names. + * @param roles Where to write the weights roles. + * + * @return The number of missing Weights. + * + * If layerNames!=nullptr, each written pointer points to a string owned by + * the engine being refit, and becomes invalid when the engine is destroyed. + * */ + + + //! + //! + //! + //! + //! + public native @NoException(true) int getMissing(int size, @Cast("const char**") PointerPointer layerNames, @Cast("nvinfer1::WeightsRole*") IntPointer roles); + public native @NoException(true) int getMissing(int size, @Cast("const char**") @ByPtrPtr BytePointer layerNames, @Cast("nvinfer1::WeightsRole*") IntPointer roles); + public native @NoException(true) int getMissing(int size, @Cast("const char**") @ByPtrPtr ByteBuffer layerNames, @Cast("nvinfer1::WeightsRole*") IntBuffer roles); + public native @NoException(true) int getMissing(int size, @Cast("const char**") @ByPtrPtr byte[] layerNames, @Cast("nvinfer1::WeightsRole*") int[] roles); + + /** + * \brief Get description of all weights that could be refit. + * + * @param size The number of items that can be safely written to a non-null layerNames or roles. + * @param layerNames Where to write the layer names. + * @param roles Where to write the weights roles. + * + * @return The number of Weights that could be refit. + * + * If layerNames!=nullptr, each written pointer points to a string owned by + * the engine being refit, and becomes invalid when the engine is destroyed. + * */ + + + //! + //! + //! 
+ public native @NoException(true) int getAll(int size, @Cast("const char**") PointerPointer layerNames, @Cast("nvinfer1::WeightsRole*") IntPointer roles); + public native @NoException(true) int getAll(int size, @Cast("const char**") @ByPtrPtr BytePointer layerNames, @Cast("nvinfer1::WeightsRole*") IntPointer roles); + public native @NoException(true) int getAll(int size, @Cast("const char**") @ByPtrPtr ByteBuffer layerNames, @Cast("nvinfer1::WeightsRole*") IntBuffer roles); + public native @NoException(true) int getAll(int size, @Cast("const char**") @ByPtrPtr byte[] layerNames, @Cast("nvinfer1::WeightsRole*") int[] roles); + + /** + * @deprecated Deprecated interface will be removed in TensorRT 10.0. + * + * \warning Calling destroy on a managed pointer will result in a double-free error. + * */ + + + //! + //! + //! + //! + public native @Deprecated @NoException(true) void destroy(); + + /** + * Update dynamic range for a tensor. + * + * @param tensorName The name of an ITensor in the network. + * @param min The minimum of the dynamic range for the tensor. + * @param max The maximum of the dynamic range for the tensor. + * + * @return True if successful; false otherwise. + * + * Returns false if there is no Int8 engine tensor derived from + * a network tensor of that name. If successful, then getMissing + * may report that some weights need to be supplied. */ + + + //! + //! + //! + //! + public native @Cast("bool") @NoException(true) boolean setDynamicRange(String tensorName, float min, float max); + public native @Cast("bool") @NoException(true) boolean setDynamicRange(@Cast("const char*") BytePointer tensorName, float min, float max); + + /** + * \brief Get minimum of dynamic range. + * + * @return Minimum of dynamic range. + * + * If the dynamic range was never set, returns the minimum computed during calibration. + * */ + + + //! + //! + //! + //! 
+ public native @NoException(true) float getDynamicRangeMin(String tensorName); + public native @NoException(true) float getDynamicRangeMin(@Cast("const char*") BytePointer tensorName); + + /** + * \brief Get maximum of dynamic range. + * + * @return Maximum of dynamic range. + * + * If the dynamic range was never set, returns the maximum computed during calibration. + * */ + + + //! + //! + //! + //! + //! + public native @NoException(true) float getDynamicRangeMax(String tensorName); + public native @NoException(true) float getDynamicRangeMax(@Cast("const char*") BytePointer tensorName); + + /** + * \brief Get names of all tensors that have refittable dynamic ranges. + * + * @param size The number of items that can be safely written to a non-null tensorNames. + * @param tensorNames Where to write the layer names. + * + * @return The number of Weights that could be refit. + * + * If tensorNames!=nullptr, each written pointer points to a string owned by + * the engine being refit, and becomes invalid when the engine is destroyed. + * */ + + + //! + //! + //! + //! + //! + public native @NoException(true) int getTensorsWithDynamicRange(int size, @Cast("const char**") PointerPointer tensorNames); + public native @NoException(true) int getTensorsWithDynamicRange(int size, @Cast("const char**") @ByPtrPtr BytePointer tensorNames); + public native @NoException(true) int getTensorsWithDynamicRange(int size, @Cast("const char**") @ByPtrPtr ByteBuffer tensorNames); + public native @NoException(true) int getTensorsWithDynamicRange(int size, @Cast("const char**") @ByPtrPtr byte[] tensorNames); + + /** + * \brief Set the ErrorRecorder for this interface + * + * Assigns the ErrorRecorder to this interface. The ErrorRecorder will track all errors during execution. + * This function will call incRefCount of the registered ErrorRecorder at least once. 
Setting + * recorder to nullptr unregisters the recorder with the interface, resulting in a call to decRefCount if + * a recorder has been registered. + * + * If an error recorder is not set, messages will be sent to the global log stream. + * + * @param recorder The error recorder to register with this interface. */ + // + /** @see getErrorRecorder() + /** */ + + + //! + //! + //! + //! + //! + public native @NoException(true) void setErrorRecorder(IErrorRecorder recorder); + + /** + * \brief Get the ErrorRecorder assigned to this interface. + * + * Retrieves the assigned error recorder object for the given class. A nullptr will be returned if + * an error handler has not been set. + * + * @return A pointer to the IErrorRecorder object that has been registered. + * + * @see setErrorRecorder() + * */ + + + //! + //! + //! + //! + //! + public native @NoException(true) IErrorRecorder getErrorRecorder(); + + /** + * \brief Specify new weights of given name. + * + * @param name The name of the weights to be refit. + * @param weights The new weights to associate with the name. + * + * Returns true on success, or false if new weights are rejected. + * Possible reasons for rejection are: + * + * * The name of weights is nullptr or does not correspond to any refittable weights. + * * The number of weights is inconsistent with the original specification. + * + * Modifying the weights before method refitCudaEngine() completes will result in undefined behavior. */ + + + //! + //! + //! + //! + //! + //! + public native @Cast("bool") @NoException(true) boolean setNamedWeights(String name, @ByVal Weights weights); + public native @Cast("bool") @NoException(true) boolean setNamedWeights(@Cast("const char*") BytePointer name, @ByVal Weights weights); + + /** + * \brief Get names of missing weights. 
+ * + * For example, if some Weights have been set, but the engine was optimized + * in a way that combines weights, any unsupplied Weights in the combination + * are considered missing. + * + * @param size The number of weights names that can be safely written to. + * @param weightsNames The names of the weights to be updated, or nullptr for unnamed weights. + * + * @return The number of missing Weights. + * + * If layerNames!=nullptr, each written pointer points to a string owned by + * the engine being refit, and becomes invalid when the engine is destroyed. + * */ + + + //! + //! + //! + //! + //! + public native @NoException(true) int getMissingWeights(int size, @Cast("const char**") PointerPointer weightsNames); + public native @NoException(true) int getMissingWeights(int size, @Cast("const char**") @ByPtrPtr BytePointer weightsNames); + public native @NoException(true) int getMissingWeights(int size, @Cast("const char**") @ByPtrPtr ByteBuffer weightsNames); + public native @NoException(true) int getMissingWeights(int size, @Cast("const char**") @ByPtrPtr byte[] weightsNames); + + /** + * \brief Get names of all weights that could be refit. + * + * @param size The number of weights names that can be safely written to. + * @param weightsNames The names of the weights to be updated, or nullptr for unnamed weights. + * + * @return The number of Weights that could be refit. + * + * If layerNames!=nullptr, each written pointer points to a string owned by + * the engine being refit, and becomes invalid when the engine is destroyed. 
+ * */ + public native @NoException(true) int getAllWeights(int size, @Cast("const char**") PointerPointer weightsNames); + public native @NoException(true) int getAllWeights(int size, @Cast("const char**") @ByPtrPtr BytePointer weightsNames); + public native @NoException(true) int getAllWeights(int size, @Cast("const char**") @ByPtrPtr ByteBuffer weightsNames); + public native @NoException(true) int getAllWeights(int size, @Cast("const char**") @ByPtrPtr byte[] weightsNames); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IResizeLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IResizeLayer.java new file mode 100644 index 00000000000..69533ae4a00 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IResizeLayer.java @@ -0,0 +1,323 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + // namespace impl + +/** \class IResizeLayer + * + * \brief A resize layer in a network definition. + * + * Resize layer can be used for resizing a N-D tensor. + * + * Resize layer currently supports the following configurations: + * - ResizeMode::kNEAREST - resizes innermost {@code m} dimensions of N-D, where 0 < m <= min(8, N) and N > 0 + * - ResizeMode::kLINEAR - resizes innermost {@code m} dimensions of N-D, where 0 < m <= min(3, N) and N > 0 + * + * Default resize mode is ResizeMode::kNEAREST. 
+ * + * Resize layer provides two ways to resize tensor dimensions. + * - Set output dimensions directly. It can be done for static as well as dynamic resize layer. + * Static resize layer requires output dimensions to be known at build-time. + * Dynamic resize layer requires output dimensions to be set as one of the input tensors. + * - Set scales for resize. Each output dimension is calculated as floor(input dimension * scale). + * Only static resize layer allows setting scales where the scales are known at build-time. + * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. + * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IResizeLayer extends ILayer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IResizeLayer(Pointer p) { super(p); } + + /** + * \brief Set the output dimensions. + * + * @param dimensions The output dimensions. Number of output dimensions must be the same as the number of input + * dimensions. + * + * If there is a second input, i.e. resize layer is dynamic, + * calling setOutputDimensions() is an error and does not update the + * dimensions. + * + * Output dimensions can be specified directly, or via scale factors relative to input dimensions. + * Scales for resize can be provided using setScales(). + * + * @see setScales + * @see getOutputDimensions + * */ + + + //! + //! + //! + public native @NoException(true) void setOutputDimensions(@ByVal @Cast("nvinfer1::Dims*") Dims32 dimensions); + + /** + * \brief Get the output dimensions. + * + * @return The output dimensions. + * */ + + + //! + //! + //! + //! + //! + //! + //! + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getOutputDimensions(); + + /** + * \brief Set the resize scales. + * + * @param scales An array of resize scales. + * @param nbScales Number of scales. 
Number of scales must be equal to the number of input dimensions. + * + * If there is a second input, i.e. resize layer is dynamic, + * calling setScales() is an error and does not update the scales. + * + * Output dimensions are calculated as follows: + * outputDims[i] = floor(inputDims[i] * scales[i]) + * + * Output dimensions can be specified directly, or via scale factors relative to input dimensions. + * Output dimensions can be provided directly using setOutputDimensions(). + * + * @see setOutputDimensions + * @see getScales + * */ + + + //! + //! + //! + //! + //! + //! + public native @NoException(true) void setScales(@Const FloatPointer scales, int nbScales); + public native @NoException(true) void setScales(@Const FloatBuffer scales, int nbScales); + public native @NoException(true) void setScales(@Const float[] scales, int nbScales); + + /** + * \brief Copies resize scales to scales[0, ..., nbScales-1], where nbScales is the number of scales that were set. + * + * @param size The number of scales to get. If size != nbScales, no scales will be copied. + * + * @param scales Pointer to where to copy the scales. Scales will be copied only if + * size == nbScales and scales != nullptr. + * + * In case the size is not known consider using size = 0 and scales = nullptr. This method will return + * the number of resize scales. + * + * @return The number of resize scales i.e. nbScales if scales were set. + * Return -1 in case no scales were set or resize layer is used in dynamic mode. + * */ + + + //! + //! + //! + //! + public native @NoException(true) int getScales(int size, FloatPointer scales); + public native @NoException(true) int getScales(int size, FloatBuffer scales); + public native @NoException(true) int getScales(int size, float[] scales); + + /** + * \brief Set resize mode for an input tensor. + * + * Supported resize modes are Nearest Neighbor and Linear. + * + * @see ResizeMode + * */ + + + //! + //! + //! 
+ public native @NoException(true) void setResizeMode(ResizeMode resizeMode); + public native @NoException(true) void setResizeMode(@Cast("nvinfer1::ResizeMode") int resizeMode); + + /** + * \brief Get resize mode for an input tensor. + * + * @return The resize mode. + * */ + + + //! + //! + //! + //! + //! + public native @NoException(true) ResizeMode getResizeMode(); + + /** + * \brief Set whether to align corners while resizing. + * + * If true, the centers of the 4 corner pixels of both input and output + * tensors are aligned i.e. preserves the values of corner + * pixels. + * + * Default: false. + * + * @deprecated Superseded by IResizeLayer::setCoordinateTransformation() and + * will be removed in TensorRT 10.0. + * */ + + + //! + //! + //! + //! + public native @Deprecated @NoException(true) void setAlignCorners(@Cast("bool") boolean alignCorners); + + /** + * \brief True if align corners has been set. + * + * @return True if align corners has been set, false otherwise. + * + * @deprecated Superseded by IResizeLayer::getCoordinateTransformation() and + * will be removed in TensorRT 10.0. + * */ + + + //! + //! + //! + //! + //! + //! + //! + public native @Cast("bool") @Deprecated @NoException(true) boolean getAlignCorners(); + + /** + * \brief Append or replace an input of this layer with a specific tensor + * + * @param index the index of the input to modify. + * @param tensor the new input tensor. + * + * Sets the input tensor for the given index. The index must be 0 for a static resize layer. + * A static resize layer is converted to a dynamic resize layer by calling setInput with an index 1. + * A dynamic resize layer cannot be converted back to a static resize layer. + * + * For a dynamic resize layer, the values 0 and 1 are valid. + * The indices in the dynamic case are as follows: + * + * - 0: Data or Shape tensor to be resized. + * - 1: The output dimensions, as a 1D Int32 shape tensor. 
+ * + * If this function is called with a value 1, then the function getNbInputs() changes + * from returning 1 to 2. + * */ + + + //! + //! + //! + //! + //! + + /** + * \brief Set coordinate transformation function. + * + * We have different functions mapping the coordinate in output tensor to the coordinate in input tensor. + * + * Default is ResizeCoordinateTransformation::kASYMMETRIC. + * + * @see ResizeCoordinateTransformation + * */ + + + //! + //! + //! + public native @NoException(true) void setCoordinateTransformation(ResizeCoordinateTransformation coordTransform); + public native @NoException(true) void setCoordinateTransformation(@Cast("nvinfer1::ResizeCoordinateTransformation") int coordTransform); + + /** + * \brief Get coordinate transformation function. + * + * @return The coordinate transformation function. + * */ + + + //! + //! + //! + //! + //! + public native @NoException(true) ResizeCoordinateTransformation getCoordinateTransformation(); + + /** + * \brief Set coordinate selector function when resized to single pixel. + * + * When resize to single pixel image, use this function to decide how to map the coordinate in the original + * image. + * + * Default is ResizeSelector::kFORMULA. + * + * @see ResizeSelector + * */ + + + //! + //! + //! + public native @NoException(true) void setSelectorForSinglePixel(ResizeSelector selector); + public native @NoException(true) void setSelectorForSinglePixel(@Cast("nvinfer1::ResizeSelector") int selector); + + /** + * \brief Get the coordinate selector function when resized to single pixel. + * + * @return The selector function. + * */ + + + //! + //! + //! + //! + //! + public native @NoException(true) ResizeSelector getSelectorForSinglePixel(); + + /** + * \brief Set rounding mode for nearest neighbor resize. + * + * This value is used for nearest neighbor interpolation rounding. It is applied after coordinate transformation. + * + * Default is kFLOOR. + * + * @see ResizeRoundMode + * */ + + + //! + //! 
+ //! + public native @NoException(true) void setNearestRounding(ResizeRoundMode value); + public native @NoException(true) void setNearestRounding(@Cast("nvinfer1::ResizeRoundMode") int value); + + /** + * \brief Get rounding mode for nearest neighbor resize. + * + * @return The rounding mode. + * */ + public native @NoException(true) ResizeRoundMode getNearestRounding(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IRuntime.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IRuntime.java new file mode 100644 index 00000000000..049629cab49 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IRuntime.java @@ -0,0 +1,191 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \class IRuntime + * + * \brief Allows a serialized functionally unsafe engine to be deserialized. + * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. + * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IRuntime extends INoCopy { + static { Loader.load(); } + /** Default native constructor. */ + public IRuntime() { super((Pointer)null); allocate(); } + /** Native array allocator. Access with {@link Pointer#position(long)}. 
*/ + public IRuntime(long size) { super((Pointer)null); allocateArray(size); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IRuntime(Pointer p) { super(p); } + private native void allocate(); + private native void allocateArray(long size); + @Override public IRuntime position(long position) { + return (IRuntime)super.position(position); + } + @Override public IRuntime getPointer(long i) { + return new IRuntime((Pointer)this).offsetAddress(i); + } + + + /** + * \brief Deserialize an engine from a stream. + * + * If an error recorder has been set for the runtime, it will also be passed to the engine. + * + * @param blob The memory that holds the serialized engine. + * @param size The size of the memory in bytes. + * @param pluginFactory The plugin factory, if any plugins are used by the network, otherwise nullptr. + * + * @return The engine, or nullptr if it could not be deserialized. + * + * @deprecated Deprecated interface will be removed in TensorRT 10.0. + * + * \warning IPluginFactory is no longer supported, therefore pluginFactory must be a nullptr. + * */ + + + //! + //! + //! + public native @Deprecated @NoException(true) ICudaEngine deserializeCudaEngine( + @Const Pointer blob, @Cast("std::size_t") long size, IPluginFactory pluginFactory); + + /** + * \brief Set the DLA core that the deserialized engine must execute on. + * @param dlaCore The DLA core to execute the engine on (0 to N-1, where N is the maximum number of DLA's present + * on the device). Default value is 0. @see getDLACore() + * + * \warning Starting with TensorRT 8, the default value will be -1 if the DLA is not specified or unused. + * */ + + + //! + //! + //! + public native @NoException(true) void setDLACore(int dlaCore); + + /** + * \brief Get the DLA core that the engine executes on. + * @return If setDLACore is called, returns DLA core from 0 to N-1, else returns 0. 
+ * + * \warning Starting with TensorRT 8, the default value will be -1 if the DLA is not specified or unused. + * */ + + + //! + //! + public native @NoException(true) int getDLACore(); + + /** + * \brief Returns number of DLA hardware cores accessible. + * */ + + + //! + //! + //! + //! + public native @NoException(true) int getNbDLACores(); + + /** + * \brief Destroy this object. + * + * @deprecated Deprecated interface will be removed in TensorRT 10.0. + * + * \warning Calling destroy on a managed pointer will result in a double-free error. + * */ + + + //! + //! + //! + //! + public native @Deprecated @NoException(true) void destroy(); + + /** + * \brief Set the GPU allocator. + * @param allocator Set the GPU allocator to be used by the runtime. All GPU memory acquired will use this + * allocator. If NULL is passed, the default allocator will be used. + * + * Default: uses cudaMalloc/cudaFree. + * + * If nullptr is passed, the default allocator will be used. + * */ + + + //! + //! + //! + //! + //! + public native @NoException(true) void setGpuAllocator(IGpuAllocator allocator); + + /** + * \brief Set the ErrorRecorder for this interface + * + * Assigns the ErrorRecorder to this interface. The ErrorRecorder will track all errors during execution. + * This function will call incRefCount of the registered ErrorRecorder at least once. Setting + * recorder to nullptr unregisters the recorder with the interface, resulting in a call to decRefCount if + * a recorder has been registered. + * + * If an error recorder is not set, messages will be sent to the global log stream. + * + * @param recorder The error recorder to register with this interface. */ + // + /** @see getErrorRecorder() + /** */ + + + //! + //! + //! + //! + //! + public native @NoException(true) void setErrorRecorder(IErrorRecorder recorder); + + /** + * \brief get the ErrorRecorder assigned to this interface. + * + * Retrieves the assigned error recorder object for the given class. 
A nullptr will be returned if + * an error handler has not been set. + * + * @return A pointer to the IErrorRecorder object that has been registered. + * + * @see setErrorRecorder() + * */ + + + //! + //! + //! + //! + public native @NoException(true) IErrorRecorder getErrorRecorder(); + + /** + * \brief Deserialize an engine from a stream. + * + * @param blob The memory that holds the serialized engine. + * @param size The size of the memory. + * + * @return The engine, or nullptr if it could not be deserialized. + * */ + public native @NoException(true) ICudaEngine deserializeCudaEngine(@Const Pointer blob, @Cast("std::size_t") long size); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IScaleLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IScaleLayer.java new file mode 100644 index 00000000000..5f133650109 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IScaleLayer.java @@ -0,0 +1,190 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \class IScaleLayer + * + * \brief A Scale layer in a network definition. + * + * This layer applies a per-element computation to its input: + * + * \p output = (\p input* \p scale + \p shift)^ \p power + * + * The coefficients can be applied on a per-tensor, per-channel, or per-element basis. 
+ * + * \note If the number of weights is 0, then a default value is used for shift, power, and scale. + * The default shift is 0, the default power is 1, and the default scale is 1. + * + * The output size is the same as the input size. + * + * \note The input tensor for this layer is required to have a minimum of 3 dimensions in implicit batch mode + * and a minimum of 4 dimensions in explicit batch mode. + * + * A scale layer may be used as an INT8 quantization node in a graph, if the output is constrained to INT8 and + * the input to FP32. Quantization rounds ties to even, and clamps to [-128, 127]. + * + * @see ScaleMode + * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. + * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IScaleLayer extends ILayer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IScaleLayer(Pointer p) { super(p); } + + /** + * \brief Set the scale mode. + * + * @see getMode() + * */ + + + //! + //! + //! + public native @NoException(true) void setMode(ScaleMode mode); + public native @NoException(true) void setMode(@Cast("nvinfer1::ScaleMode") int mode); + + /** + * \brief Get the scale mode. + * + * @see setMode() + * */ + + + //! + //! + //! + public native @NoException(true) ScaleMode getMode(); + + /** + * \brief Set the shift value. + * + * @see getShift() + * */ + + + //! + //! + //! + public native @NoException(true) void setShift(@ByVal Weights shift); + + /** + * \brief Get the shift value. + * + * @see setShift() + * */ + + + //! + //! + //! + public native @ByVal @NoException(true) Weights getShift(); + + /** + * \brief Set the scale value. + * + * @see getScale() + * */ + + + //! + //! + //! + public native @NoException(true) void setScale(@ByVal Weights scale); + + /** + * \brief Get the scale value. 
+ * + * @see setScale() + * */ + + + //! + //! + //! + public native @ByVal @NoException(true) Weights getScale(); + + /** + * \brief Set the power value. + * + * @see getPower() + * */ + + + //! + //! + //! + public native @NoException(true) void setPower(@ByVal Weights power); + + /** + * \brief Get the power value. + * + * @see setPower() + * */ + + + //! + //! + //! + //! + //! + public native @ByVal @NoException(true) Weights getPower(); + + /** + * \brief Get the channel axis. + * + * @return channelAxis parameter passed to addScaleNd() or set by setChannelAxis() + * + * The value is the index of the channel axis in the input tensor's dimensions. + * Scaling happens along the channel axis when ScaleMode::kCHANNEL is enabled. + * + * @see addScaleNd() + * */ + + + //! + //! + //! + //! + //! + //! + public native @NoException(true) int getChannelAxis(); + + /** + * \brief Set the channel axis. + * + * The value is the index of the channel axis in the input tensor's dimensions. + * + * For ScaleMode::kCHANNEL, there can be distinct scale, shift, and power weights for each channel coordinate. + * For ScaleMode::kELEMENTWISE, there can be distinct scale, shift, and power weights for each combination of + * coordinates from the channel axis and axes after it. + * + * For example, suppose the input tensor has dimensions [10,20,30,40] and the channel axis is 1. + * Let [n,c,h,w] denote an input coordinate. + * For ScaleMode::kCHANNEL, the scale, shift, and power weights are indexed by c. + * For ScaleMode::kELEMENTWISE, the scale, shift, and power weights are indexed by [c,h,w]. 
+ * + * @see addScaleNd() + * */ + public native @NoException(true) void setChannelAxis(int channelAxis); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ISelectLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ISelectLayer.java new file mode 100644 index 00000000000..217ac6055cb --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ISelectLayer.java @@ -0,0 +1,31 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. + * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class ISelectLayer extends ILayer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public ISelectLayer(Pointer p) { super(p); } + +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IShapeLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IShapeLayer.java new file mode 100644 index 00000000000..7be7a1a07d5 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IShapeLayer.java @@ -0,0 +1,40 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** \class IShapeLayer + * + * \brief Layer type for getting shape of a tensor. + * + * This layer sets the output to a one-dimensional tensor with the dimensions of the input tensor. + * + * For example, if the input is a four-dimensional tensor (of any type) with + * dimensions [2,3,5,7], the output tensor is a one-dimensional Int32 tensor + * of length 4 containing the sequence 2, 3, 5, 7. + * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. + * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IShapeLayer extends ILayer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public IShapeLayer(Pointer p) { super(p); } + +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IShuffleLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IShuffleLayer.java new file mode 100644 index 00000000000..205383de326 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IShuffleLayer.java @@ -0,0 +1,225 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** \class IShuffleLayer + * + * \brief Layer type for shuffling data. + * + * This layer shuffles data by applying in sequence: a transpose operation, a reshape operation + * and a second transpose operation. The dimension types of the output are those of the reshape dimension. + * + * The layer has an optional second input. If present, it must be a 1D Int32 shape tensor, + * and the reshape dimensions are taken from it. + * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. + * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IShuffleLayer extends ILayer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IShuffleLayer(Pointer p) { super(p); } + + /** + * \brief Set the permutation applied by the first transpose operation. 
+ * + * @param permutation The dimension permutation applied before the reshape. + * + * The default is the identity permutation. + * + * @see getFirstTranspose + * */ + + + //! + //! + //! + //! + public native @NoException(true) void setFirstTranspose(@ByVal Permutation permutation); + + /** + * \brief Get the permutation applied by the first transpose operation. + * + * @return The dimension permutation applied before the reshape. + * + * @see setFirstTranspose + * */ + + + //! + //! + //! + //! + //! + //! + //! + //! + public native @ByVal @NoException(true) Permutation getFirstTranspose(); + + /** + * \brief Set the reshaped dimensions. + * + * @param dimensions The reshaped dimensions. + * + * Two special values can be used as dimensions. + * + * Value 0 copies the corresponding dimension from input. This special value + * can be used more than once in the dimensions. If number of reshape + * dimensions is less than input, 0s are resolved by aligning the most + * significant dimensions of input. + * + * Value -1 infers that particular dimension by looking at input and rest + * of the reshape dimensions. Note that only a maximum of one dimension is + * permitted to be specified as -1. + * + * The product of the new dimensions must be equal to the product of the old. + * + * If a second input had been used to create this layer, that input is reset to null by this method. + * */ + + + //! + //! + //! + //! + public native @NoException(true) void setReshapeDimensions(@ByVal @Cast("nvinfer1::Dims*") Dims32 dimensions); + + /** + * \brief Get the reshaped dimensions. + * + * @return The reshaped dimensions. + * + * If a second input is present and non-null, or setReshapeDimensions has + * not yet been called, this function returns Dims with nbDims == -1. + * */ + + + //! + //! + //! + //! + //! + //! + //! + //! 
+ public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getReshapeDimensions(); + + /** + * \brief Append or replace an input of this layer with a specific tensor + * + * @param index the index of the input to modify. + * @param tensor the new input tensor */ + // + /** Sets the input tensor for the given index. The index must be 0 for a static shuffle layer. + /** A static shuffle layer is converted to a dynamic shuffle layer by calling setInput with an index 1. + /** A dynamic shuffle layer cannot be converted back to a static shuffle layer. + /** + /** For a dynamic shuffle layer, the values 0 and 1 are valid. + /** The indices in the dynamic case are as follows: + /** + /** - 0: Data or Shape tensor to be shuffled. + /** - 1: The dimensions for the reshape operation, as a 1D Int32 shape tensor. + /** + /** If this function is called with a value 1, then the function getNbInputs() changes + /** from returning 1 to 2. + /** + /** The reshape dimensions are treated identically to how they are treated if set statically + /** via setReshapeDimensions. In particular, a -1 is treated as a wildcard even if dynamically + /** supplied at runtime, and a 0 is treated as a placeholder if getZeroIsPlaceholder() = true, + /** which is the default. If the placeholder interpretation of 0 is unwanted because the + /** runtime dimension should be 0 when the reshape dimension is 0, be sure to call + /** setZeroIsPlacholder(false) on the IShuffleLayer. + /** + /** @see setReshapeDimensions. + /** */ + + + //! + //! + //! + //! + //! + //! + + /** + * \brief Set the permutation applied by the second transpose operation. + * + * @param permutation The dimension permutation applied after the reshape. + * + * The default is the identity permutation. + * + * The permutation is applied as outputDimensionIndex = permutation.order[inputDimensionIndex], so to + * permute from CHW order to HWC order, the required permutation is [1, 2, 0]. 
+ * + * @see getSecondTranspose + * */ + + + //! + //! + //! + //! + public native @NoException(true) void setSecondTranspose(@ByVal Permutation permutation); + + /** + * \brief Get the permutation applied by the second transpose operation. + * + * @return The dimension permutation applied after the reshape. + * + * @see setSecondTranspose + * */ + + + //! + //! + //! + //! + //! + public native @ByVal @NoException(true) Permutation getSecondTranspose(); + + /** + * \brief Set meaning of 0 in reshape dimensions. + * + * If true, then a 0 in the reshape dimensions denotes copying the corresponding + * dimension from the first input tensor. If false, then a 0 in the reshape + * dimensions denotes a zero-length dimension. + * + * Default: true + * + * @see getZeroIsPlaceholder(); + * */ + + + //! + //! + //! + //! + public native @NoException(true) void setZeroIsPlaceholder(@Cast("bool") boolean zeroIsPlaceholder); + + /** + * \brief Get meaning of 0 in reshape dimensions. + * + * @return true if 0 is placeholder for corresponding input dimension, + * false if 0 denotes a zero-length dimension. 
+ * + * @see setZeroIsPlaceholder + * */ + public native @Cast("bool") @NoException(true) boolean getZeroIsPlaceholder(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ISliceLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ISliceLayer.java new file mode 100644 index 00000000000..6790a570055 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ISliceLayer.java @@ -0,0 +1,213 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \brief Slices an input tensor into an output tensor based on the offset and strides. + * + * The slice layer has two variants, static and dynamic. Static slice specifies the start, size, and stride + * dimensions at layer creation time via Dims and can use the get/set accessor functions of the ISliceLayer. + * Dynamic slice specifies one or more of start, size or stride as ITensors, by using ILayer::setTensor to add + * a second, third, or fourth input respectively. The corresponding Dims are used if an input + * is missing or null. + * + * An application can determine if the ISliceLayer has a dynamic output shape based on whether + * the size input (third input) is present and non-null. 
+ * + * The slice layer selects for each dimension a start location from within the input tensor, and + * copies elements to the output tensor using the specified stride across the input tensor. + * Start, size, and stride tensors must be 1D Int32 shape tensors if not specified via Dims. + * + * A slice layer can produce a shape tensor if the following conditions are met: + * + * * start, size, and stride are build time constants, either as static Dims, or computable by constant folding. + * * The number of elements in the output tensor does not exceed 2*Dims::MAX_DIMS. + * + * For example using slice on a tensor: + * input = {{0, 2, 4}, {1, 3, 5}} + * start = {1, 0} + * size = {1, 2} + * stride = {1, 2} + * output = {{1, 5}} + * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. + * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class ISliceLayer extends ILayer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public ISliceLayer(Pointer p) { super(p); } + + /** + * \brief Set the start offset that the slice layer uses to create the output slice. + * + * @param start The start offset to read data from the input tensor. + * + * If a second input had been used to create this layer, that input is reset to null by this method. + * + * @see getStart + * */ + + + //! + //! + //! + //! + //! + public native @NoException(true) void setStart(@ByVal @Cast("nvinfer1::Dims*") Dims32 start); + + /** + * \brief Get the start offset for the slice layer. + * + * @return The start offset, or an invalid Dims structure. + * + * If the second input is present and non-null, + * this function returns a Dims with nbDims = -1. + * + * @see setStart + * */ + + + //! + //! + //! + //! + //! 
+ public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getStart(); + + /** + * \brief Set the dimensions of the output slice. + * + * @param size The dimensions of the output slice. + * + * If a third input had been used to create this layer, that input is reset to null by this method. + * + * @see getSize + * */ + + + //! + //! + //! + //! + //! + public native @NoException(true) void setSize(@ByVal @Cast("nvinfer1::Dims*") Dims32 size); + + /** + * \brief Get dimensions of the output slice. + * + * @return The output dimension, or an invalid Dims structure. + * + * If the third input is present and non-null, + * this function returns a Dims with nbDims = -1. + * + * @see setSize + * */ + + + //! + //! + //! + //! + //! + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getSize(); + + /** + * \brief Set the stride for computing the output slice data. + * + * @param stride The dimensions of the stride to compute the values to store in the output slice. + * + * If a fourth input had been used to create this layer, that input is reset to null by this method. + * + * @see getStride + * */ + + + //! + //! + //! + //! + //! + public native @NoException(true) void setStride(@ByVal @Cast("nvinfer1::Dims*") Dims32 stride); + + /** + * \brief Get the stride for the output slice. + * + * @return The slicing stride, or an invalid Dims structure. + * + * If the fourth input is present and non-null, + * this function returns a Dims with nbDims = -1. + * + * @see setStride + * */ + + + //! + //! + //! + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getStride(); + + /** + * \brief Set the slice mode. + * + * @see getMode() + * */ + + + //! + //! + //! + public native @NoException(true) void setMode(SliceMode mode); + public native @NoException(true) void setMode(@Cast("nvinfer1::SliceMode") int mode); + + /** + * \brief Get the slice mode. + * + * @see setMode() + * */ + + + //! + //! + //! + //! + //! + //! 
+ public native @NoException(true) SliceMode getMode(); + + /** + * \brief Append or replace an input of this layer with a specific tensor + * + * @param index the index of the input to modify. + * @param tensor the new input tensor + * + * For a slice layer, the values 0-3 are valid. The values 1-3 override start, size or stride + * dimensions, respectively. Conversely, this input tensor can be overridden via appropriate set call. + * The indices are as follows: + * + * - 0: Data or Shape tensor to be sliced. + * - 1: The start tensor to begin slicing, as a 1D Int32 shape tensor. + * - 2: The size tensor of the resulting slice, as a 1D Int32 shape tensor. + * - 3: The stride of the slicing operation, as a 1D Int32 shape tensor. + * + * If this function is called with a value greater than 0, then the function getNbInputs() changes + * from returning 1 to index + 1. + * */ +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ISoftMaxLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ISoftMaxLayer.java new file mode 100644 index 00000000000..66f0458e303 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ISoftMaxLayer.java @@ -0,0 +1,82 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \class ISoftMaxLayer + * + * \brief A Softmax layer in a network definition. 
+ * + * This layer applies a per-channel softmax to its input. + * + * The output size is the same as the input size. + * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. + * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class ISoftMaxLayer extends ILayer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public ISoftMaxLayer(Pointer p) { super(p); } + + /** + * \brief Set the axis along which softmax is computed. Currently, only one axis can be set. + * + * The axis is specified by setting the bit corresponding to the axis to 1. + * Let's say we have an NCHW tensor as input (three non-batch dimensions). + * + * In implicit mode : + * Bit 0 corresponds to the C dimension boolean. + * Bit 1 corresponds to the H dimension boolean. + * Bit 2 corresponds to the W dimension boolean. + * By default, softmax is performed on the axis which is the number of axes minus three. It is 0 if + * there are fewer than 3 non-batch axes. For example, if the input is NCHW, the default axis is C. If the input + * is NHW, then the default axis is H. + * + * In explicit mode : + * Bit 0 corresponds to the N dimension boolean. + * Bit 1 corresponds to the C dimension boolean. + * Bit 2 corresponds to the H dimension boolean. + * Bit 3 corresponds to the W dimension boolean. + * By default, softmax is performed on the axis which is the number of axes minus three. It is 0 if + * there are fewer than 3 axes. For example, if the input is NCHW, the default axis is C. If the input + * is NHW, then the default axis is N. + * + * For example, to perform softmax on axis R of a NPQRCHW input, set bit 2 with implicit batch mode, + * set bit 3 with explicit batch mode. + * + * @param axes The axis along which softmax is computed. + * Here axes is a bitmap. 
For example, when doing softmax along axis 0, bit 0 is set to 1, axes = 1 << axis + * = 1. + * */ + + + //! + //! + //! + public native @NoException(true) void setAxes(@Cast("uint32_t") int axes); + + /** + * \brief Get the axis along which softmax occurs. + * + * @see setAxes() + * */ + public native @Cast("uint32_t") @NoException(true) int getAxes(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ITensor.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ITensor.java new file mode 100644 index 00000000000..fd6cb0282b0 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ITensor.java @@ -0,0 +1,414 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + // namespace impl + +/** + * \class ITensor + * + * \brief A tensor in a network definition. + * + * To remove a tensor from a network definition, use INetworkDefinition::removeTensor(). + * + * When using the DLA, the cumulative size of all Tensors that are not marked as Network Input or Output tensors, + * must be less than 1GB in size to fit into a single subgraph. If the build option kGPU_FALLBACK is specified, then + * multiple subgraphs can be created, with each subgraph limited to less than 1GB of internal tensors data. + * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. 
+ * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class ITensor extends INoCopy { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public ITensor(Pointer p) { super(p); } + + /** + * \brief Set the tensor name. + * + * For a network input, the name is assigned by the application. For tensors which are layer outputs, + * a default name is assigned consisting of the layer name followed by the index of the output in brackets. + * + * This method copies the name string. + * + * @param name The name. + * + * @see getName() + * */ + + + //! + //! + //! + //! + public native @NoException(true) void setName(String name); + public native @NoException(true) void setName(@Cast("const char*") BytePointer name); + + /** + * \brief Get the tensor name. + * + * @return The name, as a pointer to a NULL-terminated character sequence. + * + * @see setName() + * */ + + + //! + //! + //! + //! + //! + //! + public native @NoException(true) String getName(); + + /** + * \brief Set the dimensions of a tensor. + * + * For a network input, the dimensions are assigned by the application. For a network output, the dimensions are + * computed based on the layer parameters and the inputs to the layer. If a tensor size or a parameter is modified + * in the network, the dimensions of all dependent tensors will be recomputed. + * + * This call is only legal for network input tensors, since the dimensions of layer output tensors are inferred + * based on layer inputs and parameters. + * + * @param dimensions The dimensions of the tensor. + * + * @see getDimensions() + * */ + + + //! + //! + //! + //! + public native @NoException(true) void setDimensions(@ByVal @Cast("nvinfer1::Dims*") Dims32 dimensions); + + /** + * \brief Get the dimensions of a tensor. + * + * @return The dimensions of the tensor. 
+ * + * \warning getDimensions() returns a -1 for dimensions that are derived from a wildcard dimension. + * @see setDimensions() + * */ + + + //! + //! + //! + //! + //! + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getDimensions(); + + /** + * \brief Set the data type of a tensor. + * + * @param type The data type of the tensor. + * + * The type is unchanged if the tensor is not a network input tensor, or marked as an output tensor or shape + * output tensor. + * + * @see getType() + * */ + + + //! + //! + //! + //! + public native @NoException(true) void setType(DataType type); + public native @NoException(true) void setType(@Cast("nvinfer1::DataType") int type); + + /** + * \brief Get the data type of a tensor. + * + * @return The data type of the tensor. + * + * @see setType() + * */ + + + //! + //! + //! + //! + //! + public native @NoException(true) DataType getType(); + + /** + * \brief Set dynamic range for the tensor + * + * Currently, only symmetric ranges are supported. + * Therefore, the larger of the absolute values of the provided bounds is used. + * + * @return Whether the dynamic range was set successfully. + * + * Requires that min and max be finite, and min <= max. + * */ + + + //! + //! + public native @Cast("bool") @NoException(true) boolean setDynamicRange(float min, float max); + + /** + * \brief Whether the tensor is a network input. + * */ + + + //! + //! + public native @Cast("bool") @NoException(true) boolean isNetworkInput(); + + /** + * \brief Whether the tensor is a network output. + * */ + + + //! + //! + //! + //! + //! + //! + //! + public native @Cast("bool") @NoException(true) boolean isNetworkOutput(); + + /** + * \brief Set whether to enable broadcast of tensor across the batch. + * + * When a tensor is broadcast across a batch, it has the same value for every member in the batch. + * Memory is only allocated once for the single member. 
+ * + * This method is only valid for network input tensors, since the flags of layer output tensors are inferred based + * on layer inputs and parameters. + * If this state is modified for a tensor in the network, the states of all dependent tensors will be recomputed. + * If the tensor is for an explicit batch network, then this function does nothing. + * + * \warning The broadcast flag is ignored when using explicit batch network mode. + * + * @param broadcastAcrossBatch Whether to enable broadcast of tensor across the batch. + * + * @see getBroadcastAcrossBatch() + * */ + + + //! + //! + //! + //! + //! + public native @NoException(true) void setBroadcastAcrossBatch(@Cast("bool") boolean broadcastAcrossBatch); + + /** + * \brief Check if tensor is broadcast across the batch. + * + * When a tensor is broadcast across a batch, it has the same value for every member in the batch. + * Memory is only allocated once for the single member. If the network is in explicit batch mode, + * this function returns true if the leading dimension is 1. + * + * @return True if tensor is broadcast across the batch, false otherwise. + * + * @see setBroadcastAcrossBatch() + * */ + + + //! + //! + public native @Cast("bool") @NoException(true) boolean getBroadcastAcrossBatch(); + + /** + * \brief Get the storage location of a tensor. + * @return The location of tensor data. + * @see setLocation() + * */ + + + //! + //! + //! + //! + public native @NoException(true) TensorLocation getLocation(); + + /** + * \brief Set the storage location of a tensor + * @param location the location of tensor data + * + * Only network input tensors for storing sequence lengths for RNNv2 are supported. + * Using host storage for layers that do not support it will generate + * errors at build time. + * + * @see getLocation() + * */ + + + //! + //! + //! 
+ public native @NoException(true) void setLocation(TensorLocation location); + public native @NoException(true) void setLocation(@Cast("nvinfer1::TensorLocation") int location); + + /** + * \brief Query whether dynamic range is set. + * + * @return True if dynamic range is set, false otherwise. + * */ + + + //! + //! + public native @Cast("bool") @NoException(true) boolean dynamicRangeIsSet(); + + /** + * \brief Undo effect of setDynamicRange. + * */ + + + //! + //! + //! + public native @NoException(true) void resetDynamicRange(); + + /** + * \brief Get minimum of dynamic range. + * + * @return Minimum of dynamic range, or quiet NaN if range was not set. + * */ + + + //! + //! + //! + public native @NoException(true) float getDynamicRangeMin(); + + /** + * \brief Get maximum of dynamic range. + * + * @return Maximum of dynamic range, or quiet NaN if range was not set. + * */ + + + //! + //! + //! + //! + //! + public native @NoException(true) float getDynamicRangeMax(); + + /** + * \brief Set allowed formats for this tensor. By default all formats are allowed. + * Shape tensors (for which isShapeTensor() returns true) may only have row major linear format. + * + * When running network on DLA and allowGPUFallback is disabled, if DLA format(kCHW4 with Int8, kCHW4 with + * FP16, kCHW16 with FP16, kCHW32 with Int8) is set, the input format is treated as native DLA format with + * line stride requirement. Input/output binding with these format should have correct layout during + * inference. + * + * @param formats A bitmask of TensorFormat values that are supported for this tensor. + * + * @see ITensor::getAllowedFormats() + * @see TensorFormats + * */ + + + //! + //! + //! + //! + public native @NoException(true) void setAllowedFormats(@Cast("nvinfer1::TensorFormats") int formats); + + /** + * \brief Get a bitmask of TensorFormat values that the tensor supports. + * For a shape tensor, only row major linear format is allowed. 
+ * + * @return The value specified by setAllowedFormats or all possible formats. + * + * @see ITensor::setAllowedFormats() + * */ + + + //! + //! + //! + //! + //! + //! + //! + //! + //! + public native @Cast("nvinfer1::TensorFormats") @NoException(true) int getAllowedFormats(); + + /** + * \brief Whether the tensor is a shape tensor. + * + * A shape tensor is a tensor that is related to shape calculations. + * It must be 0D or 1D, have type Int32 or Bool, and its shape must be determinable at build time. + * Furthermore, it must be needed as a shape tensor, either marked as a network shape + * output via markOutputForShapes(), or as an input that is required to be a shape + * tensor, such as the second input to IShuffleLayer. Some layers are "polymorphic" in + * this respect. For example, the inputs to IElementWiseLayer must be shape tensors + * if the output is a shape tensor. + * + * The TensorRT Developer Guide give the formal rules for what tensors are shape tensors. + * + * The result of isShapeTensor() is reliable only when network construction is complete. + * For example, if a partially built network sums two tensors T1 and T2 to create + * tensor T3, and none are yet needed as shape tensors, isShapeTensor() returns false + * for all three tensors. Setting the second input of IShuffleLayer to be T3 would + * cause all three tensors to be shape tensors, because IShuffleLayer requires that its + * second optional input be a shape tensor, and IElementWiseLayer is "polymorphic". + * + * If a tensor is a shape tensor and becomes an engine input or output, + * then ICudaEngine::isShapeBinding will be true for that tensor. + * + * It is possible for a tensor to be both a shape tensor and an execution tensor. + * + * @return True if tensor is a shape tensor, false otherwise. + * + * @see INetworkDefinition::markOutputForShapes(), ICudaEngine::isShapeBinding() + * */ + + + //! + //! + //! + //! + //! + //! 
+ public native @Cast("bool") @NoException(true) boolean isShapeTensor(); + + /** + * \brief Whether the tensor is an execution tensor. + * + * Tensors are usually execution tensors. The exceptions are tensors used + * solely for shape calculations or whose contents not needed to compute the outputs. + * + * The result of isExecutionTensor() is reliable only when network construction is complete. + * For example, if a partially built network has no path from a tensor to a network output, + * isExecutionTensor() returns false. Completing the path would cause it to become true. + * + * If a tensor is an execution tensor and becomes an engine input or output, + * then ICudaEngine::isExecutionBinding will be true for that tensor. + * + * A tensor with isShapeTensor() == false and isExecutionTensor() == false + * can still show up as an input to the engine if its dimensions are required. + * In that case, only its dimensions need to be set at runtime and a nullptr + * can be passed instead of a pointer to its contents. 
+ * */ + public native @Cast("bool") @NoException(true) boolean isExecutionTensor(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ITimingCache.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ITimingCache.java new file mode 100644 index 00000000000..0259fd2a6fa --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ITimingCache.java @@ -0,0 +1,123 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \class ITimingCache + * + * \brief Class to handle tactic timing info collected from builder. + * + * The timing cache is created or initialized by IBuilderConfig. It can be shared across builder instances + * to accelerate the builder wallclock time. + * + * @see IBuilderConfig + * + * + * \class ITimingCache + * + * \brief Class to handle tactic timing info collected from builder. + * + * The timing cache is created or initialized by IBuilderConfig. It can be shared across builder instances + * to accelerate the builder wallclock time. + * + * @see IBuilderConfig + * + * + * \class ITimingCache + * + * \brief Class to handle tactic timing info collected from builder. + * + * The timing cache is created or initialized by IBuilderConfig. It can be shared across builder instances + * to accelerate the builder wallclock time. 
+ * + * @see IBuilderConfig + * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class ITimingCache extends INoCopy { + static { Loader.load(); } + /** Default native constructor. */ + public ITimingCache() { super((Pointer)null); allocate(); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public ITimingCache(long size) { super((Pointer)null); allocateArray(size); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public ITimingCache(Pointer p) { super(p); } + private native void allocate(); + private native void allocateArray(long size); + @Override public ITimingCache position(long position) { + return (ITimingCache)super.position(position); + } + @Override public ITimingCache getPointer(long i) { + return new ITimingCache((Pointer)this).offsetAddress(i); + } + + + /** + * \brief Serialize a timing cache to IHostMemory object. + * + * This function allows serialization of current timing cache. + * + * @return A pointer to a IHostMemory object that contains a serialized timing cache. + * + * @see IHostMemory + * */ + + + //! + //! + //! + //! + //! + //! + //! + public native @NoException(true) IHostMemory serialize(); + + /** + * \brief Combine input timing cache into local instance. + * + * This function allows combining entries in the input timing cache to local cache object. + * + * @param inputCache The input timing cache. + * @param ignoreMismatch Whether or not to allow cache verification header mismatch. + * + * @return True if combined successfully, false otherwise. + * + * Append entries in input cache to local cache. Conflicting entries will be skipped + * The input cache must be generated by a TensorRT build of exact same version, otherwise + * combine will be skipped and return false. + * ignoreMismatch must be set to true if combining a timing cache created from a + * different device. 
+ * + * \warning Combining caches generated from devices with different device properties may + * lead to functional/performance bugs! + * */ + + + //! + //! + //! + public native @Cast("bool") @NoException(true) boolean combine(@Const @ByRef ITimingCache inputCache, @Cast("bool") boolean ignoreMismatch); + + /** + * \brief Empty the timing cache + * + * @return True if reset successfully, false otherwise. + * */ + public native @Cast("bool") @NoException(true) boolean reset(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ITopKLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ITopKLayer.java new file mode 100644 index 00000000000..d593f4b8bd1 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ITopKLayer.java @@ -0,0 +1,105 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \class ITopKLayer + * + * \brief Layer that represents a TopK reduction. + * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. + * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class ITopKLayer extends ILayer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public ITopKLayer(Pointer p) { super(p); } + + /** + * \brief Set the operation for the layer. + * + * @see getOperation(), TopKOperation + * */ + + + //! + //! + //! + public native @NoException(true) void setOperation(TopKOperation op); + public native @NoException(true) void setOperation(@Cast("nvinfer1::TopKOperation") int op); + + /** + * \brief Get the operation for the layer. + * + * @see setOperation(), TopKOperation + * */ + + + //! + //! + //! + //! + public native @NoException(true) TopKOperation getOperation(); + + /** + * \brief Set the k value for the layer. + * + * Currently only values up to 3840 are supported. + * + * @see getK() + * */ + + + //! + //! + //! + public native @NoException(true) void setK(int k); + + /** + * \brief Get the k value for the layer. + * + * @see setK() + * */ + + + //! + //! + //! + public native @NoException(true) int getK(); + + /** + * \brief Set which axes to reduce for the layer. + * + * @see getReduceAxes() + * */ + + + //! + //! + //! + public native @NoException(true) void setReduceAxes(@Cast("uint32_t") int reduceAxes); + + /** + * \brief Get the axes to reduce for the layer. 
+ * + * @see setReduceAxes() + * */ + public native @Cast("uint32_t") @NoException(true) int getReduceAxes(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ITripLimitLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ITripLimitLayer.java new file mode 100644 index 00000000000..34bd0b3683c --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ITripLimitLayer.java @@ -0,0 +1,29 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class ITripLimitLayer extends ILoopBoundaryLayer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public ITripLimitLayer(Pointer p) { super(p); } + + public native @NoException(true) TripLimit getTripLimit(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IUnaryLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IUnaryLayer.java new file mode 100644 index 00000000000..3a9a335de5a --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IUnaryLayer.java @@ -0,0 +1,54 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \class IUnaryLayer + * + * \brief Layer that represents an unary operation. + * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. + * */ +@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class IUnaryLayer extends ILayer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IUnaryLayer(Pointer p) { super(p); } + + /** + * \brief Set the unary operation for the layer. + * + * @see getOperation(), UnaryOperation + * */ + + + //! + //! + //! + public native @NoException(true) void setOperation(UnaryOperation op); + public native @NoException(true) void setOperation(@Cast("nvinfer1::UnaryOperation") int op); + + /** + * \brief Get the unary operation for the layer. 
+ * + * @see setOperation(), UnaryOperation + * */ + public native @NoException(true) UnaryOperation getOperation(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Permutation.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Permutation.java new file mode 100644 index 00000000000..8c0ab84d5b8 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Permutation.java @@ -0,0 +1,48 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class Permutation extends Pointer { + static { Loader.load(); } + /** Default native constructor. */ + public Permutation() { super((Pointer)null); allocate(); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public Permutation(long size) { super((Pointer)null); allocateArray(size); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public Permutation(Pointer p) { super(p); } + private native void allocate(); + private native void allocateArray(long size); + @Override public Permutation position(long position) { + return (Permutation)super.position(position); + } + @Override public Permutation getPointer(long i) { + return new Permutation((Pointer)this).offsetAddress(i); + } + + /** + * The elements of the permutation. + * The permutation is applied as outputDimensionIndex = permutation.order[inputDimensionIndex], so to + * permute from CHW order to HWC order, the required permutation is [1, 2, 0], and to permute + * from HWC to CHW, the required permutation is [2, 0, 1]. + * */ + public native int order(int i); public native Permutation order(int i, int setter); + @MemberGetter public native IntPointer order(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/PluginField.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/PluginField.java new file mode 100644 index 00000000000..5e09fbc5198 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/PluginField.java @@ -0,0 +1,81 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \class PluginField + * + * \brief Structure containing plugin attribute field names and associated data + * This information can be parsed to decode necessary plugin metadata + * + * */ 
+@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class PluginField extends Pointer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public PluginField(Pointer p) { super(p); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public PluginField(long size) { super((Pointer)null); allocateArray(size); } + private native void allocateArray(long size); + @Override public PluginField position(long position) { + return (PluginField)super.position(position); + } + @Override public PluginField getPointer(long i) { + return new PluginField((Pointer)this).offsetAddress(i); + } + + /** + * \brief Plugin field attribute name + * */ + + //! + //! + public native String name(); public native PluginField name(String setter); + /** + * \brief Plugin field attribute data + * */ + + //! + //! + public native @Const Pointer data(); public native PluginField data(Pointer setter); + /** + * \brief Plugin field attribute type + * @see PluginFieldType + * */ + + //! + //! 
+ public native PluginFieldType type(); public native PluginField type(PluginFieldType setter); + /** + * \brief Number of data entries in the Plugin attribute + * */ + public native int length(); public native PluginField length(int setter); + + public PluginField(String name_/*=nullptr*/, @Const Pointer data_/*=nullptr*/, + PluginFieldType type_/*=nvinfer1::PluginFieldType::kUNKNOWN*/, int length_/*=0*/) { super((Pointer)null); allocate(name_, data_, type_, length_); } + @NoException(true) private native void allocate(String name_/*=nullptr*/, @Const Pointer data_/*=nullptr*/, + PluginFieldType type_/*=nvinfer1::PluginFieldType::kUNKNOWN*/, int length_/*=0*/); + public PluginField() { super((Pointer)null); allocate(); } + @NoException(true) private native void allocate(); + public PluginField(@Cast("const char*") BytePointer name_/*=nullptr*/, @Const Pointer data_/*=nullptr*/, + @Cast("nvinfer1::PluginFieldType") int type_/*=nvinfer1::PluginFieldType::kUNKNOWN*/, int length_/*=0*/) { super((Pointer)null); allocate(name_, data_, type_, length_); } + @NoException(true) private native void allocate(@Cast("const char*") BytePointer name_/*=nullptr*/, @Const Pointer data_/*=nullptr*/, + @Cast("nvinfer1::PluginFieldType") int type_/*=nvinfer1::PluginFieldType::kUNKNOWN*/, int length_/*=0*/); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/PluginFieldCollection.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/PluginFieldCollection.java new file mode 100644 index 00000000000..504b941791e --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/PluginFieldCollection.java @@ -0,0 +1,45 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static 
org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** Plugin field collection struct. */ +@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class PluginFieldCollection extends Pointer { + static { Loader.load(); } + /** Default native constructor. */ + public PluginFieldCollection() { super((Pointer)null); allocate(); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public PluginFieldCollection(long size) { super((Pointer)null); allocateArray(size); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public PluginFieldCollection(Pointer p) { super(p); } + private native void allocate(); + private native void allocateArray(long size); + @Override public PluginFieldCollection position(long position) { + return (PluginFieldCollection)super.position(position); + } + @Override public PluginFieldCollection getPointer(long i) { + return new PluginFieldCollection((Pointer)this).offsetAddress(i); + } + + /** Number of PluginField entries. */ + public native int nbFields(); public native PluginFieldCollection nbFields(int setter); + /** Pointer to PluginField entries. 
*/ + public native @Const PluginField fields(); public native PluginFieldCollection fields(PluginField setter); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/PluginTensorDesc.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/PluginTensorDesc.java new file mode 100644 index 00000000000..e34ac288bf8 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/PluginTensorDesc.java @@ -0,0 +1,58 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + // namespace impl + +/** \struct PluginTensorDesc + * + * \brief Fields that a plugin might see for an input or output. + * + * Scale is only valid when data type is DataType::kINT8. TensorRT will set + * the value to -1.0f if it is invalid. + * + * @see IPluginV2IOExt::supportsFormatCombination + * @see IPluginV2IOExt::configurePlugin + * */ +@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class PluginTensorDesc extends Pointer { + static { Loader.load(); } + /** Default native constructor. */ + public PluginTensorDesc() { super((Pointer)null); allocate(); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public PluginTensorDesc(long size) { super((Pointer)null); allocateArray(size); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public PluginTensorDesc(Pointer p) { super(p); } + private native void allocate(); + private native void allocateArray(long size); + @Override public PluginTensorDesc position(long position) { + return (PluginTensorDesc)super.position(position); + } + @Override public PluginTensorDesc getPointer(long i) { + return new PluginTensorDesc((Pointer)this).offsetAddress(i); + } + + /** Dimensions. */ + public native @ByRef @Cast("nvinfer1::Dims*") Dims32 dims(); public native PluginTensorDesc dims(Dims32 setter); + /** \warning DataType:kBOOL not supported. */ + public native DataType type(); public native PluginTensorDesc type(DataType setter); + /** Tensor format. */ + public native TensorFormat format(); public native PluginTensorDesc format(TensorFormat setter); + /** Scale for INT8 data type. */ + public native float scale(); public native PluginTensorDesc scale(float setter); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VActivationLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VActivationLayer.java new file mode 100644 index 00000000000..eb723e73d62 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VActivationLayer.java @@ -0,0 +1,35 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = 
org.bytedeco.tensorrt.presets.nvinfer.class) +public class VActivationLayer extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public VActivationLayer(Pointer p) { super(p); } + + public native @NoException(true) void setActivationType(ActivationType type); + public native @NoException(true) void setActivationType(@Cast("nvinfer1::ActivationType") int type); + public native @NoException(true) ActivationType getActivationType(); + public native @NoException(true) void setAlpha(float alpha); + public native @NoException(true) void setBeta(float beta); + public native @NoException(true) float getAlpha(); + public native @NoException(true) float getBeta(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VAlgorithm.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VAlgorithm.java new file mode 100644 index 00000000000..e95bb61ccdf --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VAlgorithm.java @@ -0,0 +1,33 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VAlgorithm extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public VAlgorithm(Pointer p) { super(p); } + + public native @Const @ByRef @NoException(true) IAlgorithmIOInfo getAlgorithmIOInfo(int index); + public native @Const @ByRef @NoException(true) IAlgorithmVariant getAlgorithmVariant(); + public native @NoException(true) float getTimingMSec(); + public native @Cast("std::size_t") @NoException(true) long getWorkspaceSize(); + public native @Const @NoException(true) IAlgorithmIOInfo getAlgorithmIOInfoByIndex(int index); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VAlgorithmContext.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VAlgorithmContext.java new file mode 100644 index 00000000000..492cee4c440 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VAlgorithmContext.java @@ -0,0 +1,33 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VAlgorithmContext extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public VAlgorithmContext(Pointer p) { super(p); } + + public native @NoException(true) String getName(); + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getDimensions(int index, OptProfileSelector select); + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getDimensions(int index, @Cast("nvinfer1::OptProfileSelector") int select); + public native @NoException(true) int getNbInputs(); + public native @NoException(true) int getNbOutputs(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VAlgorithmIOInfo.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VAlgorithmIOInfo.java new file mode 100644 index 00000000000..5efc7c65c44 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VAlgorithmIOInfo.java @@ -0,0 +1,31 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VAlgorithmIOInfo extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public VAlgorithmIOInfo(Pointer p) { super(p); } + + public native @NoException(true) TensorFormat getTensorFormat(); + public native @NoException(true) DataType getDataType(); + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getStrides(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VAlgorithmVariant.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VAlgorithmVariant.java new file mode 100644 index 00000000000..f788a26a219 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VAlgorithmVariant.java @@ -0,0 +1,30 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VAlgorithmVariant extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public VAlgorithmVariant(Pointer p) { super(p); } + + public native @Cast("int64_t") @NoException(true) long getImplementation(); + public native @Cast("int64_t") @NoException(true) long getTactic(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VBuilder.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VBuilder.java new file mode 100644 index 00000000000..f3f5c2b5fc8 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VBuilder.java @@ -0,0 +1,45 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VBuilder extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public VBuilder(Pointer p) { super(p); } + + public native @NoException(true) void setMaxBatchSize(int batchSize); + public native @NoException(true) int getMaxBatchSize(); + public native @Cast("bool") @NoException(true) boolean platformHasFastFp16(); + public native @Cast("bool") @NoException(true) boolean platformHasFastInt8(); + public native @NoException(true) int getMaxDLABatchSize(); + public native @NoException(true) int getNbDLACores(); + public native @NoException(true) void setGpuAllocator(IGpuAllocator allocator); + public native @NoException(true) IBuilderConfig createBuilderConfig(); + public native @NoException(true) ICudaEngine buildEngineWithConfig(@ByRef INetworkDefinition network, @ByRef IBuilderConfig config); + public native @NoException(true) INetworkDefinition createNetworkV2(@Cast("nvinfer1::NetworkDefinitionCreationFlags") int flags); + public native @NoException(true) IOptimizationProfile createOptimizationProfile(); + public native @NoException(true) void setErrorRecorder(IErrorRecorder recorder); + public native @NoException(true) IErrorRecorder getErrorRecorder(); + public native @NoException(true) void reset(); + public native @Cast("bool") @NoException(true) boolean platformHasTf32(); + public native @NoException(true) IHostMemory buildSerializedNetwork(@ByRef INetworkDefinition network, @ByRef IBuilderConfig config); + public native @Cast("bool") @NoException(true) boolean isNetworkSupported(@Const @ByRef INetworkDefinition network, @Const @ByRef IBuilderConfig config); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VBuilderConfig.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VBuilderConfig.java new file mode 100644 index 00000000000..c7dbdfb63bd --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VBuilderConfig.java @@ -0,0 +1,83 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import 
java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VBuilderConfig extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public VBuilderConfig(Pointer p) { super(p); } + + public native @NoException(true) void setMinTimingIterations(int minTiming); + public native @NoException(true) int getMinTimingIterations(); + public native @NoException(true) void setAvgTimingIterations(int avgTiming); + public native @NoException(true) int getAvgTimingIterations(); + public native @NoException(true) void setEngineCapability(EngineCapability capability); + public native @NoException(true) void setEngineCapability(@Cast("nvinfer1::EngineCapability") int capability); + public native @NoException(true) EngineCapability getEngineCapability(); + public native @NoException(true) void setInt8Calibrator(IInt8Calibrator calibrator); + public native @NoException(true) IInt8Calibrator getInt8Calibrator(); + public native @NoException(true) void setMaxWorkspaceSize(@Cast("std::size_t") long workspaceSize); + public native @Cast("std::size_t") @NoException(true) long getMaxWorkspaceSize(); + public native @NoException(true) void setFlags(@Cast("nvinfer1::BuilderFlags") int builderFlags); + public native @Cast("nvinfer1::BuilderFlags") @NoException(true) int getFlags(); + public native @NoException(true) void clearFlag(BuilderFlag 
builderFlag); + public native @NoException(true) void clearFlag(@Cast("nvinfer1::BuilderFlag") int builderFlag); + public native @NoException(true) void setFlag(BuilderFlag builderFlag); + public native @NoException(true) void setFlag(@Cast("nvinfer1::BuilderFlag") int builderFlag); + public native @Cast("bool") @NoException(true) boolean getFlag(BuilderFlag builderFlag); + public native @Cast("bool") @NoException(true) boolean getFlag(@Cast("nvinfer1::BuilderFlag") int builderFlag); + public native @NoException(true) void setDeviceType(@Const ILayer layer, DeviceType deviceType); + public native @NoException(true) void setDeviceType(@Const ILayer layer, @Cast("nvinfer1::DeviceType") int deviceType); + public native @NoException(true) DeviceType getDeviceType(@Const ILayer layer); + public native @Cast("bool") @NoException(true) boolean isDeviceTypeSet(@Const ILayer layer); + public native @NoException(true) void resetDeviceType(@Const ILayer layer); + public native @Cast("bool") @NoException(true) boolean canRunOnDLA(@Const ILayer layer); + public native @NoException(true) void setDLACore(int dlaCore); + public native @NoException(true) int getDLACore(); + public native @NoException(true) void setDefaultDeviceType(DeviceType deviceType); + public native @NoException(true) void setDefaultDeviceType(@Cast("nvinfer1::DeviceType") int deviceType); + public native @NoException(true) DeviceType getDefaultDeviceType(); + public native @NoException(true) void reset(); + public native @NoException(true) void setProfileStream(CUstream_st stream); + public native @NoException(true) CUstream_st getProfileStream(); + public native @NoException(true) int addOptimizationProfile(@Const IOptimizationProfile profile); + public native @NoException(true) int getNbOptimizationProfiles(); + public native @NoException(true) void setProfilingVerbosity(ProfilingVerbosity verbosity); + public native @NoException(true) void setProfilingVerbosity(@Cast("nvinfer1::ProfilingVerbosity") int 
verbosity); + public native @NoException(true) ProfilingVerbosity getProfilingVerbosity(); + public native @NoException(true) void setAlgorithmSelector(IAlgorithmSelector selector); + public native @NoException(true) IAlgorithmSelector getAlgorithmSelector(); + public native @Cast("bool") @NoException(true) boolean setCalibrationProfile(@Const IOptimizationProfile profile); + public native @Const @NoException(true) IOptimizationProfile getCalibrationProfile(); + public native @NoException(true) void setQuantizationFlags(@Cast("nvinfer1::QuantizationFlags") int flags); + public native @Cast("nvinfer1::QuantizationFlags") @NoException(true) int getQuantizationFlags(); + public native @NoException(true) void clearQuantizationFlag(QuantizationFlag flag); + public native @NoException(true) void clearQuantizationFlag(@Cast("nvinfer1::QuantizationFlag") int flag); + public native @NoException(true) void setQuantizationFlag(QuantizationFlag flag); + public native @NoException(true) void setQuantizationFlag(@Cast("nvinfer1::QuantizationFlag") int flag); + public native @Cast("bool") @NoException(true) boolean getQuantizationFlag(QuantizationFlag flag); + public native @Cast("bool") @NoException(true) boolean getQuantizationFlag(@Cast("nvinfer1::QuantizationFlag") int flag); + public native @Cast("bool") @NoException(true) boolean setTacticSources(@Cast("nvinfer1::TacticSources") int tacticSources); + public native @Cast("nvinfer1::TacticSources") @NoException(true) int getTacticSources(); + public native @NoException(true) ITimingCache createTimingCache(@Const Pointer blob, @Cast("std::size_t") long size); + public native @Cast("bool") @NoException(true) boolean setTimingCache(@Const @ByRef ITimingCache cache, @Cast("bool") boolean ignoreMismatch); + public native @Const @NoException(true) ITimingCache getTimingCache(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VConcatenationLayer.java 
b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VConcatenationLayer.java new file mode 100644 index 00000000000..533ed40a161 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VConcatenationLayer.java @@ -0,0 +1,30 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VConcatenationLayer extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public VConcatenationLayer(Pointer p) { super(p); } + + public native @NoException(true) void setAxis(int axis); + public native @NoException(true) int getAxis(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VConstantLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VConstantLayer.java new file mode 100644 index 00000000000..c8f2ae9c965 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VConstantLayer.java @@ -0,0 +1,32 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VConstantLayer extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public VConstantLayer(Pointer p) { super(p); } + + public native @NoException(true) void setWeights(@ByVal Weights weights); + public native @ByVal @NoException(true) Weights getWeights(); + public native @NoException(true) void setDimensions(@ByVal @Cast("nvinfer1::Dims*") Dims32 dimensions); + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getDimensions(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VConvolutionLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VConvolutionLayer.java new file mode 100644 index 00000000000..dd23ae4aef2 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VConvolutionLayer.java @@ -0,0 +1,59 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VConvolutionLayer extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public VConvolutionLayer(Pointer p) { super(p); } + + public native @NoException(true) void setKernelSize(@ByVal DimsHW kernelSize); + public native @ByVal @NoException(true) DimsHW getKernelSize(); + public native @NoException(true) void setNbOutputMaps(int nbOutputMaps); + public native @NoException(true) int getNbOutputMaps(); + public native @NoException(true) void setStride(@ByVal DimsHW stride); + public native @ByVal @NoException(true) DimsHW getStride(); + public native @NoException(true) void setPadding(@ByVal DimsHW padding); + public native @ByVal @NoException(true) DimsHW getPadding(); + public native @NoException(true) void setNbGroups(int nbGroups); + public native @NoException(true) int getNbGroups(); + public native @NoException(true) void setKernelWeights(@ByVal Weights weights); + public native @ByVal @NoException(true) Weights getKernelWeights(); + public native @NoException(true) void setBiasWeights(@ByVal Weights weights); + public native @ByVal @NoException(true) Weights getBiasWeights(); + public native @NoException(true) void setDilation(@ByVal DimsHW dilation); + public native @ByVal @NoException(true) DimsHW getDilation(); + public native @NoException(true) void setPrePadding(@ByVal @Cast("nvinfer1::Dims*") Dims32 padding); + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getPrePadding(); + public native @NoException(true) void setPostPadding(@ByVal @Cast("nvinfer1::Dims*") Dims32 padding); + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getPostPadding(); + public native @NoException(true) void setPaddingMode(PaddingMode paddingMode); + public native @NoException(true) void setPaddingMode(@Cast("nvinfer1::PaddingMode") int paddingMode); + public native @NoException(true) PaddingMode getPaddingMode(); + public native @NoException(true) void setKernelSizeNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 kernelSize); + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 
getKernelSizeNd(); + public native @NoException(true) void setStrideNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 stride); + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getStrideNd(); + public native @NoException(true) void setPaddingNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 padding); + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getPaddingNd(); + public native @NoException(true) void setDilationNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 dilation); + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getDilationNd(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VCudaEngine.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VCudaEngine.java new file mode 100644 index 00000000000..a4122e055c3 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VCudaEngine.java @@ -0,0 +1,63 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VCudaEngine extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public VCudaEngine(Pointer p) { super(p); } + + public native @NoException(true) int getNbBindings(); + public native @NoException(true) int getBindingIndex(String name); + public native @NoException(true) int getBindingIndex(@Cast("const char*") BytePointer name); + public native @NoException(true) String getBindingName(int bindingIndex); + public native @Cast("bool") @NoException(true) boolean bindingIsInput(int bindingIndex); + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getBindingDimensions(int bindingIndex); + public native @NoException(true) DataType getBindingDataType(int bindingIndex); + public native @NoException(true) int getMaxBatchSize(); + public native @NoException(true) int getNbLayers(); + public native @NoException(true) IHostMemory serialize(); + public native @NoException(true) IExecutionContext createExecutionContext(); + public native @NoException(true) TensorLocation getLocation(int bindingIndex); + public native @NoException(true) IExecutionContext createExecutionContextWithoutDeviceMemory(); + public native @Cast("size_t") @NoException(true) long getDeviceMemorySize(); + public native @Cast("bool") @NoException(true) boolean isRefittable(); + public native @NoException(true) int getBindingBytesPerComponent(int bindingIndex); + public native @NoException(true) int getBindingComponentsPerElement(int bindingIndex); + public native @NoException(true) TensorFormat getBindingFormat(int bindingIndex); + public native @NoException(true) String getBindingFormatDesc(int bindingIndex); + public native @NoException(true) int getBindingVectorizedDim(int bindingIndex); + public native @NoException(true) String getName(); + public native @NoException(true) int getNbOptimizationProfiles(); + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getProfileDimensions(int bindingIndex, int profileIndex, OptProfileSelector select); + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 
getProfileDimensions(int bindingIndex, int profileIndex, @Cast("nvinfer1::OptProfileSelector") int select); + public native @Const @NoException(true) IntPointer getProfileShapeValues( + int profileIndex, int inputIndex, OptProfileSelector select); + public native @Const @NoException(true) IntBuffer getProfileShapeValues( + int profileIndex, int inputIndex, @Cast("nvinfer1::OptProfileSelector") int select); + public native @Cast("bool") @NoException(true) boolean isShapeBinding(int bindingIndex); + public native @Cast("bool") @NoException(true) boolean isExecutionBinding(int bindingIndex); + public native @NoException(true) EngineCapability getEngineCapability(); + public native @NoException(true) void setErrorRecorder(IErrorRecorder recorder); + public native @NoException(true) IErrorRecorder getErrorRecorder(); + public native @Cast("bool") @NoException(true) boolean hasImplicitBatchDimension(); + public native @Cast("nvinfer1::TacticSources") @NoException(true) int getTacticSources(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VDeconvolutionLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VDeconvolutionLayer.java new file mode 100644 index 00000000000..8db00f02e62 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VDeconvolutionLayer.java @@ -0,0 +1,57 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static 
org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VDeconvolutionLayer extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public VDeconvolutionLayer(Pointer p) { super(p); } + + public native @NoException(true) void setKernelSize(@ByVal DimsHW kernelSize); + public native @ByVal @NoException(true) DimsHW getKernelSize(); + public native @NoException(true) void setNbOutputMaps(int nbOutputMaps); + public native @NoException(true) int getNbOutputMaps(); + public native @NoException(true) void setStride(@ByVal DimsHW stride); + public native @ByVal @NoException(true) DimsHW getStride(); + public native @NoException(true) void setPadding(@ByVal DimsHW padding); + public native @ByVal @NoException(true) DimsHW getPadding(); + public native @NoException(true) void setNbGroups(int nbGroups); + public native @NoException(true) int getNbGroups(); + public native @NoException(true) void setKernelWeights(@ByVal Weights weights); + public native @ByVal @NoException(true) Weights getKernelWeights(); + public native @NoException(true) void setBiasWeights(@ByVal Weights weights); + public native @ByVal @NoException(true) Weights getBiasWeights(); + public native @NoException(true) void setPrePadding(@ByVal @Cast("nvinfer1::Dims*") Dims32 padding); + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getPrePadding(); + public native @NoException(true) void setPostPadding(@ByVal @Cast("nvinfer1::Dims*") Dims32 padding); + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getPostPadding(); + public native @NoException(true) void setPaddingMode(PaddingMode paddingMode); + public native @NoException(true) void setPaddingMode(@Cast("nvinfer1::PaddingMode") int paddingMode); + public native @NoException(true) PaddingMode getPaddingMode(); + public native 
@NoException(true) void setKernelSizeNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 kernelSize); + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getKernelSizeNd(); + public native @NoException(true) void setStrideNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 stride); + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getStrideNd(); + public native @NoException(true) void setPaddingNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 padding); + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getPaddingNd(); + public native @NoException(true) void setDilationNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 dilation); + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getDilationNd(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VDequantizeLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VDequantizeLayer.java new file mode 100644 index 00000000000..fb9889da7a9 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VDequantizeLayer.java @@ -0,0 +1,30 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VDequantizeLayer extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. 
Invokes {@link Pointer#Pointer(Pointer)}. */ + public VDequantizeLayer(Pointer p) { super(p); } + + public native @NoException(true) int getAxis(); + public native @NoException(true) void setAxis(int axis); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VDimensionExpr.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VDimensionExpr.java new file mode 100644 index 00000000000..dca88dec6cb --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VDimensionExpr.java @@ -0,0 +1,30 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VDimensionExpr extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public VDimensionExpr(Pointer p) { super(p); } + + public native @Cast("bool") boolean isConstant(); + public native int getConstantValue(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VElementWiseLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VElementWiseLayer.java new file mode 100644 index 00000000000..22f02ebcf50 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VElementWiseLayer.java @@ -0,0 +1,31 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VElementWiseLayer extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public VElementWiseLayer(Pointer p) { super(p); } + + public native @NoException(true) void setOperation(ElementWiseOperation op); + public native @NoException(true) void setOperation(@Cast("nvinfer1::ElementWiseOperation") int op); + public native @NoException(true) ElementWiseOperation getOperation(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VExecutionContext.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VExecutionContext.java new file mode 100644 index 00000000000..d8422f1a895 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VExecutionContext.java @@ -0,0 +1,63 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VExecutionContext extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public VExecutionContext(Pointer p) { super(p); } + + public native @Cast("bool") @NoException(true) boolean execute(int batchSize, @Cast("void*const*") PointerPointer bindings); + public native @Cast("bool") @NoException(true) boolean execute(int batchSize, @Cast("void*const*") @ByPtrPtr Pointer bindings); + public native @Cast("bool") @NoException(true) boolean enqueue( + int batchSize, @Cast("void*const*") PointerPointer bindings, CUstream_st stream, @ByPtrPtr CUevent_st inputConsumed); + public native @Cast("bool") @NoException(true) boolean enqueue( + int batchSize, @Cast("void*const*") @ByPtrPtr Pointer bindings, CUstream_st stream, @ByPtrPtr CUevent_st inputConsumed); + public native @NoException(true) void setDebugSync(@Cast("bool") boolean sync); + public native @Cast("bool") @NoException(true) boolean getDebugSync(); + public native @NoException(true) void setProfiler(IProfiler arg0); + public native @NoException(true) IProfiler getProfiler(); + public native @Const @ByRef @NoException(true) ICudaEngine getEngine(); + public native @NoException(true) void setName(String name); + public native @NoException(true) void setName(@Cast("const char*") BytePointer name); + public native @NoException(true) String getName(); + public native @NoException(true) void setDeviceMemory(Pointer memory); + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getStrides(int bindingIndex); + public native @Cast("bool") @NoException(true) boolean setOptimizationProfile(int profileIndex); + public native @NoException(true) int getOptimizationProfile(); + public native @Cast("bool") @NoException(true) boolean setBindingDimensions(int bindingIndex, @ByVal @Cast("nvinfer1::Dims*") Dims32 dimensions); + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getBindingDimensions(int bindingIndex); + public native @Cast("bool") @NoException(true) boolean setInputShapeBinding(int bindingIndex, @Const IntPointer data); + public native 
@Cast("bool") @NoException(true) boolean setInputShapeBinding(int bindingIndex, @Const IntBuffer data); + public native @Cast("bool") @NoException(true) boolean setInputShapeBinding(int bindingIndex, @Const int[] data); + public native @Cast("bool") @NoException(true) boolean getShapeBinding(int bindingIndex, IntPointer data); + public native @Cast("bool") @NoException(true) boolean getShapeBinding(int bindingIndex, IntBuffer data); + public native @Cast("bool") @NoException(true) boolean getShapeBinding(int bindingIndex, int[] data); + public native @Cast("bool") @NoException(true) boolean allInputDimensionsSpecified(); + public native @Cast("bool") @NoException(true) boolean allInputShapesSpecified(); + public native @NoException(true) void setErrorRecorder(IErrorRecorder recorder); + public native @NoException(true) IErrorRecorder getErrorRecorder(); + public native @Cast("bool") @NoException(true) boolean executeV2(@Cast("void*const*") PointerPointer bindings); + public native @Cast("bool") @NoException(true) boolean executeV2(@Cast("void*const*") @ByPtrPtr Pointer bindings); + public native @Cast("bool") @NoException(true) boolean enqueueV2(@Cast("void*const*") PointerPointer bindings, CUstream_st stream, @ByPtrPtr CUevent_st inputConsumed); + public native @Cast("bool") @NoException(true) boolean enqueueV2(@Cast("void*const*") @ByPtrPtr Pointer bindings, CUstream_st stream, @ByPtrPtr CUevent_st inputConsumed); + public native @Cast("bool") @NoException(true) boolean setOptimizationProfileAsync(int profileIndex, CUstream_st stream); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VExprBuilder.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VExprBuilder.java new file mode 100644 index 00000000000..a71ca934abb --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VExprBuilder.java @@ -0,0 +1,33 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package 
org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VExprBuilder extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public VExprBuilder(Pointer p) { super(p); } + + public native @Const IDimensionExpr constant(int value); + public native @Const IDimensionExpr operation( + DimensionOperation op, @Const @ByRef IDimensionExpr first, @Const @ByRef IDimensionExpr second); + public native @Const IDimensionExpr operation( + @Cast("nvinfer1::DimensionOperation") int op, @Const @ByRef IDimensionExpr first, @Const @ByRef IDimensionExpr second); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VFillLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VFillLayer.java new file mode 100644 index 00000000000..5d1bb1e60c8 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VFillLayer.java @@ -0,0 +1,37 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static 
org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VFillLayer extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public VFillLayer(Pointer p) { super(p); } + + public native @NoException(true) void setDimensions(@ByVal @Cast("nvinfer1::Dims*") Dims32 dimensions); + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getDimensions(); + public native @NoException(true) void setOperation(FillOperation op); + public native @NoException(true) void setOperation(@Cast("nvinfer1::FillOperation") int op); + public native @NoException(true) FillOperation getOperation(); + public native @NoException(true) void setAlpha(double alpha); + public native @NoException(true) double getAlpha(); + public native @NoException(true) void setBeta(double beta); + public native @NoException(true) double getBeta(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VFullyConnectedLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VFullyConnectedLayer.java new file mode 100644 index 00000000000..4113e407b10 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VFullyConnectedLayer.java @@ -0,0 +1,34 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static 
org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VFullyConnectedLayer extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public VFullyConnectedLayer(Pointer p) { super(p); } + + public native @NoException(true) void setNbOutputChannels(int nbOutputs); + public native @NoException(true) int getNbOutputChannels(); + public native @NoException(true) void setKernelWeights(@ByVal Weights weights); + public native @ByVal @NoException(true) Weights getKernelWeights(); + public native @NoException(true) void setBiasWeights(@ByVal Weights weights); + public native @ByVal @NoException(true) Weights getBiasWeights(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VGatherLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VGatherLayer.java new file mode 100644 index 00000000000..a624cf5335a --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VGatherLayer.java @@ -0,0 +1,32 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static 
org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VGatherLayer extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public VGatherLayer(Pointer p) { super(p); } + + public native @NoException(true) void setGatherAxis(int axis); + public native @NoException(true) int getGatherAxis(); + public native @NoException(true) void setNbElementWiseDims(int k); + public native @NoException(true) int getNbElementWiseDims(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VHostMemory.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VHostMemory.java new file mode 100644 index 00000000000..c1532e6c9d7 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VHostMemory.java @@ -0,0 +1,31 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VHostMemory extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public VHostMemory(Pointer p) { super(p); } + + public native @NoException(true) Pointer data(); + public native @Cast("std::size_t") @NoException(true) long size(); + public native @NoException(true) DataType type(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VIdentityLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VIdentityLayer.java new file mode 100644 index 00000000000..1d7f8c0440c --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VIdentityLayer.java @@ -0,0 +1,40 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VIdentityLayer extends VRoot { + static { Loader.load(); } + /** Default native constructor. */ + public VIdentityLayer() { super((Pointer)null); allocate(); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public VIdentityLayer(long size) { super((Pointer)null); allocateArray(size); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public VIdentityLayer(Pointer p) { super(p); } + private native void allocate(); + private native void allocateArray(long size); + @Override public VIdentityLayer position(long position) { + return (VIdentityLayer)super.position(position); + } + @Override public VIdentityLayer getPointer(long i) { + return new VIdentityLayer((Pointer)this).offsetAddress(i); + } + +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VIteratorLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VIteratorLayer.java new file mode 100644 index 00000000000..50bf6c92357 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VIteratorLayer.java @@ -0,0 +1,32 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VIteratorLayer extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public VIteratorLayer(Pointer p) { super(p); } + + public native @NoException(true) void setAxis(int axis); + public native @NoException(true) int getAxis(); + public native @NoException(true) void setReverse(@Cast("bool") boolean reverse); + public native @Cast("bool") @NoException(true) boolean getReverse(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VLRNLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VLRNLayer.java new file mode 100644 index 00000000000..6d65b4b3af0 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VLRNLayer.java @@ -0,0 +1,36 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VLRNLayer extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public VLRNLayer(Pointer p) { super(p); } + + public native @NoException(true) void setWindowSize(int windowSize); + public native @NoException(true) int getWindowSize(); + public native @NoException(true) void setAlpha(float alpha); + public native @NoException(true) float getAlpha(); + public native @NoException(true) void setBeta(float beta); + public native @NoException(true) float getBeta(); + public native @NoException(true) void setK(float k); + public native @NoException(true) float getK(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VLayer.java new file mode 100644 index 00000000000..f3b92ef20df --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VLayer.java @@ -0,0 +1,46 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VLayer extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public VLayer(Pointer p) { super(p); } + + public native @NoException(true) LayerType getType(); + public native @NoException(true) void setName(String name); + public native @NoException(true) void setName(@Cast("const char*") BytePointer name); + public native @NoException(true) String getName(); + public native @NoException(true) int getNbInputs(); + public native @NoException(true) ITensor getInput(int index); + public native @NoException(true) int getNbOutputs(); + public native @NoException(true) ITensor getOutput(int index); + public native @NoException(true) void setInput(int index, @ByRef ITensor tensor); + public native @NoException(true) void setPrecision(DataType dataType); + public native @NoException(true) void setPrecision(@Cast("nvinfer1::DataType") int dataType); + public native @NoException(true) DataType getPrecision(); + public native @Cast("bool") @NoException(true) boolean precisionIsSet(); + public native @NoException(true) void resetPrecision(); + public native @NoException(true) void setOutputType(int index, DataType dataType); + public native @NoException(true) void setOutputType(int index, @Cast("nvinfer1::DataType") int dataType); + public native @NoException(true) DataType getOutputType(int index); + public native @Cast("bool") @NoException(true) boolean outputTypeIsSet(int index); + public native @NoException(true) void resetOutputType(int index); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VLoop.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VLoop.java new file mode 100644 index 00000000000..fde95340760 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VLoop.java @@ -0,0 +1,39 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import 
org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VLoop extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public VLoop(Pointer p) { super(p); } + + public native @NoException(true) IRecurrenceLayer addRecurrence(@ByRef ITensor initialValue); + public native @NoException(true) ITripLimitLayer addTripLimit(@ByRef ITensor tensor, TripLimit _limit); + public native @NoException(true) ITripLimitLayer addTripLimit(@ByRef ITensor tensor, @Cast("nvinfer1::TripLimit") int _limit); + public native @NoException(true) IIteratorLayer addIterator(@ByRef ITensor tensor, int axis/*=0*/, @Cast("bool") boolean reverse/*=false*/); + public native @NoException(true) IIteratorLayer addIterator(@ByRef ITensor tensor); + public native @NoException(true) ILoopOutputLayer addLoopOutput(@ByRef ITensor tensor, LoopOutput outputKind, int axis/*=0*/); + public native @NoException(true) ILoopOutputLayer addLoopOutput(@ByRef ITensor tensor, LoopOutput outputKind); + public native @NoException(true) ILoopOutputLayer addLoopOutput(@ByRef ITensor tensor, @Cast("nvinfer1::LoopOutput") int outputKind, int axis/*=0*/); + public native @NoException(true) ILoopOutputLayer addLoopOutput(@ByRef ITensor tensor, @Cast("nvinfer1::LoopOutput") int outputKind); + public native @NoException(true) void setName(String name); + public native @NoException(true) void setName(@Cast("const char*") BytePointer name); + public native @NoException(true) String getName(); +} diff --git 
a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VLoopBoundaryLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VLoopBoundaryLayer.java new file mode 100644 index 00000000000..cba6b945a82 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VLoopBoundaryLayer.java @@ -0,0 +1,29 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VLoopBoundaryLayer extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public VLoopBoundaryLayer(Pointer p) { super(p); } + + public native @NoException(true) ILoop getLoop(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VLoopOutputLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VLoopOutputLayer.java new file mode 100644 index 00000000000..f4c1c6d5ed6 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VLoopOutputLayer.java @@ -0,0 +1,31 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VLoopOutputLayer extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public VLoopOutputLayer(Pointer p) { super(p); } + + public native @NoException(true) LoopOutput getLoopOutput(); + public native @NoException(true) void setAxis(int axis); + public native @NoException(true) int getAxis(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VMatrixMultiplyLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VMatrixMultiplyLayer.java new file mode 100644 index 00000000000..2adddb25764 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VMatrixMultiplyLayer.java @@ -0,0 +1,31 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VMatrixMultiplyLayer extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public VMatrixMultiplyLayer(Pointer p) { super(p); } + + public native @NoException(true) void setOperation(int index, MatrixOperation op); + public native @NoException(true) void setOperation(int index, @Cast("nvinfer1::MatrixOperation") int op); + public native @NoException(true) MatrixOperation getOperation(int index); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VNetworkDefinition.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VNetworkDefinition.java new file mode 100644 index 00000000000..c5344534126 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VNetworkDefinition.java @@ -0,0 +1,113 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VNetworkDefinition extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public VNetworkDefinition(Pointer p) { super(p); } + + public native @NoException(true) ITensor addInput(String name, DataType type, @ByVal @Cast("nvinfer1::Dims*") Dims32 dimensions); + public native @NoException(true) ITensor addInput(@Cast("const char*") BytePointer name, @Cast("nvinfer1::DataType") int type, @ByVal @Cast("nvinfer1::Dims*") Dims32 dimensions); + public native @NoException(true) void markOutput(@ByRef ITensor tensor); + public native @NoException(true) IConvolutionLayer addConvolution( + @ByRef ITensor input, int nbOutputMaps, @ByVal DimsHW kernelSize, @ByVal Weights kernelWeights, @ByVal Weights biasWeights); + public native @NoException(true) IFullyConnectedLayer addFullyConnected( + @ByRef ITensor input, int nbOutputs, @ByVal Weights kernelWeights, @ByVal Weights biasWeights); + public native @NoException(true) IActivationLayer addActivation(@ByRef ITensor input, ActivationType type); + public native @NoException(true) IActivationLayer addActivation(@ByRef ITensor input, @Cast("nvinfer1::ActivationType") int type); + public native @NoException(true) IPoolingLayer addPooling(@ByRef ITensor input, PoolingType type, @ByVal DimsHW windowSize); + public native @NoException(true) IPoolingLayer addPooling(@ByRef ITensor input, @Cast("nvinfer1::PoolingType") int type, @ByVal DimsHW windowSize); + public native @NoException(true) ILRNLayer addLRN(@ByRef ITensor input, int window, float alpha, float beta, float k); + public native @NoException(true) IScaleLayer addScale(@ByRef ITensor input, ScaleMode mode, @ByVal Weights shift, @ByVal Weights scale, @ByVal Weights power); + public native @NoException(true) IScaleLayer addScale(@ByRef ITensor input, @Cast("nvinfer1::ScaleMode") int mode, @ByVal Weights shift, @ByVal Weights scale, @ByVal Weights power); + public native @NoException(true) ISoftMaxLayer addSoftMax(@ByRef ITensor input); + public native @NoException(true) IConcatenationLayer addConcatenation(@Cast("nvinfer1::ITensor*const*") 
PointerPointer inputs, int nbInputs); + public native @NoException(true) IConcatenationLayer addConcatenation(@ByPtrPtr ITensor inputs, int nbInputs); + public native @NoException(true) IDeconvolutionLayer addDeconvolution( + @ByRef ITensor input, int nbOutputMaps, @ByVal DimsHW kernelSize, @ByVal Weights kernelWeights, @ByVal Weights biasWeights); + public native @NoException(true) IElementWiseLayer addElementWise(@ByRef ITensor input1, @ByRef ITensor input2, ElementWiseOperation op); + public native @NoException(true) IElementWiseLayer addElementWise(@ByRef ITensor input1, @ByRef ITensor input2, @Cast("nvinfer1::ElementWiseOperation") int op); + public native @NoException(true) IUnaryLayer addUnary(@ByRef ITensor input, UnaryOperation operation); + public native @NoException(true) IUnaryLayer addUnary(@ByRef ITensor input, @Cast("nvinfer1::UnaryOperation") int operation); + public native @NoException(true) IPaddingLayer addPadding(@ByRef ITensor input, @ByVal DimsHW prePadding, @ByVal DimsHW postPadding); + public native @NoException(true) IShuffleLayer addShuffle(@ByRef ITensor input); + public native @NoException(true) int getNbLayers(); + public native @NoException(true) ILayer getLayer(int index); + public native @NoException(true) int getNbInputs(); + public native @NoException(true) ITensor getInput(int index); + public native @NoException(true) int getNbOutputs(); + public native @NoException(true) ITensor getOutput(int index); + public native @NoException(true) IReduceLayer addReduce( + @ByRef ITensor input, ReduceOperation operation, @Cast("uint32_t") int reduceAxes, @Cast("bool") boolean keepDimensions); + public native @NoException(true) IReduceLayer addReduce( + @ByRef ITensor input, @Cast("nvinfer1::ReduceOperation") int operation, @Cast("uint32_t") int reduceAxes, @Cast("bool") boolean keepDimensions); + public native @NoException(true) ITopKLayer addTopK(@ByRef ITensor input, TopKOperation op, int k, @Cast("uint32_t") int reduceAxes); + public 
native @NoException(true) ITopKLayer addTopK(@ByRef ITensor input, @Cast("nvinfer1::TopKOperation") int op, int k, @Cast("uint32_t") int reduceAxes); + public native @NoException(true) IGatherLayer addGather(@ByRef ITensor data, @ByRef ITensor indices, int axis); + public native @NoException(true) IRaggedSoftMaxLayer addRaggedSoftMax(@ByRef ITensor input, @ByRef ITensor bounds); + public native @NoException(true) IMatrixMultiplyLayer addMatrixMultiply( + @ByRef ITensor input0, MatrixOperation op0, @ByRef ITensor input1, MatrixOperation op1); + public native @NoException(true) IMatrixMultiplyLayer addMatrixMultiply( + @ByRef ITensor input0, @Cast("nvinfer1::MatrixOperation") int op0, @ByRef ITensor input1, @Cast("nvinfer1::MatrixOperation") int op1); + public native @NoException(true) IConstantLayer addConstant(@ByVal @Cast("nvinfer1::Dims*") Dims32 dimensions, @ByVal Weights weights); + public native @NoException(true) IRNNv2Layer addRNNv2( + @ByRef ITensor input, int layerCount, int hiddenSize, int maxSeqLen, RNNOperation op); + public native @NoException(true) IRNNv2Layer addRNNv2( + @ByRef ITensor input, int layerCount, int hiddenSize, int maxSeqLen, @Cast("nvinfer1::RNNOperation") int op); + public native @NoException(true) IIdentityLayer addIdentity(@ByRef ITensor input); + public native @NoException(true) void removeTensor(@ByRef ITensor tensor); + public native @NoException(true) void unmarkOutput(@ByRef ITensor tensor); + public native @NoException(true) IPluginV2Layer addPluginV2(@Cast("nvinfer1::ITensor*const*") PointerPointer inputs, int nbInputs, @ByRef IPluginV2 plugin); + public native @NoException(true) IPluginV2Layer addPluginV2(@ByPtrPtr ITensor inputs, int nbInputs, @ByRef IPluginV2 plugin); + public native @NoException(true) ISliceLayer addSlice(@ByRef ITensor input, @ByVal @Cast("nvinfer1::Dims*") Dims32 start, @ByVal @Cast("nvinfer1::Dims*") Dims32 size, @ByVal @Cast("nvinfer1::Dims*") Dims32 stride); + public native @NoException(true) void 
setName(String name); + public native @NoException(true) void setName(@Cast("const char*") BytePointer name); + public native @NoException(true) String getName(); + public native @NoException(true) IShapeLayer addShape(@ByRef ITensor input); + public native @Cast("bool") @NoException(true) boolean hasImplicitBatchDimension(); + public native @Cast("bool") @NoException(true) boolean markOutputForShapes(@ByRef ITensor tensor); + public native @Cast("bool") @NoException(true) boolean unmarkOutputForShapes(@ByRef ITensor tensor); + public native @NoException(true) IParametricReLULayer addParametricReLU(@ByRef ITensor input, @ByRef ITensor slope); + public native @NoException(true) IConvolutionLayer addConvolutionNd( + @ByRef ITensor input, int nbOutputMaps, @ByVal @Cast("nvinfer1::Dims*") Dims32 kernelSize, @ByVal Weights kernelWeights, @ByVal Weights biasWeights); + public native @NoException(true) IPoolingLayer addPoolingNd(@ByRef ITensor input, PoolingType type, @ByVal @Cast("nvinfer1::Dims*") Dims32 windowSize); + public native @NoException(true) IPoolingLayer addPoolingNd(@ByRef ITensor input, @Cast("nvinfer1::PoolingType") int type, @ByVal @Cast("nvinfer1::Dims*") Dims32 windowSize); + public native @NoException(true) IDeconvolutionLayer addDeconvolutionNd( + @ByRef ITensor input, int nbOutputMaps, @ByVal @Cast("nvinfer1::Dims*") Dims32 kernelSize, @ByVal Weights kernelWeights, @ByVal Weights biasWeights); + public native @NoException(true) IScaleLayer addScaleNd( + @ByRef ITensor input, ScaleMode mode, @ByVal Weights shift, @ByVal Weights scale, @ByVal Weights power, int channelAxis); + public native @NoException(true) IScaleLayer addScaleNd( + @ByRef ITensor input, @Cast("nvinfer1::ScaleMode") int mode, @ByVal Weights shift, @ByVal Weights scale, @ByVal Weights power, int channelAxis); + public native @NoException(true) IResizeLayer addResize(@ByRef ITensor input); + public native @Cast("bool") @NoException(true) boolean hasExplicitPrecision(); + public native 
@NoException(true) ILoop addLoop(); + public native @NoException(true) ISelectLayer addSelect(@ByRef ITensor condition, @ByRef ITensor thenInput, @ByRef ITensor elseInput); + public native @NoException(true) IFillLayer addFill(@ByVal @Cast("nvinfer1::Dims*") Dims32 dimensions, FillOperation op); + public native @NoException(true) IFillLayer addFill(@ByVal @Cast("nvinfer1::Dims*") Dims32 dimensions, @Cast("nvinfer1::FillOperation") int op); + public native @NoException(true) IPaddingLayer addPaddingNd(@ByRef ITensor input, @ByVal @Cast("nvinfer1::Dims*") Dims32 prePadding, @ByVal @Cast("nvinfer1::Dims*") Dims32 postPadding); + public native @Cast("bool") @NoException(true) boolean setWeightsName(@ByVal Weights weights, String name); + public native @Cast("bool") @NoException(true) boolean setWeightsName(@ByVal Weights weights, @Cast("const char*") BytePointer name); + public native @NoException(true) void setErrorRecorder(IErrorRecorder recorder); + public native @NoException(true) IErrorRecorder getErrorRecorder(); + public native @NoException(true) IDequantizeLayer addDequantize(@ByRef ITensor input, @ByRef ITensor scale); + public native @NoException(true) IQuantizeLayer addQuantize(@ByRef ITensor input, @ByRef ITensor scale); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VOptimizationProfile.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VOptimizationProfile.java new file mode 100644 index 00000000000..9a3302f4610 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VOptimizationProfile.java @@ -0,0 +1,51 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; 
+import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VOptimizationProfile extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public VOptimizationProfile(Pointer p) { super(p); } + + public native @Cast("bool") @NoException(true) boolean setDimensions(String inputName, OptProfileSelector select, @ByVal @Cast("nvinfer1::Dims*") Dims32 dims); + public native @Cast("bool") @NoException(true) boolean setDimensions(@Cast("const char*") BytePointer inputName, @Cast("nvinfer1::OptProfileSelector") int select, @ByVal @Cast("nvinfer1::Dims*") Dims32 dims); + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getDimensions(String inputName, OptProfileSelector select); + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getDimensions(@Cast("const char*") BytePointer inputName, @Cast("nvinfer1::OptProfileSelector") int select); + public native @Cast("bool") @NoException(true) boolean setShapeValues( + String inputName, OptProfileSelector select, @Const IntPointer values, int nbValues); + public native @Cast("bool") @NoException(true) boolean setShapeValues( + @Cast("const char*") BytePointer inputName, @Cast("nvinfer1::OptProfileSelector") int select, @Const IntBuffer values, int nbValues); + public native @Cast("bool") @NoException(true) boolean setShapeValues( + String inputName, OptProfileSelector select, @Const int[] values, int nbValues); + public native @Cast("bool") @NoException(true) boolean setShapeValues( + @Cast("const char*") BytePointer inputName, @Cast("nvinfer1::OptProfileSelector") int select, @Const IntPointer values, 
int nbValues); + public native @Cast("bool") @NoException(true) boolean setShapeValues( + String inputName, OptProfileSelector select, @Const IntBuffer values, int nbValues); + public native @Cast("bool") @NoException(true) boolean setShapeValues( + @Cast("const char*") BytePointer inputName, @Cast("nvinfer1::OptProfileSelector") int select, @Const int[] values, int nbValues); + public native @NoException(true) int getNbShapeValues(String inputName); + public native @NoException(true) int getNbShapeValues(@Cast("const char*") BytePointer inputName); + public native @Const @NoException(true) IntPointer getShapeValues(String inputName, OptProfileSelector select); + public native @Const @NoException(true) IntBuffer getShapeValues(@Cast("const char*") BytePointer inputName, @Cast("nvinfer1::OptProfileSelector") int select); + public native @Cast("bool") @NoException(true) boolean setExtraMemoryTarget(float target); + public native @NoException(true) float getExtraMemoryTarget(); + public native @Cast("bool") @NoException(true) boolean isValid(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VPaddingLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VPaddingLayer.java new file mode 100644 index 00000000000..bfdc87e0c17 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VPaddingLayer.java @@ -0,0 +1,36 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static 
org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VPaddingLayer extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public VPaddingLayer(Pointer p) { super(p); } + + public native @NoException(true) void setPrePadding(@ByVal DimsHW padding); + public native @ByVal @NoException(true) DimsHW getPrePadding(); + public native @NoException(true) void setPostPadding(@ByVal DimsHW padding); + public native @ByVal @NoException(true) DimsHW getPostPadding(); + public native @NoException(true) void setPrePaddingNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 padding); + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getPrePaddingNd(); + public native @NoException(true) void setPostPaddingNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 padding); + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getPostPaddingNd(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VParametricReLULayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VParametricReLULayer.java new file mode 100644 index 00000000000..c612b39358d --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VParametricReLULayer.java @@ -0,0 +1,40 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; 
+import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VParametricReLULayer extends VRoot { + static { Loader.load(); } + /** Default native constructor. */ + public VParametricReLULayer() { super((Pointer)null); allocate(); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public VParametricReLULayer(long size) { super((Pointer)null); allocateArray(size); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public VParametricReLULayer(Pointer p) { super(p); } + private native void allocate(); + private native void allocateArray(long size); + @Override public VParametricReLULayer position(long position) { + return (VParametricReLULayer)super.position(position); + } + @Override public VParametricReLULayer getPointer(long i) { + return new VParametricReLULayer((Pointer)this).offsetAddress(i); + } + +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VPluginLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VPluginLayer.java new file mode 100644 index 00000000000..ce3fd8b65c9 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VPluginLayer.java @@ -0,0 +1,29 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static 
org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VPluginLayer extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public VPluginLayer(Pointer p) { super(p); } + + public native @ByRef @NoException(true) IPlugin getPlugin(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VPluginV2Layer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VPluginV2Layer.java new file mode 100644 index 00000000000..a37c9691fd2 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VPluginV2Layer.java @@ -0,0 +1,29 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VPluginV2Layer extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public VPluginV2Layer(Pointer p) { super(p); } + + public native @ByRef @NoException(true) IPluginV2 getPlugin(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VPoolingLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VPoolingLayer.java new file mode 100644 index 00000000000..36020d02e1b --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VPoolingLayer.java @@ -0,0 +1,54 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VPoolingLayer extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public VPoolingLayer(Pointer p) { super(p); } + + public native @NoException(true) void setPoolingType(PoolingType type); + public native @NoException(true) void setPoolingType(@Cast("nvinfer1::PoolingType") int type); + public native @NoException(true) PoolingType getPoolingType(); + public native @NoException(true) void setWindowSize(@ByVal DimsHW windowSize); + public native @ByVal @NoException(true) DimsHW getWindowSize(); + public native @NoException(true) void setStride(@ByVal DimsHW stride); + public native @ByVal @NoException(true) DimsHW getStride(); + public native @NoException(true) void setPadding(@ByVal DimsHW padding); + public native @ByVal @NoException(true) DimsHW getPadding(); + public native @NoException(true) void setBlendFactor(float blendFactor); + public native @NoException(true) float getBlendFactor(); + public native @NoException(true) void setAverageCountExcludesPadding(@Cast("bool") boolean exclusive); + public native @Cast("bool") @NoException(true) boolean getAverageCountExcludesPadding(); + public native @NoException(true) void setPrePadding(@ByVal @Cast("nvinfer1::Dims*") Dims32 padding); + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getPrePadding(); + public native @NoException(true) void setPostPadding(@ByVal @Cast("nvinfer1::Dims*") Dims32 padding); + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getPostPadding(); + public native @NoException(true) void setPaddingMode(PaddingMode paddingMode); + public native @NoException(true) void setPaddingMode(@Cast("nvinfer1::PaddingMode") int paddingMode); + public native @NoException(true) PaddingMode getPaddingMode(); + public native @NoException(true) void setWindowSizeNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 windowSize); + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getWindowSizeNd(); + public native @NoException(true) void setStrideNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 stride); + public native 
@ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getStrideNd(); + public native @NoException(true) void setPaddingNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 padding); + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getPaddingNd(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VQuantizeLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VQuantizeLayer.java new file mode 100644 index 00000000000..6a8e3b83a7c --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VQuantizeLayer.java @@ -0,0 +1,30 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VQuantizeLayer extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public VQuantizeLayer(Pointer p) { super(p); } + + public native @NoException(true) int getAxis(); + public native @NoException(true) void setAxis(int axis); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRNNv2Layer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRNNv2Layer.java new file mode 100644 index 00000000000..f13567922da --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRNNv2Layer.java @@ -0,0 +1,55 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VRNNv2Layer extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public VRNNv2Layer(Pointer p) { super(p); } + + public native @NoException(true) int getLayerCount(); + public native @NoException(true) int getHiddenSize(); + public native @NoException(true) int getMaxSeqLength(); + public native @NoException(true) int getDataLength(); + public native @NoException(true) void setSequenceLengths(@ByRef ITensor seqLengths); + public native @NoException(true) ITensor getSequenceLengths(); + public native @NoException(true) void setOperation(RNNOperation op); + public native @NoException(true) void setOperation(@Cast("nvinfer1::RNNOperation") int op); + public native @NoException(true) RNNOperation getOperation(); + public native @NoException(true) void setInputMode(RNNInputMode op); + public native @NoException(true) void setInputMode(@Cast("nvinfer1::RNNInputMode") int op); + public native @NoException(true) RNNInputMode getInputMode(); + public native @NoException(true) void setDirection(RNNDirection op); + public native @NoException(true) void setDirection(@Cast("nvinfer1::RNNDirection") int op); + public native @NoException(true) RNNDirection getDirection(); + public native @NoException(true) void setWeightsForGate(int layerIndex, RNNGateType gate, @Cast("bool") boolean isW, @ByVal Weights weights); + public native @NoException(true) void setWeightsForGate(int layerIndex, @Cast("nvinfer1::RNNGateType") int gate, @Cast("bool") boolean isW, @ByVal Weights weights); + public native @ByVal @NoException(true) Weights getWeightsForGate(int layerIndex, RNNGateType gate, @Cast("bool") boolean isW); + public native @ByVal @NoException(true) Weights getWeightsForGate(int layerIndex, @Cast("nvinfer1::RNNGateType") int gate, @Cast("bool") boolean isW); + public native @NoException(true) void setBiasForGate(int layerIndex, RNNGateType gate, @Cast("bool") boolean isW, @ByVal Weights bias); + public native @NoException(true) void setBiasForGate(int layerIndex, @Cast("nvinfer1::RNNGateType") int gate, @Cast("bool") boolean isW, @ByVal 
Weights bias); + public native @ByVal @NoException(true) Weights getBiasForGate(int layerIndex, RNNGateType gate, @Cast("bool") boolean isW); + public native @ByVal @NoException(true) Weights getBiasForGate(int layerIndex, @Cast("nvinfer1::RNNGateType") int gate, @Cast("bool") boolean isW); + public native @NoException(true) void setHiddenState(@ByRef ITensor hidden); + public native @NoException(true) ITensor getHiddenState(); + public native @NoException(true) void setCellState(@ByRef ITensor cell); + public native @NoException(true) ITensor getCellState(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRaggedSoftMaxLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRaggedSoftMaxLayer.java new file mode 100644 index 00000000000..0a32c06e847 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRaggedSoftMaxLayer.java @@ -0,0 +1,40 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VRaggedSoftMaxLayer extends VRoot { + static { Loader.load(); } + /** Default native constructor. */ + public VRaggedSoftMaxLayer() { super((Pointer)null); allocate(); } + /** Native array allocator. Access with {@link Pointer#position(long)}. 
*/ + public VRaggedSoftMaxLayer(long size) { super((Pointer)null); allocateArray(size); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public VRaggedSoftMaxLayer(Pointer p) { super(p); } + private native void allocate(); + private native void allocateArray(long size); + @Override public VRaggedSoftMaxLayer position(long position) { + return (VRaggedSoftMaxLayer)super.position(position); + } + @Override public VRaggedSoftMaxLayer getPointer(long i) { + return new VRaggedSoftMaxLayer((Pointer)this).offsetAddress(i); + } + +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRecurrenceLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRecurrenceLayer.java new file mode 100644 index 00000000000..24a7052c6dc --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRecurrenceLayer.java @@ -0,0 +1,40 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VRecurrenceLayer extends VRoot { + static { Loader.load(); } + /** Default native constructor. */ + public VRecurrenceLayer() { super((Pointer)null); allocate(); } + /** Native array allocator. Access with {@link Pointer#position(long)}. 
*/ + public VRecurrenceLayer(long size) { super((Pointer)null); allocateArray(size); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public VRecurrenceLayer(Pointer p) { super(p); } + private native void allocate(); + private native void allocateArray(long size); + @Override public VRecurrenceLayer position(long position) { + return (VRecurrenceLayer)super.position(position); + } + @Override public VRecurrenceLayer getPointer(long i) { + return new VRecurrenceLayer((Pointer)this).offsetAddress(i); + } + +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VReduceLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VReduceLayer.java new file mode 100644 index 00000000000..85979041534 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VReduceLayer.java @@ -0,0 +1,35 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VReduceLayer extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public VReduceLayer(Pointer p) { super(p); } + + public native @NoException(true) void setOperation(ReduceOperation op); + public native @NoException(true) void setOperation(@Cast("nvinfer1::ReduceOperation") int op); + public native @NoException(true) ReduceOperation getOperation(); + public native @NoException(true) void setReduceAxes(@Cast("uint32_t") int reduceAxes); + public native @Cast("uint32_t") @NoException(true) int getReduceAxes(); + public native @NoException(true) void setKeepDimensions(@Cast("bool") boolean keepDimensions); + public native @Cast("bool") @NoException(true) boolean getKeepDimensions(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRefitter.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRefitter.java new file mode 100644 index 00000000000..1b2a606d472 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRefitter.java @@ -0,0 +1,61 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VRefitter extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public VRefitter(Pointer p) { super(p); } + + public native @Cast("bool") @NoException(true) boolean setWeights(String layerName, WeightsRole role, @Const @ByVal Weights weights); + public native @Cast("bool") @NoException(true) boolean setWeights(@Cast("const char*") BytePointer layerName, @Cast("nvinfer1::WeightsRole") int role, @Const @ByVal Weights weights); + public native @Cast("bool") @NoException(true) boolean refitCudaEngine(); + public native @NoException(true) int getMissing(int size, @Cast("const char**") PointerPointer layerNames, @Cast("nvinfer1::WeightsRole*") IntPointer roles); + public native @NoException(true) int getMissing(int size, @Cast("const char**") @ByPtrPtr BytePointer layerNames, @Cast("nvinfer1::WeightsRole*") IntPointer roles); + public native @NoException(true) int getMissing(int size, @Cast("const char**") @ByPtrPtr ByteBuffer layerNames, @Cast("nvinfer1::WeightsRole*") IntBuffer roles); + public native @NoException(true) int getMissing(int size, @Cast("const char**") @ByPtrPtr byte[] layerNames, @Cast("nvinfer1::WeightsRole*") int[] roles); + public native @NoException(true) int getAll(int size, @Cast("const char**") PointerPointer layerNames, @Cast("nvinfer1::WeightsRole*") IntPointer roles); + public native @NoException(true) int getAll(int size, @Cast("const char**") @ByPtrPtr BytePointer layerNames, @Cast("nvinfer1::WeightsRole*") IntPointer roles); + public native @NoException(true) int getAll(int size, @Cast("const char**") @ByPtrPtr ByteBuffer layerNames, @Cast("nvinfer1::WeightsRole*") IntBuffer roles); + public native @NoException(true) int getAll(int size, @Cast("const char**") @ByPtrPtr byte[] layerNames, @Cast("nvinfer1::WeightsRole*") int[] roles); + public native @Cast("bool") @NoException(true) boolean setDynamicRange(String tensorName, float min, float max); + public native @Cast("bool") @NoException(true) boolean setDynamicRange(@Cast("const char*") BytePointer tensorName, float min, float max); + public native 
@NoException(true) float getDynamicRangeMin(String tensorName); + public native @NoException(true) float getDynamicRangeMin(@Cast("const char*") BytePointer tensorName); + public native @NoException(true) float getDynamicRangeMax(String tensorName); + public native @NoException(true) float getDynamicRangeMax(@Cast("const char*") BytePointer tensorName); + public native @NoException(true) int getTensorsWithDynamicRange(int size, @Cast("const char**") PointerPointer tensorNames); + public native @NoException(true) int getTensorsWithDynamicRange(int size, @Cast("const char**") @ByPtrPtr BytePointer tensorNames); + public native @NoException(true) int getTensorsWithDynamicRange(int size, @Cast("const char**") @ByPtrPtr ByteBuffer tensorNames); + public native @NoException(true) int getTensorsWithDynamicRange(int size, @Cast("const char**") @ByPtrPtr byte[] tensorNames); + public native @NoException(true) void setErrorRecorder(IErrorRecorder recorder); + public native @NoException(true) IErrorRecorder getErrorRecorder(); + public native @Cast("bool") @NoException(true) boolean setNamedWeights(String name, @ByVal Weights weights); + public native @Cast("bool") @NoException(true) boolean setNamedWeights(@Cast("const char*") BytePointer name, @ByVal Weights weights); + public native @NoException(true) int getMissingWeights(int size, @Cast("const char**") PointerPointer weightsNames); + public native @NoException(true) int getMissingWeights(int size, @Cast("const char**") @ByPtrPtr BytePointer weightsNames); + public native @NoException(true) int getMissingWeights(int size, @Cast("const char**") @ByPtrPtr ByteBuffer weightsNames); + public native @NoException(true) int getMissingWeights(int size, @Cast("const char**") @ByPtrPtr byte[] weightsNames); + public native @NoException(true) int getAllWeights(int size, @Cast("const char**") PointerPointer weightsNames); + public native @NoException(true) int getAllWeights(int size, @Cast("const char**") @ByPtrPtr BytePointer 
weightsNames); + public native @NoException(true) int getAllWeights(int size, @Cast("const char**") @ByPtrPtr ByteBuffer weightsNames); + public native @NoException(true) int getAllWeights(int size, @Cast("const char**") @ByPtrPtr byte[] weightsNames); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VResizeLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VResizeLayer.java new file mode 100644 index 00000000000..bb7eaa1dc4e --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VResizeLayer.java @@ -0,0 +1,50 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VResizeLayer extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public VResizeLayer(Pointer p) { super(p); } + + public native @NoException(true) void setOutputDimensions(@ByVal @Cast("nvinfer1::Dims*") Dims32 dimensions); + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getOutputDimensions(); + public native @NoException(true) void setScales(@Const FloatPointer scales, int nbScales); + public native @NoException(true) void setScales(@Const FloatBuffer scales, int nbScales); + public native @NoException(true) void setScales(@Const float[] scales, int nbScales); + public native @NoException(true) int getScales(int size, FloatPointer scales); + public native @NoException(true) int getScales(int size, FloatBuffer scales); + public native @NoException(true) int getScales(int size, float[] scales); + public native @NoException(true) void setResizeMode(ResizeMode resizeMode); + public native @NoException(true) void setResizeMode(@Cast("nvinfer1::ResizeMode") int resizeMode); + public native @NoException(true) ResizeMode getResizeMode(); + public native @NoException(true) void setAlignCorners(@Cast("bool") boolean alignCorners); + public native @Cast("bool") @NoException(true) boolean getAlignCorners(); + public native @NoException(true) void setCoordinateTransformation(ResizeCoordinateTransformation coordTransform); + public native @NoException(true) void setCoordinateTransformation(@Cast("nvinfer1::ResizeCoordinateTransformation") int coordTransform); + public native @NoException(true) ResizeCoordinateTransformation getCoordinateTransformation(); + public native @NoException(true) void setSelectorForSinglePixel(ResizeSelector selector); + public native @NoException(true) void setSelectorForSinglePixel(@Cast("nvinfer1::ResizeSelector") int selector); + public native @NoException(true) ResizeSelector getSelectorForSinglePixel(); + public native @NoException(true) void setNearestRounding(ResizeRoundMode value); + public native @NoException(true) void setNearestRounding(@Cast("nvinfer1::ResizeRoundMode") 
int value); + public native @NoException(true) ResizeRoundMode getNearestRounding(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRoot.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRoot.java new file mode 100644 index 00000000000..fa5dc93c028 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRoot.java @@ -0,0 +1,48 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +/** + * \file NvInferImpl.h + * + * This file contains definitions for API methods that cross the shared library boundary. These + * methods must not be called directly by applications; they should only be called through the + * API classes. + * */ + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VRoot extends Pointer { + static { Loader.load(); } + /** Default native constructor. */ + public VRoot() { super((Pointer)null); allocate(); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public VRoot(long size) { super((Pointer)null); allocateArray(size); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public VRoot(Pointer p) { super(p); } + private native void allocate(); + private native void allocateArray(long size); + @Override public VRoot position(long position) { + return (VRoot)super.position(position); + } + @Override public VRoot getPointer(long i) { + return new VRoot((Pointer)this).offsetAddress(i); + } + +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRuntime.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRuntime.java new file mode 100644 index 00000000000..8f0bf9484fe --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRuntime.java @@ -0,0 +1,36 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VRuntime extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public VRuntime(Pointer p) { super(p); } + + public native @NoException(true) ICudaEngine deserializeCudaEngine( + @Const Pointer blob, @Cast("std::size_t") long size, IPluginFactory pluginFactory); + public native @NoException(true) void setDLACore(int dlaCore); + public native @NoException(true) int getDLACore(); + public native @NoException(true) int getNbDLACores(); + public native @NoException(true) void setGpuAllocator(IGpuAllocator allocator); + public native @NoException(true) void setErrorRecorder(IErrorRecorder recorder); + public native @NoException(true) IErrorRecorder getErrorRecorder(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VScaleLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VScaleLayer.java new file mode 100644 index 00000000000..da9f76ffd30 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VScaleLayer.java @@ -0,0 +1,39 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VScaleLayer extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public VScaleLayer(Pointer p) { super(p); } + + public native @NoException(true) void setMode(ScaleMode mode); + public native @NoException(true) void setMode(@Cast("nvinfer1::ScaleMode") int mode); + public native @NoException(true) ScaleMode getMode(); + public native @NoException(true) void setShift(@ByVal Weights shift); + public native @ByVal @NoException(true) Weights getShift(); + public native @NoException(true) void setScale(@ByVal Weights scale); + public native @ByVal @NoException(true) Weights getScale(); + public native @NoException(true) void setPower(@ByVal Weights power); + public native @ByVal @NoException(true) Weights getPower(); + public native @NoException(true) int getChannelAxis(); + public native @NoException(true) void setChannelAxis(int channelAxis); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VSelectLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VSelectLayer.java new file mode 100644 index 00000000000..87819d1bfdd --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VSelectLayer.java @@ -0,0 +1,27 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + +@Namespace("nvinfer1::apiv") @Opaque @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VSelectLayer extends VRoot { + /** Empty constructor. 
Calls {@code super((Pointer)null)}. */ + public VSelectLayer() { super((Pointer)null); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public VSelectLayer(Pointer p) { super(p); } +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VShapeLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VShapeLayer.java new file mode 100644 index 00000000000..cfddc2c83d3 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VShapeLayer.java @@ -0,0 +1,40 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VShapeLayer extends VRoot { + static { Loader.load(); } + /** Default native constructor. */ + public VShapeLayer() { super((Pointer)null); allocate(); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public VShapeLayer(long size) { super((Pointer)null); allocateArray(size); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public VShapeLayer(Pointer p) { super(p); } + private native void allocate(); + private native void allocateArray(long size); + @Override public VShapeLayer position(long position) { + return (VShapeLayer)super.position(position); + } + @Override public VShapeLayer getPointer(long i) { + return new VShapeLayer((Pointer)this).offsetAddress(i); + } + +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VShuffleLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VShuffleLayer.java new file mode 100644 index 00000000000..f19ceb6538f --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VShuffleLayer.java @@ -0,0 +1,36 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VShuffleLayer extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public VShuffleLayer(Pointer p) { super(p); } + + public native @NoException(true) void setFirstTranspose(@Const @ByRef Permutation permutation); + public native @Const @ByRef @NoException(true) Permutation getFirstTranspose(); + public native @NoException(true) void setReshapeDimensions(@ByVal @Cast("nvinfer1::Dims*") Dims32 dimensions); + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getReshapeDimensions(); + public native @NoException(true) void setSecondTranspose(@Const @ByRef Permutation permutation); + public native @Const @ByRef @NoException(true) Permutation getSecondTranspose(); + public native void setZeroIsPlaceholder(@Cast("bool") boolean zeroIsPlaceholder); + public native @Cast("bool") boolean getZeroIsPlaceholder(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VSliceLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VSliceLayer.java new file mode 100644 index 00000000000..61eafa3b4fa --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VSliceLayer.java @@ -0,0 +1,37 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VSliceLayer extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. 
Invokes {@link Pointer#Pointer(Pointer)}. */ + public VSliceLayer(Pointer p) { super(p); } + + public native @NoException(true) void setStart(@ByVal @Cast("nvinfer1::Dims*") Dims32 start); + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getStart(); + public native @NoException(true) void setSize(@ByVal @Cast("nvinfer1::Dims*") Dims32 size); + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getSize(); + public native @NoException(true) void setStride(@ByVal @Cast("nvinfer1::Dims*") Dims32 stride); + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getStride(); + public native @NoException(true) void setMode(SliceMode mode); + public native @NoException(true) void setMode(@Cast("nvinfer1::SliceMode") int mode); + public native @NoException(true) SliceMode getMode(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VSoftMaxLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VSoftMaxLayer.java new file mode 100644 index 00000000000..b1dab0e9cba --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VSoftMaxLayer.java @@ -0,0 +1,30 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VSoftMaxLayer extends VRoot { + 
static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public VSoftMaxLayer(Pointer p) { super(p); } + + public native @NoException(true) void setAxes(@Cast("uint32_t") int axes); + public native @Cast("uint32_t") @NoException(true) int getAxes(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VTensor.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VTensor.java new file mode 100644 index 00000000000..1bbb65af97d --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VTensor.java @@ -0,0 +1,52 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VTensor extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public VTensor(Pointer p) { super(p); } + + public native @NoException(true) void setName(String name); + public native @NoException(true) void setName(@Cast("const char*") BytePointer name); + public native @NoException(true) String getName(); + public native @NoException(true) void setDimensions(@ByVal @Cast("nvinfer1::Dims*") Dims32 dimensions); + public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getDimensions(); + public native @NoException(true) void setType(DataType type); + public native @NoException(true) void setType(@Cast("nvinfer1::DataType") int type); + public native @NoException(true) DataType getType(); + public native @Cast("bool") @NoException(true) boolean setDynamicRange(float min, float max); + public native @Cast("bool") @NoException(true) boolean isNetworkInput(); + public native @Cast("bool") @NoException(true) boolean isNetworkOutput(); + public native @NoException(true) void setBroadcastAcrossBatch(@Cast("bool") boolean broadcastAcrossBatch); + public native @Cast("bool") @NoException(true) boolean getBroadcastAcrossBatch(); + public native @NoException(true) TensorLocation getLocation(); + public native @NoException(true) void setLocation(TensorLocation location); + public native @NoException(true) void setLocation(@Cast("nvinfer1::TensorLocation") int location); + public native @Cast("bool") @NoException(true) boolean dynamicRangeIsSet(); + public native @NoException(true) void resetDynamicRange(); + public native @NoException(true) float getDynamicRangeMin(); + public native @NoException(true) float getDynamicRangeMax(); + public native @NoException(true) void setAllowedFormats(@Cast("nvinfer1::TensorFormats") int formats); + public native @Cast("nvinfer1::TensorFormats") @NoException(true) int getAllowedFormats(); + public native @Cast("bool") @NoException(true) boolean isShapeTensor(); + public native @Cast("bool") @NoException(true) boolean isExecutionTensor(); +} diff --git 
a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VTimingCache.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VTimingCache.java new file mode 100644 index 00000000000..bf747271a0d --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VTimingCache.java @@ -0,0 +1,31 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VTimingCache extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public VTimingCache(Pointer p) { super(p); } + + public native @NoException(true) IHostMemory serialize(); + public native @Cast("bool") @NoException(true) boolean combine(@Const @ByRef ITimingCache inputCache, @Cast("bool") boolean ignoreMismatch); + public native @Cast("bool") @NoException(true) boolean reset(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VTopKLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VTopKLayer.java new file mode 100644 index 00000000000..53e8945c5c8 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VTopKLayer.java @@ -0,0 +1,35 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VTopKLayer extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public VTopKLayer(Pointer p) { super(p); } + + public native @NoException(true) void setOperation(TopKOperation op); + public native @NoException(true) void setOperation(@Cast("nvinfer1::TopKOperation") int op); + public native @NoException(true) TopKOperation getOperation(); + public native @NoException(true) void setK(int k); + public native @NoException(true) int getK(); + public native @NoException(true) void setReduceAxes(@Cast("uint32_t") int reduceAxes); + public native @Cast("uint32_t") @NoException(true) int getReduceAxes(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VTripLimitLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VTripLimitLayer.java new file mode 100644 index 00000000000..03352af105e --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VTripLimitLayer.java @@ -0,0 +1,29 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VTripLimitLayer extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public VTripLimitLayer(Pointer p) { super(p); } + + public native @NoException(true) TripLimit getTripLimit(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VUnaryLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VUnaryLayer.java new file mode 100644 index 00000000000..e7d4939d5fe --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VUnaryLayer.java @@ -0,0 +1,31 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + +@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class VUnaryLayer extends VRoot { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public VUnaryLayer(Pointer p) { super(p); } + + public native @NoException(true) void setOperation(UnaryOperation op); + public native @NoException(true) void setOperation(@Cast("nvinfer1::UnaryOperation") int op); + public native @NoException(true) UnaryOperation getOperation(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Weights.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Weights.java new file mode 100644 index 00000000000..fc02f313395 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Weights.java @@ -0,0 +1,58 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + // namespace impl + +/** + * \class Weights + * + * \brief An array of weights used as a layer parameter. + * + * When using the DLA, the cumulative size of all Weights used in a network + * must be less than 512MB in size. If the build option kGPU_FALLBACK is specified, + * then multiple DLA sub-networks may be generated from the single original network. + * + * The weights are held by reference until the engine has been built. Therefore the data referenced + * by \p values field should be preserved until the build is complete. 
+ * */ +@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class Weights extends Pointer { + static { Loader.load(); } + /** Default native constructor. */ + public Weights() { super((Pointer)null); allocate(); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public Weights(long size) { super((Pointer)null); allocateArray(size); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public Weights(Pointer p) { super(p); } + private native void allocate(); + private native void allocateArray(long size); + @Override public Weights position(long position) { + return (Weights)super.position(position); + } + @Override public Weights getPointer(long i) { + return new Weights((Pointer)this).offsetAddress(i); + } + + /** The type of the weights. */ + public native DataType type(); public native Weights type(DataType setter); + /** The weight values, in a contiguous array. */ + public native @Const Pointer values(); public native Weights values(Pointer values); + /** The number of weights in the array. 
*/ + public native @Cast("int64_t") long count(); public native Weights count(long setter); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/cublasContext.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/cublasContext.java new file mode 100644 index 00000000000..23b6b350151 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/cublasContext.java @@ -0,0 +1,28 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + /** Forward declaration of cublasContext to use in other interfaces */ + @Opaque @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class cublasContext extends Pointer { + /** Empty constructor. Calls {@code super((Pointer)null)}. */ + public cublasContext() { super((Pointer)null); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public cublasContext(Pointer p) { super(p); } + } diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/cudnnContext.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/cudnnContext.java new file mode 100644 index 00000000000..87e352e38d0 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/cudnnContext.java @@ -0,0 +1,28 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; + +import static org.bytedeco.tensorrt.global.nvinfer.*; + + /** Forward declaration of cudnnContext to use in other interfaces */ + @Opaque @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) +public class cudnnContext extends Pointer { + /** Empty constructor. Calls {@code super((Pointer)null)}. */ + public cudnnContext() { super((Pointer)null); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public cudnnContext(Pointer p) { super(p); } + } diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/DetectionOutputParameters.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/DetectionOutputParameters.java new file mode 100644 index 00000000000..eb4eb56906d --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/DetectionOutputParameters.java @@ -0,0 +1,75 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer_plugin; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; + +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; + + +/** + * \brief The DetectionOutput plugin layer generates the detection output based on location and confidence predictions by doing non maximum suppression. + * This plugin first decodes the bounding boxes based on the anchors generated. It then performs non_max_suppression on the decoded bounding boxes. + * DetectionOutputParameters defines a set of parameters for creating the DetectionOutput plugin layer. + * It contains: + * @param shareLocation If true, bounding box are shared among different classes. + * @param varianceEncodedInTarget If true, variance is encoded in target. Otherwise we need to adjust the predicted offset accordingly. + * @param backgroundLabelId Background label ID. If there is no background class, set it as -1. 
+ * @param numClasses Number of classes to be predicted. + * @param topK Number of boxes per image with top confidence scores that are fed into the NMS algorithm. + * @param keepTopK Number of total bounding boxes to be kept per image after NMS step. + * @param confidenceThreshold Only consider detections whose confidences are larger than a threshold. + * @param nmsThreshold Threshold to be used in NMS. + * @param codeType Type of coding method for bbox. + * @param inputOrder Specifies the order of inputs {loc_data, conf_data, priorbox_data}. + * @param confSigmoid Set to true to calculate sigmoid of confidence scores. + * @param isNormalized Set to true if bounding box data is normalized by the network. + * @param isBatchAgnostic Defaults to true. Set to false if prior boxes are unique per batch + * */ +@Namespace("nvinfer1::plugin") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer_plugin.class) +public class DetectionOutputParameters extends Pointer { + static { Loader.load(); } + /** Default native constructor. */ + public DetectionOutputParameters() { super((Pointer)null); allocate(); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public DetectionOutputParameters(long size) { super((Pointer)null); allocateArray(size); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public DetectionOutputParameters(Pointer p) { super(p); } + private native void allocate(); + private native void allocateArray(long size); + @Override public DetectionOutputParameters position(long position) { + return (DetectionOutputParameters)super.position(position); + } + @Override public DetectionOutputParameters getPointer(long i) { + return new DetectionOutputParameters((Pointer)this).offsetAddress(i); + } + + public native @Cast("bool") boolean shareLocation(); public native DetectionOutputParameters shareLocation(boolean setter); + public native @Cast("bool") boolean varianceEncodedInTarget(); public native DetectionOutputParameters varianceEncodedInTarget(boolean setter); + public native int backgroundLabelId(); public native DetectionOutputParameters backgroundLabelId(int setter); + public native int numClasses(); public native DetectionOutputParameters numClasses(int setter); + public native int topK(); public native DetectionOutputParameters topK(int setter); + public native int keepTopK(); public native DetectionOutputParameters keepTopK(int setter); + public native float confidenceThreshold(); public native DetectionOutputParameters confidenceThreshold(float setter); + public native float nmsThreshold(); public native DetectionOutputParameters nmsThreshold(float setter); + public native CodeTypeSSD codeType(); public native DetectionOutputParameters codeType(CodeTypeSSD setter); + public native int inputOrder(int i); public native DetectionOutputParameters inputOrder(int i, int setter); + @MemberGetter public native IntPointer inputOrder(); + public native @Cast("bool") boolean confSigmoid(); public native DetectionOutputParameters confSigmoid(boolean setter); + public native @Cast("bool") boolean isNormalized(); public native DetectionOutputParameters isNormalized(boolean setter); + public native @Cast("bool") boolean isBatchAgnostic(); public native DetectionOutputParameters isBatchAgnostic(boolean setter); +} diff --git 
a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/GridAnchorParameters.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/GridAnchorParameters.java new file mode 100644 index 00000000000..b0391a3d4ae --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/GridAnchorParameters.java @@ -0,0 +1,63 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer_plugin; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; + +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; + + + +/** + * \brief The Anchor Generator plugin layer generates the prior boxes of designated sizes and aspect ratios across all dimensions (H x W). + * GridAnchorParameters defines a set of parameters for creating the plugin layer for all feature maps. + * It contains: + * @param minSize Scale of anchors corresponding to finest resolution. + * @param maxSize Scale of anchors corresponding to coarsest resolution. + * @param aspectRatios List of aspect ratios to place on each grid point. + * @param numAspectRatios Number of elements in aspectRatios. + * @param H Height of feature map to generate anchors for. + * @param W Width of feature map to generate anchors for. + * @param variance Variance for adjusting the prior boxes. 
+ * */ +@Namespace("nvinfer1::plugin") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer_plugin.class) +public class GridAnchorParameters extends Pointer { + static { Loader.load(); } + /** Default native constructor. */ + public GridAnchorParameters() { super((Pointer)null); allocate(); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public GridAnchorParameters(long size) { super((Pointer)null); allocateArray(size); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public GridAnchorParameters(Pointer p) { super(p); } + private native void allocate(); + private native void allocateArray(long size); + @Override public GridAnchorParameters position(long position) { + return (GridAnchorParameters)super.position(position); + } + @Override public GridAnchorParameters getPointer(long i) { + return new GridAnchorParameters((Pointer)this).offsetAddress(i); + } + + public native float minSize(); public native GridAnchorParameters minSize(float setter); + public native float maxSize(); public native GridAnchorParameters maxSize(float setter); + public native FloatPointer aspectRatios(); public native GridAnchorParameters aspectRatios(FloatPointer setter); + public native int numAspectRatios(); public native GridAnchorParameters numAspectRatios(int setter); + public native int H(); public native GridAnchorParameters H(int setter); + public native int W(); public native GridAnchorParameters W(int setter); + public native float variance(int i); public native GridAnchorParameters variance(int i, float setter); + @MemberGetter public native FloatPointer variance(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/NMSParameters.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/NMSParameters.java new file mode 100644 index 00000000000..190a633add1 --- /dev/null +++ 
b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/NMSParameters.java @@ -0,0 +1,67 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer_plugin; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; + +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; + + +/** + * \brief The NMSParameters are used by the BatchedNMSPlugin for performing + * the non_max_suppression operation over boxes for object detection networks. + * @param shareLocation If set to true, the boxes inputs are shared across all + * classes. If set to false, the boxes input should account for per class box data. + * @param backgroundLabelId Label ID for the background class. If there is no background class, set it as -1 + * @param numClasses Number of classes in the network. + * @param topK Number of bounding boxes to be fed into the NMS step. + * @param keepTopK Number of total bounding boxes to be kept per image after NMS step. + * Should be less than or equal to the topK value. + * @param scoreThreshold Scalar threshold for score (low scoring boxes are removed). + * @param iouThreshold scalar threshold for IOU (new boxes that have high IOU overlap + * with previously selected boxes are removed). + * @param isNormalized Set to false, if the box coordinates are not + * normalized, i.e. not in the range [0,1]. Defaults to false. 
+ * */ + +@Namespace("nvinfer1::plugin") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer_plugin.class) +public class NMSParameters extends Pointer { + static { Loader.load(); } + /** Default native constructor. */ + public NMSParameters() { super((Pointer)null); allocate(); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public NMSParameters(long size) { super((Pointer)null); allocateArray(size); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public NMSParameters(Pointer p) { super(p); } + private native void allocate(); + private native void allocateArray(long size); + @Override public NMSParameters position(long position) { + return (NMSParameters)super.position(position); + } + @Override public NMSParameters getPointer(long i) { + return new NMSParameters((Pointer)this).offsetAddress(i); + } + + public native @Cast("bool") boolean shareLocation(); public native NMSParameters shareLocation(boolean setter); + public native int backgroundLabelId(); public native NMSParameters backgroundLabelId(int setter); + public native int numClasses(); public native NMSParameters numClasses(int setter); + public native int topK(); public native NMSParameters topK(int setter); + public native int keepTopK(); public native NMSParameters keepTopK(int setter); + public native float scoreThreshold(); public native NMSParameters scoreThreshold(float setter); + public native float iouThreshold(); public native NMSParameters iouThreshold(float setter); + public native @Cast("bool") boolean isNormalized(); public native NMSParameters isNormalized(boolean setter); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/PriorBoxParameters.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/PriorBoxParameters.java new file mode 100644 index 00000000000..666b28a8c8b --- /dev/null +++ 
b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/PriorBoxParameters.java @@ -0,0 +1,77 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer_plugin; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; + +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; + + +/** + * \brief The PriorBox plugin layer generates the prior boxes of designated sizes and aspect ratios across all + * dimensions (H x W). PriorBoxParameters defines a set of parameters for creating the PriorBox plugin layer. It + * contains: + * @param minSize Minimum box size in pixels. Can not be nullptr. + * @param maxSize Maximum box size in pixels. Can be nullptr. + * @param aspectRatios Aspect ratios of the boxes. Can be nullptr. + * @param numMinSize Number of elements in minSize. Must be larger than 0. + * @param numMaxSize Number of elements in maxSize. Can be 0 or same as numMinSize. + * @param numAspectRatios Number of elements in aspectRatios. Can be 0. + * @param flip If true, will flip each aspect ratio. For example, if there is an aspect ratio "r", the aspect ratio + * "1.0/r" will be generated as well. + * @param clip If true, will clip the prior so that it is within [0,1]. + * @param variance Variance for adjusting the prior boxes. + * @param imgH Image height. If 0, then the H dimension of the data tensor will be used. + * @param imgW Image width. 
If 0, then the W dimension of the data tensor will be used. + * @param stepH Step in H. If 0, then (float)imgH/h will be used where h is the H dimension of the 1st input tensor. + * @param stepW Step in W. If 0, then (float)imgW/w will be used where w is the W dimension of the 1st input tensor. + * @param offset Offset to the top left corner of each cell. + * */ +@Namespace("nvinfer1::plugin") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer_plugin.class) +public class PriorBoxParameters extends Pointer { + static { Loader.load(); } + /** Default native constructor. */ + public PriorBoxParameters() { super((Pointer)null); allocate(); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public PriorBoxParameters(long size) { super((Pointer)null); allocateArray(size); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public PriorBoxParameters(Pointer p) { super(p); } + private native void allocate(); + private native void allocateArray(long size); + @Override public PriorBoxParameters position(long position) { + return (PriorBoxParameters)super.position(position); + } + @Override public PriorBoxParameters getPointer(long i) { + return new PriorBoxParameters((Pointer)this).offsetAddress(i); + } + + public native FloatPointer minSize(); public native PriorBoxParameters minSize(FloatPointer setter); + public native FloatPointer maxSize(); public native PriorBoxParameters maxSize(FloatPointer setter); + public native FloatPointer aspectRatios(); public native PriorBoxParameters aspectRatios(FloatPointer setter); + public native int numMinSize(); public native PriorBoxParameters numMinSize(int setter); + public native int numMaxSize(); public native PriorBoxParameters numMaxSize(int setter); + public native int numAspectRatios(); public native PriorBoxParameters numAspectRatios(int setter); + public native @Cast("bool") boolean flip(); public native PriorBoxParameters flip(boolean setter); + public 
native @Cast("bool") boolean clip(); public native PriorBoxParameters clip(boolean setter); + public native float variance(int i); public native PriorBoxParameters variance(int i, float setter); + @MemberGetter public native FloatPointer variance(); + public native int imgH(); public native PriorBoxParameters imgH(int setter); + public native int imgW(); public native PriorBoxParameters imgW(int setter); + public native float stepH(); public native PriorBoxParameters stepH(float setter); + public native float stepW(); public native PriorBoxParameters stepW(float setter); + public native float offset(); public native PriorBoxParameters offset(float setter); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/Quadruple.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/Quadruple.java new file mode 100644 index 00000000000..62f70100ec8 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/Quadruple.java @@ -0,0 +1,49 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer_plugin; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; + +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; + + +/** + * \brief The Permute plugin layer permutes the input tensor by changing the memory order of the data. + * Quadruple defines a structure that contains an array of 4 integers. 
They can represent the permute orders or the + * strides in each dimension. + * */ +@Namespace("nvinfer1::plugin") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer_plugin.class) +public class Quadruple extends Pointer { + static { Loader.load(); } + /** Default native constructor. */ + public Quadruple() { super((Pointer)null); allocate(); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public Quadruple(long size) { super((Pointer)null); allocateArray(size); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public Quadruple(Pointer p) { super(p); } + private native void allocate(); + private native void allocateArray(long size); + @Override public Quadruple position(long position) { + return (Quadruple)super.position(position); + } + @Override public Quadruple getPointer(long i) { + return new Quadruple((Pointer)this).offsetAddress(i); + } + + public native int data(int i); public native Quadruple data(int i, int setter); + @MemberGetter public native IntPointer data(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/RPROIParams.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/RPROIParams.java new file mode 100644 index 00000000000..4d3661a87c3 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/RPROIParams.java @@ -0,0 +1,67 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer_plugin; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static 
org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; + +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; + + +/** + * \brief RPROIParams is used to create the RPROIPlugin instance. + * It contains: + * @param poolingH Height of the output in pixels after ROI pooling on feature map. + * @param poolingW Width of the output in pixels after ROI pooling on feature map. + * @param featureStride Feature stride; ratio of input image size to feature map size. Assuming that max pooling layers + * in the neural network use square filters. + * @param preNmsTop Number of proposals to keep before applying NMS. + * @param nmsMaxOut Number of remaining proposals after applying NMS. + * @param anchorsRatioCount Number of anchor box ratios. + * @param anchorsScaleCount Number of anchor box scales. + * @param iouThreshold IoU (Intersection over Union) threshold used for the NMS step. + * @param minBoxSize Minimum allowed bounding box size before scaling, used for anchor box calculation. + * @param spatialScale Spatial scale between the input image and the last feature map. + * */ +@Namespace("nvinfer1::plugin") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer_plugin.class) +public class RPROIParams extends Pointer { + static { Loader.load(); } + /** Default native constructor. */ + public RPROIParams() { super((Pointer)null); allocate(); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public RPROIParams(long size) { super((Pointer)null); allocateArray(size); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public RPROIParams(Pointer p) { super(p); } + private native void allocate(); + private native void allocateArray(long size); + @Override public RPROIParams position(long position) { + return (RPROIParams)super.position(position); + } + @Override public RPROIParams getPointer(long i) { + return new RPROIParams((Pointer)this).offsetAddress(i); + } + + public native int poolingH(); public native RPROIParams poolingH(int setter); + public native int poolingW(); public native RPROIParams poolingW(int setter); + public native int featureStride(); public native RPROIParams featureStride(int setter); + public native int preNmsTop(); public native RPROIParams preNmsTop(int setter); + public native int nmsMaxOut(); public native RPROIParams nmsMaxOut(int setter); + public native int anchorsRatioCount(); public native RPROIParams anchorsRatioCount(int setter); + public native int anchorsScaleCount(); public native RPROIParams anchorsScaleCount(int setter); + public native float iouThreshold(); public native RPROIParams iouThreshold(float setter); + public native float minBoxSize(); public native RPROIParams minBoxSize(float setter); + public native float spatialScale(); public native RPROIParams spatialScale(float setter); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/RegionParameters.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/RegionParameters.java new file mode 100644 index 00000000000..3d1ef39a84c --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/RegionParameters.java @@ -0,0 +1,56 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer_plugin; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import 
org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; + +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; + + +/** + * \brief The Region plugin layer performs region proposal calculation: generate 5 bounding boxes per cell (for yolo9000, generate 3 bounding boxes per cell). + * For each box, calculating its probabilities of objects detections from 80 pre-defined classifications (yolo9000 has 9418 pre-defined classifications, + * and these 9418 items are organized as word-tree structure). + * RegionParameters defines a set of parameters for creating the Region plugin layer. + * @param num Number of predicted bounding box for each grid cell. + * @param coords Number of coordinates for a bounding box. + * @param classes Number of classifications to be predicted. + * @param smTree Helping structure to do softmax on confidence scores. + * */ +@Namespace("nvinfer1::plugin") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer_plugin.class) +public class RegionParameters extends Pointer { + static { Loader.load(); } + /** Default native constructor. */ + public RegionParameters() { super((Pointer)null); allocate(); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public RegionParameters(long size) { super((Pointer)null); allocateArray(size); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public RegionParameters(Pointer p) { super(p); } + private native void allocate(); + private native void allocateArray(long size); + @Override public RegionParameters position(long position) { + return (RegionParameters)super.position(position); + } + @Override public RegionParameters getPointer(long i) { + return new RegionParameters((Pointer)this).offsetAddress(i); + } + + public native int num(); public native RegionParameters num(int setter); + public native int coords(); public native RegionParameters coords(int setter); + public native int classes(); public native RegionParameters classes(int setter); + public native softmaxTree smTree(); public native RegionParameters smTree(softmaxTree setter); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/softmaxTree.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/softmaxTree.java new file mode 100644 index 00000000000..a4041359efc --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/softmaxTree.java @@ -0,0 +1,56 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvinfer_plugin; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; + +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; + + +/** + * \brief When performing yolo9000, softmaxTree is helping to do softmax on confidence scores, for element to get the precise classification 
through word-tree structured classification definition. + * */ +@Namespace("nvinfer1::plugin") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer_plugin.class) +public class softmaxTree extends Pointer { + static { Loader.load(); } + /** Default native constructor. */ + public softmaxTree() { super((Pointer)null); allocate(); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public softmaxTree(long size) { super((Pointer)null); allocateArray(size); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public softmaxTree(Pointer p) { super(p); } + private native void allocate(); + private native void allocateArray(long size); + @Override public softmaxTree position(long position) { + return (softmaxTree)super.position(position); + } + @Override public softmaxTree getPointer(long i) { + return new softmaxTree((Pointer)this).offsetAddress(i); + } + + public native IntPointer leaf(); public native softmaxTree leaf(IntPointer setter); + public native int n(); public native softmaxTree n(int setter); + public native IntPointer parent(); public native softmaxTree parent(IntPointer setter); + public native IntPointer child(); public native softmaxTree child(IntPointer setter); + public native IntPointer group(); public native softmaxTree group(IntPointer setter); + public native @Cast("char*") BytePointer name(int i); public native softmaxTree name(int i, BytePointer setter); + public native @Cast("char**") PointerPointer name(); public native softmaxTree name(PointerPointer setter); + + public native int groups(); public native softmaxTree groups(int setter); + public native IntPointer groupSize(); public native softmaxTree groupSize(IntPointer setter); + public native IntPointer groupOffset(); public native softmaxTree groupOffset(IntPointer setter); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvonnxparser/IParser.java 
b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvonnxparser/IParser.java new file mode 100644 index 00000000000..61ff91ac5f1 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvonnxparser/IParser.java @@ -0,0 +1,134 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvonnxparser; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; + +import static org.bytedeco.tensorrt.global.nvonnxparser.*; + + +/** \class IParser + * + * \brief an object for parsing ONNX models into a TensorRT network definition + */ +@Namespace("nvonnxparser") @Properties(inherit = org.bytedeco.tensorrt.presets.nvonnxparser.class) +public class IParser extends Pointer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IParser(Pointer p) { super(p); } + + /** \brief Parse a serialized ONNX model into the TensorRT network. + * This method has very limited diagnostics. If parsing the serialized model + * fails for any reason (e.g. unsupported IR version, unsupported opset, etc.) + * it the user responsibility to intercept and report the error. + * To obtain a better diagnostic, use the parseFromFile method below. 
+ * + * @param serialized_onnx_model Pointer to the serialized ONNX model + * @param serialized_onnx_model_size Size of the serialized ONNX model + * in bytes + * @param model_path Absolute path to the model file for loading external weights if required + * @return true if the model was parsed successfully + * @see getNbErrors() getError() + */ + public native @Cast("bool") boolean parse(@Const Pointer serialized_onnx_model, + @Cast("size_t") long serialized_onnx_model_size, + String model_path/*=nullptr*/); + public native @Cast("bool") boolean parse(@Const Pointer serialized_onnx_model, + @Cast("size_t") long serialized_onnx_model_size); + public native @Cast("bool") boolean parse(@Const Pointer serialized_onnx_model, + @Cast("size_t") long serialized_onnx_model_size, + @Cast("const char*") BytePointer model_path/*=nullptr*/); + + /** \brief Parse an onnx model file, which can be a binary protobuf or a text onnx model + * calls parse method inside. + * + * @param File name + * @param Verbosity Level + * + * @return true if the model was parsed successfully + * + */ + public native @Cast("bool") boolean parseFromFile(String onnxModelFile, int verbosity); + public native @Cast("bool") boolean parseFromFile(@Cast("const char*") BytePointer onnxModelFile, int verbosity); + + /** \brief Check whether TensorRT supports a particular ONNX model + * + * @param serialized_onnx_model Pointer to the serialized ONNX model + * @param serialized_onnx_model_size Size of the serialized ONNX model + * in bytes + * @param sub_graph_collection Container to hold supported subgraphs + * @param model_path Absolute path to the model file for loading external weights if required + * @return true if the model is supported + */ + public native @Cast("bool") boolean supportsModel(@Const Pointer serialized_onnx_model, + @Cast("size_t") long serialized_onnx_model_size, + @ByRef SubGraphCollection_t sub_graph_collection, + String model_path/*=nullptr*/); + public native @Cast("bool") boolean 
supportsModel(@Const Pointer serialized_onnx_model, + @Cast("size_t") long serialized_onnx_model_size, + @ByRef SubGraphCollection_t sub_graph_collection); + public native @Cast("bool") boolean supportsModel(@Const Pointer serialized_onnx_model, + @Cast("size_t") long serialized_onnx_model_size, + @ByRef SubGraphCollection_t sub_graph_collection, + @Cast("const char*") BytePointer model_path/*=nullptr*/); + + /** \brief Parse a serialized ONNX model into the TensorRT network + * with consideration of user provided weights + * + * @param serialized_onnx_model Pointer to the serialized ONNX model + * @param serialized_onnx_model_size Size of the serialized ONNX model + * in bytes + * @return true if the model was parsed successfully + * @see getNbErrors() getError() + */ + public native @Cast("bool") boolean parseWithWeightDescriptors( + @Const Pointer serialized_onnx_model, @Cast("size_t") long serialized_onnx_model_size); + + /** \brief Returns whether the specified operator may be supported by the + * parser. + * + * Note that a result of true does not guarantee that the operator will be + * supported in all cases (i.e., this function may return false-positives). 
+ * + * @param op_name The name of the ONNX operator to check for support + */ + public native @Cast("bool") boolean supportsOperator(String op_name); + public native @Cast("bool") boolean supportsOperator(@Cast("const char*") BytePointer op_name); + /** \brief destroy this object + * + * \warning deprecated and planned on being removed in TensorRT 10.0 + */ + public native @Deprecated void destroy(); + /** \brief Get the number of errors that occurred during prior calls to + * \p parse + * + * @see getError() clearErrors() IParserError + */ + public native int getNbErrors(); + /** \brief Get an error that occurred during prior calls to \p parse + * + * @see getNbErrors() clearErrors() IParserError + */ + public native @Const IParserError getError(int index); + /** \brief Clear errors from prior calls to \p parse + * + * @see getNbErrors() getError() IParserError + */ + public native void clearErrors(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvonnxparser/IParserError.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvonnxparser/IParserError.java new file mode 100644 index 00000000000..387bd577431 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvonnxparser/IParserError.java @@ -0,0 +1,54 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvonnxparser; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import 
org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; + +import static org.bytedeco.tensorrt.global.nvonnxparser.*; + + +/** \class IParserError + * + * \brief an object containing information about an error + */ +@Namespace("nvonnxparser") @Properties(inherit = org.bytedeco.tensorrt.presets.nvonnxparser.class) +public class IParserError extends Pointer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IParserError(Pointer p) { super(p); } + + /** \brief the error code + */ + public native org.bytedeco.tensorrt.global.nvonnxparser.ErrorCode code(); + /** \brief description of the error + */ + public native String desc(); + /** \brief source file in which the error occurred + */ + public native String file(); + /** \brief source line at which the error occurred + */ + public native int line(); + /** \brief source function in which the error occurred + */ + public native String func(); + /** \brief index of the ONNX model node in which the error occurred + */ + public native int node(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvonnxparser/SubGraphCollection_t.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvonnxparser/SubGraphCollection_t.java new file mode 100644 index 00000000000..3e681c73aad --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvonnxparser/SubGraphCollection_t.java @@ -0,0 +1,93 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvonnxparser; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static 
org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; + +import static org.bytedeco.tensorrt.global.nvonnxparser.*; + +@Name("std::vector") @Properties(inherit = org.bytedeco.tensorrt.presets.nvonnxparser.class) +public class SubGraphCollection_t extends Pointer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public SubGraphCollection_t(Pointer p) { super(p); } + public SubGraphCollection_t(SubGraph_t value) { this(1); put(0, value); } + public SubGraphCollection_t(SubGraph_t ... array) { this(array.length); put(array); } + public SubGraphCollection_t() { allocate(); } + public SubGraphCollection_t(long n) { allocate(n); } + private native void allocate(); + private native void allocate(@Cast("size_t") long n); + public native @Name("operator =") @ByRef SubGraphCollection_t put(@ByRef SubGraphCollection_t x); + + public boolean empty() { return size() == 0; } + public native long size(); + public void clear() { resize(0); } + public native void resize(@Cast("size_t") long n); + + @Index(function = "at") public native @ByRef SubGraph_t get(@Cast("size_t") long i); + public native SubGraphCollection_t put(@Cast("size_t") long i, SubGraph_t value); + + public native @ByVal Iterator insert(@ByVal Iterator pos, @ByRef SubGraph_t value); + public native @ByVal Iterator erase(@ByVal Iterator pos); + public native @ByVal Iterator begin(); + public native @ByVal Iterator end(); + @NoOffset @Name("iterator") public static class Iterator extends Pointer { + public Iterator(Pointer p) { super(p); } + public Iterator() { } + + public native @Name("operator ++") @ByRef Iterator increment(); + public native @Name("operator ==") boolean equals(@ByRef 
Iterator it); + public native @Name("operator *") @ByRef @Const SubGraph_t get(); + } + + public SubGraph_t[] get() { + SubGraph_t[] array = new SubGraph_t[size() < Integer.MAX_VALUE ? (int)size() : Integer.MAX_VALUE]; + for (int i = 0; i < array.length; i++) { + array[i] = get(i); + } + return array; + } + @Override public String toString() { + return java.util.Arrays.toString(get()); + } + + public SubGraph_t pop_back() { + long size = size(); + SubGraph_t value = get(size - 1); + resize(size - 1); + return value; + } + public SubGraphCollection_t push_back(SubGraph_t value) { + long size = size(); + resize(size + 1); + return put(size, value); + } + public SubGraphCollection_t put(SubGraph_t value) { + if (size() != 1) { resize(1); } + return put(0, value); + } + public SubGraphCollection_t put(SubGraph_t ... array) { + if (size() != array.length) { resize(array.length); } + for (int i = 0; i < array.length; i++) { + put(i, array[i]); + } + return this; + } +} + diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvonnxparser/SubGraph_t.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvonnxparser/SubGraph_t.java new file mode 100644 index 00000000000..57e1927cb7b --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvonnxparser/SubGraph_t.java @@ -0,0 +1,45 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvonnxparser; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static 
org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; + +import static org.bytedeco.tensorrt.global.nvonnxparser.*; + +@NoOffset @Name("std::pair,bool>") @Properties(inherit = org.bytedeco.tensorrt.presets.nvonnxparser.class) +public class SubGraph_t extends Pointer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public SubGraph_t(Pointer p) { super(p); } + public SubGraph_t(SizeTPointer firstValue, boolean secondValue) { this(); put(firstValue, secondValue); } + public SubGraph_t() { allocate(); } + private native void allocate(); + public native @Name("operator =") @ByRef SubGraph_t put(@ByRef SubGraph_t x); + + + @MemberGetter public native @StdVector SizeTPointer first(); public native SubGraph_t first(SizeTPointer first); + @MemberGetter public native @Cast("bool") boolean second(); public native SubGraph_t second(boolean second); + + public SubGraph_t put(SizeTPointer firstValue, boolean secondValue) { + first(firstValue); + second(secondValue); + return this; + } +} + diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/FieldCollection.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/FieldCollection.java new file mode 100644 index 00000000000..6e452afd6b8 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/FieldCollection.java @@ -0,0 +1,46 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvparsers; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static 
org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; + +import static org.bytedeco.tensorrt.global.nvparsers.*; + + +@Namespace("nvuffparser") @Properties(inherit = org.bytedeco.tensorrt.presets.nvparsers.class) +public class FieldCollection extends Pointer { + static { Loader.load(); } + /** Default native constructor. */ + public FieldCollection() { super((Pointer)null); allocate(); } + /** Native array allocator. Access with {@link Pointer#position(long)}. */ + public FieldCollection(long size) { super((Pointer)null); allocateArray(size); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public FieldCollection(Pointer p) { super(p); } + private native void allocate(); + private native void allocateArray(long size); + @Override public FieldCollection position(long position) { + return (FieldCollection)super.position(position); + } + @Override public FieldCollection getPointer(long i) { + return new FieldCollection((Pointer)this).offsetAddress(i); + } + + public native int nbFields(); public native FieldCollection nbFields(int setter); + public native @Const FieldMap fields(); public native FieldCollection fields(FieldMap setter); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/FieldMap.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/FieldMap.java new file mode 100644 index 00000000000..2902faaba83 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/FieldMap.java @@ -0,0 +1,54 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvparsers; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + 
+import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; + +import static org.bytedeco.tensorrt.global.nvparsers.*; + + +/** + * \class FieldMap + * + * \brief An array of field params used as a layer parameter for plugin layers. + * + * The node fields are passed by the parser to the API through the plugin + * constructor. The implementation of the plugin should parse the contents of + * the fieldMap as part of the plugin constructor + * */ +@Namespace("nvuffparser") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvparsers.class) +public class FieldMap extends Pointer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public FieldMap(Pointer p) { super(p); } + + public native String name(); public native FieldMap name(String setter); + public native @Const Pointer data(); public native FieldMap data(Pointer setter); + public native FieldType type(); public native FieldMap type(FieldType setter); + public native int length(); public native FieldMap length(int setter); + + public FieldMap(String name, @Const Pointer data, FieldType type, int length/*=1*/) { super((Pointer)null); allocate(name, data, type, length); } + private native void allocate(String name, @Const Pointer data, FieldType type, int length/*=1*/); + public FieldMap(String name, @Const Pointer data, FieldType type) { super((Pointer)null); allocate(name, data, type); } + private native void allocate(String name, @Const Pointer data, FieldType type); + public FieldMap(@Cast("const char*") BytePointer name, @Const Pointer data, @Cast("nvuffparser::FieldType") int type, int length/*=1*/) { super((Pointer)null); allocate(name, data, type, length); } + private native void allocate(@Cast("const char*") BytePointer name, @Const Pointer data, @Cast("nvuffparser::FieldType") int type, int length/*=1*/); + public FieldMap(@Cast("const char*") BytePointer name, @Const Pointer data, @Cast("nvuffparser::FieldType") int type) { super((Pointer)null); allocate(name, data, type); } + private native void allocate(@Cast("const char*") BytePointer name, @Const Pointer data, @Cast("nvuffparser::FieldType") int type); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/IBinaryProtoBlob.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/IBinaryProtoBlob.java new file mode 100644 index 00000000000..7efdc74ee6c --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/IBinaryProtoBlob.java @@ -0,0 +1,54 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvparsers; + +import java.nio.*; +import org.bytedeco.javacpp.*; 
+import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; + +import static org.bytedeco.tensorrt.global.nvparsers.*; + + +/** + * \class IBinaryProtoBlob + * + * \brief Object used to store and query data extracted from a binaryproto file using the ICaffeParser. + * + * @see nvcaffeparser1::ICaffeParser + * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. + * */ +@Namespace("nvcaffeparser1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvparsers.class) +public class IBinaryProtoBlob extends Pointer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IBinaryProtoBlob(Pointer p) { super(p); } + + public native @Const @NoException(true) Pointer getData(); + public native @ByVal @NoException(true) Dims4 getDimensions(); + + //! + //! + //! + public native @NoException(true) DataType getDataType(); + /** + * @deprecated Deprecated interface will be removed in TensorRT 10.0. + * + * \warning Calling destroy on a managed pointer will result in a double-free error. 
+ * */ + public native @Deprecated @NoException(true) void destroy(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/IBlobNameToTensor.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/IBlobNameToTensor.java new file mode 100644 index 00000000000..30d6017a14e --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/IBlobNameToTensor.java @@ -0,0 +1,51 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvparsers; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; + +import static org.bytedeco.tensorrt.global.nvparsers.*; + + +/** + * \class IBlobNameToTensor + * + * \brief Object used to store and query Tensors after they have been extracted from a Caffe model using the ICaffeParser. + * + * \note The lifetime of IBlobNameToTensor is the same as the lifetime of its parent ICaffeParser. + * + * @see nvcaffeparser1::ICaffeParser + * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. + * */ +@Namespace("nvcaffeparser1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvparsers.class) +public class IBlobNameToTensor extends Pointer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public IBlobNameToTensor(Pointer p) { super(p); } + + /** \brief Given a blob name, returns a pointer to a ITensor object. + * + * @param name Caffe blob name for which the user wants the corresponding ITensor. + * + * @return ITensor* corresponding to the queried name. If no such ITensor exists, then nullptr is returned. + * */ + public native @NoException(true) ITensor find(String name); + public native @NoException(true) ITensor find(@Cast("const char*") BytePointer name); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/ICaffeParser.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/ICaffeParser.java new file mode 100644 index 00000000000..2e8b185168f --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/ICaffeParser.java @@ -0,0 +1,195 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvparsers; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; + +import static org.bytedeco.tensorrt.global.nvparsers.*; + +/** + * \class ICaffeParser + * + * \brief Class used for parsing Caffe models. + * + * Allows users to export models trained using Caffe to TRT. + * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. 
+ * */ +@Namespace("nvcaffeparser1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvparsers.class) +public class ICaffeParser extends Pointer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public ICaffeParser(Pointer p) { super(p); } + + /** + * \brief Parse a prototxt file and a binaryproto Caffe model to extract + * network definition and weights associated with the network, respectively. + * + * @param deploy The plain text, prototxt file used to define the network definition. + * @param model The binaryproto Caffe model that contains the weights associated with the network. + * @param network Network in which the CaffeParser will fill the layers. + * @param weightType The type to which the weights will transformed. + * + * @return A pointer to an IBlobNameToTensor object that contains the extracted data. + * + * @see nvcaffeparser1::IBlobNameToTensor + * */ + + + //! + //! + //! + //! + //! + public native @Const @NoException(true) IBlobNameToTensor parse(String deploy, String model, @ByRef INetworkDefinition network, + DataType weightType); + public native @Const @NoException(true) IBlobNameToTensor parse(@Cast("const char*") BytePointer deploy, @Cast("const char*") BytePointer model, @ByRef INetworkDefinition network, + @Cast("nvinfer1::DataType") int weightType); + + /** + * \brief Parse a deploy prototxt and a binaryproto Caffe model from memory buffers to extract + * network definition and weights associated with the network, respectively. + * + * @param deployBuffer The plain text deploy prototxt used to define the network definition. + * @param deployLength The length of the deploy buffer. + * @param modelBuffer The binaryproto Caffe memory buffer that contains the weights associated with the network. + * @param modelLength The length of the model buffer. + * @param network Network in which the CaffeParser will fill the layers. 
+ * @param weightType The type to which the weights will transformed. + * + * @return A pointer to an IBlobNameToTensor object that contains the extracted data. + * + * @see nvcaffeparser1::IBlobNameToTensor + * */ + + + //! + //! + //! + //! + //! + //! + public native @Const @NoException(true) IBlobNameToTensor parseBuffers(String deployBuffer, @Cast("std::size_t") long deployLength, + String modelBuffer, @Cast("std::size_t") long modelLength, @ByRef INetworkDefinition network, + DataType weightType); + public native @Const @NoException(true) IBlobNameToTensor parseBuffers(@Cast("const char*") BytePointer deployBuffer, @Cast("std::size_t") long deployLength, + @Cast("const char*") BytePointer modelBuffer, @Cast("std::size_t") long modelLength, @ByRef INetworkDefinition network, + @Cast("nvinfer1::DataType") int weightType); + + /** + * \brief Parse and extract data stored in binaryproto file. + * + * The binaryproto file contains data stored in a binary blob. parseBinaryProto() converts it + * to an IBinaryProtoBlob object which gives the user access to the data and meta-data about data. + * + * @param fileName Path to file containing binary proto. + * + * @return A pointer to an IBinaryProtoBlob object that contains the extracted data. + * + * @see nvcaffeparser1::IBinaryProtoBlob + * */ + + + //! + //! + //! + //! + public native @NoException(true) IBinaryProtoBlob parseBinaryProto(String fileName); + public native @NoException(true) IBinaryProtoBlob parseBinaryProto(@Cast("const char*") BytePointer fileName); + + /** + * \brief Set buffer size for the parsing and storage of the learned model. + * + * @param size The size of the buffer specified as the number of bytes. + * + * \note Default size is 2^30 bytes. + * */ + + + //! + //! + //! + //! + public native @NoException(true) void setProtobufBufferSize(@Cast("size_t") long size); + + /** + * \brief Destroy this ICaffeParser object. + * + * @deprecated Deprecated interface will be removed in TensorRT 10.0. 
+ * + * \warning Calling destroy on a managed pointer will result in a double-free error. + * */ + + + //! + //! + //! + public native @Deprecated @NoException(true) void destroy(); + + /** + * \brief Set the IPluginFactoryV2 used to create the user defined pluginV2 objects. + * + * @param factory Pointer to an instance of the user implementation of IPluginFactoryV2. + * */ + + + //! + //! + public native @NoException(true) void setPluginFactoryV2(IPluginFactoryV2 factory); + + /** + * \brief Set the namespace used to lookup and create plugins in the network. + * */ + public native @NoException(true) void setPluginNamespace(String libNamespace); + public native @NoException(true) void setPluginNamespace(@Cast("const char*") BytePointer libNamespace); + /** + * \brief Set the ErrorRecorder for this interface + * + * Assigns the ErrorRecorder to this interface. The ErrorRecorder will track all errors during execution. + * This function will call incRefCount of the registered ErrorRecorder at least once. Setting + * recorder to nullptr unregisters the recorder with the interface, resulting in a call to decRefCount if + * a recorder has been registered. + * + * If an error recorder is not set, messages will be sent to the global log stream. + * + * @param recorder The error recorder to register with this interface. + * + * @see getErrorRecorder() + * */ + + + //! + //! + //! + //! + //! + public native @NoException(true) void setErrorRecorder(IErrorRecorder recorder); + + /** + * \brief get the ErrorRecorder assigned to this interface. + * + * Retrieves the assigned error recorder object for the given class. A + * nullptr will be returned if setErrorRecorder has not been called. + * + * @return A pointer to the IErrorRecorder object that has been registered. 
+ * + * @see setErrorRecorder() + * */ + public native @NoException(true) IErrorRecorder getErrorRecorder(); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/IPluginFactoryV2.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/IPluginFactoryV2.java new file mode 100644 index 00000000000..2f7209e4965 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/IPluginFactoryV2.java @@ -0,0 +1,66 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvparsers; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; + +import static org.bytedeco.tensorrt.global.nvparsers.*; + + +/** + * \class IPluginFactoryV2 + * + * \brief Plugin factory used to configure plugins. + * */ +@Namespace("nvcaffeparser1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvparsers.class) +public class IPluginFactoryV2 extends Pointer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IPluginFactoryV2(Pointer p) { super(p); } + + /** + * \brief A user implemented function that determines if a layer configuration is provided by an IPluginV2. + * + * @param layerName Name of the layer which the user wishes to validate. + * */ + + + //! + //! + //! 
+ public native @Cast("bool") @NoException(true) boolean isPluginV2(String layerName); + public native @Cast("bool") @NoException(true) boolean isPluginV2(@Cast("const char*") BytePointer layerName); + + /** + * \brief Creates a plugin. + * + * @param layerName Name of layer associated with the plugin. + * @param weights Weights used for the layer. + * @param nbWeights Number of weights. + * @param libNamespace Library Namespace associated with the plugin object + * */ + public native @NoException(true) IPluginV2 createPlugin(String layerName, @Const Weights weights, + int nbWeights, String libNamespace/*=""*/); + public native @NoException(true) IPluginV2 createPlugin(String layerName, @Const Weights weights, + int nbWeights); + public native @NoException(true) IPluginV2 createPlugin(@Cast("const char*") BytePointer layerName, @Const Weights weights, + int nbWeights, @Cast("const char*") BytePointer libNamespace/*=""*/); + public native @NoException(true) IPluginV2 createPlugin(@Cast("const char*") BytePointer layerName, @Const Weights weights, + int nbWeights); +} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/IUffParser.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/IUffParser.java new file mode 100644 index 00000000000..47016d71da1 --- /dev/null +++ b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/IUffParser.java @@ -0,0 +1,180 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tensorrt.nvparsers; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static 
org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; + +import static org.bytedeco.tensorrt.global.nvparsers.*; + + +/** + * \class IUffParser + * + * \brief Class used for parsing models described using the UFF format. + * + * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. + * */ +@Namespace("nvuffparser") @Properties(inherit = org.bytedeco.tensorrt.presets.nvparsers.class) +public class IUffParser extends Pointer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public IUffParser(Pointer p) { super(p); } + + /** + * \brief Register an input name of a UFF network with the associated Dimensions. + * + * @param inputName Input name. + * @param inputDims Input dimensions. + * @param inputOrder Input order on which the framework input was originally. + * */ + + + //! + //! + //! + public native @Cast("bool") @NoException(true) boolean registerInput(String inputName, @ByVal @Cast("nvinfer1::Dims*") Dims32 inputDims, UffInputOrder inputOrder); + public native @Cast("bool") @NoException(true) boolean registerInput(@Cast("const char*") BytePointer inputName, @ByVal @Cast("nvinfer1::Dims*") Dims32 inputDims, @Cast("nvuffparser::UffInputOrder") int inputOrder); + + /** + * \brief Register an output name of a UFF network. + * + * @param outputName Output name. + * */ + + + //! + //! + //! + public native @Cast("bool") @NoException(true) boolean registerOutput(String outputName); + public native @Cast("bool") @NoException(true) boolean registerOutput(@Cast("const char*") BytePointer outputName); + + /** + * \brief Parse a UFF file. + * + * @param file File name of the UFF file. + * @param network Network in which the UFFParser will fill the layers. 
+ * @param weightsType The type on which the weights will transformed in. + * */ + + + //! + //! + //! + public native @Cast("bool") @NoException(true) boolean parse(String file, @ByRef INetworkDefinition network, + DataType weightsType/*=nvinfer1::DataType::kFLOAT*/); + public native @Cast("bool") @NoException(true) boolean parse(String file, @ByRef INetworkDefinition network); + public native @Cast("bool") @NoException(true) boolean parse(@Cast("const char*") BytePointer file, @ByRef INetworkDefinition network, + @Cast("nvinfer1::DataType") int weightsType/*=nvinfer1::DataType::kFLOAT*/); + public native @Cast("bool") @NoException(true) boolean parse(@Cast("const char*") BytePointer file, @ByRef INetworkDefinition network); + + /** + * \brief Parse a UFF buffer, useful if the file already live in memory. + * + * @param buffer Buffer of the UFF file. + * @param size Size of buffer of the UFF file. + * @param network Network in which the UFFParser will fill the layers. + * @param weightsType The type on which the weights will transformed in. + * */ + + + //! + //! + public native @Cast("bool") @NoException(true) boolean parseBuffer(String buffer, @Cast("std::size_t") long size, @ByRef INetworkDefinition network, + DataType weightsType/*=nvinfer1::DataType::kFLOAT*/); + public native @Cast("bool") @NoException(true) boolean parseBuffer(String buffer, @Cast("std::size_t") long size, @ByRef INetworkDefinition network); + public native @Cast("bool") @NoException(true) boolean parseBuffer(@Cast("const char*") BytePointer buffer, @Cast("std::size_t") long size, @ByRef INetworkDefinition network, + @Cast("nvinfer1::DataType") int weightsType/*=nvinfer1::DataType::kFLOAT*/); + public native @Cast("bool") @NoException(true) boolean parseBuffer(@Cast("const char*") BytePointer buffer, @Cast("std::size_t") long size, @ByRef INetworkDefinition network); + + /** + * @deprecated Deprecated interface will be removed in TensorRT 10.0. + * */ + + + //! + //! 
+ public native @Deprecated @NoException(true) void destroy(); + + /** + * \brief Return Version Major of the UFF. + * */ + + + //! + //! + public native @NoException(true) int getUffRequiredVersionMajor(); + + /** + * \brief Return Version Minor of the UFF. + * */ + + + //! + //! + public native @NoException(true) int getUffRequiredVersionMinor(); + + /** + * \brief Return Patch Version of the UFF. + * */ + + + //! + //! + public native @NoException(true) int getUffRequiredVersionPatch(); + + /** + * \brief Set the namespace used to lookup and create plugins in the network. + * */ + public native @NoException(true) void setPluginNamespace(String libNamespace); + public native @NoException(true) void setPluginNamespace(@Cast("const char*") BytePointer libNamespace); + /** + * \brief Set the ErrorRecorder for this interface + * + * Assigns the ErrorRecorder to this interface. The ErrorRecorder will track all errors during execution. + * This function will call incRefCount of the registered ErrorRecorder at least once. Setting + * recorder to nullptr unregisters the recorder with the interface, resulting in a call to decRefCount if + * a recorder has been registered. + * + * If an error recorder is not set, messages will be sent to the global log stream. + * + * @param recorder The error recorder to register with this interface. */ + // + /** @see getErrorRecorder() + /** */ + + + //! + //! + //! + //! + //! + public native @NoException(true) void setErrorRecorder(IErrorRecorder recorder); + + /** + * \brief get the ErrorRecorder assigned to this interface. + * + * Retrieves the assigned error recorder object for the given class. A + * nullptr will be returned if setErrorRecorder has not been called. + * + * @return A pointer to the IErrorRecorder object that has been registered. 
+ * + * @see setErrorRecorder() + * */ + public native @NoException(true) IErrorRecorder getErrorRecorder(); +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/global/tritonserver.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/global/tritonserver.java new file mode 100644 index 00000000000..8840fbb4c7a --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/global/tritonserver.java @@ -0,0 +1,4400 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.global; + +import org.bytedeco.tritonserver.tritonserver.*; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +public class tritonserver extends org.bytedeco.tritonserver.presets.tritonserver { + static { Loader.load(); } + +// Parsed from tritonbackend.h + +// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of NVIDIA CORPORATION nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// #pragma once + +// #include +// #include +// #include "triton/core/tritonserver.h" + +// #ifdef __cplusplus +// #endif + +// #ifdef _COMPILING_TRITONBACKEND +// #if defined(_MSC_VER) +public static native @MemberGetter int TRITONBACKEND_DECLSPEC(); +public static final int TRITONBACKEND_DECLSPEC = TRITONBACKEND_DECLSPEC(); +// #define TRITONBACKEND_ISPEC __declspec(dllimport) +// #elif defined(__GNUC__) +// #define TRITONBACKEND_ISPEC +// #else +// #define TRITONBACKEND_DECLSPEC +// #define TRITONBACKEND_ISPEC +// #endif +// #else +// #if defined(_MSC_VER) +// #define TRITONBACKEND_ISPEC __declspec(dllexport) +// #else +// #define TRITONBACKEND_DECLSPEC +// #define TRITONBACKEND_ISPEC +// Targeting ../tritonserver/TRITONBACKEND_MemoryManager.java + + +// Targeting ../tritonserver/TRITONBACKEND_Input.java + + +// Targeting ../tritonserver/TRITONBACKEND_Output.java + + +// Targeting ../tritonserver/TRITONBACKEND_Request.java + + +// Targeting ../tritonserver/TRITONBACKEND_ResponseFactory.java + + +// Targeting ../tritonserver/TRITONBACKEND_Response.java + + +// Targeting ../tritonserver/TRITONBACKEND_Backend.java + + +// Targeting ../tritonserver/TRITONBACKEND_Model.java + + +// Targeting ../tritonserver/TRITONBACKEND_ModelInstance.java + + + +/** + * TRITONBACKEND API Version + * + * The TRITONBACKEND API is versioned with major and minor version + * numbers. Any change to the API that does not impact backwards + * compatibility (for example, adding a non-required function) + * increases the minor version number. Any change that breaks + * backwards compatibility (for example, deleting or changing the + * behavior of a function) increases the major version number. A + * backend should check that the API version used to compile the + * backend is compatible with the API version of the Triton server + * that it is running in. 
This is typically done by code similar to + * the following which makes sure that the major versions are equal + * and that the minor version of Triton is >= the minor version used + * to build the backend. + * + * uint32_t api_version_major, api_version_minor; + * TRITONBACKEND_ApiVersion(&api_version_major, &api_version_minor); + * if ((api_version_major != TRITONBACKEND_API_VERSION_MAJOR) || + * (api_version_minor < TRITONBACKEND_API_VERSION_MINOR)) { + * return TRITONSERVER_ErrorNew( + * TRITONSERVER_ERROR_UNSUPPORTED, + * "triton backend API version does not support this backend"); + * } + * */ +public static final int TRITONBACKEND_API_VERSION_MAJOR = 1; + +/// +public static final int TRITONBACKEND_API_VERSION_MINOR = 4; + +/** Get the TRITONBACKEND API version supported by Triton. This value + * can be compared against the TRITONBACKEND_API_VERSION_MAJOR and + * TRITONBACKEND_API_VERSION_MINOR used to build the backend to + * ensure that Triton is compatible with the backend. + * + * @param major Returns the TRITONBACKEND API major version supported + * by Triton. + * @param minor Returns the TRITONBACKEND API minor version supported + * by Triton. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +/// +/// +public static native IntPointer TRITONBACKEND_ApiVersion( + @Cast("uint32_t*") IntPointer major, @Cast("uint32_t*") IntPointer minor); +public static native IntBuffer TRITONBACKEND_ApiVersion( + @Cast("uint32_t*") IntBuffer major, @Cast("uint32_t*") IntBuffer minor); +public static native int[] TRITONBACKEND_ApiVersion( + @Cast("uint32_t*") int[] major, @Cast("uint32_t*") int[] minor); + +/** TRITONBACKEND_ArtifactType + * + * The ways that the files that make up a backend or model are + * communicated to the backend. + * + * TRITONBACKEND_ARTIFACT_FILESYSTEM: The model or backend + * artifacts are made available to Triton via a locally + * accessible filesystem. 
The backend can access these files + * using an appropriate system API. + * */ +public enum TRITONBACKEND_ArtifactType { + TRITONBACKEND_ARTIFACT_FILESYSTEM(0); + + public final int value; + private TRITONBACKEND_ArtifactType(int v) { this.value = v; } + private TRITONBACKEND_ArtifactType(TRITONBACKEND_ArtifactType e) { this.value = e.value; } + public TRITONBACKEND_ArtifactType intern() { for (TRITONBACKEND_ArtifactType e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + + +/** + * TRITONBACKEND_MemoryManager + * + * Object representing an memory manager that is capable of + * allocating and otherwise managing different memory types. For + * improved performance Triton maintains pools for GPU and CPU-pinned + * memory and the memory manager allows backends to access those + * pools. + * +

+ * Allocate a contiguous block of memory of a specific type using a + * memory manager. Two error codes have specific interpretations for + * this function: + * + * TRITONSERVER_ERROR_UNSUPPORTED: Indicates that Triton is + * incapable of allocating the requested memory type and memory + * type ID. Requests for the memory type and ID will always fail + * no matter 'byte_size' of the request. + * + * TRITONSERVER_ERROR_UNAVAILABLE: Indicates that Triton can + * allocate the memory type and ID but that currently it cannot + * allocate a contiguous block of memory of the requested + * 'byte_size'. + * + * @param manager The memory manager. + * @param buffer Returns the allocated memory. + * @param memory_type The type of memory to allocate. + * @param memory_type_id The ID associated with the memory type to + * allocate. For GPU memory this indicates the device ID of the GPU + * to allocate from. + * @param byte_size The size of memory to allocate, in bytes. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native IntPointer TRITONBACKEND_MemoryManagerAllocate( + TRITONBACKEND_MemoryManager manager, @Cast("void**") PointerPointer buffer, + @Const @ByVal TRITONSERVER_MemoryType memory_type, @Cast("const int64_t") long memory_type_id, + @Cast("const uint64_t") long byte_size); +public static native IntPointer TRITONBACKEND_MemoryManagerAllocate( + TRITONBACKEND_MemoryManager manager, @Cast("void**") @ByPtrPtr Pointer buffer, + @Const @ByVal TRITONSERVER_MemoryType memory_type, @Cast("const int64_t") long memory_type_id, + @Cast("const uint64_t") long byte_size); + +/** Free a buffer that was previously allocated with + * TRITONBACKEND_MemoryManagerAllocate. The call must provide the + * same values for 'memory_type' and 'memory_type_id' as were used + * when the buffer was allocate or else the behavior is undefined. + * + * @param manager The memory manager. + * @param buffer The allocated memory buffer to free. 
+ * @param memory_type The type of memory of the buffer. + * @param memory_type_id The ID associated with the memory type of + * the buffer. + * @return a TRITONSERVER_Error indicating success or failure. */ + + +/// +/// +/// +/// +public static native IntPointer TRITONBACKEND_MemoryManagerFree( + TRITONBACKEND_MemoryManager manager, Pointer buffer, + @Const @ByVal TRITONSERVER_MemoryType memory_type, @Cast("const int64_t") long memory_type_id); + +/** + * TRITONBACKEND_Input + * + * Object representing an input tensor. + * +

+ * Get the name and properties of an input tensor. The returned + * strings and other properties are owned by the input, not the + * caller, and so should not be modified or freed. + * + * @param input The input tensor. + * @param name If non-nullptr, returns the tensor name. + * @param datatype If non-nullptr, returns the tensor datatype. + * @param shape If non-nullptr, returns the tensor shape. + * @param dim_count If non-nullptr, returns the number of dimensions + * in the tensor shape. + * @param byte_size If non-nullptr, returns the size of the available + * data for the tensor, in bytes. This size reflects the actual data + * available, and does not necessarily match what is + * expected/required for the tensor given its shape and datatype. It + * is the responsibility of the backend to handle mismatches in these + * sizes appropriately. + * @param buffer_count If non-nullptr, returns the number of buffers + * holding the contents of the tensor. These buffers are accessed + * using TRITONBACKEND_InputBuffer. + * @return a TRITONSERVER_Error indicating success or failure. 
*/ + +/// +public static native IntPointer TRITONBACKEND_InputProperties( + TRITONBACKEND_Input input, @Cast("const char**") PointerPointer name, + TRITONSERVER_DataType datatype, @Cast("const int64_t**") PointerPointer shape, + @Cast("uint32_t*") IntPointer dims_count, @Cast("uint64_t*") LongPointer byte_size, @Cast("uint32_t*") IntPointer buffer_count); +public static native IntPointer TRITONBACKEND_InputProperties( + TRITONBACKEND_Input input, @Cast("const char**") @ByPtrPtr BytePointer name, + TRITONSERVER_DataType datatype, @Cast("const int64_t**") @ByPtrPtr LongPointer shape, + @Cast("uint32_t*") IntPointer dims_count, @Cast("uint64_t*") LongPointer byte_size, @Cast("uint32_t*") IntPointer buffer_count); +public static native IntBuffer TRITONBACKEND_InputProperties( + TRITONBACKEND_Input input, @Cast("const char**") @ByPtrPtr ByteBuffer name, + TRITONSERVER_DataType datatype, @Cast("const int64_t**") @ByPtrPtr LongBuffer shape, + @Cast("uint32_t*") IntBuffer dims_count, @Cast("uint64_t*") LongBuffer byte_size, @Cast("uint32_t*") IntBuffer buffer_count); +public static native int[] TRITONBACKEND_InputProperties( + TRITONBACKEND_Input input, @Cast("const char**") @ByPtrPtr byte[] name, + TRITONSERVER_DataType datatype, @Cast("const int64_t**") @ByPtrPtr long[] shape, + @Cast("uint32_t*") int[] dims_count, @Cast("uint64_t*") long[] byte_size, @Cast("uint32_t*") int[] buffer_count); + +/** Get the name and properties of an input tensor associated with a given + * host policy. If there are no input buffers for the specified host policy, + * the properties of the fallback input buffers are returned. The returned + * strings and other properties are owned by the input, not the caller, and so + * should not be modified or freed. + * + * @param input The input tensor. + * @param host_policy_name The host policy name. Fallback input properties + * will be return if nullptr is provided. + * @param name If non-nullptr, returns the tensor name. 
+ * @param datatype If non-nullptr, returns the tensor datatype. + * @param shape If non-nullptr, returns the tensor shape. + * @param dim_count If non-nullptr, returns the number of dimensions + * in the tensor shape. + * @param byte_size If non-nullptr, returns the size of the available + * data for the tensor, in bytes. This size reflects the actual data + * available, and does not necessarily match what is + * expected/required for the tensor given its shape and datatype. It + * is the responsibility of the backend to handle mismatches in these + * sizes appropriately. + * @param buffer_count If non-nullptr, returns the number of buffers + * holding the contents of the tensor. These buffers are accessed + * using TRITONBACKEND_InputBufferForHostPolicy. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native IntPointer TRITONBACKEND_InputPropertiesForHostPolicy( + TRITONBACKEND_Input input, String host_policy_name, @Cast("const char**") PointerPointer name, + TRITONSERVER_DataType datatype, @Cast("const int64_t**") PointerPointer shape, + @Cast("uint32_t*") IntPointer dims_count, @Cast("uint64_t*") LongPointer byte_size, @Cast("uint32_t*") IntPointer buffer_count); +public static native IntPointer TRITONBACKEND_InputPropertiesForHostPolicy( + TRITONBACKEND_Input input, String host_policy_name, @Cast("const char**") @ByPtrPtr BytePointer name, + TRITONSERVER_DataType datatype, @Cast("const int64_t**") @ByPtrPtr LongPointer shape, + @Cast("uint32_t*") IntPointer dims_count, @Cast("uint64_t*") LongPointer byte_size, @Cast("uint32_t*") IntPointer buffer_count); +public static native IntBuffer TRITONBACKEND_InputPropertiesForHostPolicy( + TRITONBACKEND_Input input, @Cast("const char*") BytePointer host_policy_name, @Cast("const char**") @ByPtrPtr ByteBuffer name, + TRITONSERVER_DataType datatype, @Cast("const int64_t**") @ByPtrPtr LongBuffer shape, + @Cast("uint32_t*") IntBuffer dims_count, @Cast("uint64_t*") LongBuffer 
byte_size, @Cast("uint32_t*") IntBuffer buffer_count); +public static native int[] TRITONBACKEND_InputPropertiesForHostPolicy( + TRITONBACKEND_Input input, String host_policy_name, @Cast("const char**") @ByPtrPtr byte[] name, + TRITONSERVER_DataType datatype, @Cast("const int64_t**") @ByPtrPtr long[] shape, + @Cast("uint32_t*") int[] dims_count, @Cast("uint64_t*") long[] byte_size, @Cast("uint32_t*") int[] buffer_count); +public static native IntPointer TRITONBACKEND_InputPropertiesForHostPolicy( + TRITONBACKEND_Input input, @Cast("const char*") BytePointer host_policy_name, @Cast("const char**") @ByPtrPtr BytePointer name, + TRITONSERVER_DataType datatype, @Cast("const int64_t**") @ByPtrPtr LongPointer shape, + @Cast("uint32_t*") IntPointer dims_count, @Cast("uint64_t*") LongPointer byte_size, @Cast("uint32_t*") IntPointer buffer_count); +public static native IntBuffer TRITONBACKEND_InputPropertiesForHostPolicy( + TRITONBACKEND_Input input, String host_policy_name, @Cast("const char**") @ByPtrPtr ByteBuffer name, + TRITONSERVER_DataType datatype, @Cast("const int64_t**") @ByPtrPtr LongBuffer shape, + @Cast("uint32_t*") IntBuffer dims_count, @Cast("uint64_t*") LongBuffer byte_size, @Cast("uint32_t*") IntBuffer buffer_count); +public static native int[] TRITONBACKEND_InputPropertiesForHostPolicy( + TRITONBACKEND_Input input, @Cast("const char*") BytePointer host_policy_name, @Cast("const char**") @ByPtrPtr byte[] name, + TRITONSERVER_DataType datatype, @Cast("const int64_t**") @ByPtrPtr long[] shape, + @Cast("uint32_t*") int[] dims_count, @Cast("uint64_t*") long[] byte_size, @Cast("uint32_t*") int[] buffer_count); + +/** Get a buffer holding (part of) the tensor data for an input. For a + * given input the number of buffers composing the input are found + * from 'buffer_count' returned by TRITONBACKEND_InputProperties. The + * returned buffer is owned by the input and so should not be + * modified or freed by the caller. 
The lifetime of the buffer + * matches that of the input and so the buffer should not be accessed + * after the input tensor object is released. + * + * @param input The input tensor. + * @param index The index of the buffer. Must be 0 <= index < + * buffer_count, where buffer_count is the value returned by + * TRITONBACKEND_InputProperties. + * @param buffer Returns a pointer to a contiguous block of data for + * the named input. + * @param buffer_byte_size Returns the size, in bytes, of 'buffer'. + * @param memory_type Acts as both input and output. On input gives + * the buffer memory type preferred by the function caller. Returns + * the actual memory type of 'buffer'. + * @param memory_type_id Acts as both input and output. On input + * gives the buffer memory type id preferred by the function caller. + * Returns the actual memory type id of 'buffer'. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native IntPointer TRITONBACKEND_InputBuffer( + TRITONBACKEND_Input input, @Cast("const uint32_t") int index, @Cast("const void**") PointerPointer buffer, + @Cast("uint64_t*") LongPointer buffer_byte_size, TRITONSERVER_MemoryType memory_type, + @Cast("int64_t*") LongPointer memory_type_id); +public static native IntPointer TRITONBACKEND_InputBuffer( + TRITONBACKEND_Input input, @Cast("const uint32_t") int index, @Cast("const void**") @ByPtrPtr Pointer buffer, + @Cast("uint64_t*") LongPointer buffer_byte_size, TRITONSERVER_MemoryType memory_type, + @Cast("int64_t*") LongPointer memory_type_id); +public static native IntBuffer TRITONBACKEND_InputBuffer( + TRITONBACKEND_Input input, @Cast("const uint32_t") int index, @Cast("const void**") @ByPtrPtr Pointer buffer, + @Cast("uint64_t*") LongBuffer buffer_byte_size, TRITONSERVER_MemoryType memory_type, + @Cast("int64_t*") LongBuffer memory_type_id); +public static native int[] TRITONBACKEND_InputBuffer( + TRITONBACKEND_Input input, @Cast("const uint32_t") int index, @Cast("const 
void**") @ByPtrPtr Pointer buffer, + @Cast("uint64_t*") long[] buffer_byte_size, TRITONSERVER_MemoryType memory_type, + @Cast("int64_t*") long[] memory_type_id); + +/** Get a buffer holding (part of) the tensor data for an input for a specific + * host policy. If there are no input buffers specified for this host policy, + * the fallback input buffer is returned. + * For a given input the number of buffers composing the input are found + * from 'buffer_count' returned by TRITONBACKEND_InputPropertiesForHostPolicy. + * The returned buffer is owned by the input and so should not be modified or + * freed by the caller. The lifetime of the buffer matches that of the input + * and so the buffer should not be accessed after the input tensor object is + * released. + * + * @param input The input tensor. + * @param host_policy_name The host policy name. Fallback input buffer + * will be return if nullptr is provided. + * @param index The index of the buffer. Must be 0 <= index < + * buffer_count, where buffer_count is the value returned by + * TRITONBACKEND_InputPropertiesForHostPolicy. + * @param buffer Returns a pointer to a contiguous block of data for + * the named input. + * @param buffer_byte_size Returns the size, in bytes, of 'buffer'. + * @param memory_type Acts as both input and output. On input gives + * the buffer memory type preferred by the function caller. Returns + * the actual memory type of 'buffer'. + * @param memory_type_id Acts as both input and output. On input + * gives the buffer memory type id preferred by the function caller. + * Returns the actual memory type id of 'buffer'. + * @return a TRITONSERVER_Error indicating success or failure. 
*/ + + +/// +/// +/// +/// +public static native IntPointer TRITONBACKEND_InputBufferForHostPolicy( + TRITONBACKEND_Input input, String host_policy_name, + @Cast("const uint32_t") int index, @Cast("const void**") PointerPointer buffer, @Cast("uint64_t*") LongPointer buffer_byte_size, + TRITONSERVER_MemoryType memory_type, @Cast("int64_t*") LongPointer memory_type_id); +public static native IntPointer TRITONBACKEND_InputBufferForHostPolicy( + TRITONBACKEND_Input input, String host_policy_name, + @Cast("const uint32_t") int index, @Cast("const void**") @ByPtrPtr Pointer buffer, @Cast("uint64_t*") LongPointer buffer_byte_size, + TRITONSERVER_MemoryType memory_type, @Cast("int64_t*") LongPointer memory_type_id); +public static native IntBuffer TRITONBACKEND_InputBufferForHostPolicy( + TRITONBACKEND_Input input, @Cast("const char*") BytePointer host_policy_name, + @Cast("const uint32_t") int index, @Cast("const void**") @ByPtrPtr Pointer buffer, @Cast("uint64_t*") LongBuffer buffer_byte_size, + TRITONSERVER_MemoryType memory_type, @Cast("int64_t*") LongBuffer memory_type_id); +public static native int[] TRITONBACKEND_InputBufferForHostPolicy( + TRITONBACKEND_Input input, String host_policy_name, + @Cast("const uint32_t") int index, @Cast("const void**") @ByPtrPtr Pointer buffer, @Cast("uint64_t*") long[] buffer_byte_size, + TRITONSERVER_MemoryType memory_type, @Cast("int64_t*") long[] memory_type_id); +public static native IntPointer TRITONBACKEND_InputBufferForHostPolicy( + TRITONBACKEND_Input input, @Cast("const char*") BytePointer host_policy_name, + @Cast("const uint32_t") int index, @Cast("const void**") @ByPtrPtr Pointer buffer, @Cast("uint64_t*") LongPointer buffer_byte_size, + TRITONSERVER_MemoryType memory_type, @Cast("int64_t*") LongPointer memory_type_id); +public static native IntBuffer TRITONBACKEND_InputBufferForHostPolicy( + TRITONBACKEND_Input input, String host_policy_name, + @Cast("const uint32_t") int index, @Cast("const void**") @ByPtrPtr Pointer 
buffer, @Cast("uint64_t*") LongBuffer buffer_byte_size, + TRITONSERVER_MemoryType memory_type, @Cast("int64_t*") LongBuffer memory_type_id); +public static native int[] TRITONBACKEND_InputBufferForHostPolicy( + TRITONBACKEND_Input input, @Cast("const char*") BytePointer host_policy_name, + @Cast("const uint32_t") int index, @Cast("const void**") @ByPtrPtr Pointer buffer, @Cast("uint64_t*") long[] buffer_byte_size, + TRITONSERVER_MemoryType memory_type, @Cast("int64_t*") long[] memory_type_id); + +/** + * TRITONBACKEND_Output + * + * Object representing a response output tensor. + * +

+ * Get a buffer to use to hold the tensor data for the output. The + * returned buffer is owned by the output and so should not be freed + * by the caller. The caller can and should fill the buffer with the + * output data for the tensor. The lifetime of the buffer matches + * that of the output and so the buffer should not be accessed after + * the output tensor object is released. + * + * @param buffer Returns a pointer to a buffer where the contents of + * the output tensor should be placed. + * @param buffer_byte_size The size, in bytes, of the buffer required + * by the caller. + * @param memory_type Acts as both input and output. On input gives + * the buffer memory type preferred by the caller. Returns the + * actual memory type of 'buffer'. + * @param memory_type_id Acts as both input and output. On input + * gives the buffer memory type id preferred by the caller. Returns + * the actual memory type id of 'buffer'. + * @return a TRITONSERVER_Error indicating success or failure. 
*/ + + +/// +/// +/// +/// +public static native IntPointer TRITONBACKEND_OutputBuffer( + TRITONBACKEND_Output output, @Cast("void**") PointerPointer buffer, + @Cast("const uint64_t") long buffer_byte_size, TRITONSERVER_MemoryType memory_type, + @Cast("int64_t*") LongPointer memory_type_id); +public static native IntPointer TRITONBACKEND_OutputBuffer( + TRITONBACKEND_Output output, @Cast("void**") @ByPtrPtr Pointer buffer, + @Cast("const uint64_t") long buffer_byte_size, TRITONSERVER_MemoryType memory_type, + @Cast("int64_t*") LongPointer memory_type_id); +public static native IntBuffer TRITONBACKEND_OutputBuffer( + TRITONBACKEND_Output output, @Cast("void**") @ByPtrPtr Pointer buffer, + @Cast("const uint64_t") long buffer_byte_size, TRITONSERVER_MemoryType memory_type, + @Cast("int64_t*") LongBuffer memory_type_id); +public static native int[] TRITONBACKEND_OutputBuffer( + TRITONBACKEND_Output output, @Cast("void**") @ByPtrPtr Pointer buffer, + @Cast("const uint64_t") long buffer_byte_size, TRITONSERVER_MemoryType memory_type, + @Cast("int64_t*") long[] memory_type_id); + +/** + * TRITONBACKEND_Request + * + * Object representing an inference request. + * +

+ * Get the ID of the request. Can be nullptr if request doesn't have + * an ID. The returned string is owned by the request, not the + * caller, and so should not be modified or freed. + * + * @param request The inference request. + * @param id Returns the ID. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native IntPointer TRITONBACKEND_RequestId( + TRITONBACKEND_Request request, @Cast("const char**") PointerPointer id); +public static native IntPointer TRITONBACKEND_RequestId( + TRITONBACKEND_Request request, @Cast("const char**") @ByPtrPtr BytePointer id); +public static native IntBuffer TRITONBACKEND_RequestId( + TRITONBACKEND_Request request, @Cast("const char**") @ByPtrPtr ByteBuffer id); +public static native int[] TRITONBACKEND_RequestId( + TRITONBACKEND_Request request, @Cast("const char**") @ByPtrPtr byte[] id); + +/** Get the correlation ID of the request. Zero indicates that the + * request does not have a correlation ID. + * + * @param request The inference request. + * @param id Returns the correlation ID. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native IntPointer TRITONBACKEND_RequestCorrelationId( + TRITONBACKEND_Request request, @Cast("uint64_t*") LongPointer id); +public static native IntBuffer TRITONBACKEND_RequestCorrelationId( + TRITONBACKEND_Request request, @Cast("uint64_t*") LongBuffer id); +public static native int[] TRITONBACKEND_RequestCorrelationId( + TRITONBACKEND_Request request, @Cast("uint64_t*") long[] id); + +/** Get the number of input tensors specified in the request. + * + * @param request The inference request. + * @param count Returns the number of input tensors. + * @return a TRITONSERVER_Error indicating success or failure. 
*/ + +/// +public static native IntPointer TRITONBACKEND_RequestInputCount( + TRITONBACKEND_Request request, @Cast("uint32_t*") IntPointer count); +public static native IntBuffer TRITONBACKEND_RequestInputCount( + TRITONBACKEND_Request request, @Cast("uint32_t*") IntBuffer count); +public static native int[] TRITONBACKEND_RequestInputCount( + TRITONBACKEND_Request request, @Cast("uint32_t*") int[] count); + +/** Get the name of an input tensor. The caller does not own + * the returned string and must not modify or delete it. The lifetime + * of the returned string extends only as long as 'request'. + * + * @param request The inference request. + * @param index The index of the input tensor. Must be 0 <= index < + * count, where count is the value returned by + * TRITONBACKEND_RequestInputCount. + * @param input_name Returns the name of the input tensor + * corresponding to the index. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native IntPointer TRITONBACKEND_RequestInputName( + TRITONBACKEND_Request request, @Cast("const uint32_t") int index, + @Cast("const char**") PointerPointer input_name); +public static native IntPointer TRITONBACKEND_RequestInputName( + TRITONBACKEND_Request request, @Cast("const uint32_t") int index, + @Cast("const char**") @ByPtrPtr BytePointer input_name); +public static native IntBuffer TRITONBACKEND_RequestInputName( + TRITONBACKEND_Request request, @Cast("const uint32_t") int index, + @Cast("const char**") @ByPtrPtr ByteBuffer input_name); +public static native int[] TRITONBACKEND_RequestInputName( + TRITONBACKEND_Request request, @Cast("const uint32_t") int index, + @Cast("const char**") @ByPtrPtr byte[] input_name); + +/** Get a named request input. The lifetime of the returned input + * object matches that of the request and so the input object should + * not be accessed after the request object is released. + * + * @param request The inference request. 
+ * @param name The name of the input. + * @param input Returns the input corresponding to the name. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +/// +public static native IntPointer TRITONBACKEND_RequestInput( + TRITONBACKEND_Request request, String name, + @Cast("TRITONBACKEND_Input**") PointerPointer input); +public static native IntPointer TRITONBACKEND_RequestInput( + TRITONBACKEND_Request request, String name, + @ByPtrPtr TRITONBACKEND_Input input); +public static native IntBuffer TRITONBACKEND_RequestInput( + TRITONBACKEND_Request request, @Cast("const char*") BytePointer name, + @ByPtrPtr TRITONBACKEND_Input input); + +/** Get a request input by index. The order of inputs in a given + * request is not necessarily consistent with other requests, even if + * the requests are in the same batch. As a result, you can not + * assume that an index obtained from one request will point to the + * same input in a different request. + * + * The lifetime of the returned input object matches that of the + * request and so the input object should not be accessed after the + * request object is released. + * + * @param request The inference request. + * @param index The index of the input tensor. Must be 0 <= index < + * count, where count is the value returned by + * TRITONBACKEND_RequestInputCount. + * @param input Returns the input corresponding to the index. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native IntPointer TRITONBACKEND_RequestInputByIndex( + TRITONBACKEND_Request request, @Cast("const uint32_t") int index, + @Cast("TRITONBACKEND_Input**") PointerPointer input); +public static native IntPointer TRITONBACKEND_RequestInputByIndex( + TRITONBACKEND_Request request, @Cast("const uint32_t") int index, + @ByPtrPtr TRITONBACKEND_Input input); + +/** Get the number of output tensors requested to be returned in the + * request. + * + * @param request The inference request. 
+ * @param count Returns the number of output tensors. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native IntPointer TRITONBACKEND_RequestOutputCount( + TRITONBACKEND_Request request, @Cast("uint32_t*") IntPointer count); +public static native IntBuffer TRITONBACKEND_RequestOutputCount( + TRITONBACKEND_Request request, @Cast("uint32_t*") IntBuffer count); +public static native int[] TRITONBACKEND_RequestOutputCount( + TRITONBACKEND_Request request, @Cast("uint32_t*") int[] count); + +/** Get the name of a requested output tensor. The caller does not own + * the returned string and must not modify or delete it. The lifetime + * of the returned string extends only as long as 'request'. + * + * @param request The inference request. + * @param index The index of the requested output tensor. Must be 0 + * <= index < count, where count is the value returned by + * TRITONBACKEND_RequestOutputCount. + * @param output_name Returns the name of the requested output tensor + * corresponding to the index. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native IntPointer TRITONBACKEND_RequestOutputName( + TRITONBACKEND_Request request, @Cast("const uint32_t") int index, + @Cast("const char**") PointerPointer output_name); +public static native IntPointer TRITONBACKEND_RequestOutputName( + TRITONBACKEND_Request request, @Cast("const uint32_t") int index, + @Cast("const char**") @ByPtrPtr BytePointer output_name); +public static native IntBuffer TRITONBACKEND_RequestOutputName( + TRITONBACKEND_Request request, @Cast("const uint32_t") int index, + @Cast("const char**") @ByPtrPtr ByteBuffer output_name); +public static native int[] TRITONBACKEND_RequestOutputName( + TRITONBACKEND_Request request, @Cast("const uint32_t") int index, + @Cast("const char**") @ByPtrPtr byte[] output_name); + +/** Release the request. The request should be released when it is no + * longer needed by the backend. 
If this call returns with an error + * (i.e. non-nullptr) then the request was not released and ownership + * remains with the backend. If this call returns with success, the + * 'request' object is no longer owned by the backend and must not be + * used. Any tensor names, data types, shapes, input tensors, + * etc. returned by TRITONBACKEND_Request* functions for this request + * are no longer valid. If a persistent copy of that data is required + * it must be created before calling this function. + * + * @param request The inference request. + * @param release_flags Flags indicating what type of request release + * should be performed. @see TRITONSERVER_RequestReleaseFlag. @see + * TRITONSERVER_InferenceRequestReleaseFn_t. + * @return a TRITONSERVER_Error indicating success or failure. */ + + +/// +/// +/// +public static native IntPointer TRITONBACKEND_RequestRelease( + TRITONBACKEND_Request request, @Cast("uint32_t") int release_flags); + +/** + * TRITONBACKEND_ResponseFactory + * + * Object representing an inference response factory. Using a + * response factory is not required; instead a response can be + * generated directly from a TRITONBACKEND_Request object using + * TRITONBACKEND_ResponseNew(). A response factory allows a request + * to be released before all responses have been sent. Releasing a + * request as early as possible releases all input tensor data and + * therefore may be desirable in some cases. +

+ * Create the response factory associated with a request. + * + * @param factory Returns the new response factory. + * @param request The inference request. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native IntPointer TRITONBACKEND_ResponseFactoryNew( + @Cast("TRITONBACKEND_ResponseFactory**") PointerPointer factory, TRITONBACKEND_Request request); +public static native IntPointer TRITONBACKEND_ResponseFactoryNew( + @ByPtrPtr TRITONBACKEND_ResponseFactory factory, TRITONBACKEND_Request request); + +/** Destroy a response factory. + * + * @param factory The response factory. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native IntPointer TRITONBACKEND_ResponseFactoryDelete( + TRITONBACKEND_ResponseFactory factory); + +/** Send response flags without a corresponding response. + * + * @param factory The response factory. + * @param send_flags Flags to send. @see + * TRITONSERVER_ResponseCompleteFlag. @see + * TRITONSERVER_InferenceResponseCompleteFn_t. + * @return a TRITONSERVER_Error indicating success or failure. */ + + +/// +/// +/// +/// +/// +public static native IntPointer TRITONBACKEND_ResponseFactorySendFlags( + TRITONBACKEND_ResponseFactory factory, @Cast("const uint32_t") int send_flags); + +/** + * TRITONBACKEND_Response + * + * Object representing an inference response. For a given request, + * the backend must carefully manage the lifecycle of responses + * generated for that request to ensure that the output tensor + * buffers are allocated correctly. When a response is created with + * TRITONBACKEND_ResponseNew or TRITONBACKEND_ResponseNewFromFactory, + * all the outputs and corresponding buffers must be created for that + * response using TRITONBACKEND_ResponseOutput and + * TRITONBACKEND_OutputBuffer *before* another response is created + * for the request. 
For a given response, outputs can be created in + * any order but they must be created sequentially/sychronously (for + * example, the backend cannot use multiple threads to simultaneously + * add multiple outputs to a response). + * + * The above requirement applies only to responses being generated + * for a given request. The backend may generate responses in + * parallel on multiple threads as long as those responses are for + * different requests. + * + * This order of response creation must be strictly followed. But, + * once response(s) are created they do not need to be sent + * immediately, nor do they need to be sent in the order they were + * created. The backend may even delete a created response instead of + * sending it by using TRITONBACKEND_ResponseDelete. +

+ * Create a response for a request. + * + * @param response Returns the new response. + * @param request The request. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native IntPointer TRITONBACKEND_ResponseNew( + @Cast("TRITONBACKEND_Response**") PointerPointer response, TRITONBACKEND_Request request); +public static native IntPointer TRITONBACKEND_ResponseNew( + @ByPtrPtr TRITONBACKEND_Response response, TRITONBACKEND_Request request); + +/** Create a response using a factory. + * + * @param response Returns the new response. + * @param factory The response factory. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native IntPointer TRITONBACKEND_ResponseNewFromFactory( + @Cast("TRITONBACKEND_Response**") PointerPointer response, TRITONBACKEND_ResponseFactory factory); +public static native IntPointer TRITONBACKEND_ResponseNewFromFactory( + @ByPtrPtr TRITONBACKEND_Response response, TRITONBACKEND_ResponseFactory factory); + +/** Destroy a response. It is not necessary to delete a response if + * TRITONBACKEND_ResponseSend is called as that function transfers + * ownership of the response object to Triton. + * + * @param response The response. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native IntPointer TRITONBACKEND_ResponseDelete( + TRITONBACKEND_Response response); + +/** Set a string parameter in the response. + * + * @param response The response. + * @param name The name of the parameter. + * @param value The value of the parameter. + * @return a TRITONSERVER_Error indicating success or failure. 
*/ + +/// +public static native IntPointer TRITONBACKEND_ResponseSetStringParameter( + TRITONBACKEND_Response response, String name, String value); +public static native IntBuffer TRITONBACKEND_ResponseSetStringParameter( + TRITONBACKEND_Response response, @Cast("const char*") BytePointer name, @Cast("const char*") BytePointer value); + +/** Set an integer parameter in the response. + * + * @param response The response. + * @param name The name of the parameter. + * @param value The value of the parameter. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native IntPointer TRITONBACKEND_ResponseSetIntParameter( + TRITONBACKEND_Response response, String name, @Cast("const int64_t") long value); +public static native IntBuffer TRITONBACKEND_ResponseSetIntParameter( + TRITONBACKEND_Response response, @Cast("const char*") BytePointer name, @Cast("const int64_t") long value); + +/** Set an boolean parameter in the response. + * + * @param response The response. + * @param name The name of the parameter. + * @param value The value of the parameter. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native IntPointer TRITONBACKEND_ResponseSetBoolParameter( + TRITONBACKEND_Response response, String name, @Cast("const bool") boolean value); +public static native IntBuffer TRITONBACKEND_ResponseSetBoolParameter( + TRITONBACKEND_Response response, @Cast("const char*") BytePointer name, @Cast("const bool") boolean value); + +/** Create an output tensor in the response. The lifetime of the + * returned output tensor object matches that of the response and so + * the output tensor object should not be accessed after the response + * object is deleted. + * + * @param response The response. + * @param output Returns the new response output. + * @param name The name of the output tensor. + * @param datatype The datatype of the output tensor. + * @param shape The shape of the output tensor. 
+ * @param dims_count The number of dimensions in the output tensor + * shape. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native IntPointer TRITONBACKEND_ResponseOutput( + TRITONBACKEND_Response response, @Cast("TRITONBACKEND_Output**") PointerPointer output, + String name, @Const @ByVal TRITONSERVER_DataType datatype, + @Cast("const int64_t*") LongPointer shape, @Cast("const uint32_t") int dims_count); +public static native IntPointer TRITONBACKEND_ResponseOutput( + TRITONBACKEND_Response response, @ByPtrPtr TRITONBACKEND_Output output, + String name, @Const @ByVal TRITONSERVER_DataType datatype, + @Cast("const int64_t*") LongPointer shape, @Cast("const uint32_t") int dims_count); +public static native IntBuffer TRITONBACKEND_ResponseOutput( + TRITONBACKEND_Response response, @ByPtrPtr TRITONBACKEND_Output output, + @Cast("const char*") BytePointer name, @Const @ByVal TRITONSERVER_DataType datatype, + @Cast("const int64_t*") LongBuffer shape, @Cast("const uint32_t") int dims_count); +public static native int[] TRITONBACKEND_ResponseOutput( + TRITONBACKEND_Response response, @ByPtrPtr TRITONBACKEND_Output output, + String name, @Const @ByVal TRITONSERVER_DataType datatype, + @Cast("const int64_t*") long[] shape, @Cast("const uint32_t") int dims_count); +public static native IntPointer TRITONBACKEND_ResponseOutput( + TRITONBACKEND_Response response, @ByPtrPtr TRITONBACKEND_Output output, + @Cast("const char*") BytePointer name, @Const @ByVal TRITONSERVER_DataType datatype, + @Cast("const int64_t*") LongPointer shape, @Cast("const uint32_t") int dims_count); +public static native IntBuffer TRITONBACKEND_ResponseOutput( + TRITONBACKEND_Response response, @ByPtrPtr TRITONBACKEND_Output output, + String name, @Const @ByVal TRITONSERVER_DataType datatype, + @Cast("const int64_t*") LongBuffer shape, @Cast("const uint32_t") int dims_count); +public static native int[] TRITONBACKEND_ResponseOutput( + TRITONBACKEND_Response 
response, @ByPtrPtr TRITONBACKEND_Output output, + @Cast("const char*") BytePointer name, @Const @ByVal TRITONSERVER_DataType datatype, + @Cast("const int64_t*") long[] shape, @Cast("const uint32_t") int dims_count); + +/** Send a response. Calling this function transfers ownership of the + * response object to Triton. The caller must not access or delete + * the response object after calling this function. + * + * @param response The response. + * @param send_flags Flags associated with the response. @see + * TRITONSERVER_ResponseCompleteFlag. @see + * TRITONSERVER_InferenceResponseCompleteFn_t. + * @param error The TRITONSERVER_Error to send if the response is an + * error, or nullptr if the response is successful. + * @return a TRITONSERVER_Error indicating success or failure. */ + + +/// +/// +/// +/// +/// +/// +public static native IntPointer TRITONBACKEND_ResponseSend( + TRITONBACKEND_Response response, @Cast("const uint32_t") int send_flags, + TRITONSERVER_Error error); + +/** + * TRITONBACKEND_Backend + * + * Object representing a backend. + * +

+ * TRITONBACKEND_ExecutionPolicy + * + * Types of execution policy that can be implemented by a backend. + * + * TRITONBACKEND_EXECUTION_BLOCKING: An instance of the model + * blocks in TRITONBACKEND_ModelInstanceExecute until it is ready + * to handle another inference. Upon returning from + * TRITONBACKEND_ModelInstanceExecute, Triton may immediately + * call TRITONBACKEND_ModelInstanceExecute for the same instance + * to execute a new batch of requests. Thus, most backends using + * this policy will not return from + * TRITONBACKEND_ModelInstanceExecute until all responses have + * been sent and all requests have been released. This is the + * default execution policy. + * */ +public enum TRITONBACKEND_ExecutionPolicy { + TRITONBACKEND_EXECUTION_BLOCKING(0); + + public final int value; + private TRITONBACKEND_ExecutionPolicy(int v) { this.value = v; } + private TRITONBACKEND_ExecutionPolicy(TRITONBACKEND_ExecutionPolicy e) { this.value = e.value; } + public TRITONBACKEND_ExecutionPolicy intern() { for (TRITONBACKEND_ExecutionPolicy e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + +/** Get the name of the backend. The caller does not own the returned + * string and must not modify or delete it. The lifetime of the + * returned string extends only as long as 'backend'. + * + * @param backend The backend. + * @param name Returns the name of the backend. + * @return a TRITONSERVER_Error indicating success or failure. 
*/ + +/// +/// +/// +public static native IntPointer TRITONBACKEND_BackendName( + TRITONBACKEND_Backend backend, @Cast("const char**") PointerPointer name); +public static native IntPointer TRITONBACKEND_BackendName( + TRITONBACKEND_Backend backend, @Cast("const char**") @ByPtrPtr BytePointer name); +public static native IntBuffer TRITONBACKEND_BackendName( + TRITONBACKEND_Backend backend, @Cast("const char**") @ByPtrPtr ByteBuffer name); +public static native int[] TRITONBACKEND_BackendName( + TRITONBACKEND_Backend backend, @Cast("const char**") @ByPtrPtr byte[] name); + +/** Get the backend configuration. The 'backend_config' message is + * owned by Triton and should not be modified or freed by the caller. + * + * The backend configuration, as JSON, is: + * + * { + * "cmdline" : { + * "" : "", + * ... + * } + * } + * + * @param backend The backend. + * @param backend_config Returns the backend configuration as a message. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native IntPointer TRITONBACKEND_BackendConfig( + TRITONBACKEND_Backend backend, @Cast("TRITONSERVER_Message**") PointerPointer backend_config); +public static native IntPointer TRITONBACKEND_BackendConfig( + TRITONBACKEND_Backend backend, @ByPtrPtr TRITONSERVER_Message backend_config); + +/** Get the execution policy for this backend. By default the + * execution policy is TRITONBACKEND_EXECUTION_BLOCKING. + * + * @param backend The backend. + * @param policy Returns the execution policy. + * @return a TRITONSERVER_Error indicating success or failure. 
*/ + +/// +public static native IntPointer TRITONBACKEND_BackendExecutionPolicy( + TRITONBACKEND_Backend backend, @Cast("TRITONBACKEND_ExecutionPolicy*") IntPointer policy); +public static native IntBuffer TRITONBACKEND_BackendExecutionPolicy( + TRITONBACKEND_Backend backend, @Cast("TRITONBACKEND_ExecutionPolicy*") IntBuffer policy); +public static native int[] TRITONBACKEND_BackendExecutionPolicy( + TRITONBACKEND_Backend backend, @Cast("TRITONBACKEND_ExecutionPolicy*") int[] policy); + +/** Set the execution policy for this backend. By default the + * execution policy is TRITONBACKEND_EXECUTION_BLOCKING. Triton reads + * the backend's execution policy after calling + * TRITONBACKEND_Initialize, so to be recognized changes to the + * execution policy must be made in TRITONBACKEND_Initialize. + * + * @param backend The backend. + * @param policy The execution policy. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +/// +public static native IntPointer TRITONBACKEND_BackendSetExecutionPolicy( + TRITONBACKEND_Backend backend, TRITONBACKEND_ExecutionPolicy policy); +public static native IntBuffer TRITONBACKEND_BackendSetExecutionPolicy( + TRITONBACKEND_Backend backend, @Cast("TRITONBACKEND_ExecutionPolicy") int policy); + +/** Get the location of the files that make up the backend + * implementation. This location contains the backend shared library + * and any other files located with the shared library. The + * 'location' communicated depends on how the backend is being + * communicated to Triton as indicated by 'artifact_type'. + * + * TRITONBACKEND_ARTIFACT_FILESYSTEM: The backend artifacts are + * made available to Triton via the local filesytem. 'location' + * returns the full path to the directory containing this + * backend's artifacts. The returned string is owned by Triton, + * not the caller, and so should not be modified or freed. + * + * @param backend The backend. 
+ * @param artifact_type Returns the artifact type for the backend. + * @param path Returns the location. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native IntPointer TRITONBACKEND_BackendArtifacts( + TRITONBACKEND_Backend backend, @Cast("TRITONBACKEND_ArtifactType*") IntPointer artifact_type, + @Cast("const char**") PointerPointer location); +public static native IntPointer TRITONBACKEND_BackendArtifacts( + TRITONBACKEND_Backend backend, @Cast("TRITONBACKEND_ArtifactType*") IntPointer artifact_type, + @Cast("const char**") @ByPtrPtr BytePointer location); +public static native IntBuffer TRITONBACKEND_BackendArtifacts( + TRITONBACKEND_Backend backend, @Cast("TRITONBACKEND_ArtifactType*") IntBuffer artifact_type, + @Cast("const char**") @ByPtrPtr ByteBuffer location); +public static native int[] TRITONBACKEND_BackendArtifacts( + TRITONBACKEND_Backend backend, @Cast("TRITONBACKEND_ArtifactType*") int[] artifact_type, + @Cast("const char**") @ByPtrPtr byte[] location); + +/** Get the memory manager associated with a backend. + * + * @param backend The backend. + * @param manager Returns the memory manager. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native IntPointer TRITONBACKEND_BackendMemoryManager( + TRITONBACKEND_Backend backend, @Cast("TRITONBACKEND_MemoryManager**") PointerPointer manager); +public static native IntPointer TRITONBACKEND_BackendMemoryManager( + TRITONBACKEND_Backend backend, @ByPtrPtr TRITONBACKEND_MemoryManager manager); + +/** Get the user-specified state associated with the backend. The + * state is completely owned and managed by the backend. + * + * @param backend The backend. + * @param state Returns the user state, or nullptr if no user state. + * @return a TRITONSERVER_Error indicating success or failure. 
*/ + +/// +public static native IntPointer TRITONBACKEND_BackendState( + TRITONBACKEND_Backend backend, @Cast("void**") PointerPointer state); +public static native IntPointer TRITONBACKEND_BackendState( + TRITONBACKEND_Backend backend, @Cast("void**") @ByPtrPtr Pointer state); + +/** Set the user-specified state associated with the backend. The + * state is completely owned and managed by the backend. + * + * @param backend The backend. + * @param state The user state, or nullptr if no user state. + * @return a TRITONSERVER_Error indicating success or failure. */ + + +/// +/// +/// +/// +public static native IntPointer TRITONBACKEND_BackendSetState( + TRITONBACKEND_Backend backend, Pointer state); + +/** + * TRITONBACKEND_Model + * + * Object representing a model implemented using the backend. + * +

+ * Get the name of the model. The returned string is owned by the + * model object, not the caller, and so should not be modified or + * freed. + * + * @param model The model. + * @param name Returns the model name. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native IntPointer TRITONBACKEND_ModelName( + TRITONBACKEND_Model model, @Cast("const char**") PointerPointer name); +public static native IntPointer TRITONBACKEND_ModelName( + TRITONBACKEND_Model model, @Cast("const char**") @ByPtrPtr BytePointer name); +public static native IntBuffer TRITONBACKEND_ModelName( + TRITONBACKEND_Model model, @Cast("const char**") @ByPtrPtr ByteBuffer name); +public static native int[] TRITONBACKEND_ModelName( + TRITONBACKEND_Model model, @Cast("const char**") @ByPtrPtr byte[] name); + +/** Get the version of the model. + * + * @param model The model. + * @param version Returns the model version. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +/// +public static native IntPointer TRITONBACKEND_ModelVersion( + TRITONBACKEND_Model model, @Cast("uint64_t*") LongPointer version); +public static native IntBuffer TRITONBACKEND_ModelVersion( + TRITONBACKEND_Model model, @Cast("uint64_t*") LongBuffer version); +public static native int[] TRITONBACKEND_ModelVersion( + TRITONBACKEND_Model model, @Cast("uint64_t*") long[] version); + +/** Get the location of the files that make up the model. The + * 'location' communicated depends on how the model is being + * communicated to Triton as indicated by 'artifact_type'. + * + * TRITONBACKEND_ARTIFACT_FILESYSTEM: The model artifacts are made + * available to Triton via the local filesytem. 'location' + * returns the full path to the directory in the model repository + * that contains this model's artifacts. The returned string is + * owned by Triton, not the caller, and so should not be modified + * or freed. + * + * @param model The model. 
+ * @param artifact_type Returns the artifact type for the model. + * @param path Returns the location. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native IntPointer TRITONBACKEND_ModelRepository( + TRITONBACKEND_Model model, @Cast("TRITONBACKEND_ArtifactType*") IntPointer artifact_type, + @Cast("const char**") PointerPointer location); +public static native IntPointer TRITONBACKEND_ModelRepository( + TRITONBACKEND_Model model, @Cast("TRITONBACKEND_ArtifactType*") IntPointer artifact_type, + @Cast("const char**") @ByPtrPtr BytePointer location); +public static native IntBuffer TRITONBACKEND_ModelRepository( + TRITONBACKEND_Model model, @Cast("TRITONBACKEND_ArtifactType*") IntBuffer artifact_type, + @Cast("const char**") @ByPtrPtr ByteBuffer location); +public static native int[] TRITONBACKEND_ModelRepository( + TRITONBACKEND_Model model, @Cast("TRITONBACKEND_ArtifactType*") int[] artifact_type, + @Cast("const char**") @ByPtrPtr byte[] location); + +/** Get the model configuration. The caller takes ownership of the + * message object and must call TRITONSERVER_MessageDelete to release + * the object. The configuration is available via this call even + * before the model is loaded and so can be used in + * TRITONBACKEND_ModelInitialize. TRITONSERVER_ServerModelConfig + * returns equivalent information but is not useable until after the + * model loads. + * + * @param model The model. + * @param config_version The model configuration will be returned in + * a format matching this version. If the configuration cannot be + * represented in the requested version's format then an error will + * be returned. Currently only version 1 is supported. + * @param model_config Returns the model configuration as a message. + * @return a TRITONSERVER_Error indicating success or failure. 
*/ + +/// +public static native IntPointer TRITONBACKEND_ModelConfig( + TRITONBACKEND_Model model, @Cast("const uint32_t") int config_version, + @Cast("TRITONSERVER_Message**") PointerPointer model_config); +public static native IntPointer TRITONBACKEND_ModelConfig( + TRITONBACKEND_Model model, @Cast("const uint32_t") int config_version, + @ByPtrPtr TRITONSERVER_Message model_config); + +/** Whether the backend should attempt to auto-complete the model configuration. + * If true, the model should fill the inputs, outputs, and max batch size in + * the model configuration if incomplete. If the model configuration is + * changed, the new configuration must be reported to Triton using + * TRITONBACKEND_ModelSetConfig. + * + * @param model The model. + * @param auto_complete_config Returns whether the backend should auto-complete + * the model configuration. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native IntPointer TRITONBACKEND_ModelAutoCompleteConfig( + TRITONBACKEND_Model model, @Cast("bool*") BoolPointer auto_complete_config); +public static native IntBuffer TRITONBACKEND_ModelAutoCompleteConfig( + TRITONBACKEND_Model model, @Cast("bool*") boolean[] auto_complete_config); + +/** Set the model configuration in Triton server. Only the inputs, outputs, + * and max batch size can be changed. Any other changes to the model + * configuration will be ignored by Triton. This function can only be called + * from TRITONBACKEND_ModelInitialize, calling in any other context will result + * in an error being returned. The function does not take ownership of the + * message object and so the caller should call TRITONSERVER_MessageDelete to + * release the object once the function returns. + * + * @param model The model. + * @param config_version The format version of the model configuration. + * If the configuration is not represented in the version's format + * then an error will be returned. 
Currently only version 1 is supported. + * @param model_config The updated model configuration as a message. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native IntPointer TRITONBACKEND_ModelSetConfig( + TRITONBACKEND_Model model, @Cast("const uint32_t") int config_version, + TRITONSERVER_Message model_config); + +/** Get the TRITONSERVER_Server object that this model is being served + * by. + * + * @param model The model. + * @param server Returns the server. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native IntPointer TRITONBACKEND_ModelServer( + TRITONBACKEND_Model model, @Cast("TRITONSERVER_Server**") PointerPointer server); +public static native IntPointer TRITONBACKEND_ModelServer( + TRITONBACKEND_Model model, @ByPtrPtr TRITONSERVER_Server server); + +/** Get the backend used by the model. + * + * @param model The model. + * @param model Returns the backend object. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native IntPointer TRITONBACKEND_ModelBackend( + TRITONBACKEND_Model model, @Cast("TRITONBACKEND_Backend**") PointerPointer backend); +public static native IntPointer TRITONBACKEND_ModelBackend( + TRITONBACKEND_Model model, @ByPtrPtr TRITONBACKEND_Backend backend); + +/** Get the user-specified state associated with the model. The + * state is completely owned and managed by the backend. + * + * @param model The model. + * @param state Returns the user state, or nullptr if no user state. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native IntPointer TRITONBACKEND_ModelState( + TRITONBACKEND_Model model, @Cast("void**") PointerPointer state); +public static native IntPointer TRITONBACKEND_ModelState( + TRITONBACKEND_Model model, @Cast("void**") @ByPtrPtr Pointer state); + +/** Set the user-specified state associated with the model. 
The + * state is completely owned and managed by the backend. + * + * @param model The model. + * @param state The user state, or nullptr if no user state. + * @return a TRITONSERVER_Error indicating success or failure. */ + + +/// +/// +/// +/// +public static native IntPointer TRITONBACKEND_ModelSetState( + TRITONBACKEND_Model model, Pointer state); + +/** + * TRITONBACKEND_ModelInstance + * + * Object representing a model instance implemented using the + * backend. + * +

+ * Get the name of the model instance. The returned string is owned by the + * model object, not the caller, and so should not be modified or + * freed. + * + * @param instance The model instance. + * @param name Returns the instance name. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native IntPointer TRITONBACKEND_ModelInstanceName( + TRITONBACKEND_ModelInstance instance, @Cast("const char**") PointerPointer name); +public static native IntPointer TRITONBACKEND_ModelInstanceName( + TRITONBACKEND_ModelInstance instance, @Cast("const char**") @ByPtrPtr BytePointer name); +public static native IntBuffer TRITONBACKEND_ModelInstanceName( + TRITONBACKEND_ModelInstance instance, @Cast("const char**") @ByPtrPtr ByteBuffer name); +public static native int[] TRITONBACKEND_ModelInstanceName( + TRITONBACKEND_ModelInstance instance, @Cast("const char**") @ByPtrPtr byte[] name); + +/** Get the kind of the model instance. + * + * @param instance The model instance. + * @param kind Returns the instance kind. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native IntPointer TRITONBACKEND_ModelInstanceKind( + TRITONBACKEND_ModelInstance instance, + TRITONSERVER_InstanceGroupKind kind); + +/** Get the device ID of the model instance. + * + * @param instance The model instance. + * @param device_id Returns the instance device ID. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +/// +/// +public static native IntPointer TRITONBACKEND_ModelInstanceDeviceId( + TRITONBACKEND_ModelInstance instance, IntPointer device_id); +public static native IntBuffer TRITONBACKEND_ModelInstanceDeviceId( + TRITONBACKEND_ModelInstance instance, IntBuffer device_id); +public static native int[] TRITONBACKEND_ModelInstanceDeviceId( + TRITONBACKEND_ModelInstance instance, int[] device_id); + +/** Get the host policy setting. 
The 'host_policy' message is + * owned by Triton and should not be modified or freed by the caller. + * + * The host policy setting, as JSON, is: + * + * { + * "" : { + * "" : "", + * ... + * } + * } + * + * @param instance The model instance. + * @param host_policy Returns the host policy setting as a message. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native IntPointer TRITONBACKEND_ModelInstanceHostPolicy( + TRITONBACKEND_ModelInstance instance, @Cast("TRITONSERVER_Message**") PointerPointer host_policy); +public static native IntPointer TRITONBACKEND_ModelInstanceHostPolicy( + TRITONBACKEND_ModelInstance instance, @ByPtrPtr TRITONSERVER_Message host_policy); + +/** Whether the model instance is passive. + * + * @param instance The model instance. + * @param is_passive Returns true if the instance is passive, false otherwise + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native IntPointer TRITONBACKEND_ModelInstanceIsPassive( + TRITONBACKEND_ModelInstance instance, @Cast("bool*") BoolPointer is_passive); +public static native IntBuffer TRITONBACKEND_ModelInstanceIsPassive( + TRITONBACKEND_ModelInstance instance, @Cast("bool*") boolean[] is_passive); + +/** Get the number of optimization profiles to be loaded for the instance. + * + * @param instance The model instance. + * @param count Returns the number of optimization profiles. + * @return a TRITONSERVER_Error indicating success or failure. 
*/ + +/// +public static native IntPointer TRITONBACKEND_ModelInstanceProfileCount( + TRITONBACKEND_ModelInstance instance, @Cast("uint32_t*") IntPointer count); +public static native IntBuffer TRITONBACKEND_ModelInstanceProfileCount( + TRITONBACKEND_ModelInstance instance, @Cast("uint32_t*") IntBuffer count); +public static native int[] TRITONBACKEND_ModelInstanceProfileCount( + TRITONBACKEND_ModelInstance instance, @Cast("uint32_t*") int[] count); + +/** Get the name of optimization profile. The caller does not own + * the returned string and must not modify or delete it. The lifetime + * of the returned string extends only as long as 'instance'. + * + * @param instance The model instance. + * @param index The index of the optimization profile. Must be 0 + * <= index < count, where count is the value returned by + * TRITONBACKEND_ModelInstanceProfileCount. + * @param profile_name Returns the name of the optimization profile + * corresponding to the index. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native IntPointer TRITONBACKEND_ModelInstanceProfileName( + TRITONBACKEND_ModelInstance instance, @Cast("const uint32_t") int index, + @Cast("const char**") PointerPointer profile_name); +public static native IntPointer TRITONBACKEND_ModelInstanceProfileName( + TRITONBACKEND_ModelInstance instance, @Cast("const uint32_t") int index, + @Cast("const char**") @ByPtrPtr BytePointer profile_name); +public static native IntBuffer TRITONBACKEND_ModelInstanceProfileName( + TRITONBACKEND_ModelInstance instance, @Cast("const uint32_t") int index, + @Cast("const char**") @ByPtrPtr ByteBuffer profile_name); +public static native int[] TRITONBACKEND_ModelInstanceProfileName( + TRITONBACKEND_ModelInstance instance, @Cast("const uint32_t") int index, + @Cast("const char**") @ByPtrPtr byte[] profile_name); + +/** Get the model associated with a model instance. + * + * @param instance The model instance. 
+ * @param model Returns the model object. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native IntPointer TRITONBACKEND_ModelInstanceModel( + TRITONBACKEND_ModelInstance instance, @Cast("TRITONBACKEND_Model**") PointerPointer model); +public static native IntPointer TRITONBACKEND_ModelInstanceModel( + TRITONBACKEND_ModelInstance instance, @ByPtrPtr TRITONBACKEND_Model model); + +/** Get the user-specified state associated with the model + * instance. The state is completely owned and managed by the + * backend. + * + * @param instance The model instance. + * @param state Returns the user state, or nullptr if no user state. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native IntPointer TRITONBACKEND_ModelInstanceState( + TRITONBACKEND_ModelInstance instance, @Cast("void**") PointerPointer state); +public static native IntPointer TRITONBACKEND_ModelInstanceState( + TRITONBACKEND_ModelInstance instance, @Cast("void**") @ByPtrPtr Pointer state); + +/** Set the user-specified state associated with the model + * instance. The state is completely owned and managed by the + * backend. + * + * @param instance The model instance. + * @param state The user state, or nullptr if no user state. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +/// +/// +/// +/// +public static native IntPointer TRITONBACKEND_ModelInstanceSetState( + TRITONBACKEND_ModelInstance instance, Pointer state); + +/** Record statistics for an inference request. + * + * Set 'success' true to indicate that the inference request + * completed successfully. In this case all timestamps should be + * non-zero values reported in nanoseconds and should be collected + * using std::chrono::steady_clock::now().time_since_epoch() or the equivalent. + * Set 'success' to false to indicate that the inference request failed + * to complete successfully. In this case all timestamp values are + * ignored. 
+ * + * For consistency of measurement across different backends, the + * timestamps should be collected at the following points during + * TRITONBACKEND_ModelInstanceExecute. + * + * TRITONBACKEND_ModelInstanceExecute() + * CAPTURE TIMESTAMP (exec_start_ns) + * < process input tensors to prepare them for inference + * execution, including copying the tensors to/from GPU if + * necessary> + * CAPTURE TIMESTAMP (compute_start_ns) + * < perform inference computations to produce outputs > + * CAPTURE TIMESTAMP (compute_end_ns) + * < allocate output buffers and extract output tensors, including + * copying the tensors to/from GPU if necessary> + * CAPTURE TIMESTAMP (exec_end_ns) + * return + * + * Note that these statistics are associated with a valid + * TRITONBACKEND_Request object and so must be reported before the + * request is released. For backends that release the request before + * all response(s) are sent, these statistics cannot capture + * information about the time required to produce the response. + * + * @param instance The model instance. + * @param request The inference request that statistics are being + * reported for. + * @param success True if the inference request completed + * successfully, false if it failed to complete. + * @param exec_start_ns Timestamp for the start of execution. + * @param compute_start_ns Timestamp for the start of execution + * computations. + * @param compute_end_ns Timestamp for the end of execution + * computations. + * @param exec_end_ns Timestamp for the end of execution. + * @return a TRITONSERVER_Error indicating success or failure. 
*/ + +/// +/// +/// +public static native IntPointer TRITONBACKEND_ModelInstanceReportStatistics( + TRITONBACKEND_ModelInstance instance, TRITONBACKEND_Request request, + @Cast("const bool") boolean success, @Cast("const uint64_t") long exec_start_ns, + @Cast("const uint64_t") long compute_start_ns, @Cast("const uint64_t") long compute_end_ns, + @Cast("const uint64_t") long exec_end_ns); + +/** Record statistics for the execution of an entire batch of + * inference requests. + * + * All timestamps should be non-zero values reported in nanoseconds + * and should be collected using + * std::chrono::steady_clock::now().time_since_epoch() or the equivalent. + * See TRITONBACKEND_ModelInstanceReportStatistics for more information about + * the timestamps. + * + * 'batch_size' is the sum of the batch sizes for the individual + * requests that were delivered together in the call to + * TRITONBACKEND_ModelInstanceExecute. For example, if three requests + * are passed to TRITONBACKEND_ModelInstanceExecute and those + * requests have batch size 1, 2, and 3; then 'batch_size' should be + * set to 6. + * + * @param instance The model instance. + * @param batch_size Combined batch size of all the individual + * requests executed in the batch. + * @param exec_start_ns Timestamp for the start of execution. + * @param compute_start_ns Timestamp for the start of execution + * computations. + * @param compute_end_ns Timestamp for the end of execution + * computations. + * @param exec_end_ns Timestamp for the end of execution. + * @return a TRITONSERVER_Error indicating success or failure. 
*/ + + + +/// +/// +/// +public static native IntPointer TRITONBACKEND_ModelInstanceReportBatchStatistics( + TRITONBACKEND_ModelInstance instance, @Cast("const uint64_t") long batch_size, + @Cast("const uint64_t") long exec_start_ns, @Cast("const uint64_t") long compute_start_ns, + @Cast("const uint64_t") long compute_end_ns, @Cast("const uint64_t") long exec_end_ns); + + +/** + * The following functions can be implemented by a backend. Functions + * indicated as required must be implemented or the backend will fail + * to load. + * +

+ * Initialize a backend. This function is optional, a backend is not + * required to implement it. This function is called once when a + * backend is loaded to allow the backend to initialize any state + * associated with the backend. A backend has a single state that is + * shared across all models that use the backend. + * + * @param backend The backend. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONBACKEND_Initialize( + TRITONBACKEND_Backend backend); + +/** Finalize for a backend. This function is optional, a backend is + * not required to implement it. This function is called once, just + * before the backend is unloaded. All state associated with the + * backend should be freed and any threads created for the backend + * should be exited/joined before returning from this function. + * + * @param backend The backend. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONBACKEND_Finalize( + TRITONBACKEND_Backend backend); + +/** Initialize for a model. This function is optional, a backend is + * not required to implement it. This function is called once when a + * model that uses the backend is loaded to allow the backend to + * initialize any state associated with the model. The backend should + * also examine the model configuration to determine if the + * configuration is suitable for the backend. Any errors reported by + * this function will prevent the model from loading. + * + * @param model The model. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONBACKEND_ModelInitialize( + TRITONBACKEND_Model model); + +/** Finalize for a model. This function is optional, a backend is not + * required to implement it. This function is called once for a + * model, just before the model is unloaded from Triton. 
All state + * associated with the model should be freed and any threads created + * for the model should be exited/joined before returning from this + * function. + * + * @param model The model. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONBACKEND_ModelFinalize( + TRITONBACKEND_Model model); + +/** Initialize for a model instance. This function is optional, a + * backend is not required to implement it. This function is called + * once when a model instance is created to allow the backend to + * initialize any state associated with the instance. + * + * @param instance The model instance. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceInitialize( + TRITONBACKEND_ModelInstance instance); + +/** Finalize for a model instance. This function is optional, a + * backend is not required to implement it. This function is called + * once for an instance, just before the corresponding model is + * unloaded from Triton. All state associated with the instance + * should be freed and any threads created for the instance should be + * exited/joined before returning from this function. + * + * @param instance The model instance. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +/// +/// +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceFinalize( + TRITONBACKEND_ModelInstance instance); + +/** Execute a batch of one or more requests on a model instance. This + * function is required. Triton will not perform multiple + * simultaneous calls to this function for a given model 'instance'; + * however, there may be simultaneous calls for different model + * instances (for the same or different models). 
+ * + * If an error is returned the ownership of the request objects + * remains with Triton and the backend must not retain references to + * the request objects or access them in any way. + * + * If success is returned, ownership of the request objects is + * transferred to the backend and it is then responsible for creating + * responses and releasing the request objects. + * + * @param instance The model instance. + * @param requests The requests. + * @param request_count The number of requests in the batch. + * @return a TRITONSERVER_Error indicating success or failure. */ +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceExecute( + TRITONBACKEND_ModelInstance instance, @Cast("TRITONBACKEND_Request**") PointerPointer requests, + @Cast("const uint32_t") int request_count); +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceExecute( + TRITONBACKEND_ModelInstance instance, @ByPtrPtr TRITONBACKEND_Request requests, + @Cast("const uint32_t") int request_count); + + +// #ifdef __cplusplus +// #endif + + +// Parsed from tritonrepoagent.h + +// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of NVIDIA CORPORATION nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// #pragma once + +// #include +// #include +// #include "triton/core/tritonserver.h" + +// #ifdef __cplusplus +// #endif + +// #ifdef _COMPILING_TRITONREPOAGENT +// #if defined(_MSC_VER) +// #define TRITONREPOAGENT_DECLSPEC __declspec(dllexport) +// #define TRITONREPOAGENT_ISPEC __declspec(dllimport) +// #elif defined(__GNUC__) +// #define TRITONREPOAGENT_DECLSPEC __attribute__((__visibility__("default"))) +// #define TRITONREPOAGENT_ISPEC +// #else +// #define TRITONREPOAGENT_DECLSPEC +// #define TRITONREPOAGENT_ISPEC +// #endif +// #else +// #if defined(_MSC_VER) +// #define TRITONREPOAGENT_DECLSPEC __declspec(dllimport) +// #define TRITONREPOAGENT_ISPEC __declspec(dllexport) +// #else +// #define TRITONREPOAGENT_DECLSPEC +// #define TRITONREPOAGENT_ISPEC +// Targeting ../tritonserver/TRITONREPOAGENT_Agent.java + + +// Targeting ../tritonserver/TRITONREPOAGENT_AgentModel.java + + + +/** + * TRITONREPOAGENT API Version + * + * The TRITONREPOAGENT API is versioned with major and minor version + * numbers. Any change to the API that does not impact backwards + * compatibility (for example, adding a non-required function) + * increases the minor version number. 
Any change that breaks + * backwards compatibility (for example, deleting or changing the + * behavior of a function) increases the major version number. A + * repository agent should check that the API version used to compile + * the agent is compatible with the API version of the Triton server + * that it is running in. This is typically done by code similar to + * the following which makes sure that the major versions are equal + * and that the minor version of Triton is >= the minor version used + * to build the agent. + * + * uint32_t api_version_major, api_version_minor; + * TRITONREPOAGENT_ApiVersion(&api_version_major, &api_version_minor); + * if ((api_version_major != TRITONREPOAGENT_API_VERSION_MAJOR) || + * (api_version_minor < TRITONREPOAGENT_API_VERSION_MINOR)) { + * return TRITONSERVER_ErrorNew( + * TRITONSERVER_ERROR_UNSUPPORTED, + * "triton repository agent API version does not support this agent"); + * } + * */ +public static final int TRITONREPOAGENT_API_VERSION_MAJOR = 0; + +/// +public static final int TRITONREPOAGENT_API_VERSION_MINOR = 1; + +/** Get the TRITONREPOAGENT API version supported by Triton. This + * value can be compared against the + * TRITONREPOAGENT_API_VERSION_MAJOR and + * TRITONREPOAGENT_API_VERSION_MINOR used to build the agent to + * ensure that Triton is compatible with the agent. + * + * @param major Returns the TRITONREPOAGENT API major version supported + * by Triton. + * @param minor Returns the TRITONREPOAGENT API minor version supported + * by Triton. + * @return a TRITONSERVER_Error indicating success or failure. 
*/ + +/// +/// +/// +/// +public static native TRITONSERVER_Error TRITONREPOAGENT_ApiVersion( + @Cast("uint32_t*") IntPointer major, @Cast("uint32_t*") IntPointer minor); +public static native TRITONSERVER_Error TRITONREPOAGENT_ApiVersion( + @Cast("uint32_t*") IntBuffer major, @Cast("uint32_t*") IntBuffer minor); +public static native TRITONSERVER_Error TRITONREPOAGENT_ApiVersion( + @Cast("uint32_t*") int[] major, @Cast("uint32_t*") int[] minor); + +/** TRITONREPOAGENT_ArtifactType + * + * The ways that the files that make up a model's repository content + * are communicated between Triton and the agent. + * + * TRITONREPOAGENT_ARTIFACT_FILESYSTEM: The model artifacts are + * communicated to and from the repository agent via a locally + * accessible filesystem. The agent can access these files using + * an appropriate filesystem API. + * + * TRITONREPOAGENT_ARTIFACT_REMOTE_FILESYSTEM: The model artifacts are + * communicated to and from the repository agent via a remote filesystem. + * The remote filesystem path follows the same convention as is used for + * repository paths, for example, "s3://" prefix indicates an S3 path. + * */ +public enum TRITONREPOAGENT_ArtifactType { + TRITONREPOAGENT_ARTIFACT_FILESYSTEM(0), + TRITONREPOAGENT_ARTIFACT_REMOTE_FILESYSTEM(1); + + public final int value; + private TRITONREPOAGENT_ArtifactType(int v) { this.value = v; } + private TRITONREPOAGENT_ArtifactType(TRITONREPOAGENT_ArtifactType e) { this.value = e.value; } + public TRITONREPOAGENT_ArtifactType intern() { for (TRITONREPOAGENT_ArtifactType e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + +/** TRITONREPOAGENT_ActionType + * + * Types of repository actions that can be handled by an agent. + * The lifecycle of a TRITONREPOAGENT_AgentModel begins with a call to + * TRITONREPOAGENT_ModelInitialize and ends with a call to + * TRITONREPOAGENT_ModelFinalize. 
Between those calls the current lifecycle + * state of the model is communicated by calls to TRITONREPOAGENT_ModelAction. + * Possible lifecycles are: + * + * LOAD -> LOAD_COMPLETE -> UNLOAD -> UNLOAD_COMPLETE + * LOAD -> LOAD_FAIL + * + * TRITONREPOAGENT_ACTION_LOAD: A model is being loaded. + * + * TRITONREPOAGENT_ACTION_LOAD_COMPLETE: The model load completed + * successfully and the model is now loaded. + * + * TRITONREPOAGENT_ACTION_LOAD_FAIL: The model load did not complete + * successfully. The model is not loaded. + * + * TRITONREPOAGENT_ACTION_UNLOAD: The model is being unloaded. + * + * TRITONREPOAGENT_ACTION_UNLOAD_COMPLETE: The model unload is complete. + * */ +public enum TRITONREPOAGENT_ActionType { + TRITONREPOAGENT_ACTION_LOAD(0), + TRITONREPOAGENT_ACTION_LOAD_COMPLETE(1), + TRITONREPOAGENT_ACTION_LOAD_FAIL(2), + TRITONREPOAGENT_ACTION_UNLOAD(3), + TRITONREPOAGENT_ACTION_UNLOAD_COMPLETE(4); + + public final int value; + private TRITONREPOAGENT_ActionType(int v) { this.value = v; } + private TRITONREPOAGENT_ActionType(TRITONREPOAGENT_ActionType e) { this.value = e.value; } + public TRITONREPOAGENT_ActionType intern() { for (TRITONREPOAGENT_ActionType e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + +/** Get the location of the files that make up the model. The + * 'location' communicated depends on how the model is being + * communicated to the agent as indicated by 'artifact_type'. + * + * TRITONREPOAGENT_ARTIFACT_FILESYSTEM: The model artifacts are + * made available to the agent via the local + * filesytem. 'location' returns the full path to the directory + * in the model repository that contains the model's + * artifacts. The returned location string is owned by Triton, + * not the caller, and so should not be modified or freed. The + * contents of the directory are owned by Triton, not the agent, + * and so the agent should not delete or modify the contents. 
Use + * TRITONREPOAGENT_ModelRepositoryLocationAcquire to get a location that can be + * used to modify the model repository contents. + * + * TRITONREPOAGENT_ARTIFACT_REMOTE_FILESYSTEM: The model artifacts are + * made available to the agent via a remote filesystem. + * 'location' returns the full path to the remote directory that contains + * the model's artifacts. The returned location string is owned by Triton, + * not the caller, and so should not be modified or freed. The contents of + * the remote directory are owned by Triton, not the agent, + * and so the agent should not delete or modify the contents. + * Use TRITONREPOAGENT_ModelRepositoryLocationAcquire to get a location + * that can be used to write updated model repository contents. + * + * @param agent The agent. + * @param model The model. + * @param artifact_type Returns the artifact type for the location. + * @param path Returns the location. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +/// +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocation( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + @Cast("TRITONREPOAGENT_ArtifactType*") IntPointer artifact_type, @Cast("const char**") PointerPointer location); +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocation( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + @Cast("TRITONREPOAGENT_ArtifactType*") IntPointer artifact_type, @Cast("const char**") @ByPtrPtr BytePointer location); +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocation( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + @Cast("TRITONREPOAGENT_ArtifactType*") IntBuffer artifact_type, @Cast("const char**") @ByPtrPtr ByteBuffer location); +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocation( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + @Cast("TRITONREPOAGENT_ArtifactType*") int[] artifact_type, 
@Cast("const char**") @ByPtrPtr byte[] location); + +/** Acquire a location where the agent can produce a new version of + * the model repository files. This is a convenience method to create + * a temporary directory for the agent. The agent is responsible for + * calling TRITONREPOAGENT_ModelRepositoryLocationDelete in + * TRITONREPOAGENT_ModelFinalize to delete the location. Initially the + * acquired location is empty. The 'location' communicated depends on + * the requested 'artifact_type'. + * + * TRITONREPOAGENT_ARTIFACT_FILESYSTEM: The location is a directory + * on the local filesystem. 'location' returns the full path to + * an empty directory that the agent should populate with the + * model's artifacts. The returned location string is owned by + * Triton, not the agent, and so should not be modified or freed. + * + * @param agent The agent. + * @param model The model. + * @param artifact_type The artifact type for the location. + * @param path Returns the location. + * @return a TRITONSERVER_Error indicating success or failure. 
*/ + +/// +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocationAcquire( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + TRITONREPOAGENT_ArtifactType artifact_type, @Cast("const char**") PointerPointer location); +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocationAcquire( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + TRITONREPOAGENT_ArtifactType artifact_type, @Cast("const char**") @ByPtrPtr BytePointer location); +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocationAcquire( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + @Cast("TRITONREPOAGENT_ArtifactType") int artifact_type, @Cast("const char**") @ByPtrPtr ByteBuffer location); +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocationAcquire( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + TRITONREPOAGENT_ArtifactType artifact_type, @Cast("const char**") @ByPtrPtr byte[] location); +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocationAcquire( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + @Cast("TRITONREPOAGENT_ArtifactType") int artifact_type, @Cast("const char**") @ByPtrPtr BytePointer location); +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocationAcquire( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + TRITONREPOAGENT_ArtifactType artifact_type, @Cast("const char**") @ByPtrPtr ByteBuffer location); +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocationAcquire( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + @Cast("TRITONREPOAGENT_ArtifactType") int artifact_type, @Cast("const char**") @ByPtrPtr byte[] location); + +/** Discard and release ownership of a previously acquired location + * and its contents. The agent must not access or modify the location + * or its contents after this call. 
+ * + * @param agent The agent. + * @param model The model. + * @param path The location to release. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +/// +/// +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocationRelease( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + String location); +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocationRelease( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + @Cast("const char*") BytePointer location); + +/** Inform Triton that the specified repository location should be used for + * the model in place of the original model repository. This method can only be + * called when TRITONREPOAGENT_ModelAction is invoked with + * TRITONREPOAGENT_ACTION_LOAD. The 'location' + * communicated depends on how the repository is being + * communicated to Triton as indicated by 'artifact_type'. + * + * TRITONREPOAGENT_ARTIFACT_FILESYSTEM: The model artifacts are + * made available to Triton via the local filesystem. 'location' returns + * the full path to the directory. Ownership of the contents of the + * returned directory is transferred to Triton and the agent must not + * modify or free the contents until TRITONREPOAGENT_ModelFinalize. + * The local filesystem directory can be created using + * TRITONREPOAGENT_ModelRepositoryLocationAcquire or the agent can use + * its own local filesystem API. + * + * TRITONREPOAGENT_ARTIFACT_REMOTE_FILESYSTEM: The model artifacts are + * made available to Triton via a remote filesystem. 'location' returns + * the full path to the remote filesystem directory. Ownership of the + * contents of the returned directory is transferred to Triton and + * the agent must not modify or free the contents until + * TRITONREPOAGENT_ModelFinalize. + * + * @param agent The agent. + * @param model The model. + * @param artifact_type The artifact type for the location. 
+ * @param path Returns the location. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryUpdate( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + TRITONREPOAGENT_ArtifactType artifact_type, String location); +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryUpdate( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + @Cast("TRITONREPOAGENT_ArtifactType") int artifact_type, @Cast("const char*") BytePointer location); + +/** Get the number of agent parameters defined for a model. + * + * @param agent The agent. + * @param model The model. + * @param count Returns the number of agent parameters. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelParameterCount( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + @Cast("uint32_t*") IntPointer count); +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelParameterCount( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + @Cast("uint32_t*") IntBuffer count); +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelParameterCount( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + @Cast("uint32_t*") int[] count); + +/** Get a parameter name and value. The caller does not own the + * returned strings and must not modify or delete them. + * + * @param agent The agent. + * @param model The model. + * @param index The index of the parameter. Must be 0 <= index < + * count, where count is the value returned by + * TRITONREPOAGENT_ModelParameterCount. + * @param parameter_name Returns the name of the parameter. + * @param parameter_value Returns the value of the parameter. + * @return a TRITONSERVER_Error indicating success or failure. 
*/ + +/// +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelParameter( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + @Cast("const uint32_t") int index, @Cast("const char**") PointerPointer parameter_name, + @Cast("const char**") PointerPointer parameter_value); +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelParameter( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + @Cast("const uint32_t") int index, @Cast("const char**") @ByPtrPtr BytePointer parameter_name, + @Cast("const char**") @ByPtrPtr BytePointer parameter_value); +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelParameter( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + @Cast("const uint32_t") int index, @Cast("const char**") @ByPtrPtr ByteBuffer parameter_name, + @Cast("const char**") @ByPtrPtr ByteBuffer parameter_value); +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelParameter( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + @Cast("const uint32_t") int index, @Cast("const char**") @ByPtrPtr byte[] parameter_name, + @Cast("const char**") @ByPtrPtr byte[] parameter_value); + +/** Get the model configuration. The caller takes ownership of the + * message object and must call TRITONSERVER_MessageDelete to release + * the object. If the model repository does not contain a + * config.pbtxt file then 'model_config' is returned as nullptr. + * + * @param agent The agent. + * @param model The model. + * @param config_version The model configuration will be returned in + * a format matching this version. If the configuration cannot be + * represented in the requested version's format then an error will + * be returned. Currently only version 1 is supported. + * @param model_config Returns the model configuration as a message. + * @return a TRITONSERVER_Error indicating success or failure. 
*/ + +/// +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelConfig( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + @Cast("const uint32_t") int config_version, @Cast("TRITONSERVER_Message**") PointerPointer model_config); +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelConfig( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + @Cast("const uint32_t") int config_version, @ByPtrPtr TRITONSERVER_Message model_config); + +/** Get the user-specified state associated with the model. + * + * @param model The agent model. + * @param state Returns the user state, or nullptr if no user state. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelState( + TRITONREPOAGENT_AgentModel model, @Cast("void**") PointerPointer state); +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelState( + TRITONREPOAGENT_AgentModel model, @Cast("void**") @ByPtrPtr Pointer state); + +/** Set the user-specified state associated with the model. + * + * @param model The agent model. + * @param state The user state, or nullptr if no user state. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelSetState( + TRITONREPOAGENT_AgentModel model, Pointer state); + +/** Get the user-specified state associated with the agent. + * + * @param agent The agent. + * @param state Returns the user state, or nullptr if no user state. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONREPOAGENT_State( + TRITONREPOAGENT_Agent agent, @Cast("void**") PointerPointer state); +public static native TRITONSERVER_Error TRITONREPOAGENT_State( + TRITONREPOAGENT_Agent agent, @Cast("void**") @ByPtrPtr Pointer state); + +/** Set the user-specified state associated with the agent. + * + * @param agent The agent. 
+ * @param state The user state, or nullptr if no user state. + * @return a TRITONSERVER_Error indicating success or failure. */ + + +/// +/// +/// +public static native TRITONSERVER_Error TRITONREPOAGENT_SetState( + TRITONREPOAGENT_Agent agent, Pointer state); + +/** + * The following functions can be implemented by an agent. Functions + * indicated as required must be implemented or the agent will fail + * to load. + * +

+ * Initialize an agent. This function is optional. This function is + * called once when an agent is loaded to allow the agent to + * initialize any state associated with the agent. An agent has a + * single state that is shared across all invocations of the agent. + * + * @param agent The agent. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONREPOAGENT_Initialize( + TRITONREPOAGENT_Agent agent); + +/** Finalize for an agent. This function is optional. This function is + * called once, just before the agent is unloaded. All state + * associated with the agent should be freed and any threads created + * for the agent should be exited/joined before returning from this + * function. + * + * @param agent The agent. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONREPOAGENT_Finalize( + TRITONREPOAGENT_Agent agent); + +/** Initialize a model associated with an agent. This function is optional. + * This function is called once when an agent model's lifecycle begins to allow + * the agent model to initialize any state associated with it. An agent model + * has a single state that is shared across all the lifecycle of the agent + * model. + * + * @param agent The agent to be associated with the model. + * @param model The model. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelInitialize( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model); + +/** Finalize for a model. This function is optional. This function is + * called once, just before the end of the agent model's lifecycle. All state + * associated with the agent model should be freed and any threads created + * for the agent model should be exited/joined before returning from this + * function. 
If the model acquired a model location using
+ * TRITONREPOAGENT_ModelRepositoryLocationAcquire, it must call
+ * TRITONREPOAGENT_ModelRepositoryLocationRelease to release that location.
+ *
+ * @param agent The agent associated with the model.
+ * @param model The model.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+///
+///
+public static native TRITONSERVER_Error TRITONREPOAGENT_ModelFinalize(
+    TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model);
+
+/** Handle an action for a specified model. This function is
+ * required. Triton will not perform multiple simultaneous calls to
+ * this function for a given agent and model; however, there may be
+ * simultaneous calls for the agent for different models.
+ *
+ * If the agent does not handle the action the agent should
+ * immediately return success (nullptr).
+ *
+ * Any modification to the model's repository must be made when 'action_type'
+ * is TRITONREPOAGENT_ACTION_LOAD.
+ * To modify the model's repository the agent must either acquire a mutable
+ * location via TRITONREPOAGENT_ModelRepositoryLocationAcquire
+ * or its own managed location, report the location to Triton via
+ * TRITONREPOAGENT_ModelRepositoryUpdate, and then return
+ * success (nullptr). If the agent does not need to make any changes
+ * to the model repository it should not call
+ * TRITONREPOAGENT_ModelRepositoryUpdate and then return success.
+ * To indicate that a model load should fail return a non-success status.
+ *
+ * @param agent The agent.
+ * @param model The model that is the target of the action.
+ * @param action_type The type of action the agent should handle for the model.
+ * @return a TRITONSERVER_Error indicating success or failure. 
*/ +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelAction( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + TRITONREPOAGENT_ActionType action_type); +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelAction( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + @Cast("TRITONREPOAGENT_ActionType") int action_type); + +// #ifdef __cplusplus +// #endif + + +// Parsed from tritonserver.h + +// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of NVIDIA CORPORATION nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// #pragma once + +/** \file */ + +// #include +// #include +// #include + +// #ifdef __cplusplus +// #endif + +// #ifdef _COMPILING_TRITONSERVER +// #if defined(_MSC_VER) +// #define TRITONSERVER_DECLSPEC __declspec(dllexport) +// #elif defined(__GNUC__) +// #define TRITONSERVER_DECLSPEC __attribute__((__visibility__("default"))) +// #else +// #define TRITONSERVER_DECLSPEC +// #endif +// #else +// #if defined(_MSC_VER) +// #define TRITONSERVER_DECLSPEC __declspec(dllimport) +// #else +// #define TRITONSERVER_DECLSPEC +// Targeting ../tritonserver/TRITONSERVER_Error.java + + +// Targeting ../tritonserver/TRITONSERVER_InferenceRequest.java + + +// Targeting ../tritonserver/TRITONSERVER_InferenceResponse.java + + +// Targeting ../tritonserver/TRITONSERVER_InferenceTrace.java + + +// Targeting ../tritonserver/TRITONSERVER_Message.java + + +// Targeting ../tritonserver/TRITONSERVER_Metrics.java + + +// Targeting ../tritonserver/TRITONSERVER_ResponseAllocator.java + + +// Targeting ../tritonserver/TRITONSERVER_Server.java + + +// Targeting ../tritonserver/TRITONSERVER_ServerOptions.java + + + +/** + * TRITONSERVER API Version + * + * The TRITONSERVER API is versioned with major and minor version + * numbers. Any change to the API that does not impact backwards + * compatibility (for example, adding a non-required function) + * increases the minor version number. 
Any change that breaks
+ * backwards compatibility (for example, deleting or changing the
+ * behavior of a function) increases the major version number. A
+ * client should check that the API version used to compile the
+ * client is compatible with the API version of the Triton shared
+ * library that it is linking against. This is typically done by code
+ * similar to the following which makes sure that the major versions
+ * are equal and that the minor version of the Triton shared library
+ * is >= the minor version used to build the client.
+ *
+ * uint32_t api_version_major, api_version_minor;
+ * TRITONSERVER_ApiVersion(&api_version_major, &api_version_minor);
+ * if ((api_version_major != TRITONSERVER_API_VERSION_MAJOR) ||
+ * (api_version_minor < TRITONSERVER_API_VERSION_MINOR)) {
+ * return TRITONSERVER_ErrorNew(
+ * TRITONSERVER_ERROR_UNSUPPORTED,
+ * "triton server API version does not support this client");
+ * }
+ * */
+public static final int TRITONSERVER_API_VERSION_MAJOR = 1;
+
+///
+public static final int TRITONSERVER_API_VERSION_MINOR = 3;
+
+/** Get the TRITONSERVER API version supported by the Triton shared
+ * library. This value can be compared against the
+ * TRITONSERVER_API_VERSION_MAJOR and TRITONSERVER_API_VERSION_MINOR
+ * used to build the client to ensure that Triton shared library is
+ * compatible with the client.
+ *
+ * @param major Returns the TRITONSERVER API major version supported
+ * by Triton.
+ * @param minor Returns the TRITONSERVER API minor version supported
+ * by Triton.
+ * @return a TRITONSERVER_Error indicating success or failure. 
*/ + +/// +/// +public static native TRITONSERVER_Error TRITONSERVER_ApiVersion( + @Cast("uint32_t*") IntPointer major, @Cast("uint32_t*") IntPointer minor); +public static native TRITONSERVER_Error TRITONSERVER_ApiVersion( + @Cast("uint32_t*") IntBuffer major, @Cast("uint32_t*") IntBuffer minor); +public static native TRITONSERVER_Error TRITONSERVER_ApiVersion( + @Cast("uint32_t*") int[] major, @Cast("uint32_t*") int[] minor); + +/** TRITONSERVER_DataType + * + * Tensor data types recognized by TRITONSERVER. + * */ +public enum TRITONSERVER_DataType { + TRITONSERVER_TYPE_INVALID(0), + TRITONSERVER_TYPE_BOOL(1), + TRITONSERVER_TYPE_UINT8(2), + TRITONSERVER_TYPE_UINT16(3), + TRITONSERVER_TYPE_UINT32(4), + TRITONSERVER_TYPE_UINT64(5), + TRITONSERVER_TYPE_INT8(6), + TRITONSERVER_TYPE_INT16(7), + TRITONSERVER_TYPE_INT32(8), + TRITONSERVER_TYPE_INT64(9), + TRITONSERVER_TYPE_FP16(10), + TRITONSERVER_TYPE_FP32(11), + TRITONSERVER_TYPE_FP64(12), + TRITONSERVER_TYPE_BYTES(13); + + public final int value; + private TRITONSERVER_DataType(int v) { this.value = v; } + private TRITONSERVER_DataType(TRITONSERVER_DataType e) { this.value = e.value; } + public TRITONSERVER_DataType intern() { for (TRITONSERVER_DataType e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + +/** Get the string representation of a data type. The returned string + * is not owned by the caller and so should not be modified or freed. + * + * @param datatype The data type. + * @return The string representation of the data type. */ + +/// +public static native String TRITONSERVER_DataTypeString( + TRITONSERVER_DataType datatype); +public static native @Cast("const char*") BytePointer TRITONSERVER_DataTypeString( + @Cast("TRITONSERVER_DataType") int datatype); + +/** Get the Triton datatype corresponding to a string representation + * of a datatype. + * + * @param dtype The datatype string representation. 
+ * @return The Triton data type or TRITONSERVER_TYPE_INVALID if the
+ * string does not represent a data type. */
+
+///
+public static native TRITONSERVER_DataType TRITONSERVER_StringToDataType(String dtype);
+public static native @Cast("TRITONSERVER_DataType") int TRITONSERVER_StringToDataType(@Cast("const char*") BytePointer dtype);
+
+/** Get the size of a Triton datatype in bytes. Zero is returned for
+ * TRITONSERVER_TYPE_BYTES because it has a variable size. Zero is
+ * returned for TRITONSERVER_TYPE_INVALID.
+ *
+ * @param datatype The datatype.
+ * @return The size of the datatype. */
+
+///
+///
+public static native @Cast("uint32_t") int TRITONSERVER_DataTypeByteSize(TRITONSERVER_DataType datatype);
+public static native @Cast("uint32_t") int TRITONSERVER_DataTypeByteSize(@Cast("TRITONSERVER_DataType") int datatype);
+
+/** TRITONSERVER_MemoryType
+ *
+ * Types of memory recognized by TRITONSERVER.
+ * */
+public enum TRITONSERVER_MemoryType {
+    TRITONSERVER_MEMORY_CPU(0),
+    TRITONSERVER_MEMORY_CPU_PINNED(1),
+    TRITONSERVER_MEMORY_GPU(2);
+
+    public final int value;
+    private TRITONSERVER_MemoryType(int v) { this.value = v; }
+    private TRITONSERVER_MemoryType(TRITONSERVER_MemoryType e) { this.value = e.value; }
+    public TRITONSERVER_MemoryType intern() { for (TRITONSERVER_MemoryType e : values()) if (e.value == value) return e; return this; }
+    @Override public String toString() { return intern().name(); }
+}
+
+/** Get the string representation of a memory type. The returned
+ * string is not owned by the caller and so should not be modified or
+ * freed.
+ *
+ * @param memtype The memory type.
+ * @return The string representation of the memory type. 
*/
+
+///
+///
+public static native String TRITONSERVER_MemoryTypeString(
+    TRITONSERVER_MemoryType memtype);
+public static native @Cast("const char*") BytePointer TRITONSERVER_MemoryTypeString(
+    @Cast("TRITONSERVER_MemoryType") int memtype);
+
+/** TRITONSERVER_ParameterType
+ *
+ * Types of parameters recognized by TRITONSERVER.
+ * */
+public enum TRITONSERVER_ParameterType {
+    TRITONSERVER_PARAMETER_STRING(0),
+    TRITONSERVER_PARAMETER_INT(1),
+    TRITONSERVER_PARAMETER_BOOL(2);
+
+    public final int value;
+    private TRITONSERVER_ParameterType(int v) { this.value = v; }
+    private TRITONSERVER_ParameterType(TRITONSERVER_ParameterType e) { this.value = e.value; }
+    public TRITONSERVER_ParameterType intern() { for (TRITONSERVER_ParameterType e : values()) if (e.value == value) return e; return this; }
+    @Override public String toString() { return intern().name(); }
+}
+
+/** Get the string representation of a parameter type. The returned
+ * string is not owned by the caller and so should not be modified or
+ * freed.
+ *
+ * @param paramtype The parameter type.
+ * @return The string representation of the parameter type. */
+
+///
+///
+public static native String TRITONSERVER_ParameterTypeString(
+    TRITONSERVER_ParameterType paramtype);
+public static native @Cast("const char*") BytePointer TRITONSERVER_ParameterTypeString(
+    @Cast("TRITONSERVER_ParameterType") int paramtype);
+
+/** TRITONSERVER_InstanceGroupKind
+ *
+ * Kinds of instance groups recognized by TRITONSERVER. 
+ * */ +public enum TRITONSERVER_InstanceGroupKind { + TRITONSERVER_INSTANCEGROUPKIND_AUTO(0), + TRITONSERVER_INSTANCEGROUPKIND_CPU(1), + TRITONSERVER_INSTANCEGROUPKIND_GPU(2), + TRITONSERVER_INSTANCEGROUPKIND_MODEL(3); + + public final int value; + private TRITONSERVER_InstanceGroupKind(int v) { this.value = v; } + private TRITONSERVER_InstanceGroupKind(TRITONSERVER_InstanceGroupKind e) { this.value = e.value; } + public TRITONSERVER_InstanceGroupKind intern() { for (TRITONSERVER_InstanceGroupKind e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + +/** Get the string representation of an instance-group kind. The + * returned string is not owned by the caller and so should not be + * modified or freed. + * + * @param kind The instance-group kind. + * @return The string representation of the kind. */ + +/// +/// +public static native String TRITONSERVER_InstanceGroupKindString( + TRITONSERVER_InstanceGroupKind kind); +public static native @Cast("const char*") BytePointer TRITONSERVER_InstanceGroupKindString( + @Cast("TRITONSERVER_InstanceGroupKind") int kind); + +/** TRITONSERVER_Logging + * + * Types/levels of logging. + * */ +public enum TRITONSERVER_LogLevel { + TRITONSERVER_LOG_INFO(0), + TRITONSERVER_LOG_WARN(1), + TRITONSERVER_LOG_ERROR(2), + TRITONSERVER_LOG_VERBOSE(3); + + public final int value; + private TRITONSERVER_LogLevel(int v) { this.value = v; } + private TRITONSERVER_LogLevel(TRITONSERVER_LogLevel e) { this.value = e.value; } + public TRITONSERVER_LogLevel intern() { for (TRITONSERVER_LogLevel e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + +/** Is a log level enabled? + * + * @param level The log level. + * @return True if the log level is enabled, false if not enabled. 
*/ + +/// +public static native @Cast("bool") boolean TRITONSERVER_LogIsEnabled( + TRITONSERVER_LogLevel level); +public static native @Cast("bool") boolean TRITONSERVER_LogIsEnabled( + @Cast("TRITONSERVER_LogLevel") int level); + +/** Log a message at a given log level if that level is enabled. + * + * @param level The log level. + * @param filename The file name of the location of the log message. + * @param line The line number of the log message. + * @param msg The log message. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +/// +/// +public static native TRITONSERVER_Error TRITONSERVER_LogMessage( + TRITONSERVER_LogLevel level, String filename, int line, + String msg); +public static native TRITONSERVER_Error TRITONSERVER_LogMessage( + @Cast("TRITONSERVER_LogLevel") int level, @Cast("const char*") BytePointer filename, int line, + @Cast("const char*") BytePointer msg); + +/** TRITONSERVER_Error + * + * Errors are reported by a TRITONSERVER_Error object. A NULL + * TRITONSERVER_Error indicates no error, a non-NULL TRITONSERVER_Error + * indicates error and the code and message for the error can be + * retrieved from the object. + * + * The caller takes ownership of a TRITONSERVER_Error object returned by + * the API and must call TRITONSERVER_ErrorDelete to release the object. + * +

+ * The TRITONSERVER_Error error codes */ +public enum TRITONSERVER_Error_Code { + TRITONSERVER_ERROR_UNKNOWN(0), + TRITONSERVER_ERROR_INTERNAL(1), + TRITONSERVER_ERROR_NOT_FOUND(2), + TRITONSERVER_ERROR_INVALID_ARG(3), + TRITONSERVER_ERROR_UNAVAILABLE(4), + TRITONSERVER_ERROR_UNSUPPORTED(5), + TRITONSERVER_ERROR_ALREADY_EXISTS(6); + + public final int value; + private TRITONSERVER_Error_Code(int v) { this.value = v; } + private TRITONSERVER_Error_Code(TRITONSERVER_Error_Code e) { this.value = e.value; } + public TRITONSERVER_Error_Code intern() { for (TRITONSERVER_Error_Code e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + +/** Create a new error object. The caller takes ownership of the + * TRITONSERVER_Error object and must call TRITONSERVER_ErrorDelete to + * release the object. + * + * @param code The error code. + * @param msg The error message. + * @return A new TRITONSERVER_Error object. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ErrorNew( + TRITONSERVER_Error_Code code, String msg); +public static native TRITONSERVER_Error TRITONSERVER_ErrorNew( + @Cast("TRITONSERVER_Error_Code") int code, @Cast("const char*") BytePointer msg); + +/** Delete an error object. + * + * @param error The error object. */ + +/// +public static native void TRITONSERVER_ErrorDelete(TRITONSERVER_Error error); + +/** Get the error code. + * + * @param error The error object. + * @return The error code. */ + +/// +public static native TRITONSERVER_Error_Code TRITONSERVER_ErrorCode(TRITONSERVER_Error error); + +/** Get the string representation of an error code. The returned + * string is not owned by the caller and so should not be modified or + * freed. The lifetime of the returned string extends only as long as + * 'error' and must not be accessed once 'error' is deleted. + * + * @param error The error object. + * @return The string representation of the error code. 
*/ + +/// +public static native String TRITONSERVER_ErrorCodeString( + TRITONSERVER_Error error); + +/** Get the error message. The returned string is not owned by the + * caller and so should not be modified or freed. The lifetime of the + * returned string extends only as long as 'error' and must not be + * accessed once 'error' is deleted. + * + * @param error The error object. + * @return The error message. */ + +/// +/// +/// +public static native String TRITONSERVER_ErrorMessage( + TRITONSERVER_Error error); +// Targeting ../tritonserver/TRITONSERVER_ResponseAllocatorAllocFn_t.java + + +// Targeting ../tritonserver/TRITONSERVER_ResponseAllocatorReleaseFn_t.java + + +// Targeting ../tritonserver/TRITONSERVER_ResponseAllocatorStartFn_t.java + + + +/** Create a new response allocator object. + * + * The response allocator object is used by Triton to allocate + * buffers to hold the output tensors in inference responses. Most + * models generate a single response for each inference request + * (TRITONSERVER_TXN_ONE_TO_ONE). For these models the order of + * callbacks will be: + * + * TRITONSERVER_ServerInferAsync called + * - start_fn : optional (and typically not required) + * - alloc_fn : called once for each output tensor in response + * TRITONSERVER_InferenceResponseDelete called + * - release_fn: called once for each output tensor in response + * + * For models that generate multiple responses for each inference + * request (TRITONSERVER_TXN_DECOUPLED), the start_fn callback can be + * used to determine sets of alloc_fn callbacks that belong to the + * same response: + * + * TRITONSERVER_ServerInferAsync called + * - start_fn + * - alloc_fn : called once for each output tensor in response + * - start_fn + * - alloc_fn : called once for each output tensor in response + * ... 
+ * For each response, TRITONSERVER_InferenceResponseDelete called + * - release_fn: called once for each output tensor in the response + * + * In all cases the start_fn, alloc_fn and release_fn callback + * functions must be thread-safe. Typically making these functions + * thread-safe does not require explicit locking. The recommended way + * to implement these functions is to have each inference request + * provide a 'response_allocator_userp' object that is unique to that + * request with TRITONSERVER_InferenceRequestSetResponseCallback. The + * callback functions then operate only on this unique state. Locking + * is required only when the callback function needs to access state + * that is shared across inference requests (for example, a common + * allocation pool). + * + * @param allocator Returns the new response allocator object. + * @param alloc_fn The function to call to allocate buffers for result + * tensors. + * @param release_fn The function to call when the server no longer + * holds a reference to an allocated buffer. + * @param start_fn The function to call to indicate that the + * subsequent 'alloc_fn' calls are for a new response. This callback + * is optional (use nullptr to indicate that it should not be + * invoked). +

+ * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ResponseAllocatorNew( + @Cast("TRITONSERVER_ResponseAllocator**") PointerPointer allocator, + TRITONSERVER_ResponseAllocatorAllocFn_t alloc_fn, + TRITONSERVER_ResponseAllocatorReleaseFn_t release_fn, + TRITONSERVER_ResponseAllocatorStartFn_t start_fn); +public static native TRITONSERVER_Error TRITONSERVER_ResponseAllocatorNew( + @ByPtrPtr TRITONSERVER_ResponseAllocator allocator, + TRITONSERVER_ResponseAllocatorAllocFn_t alloc_fn, + TRITONSERVER_ResponseAllocatorReleaseFn_t release_fn, + TRITONSERVER_ResponseAllocatorStartFn_t start_fn); + +/** Delete a response allocator. + * + * @param allocator The response allocator object. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +/// +/// +public static native TRITONSERVER_Error TRITONSERVER_ResponseAllocatorDelete( + TRITONSERVER_ResponseAllocator allocator); + +/** TRITONSERVER_Message + * + * Object representing a Triton Server message. + * +

+ * Create a new message object from serialized JSON string. + * + * @param message The message object. + * @param base The base of the serialized JSON. + * @param byte_size The size, in bytes, of the serialized message. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_MessageNewFromSerializedJson( + @Cast("TRITONSERVER_Message**") PointerPointer message, String base, @Cast("size_t") long byte_size); +public static native TRITONSERVER_Error TRITONSERVER_MessageNewFromSerializedJson( + @ByPtrPtr TRITONSERVER_Message message, String base, @Cast("size_t") long byte_size); +public static native TRITONSERVER_Error TRITONSERVER_MessageNewFromSerializedJson( + @ByPtrPtr TRITONSERVER_Message message, @Cast("const char*") BytePointer base, @Cast("size_t") long byte_size); + +/** Delete a message object. + * + * @param message The message object. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_MessageDelete( + TRITONSERVER_Message message); + +/** Get the base and size of the buffer containing the serialized + * message in JSON format. The buffer is owned by the + * TRITONSERVER_Message object and should not be modified or freed by + * the caller. The lifetime of the buffer extends only as long as + * 'message' and must not be accessed once 'message' is deleted. + * + * @param message The message object. + * @param base Returns the base of the serialized message. + * @param byte_size Returns the size, in bytes, of the serialized + * message. + * @return a TRITONSERVER_Error indicating success or failure. 
*/ + +/// +/// +public static native TRITONSERVER_Error TRITONSERVER_MessageSerializeToJson( + TRITONSERVER_Message message, @Cast("const char**") PointerPointer base, @Cast("size_t*") SizeTPointer byte_size); +public static native TRITONSERVER_Error TRITONSERVER_MessageSerializeToJson( + TRITONSERVER_Message message, @Cast("const char**") @ByPtrPtr BytePointer base, @Cast("size_t*") SizeTPointer byte_size); +public static native TRITONSERVER_Error TRITONSERVER_MessageSerializeToJson( + TRITONSERVER_Message message, @Cast("const char**") @ByPtrPtr ByteBuffer base, @Cast("size_t*") SizeTPointer byte_size); +public static native TRITONSERVER_Error TRITONSERVER_MessageSerializeToJson( + TRITONSERVER_Message message, @Cast("const char**") @ByPtrPtr byte[] base, @Cast("size_t*") SizeTPointer byte_size); + +/** TRITONSERVER_Metrics + * + * Object representing metrics. + * +

+ * Metric format types */ +public enum TRITONSERVER_MetricFormat { + TRITONSERVER_METRIC_PROMETHEUS(0); + + public final int value; + private TRITONSERVER_MetricFormat(int v) { this.value = v; } + private TRITONSERVER_MetricFormat(TRITONSERVER_MetricFormat e) { this.value = e.value; } + public TRITONSERVER_MetricFormat intern() { for (TRITONSERVER_MetricFormat e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + +/** Delete a metrics object. + * + * @param metrics The metrics object. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +/// +/// +public static native TRITONSERVER_Error TRITONSERVER_MetricsDelete( + TRITONSERVER_Metrics metrics); + +/** Get a buffer containing the metrics in the specified format. For + * each format the buffer contains the following: + * + * TRITONSERVER_METRIC_PROMETHEUS: 'base' points to a single multiline + * string (char*) that gives a text representation of the metrics in + * prometheus format. 'byte_size' returns the length of the string + * in bytes. + * + * The buffer is owned by the 'metrics' object and should not be + * modified or freed by the caller. The lifetime of the buffer + * extends only as long as 'metrics' and must not be accessed once + * 'metrics' is deleted. + * + * @param metrics The metrics object. + * @param format The format to use for the returned metrics. + * @param base Returns a pointer to the base of the formatted + * metrics, as described above. + * @param byte_size Returns the size, in bytes, of the formatted + * metrics. + * @return a TRITONSERVER_Error indicating success or failure. 
*/ + +/// +/// +public static native TRITONSERVER_Error TRITONSERVER_MetricsFormatted( + TRITONSERVER_Metrics metrics, TRITONSERVER_MetricFormat format, + @Cast("const char**") PointerPointer base, @Cast("size_t*") SizeTPointer byte_size); +public static native TRITONSERVER_Error TRITONSERVER_MetricsFormatted( + TRITONSERVER_Metrics metrics, TRITONSERVER_MetricFormat format, + @Cast("const char**") @ByPtrPtr BytePointer base, @Cast("size_t*") SizeTPointer byte_size); +public static native TRITONSERVER_Error TRITONSERVER_MetricsFormatted( + TRITONSERVER_Metrics metrics, @Cast("TRITONSERVER_MetricFormat") int format, + @Cast("const char**") @ByPtrPtr ByteBuffer base, @Cast("size_t*") SizeTPointer byte_size); +public static native TRITONSERVER_Error TRITONSERVER_MetricsFormatted( + TRITONSERVER_Metrics metrics, TRITONSERVER_MetricFormat format, + @Cast("const char**") @ByPtrPtr byte[] base, @Cast("size_t*") SizeTPointer byte_size); +public static native TRITONSERVER_Error TRITONSERVER_MetricsFormatted( + TRITONSERVER_Metrics metrics, @Cast("TRITONSERVER_MetricFormat") int format, + @Cast("const char**") @ByPtrPtr BytePointer base, @Cast("size_t*") SizeTPointer byte_size); +public static native TRITONSERVER_Error TRITONSERVER_MetricsFormatted( + TRITONSERVER_Metrics metrics, TRITONSERVER_MetricFormat format, + @Cast("const char**") @ByPtrPtr ByteBuffer base, @Cast("size_t*") SizeTPointer byte_size); +public static native TRITONSERVER_Error TRITONSERVER_MetricsFormatted( + TRITONSERVER_Metrics metrics, @Cast("TRITONSERVER_MetricFormat") int format, + @Cast("const char**") @ByPtrPtr byte[] base, @Cast("size_t*") SizeTPointer byte_size); + +/** TRITONSERVER_InferenceTrace + * + * Object that represents tracing for an inference request. + * +

+ * Trace levels */ +public enum TRITONSERVER_InferenceTraceLevel { + TRITONSERVER_TRACE_LEVEL_DISABLED(0), + TRITONSERVER_TRACE_LEVEL_MIN(1), + TRITONSERVER_TRACE_LEVEL_MAX(2); + + public final int value; + private TRITONSERVER_InferenceTraceLevel(int v) { this.value = v; } + private TRITONSERVER_InferenceTraceLevel(TRITONSERVER_InferenceTraceLevel e) { this.value = e.value; } + public TRITONSERVER_InferenceTraceLevel intern() { for (TRITONSERVER_InferenceTraceLevel e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + +/** Get the string representation of a trace level. The returned + * string is not owned by the caller and so should not be modified or + * freed. + * + * @param level The trace level. + * @return The string representation of the trace level. */ +public static native String TRITONSERVER_InferenceTraceLevelString( + TRITONSERVER_InferenceTraceLevel level); +public static native @Cast("const char*") BytePointer TRITONSERVER_InferenceTraceLevelString( + @Cast("TRITONSERVER_InferenceTraceLevel") int level); + +// Trace activities +public enum TRITONSERVER_InferenceTraceActivity { + TRITONSERVER_TRACE_REQUEST_START(0), + TRITONSERVER_TRACE_QUEUE_START(1), + TRITONSERVER_TRACE_COMPUTE_START(2), + TRITONSERVER_TRACE_COMPUTE_INPUT_END(3), + TRITONSERVER_TRACE_COMPUTE_OUTPUT_START(4), + TRITONSERVER_TRACE_COMPUTE_END(5), + TRITONSERVER_TRACE_REQUEST_END(6); + + public final int value; + private TRITONSERVER_InferenceTraceActivity(int v) { this.value = v; } + private TRITONSERVER_InferenceTraceActivity(TRITONSERVER_InferenceTraceActivity e) { this.value = e.value; } + public TRITONSERVER_InferenceTraceActivity intern() { for (TRITONSERVER_InferenceTraceActivity e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + +/** Get the string representation of a trace activity. 
The returned
+ * string is not owned by the caller and so should not be modified or
+ * freed.
+ *
+ * @param activity The trace activity.
+ * @return The string representation of the trace activity. */
+public static native String TRITONSERVER_InferenceTraceActivityString(
+    TRITONSERVER_InferenceTraceActivity activity);
+public static native @Cast("const char*") BytePointer TRITONSERVER_InferenceTraceActivityString(
+    @Cast("TRITONSERVER_InferenceTraceActivity") int activity);
+// Targeting ../tritonserver/TRITONSERVER_InferenceTraceActivityFn_t.java
+
+
+// Targeting ../tritonserver/TRITONSERVER_InferenceTraceReleaseFn_t.java
+
+
+
+/** Create a new inference trace object. The caller takes ownership of
+ * the TRITONSERVER_InferenceTrace object and must call
+ * TRITONSERVER_InferenceTraceDelete to release the object.
+ *
+ * The activity callback function will be called to report activity
+ * for 'trace' as well as for any child traces that are spawned by
+ * 'trace', and so the activity callback must check the trace object
+ * to determine specifically what activity is being reported.
+ *
+ * The release callback is called for both 'trace' and for any child
+ * traces spawned by 'trace'.
+ *
+ * @param trace Returns the new inference trace object.
+ * @param level The tracing level.
+ * @param parent_id The parent trace id for this trace. A value of 0
+ * indicates that there is no parent trace.
+ * @param activity_fn The callback function where activity for the
+ * trace is reported.
+ * @param release_fn The callback function called when all activity
+ * is complete for the trace.
+ * @param trace_userp User-provided pointer that is delivered to
+ * the activity and release callback functions.
+ * @return a TRITONSERVER_Error indicating success or failure. 
*/ + +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceNew( + @Cast("TRITONSERVER_InferenceTrace**") PointerPointer trace, TRITONSERVER_InferenceTraceLevel level, + @Cast("uint64_t") long parent_id, TRITONSERVER_InferenceTraceActivityFn_t activity_fn, + TRITONSERVER_InferenceTraceReleaseFn_t release_fn, Pointer trace_userp); +public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceNew( + @ByPtrPtr TRITONSERVER_InferenceTrace trace, TRITONSERVER_InferenceTraceLevel level, + @Cast("uint64_t") long parent_id, TRITONSERVER_InferenceTraceActivityFn_t activity_fn, + TRITONSERVER_InferenceTraceReleaseFn_t release_fn, Pointer trace_userp); +public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceNew( + @ByPtrPtr TRITONSERVER_InferenceTrace trace, @Cast("TRITONSERVER_InferenceTraceLevel") int level, + @Cast("uint64_t") long parent_id, TRITONSERVER_InferenceTraceActivityFn_t activity_fn, + TRITONSERVER_InferenceTraceReleaseFn_t release_fn, Pointer trace_userp); + +/** Delete a trace object. + * + * @param trace The trace object. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceDelete( + TRITONSERVER_InferenceTrace trace); + +/** Get the id associated with a trace. Every trace is assigned an id + * that is unique across all traces created for a Triton server. + * + * @param trace The trace. + * @param id Returns the id associated with the trace. + * @return a TRITONSERVER_Error indicating success or failure. 
*/
+
+///
+public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceId(
+    TRITONSERVER_InferenceTrace trace, @Cast("uint64_t*") LongPointer id);
+public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceId(
+    TRITONSERVER_InferenceTrace trace, @Cast("uint64_t*") LongBuffer id);
+public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceId(
+    TRITONSERVER_InferenceTrace trace, @Cast("uint64_t*") long[] id);
+
+/** Get the parent id associated with a trace. The parent id indicates
+ * a parent-child relationship between two traces. A parent id value
+ * of 0 indicates that there is no parent trace.
+ *
+ * @param trace The trace.
+ * @param parent_id Returns the parent id associated with the trace.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceParentId(
+    TRITONSERVER_InferenceTrace trace, @Cast("uint64_t*") LongPointer parent_id);
+public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceParentId(
+    TRITONSERVER_InferenceTrace trace, @Cast("uint64_t*") LongBuffer parent_id);
+public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceParentId(
+    TRITONSERVER_InferenceTrace trace, @Cast("uint64_t*") long[] parent_id);
+
+/** Get the name of the model associated with a trace. The caller does
+ * not own the returned string and must not modify or delete it. The
+ * lifetime of the returned string extends only as long as 'trace'.
+ *
+ * @param trace The trace.
+ * @param model_name Returns the name of the model associated with
+ * the trace.
+ * @return a TRITONSERVER_Error indicating success or failure.
*/ + +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceModelName( + TRITONSERVER_InferenceTrace trace, @Cast("const char**") PointerPointer model_name); +public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceModelName( + TRITONSERVER_InferenceTrace trace, @Cast("const char**") @ByPtrPtr BytePointer model_name); +public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceModelName( + TRITONSERVER_InferenceTrace trace, @Cast("const char**") @ByPtrPtr ByteBuffer model_name); +public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceModelName( + TRITONSERVER_InferenceTrace trace, @Cast("const char**") @ByPtrPtr byte[] model_name); + +/** Get the version of the model associated with a trace. + * + * @param trace The trace. + * @param model_version Returns the version of the model associated + * with the trace. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceModelVersion( + TRITONSERVER_InferenceTrace trace, @Cast("int64_t*") LongPointer model_version); +public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceModelVersion( + TRITONSERVER_InferenceTrace trace, @Cast("int64_t*") LongBuffer model_version); +public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceModelVersion( + TRITONSERVER_InferenceTrace trace, @Cast("int64_t*") long[] model_version); + +/** TRITONSERVER_InferenceRequest + * + * Object representing an inference request. The inference request + * provides the meta-data and input tensor values needed for an + * inference and returns the inference result meta-data and output + * tensors. An inference request object can be modified and reused + * multiple times. + * +

+ * Inference request flags. The enum values must be power-of-2 values. */ +public enum TRITONSERVER_RequestFlag { + TRITONSERVER_REQUEST_FLAG_SEQUENCE_START(1), + TRITONSERVER_REQUEST_FLAG_SEQUENCE_END(2); + + public final int value; + private TRITONSERVER_RequestFlag(int v) { this.value = v; } + private TRITONSERVER_RequestFlag(TRITONSERVER_RequestFlag e) { this.value = e.value; } + public TRITONSERVER_RequestFlag intern() { for (TRITONSERVER_RequestFlag e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + +/** Inference request release flags. The enum values must be + * power-of-2 values. */ +public enum TRITONSERVER_RequestReleaseFlag { + TRITONSERVER_REQUEST_RELEASE_ALL(1); + + public final int value; + private TRITONSERVER_RequestReleaseFlag(int v) { this.value = v; } + private TRITONSERVER_RequestReleaseFlag(TRITONSERVER_RequestReleaseFlag e) { this.value = e.value; } + public TRITONSERVER_RequestReleaseFlag intern() { for (TRITONSERVER_RequestReleaseFlag e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + +/** Inference response complete flags. The enum values must be + * power-of-2 values. 
*/ +public enum TRITONSERVER_ResponseCompleteFlag { + TRITONSERVER_RESPONSE_COMPLETE_FINAL(1); + + public final int value; + private TRITONSERVER_ResponseCompleteFlag(int v) { this.value = v; } + private TRITONSERVER_ResponseCompleteFlag(TRITONSERVER_ResponseCompleteFlag e) { this.value = e.value; } + public TRITONSERVER_ResponseCompleteFlag intern() { for (TRITONSERVER_ResponseCompleteFlag e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} +// Targeting ../tritonserver/TRITONSERVER_InferenceRequestReleaseFn_t.java + + +// Targeting ../tritonserver/TRITONSERVER_InferenceResponseCompleteFn_t.java + + + +/** Create a new inference request object. + * + * @param inference_request Returns the new request object. + * @param server the inference server object. + * @param model_name The name of the model to use for the request. + * @param model_version The version of the model to use for the + * request. If -1 then the server will choose a version based on the + * model's policy. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestNew( + @Cast("TRITONSERVER_InferenceRequest**") PointerPointer inference_request, + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestNew( + @ByPtrPtr TRITONSERVER_InferenceRequest inference_request, + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestNew( + @ByPtrPtr TRITONSERVER_InferenceRequest inference_request, + TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name, + @Cast("const int64_t") long model_version); + +/** Delete an inference request object. + * + * @param inference_request The request object. 
+ * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestDelete( + TRITONSERVER_InferenceRequest inference_request); + +/** Get the ID for a request. The returned ID is owned by + * 'inference_request' and must not be modified or freed by the + * caller. + * + * @param inference_request The request object. + * @param id Returns the ID. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestId( + TRITONSERVER_InferenceRequest inference_request, @Cast("const char**") PointerPointer id); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestId( + TRITONSERVER_InferenceRequest inference_request, @Cast("const char**") @ByPtrPtr BytePointer id); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestId( + TRITONSERVER_InferenceRequest inference_request, @Cast("const char**") @ByPtrPtr ByteBuffer id); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestId( + TRITONSERVER_InferenceRequest inference_request, @Cast("const char**") @ByPtrPtr byte[] id); + +/** Set the ID for a request. + * + * @param inference_request The request object. + * @param id The ID. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestSetId( + TRITONSERVER_InferenceRequest inference_request, String id); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestSetId( + TRITONSERVER_InferenceRequest inference_request, @Cast("const char*") BytePointer id); + +/** Get the flag(s) associated with a request. On return 'flags' holds + * a bitwise-or of all flag values, see TRITONSERVER_RequestFlag for + * available flags. + * + * @param inference_request The request object. + * @param flags Returns the flags. + * @return a TRITONSERVER_Error indicating success or failure. 
*/
+
+///
+public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestFlags(
+    TRITONSERVER_InferenceRequest inference_request, @Cast("uint32_t*") IntPointer flags);
+public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestFlags(
+    TRITONSERVER_InferenceRequest inference_request, @Cast("uint32_t*") IntBuffer flags);
+public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestFlags(
+    TRITONSERVER_InferenceRequest inference_request, @Cast("uint32_t*") int[] flags);
+
+/** Set the flag(s) associated with a request. 'flags' should hold a
+ * bitwise-or of all flag values, see TRITONSERVER_RequestFlag for
+ * available flags.
+ *
+ * @param inference_request The request object.
+ * @param flags The flags.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestSetFlags(
+    TRITONSERVER_InferenceRequest inference_request, @Cast("uint32_t") int flags);
+
+/** Get the correlation ID of the inference request. Default is 0,
+ * which indicates that the request has no correlation ID. The
+ * correlation ID is used to indicate two or more inference requests
+ * are related to each other. How this relationship is handled by the
+ * inference server is determined by the model's scheduling
+ * policy.
+ *
+ * @param inference_request The request object.
+ * @param correlation_id Returns the correlation ID.
+ * @return a TRITONSERVER_Error indicating success or failure.
*/
+
+///
+public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestCorrelationId(
+    TRITONSERVER_InferenceRequest inference_request, @Cast("uint64_t*") LongPointer correlation_id);
+public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestCorrelationId(
+    TRITONSERVER_InferenceRequest inference_request, @Cast("uint64_t*") LongBuffer correlation_id);
+public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestCorrelationId(
+    TRITONSERVER_InferenceRequest inference_request, @Cast("uint64_t*") long[] correlation_id);
+
+/** Set the correlation ID of the inference request. Default is 0, which
+ * indicates that the request has no correlation ID. The correlation ID
+ * is used to indicate two or more inference requests are related to
+ * each other. How this relationship is handled by the inference
+ * server is determined by the model's scheduling policy.
+ *
+ * @param inference_request The request object.
+ * @param correlation_id The correlation ID.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestSetCorrelationId(
+    TRITONSERVER_InferenceRequest inference_request, @Cast("uint64_t") long correlation_id);
+
+/** Get the priority for a request. The default is 0 indicating that
+ * the request does not specify a priority and so will use the
+ * model's default priority.
+ *
+ * @param inference_request The request object.
+ * @param priority Returns the priority level.
+ * @return a TRITONSERVER_Error indicating success or failure.
*/ + +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestPriority( + TRITONSERVER_InferenceRequest inference_request, @Cast("uint32_t*") IntPointer priority); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestPriority( + TRITONSERVER_InferenceRequest inference_request, @Cast("uint32_t*") IntBuffer priority); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestPriority( + TRITONSERVER_InferenceRequest inference_request, @Cast("uint32_t*") int[] priority); + +/** Set the priority for a request. The default is 0 indicating that + * the request does not specify a priority and so will use the + * model's default priority. + * + * @param inference_request The request object. + * @param priority The priority level. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestSetPriority( + TRITONSERVER_InferenceRequest inference_request, @Cast("uint32_t") int priority); + +/** Get the timeout for a request, in microseconds. The default is 0 + * which indicates that the request has no timeout. + * + * @param inference_request The request object. + * @param timeout_us Returns the timeout, in microseconds. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestTimeoutMicroseconds( + TRITONSERVER_InferenceRequest inference_request, @Cast("uint64_t*") LongPointer timeout_us); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestTimeoutMicroseconds( + TRITONSERVER_InferenceRequest inference_request, @Cast("uint64_t*") LongBuffer timeout_us); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestTimeoutMicroseconds( + TRITONSERVER_InferenceRequest inference_request, @Cast("uint64_t*") long[] timeout_us); + +/** Set the timeout for a request, in microseconds. 
The default is 0 + * which indicates that the request has no timeout. + * + * @param inference_request The request object. + * @param timeout_us The timeout, in microseconds. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestSetTimeoutMicroseconds( + TRITONSERVER_InferenceRequest inference_request, @Cast("uint64_t") long timeout_us); + +/** Add an input to a request. + * + * @param inference_request The request object. + * @param name The name of the input. + * @param datatype The type of the input. Valid type names are BOOL, + * UINT8, UINT16, UINT32, UINT64, INT8, INT16, INT32, INT64, FP16, + * FP32, FP64, and BYTES. + * @param shape The shape of the input. + * @param dim_count The number of dimensions of 'shape'. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAddInput( + TRITONSERVER_InferenceRequest inference_request, String name, + TRITONSERVER_DataType datatype, @Cast("const int64_t*") LongPointer shape, + @Cast("uint64_t") long dim_count); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAddInput( + TRITONSERVER_InferenceRequest inference_request, @Cast("const char*") BytePointer name, + @Cast("TRITONSERVER_DataType") int datatype, @Cast("const int64_t*") LongBuffer shape, + @Cast("uint64_t") long dim_count); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAddInput( + TRITONSERVER_InferenceRequest inference_request, String name, + TRITONSERVER_DataType datatype, @Cast("const int64_t*") long[] shape, + @Cast("uint64_t") long dim_count); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAddInput( + TRITONSERVER_InferenceRequest inference_request, @Cast("const char*") BytePointer name, + @Cast("TRITONSERVER_DataType") int datatype, @Cast("const int64_t*") LongPointer shape, + @Cast("uint64_t") long dim_count); 
+public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAddInput( + TRITONSERVER_InferenceRequest inference_request, String name, + TRITONSERVER_DataType datatype, @Cast("const int64_t*") LongBuffer shape, + @Cast("uint64_t") long dim_count); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAddInput( + TRITONSERVER_InferenceRequest inference_request, @Cast("const char*") BytePointer name, + @Cast("TRITONSERVER_DataType") int datatype, @Cast("const int64_t*") long[] shape, + @Cast("uint64_t") long dim_count); + +/** Remove an input from a request. + * + * @param inference_request The request object. + * @param name The name of the input. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestRemoveInput( + TRITONSERVER_InferenceRequest inference_request, String name); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestRemoveInput( + TRITONSERVER_InferenceRequest inference_request, @Cast("const char*") BytePointer name); + +/** Remove all inputs from a request. + * + * @param inference_request The request object. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestRemoveAllInputs( + TRITONSERVER_InferenceRequest inference_request); + +/** Assign a buffer of data to an input. The buffer will be appended + * to any existing buffers for that input. The 'inference_request' + * object takes ownership of the buffer and so the caller should not + * modify or free the buffer until that ownership is released by + * 'inference_request' being deleted or by the input being removed + * from 'inference_request'. + * + * @param inference_request The request object. + * @param name The name of the input. + * @param base The base address of the input data. + * @param byte_size The size, in bytes, of the input data. 
+ * @param memory_type The memory type of the input data. + * @param memory_type_id The memory type id of the input data. + * @return a TRITONSERVER_Error indicating success or failure. */ +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAppendInputData( + TRITONSERVER_InferenceRequest inference_request, String name, + @Const Pointer base, @Cast("size_t") long byte_size, TRITONSERVER_MemoryType memory_type, + @Cast("int64_t") long memory_type_id); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAppendInputData( + TRITONSERVER_InferenceRequest inference_request, @Cast("const char*") BytePointer name, + @Const Pointer base, @Cast("size_t") long byte_size, @Cast("TRITONSERVER_MemoryType") int memory_type, + @Cast("int64_t") long memory_type_id); + +/** Assign a buffer of data to an input for execution on all model instances + * with the specified host policy. The buffer will be appended to any existing + * buffers for that input on all devices with this host policy. The + * 'inference_request' object takes ownership of the buffer and so the caller + * should not modify or free the buffer until that ownership is released by + * 'inference_request' being deleted or by the input being removed from + * 'inference_request'. If the execution is scheduled on a device that does not + * have a input buffer specified using this function, then the input buffer + * specified with TRITONSERVER_InferenceRequestAppendInputData will be used so + * a non-host policy specific version of data must be added using that API. + * @param inference_request The request object. + * @param name The name of the input. + * @param base The base address of the input data. + * @param byte_size The size, in bytes, of the input data. + * @param memory_type The memory type of the input data. + * @param memory_type_id The memory type id of the input data. 
+ * @param host_policy_name All model instances executing with this host_policy
+ * will use this input buffer for execution.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy(
+    TRITONSERVER_InferenceRequest inference_request, String name,
+    @Const Pointer base, @Cast("size_t") long byte_size, TRITONSERVER_MemoryType memory_type,
+    @Cast("int64_t") long memory_type_id, String host_policy_name);
+public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy(
+    TRITONSERVER_InferenceRequest inference_request, @Cast("const char*") BytePointer name,
+    @Const Pointer base, @Cast("size_t") long byte_size, @Cast("TRITONSERVER_MemoryType") int memory_type,
+    @Cast("int64_t") long memory_type_id, @Cast("const char*") BytePointer host_policy_name);
+
+/** Clear all input data from an input, releasing ownership of the
+ * buffer(s) that were appended to the input with
+ * TRITONSERVER_InferenceRequestAppendInputData or
+ * TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy
+ * @param inference_request The request object.
+ * @param name The name of the input.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestRemoveAllInputData(
+    TRITONSERVER_InferenceRequest inference_request, String name);
+public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestRemoveAllInputData(
+    TRITONSERVER_InferenceRequest inference_request, @Cast("const char*") BytePointer name);
+
+/** Add an output request to an inference request.
+ *
+ * @param inference_request The request object.
+ * @param name The name of the output.
+ * @return a TRITONSERVER_Error indicating success or failure.
*/ + +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAddRequestedOutput( + TRITONSERVER_InferenceRequest inference_request, String name); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAddRequestedOutput( + TRITONSERVER_InferenceRequest inference_request, @Cast("const char*") BytePointer name); + +/** Remove an output request from an inference request. + * + * @param inference_request The request object. + * @param name The name of the output. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestRemoveRequestedOutput( + TRITONSERVER_InferenceRequest inference_request, String name); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestRemoveRequestedOutput( + TRITONSERVER_InferenceRequest inference_request, @Cast("const char*") BytePointer name); + +/** Remove all output requests from an inference request. + * + * @param inference_request The request object. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestRemoveAllRequestedOutputs( + TRITONSERVER_InferenceRequest inference_request); + +/** Set the release callback for an inference request. The release + * callback is called by Triton to return ownership of the request + * object. + * + * @param inference_request The request object. + * @param request_release_fn The function called to return ownership + * of the 'inference_request' object. + * @param request_release_userp User-provided pointer that is + * delivered to the 'request_release_fn' callback. + * @return a TRITONSERVER_Error indicating success or failure. 
*/ + +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestSetReleaseCallback( + TRITONSERVER_InferenceRequest inference_request, + TRITONSERVER_InferenceRequestReleaseFn_t request_release_fn, + Pointer request_release_userp); + +/** Set the allocator and response callback for an inference + * request. The allocator is used to allocate buffers for any output + * tensors included in responses that are produced for this + * request. The response callback is called to return response + * objects representing responses produced for this request. + * + * @param inference_request The request object. + * @param response_allocator The TRITONSERVER_ResponseAllocator to use + * to allocate buffers to hold inference results. + * @param response_allocator_userp User-provided pointer that is + * delivered to the response allocator's start and allocation functions. + * @param response_fn The function called to deliver an inference + * response for this request. + * @param response_userp User-provided pointer that is delivered to + * the 'response_fn' callback. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +/// +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestSetResponseCallback( + TRITONSERVER_InferenceRequest inference_request, + TRITONSERVER_ResponseAllocator response_allocator, + Pointer response_allocator_userp, + TRITONSERVER_InferenceResponseCompleteFn_t response_fn, + Pointer response_userp); + +/** TRITONSERVER_InferenceResponse + * + * Object representing an inference response. The inference response + * provides the meta-data and output tensor values calculated by the + * inference. + * +

+ * Delete an inference response object.
+ *
+ * @param inference_response The response object.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseDelete(
+    TRITONSERVER_InferenceResponse inference_response);
+
+/** Return the error status of an inference response. Return a
+ * TRITONSERVER_Error object on failure, return nullptr on success.
+ * The returned error object is owned by 'inference_response' and so
+ * should not be deleted by the caller.
+ *
+ * @param inference_response The response object.
+ * @return a TRITONSERVER_Error indicating the success or failure
+ * status of the response. */
+
+///
+public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseError(
+    TRITONSERVER_InferenceResponse inference_response);
+
+/** Get model used to produce a response. The caller does not own the
+ * returned model name value and must not modify or delete it. The
+ * lifetime of all returned values extends until 'inference_response'
+ * is deleted.
+ *
+ * @param inference_response The response object.
+ * @param model_name Returns the name of the model.
+ * @param model_version Returns the version of the model that produced
+ * this response.
+ * @return a TRITONSERVER_Error indicating success or failure.
*/ + +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseModel( + TRITONSERVER_InferenceResponse inference_response, @Cast("const char**") PointerPointer model_name, + @Cast("int64_t*") LongPointer model_version); +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseModel( + TRITONSERVER_InferenceResponse inference_response, @Cast("const char**") @ByPtrPtr BytePointer model_name, + @Cast("int64_t*") LongPointer model_version); +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseModel( + TRITONSERVER_InferenceResponse inference_response, @Cast("const char**") @ByPtrPtr ByteBuffer model_name, + @Cast("int64_t*") LongBuffer model_version); +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseModel( + TRITONSERVER_InferenceResponse inference_response, @Cast("const char**") @ByPtrPtr byte[] model_name, + @Cast("int64_t*") long[] model_version); + +/** Get the ID of the request corresponding to a response. The caller + * does not own the returned ID and must not modify or delete it. The + * lifetime of all returned values extends until 'inference_response' + * is deleted. + * + * @param inference_response The response object. + * @param request_id Returns the ID of the request corresponding to + * this response. + * @return a TRITONSERVER_Error indicating success or failure. 
*/ + +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseId( + TRITONSERVER_InferenceResponse inference_response, + @Cast("const char**") PointerPointer request_id); +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseId( + TRITONSERVER_InferenceResponse inference_response, + @Cast("const char**") @ByPtrPtr BytePointer request_id); +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseId( + TRITONSERVER_InferenceResponse inference_response, + @Cast("const char**") @ByPtrPtr ByteBuffer request_id); +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseId( + TRITONSERVER_InferenceResponse inference_response, + @Cast("const char**") @ByPtrPtr byte[] request_id); + +/** Get the number of parameters available in the response. + * + * @param inference_response The response object. + * @param count Returns the number of parameters. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +/// +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseParameterCount( + TRITONSERVER_InferenceResponse inference_response, @Cast("uint32_t*") IntPointer count); +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseParameterCount( + TRITONSERVER_InferenceResponse inference_response, @Cast("uint32_t*") IntBuffer count); +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseParameterCount( + TRITONSERVER_InferenceResponse inference_response, @Cast("uint32_t*") int[] count); + +/** Get all information about a parameter. The caller does not own any + * of the returned values and must not modify or delete them. The + * lifetime of all returned values extends until 'inference_response' + * is deleted. + * + * The 'vvalue' returns a void* pointer that must be cast + * appropriately based on 'type'. 
For example: + * + * void* vvalue; + * TRITONSERVER_ParameterType type; + * TRITONSERVER_InferenceResponseParameter( + * response, index, &name, &type, &vvalue); + * switch (type) { + * case TRITONSERVER_PARAMETER_BOOL: + * bool value = *(reinterpret_cast(vvalue)); + * ... + * case TRITONSERVER_PARAMETER_INT: + * int64_t value = *(reinterpret_cast(vvalue)); + * ... + * case TRITONSERVER_PARAMETER_STRING: + * const char* value = reinterpret_cast(vvalue); + * ... + * + * @param inference_response The response object. + * @param index The index of the parameter, must be 0 <= index < + * count, where 'count' is the value returned by + * TRITONSERVER_InferenceResponseParameterCount. + * @param name Returns the name of the parameter. + * @param type Returns the type of the parameter. + * @param vvalue Returns a pointer to the parameter value. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseParameter( + TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index, + @Cast("const char**") PointerPointer name, @Cast("TRITONSERVER_ParameterType*") IntPointer type, @Cast("const void**") PointerPointer vvalue); +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseParameter( + TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index, + @Cast("const char**") @ByPtrPtr BytePointer name, @Cast("TRITONSERVER_ParameterType*") IntPointer type, @Cast("const void**") @ByPtrPtr Pointer vvalue); +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseParameter( + TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index, + @Cast("const char**") @ByPtrPtr ByteBuffer name, @Cast("TRITONSERVER_ParameterType*") IntBuffer type, @Cast("const void**") @ByPtrPtr Pointer vvalue); +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseParameter( + 
TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index, + @Cast("const char**") @ByPtrPtr byte[] name, @Cast("TRITONSERVER_ParameterType*") int[] type, @Cast("const void**") @ByPtrPtr Pointer vvalue); + +/** Get the number of outputs available in the response. + * + * @param inference_response The response object. + * @param count Returns the number of output tensors. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseOutputCount( + TRITONSERVER_InferenceResponse inference_response, @Cast("uint32_t*") IntPointer count); +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseOutputCount( + TRITONSERVER_InferenceResponse inference_response, @Cast("uint32_t*") IntBuffer count); +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseOutputCount( + TRITONSERVER_InferenceResponse inference_response, @Cast("uint32_t*") int[] count); + +/** Get all information about an output tensor. The tensor data is + * returned as the base pointer to the data and the size, in bytes, + * of the data. The caller does not own any of the returned values + * and must not modify or delete them. The lifetime of all returned + * values extends until 'inference_response' is deleted. + * + * @param inference_response The response object. + * @param index The index of the output tensor, must be 0 <= index < + * count, where 'count' is the value returned by + * TRITONSERVER_InferenceResponseOutputCount. + * @param name Returns the name of the output. + * @param datatype Returns the type of the output. + * @param shape Returns the shape of the output. + * @param dim_count Returns the number of dimensions of the returned + * shape. + * @param base Returns the tensor data for the output. + * @param byte_size Returns the size, in bytes, of the data. + * @param memory_type Returns the memory type of the data. 
+ * @param memory_type_id Returns the memory type id of the data. + * @param userp The user-specified value associated with the buffer + * in TRITONSERVER_ResponseAllocatorAllocFn_t. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseOutput( + TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index, + @Cast("const char**") PointerPointer name, @Cast("TRITONSERVER_DataType*") IntPointer datatype, @Cast("const int64_t**") PointerPointer shape, + @Cast("uint64_t*") LongPointer dim_count, @Cast("const void**") PointerPointer base, @Cast("size_t*") SizeTPointer byte_size, + @Cast("TRITONSERVER_MemoryType*") IntPointer memory_type, @Cast("int64_t*") LongPointer memory_type_id, + @Cast("void**") PointerPointer userp); +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseOutput( + TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index, + @Cast("const char**") @ByPtrPtr BytePointer name, @Cast("TRITONSERVER_DataType*") IntPointer datatype, @Cast("const int64_t**") @ByPtrPtr LongPointer shape, + @Cast("uint64_t*") LongPointer dim_count, @Cast("const void**") @ByPtrPtr Pointer base, @Cast("size_t*") SizeTPointer byte_size, + @Cast("TRITONSERVER_MemoryType*") IntPointer memory_type, @Cast("int64_t*") LongPointer memory_type_id, + @Cast("void**") @ByPtrPtr Pointer userp); +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseOutput( + TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index, + @Cast("const char**") @ByPtrPtr ByteBuffer name, @Cast("TRITONSERVER_DataType*") IntBuffer datatype, @Cast("const int64_t**") @ByPtrPtr LongBuffer shape, + @Cast("uint64_t*") LongBuffer dim_count, @Cast("const void**") @ByPtrPtr Pointer base, @Cast("size_t*") SizeTPointer byte_size, + @Cast("TRITONSERVER_MemoryType*") IntBuffer memory_type, @Cast("int64_t*") LongBuffer 
memory_type_id, + @Cast("void**") @ByPtrPtr Pointer userp); +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseOutput( + TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index, + @Cast("const char**") @ByPtrPtr byte[] name, @Cast("TRITONSERVER_DataType*") int[] datatype, @Cast("const int64_t**") @ByPtrPtr long[] shape, + @Cast("uint64_t*") long[] dim_count, @Cast("const void**") @ByPtrPtr Pointer base, @Cast("size_t*") SizeTPointer byte_size, + @Cast("TRITONSERVER_MemoryType*") int[] memory_type, @Cast("int64_t*") long[] memory_type_id, + @Cast("void**") @ByPtrPtr Pointer userp); + +/** Get a classification label associated with an output for a given + * index. The caller does not own the returned label and must not + * modify or delete it. The lifetime of all returned label extends + * until 'inference_response' is deleted. + * + * @param inference_response The response object. + * @param index The index of the output tensor, must be 0 <= index < + * count, where 'count' is the value returned by + * TRITONSERVER_InferenceResponseOutputCount. + * @param class_index The index of the class. + * @param name Returns the label corresponding to 'class_index' or + * nullptr if no label. + * @return a TRITONSERVER_Error indicating success or failure. 
*/ + +/// +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseOutputClassificationLabel( + TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index, + @Cast("const size_t") long class_index, @Cast("const char**") PointerPointer label); +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseOutputClassificationLabel( + TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index, + @Cast("const size_t") long class_index, @Cast("const char**") @ByPtrPtr BytePointer label); +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseOutputClassificationLabel( + TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index, + @Cast("const size_t") long class_index, @Cast("const char**") @ByPtrPtr ByteBuffer label); +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseOutputClassificationLabel( + TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index, + @Cast("const size_t") long class_index, @Cast("const char**") @ByPtrPtr byte[] label); + + +/** TRITONSERVER_ServerOptions + * + * Options to use when creating an inference server. + * +

+ * Model control modes */ +public enum TRITONSERVER_ModelControlMode { + TRITONSERVER_MODEL_CONTROL_NONE(0), + TRITONSERVER_MODEL_CONTROL_POLL(1), + TRITONSERVER_MODEL_CONTROL_EXPLICIT(2); + + public final int value; + private TRITONSERVER_ModelControlMode(int v) { this.value = v; } + private TRITONSERVER_ModelControlMode(TRITONSERVER_ModelControlMode e) { this.value = e.value; } + public TRITONSERVER_ModelControlMode intern() { for (TRITONSERVER_ModelControlMode e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + +/** Create a new server options object. The caller takes ownership of + * the TRITONSERVER_ServerOptions object and must call + * TRITONSERVER_ServerOptionsDelete to release the object. + * + * @param options Returns the new server options object. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsNew( + @Cast("TRITONSERVER_ServerOptions**") PointerPointer options); +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsNew( + @ByPtrPtr TRITONSERVER_ServerOptions options); + +/** Delete a server options object. + * + * @param options The server options object. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsDelete( + TRITONSERVER_ServerOptions options); + +/** Set the textual ID for the server in a server options. The ID is a + * name that identifies the server. + * + * @param options The server options object. + * @param server_id The server identifier. + * @return a TRITONSERVER_Error indicating success or failure. 
*/ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetServerId( + TRITONSERVER_ServerOptions options, String server_id); +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetServerId( + TRITONSERVER_ServerOptions options, @Cast("const char*") BytePointer server_id); + +/** Set the model repository path in a server options. The path must be + * the full absolute path to the model repository. This function can be called + * multiple times with different paths to set multiple model repositories. + * Note that if a model is not unique across all model repositories + * at any time, the model will not be available. + * + * @param options The server options object. + * @param model_repository_path The full path to the model repository. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +/// +/// +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetModelRepositoryPath( + TRITONSERVER_ServerOptions options, String model_repository_path); +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetModelRepositoryPath( + TRITONSERVER_ServerOptions options, @Cast("const char*") BytePointer model_repository_path); + +/** Set the model control mode in a server options. For each mode the models + * will be managed as the following: + * + * TRITONSERVER_MODEL_CONTROL_NONE: the models in model repository will be + * loaded on startup. After startup any changes to the model repository will + * be ignored. Calling TRITONSERVER_ServerPollModelRepository will result in + * an error. + * + * TRITONSERVER_MODEL_CONTROL_POLL: the models in model repository will be + * loaded on startup. The model repository can be polled periodically using + * TRITONSERVER_ServerPollModelRepository and the server will load, unload, + * and updated models according to changes in the model repository. + * + * TRITONSERVER_MODEL_CONTROL_EXPLICIT: the models in model repository will + * not be loaded on startup. 
The corresponding model control APIs must be + * called to load / unload a model in the model repository. + * + * @param options The server options object. + * @param mode The mode to use for the model control. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetModelControlMode( + TRITONSERVER_ServerOptions options, TRITONSERVER_ModelControlMode mode); +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetModelControlMode( + TRITONSERVER_ServerOptions options, @Cast("TRITONSERVER_ModelControlMode") int mode); + +/** Set the model to be loaded at startup in a server options. The model must be + * present in one, and only one, of the specified model repositories. + * This function can be called multiple times with different model name + * to set multiple startup models. + * Note that it only takes effect on TRITONSERVER_MODEL_CONTROL_EXPLICIT mode. + * + * @param options The server options object. + * @param model_name The name of the model to load on startup. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetStartupModel( + TRITONSERVER_ServerOptions options, String model_name); +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetStartupModel( + TRITONSERVER_ServerOptions options, @Cast("const char*") BytePointer model_name); + +/** Enable or disable strict model configuration handling in a server + * options. + * + * @param options The server options object. + * @param strict True to enable strict model configuration handling, + * false to disable. + * @return a TRITONSERVER_Error indicating success or failure. 
*/ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetStrictModelConfig( + TRITONSERVER_ServerOptions options, @Cast("bool") boolean strict); + +/** Set the total pinned memory byte size that the server can allocate + * in a server options. The pinned memory pool will be shared across + * Triton itself and the backends that use + * TRITONBACKEND_MemoryManager to allocate memory. + * + * @param options The server options object. + * @param size The pinned memory pool byte size. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetPinnedMemoryPoolByteSize( + TRITONSERVER_ServerOptions options, @Cast("uint64_t") long size); + +/** Set the total CUDA memory byte size that the server can allocate + * on given GPU device in a server options. The pinned memory pool + * will be shared across Triton itself and the backends that use + * TRITONBACKEND_MemoryManager to allocate memory. + * + * @param options The server options object. + * @param gpu_device The GPU device to allocate the memory pool. + * @param size The CUDA memory pool byte size. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetCudaMemoryPoolByteSize( + TRITONSERVER_ServerOptions options, int gpu_device, @Cast("uint64_t") long size); + +/** Set the minimum support CUDA compute capability in a server + * options. + * + * @param options The server options object. + * @param cc The minimum CUDA compute capability. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetMinSupportedComputeCapability( + TRITONSERVER_ServerOptions options, double cc); + +/** Enable or disable exit-on-error in a server options. + * + * @param options The server options object. 
+ * @param exit True to enable exiting on initialization error, false + * to continue. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetExitOnError( + TRITONSERVER_ServerOptions options, @Cast("bool") boolean exit); + +/** Enable or disable strict readiness handling in a server options. + * + * @param options The server options object. + * @param strict True to enable strict readiness handling, false to + * disable. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetStrictReadiness( + TRITONSERVER_ServerOptions options, @Cast("bool") boolean strict); + +/** Set the exit timeout, in seconds, for the server in a server + * options. + * + * @param options The server options object. + * @param timeout The exit timeout, in seconds. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetExitTimeout( + TRITONSERVER_ServerOptions options, @Cast("unsigned int") int timeout); + +/** Set the number of threads used in buffer manager in a server options. + * + * @param thread_count The number of threads. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetBufferManagerThreadCount( + TRITONSERVER_ServerOptions options, @Cast("unsigned int") int thread_count); + +/** Enable or disable info level logging. + * + * @param options The server options object. + * @param log True to enable info logging, false to disable. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetLogInfo( + TRITONSERVER_ServerOptions options, @Cast("bool") boolean log); + +/** Enable or disable warning level logging. 
+ * + * @param options The server options object. + * @param log True to enable warning logging, false to disable. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetLogWarn( + TRITONSERVER_ServerOptions options, @Cast("bool") boolean log); + +/** Enable or disable error level logging. + * + * @param options The server options object. + * @param log True to enable error logging, false to disable. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetLogError( + TRITONSERVER_ServerOptions options, @Cast("bool") boolean log); + +/** Set verbose logging level. Level zero disables verbose logging. + * + * @param options The server options object. + * @param level The verbose logging level. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetLogVerbose( + TRITONSERVER_ServerOptions options, int level); + +/** Enable or disable metrics collection in a server options. + * + * @param options The server options object. + * @param metrics True to enable metrics, false to disable. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetMetrics( + TRITONSERVER_ServerOptions options, @Cast("bool") boolean metrics); + +/** Enable or disable GPU metrics collection in a server options. GPU + * metrics are collected if both this option and + * TRITONSERVER_ServerOptionsSetMetrics are true. + * + * @param options The server options object. + * @param gpu_metrics True to enable GPU metrics, false to disable. + * @return a TRITONSERVER_Error indicating success or failure. 
*/ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetGpuMetrics( + TRITONSERVER_ServerOptions options, @Cast("bool") boolean gpu_metrics); + +/** Set the directory containing backend shared libraries. This + * directory is searched last after the version and model directory + * in the model repository when looking for the backend shared + * library for a model. If the backend is named 'be' the directory + * searched is 'backend_dir'/be/libtriton_be.so. + * + * @param options The server options object. + * @param backend_dir The full path of the backend directory. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetBackendDirectory( + TRITONSERVER_ServerOptions options, String backend_dir); +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetBackendDirectory( + TRITONSERVER_ServerOptions options, @Cast("const char*") BytePointer backend_dir); + +/** Set the directory containing repository agent shared libraries. This + * directory is searched when looking for the repository agent shared + * library for a model. If the backend is named 'ra' the directory + * searched is 'repoagent_dir'/ra/libtritonrepoagent_ra.so. + * + * @param options The server options object. + * @param repoagent_dir The full path of the repository agent directory. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetRepoAgentDirectory( + TRITONSERVER_ServerOptions options, String repoagent_dir); +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetRepoAgentDirectory( + TRITONSERVER_ServerOptions options, @Cast("const char*") BytePointer repoagent_dir); + +/** Set a configuration setting for a named backend in a server + * options. + * + * @param options The server options object. + * @param backend_name The name of the backend. 
+ * @param setting The name of the setting. + * @param value The setting value. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetBackendConfig( + TRITONSERVER_ServerOptions options, String backend_name, + String setting, String value); +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetBackendConfig( + TRITONSERVER_ServerOptions options, @Cast("const char*") BytePointer backend_name, + @Cast("const char*") BytePointer setting, @Cast("const char*") BytePointer value); + +/** Set a host policy setting for a given policy name in a server options. + * + * @param options The server options object. + * @param policy_name The name of the policy. + * @param setting The name of the setting. + * @param value The setting value. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetHostPolicy( + TRITONSERVER_ServerOptions options, String policy_name, + String setting, String value); +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetHostPolicy( + TRITONSERVER_ServerOptions options, @Cast("const char*") BytePointer policy_name, + @Cast("const char*") BytePointer setting, @Cast("const char*") BytePointer value); + +/** TRITONSERVER_Server + * + * An inference server. + * +

+ * Model batch flags. The enum values must be power-of-2 values. */ +public enum TRITONSERVER_ModelBatchFlag { + TRITONSERVER_BATCH_UNKNOWN(1), + TRITONSERVER_BATCH_FIRST_DIM(2); + + public final int value; + private TRITONSERVER_ModelBatchFlag(int v) { this.value = v; } + private TRITONSERVER_ModelBatchFlag(TRITONSERVER_ModelBatchFlag e) { this.value = e.value; } + public TRITONSERVER_ModelBatchFlag intern() { for (TRITONSERVER_ModelBatchFlag e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + +/** Model index flags. The enum values must be power-of-2 values. */ +public enum TRITONSERVER_ModelIndexFlag { + TRITONSERVER_INDEX_FLAG_READY(1); + + public final int value; + private TRITONSERVER_ModelIndexFlag(int v) { this.value = v; } + private TRITONSERVER_ModelIndexFlag(TRITONSERVER_ModelIndexFlag e) { this.value = e.value; } + public TRITONSERVER_ModelIndexFlag intern() { for (TRITONSERVER_ModelIndexFlag e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + +/** Model transaction policy flags. The enum values must be + * power-of-2 values. */ +public enum TRITONSERVER_ModelTxnPropertyFlag { + TRITONSERVER_TXN_ONE_TO_ONE(1), + TRITONSERVER_TXN_DECOUPLED(2); + + public final int value; + private TRITONSERVER_ModelTxnPropertyFlag(int v) { this.value = v; } + private TRITONSERVER_ModelTxnPropertyFlag(TRITONSERVER_ModelTxnPropertyFlag e) { this.value = e.value; } + public TRITONSERVER_ModelTxnPropertyFlag intern() { for (TRITONSERVER_ModelTxnPropertyFlag e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + +/** Create a new server object. The caller takes ownership of the + * TRITONSERVER_Server object and must call TRITONSERVER_ServerDelete + * to release the object. + * + * @param server Returns the new inference server object. 
+ * @param options The inference server options object. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerNew( + @Cast("TRITONSERVER_Server**") PointerPointer server, TRITONSERVER_ServerOptions options); +public static native TRITONSERVER_Error TRITONSERVER_ServerNew( + @ByPtrPtr TRITONSERVER_Server server, TRITONSERVER_ServerOptions options); + +/** Delete a server object. If server is not already stopped it is + * stopped before being deleted. + * + * @param server The inference server object. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerDelete( + TRITONSERVER_Server server); + +/** Stop a server object. A server can't be restarted once it is + * stopped. + * + * @param server The inference server object. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerStop( + TRITONSERVER_Server server); + +/** Check the model repository for changes and update server state + * based on those changes. + * + * @param server The inference server object. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerPollModelRepository(TRITONSERVER_Server server); + +/** Is the server live? + * + * @param server The inference server object. + * @param live Returns true if server is live, false otherwise. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerIsLive( + TRITONSERVER_Server server, @Cast("bool*") BoolPointer live); +public static native TRITONSERVER_Error TRITONSERVER_ServerIsLive( + TRITONSERVER_Server server, @Cast("bool*") boolean[] live); + +/** Is the server ready? + * + * @param server The inference server object. 
+ * @param ready Returns true if server is ready, false otherwise. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerIsReady( + TRITONSERVER_Server server, @Cast("bool*") BoolPointer ready); +public static native TRITONSERVER_Error TRITONSERVER_ServerIsReady( + TRITONSERVER_Server server, @Cast("bool*") boolean[] ready); + +/** Is the model ready? + * + * @param server The inference server object. + * @param model_name The name of the model to get readiness for. + * @param model_version The version of the model to get readiness + * for. If -1 then the server will choose a version based on the + * model's policy. + * @param ready Returns true if server is ready, false otherwise. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +/// +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerModelIsReady( + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version, @Cast("bool*") BoolPointer ready); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelIsReady( + TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name, + @Cast("const int64_t") long model_version, @Cast("bool*") boolean[] ready); + +/** Get the batch properties of the model. The properties are + * communicated by a flags value and an (optional) object returned by + * 'voidp'. + * + * - TRITONSERVER_BATCH_UNKNOWN: Triton cannot determine the + * batching properties of the model. This means that the model + * does not support batching in any way that is useable by + * Triton. The returned 'voidp' value is nullptr. + * + * - TRITONSERVER_BATCH_FIRST_DIM: The model supports batching + * along the first dimension of every input and output + * tensor. Triton schedulers that perform batching can + * automatically batch inference requests along this dimension. + * The returned 'voidp' value is nullptr. 
+ * + * @param server The inference server object. + * @param model_name The name of the model. + * @param model_version The version of the model. If -1 then the + * server will choose a version based on the model's policy. + * @param flags Returns flags indicating the batch properties of the + * model. + * @param voidp If non-nullptr, returns a point specific to the + * 'flags' value. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +/// +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerModelBatchProperties( + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version, @Cast("uint32_t*") IntPointer flags, @Cast("void**") PointerPointer voidp); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelBatchProperties( + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version, @Cast("uint32_t*") IntPointer flags, @Cast("void**") @ByPtrPtr Pointer voidp); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelBatchProperties( + TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name, + @Cast("const int64_t") long model_version, @Cast("uint32_t*") IntBuffer flags, @Cast("void**") @ByPtrPtr Pointer voidp); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelBatchProperties( + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version, @Cast("uint32_t*") int[] flags, @Cast("void**") @ByPtrPtr Pointer voidp); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelBatchProperties( + TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name, + @Cast("const int64_t") long model_version, @Cast("uint32_t*") IntPointer flags, @Cast("void**") @ByPtrPtr Pointer voidp); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelBatchProperties( + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version, @Cast("uint32_t*") IntBuffer 
flags, @Cast("void**") @ByPtrPtr Pointer voidp); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelBatchProperties( + TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name, + @Cast("const int64_t") long model_version, @Cast("uint32_t*") int[] flags, @Cast("void**") @ByPtrPtr Pointer voidp); + +/** Get the transaction policy of the model. The policy is + * communicated by a flags value. + * + * - TRITONSERVER_TXN_ONE_TO_ONE: The model generates exactly + * one response per request. + * + * - TRITONSERVER_TXN_DECOUPLED: The model may generate zero + * to many responses per request. + * + * @param server The inference server object. + * @param model_name The name of the model. + * @param model_version The version of the model. If -1 then the + * server will choose a version based on the model's policy. + * @param txn_flags Returns flags indicating the transaction policy of the + * model. + * @param voidp If non-nullptr, returns a pointer specific to the 'flags' value. + * @return a TRITONSERVER_Error indicating success or failure. 
*/ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerModelTransactionProperties( + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version, @Cast("uint32_t*") IntPointer txn_flags, @Cast("void**") PointerPointer voidp); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelTransactionProperties( + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version, @Cast("uint32_t*") IntPointer txn_flags, @Cast("void**") @ByPtrPtr Pointer voidp); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelTransactionProperties( + TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name, + @Cast("const int64_t") long model_version, @Cast("uint32_t*") IntBuffer txn_flags, @Cast("void**") @ByPtrPtr Pointer voidp); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelTransactionProperties( + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version, @Cast("uint32_t*") int[] txn_flags, @Cast("void**") @ByPtrPtr Pointer voidp); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelTransactionProperties( + TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name, + @Cast("const int64_t") long model_version, @Cast("uint32_t*") IntPointer txn_flags, @Cast("void**") @ByPtrPtr Pointer voidp); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelTransactionProperties( + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version, @Cast("uint32_t*") IntBuffer txn_flags, @Cast("void**") @ByPtrPtr Pointer voidp); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelTransactionProperties( + TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name, + @Cast("const int64_t") long model_version, @Cast("uint32_t*") int[] txn_flags, @Cast("void**") @ByPtrPtr Pointer voidp); + +/** Get the metadata of the server as a TRITONSERVER_Message object. 
+ * The caller takes ownership of the message object and must call + * TRITONSERVER_MessageDelete to release the object. + * + * @param server The inference server object. + * @param server_metadata Returns the server metadata message. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerMetadata( + TRITONSERVER_Server server, @Cast("TRITONSERVER_Message**") PointerPointer server_metadata); +public static native TRITONSERVER_Error TRITONSERVER_ServerMetadata( + TRITONSERVER_Server server, @ByPtrPtr TRITONSERVER_Message server_metadata); + +/** Get the metadata of a model as a TRITONSERVER_Message + * object. The caller takes ownership of the message object and must + * call TRITONSERVER_MessageDelete to release the object. + * + * @param server The inference server object. + * @param model_name The name of the model. + * @param model_version The version of the model. + * If -1 then the server will choose a version based on the model's + * policy. + * @param model_metadata Returns the model metadata message. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerModelMetadata( + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version, @Cast("TRITONSERVER_Message**") PointerPointer model_metadata); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelMetadata( + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version, @ByPtrPtr TRITONSERVER_Message model_metadata); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelMetadata( + TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name, + @Cast("const int64_t") long model_version, @ByPtrPtr TRITONSERVER_Message model_metadata); + +/** Get the statistics of a model as a TRITONSERVER_Message + * object. 
The caller takes ownership of the object and must call + * TRITONSERVER_MessageDelete to release the object. + * + * @param server The inference server object. + * @param model_name The name of the model. + * If empty, then statistics for all available models will be returned, + * and the server will choose a version based on those models' policies. + * @param model_version The version of the model. If -1 then the + * server will choose a version based on the model's policy. + * @param model_stats Returns the model statistics message. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerModelStatistics( + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version, @Cast("TRITONSERVER_Message**") PointerPointer model_stats); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelStatistics( + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version, @ByPtrPtr TRITONSERVER_Message model_stats); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelStatistics( + TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name, + @Cast("const int64_t") long model_version, @ByPtrPtr TRITONSERVER_Message model_stats); + +/** Get the configuration of a model as a TRITONSERVER_Message object. + * The caller takes ownership of the message object and must call + * TRITONSERVER_MessageDelete to release the object. + * + * @param server The inference server object. + * @param model_name The name of the model. + * @param model_version The version of the model. If -1 then the + * server will choose a version based on the model's policy. + * @param config_version The model configuration will be returned in + * a format matching this version. If the configuration cannot be + * represented in the requested version's format then an error will + * be returned. Currently only version 1 is supported. 
+ * @param model_config Returns the model config message. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerModelConfig( + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version, @Cast("const uint32_t") int config_version, + @Cast("TRITONSERVER_Message**") PointerPointer model_config); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelConfig( + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version, @Cast("const uint32_t") int config_version, + @ByPtrPtr TRITONSERVER_Message model_config); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelConfig( + TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name, + @Cast("const int64_t") long model_version, @Cast("const uint32_t") int config_version, + @ByPtrPtr TRITONSERVER_Message model_config); + +/** Get the index of all unique models in the model repositories as a + * TRITONSERVER_Message object. The caller takes ownership of the + * message object and must call TRITONSERVER_MessageDelete to release + * the object. + * + * If TRITONSERVER_INDEX_FLAG_READY is set in 'flags' only the models + * that are loaded into the server and ready for inferencing are + * returned. + * + * @param server The inference server object. + * @param flags TRITONSERVER_ModelIndexFlag flags that control how to + * collect the index. + * @param model_index Return the model index message that holds the + * index of all models contained in the server's model repository(s). + * @return a TRITONSERVER_Error indicating success or failure. 
*/ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerModelIndex( + TRITONSERVER_Server server, @Cast("uint32_t") int flags, + @Cast("TRITONSERVER_Message**") PointerPointer model_index); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelIndex( + TRITONSERVER_Server server, @Cast("uint32_t") int flags, + @ByPtrPtr TRITONSERVER_Message model_index); + +/** Load the requested model or reload the model if it is already + * loaded. The function does not return until the model is loaded or + * fails to load. Returned error indicates if model loaded + * successfully or not. + * + * @param server The inference server object. + * @param model_name The name of the model. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerLoadModel( + TRITONSERVER_Server server, String model_name); +public static native TRITONSERVER_Error TRITONSERVER_ServerLoadModel( + TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name); + +/** Unload the requested model. Unloading a model that is not loaded + * on server has no affect and success code will be returned. + * The function does not wait for the requested model to be fully unload + * and success code will be returned. + * Returned error indicates if model unloaded successfully or not. + * + * @param server The inference server object. + * @param model_name The name of the model. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerUnloadModel( + TRITONSERVER_Server server, String model_name); +public static native TRITONSERVER_Error TRITONSERVER_ServerUnloadModel( + TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name); + +/** Unload the requested model, and also unload any dependent model that + * was loaded along with the requested model (for example, the models composing + * an ensemble). 
Unloading a model that is not loaded + * on server has no affect and success code will be returned. + * The function does not wait for the requested model and all dependent + * models to be fully unload and success code will be returned. + * Returned error indicates if model unloaded successfully or not. + * + * @param server The inference server object. + * @param model_name The name of the model. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerUnloadModelAndDependents( + TRITONSERVER_Server server, String model_name); +public static native TRITONSERVER_Error TRITONSERVER_ServerUnloadModelAndDependents( + TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name); + +/** Get the current metrics for the server. The caller takes ownership + * of the metrics object and must call TRITONSERVER_MetricsDelete to + * release the object. + * + * @param server The inference server object. + * @param metrics Returns the metrics. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +/// +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerMetrics( + TRITONSERVER_Server server, @Cast("TRITONSERVER_Metrics**") PointerPointer metrics); +public static native TRITONSERVER_Error TRITONSERVER_ServerMetrics( + TRITONSERVER_Server server, @ByPtrPtr TRITONSERVER_Metrics metrics); + +/** Perform inference using the meta-data and inputs supplied by the + * 'inference_request'. If the function returns success, then the + * caller releases ownership of 'inference_request' and must not + * access it in any way after this call, until ownership is returned + * via the 'request_release_fn' callback registered in the request + * object with TRITONSERVER_InferenceRequestSetReleaseCallback. 
+ * + * The function unconditionally takes ownership of 'trace' and so the + * caller must not access it in any way after this call (except in + * the trace id callback) until ownership is returned via the trace's + * release_fn callback. + * + * Responses produced for this request are returned using the + * allocator and callback registered with the request by + * TRITONSERVER_InferenceRequestSetResponseCallback. + * + * @param server The inference server object. + * @param inference_request The request object. + * @param trace The trace object for this request, or nullptr if no + * tracing. + * @return a TRITONSERVER_Error indicating success or failure. */ +public static native TRITONSERVER_Error TRITONSERVER_ServerInferAsync( + TRITONSERVER_Server server, + TRITONSERVER_InferenceRequest inference_request, + TRITONSERVER_InferenceTrace trace); + + +// #ifdef __cplusplus +// #endif + + +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Backend.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Backend.java new file mode 100644 index 00000000000..fd26f3af2e6 --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Backend.java @@ -0,0 +1,35 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import 
org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + +@Opaque @Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONBACKEND_Backend extends Pointer { + /** Empty constructor. Calls {@code super((Pointer)null)}. */ + public TRITONBACKEND_Backend() { super((Pointer)null); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public TRITONBACKEND_Backend(Pointer p) { super(p); } +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Input.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Input.java new file mode 100644 index 00000000000..38af26dcaf8 --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Input.java @@ -0,0 +1,35 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import 
org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + +@Opaque @Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONBACKEND_Input extends Pointer { + /** Empty constructor. Calls {@code super((Pointer)null)}. */ + public TRITONBACKEND_Input() { super((Pointer)null); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public TRITONBACKEND_Input(Pointer p) { super(p); } +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_MemoryManager.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_MemoryManager.java new file mode 100644 index 00000000000..30889ef0c39 --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_MemoryManager.java @@ -0,0 +1,38 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import 
org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + +// #endif +// #endif + +@Opaque @Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONBACKEND_MemoryManager extends Pointer { + /** Empty constructor. Calls {@code super((Pointer)null)}. */ + public TRITONBACKEND_MemoryManager() { super((Pointer)null); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public TRITONBACKEND_MemoryManager(Pointer p) { super(p); } +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Model.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Model.java new file mode 100644 index 00000000000..d8ce8278d6c --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Model.java @@ -0,0 +1,35 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + 
+import static org.bytedeco.tritonserver.global.tritonserver.*; + +@Opaque @Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONBACKEND_Model extends Pointer { + /** Empty constructor. Calls {@code super((Pointer)null)}. */ + public TRITONBACKEND_Model() { super((Pointer)null); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public TRITONBACKEND_Model(Pointer p) { super(p); } +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_ModelInstance.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_ModelInstance.java new file mode 100644 index 00000000000..aba2d9db0dc --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_ModelInstance.java @@ -0,0 +1,41 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + + + +/// +/// +/// +/// +@Opaque @Properties(inherit = 
org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONBACKEND_ModelInstance extends Pointer { + /** Empty constructor. Calls {@code super((Pointer)null)}. */ + public TRITONBACKEND_ModelInstance() { super((Pointer)null); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public TRITONBACKEND_ModelInstance(Pointer p) { super(p); } +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Output.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Output.java new file mode 100644 index 00000000000..0aa168c357e --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Output.java @@ -0,0 +1,35 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + +@Opaque @Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONBACKEND_Output extends Pointer { + /** Empty 
constructor. Calls {@code super((Pointer)null)}. */ + public TRITONBACKEND_Output() { super((Pointer)null); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public TRITONBACKEND_Output(Pointer p) { super(p); } +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Request.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Request.java new file mode 100644 index 00000000000..7977233caec --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Request.java @@ -0,0 +1,35 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + +@Opaque @Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONBACKEND_Request extends Pointer { + /** Empty constructor. Calls {@code super((Pointer)null)}. 
*/ + public TRITONBACKEND_Request() { super((Pointer)null); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public TRITONBACKEND_Request(Pointer p) { super(p); } +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Response.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Response.java new file mode 100644 index 00000000000..04a8e715fa3 --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Response.java @@ -0,0 +1,35 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + +@Opaque @Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONBACKEND_Response extends Pointer { + /** Empty constructor. Calls {@code super((Pointer)null)}. */ + public TRITONBACKEND_Response() { super((Pointer)null); } + /** Pointer cast constructor. 
Invokes {@link Pointer#Pointer(Pointer)}. */ + public TRITONBACKEND_Response(Pointer p) { super(p); } +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_ResponseFactory.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_ResponseFactory.java new file mode 100644 index 00000000000..a6985c55627 --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_ResponseFactory.java @@ -0,0 +1,35 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + +@Opaque @Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONBACKEND_ResponseFactory extends Pointer { + /** Empty constructor. Calls {@code super((Pointer)null)}. */ + public TRITONBACKEND_ResponseFactory() { super((Pointer)null); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public TRITONBACKEND_ResponseFactory(Pointer p) { super(p); } +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONREPOAGENT_Agent.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONREPOAGENT_Agent.java new file mode 100644 index 00000000000..b644bb6fcf4 --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONREPOAGENT_Agent.java @@ -0,0 +1,38 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + +// #endif +// #endif + +@Opaque @Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONREPOAGENT_Agent extends Pointer { + /** Empty constructor. Calls {@code super((Pointer)null)}. */ + public TRITONREPOAGENT_Agent() { super((Pointer)null); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public TRITONREPOAGENT_Agent(Pointer p) { super(p); } +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONREPOAGENT_AgentModel.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONREPOAGENT_AgentModel.java new file mode 100644 index 00000000000..07bf751648f --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONREPOAGENT_AgentModel.java @@ -0,0 +1,41 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + + + +/// +/// +/// +/// +@Opaque @Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONREPOAGENT_AgentModel extends Pointer { + /** Empty constructor. Calls {@code super((Pointer)null)}. */ + public TRITONREPOAGENT_AgentModel() { super((Pointer)null); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public TRITONREPOAGENT_AgentModel(Pointer p) { super(p); } +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_Error.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_Error.java new file mode 100644 index 00000000000..07037256c2b --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_Error.java @@ -0,0 +1,38 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + +// #endif +// #endif + +@Opaque @Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONSERVER_Error extends Pointer { + /** Empty constructor. Calls {@code super((Pointer)null)}. */ + public TRITONSERVER_Error() { super((Pointer)null); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public TRITONSERVER_Error(Pointer p) { super(p); } +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceRequest.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceRequest.java new file mode 100644 index 00000000000..afc9981227a --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceRequest.java @@ -0,0 +1,35 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + +@Opaque @Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONSERVER_InferenceRequest extends Pointer { + /** Empty constructor. Calls {@code super((Pointer)null)}. */ + public TRITONSERVER_InferenceRequest() { super((Pointer)null); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public TRITONSERVER_InferenceRequest(Pointer p) { super(p); } +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceRequestReleaseFn_t.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceRequestReleaseFn_t.java new file mode 100644 index 00000000000..c45b14de79d --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceRequestReleaseFn_t.java @@ -0,0 +1,64 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + + +/** Type for inference request release callback function. The callback + * indicates what type of release is being performed on the request + * and for some of these the callback function takes ownership of the + * TRITONSERVER_InferenceRequest object. The 'userp' data is the data + * provided as 'request_release_userp' in the call to + * TRITONSERVER_InferenceRequestSetReleaseCallback. 
+ * + * One or more flags will be specified when the callback is invoked, + * and the callback must take the following actions: + * + * - TRITONSERVER_REQUEST_RELEASE_ALL: The entire inference request + * is being released and ownership is passed to the callback + * function. Triton will not longer access the 'request' object + * itself nor any input tensor data associated with the + * request. The callback should free or otherwise manage the + * 'request' object and all associated tensor data. + * + * Note that currently TRITONSERVER_REQUEST_RELEASE_ALL should always + * be set when the callback is invoked but in the future that may + * change, so the callback should explicitly check for the flag + * before taking ownership of the request object. + * */ + +/// +/// +@Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONSERVER_InferenceRequestReleaseFn_t extends FunctionPointer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public TRITONSERVER_InferenceRequestReleaseFn_t(Pointer p) { super(p); } + protected TRITONSERVER_InferenceRequestReleaseFn_t() { allocate(); } + private native void allocate(); + public native void call( + TRITONSERVER_InferenceRequest request, @Cast("const uint32_t") int flags, Pointer userp); +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceResponse.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceResponse.java new file mode 100644 index 00000000000..9fdf58ec38f --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceResponse.java @@ -0,0 +1,35 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + +@Opaque @Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONSERVER_InferenceResponse extends Pointer { + /** Empty constructor. 
Calls {@code super((Pointer)null)}. */ + public TRITONSERVER_InferenceResponse() { super((Pointer)null); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public TRITONSERVER_InferenceResponse(Pointer p) { super(p); } +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceResponseCompleteFn_t.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceResponseCompleteFn_t.java new file mode 100644 index 00000000000..76d544843c2 --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceResponseCompleteFn_t.java @@ -0,0 +1,59 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + + +/** Type for callback function indicating that an inference response + * has completed. The callback function takes ownership of the + * TRITONSERVER_InferenceResponse object. 
The 'userp' data is the + * data provided as 'response_userp' in the call to + * TRITONSERVER_InferenceRequestSetResponseCallback. + * + * One or more flags may be specified when the callback is invoked: + * + * - TRITONSERVER_RESPONSE_COMPLETE_FINAL: Indicates that no more + * responses will be generated for a given request (more + * specifically, that no more responses will be generated for the + * inference request that set this callback and 'userp'). When + * this flag is set 'response' may be a response object or may be + * nullptr. If 'response' is not nullptr, then 'response' is the + * last response that Triton will produce for the request. If + * 'response' is nullptr then Triton is indicating that no more + * responses will be produced for the request. */ + +/// +@Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONSERVER_InferenceResponseCompleteFn_t extends FunctionPointer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public TRITONSERVER_InferenceResponseCompleteFn_t(Pointer p) { super(p); } + protected TRITONSERVER_InferenceResponseCompleteFn_t() { allocate(); } + private native void allocate(); + public native void call( + TRITONSERVER_InferenceResponse response, @Cast("const uint32_t") int flags, + Pointer userp); +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceTrace.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceTrace.java new file mode 100644 index 00000000000..ba190840c1d --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceTrace.java @@ -0,0 +1,35 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + +@Opaque @Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONSERVER_InferenceTrace extends Pointer { + /** Empty constructor. 
Calls {@code super((Pointer)null)}. */ + public TRITONSERVER_InferenceTrace() { super((Pointer)null); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public TRITONSERVER_InferenceTrace(Pointer p) { super(p); } +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceTraceActivityFn_t.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceTraceActivityFn_t.java new file mode 100644 index 00000000000..4b0fad3bd72 --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceTraceActivityFn_t.java @@ -0,0 +1,47 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + + +/** Type for trace activity callback function. This callback function + * is used to report activity occurring for a trace. 
This function + * does not take ownership of 'trace' and so any information needed + * from that object must be copied before returning. The 'userp' data + * is the same as what is supplied in the call to + * TRITONSERVER_InferenceTraceNew. */ +@Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONSERVER_InferenceTraceActivityFn_t extends FunctionPointer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public TRITONSERVER_InferenceTraceActivityFn_t(Pointer p) { super(p); } + protected TRITONSERVER_InferenceTraceActivityFn_t() { allocate(); } + private native void allocate(); + public native void call( + TRITONSERVER_InferenceTrace trace, + TRITONSERVER_InferenceTraceActivity activity, @Cast("uint64_t") long timestamp_ns, + Pointer userp); +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceTraceReleaseFn_t.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceTraceReleaseFn_t.java new file mode 100644 index 00000000000..c035b0838d1 --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceTraceReleaseFn_t.java @@ -0,0 +1,48 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import 
org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + + +/** Type for trace release callback function. This callback function + * is called when all activity for the trace has completed. The + * callback function takes ownership of the + * TRITONSERVER_InferenceTrace object. The 'userp' data is the same + * as what is supplied in the call to TRITONSERVER_InferenceTraceNew. */ + +/// +/// +/// +@Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONSERVER_InferenceTraceReleaseFn_t extends FunctionPointer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public TRITONSERVER_InferenceTraceReleaseFn_t(Pointer p) { super(p); } + protected TRITONSERVER_InferenceTraceReleaseFn_t() { allocate(); } + private native void allocate(); + public native void call( + TRITONSERVER_InferenceTrace trace, Pointer userp); +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_Message.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_Message.java new file mode 100644 index 00000000000..54cc3e19e4c --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_Message.java @@ -0,0 +1,35 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import 
org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + +@Opaque @Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONSERVER_Message extends Pointer { + /** Empty constructor. Calls {@code super((Pointer)null)}. */ + public TRITONSERVER_Message() { super((Pointer)null); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public TRITONSERVER_Message(Pointer p) { super(p); } +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_Metrics.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_Metrics.java new file mode 100644 index 00000000000..7eb8af9bbe6 --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_Metrics.java @@ -0,0 +1,35 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + +@Opaque @Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONSERVER_Metrics extends Pointer { + /** Empty constructor. Calls {@code super((Pointer)null)}. */ + public TRITONSERVER_Metrics() { super((Pointer)null); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public TRITONSERVER_Metrics(Pointer p) { super(p); } +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocator.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocator.java new file mode 100644 index 00000000000..a4d00fdc4d4 --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocator.java @@ -0,0 +1,35 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + +@Opaque @Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONSERVER_ResponseAllocator extends Pointer { + /** Empty constructor. Calls {@code super((Pointer)null)}. */ + public TRITONSERVER_ResponseAllocator() { super((Pointer)null); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public TRITONSERVER_ResponseAllocator(Pointer p) { super(p); } +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocatorAllocFn_t.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocatorAllocFn_t.java new file mode 100644 index 00000000000..3a645526d6c --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocatorAllocFn_t.java @@ -0,0 +1,81 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + + +/** TRITONSERVER_ResponseAllocator + * + * Object representing a memory allocator for output tensors in an + * inference response. + * +

+ * Type for allocation function that allocates a buffer to hold an + * output tensor. + * + * @param allocator The allocator that is provided in the call to + * TRITONSERVER_InferenceRequestSetResponseCallback. + * @param tensor_name The name of the output tensor to allocate for. + * @param byte_size The size of the buffer to allocate. + * @param memory_type The type of memory that the caller prefers for + * the buffer allocation. + * @param memory_type_id The ID of the memory that the caller prefers + * for the buffer allocation. + * @param userp The user data pointer that is provided as + * 'response_allocator_userp' in the call to + * TRITONSERVER_InferenceRequestSetResponseCallback. + * @param buffer Returns a pointer to the allocated memory. + * @param buffer_userp Returns a user-specified value to associate + * with the buffer, or nullptr if no user-specified value should be + * associated with the buffer. This value will be provided in the + * call to TRITONSERVER_ResponseAllocatorReleaseFn_t when the buffer + * is released and will also be returned by + * TRITONSERVER_InferenceResponseOutput. + * @param actual_memory_type Returns the type of memory where the + * allocation resides. May be different than the type of memory + * requested by 'memory_type'. + * @param actual_memory_type_id Returns the ID of the memory where + * the allocation resides. May be different than the ID of the memory + * requested by 'memory_type_id'. + * @return a TRITONSERVER_Error object if a failure occurs while + * attempting an allocation. If an error is returned all other return + * values will be ignored. */ + +/// +@Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONSERVER_ResponseAllocatorAllocFn_t extends FunctionPointer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public TRITONSERVER_ResponseAllocatorAllocFn_t(Pointer p) { super(p); } + protected TRITONSERVER_ResponseAllocatorAllocFn_t() { allocate(); } + private native void allocate(); + public native TRITONSERVER_Error call( + TRITONSERVER_ResponseAllocator allocator, String tensor_name, + @Cast("size_t") long byte_size, TRITONSERVER_MemoryType memory_type, + @Cast("int64_t") long memory_type_id, Pointer userp, @Cast("void**") PointerPointer buffer, @Cast("void**") PointerPointer buffer_userp, + @Cast("TRITONSERVER_MemoryType*") IntPointer actual_memory_type, + @Cast("int64_t*") LongPointer actual_memory_type_id); +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocatorReleaseFn_t.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocatorReleaseFn_t.java new file mode 100644 index 00000000000..b4fd4977476 --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocatorReleaseFn_t.java @@ -0,0 +1,60 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import 
org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + + +/** Type for function that is called when the server no longer holds + * any reference to a buffer allocated by + * TRITONSERVER_ResponseAllocatorAllocFn_t. In practice this function + * is typically called when the response object associated with the + * buffer is deleted by TRITONSERVER_InferenceResponseDelete. + * + * @param allocator The allocator that is provided in the call to + * TRITONSERVER_InferenceRequestSetResponseCallback. + * @param buffer Pointer to the buffer to be freed. + * @param buffer_userp The user-specified value associated + * with the buffer in TRITONSERVER_ResponseAllocatorAllocFn_t. + * @param byte_size The size of the buffer. + * @param memory_type The type of memory holding the buffer. + * @param memory_type_id The ID of the memory holding the buffer. + * @return a TRITONSERVER_Error object if a failure occurs while + * attempting the release. If an error is returned Triton will not + * attempt to release the buffer again. */ + +/// +@Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONSERVER_ResponseAllocatorReleaseFn_t extends FunctionPointer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public TRITONSERVER_ResponseAllocatorReleaseFn_t(Pointer p) { super(p); } + protected TRITONSERVER_ResponseAllocatorReleaseFn_t() { allocate(); } + private native void allocate(); + public native TRITONSERVER_Error call( + TRITONSERVER_ResponseAllocator allocator, Pointer buffer, Pointer buffer_userp, + @Cast("size_t") long byte_size, TRITONSERVER_MemoryType memory_type, + @Cast("int64_t") long memory_type_id); +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocatorStartFn_t.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocatorStartFn_t.java new file mode 100644 index 00000000000..fc0b4fb948c --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocatorStartFn_t.java @@ -0,0 +1,55 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + + +/** Type for function that is called to indicate that 
subsequent + * allocation requests will refer to a new response. + * + * @param allocator The allocator that is provided in the call to + * TRITONSERVER_InferenceRequestSetResponseCallback. + * @param userp The user data pointer that is provided as + * 'response_allocator_userp' in the call to + * TRITONSERVER_InferenceRequestSetResponseCallback. + * @return a TRITONSERVER_Error object if a failure occurs. */ + +/// +/// +/// +/// +/// +/// +@Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONSERVER_ResponseAllocatorStartFn_t extends FunctionPointer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public TRITONSERVER_ResponseAllocatorStartFn_t(Pointer p) { super(p); } + protected TRITONSERVER_ResponseAllocatorStartFn_t() { allocate(); } + private native void allocate(); + public native TRITONSERVER_Error call( + TRITONSERVER_ResponseAllocator allocator, Pointer userp); +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_Server.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_Server.java new file mode 100644 index 00000000000..df7d02cc3d2 --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_Server.java @@ -0,0 +1,35 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import 
org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + +@Opaque @Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONSERVER_Server extends Pointer { + /** Empty constructor. Calls {@code super((Pointer)null)}. */ + public TRITONSERVER_Server() { super((Pointer)null); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public TRITONSERVER_Server(Pointer p) { super(p); } +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ServerOptions.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ServerOptions.java new file mode 100644 index 00000000000..a40c7cb81f6 --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ServerOptions.java @@ -0,0 +1,41 @@ +// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import 
org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + + + +/// +/// +/// +/// +@Opaque @Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONSERVER_ServerOptions extends Pointer { + /** Empty constructor. Calls {@code super((Pointer)null)}. */ + public TRITONSERVER_ServerOptions() { super((Pointer)null); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public TRITONSERVER_ServerOptions(Pointer p) { super(p); } +} diff --git a/tritonserver/src/main/java/org/bytedeco/tritonserver/presets/tritonserver.java b/tritonserver/src/main/java/org/bytedeco/tritonserver/presets/tritonserver.java new file mode 100644 index 00000000000..55f5af35bcd --- /dev/null +++ b/tritonserver/src/main/java/org/bytedeco/tritonserver/presets/tritonserver.java @@ -0,0 +1,127 @@ +/* + * Copyright (C) 2018-2021 Samuel Audet + * + * Licensed either under the Apache License, Version 2.0, or (at your option) + * under the terms of the GNU General Public License as published by + * the Free Software Foundation (subject to the "Classpath" exception), + * either version 2, or any later version (collectively, the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.gnu.org/licenses/ + * http://www.gnu.org/software/classpath/license.html + * + * or as provided in the LICENSE.txt file that accompanied this code. 
+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.bytedeco.tritonserver.presets; + +import java.util.List; +import org.bytedeco.javacpp.ClassProperties; +import org.bytedeco.javacpp.LoadEnabled; +import org.bytedeco.javacpp.Loader; +import org.bytedeco.javacpp.annotation.Platform; +import org.bytedeco.javacpp.annotation.Properties; +import org.bytedeco.javacpp.tools.Info; +import org.bytedeco.javacpp.tools.InfoMap; +import org.bytedeco.javacpp.tools.InfoMapper; + +import org.bytedeco.cuda.presets.cudart; +import org.bytedeco.cuda.presets.cublas; +import org.bytedeco.cuda.presets.cudnn; +import org.bytedeco.cuda.presets.nvrtc; +import org.bytedeco.tensorrt.presets.*; +//import org.bytedeco.tensorrt.presets.nvinfer; +//import org.bytedeco.tensorrt.presets.nvinfer_plugin; +//import org.bytedeco.tensorrt.presets.nvonnxparser; +//import org.bytedeco.tensorrt.presets.nvparsers; + +/** + * + * @author Samuel Audet + */ +@Properties( + inherit = {cublas.class, cudnn.class, nvrtc.class, nvinfer.class, nvinfer_plugin.class, nvonnxparser.class, nvparsers.class}, + value = { + @Platform( + value = {"linux-arm64", "linux-ppc64le", "linux-x86_64", "windows-x86_64"}, + compiler = "cpp11", + include = {"tritonbackend.h", "tritonrepoagent.h", "tritonserver.h"}, + link = "tritonserver" + ), + @Platform( + value = "linux-arm64", + includepath = {"/opt/tritonserver/include/triton/core/", "/opt/tritonserver/include/"}, + linkpath = {"/opt/tritonserver/lib/"} + ), + @Platform( + value = "linux-ppc64le", + includepath = {"/opt/tritonserver/include/triton/core/", "/opt/tritonserver/include/"}, + linkpath = {"/opt/tritonserver/lib/"} + ), + @Platform( + value = "linux-x86_64", + includepath = 
{"/opt/tritonserver/include/triton/core/", "/opt/tritonserver/include/"}, + linkpath = {"/opt/tritonserver/lib/"} + ), + @Platform( + value = "windows-x86_64", + includepath = "C:/Program Files/NVIDIA GPU Computing Toolkit/TensorRT/include", + linkpath = "C:/Program Files/NVIDIA GPU Computing Toolkit/TensorRT/lib/" + ) + }, + target = "org.bytedeco.tritonserver.tritonserver", + global = "org.bytedeco.tritonserver.global.tritonserver" +) +public class tritonserver implements LoadEnabled, InfoMapper { + static { Loader.checkVersion("org.bytedeco", "tritonserver"); } + + @Override public void init(ClassProperties properties) { + String platform = properties.getProperty("platform"); + List preloads = properties.get("platform.preload"); + List resources = properties.get("platform.preloadresource"); + + // Only apply this at load time since we don't want to copy the CUDA libraries here + if (!Loader.isLoadLibraries()) { + return; + } + int i = 0; + String[] libs = {"cudart", "cublasLt", "cublas", "cudnn", "nvrtc", + "cudnn_ops_infer", "cudnn_ops_train", "cudnn_adv_infer", + "cudnn_adv_train", "cudnn_cnn_infer", "cudnn_cnn_train", + "nvinfer", "nvinfer_plugin", "nvonnxparser", "nvparsers"}; + for (String lib : libs) { + if (platform.startsWith("linux")) { + lib += lib.startsWith("cudnn") ? "@.8" : lib.equals("cudart") ? "@.11.0" : lib.equals("nvrtc") ? "@.11.2" : "@.11"; + lib += lib.startsWith("nvinfer") ? "@.8" : lib.equals("nvonnxparser") ? "@.8" : lib.equals("nvparsers") ? "@.8" :"@.8"; + } else if (platform.startsWith("windows")) { + lib += lib.startsWith("cudnn") ? "64_8" : lib.equals("cudart") ? "64_110" : lib.equals("nvrtc") ? "64_112_0" : "64_11"; + lib += lib.startsWith("nvinfer") ? "64_8" : lib.equals("nvonnxparser") ? "64_8" : lib.equals("nvparsers") ? 
"64_8" :"64_8"; + } else { + continue; // no CUDA + } + if (!preloads.contains(lib)) { + preloads.add(i++, lib); + } + } + if (i > 0) { + resources.add("/org/bytedeco/cuda/"); + resources.add("/org/bytedeco/tensorrt/"); + } + } + + public void map(InfoMap infoMap) { + infoMap.put(new Info().enumerate()) + .put(new Info("TRITONSERVER_EXPORT").cppTypes().annotations()) + .put(new Info("TRITONSERVER_DECLSPEC").cppTypes().annotations()) + .put(new Info("TTRITONBACKEND_DECLSPEC", "TRITONBACKEND_ISPEC").cppTypes().annotations()) + .put(new Info("TRITONREPOAGENT_DECLSPEC", "TRITONREPOAGENT_ISPEC").cppTypes().annotations()) + ; + } +} diff --git a/tritonserver/src/main/java9/module-info.java b/tritonserver/src/main/java9/module-info.java new file mode 100644 index 00000000000..cd29e14ccd8 --- /dev/null +++ b/tritonserver/src/main/java9/module-info.java @@ -0,0 +1,8 @@ +module org.bytedeco.tritonserver { + requires transitive org.bytedeco.javacpp; + requires transitive org.bytedeco.cuda; + requires transitive org.bytedeco.tensorrt; + exports org.bytedeco.tensorrt.global; + exports org.bytedeco.tensorrt.presets; + exports org.bytedeco.tensorrt.tritonserver; +} From 4188be50af9c80c2bd89e067c73139375b030bbf Mon Sep 17 00:00:00 2001 From: jackyh Date: Fri, 17 Sep 2021 22:44:45 +0800 Subject: [PATCH 02/21] Update README.md --- tritonserver/README.md | 305 ----------------------------------------- 1 file changed, 305 deletions(-) diff --git a/tritonserver/README.md b/tritonserver/README.md index f157200b1be..8b137891791 100644 --- a/tritonserver/README.md +++ b/tritonserver/README.md @@ -1,306 +1 @@ -JavaCPP Presets for TensorRT -============================ -[![Gitter](https://badges.gitter.im/bytedeco/javacpp.svg)](https://gitter.im/bytedeco/javacpp) [![Maven Central](https://maven-badges.herokuapp.com/maven-central/org.bytedeco/tensorrt/badge.svg)](https://maven-badges.herokuapp.com/maven-central/org.bytedeco/tensorrt) [![Sonatype Nexus 
(Snapshots)](https://img.shields.io/nexus/s/https/oss.sonatype.org/org.bytedeco/tensorrt.svg)](http://bytedeco.org/builds/) -Build status for all platforms: [![tensorrt](https://github.com/bytedeco/javacpp-presets/workflows/tensorrt/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Atensorrt) Commercial support: [![xscode](https://img.shields.io/badge/Available%20on-xs%3Acode-blue?style=?style=plastic&logo=appveyor&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAEAAAABACAMAAACdt4HsAAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJbWFnZVJlYWR5ccllPAAAAAZQTFRF////////VXz1bAAAAAJ0Uk5T/wDltzBKAAAAlUlEQVR42uzXSwqAMAwE0Mn9L+3Ggtgkk35QwcnSJo9S+yGwM9DCooCbgn4YrJ4CIPUcQF7/XSBbx2TEz4sAZ2q1RAECBAiYBlCtvwN+KiYAlG7UDGj59MViT9hOwEqAhYCtAsUZvL6I6W8c2wcbd+LIWSCHSTeSAAECngN4xxIDSK9f4B9t377Wd7H5Nt7/Xz8eAgwAvesLRjYYPuUAAAAASUVORK5CYII=)](https://xscode.com/bytedeco/javacpp-presets) - - -License Agreements ------------------- -By downloading these archives, you agree to the terms of the license agreements for NVIDIA software included in the archives. - -### TensorRT -To view the license for TensorRT included in these archives, click [here](https://docs.nvidia.com/deeplearning/tensorrt/sla/) - - -Introduction ------------- -This directory contains the JavaCPP Presets module for: - - * TensorRT 8.0.1.6 https://developer.nvidia.com/tensorrt - -Please refer to the parent README.md file for more detailed information about the JavaCPP Presets. 
- - -Documentation -------------- -Java API documentation is available here: - - * http://bytedeco.org/javacpp-presets/tensorrt/apidocs/ - - -Sample Usage ------------- -Here is a simple example of TensorRT ported to Java from the `sampleGoogleNet.cpp` sample file included in `TensorRT-4.0.0.3.Ubuntu-16.04.4.x86_64-gnu.cuda-9.0.cudnn7.0.tar.gz` available at: - - * https://developer.nvidia.com/nvidia-tensorrt-download - -We can use [Maven 3](http://maven.apache.org/) to download and install automatically all the class files as well as the native binaries. To run this sample code, after creating the `pom.xml` and `SampleGoogleNet.java` source files below, simply execute on the command line: -```bash - $ mvn compile exec:java -``` - -### The `pom.xml` build file -```xml - - 4.0.0 - org.bytedeco.tensorrt - samplegooglenet - 1.5.6 - - SampleGoogleNet - - - - org.bytedeco - tensorrt-platform - 8.0-1.5.6 - - - - - org.bytedeco - cuda-platform-redist - 11.4-8.2-1.5.6 - - - org.bytedeco - tensorrt-platform-redist - 8.0-1.5.6 - - - - - . 
- - -``` - -### The `SampleGoogleNet.java` source file -```java -import java.io.*; -import java.util.*; -import org.bytedeco.javacpp.*; - -import org.bytedeco.cuda.cudart.*; -import org.bytedeco.tensorrt.nvinfer.*; -import org.bytedeco.tensorrt.nvparsers.*; -import static org.bytedeco.cuda.global.cudart.*; -import static org.bytedeco.tensorrt.global.nvinfer.*; -import static org.bytedeco.tensorrt.global.nvparsers.*; - -public class SampleGoogleNet { - static void CHECK(int status) - { - if (status != 0) - { - System.out.println("Cuda failure: " + status); - System.exit(6); - } - } - - // Logger for GIE info/warning/errors - static class Logger extends ILogger - { - @Override public void log(Severity severity, String msg) - { - severity = severity.intern(); - - // suppress info-level messages - if (severity == Severity.kINFO) return; - - switch (severity) - { - case kINTERNAL_ERROR: System.err.print("INTERNAL_ERROR: "); break; - case kERROR: System.err.print("ERROR: "); break; - case kWARNING: System.err.print("WARNING: "); break; - case kINFO: System.err.print("INFO: "); break; - default: System.err.print("UNKNOWN: "); break; - } - System.err.println(msg); - } - } - static Logger gLogger = new Logger(); - - static String locateFile(String input, String[] directories) - { - String file = ""; - int MAX_DEPTH = 10; - boolean found = false; - for (String dir : directories) - { - file = dir + input; - for (int i = 0; i < MAX_DEPTH && !found; i++) - { - File checkFile = new File(file); - found = checkFile.exists(); - if (found) break; - file = "../" + file; - } - if (found) break; - file = ""; - } - - if (file.isEmpty()) - System.err.println("Could not find a file due to it not existing in the data directory."); - return file; - } - - // stuff we know about the network and the caffe input/output blobs - - static int BATCH_SIZE = 4; - static int TIMING_ITERATIONS = 1000; - - static String INPUT_BLOB_NAME = "data"; - static String OUTPUT_BLOB_NAME = "prob"; - - - static 
String locateFile(String input) - { - String[] dirs = {"data/samples/googlenet/", "data/googlenet/"}; - return locateFile(input, dirs); - } - - static class Profiler extends IProfiler - { - LinkedHashMap mProfile = new LinkedHashMap(); - - @Override public void reportLayerTime(String layerName, float ms) - { - Float time = mProfile.get(layerName); - mProfile.put(layerName, (time != null ? time : 0) + ms); - } - - public void printLayerTimes() - { - float totalTime = 0; - for (Map.Entry e : mProfile.entrySet()) - { - System.out.printf("%-40.40s %4.3fms\n", e.getKey(), e.getValue() / TIMING_ITERATIONS); - totalTime += e.getValue(); - } - System.out.printf("Time over all layers: %4.3f\n", totalTime / TIMING_ITERATIONS); - } - - } - static Profiler gProfiler = new Profiler(); - - static void caffeToGIEModel(String deployFile, // name for caffe prototxt - String modelFile, // name for model - String[] outputs, // network outputs - int maxBatchSize, // batch size - NB must be at least as large as the batch we want to run with) - IHostMemory[] gieModelStream) - { - // create API root class - must span the lifetime of the engine usage - IBuilder builder = createInferBuilder(gLogger); - INetworkDefinition network = builder.createNetwork(); - - // parse the caffe model to populate the network, then set the outputs - ICaffeParser parser = createCaffeParser(); - - boolean useFp16 = builder.platformHasFastFp16(); - - DataType modelDataType = useFp16 ? 
DataType.kHALF : DataType.kFLOAT; // create a 16-bit model if it's natively supported - IBlobNameToTensor blobNameToTensor = - parser.parse(locateFile(deployFile), // caffe deploy file - locateFile(modelFile), // caffe model file - network, // network definition that the parser will populate - modelDataType); - - assert blobNameToTensor != null; - // the caffe file has no notion of outputs, so we need to manually say which tensors the engine should generate - for (String s : outputs) - network.markOutput(blobNameToTensor.find(s)); - - // Build the engine - builder.setMaxBatchSize(maxBatchSize); - builder.setMaxWorkspaceSize(16 << 20); - - // set up the network for paired-fp16 format if available - if(useFp16) - builder.setHalf2Mode(true); - - ICudaEngine engine = builder.buildCudaEngine(network); - assert engine != null; - - // we don't need the network any more, and we can destroy the parser - network.destroy(); - parser.destroy(); - - // serialize the engine, then close everything down - gieModelStream[0] = engine.serialize(); - engine.destroy(); - builder.destroy(); - shutdownProtobufLibrary(); - } - - static void timeInference(ICudaEngine engine, int batchSize) - { - // input and output buffer pointers that we pass to the engine - the engine requires exactly ICudaEngine::getNbBindings(), - // of these, but in this case we know that there is exactly one input and one output. - assert engine.getNbBindings() == 2; - PointerPointer buffers = new PointerPointer(2); - - // In order to bind the buffers, we need to know the names of the input and output tensors. 
- // note that indices are guaranteed to be less than ICudaEngine::getNbBindings() - int inputIndex = engine.getBindingIndex(INPUT_BLOB_NAME), outputIndex = engine.getBindingIndex(OUTPUT_BLOB_NAME); - - // allocate GPU buffers - DimsCHW inputDims = new DimsCHW(engine.getBindingDimensions(inputIndex)), outputDims = new DimsCHW(engine.getBindingDimensions(outputIndex)); - long inputSize = batchSize * inputDims.c().get() * inputDims.h().get() * inputDims.w().get() * Float.SIZE / 8; - long outputSize = batchSize * outputDims.c().get() * outputDims.h().get() * outputDims.w().get() * Float.SIZE / 8; - - CHECK(cudaMalloc(buffers.position(inputIndex), inputSize)); - CHECK(cudaMalloc(buffers.position(outputIndex), outputSize)); - - IExecutionContext context = engine.createExecutionContext(); - context.setProfiler(gProfiler); - - // zero the input buffer - CHECK(cudaMemset(buffers.position(inputIndex).get(), 0, inputSize)); - - for (int i = 0; i < TIMING_ITERATIONS;i++) - context.execute(batchSize, buffers.position(0)); - - // release the context and buffers - context.destroy(); - CHECK(cudaFree(buffers.position(inputIndex).get())); - CHECK(cudaFree(buffers.position(outputIndex).get())); - } - - - public static void main(String[] args) - { - System.out.println("Building and running a GPU inference engine for GoogleNet, N=4..."); - - // parse the caffe model and the mean file - IHostMemory[] gieModelStream = { null }; - caffeToGIEModel("googlenet.prototxt", "googlenet.caffemodel", new String[] { OUTPUT_BLOB_NAME }, BATCH_SIZE, gieModelStream); - - // create an engine - IRuntime infer = createInferRuntime(gLogger); - ICudaEngine engine = infer.deserializeCudaEngine(gieModelStream[0].data(), gieModelStream[0].size(), null); - - System.out.println("Bindings after deserializing:"); - for (int bi = 0; bi < engine.getNbBindings(); bi++) { - if (engine.bindingIsInput(bi)) { - System.out.printf("Binding %d (%s): Input.\n", bi, engine.getBindingName(bi)); - } else { - 
System.out.printf("Binding %d (%s): Output.\n", bi, engine.getBindingName(bi)); - } - } - - // run inference with null data to time network performance - timeInference(engine, BATCH_SIZE); - - engine.destroy(); - infer.destroy(); - - gProfiler.printLayerTimes(); - - System.out.println("Done."); - - System.exit(0); - } -} -``` From db9a1a96717191e9c6c6de340399dfa8f4b9c785 Mon Sep 17 00:00:00 2001 From: jackyh Date: Fri, 17 Sep 2021 22:47:01 +0800 Subject: [PATCH 03/21] Update tritonserver.java --- .../java/org/bytedeco/tritonserver/presets/tritonserver.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tritonserver/src/main/java/org/bytedeco/tritonserver/presets/tritonserver.java b/tritonserver/src/main/java/org/bytedeco/tritonserver/presets/tritonserver.java index 55f5af35bcd..28cef44e409 100644 --- a/tritonserver/src/main/java/org/bytedeco/tritonserver/presets/tritonserver.java +++ b/tritonserver/src/main/java/org/bytedeco/tritonserver/presets/tritonserver.java @@ -44,7 +44,7 @@ /** * - * @author Samuel Audet + * @author Jack He */ @Properties( inherit = {cublas.class, cudnn.class, nvrtc.class, nvinfer.class, nvinfer_plugin.class, nvonnxparser.class, nvparsers.class}, From d704682929652aedfa33e7890afb1a2debc6fe2a Mon Sep 17 00:00:00 2001 From: jackyh Date: Mon, 20 Sep 2021 08:22:24 +0000 Subject: [PATCH 04/21] fix one typo --- .../tritonserver/global/tritonserver.java | 307 +++++++++--------- .../tritonserver/presets/tritonserver.java | 20 +- 2 files changed, 165 insertions(+), 162 deletions(-) diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/global/tritonserver.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/global/tritonserver.java index 8840fbb4c7a..d4f1977d529 100644 --- a/tritonserver/src/gen/java/org/bytedeco/tritonserver/global/tritonserver.java +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/global/tritonserver.java @@ -67,10 +67,10 @@ public class tritonserver extends 
org.bytedeco.tritonserver.presets.tritonserver // #ifdef _COMPILING_TRITONBACKEND // #if defined(_MSC_VER) -public static native @MemberGetter int TRITONBACKEND_DECLSPEC(); -public static final int TRITONBACKEND_DECLSPEC = TRITONBACKEND_DECLSPEC(); +// #define TRITONBACKEND_DECLSPEC __declspec(dllexport) // #define TRITONBACKEND_ISPEC __declspec(dllimport) // #elif defined(__GNUC__) +// #define TRITONBACKEND_DECLSPEC __attribute__((__visibility__("default"))) // #define TRITONBACKEND_ISPEC // #else // #define TRITONBACKEND_DECLSPEC @@ -78,6 +78,7 @@ public class tritonserver extends org.bytedeco.tritonserver.presets.tritonserver // #endif // #else // #if defined(_MSC_VER) +// #define TRITONBACKEND_DECLSPEC __declspec(dllimport) // #define TRITONBACKEND_ISPEC __declspec(dllexport) // #else // #define TRITONBACKEND_DECLSPEC @@ -154,11 +155,11 @@ public class tritonserver extends org.bytedeco.tritonserver.presets.tritonserver /// /// /// -public static native IntPointer TRITONBACKEND_ApiVersion( +public static native TRITONSERVER_Error TRITONBACKEND_ApiVersion( @Cast("uint32_t*") IntPointer major, @Cast("uint32_t*") IntPointer minor); -public static native IntBuffer TRITONBACKEND_ApiVersion( +public static native TRITONSERVER_Error TRITONBACKEND_ApiVersion( @Cast("uint32_t*") IntBuffer major, @Cast("uint32_t*") IntBuffer minor); -public static native int[] TRITONBACKEND_ApiVersion( +public static native TRITONSERVER_Error TRITONBACKEND_ApiVersion( @Cast("uint32_t*") int[] major, @Cast("uint32_t*") int[] minor); /** TRITONBACKEND_ArtifactType @@ -216,11 +217,11 @@ public enum TRITONBACKEND_ArtifactType { * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native IntPointer TRITONBACKEND_MemoryManagerAllocate( +public static native TRITONSERVER_Error TRITONBACKEND_MemoryManagerAllocate( TRITONBACKEND_MemoryManager manager, @Cast("void**") PointerPointer buffer, @Const @ByVal TRITONSERVER_MemoryType memory_type, @Cast("const int64_t") long memory_type_id, @Cast("const uint64_t") long byte_size); -public static native IntPointer TRITONBACKEND_MemoryManagerAllocate( +public static native TRITONSERVER_Error TRITONBACKEND_MemoryManagerAllocate( TRITONBACKEND_MemoryManager manager, @Cast("void**") @ByPtrPtr Pointer buffer, @Const @ByVal TRITONSERVER_MemoryType memory_type, @Cast("const int64_t") long memory_type_id, @Cast("const uint64_t") long byte_size); @@ -242,7 +243,7 @@ public static native IntPointer TRITONBACKEND_MemoryManagerAllocate( /// /// /// -public static native IntPointer TRITONBACKEND_MemoryManagerFree( +public static native TRITONSERVER_Error TRITONBACKEND_MemoryManagerFree( TRITONBACKEND_MemoryManager manager, Pointer buffer, @Const @ByVal TRITONSERVER_MemoryType memory_type, @Cast("const int64_t") long memory_type_id); @@ -274,19 +275,19 @@ public static native IntPointer TRITONBACKEND_MemoryManagerFree( * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native IntPointer TRITONBACKEND_InputProperties( +public static native TRITONSERVER_Error TRITONBACKEND_InputProperties( TRITONBACKEND_Input input, @Cast("const char**") PointerPointer name, TRITONSERVER_DataType datatype, @Cast("const int64_t**") PointerPointer shape, @Cast("uint32_t*") IntPointer dims_count, @Cast("uint64_t*") LongPointer byte_size, @Cast("uint32_t*") IntPointer buffer_count); -public static native IntPointer TRITONBACKEND_InputProperties( +public static native TRITONSERVER_Error TRITONBACKEND_InputProperties( TRITONBACKEND_Input input, @Cast("const char**") @ByPtrPtr BytePointer name, TRITONSERVER_DataType datatype, @Cast("const int64_t**") @ByPtrPtr LongPointer shape, @Cast("uint32_t*") IntPointer dims_count, @Cast("uint64_t*") LongPointer byte_size, @Cast("uint32_t*") IntPointer buffer_count); -public static native IntBuffer TRITONBACKEND_InputProperties( +public static native TRITONSERVER_Error TRITONBACKEND_InputProperties( TRITONBACKEND_Input input, @Cast("const char**") @ByPtrPtr ByteBuffer name, TRITONSERVER_DataType datatype, @Cast("const int64_t**") @ByPtrPtr LongBuffer shape, @Cast("uint32_t*") IntBuffer dims_count, @Cast("uint64_t*") LongBuffer byte_size, @Cast("uint32_t*") IntBuffer buffer_count); -public static native int[] TRITONBACKEND_InputProperties( +public static native TRITONSERVER_Error TRITONBACKEND_InputProperties( TRITONBACKEND_Input input, @Cast("const char**") @ByPtrPtr byte[] name, TRITONSERVER_DataType datatype, @Cast("const int64_t**") @ByPtrPtr long[] shape, @Cast("uint32_t*") int[] dims_count, @Cast("uint64_t*") long[] byte_size, @Cast("uint32_t*") int[] buffer_count); @@ -317,31 +318,31 @@ public static native int[] TRITONBACKEND_InputProperties( * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native IntPointer TRITONBACKEND_InputPropertiesForHostPolicy( +public static native TRITONSERVER_Error TRITONBACKEND_InputPropertiesForHostPolicy( TRITONBACKEND_Input input, String host_policy_name, @Cast("const char**") PointerPointer name, TRITONSERVER_DataType datatype, @Cast("const int64_t**") PointerPointer shape, @Cast("uint32_t*") IntPointer dims_count, @Cast("uint64_t*") LongPointer byte_size, @Cast("uint32_t*") IntPointer buffer_count); -public static native IntPointer TRITONBACKEND_InputPropertiesForHostPolicy( +public static native TRITONSERVER_Error TRITONBACKEND_InputPropertiesForHostPolicy( TRITONBACKEND_Input input, String host_policy_name, @Cast("const char**") @ByPtrPtr BytePointer name, TRITONSERVER_DataType datatype, @Cast("const int64_t**") @ByPtrPtr LongPointer shape, @Cast("uint32_t*") IntPointer dims_count, @Cast("uint64_t*") LongPointer byte_size, @Cast("uint32_t*") IntPointer buffer_count); -public static native IntBuffer TRITONBACKEND_InputPropertiesForHostPolicy( +public static native TRITONSERVER_Error TRITONBACKEND_InputPropertiesForHostPolicy( TRITONBACKEND_Input input, @Cast("const char*") BytePointer host_policy_name, @Cast("const char**") @ByPtrPtr ByteBuffer name, TRITONSERVER_DataType datatype, @Cast("const int64_t**") @ByPtrPtr LongBuffer shape, @Cast("uint32_t*") IntBuffer dims_count, @Cast("uint64_t*") LongBuffer byte_size, @Cast("uint32_t*") IntBuffer buffer_count); -public static native int[] TRITONBACKEND_InputPropertiesForHostPolicy( +public static native TRITONSERVER_Error TRITONBACKEND_InputPropertiesForHostPolicy( TRITONBACKEND_Input input, String host_policy_name, @Cast("const char**") @ByPtrPtr byte[] name, TRITONSERVER_DataType datatype, @Cast("const int64_t**") @ByPtrPtr long[] shape, @Cast("uint32_t*") int[] dims_count, @Cast("uint64_t*") long[] byte_size, @Cast("uint32_t*") int[] buffer_count); -public static native IntPointer TRITONBACKEND_InputPropertiesForHostPolicy( +public static native 
TRITONSERVER_Error TRITONBACKEND_InputPropertiesForHostPolicy( TRITONBACKEND_Input input, @Cast("const char*") BytePointer host_policy_name, @Cast("const char**") @ByPtrPtr BytePointer name, TRITONSERVER_DataType datatype, @Cast("const int64_t**") @ByPtrPtr LongPointer shape, @Cast("uint32_t*") IntPointer dims_count, @Cast("uint64_t*") LongPointer byte_size, @Cast("uint32_t*") IntPointer buffer_count); -public static native IntBuffer TRITONBACKEND_InputPropertiesForHostPolicy( +public static native TRITONSERVER_Error TRITONBACKEND_InputPropertiesForHostPolicy( TRITONBACKEND_Input input, String host_policy_name, @Cast("const char**") @ByPtrPtr ByteBuffer name, TRITONSERVER_DataType datatype, @Cast("const int64_t**") @ByPtrPtr LongBuffer shape, @Cast("uint32_t*") IntBuffer dims_count, @Cast("uint64_t*") LongBuffer byte_size, @Cast("uint32_t*") IntBuffer buffer_count); -public static native int[] TRITONBACKEND_InputPropertiesForHostPolicy( +public static native TRITONSERVER_Error TRITONBACKEND_InputPropertiesForHostPolicy( TRITONBACKEND_Input input, @Cast("const char*") BytePointer host_policy_name, @Cast("const char**") @ByPtrPtr byte[] name, TRITONSERVER_DataType datatype, @Cast("const int64_t**") @ByPtrPtr long[] shape, @Cast("uint32_t*") int[] dims_count, @Cast("uint64_t*") long[] byte_size, @Cast("uint32_t*") int[] buffer_count); @@ -370,19 +371,19 @@ public static native int[] TRITONBACKEND_InputPropertiesForHostPolicy( * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native IntPointer TRITONBACKEND_InputBuffer( +public static native TRITONSERVER_Error TRITONBACKEND_InputBuffer( TRITONBACKEND_Input input, @Cast("const uint32_t") int index, @Cast("const void**") PointerPointer buffer, @Cast("uint64_t*") LongPointer buffer_byte_size, TRITONSERVER_MemoryType memory_type, @Cast("int64_t*") LongPointer memory_type_id); -public static native IntPointer TRITONBACKEND_InputBuffer( +public static native TRITONSERVER_Error TRITONBACKEND_InputBuffer( TRITONBACKEND_Input input, @Cast("const uint32_t") int index, @Cast("const void**") @ByPtrPtr Pointer buffer, @Cast("uint64_t*") LongPointer buffer_byte_size, TRITONSERVER_MemoryType memory_type, @Cast("int64_t*") LongPointer memory_type_id); -public static native IntBuffer TRITONBACKEND_InputBuffer( +public static native TRITONSERVER_Error TRITONBACKEND_InputBuffer( TRITONBACKEND_Input input, @Cast("const uint32_t") int index, @Cast("const void**") @ByPtrPtr Pointer buffer, @Cast("uint64_t*") LongBuffer buffer_byte_size, TRITONSERVER_MemoryType memory_type, @Cast("int64_t*") LongBuffer memory_type_id); -public static native int[] TRITONBACKEND_InputBuffer( +public static native TRITONSERVER_Error TRITONBACKEND_InputBuffer( TRITONBACKEND_Input input, @Cast("const uint32_t") int index, @Cast("const void**") @ByPtrPtr Pointer buffer, @Cast("uint64_t*") long[] buffer_byte_size, TRITONSERVER_MemoryType memory_type, @Cast("int64_t*") long[] memory_type_id); @@ -419,31 +420,31 @@ public static native int[] TRITONBACKEND_InputBuffer( /// /// /// -public static native IntPointer TRITONBACKEND_InputBufferForHostPolicy( +public static native TRITONSERVER_Error TRITONBACKEND_InputBufferForHostPolicy( TRITONBACKEND_Input input, String host_policy_name, @Cast("const uint32_t") int index, @Cast("const void**") PointerPointer buffer, @Cast("uint64_t*") LongPointer buffer_byte_size, TRITONSERVER_MemoryType memory_type, @Cast("int64_t*") LongPointer memory_type_id); -public static native 
IntPointer TRITONBACKEND_InputBufferForHostPolicy( +public static native TRITONSERVER_Error TRITONBACKEND_InputBufferForHostPolicy( TRITONBACKEND_Input input, String host_policy_name, @Cast("const uint32_t") int index, @Cast("const void**") @ByPtrPtr Pointer buffer, @Cast("uint64_t*") LongPointer buffer_byte_size, TRITONSERVER_MemoryType memory_type, @Cast("int64_t*") LongPointer memory_type_id); -public static native IntBuffer TRITONBACKEND_InputBufferForHostPolicy( +public static native TRITONSERVER_Error TRITONBACKEND_InputBufferForHostPolicy( TRITONBACKEND_Input input, @Cast("const char*") BytePointer host_policy_name, @Cast("const uint32_t") int index, @Cast("const void**") @ByPtrPtr Pointer buffer, @Cast("uint64_t*") LongBuffer buffer_byte_size, TRITONSERVER_MemoryType memory_type, @Cast("int64_t*") LongBuffer memory_type_id); -public static native int[] TRITONBACKEND_InputBufferForHostPolicy( +public static native TRITONSERVER_Error TRITONBACKEND_InputBufferForHostPolicy( TRITONBACKEND_Input input, String host_policy_name, @Cast("const uint32_t") int index, @Cast("const void**") @ByPtrPtr Pointer buffer, @Cast("uint64_t*") long[] buffer_byte_size, TRITONSERVER_MemoryType memory_type, @Cast("int64_t*") long[] memory_type_id); -public static native IntPointer TRITONBACKEND_InputBufferForHostPolicy( +public static native TRITONSERVER_Error TRITONBACKEND_InputBufferForHostPolicy( TRITONBACKEND_Input input, @Cast("const char*") BytePointer host_policy_name, @Cast("const uint32_t") int index, @Cast("const void**") @ByPtrPtr Pointer buffer, @Cast("uint64_t*") LongPointer buffer_byte_size, TRITONSERVER_MemoryType memory_type, @Cast("int64_t*") LongPointer memory_type_id); -public static native IntBuffer TRITONBACKEND_InputBufferForHostPolicy( +public static native TRITONSERVER_Error TRITONBACKEND_InputBufferForHostPolicy( TRITONBACKEND_Input input, String host_policy_name, @Cast("const uint32_t") int index, @Cast("const void**") @ByPtrPtr Pointer buffer, 
@Cast("uint64_t*") LongBuffer buffer_byte_size, TRITONSERVER_MemoryType memory_type, @Cast("int64_t*") LongBuffer memory_type_id); -public static native int[] TRITONBACKEND_InputBufferForHostPolicy( +public static native TRITONSERVER_Error TRITONBACKEND_InputBufferForHostPolicy( TRITONBACKEND_Input input, @Cast("const char*") BytePointer host_policy_name, @Cast("const uint32_t") int index, @Cast("const void**") @ByPtrPtr Pointer buffer, @Cast("uint64_t*") long[] buffer_byte_size, TRITONSERVER_MemoryType memory_type, @Cast("int64_t*") long[] memory_type_id); @@ -478,19 +479,19 @@ public static native int[] TRITONBACKEND_InputBufferForHostPolicy( /// /// /// -public static native IntPointer TRITONBACKEND_OutputBuffer( +public static native TRITONSERVER_Error TRITONBACKEND_OutputBuffer( TRITONBACKEND_Output output, @Cast("void**") PointerPointer buffer, @Cast("const uint64_t") long buffer_byte_size, TRITONSERVER_MemoryType memory_type, @Cast("int64_t*") LongPointer memory_type_id); -public static native IntPointer TRITONBACKEND_OutputBuffer( +public static native TRITONSERVER_Error TRITONBACKEND_OutputBuffer( TRITONBACKEND_Output output, @Cast("void**") @ByPtrPtr Pointer buffer, @Cast("const uint64_t") long buffer_byte_size, TRITONSERVER_MemoryType memory_type, @Cast("int64_t*") LongPointer memory_type_id); -public static native IntBuffer TRITONBACKEND_OutputBuffer( +public static native TRITONSERVER_Error TRITONBACKEND_OutputBuffer( TRITONBACKEND_Output output, @Cast("void**") @ByPtrPtr Pointer buffer, @Cast("const uint64_t") long buffer_byte_size, TRITONSERVER_MemoryType memory_type, @Cast("int64_t*") LongBuffer memory_type_id); -public static native int[] TRITONBACKEND_OutputBuffer( +public static native TRITONSERVER_Error TRITONBACKEND_OutputBuffer( TRITONBACKEND_Output output, @Cast("void**") @ByPtrPtr Pointer buffer, @Cast("const uint64_t") long buffer_byte_size, TRITONSERVER_MemoryType memory_type, @Cast("int64_t*") long[] memory_type_id); @@ -510,13 +511,13 @@ 
public static native int[] TRITONBACKEND_OutputBuffer( * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native IntPointer TRITONBACKEND_RequestId( +public static native TRITONSERVER_Error TRITONBACKEND_RequestId( TRITONBACKEND_Request request, @Cast("const char**") PointerPointer id); -public static native IntPointer TRITONBACKEND_RequestId( +public static native TRITONSERVER_Error TRITONBACKEND_RequestId( TRITONBACKEND_Request request, @Cast("const char**") @ByPtrPtr BytePointer id); -public static native IntBuffer TRITONBACKEND_RequestId( +public static native TRITONSERVER_Error TRITONBACKEND_RequestId( TRITONBACKEND_Request request, @Cast("const char**") @ByPtrPtr ByteBuffer id); -public static native int[] TRITONBACKEND_RequestId( +public static native TRITONSERVER_Error TRITONBACKEND_RequestId( TRITONBACKEND_Request request, @Cast("const char**") @ByPtrPtr byte[] id); /** Get the correlation ID of the request. Zero indicates that the @@ -527,11 +528,11 @@ public static native int[] TRITONBACKEND_RequestId( * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native IntPointer TRITONBACKEND_RequestCorrelationId( +public static native TRITONSERVER_Error TRITONBACKEND_RequestCorrelationId( TRITONBACKEND_Request request, @Cast("uint64_t*") LongPointer id); -public static native IntBuffer TRITONBACKEND_RequestCorrelationId( +public static native TRITONSERVER_Error TRITONBACKEND_RequestCorrelationId( TRITONBACKEND_Request request, @Cast("uint64_t*") LongBuffer id); -public static native int[] TRITONBACKEND_RequestCorrelationId( +public static native TRITONSERVER_Error TRITONBACKEND_RequestCorrelationId( TRITONBACKEND_Request request, @Cast("uint64_t*") long[] id); /** Get the number of input tensors specified in the request. @@ -541,11 +542,11 @@ public static native int[] TRITONBACKEND_RequestCorrelationId( * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native IntPointer TRITONBACKEND_RequestInputCount( +public static native TRITONSERVER_Error TRITONBACKEND_RequestInputCount( TRITONBACKEND_Request request, @Cast("uint32_t*") IntPointer count); -public static native IntBuffer TRITONBACKEND_RequestInputCount( +public static native TRITONSERVER_Error TRITONBACKEND_RequestInputCount( TRITONBACKEND_Request request, @Cast("uint32_t*") IntBuffer count); -public static native int[] TRITONBACKEND_RequestInputCount( +public static native TRITONSERVER_Error TRITONBACKEND_RequestInputCount( TRITONBACKEND_Request request, @Cast("uint32_t*") int[] count); /** Get the name of an input tensor. The caller does not own @@ -561,16 +562,16 @@ public static native int[] TRITONBACKEND_RequestInputCount( * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native IntPointer TRITONBACKEND_RequestInputName( +public static native TRITONSERVER_Error TRITONBACKEND_RequestInputName( TRITONBACKEND_Request request, @Cast("const uint32_t") int index, @Cast("const char**") PointerPointer input_name); -public static native IntPointer TRITONBACKEND_RequestInputName( +public static native TRITONSERVER_Error TRITONBACKEND_RequestInputName( TRITONBACKEND_Request request, @Cast("const uint32_t") int index, @Cast("const char**") @ByPtrPtr BytePointer input_name); -public static native IntBuffer TRITONBACKEND_RequestInputName( +public static native TRITONSERVER_Error TRITONBACKEND_RequestInputName( TRITONBACKEND_Request request, @Cast("const uint32_t") int index, @Cast("const char**") @ByPtrPtr ByteBuffer input_name); -public static native int[] TRITONBACKEND_RequestInputName( +public static native TRITONSERVER_Error TRITONBACKEND_RequestInputName( TRITONBACKEND_Request request, @Cast("const uint32_t") int index, @Cast("const char**") @ByPtrPtr byte[] input_name); @@ -585,13 +586,13 @@ public static native int[] TRITONBACKEND_RequestInputName( /// /// -public static native IntPointer 
TRITONBACKEND_RequestInput( +public static native TRITONSERVER_Error TRITONBACKEND_RequestInput( TRITONBACKEND_Request request, String name, @Cast("TRITONBACKEND_Input**") PointerPointer input); -public static native IntPointer TRITONBACKEND_RequestInput( +public static native TRITONSERVER_Error TRITONBACKEND_RequestInput( TRITONBACKEND_Request request, String name, @ByPtrPtr TRITONBACKEND_Input input); -public static native IntBuffer TRITONBACKEND_RequestInput( +public static native TRITONSERVER_Error TRITONBACKEND_RequestInput( TRITONBACKEND_Request request, @Cast("const char*") BytePointer name, @ByPtrPtr TRITONBACKEND_Input input); @@ -613,10 +614,10 @@ public static native IntBuffer TRITONBACKEND_RequestInput( * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native IntPointer TRITONBACKEND_RequestInputByIndex( +public static native TRITONSERVER_Error TRITONBACKEND_RequestInputByIndex( TRITONBACKEND_Request request, @Cast("const uint32_t") int index, @Cast("TRITONBACKEND_Input**") PointerPointer input); -public static native IntPointer TRITONBACKEND_RequestInputByIndex( +public static native TRITONSERVER_Error TRITONBACKEND_RequestInputByIndex( TRITONBACKEND_Request request, @Cast("const uint32_t") int index, @ByPtrPtr TRITONBACKEND_Input input); @@ -628,11 +629,11 @@ public static native IntPointer TRITONBACKEND_RequestInputByIndex( * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native IntPointer TRITONBACKEND_RequestOutputCount( +public static native TRITONSERVER_Error TRITONBACKEND_RequestOutputCount( TRITONBACKEND_Request request, @Cast("uint32_t*") IntPointer count); -public static native IntBuffer TRITONBACKEND_RequestOutputCount( +public static native TRITONSERVER_Error TRITONBACKEND_RequestOutputCount( TRITONBACKEND_Request request, @Cast("uint32_t*") IntBuffer count); -public static native int[] TRITONBACKEND_RequestOutputCount( +public static native TRITONSERVER_Error TRITONBACKEND_RequestOutputCount( TRITONBACKEND_Request request, @Cast("uint32_t*") int[] count); /** Get the name of a requested output tensor. The caller does not own @@ -648,16 +649,16 @@ public static native int[] TRITONBACKEND_RequestOutputCount( * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native IntPointer TRITONBACKEND_RequestOutputName( +public static native TRITONSERVER_Error TRITONBACKEND_RequestOutputName( TRITONBACKEND_Request request, @Cast("const uint32_t") int index, @Cast("const char**") PointerPointer output_name); -public static native IntPointer TRITONBACKEND_RequestOutputName( +public static native TRITONSERVER_Error TRITONBACKEND_RequestOutputName( TRITONBACKEND_Request request, @Cast("const uint32_t") int index, @Cast("const char**") @ByPtrPtr BytePointer output_name); -public static native IntBuffer TRITONBACKEND_RequestOutputName( +public static native TRITONSERVER_Error TRITONBACKEND_RequestOutputName( TRITONBACKEND_Request request, @Cast("const uint32_t") int index, @Cast("const char**") @ByPtrPtr ByteBuffer output_name); -public static native int[] TRITONBACKEND_RequestOutputName( +public static native TRITONSERVER_Error TRITONBACKEND_RequestOutputName( TRITONBACKEND_Request request, @Cast("const uint32_t") int index, @Cast("const char**") @ByPtrPtr byte[] output_name); @@ -681,7 +682,7 @@ public static native int[] TRITONBACKEND_RequestOutputName( /// /// /// -public static 
native IntPointer TRITONBACKEND_RequestRelease( +public static native TRITONSERVER_Error TRITONBACKEND_RequestRelease( TRITONBACKEND_Request request, @Cast("uint32_t") int release_flags); /** @@ -702,9 +703,9 @@ public static native IntPointer TRITONBACKEND_RequestRelease( * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native IntPointer TRITONBACKEND_ResponseFactoryNew( +public static native TRITONSERVER_Error TRITONBACKEND_ResponseFactoryNew( @Cast("TRITONBACKEND_ResponseFactory**") PointerPointer factory, TRITONBACKEND_Request request); -public static native IntPointer TRITONBACKEND_ResponseFactoryNew( +public static native TRITONSERVER_Error TRITONBACKEND_ResponseFactoryNew( @ByPtrPtr TRITONBACKEND_ResponseFactory factory, TRITONBACKEND_Request request); /** Destroy a response factory. @@ -713,7 +714,7 @@ public static native IntPointer TRITONBACKEND_ResponseFactoryNew( * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native IntPointer TRITONBACKEND_ResponseFactoryDelete( +public static native TRITONSERVER_Error TRITONBACKEND_ResponseFactoryDelete( TRITONBACKEND_ResponseFactory factory); /** Send response flags without a corresponding response. @@ -730,7 +731,7 @@ public static native IntPointer TRITONBACKEND_ResponseFactoryDelete( /// /// /// -public static native IntPointer TRITONBACKEND_ResponseFactorySendFlags( +public static native TRITONSERVER_Error TRITONBACKEND_ResponseFactorySendFlags( TRITONBACKEND_ResponseFactory factory, @Cast("const uint32_t") int send_flags); /** @@ -767,9 +768,9 @@ public static native IntPointer TRITONBACKEND_ResponseFactorySendFlags( * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native IntPointer TRITONBACKEND_ResponseNew( +public static native TRITONSERVER_Error TRITONBACKEND_ResponseNew( @Cast("TRITONBACKEND_Response**") PointerPointer response, TRITONBACKEND_Request request); -public static native IntPointer TRITONBACKEND_ResponseNew( +public static native TRITONSERVER_Error TRITONBACKEND_ResponseNew( @ByPtrPtr TRITONBACKEND_Response response, TRITONBACKEND_Request request); /** Create a response using a factory. @@ -779,9 +780,9 @@ public static native IntPointer TRITONBACKEND_ResponseNew( * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native IntPointer TRITONBACKEND_ResponseNewFromFactory( +public static native TRITONSERVER_Error TRITONBACKEND_ResponseNewFromFactory( @Cast("TRITONBACKEND_Response**") PointerPointer response, TRITONBACKEND_ResponseFactory factory); -public static native IntPointer TRITONBACKEND_ResponseNewFromFactory( +public static native TRITONSERVER_Error TRITONBACKEND_ResponseNewFromFactory( @ByPtrPtr TRITONBACKEND_Response response, TRITONBACKEND_ResponseFactory factory); /** Destroy a response. It is not necessary to delete a response if @@ -792,7 +793,7 @@ public static native IntPointer TRITONBACKEND_ResponseNewFromFactory( * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native IntPointer TRITONBACKEND_ResponseDelete( +public static native TRITONSERVER_Error TRITONBACKEND_ResponseDelete( TRITONBACKEND_Response response); /** Set a string parameter in the response. @@ -803,9 +804,9 @@ public static native IntPointer TRITONBACKEND_ResponseDelete( * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native IntPointer TRITONBACKEND_ResponseSetStringParameter( +public static native TRITONSERVER_Error TRITONBACKEND_ResponseSetStringParameter( TRITONBACKEND_Response response, String name, String value); -public static native IntBuffer TRITONBACKEND_ResponseSetStringParameter( +public static native TRITONSERVER_Error TRITONBACKEND_ResponseSetStringParameter( TRITONBACKEND_Response response, @Cast("const char*") BytePointer name, @Cast("const char*") BytePointer value); /** Set an integer parameter in the response. @@ -816,9 +817,9 @@ public static native IntBuffer TRITONBACKEND_ResponseSetStringParameter( * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native IntPointer TRITONBACKEND_ResponseSetIntParameter( +public static native TRITONSERVER_Error TRITONBACKEND_ResponseSetIntParameter( TRITONBACKEND_Response response, String name, @Cast("const int64_t") long value); -public static native IntBuffer TRITONBACKEND_ResponseSetIntParameter( +public static native TRITONSERVER_Error TRITONBACKEND_ResponseSetIntParameter( TRITONBACKEND_Response response, @Cast("const char*") BytePointer name, @Cast("const int64_t") long value); /** Set an boolean parameter in the response. @@ -829,9 +830,9 @@ public static native IntBuffer TRITONBACKEND_ResponseSetIntParameter( * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native IntPointer TRITONBACKEND_ResponseSetBoolParameter( +public static native TRITONSERVER_Error TRITONBACKEND_ResponseSetBoolParameter( TRITONBACKEND_Response response, String name, @Cast("const bool") boolean value); -public static native IntBuffer TRITONBACKEND_ResponseSetBoolParameter( +public static native TRITONSERVER_Error TRITONBACKEND_ResponseSetBoolParameter( TRITONBACKEND_Response response, @Cast("const char*") BytePointer name, @Cast("const bool") boolean value); /** Create an output tensor in the response. 
The lifetime of the @@ -849,31 +850,31 @@ public static native IntBuffer TRITONBACKEND_ResponseSetBoolParameter( * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native IntPointer TRITONBACKEND_ResponseOutput( +public static native TRITONSERVER_Error TRITONBACKEND_ResponseOutput( TRITONBACKEND_Response response, @Cast("TRITONBACKEND_Output**") PointerPointer output, String name, @Const @ByVal TRITONSERVER_DataType datatype, @Cast("const int64_t*") LongPointer shape, @Cast("const uint32_t") int dims_count); -public static native IntPointer TRITONBACKEND_ResponseOutput( +public static native TRITONSERVER_Error TRITONBACKEND_ResponseOutput( TRITONBACKEND_Response response, @ByPtrPtr TRITONBACKEND_Output output, String name, @Const @ByVal TRITONSERVER_DataType datatype, @Cast("const int64_t*") LongPointer shape, @Cast("const uint32_t") int dims_count); -public static native IntBuffer TRITONBACKEND_ResponseOutput( +public static native TRITONSERVER_Error TRITONBACKEND_ResponseOutput( TRITONBACKEND_Response response, @ByPtrPtr TRITONBACKEND_Output output, @Cast("const char*") BytePointer name, @Const @ByVal TRITONSERVER_DataType datatype, @Cast("const int64_t*") LongBuffer shape, @Cast("const uint32_t") int dims_count); -public static native int[] TRITONBACKEND_ResponseOutput( +public static native TRITONSERVER_Error TRITONBACKEND_ResponseOutput( TRITONBACKEND_Response response, @ByPtrPtr TRITONBACKEND_Output output, String name, @Const @ByVal TRITONSERVER_DataType datatype, @Cast("const int64_t*") long[] shape, @Cast("const uint32_t") int dims_count); -public static native IntPointer TRITONBACKEND_ResponseOutput( +public static native TRITONSERVER_Error TRITONBACKEND_ResponseOutput( TRITONBACKEND_Response response, @ByPtrPtr TRITONBACKEND_Output output, @Cast("const char*") BytePointer name, @Const @ByVal TRITONSERVER_DataType datatype, @Cast("const int64_t*") LongPointer shape, @Cast("const uint32_t") int dims_count); -public static 
native IntBuffer TRITONBACKEND_ResponseOutput( +public static native TRITONSERVER_Error TRITONBACKEND_ResponseOutput( TRITONBACKEND_Response response, @ByPtrPtr TRITONBACKEND_Output output, String name, @Const @ByVal TRITONSERVER_DataType datatype, @Cast("const int64_t*") LongBuffer shape, @Cast("const uint32_t") int dims_count); -public static native int[] TRITONBACKEND_ResponseOutput( +public static native TRITONSERVER_Error TRITONBACKEND_ResponseOutput( TRITONBACKEND_Response response, @ByPtrPtr TRITONBACKEND_Output output, @Cast("const char*") BytePointer name, @Const @ByVal TRITONSERVER_DataType datatype, @Cast("const int64_t*") long[] shape, @Cast("const uint32_t") int dims_count); @@ -897,7 +898,7 @@ public static native int[] TRITONBACKEND_ResponseOutput( /// /// /// -public static native IntPointer TRITONBACKEND_ResponseSend( +public static native TRITONSERVER_Error TRITONBACKEND_ResponseSend( TRITONBACKEND_Response response, @Cast("const uint32_t") int send_flags, TRITONSERVER_Error error); @@ -943,13 +944,13 @@ public enum TRITONBACKEND_ExecutionPolicy { /// /// /// -public static native IntPointer TRITONBACKEND_BackendName( +public static native TRITONSERVER_Error TRITONBACKEND_BackendName( TRITONBACKEND_Backend backend, @Cast("const char**") PointerPointer name); -public static native IntPointer TRITONBACKEND_BackendName( +public static native TRITONSERVER_Error TRITONBACKEND_BackendName( TRITONBACKEND_Backend backend, @Cast("const char**") @ByPtrPtr BytePointer name); -public static native IntBuffer TRITONBACKEND_BackendName( +public static native TRITONSERVER_Error TRITONBACKEND_BackendName( TRITONBACKEND_Backend backend, @Cast("const char**") @ByPtrPtr ByteBuffer name); -public static native int[] TRITONBACKEND_BackendName( +public static native TRITONSERVER_Error TRITONBACKEND_BackendName( TRITONBACKEND_Backend backend, @Cast("const char**") @ByPtrPtr byte[] name); /** Get the backend configuration. 
The 'backend_config' message is @@ -969,9 +970,9 @@ public static native int[] TRITONBACKEND_BackendName( * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native IntPointer TRITONBACKEND_BackendConfig( +public static native TRITONSERVER_Error TRITONBACKEND_BackendConfig( TRITONBACKEND_Backend backend, @Cast("TRITONSERVER_Message**") PointerPointer backend_config); -public static native IntPointer TRITONBACKEND_BackendConfig( +public static native TRITONSERVER_Error TRITONBACKEND_BackendConfig( TRITONBACKEND_Backend backend, @ByPtrPtr TRITONSERVER_Message backend_config); /** Get the execution policy for this backend. By default the @@ -982,11 +983,11 @@ public static native IntPointer TRITONBACKEND_BackendConfig( * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native IntPointer TRITONBACKEND_BackendExecutionPolicy( +public static native TRITONSERVER_Error TRITONBACKEND_BackendExecutionPolicy( TRITONBACKEND_Backend backend, @Cast("TRITONBACKEND_ExecutionPolicy*") IntPointer policy); -public static native IntBuffer TRITONBACKEND_BackendExecutionPolicy( +public static native TRITONSERVER_Error TRITONBACKEND_BackendExecutionPolicy( TRITONBACKEND_Backend backend, @Cast("TRITONBACKEND_ExecutionPolicy*") IntBuffer policy); -public static native int[] TRITONBACKEND_BackendExecutionPolicy( +public static native TRITONSERVER_Error TRITONBACKEND_BackendExecutionPolicy( TRITONBACKEND_Backend backend, @Cast("TRITONBACKEND_ExecutionPolicy*") int[] policy); /** Set the execution policy for this backend. 
By default the @@ -1001,9 +1002,9 @@ public static native int[] TRITONBACKEND_BackendExecutionPolicy( /// /// -public static native IntPointer TRITONBACKEND_BackendSetExecutionPolicy( +public static native TRITONSERVER_Error TRITONBACKEND_BackendSetExecutionPolicy( TRITONBACKEND_Backend backend, TRITONBACKEND_ExecutionPolicy policy); -public static native IntBuffer TRITONBACKEND_BackendSetExecutionPolicy( +public static native TRITONSERVER_Error TRITONBACKEND_BackendSetExecutionPolicy( TRITONBACKEND_Backend backend, @Cast("TRITONBACKEND_ExecutionPolicy") int policy); /** Get the location of the files that make up the backend @@ -1024,16 +1025,16 @@ public static native IntBuffer TRITONBACKEND_BackendSetExecutionPolicy( * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native IntPointer TRITONBACKEND_BackendArtifacts( +public static native TRITONSERVER_Error TRITONBACKEND_BackendArtifacts( TRITONBACKEND_Backend backend, @Cast("TRITONBACKEND_ArtifactType*") IntPointer artifact_type, @Cast("const char**") PointerPointer location); -public static native IntPointer TRITONBACKEND_BackendArtifacts( +public static native TRITONSERVER_Error TRITONBACKEND_BackendArtifacts( TRITONBACKEND_Backend backend, @Cast("TRITONBACKEND_ArtifactType*") IntPointer artifact_type, @Cast("const char**") @ByPtrPtr BytePointer location); -public static native IntBuffer TRITONBACKEND_BackendArtifacts( +public static native TRITONSERVER_Error TRITONBACKEND_BackendArtifacts( TRITONBACKEND_Backend backend, @Cast("TRITONBACKEND_ArtifactType*") IntBuffer artifact_type, @Cast("const char**") @ByPtrPtr ByteBuffer location); -public static native int[] TRITONBACKEND_BackendArtifacts( +public static native TRITONSERVER_Error TRITONBACKEND_BackendArtifacts( TRITONBACKEND_Backend backend, @Cast("TRITONBACKEND_ArtifactType*") int[] artifact_type, @Cast("const char**") @ByPtrPtr byte[] location); @@ -1044,9 +1045,9 @@ public static native int[] 
TRITONBACKEND_BackendArtifacts( * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native IntPointer TRITONBACKEND_BackendMemoryManager( +public static native TRITONSERVER_Error TRITONBACKEND_BackendMemoryManager( TRITONBACKEND_Backend backend, @Cast("TRITONBACKEND_MemoryManager**") PointerPointer manager); -public static native IntPointer TRITONBACKEND_BackendMemoryManager( +public static native TRITONSERVER_Error TRITONBACKEND_BackendMemoryManager( TRITONBACKEND_Backend backend, @ByPtrPtr TRITONBACKEND_MemoryManager manager); /** Get the user-specified state associated with the backend. The @@ -1057,9 +1058,9 @@ public static native IntPointer TRITONBACKEND_BackendMemoryManager( * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native IntPointer TRITONBACKEND_BackendState( +public static native TRITONSERVER_Error TRITONBACKEND_BackendState( TRITONBACKEND_Backend backend, @Cast("void**") PointerPointer state); -public static native IntPointer TRITONBACKEND_BackendState( +public static native TRITONSERVER_Error TRITONBACKEND_BackendState( TRITONBACKEND_Backend backend, @Cast("void**") @ByPtrPtr Pointer state); /** Set the user-specified state associated with the backend. The @@ -1074,7 +1075,7 @@ public static native IntPointer TRITONBACKEND_BackendState( /// /// /// -public static native IntPointer TRITONBACKEND_BackendSetState( +public static native TRITONSERVER_Error TRITONBACKEND_BackendSetState( TRITONBACKEND_Backend backend, Pointer state); /** @@ -1092,13 +1093,13 @@ public static native IntPointer TRITONBACKEND_BackendSetState( * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native IntPointer TRITONBACKEND_ModelName( +public static native TRITONSERVER_Error TRITONBACKEND_ModelName( TRITONBACKEND_Model model, @Cast("const char**") PointerPointer name); -public static native IntPointer TRITONBACKEND_ModelName( +public static native TRITONSERVER_Error TRITONBACKEND_ModelName( TRITONBACKEND_Model model, @Cast("const char**") @ByPtrPtr BytePointer name); -public static native IntBuffer TRITONBACKEND_ModelName( +public static native TRITONSERVER_Error TRITONBACKEND_ModelName( TRITONBACKEND_Model model, @Cast("const char**") @ByPtrPtr ByteBuffer name); -public static native int[] TRITONBACKEND_ModelName( +public static native TRITONSERVER_Error TRITONBACKEND_ModelName( TRITONBACKEND_Model model, @Cast("const char**") @ByPtrPtr byte[] name); /** Get the version of the model. @@ -1109,11 +1110,11 @@ public static native int[] TRITONBACKEND_ModelName( /// /// -public static native IntPointer TRITONBACKEND_ModelVersion( +public static native TRITONSERVER_Error TRITONBACKEND_ModelVersion( TRITONBACKEND_Model model, @Cast("uint64_t*") LongPointer version); -public static native IntBuffer TRITONBACKEND_ModelVersion( +public static native TRITONSERVER_Error TRITONBACKEND_ModelVersion( TRITONBACKEND_Model model, @Cast("uint64_t*") LongBuffer version); -public static native int[] TRITONBACKEND_ModelVersion( +public static native TRITONSERVER_Error TRITONBACKEND_ModelVersion( TRITONBACKEND_Model model, @Cast("uint64_t*") long[] version); /** Get the location of the files that make up the model. The @@ -1133,16 +1134,16 @@ public static native int[] TRITONBACKEND_ModelVersion( * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native IntPointer TRITONBACKEND_ModelRepository( +public static native TRITONSERVER_Error TRITONBACKEND_ModelRepository( TRITONBACKEND_Model model, @Cast("TRITONBACKEND_ArtifactType*") IntPointer artifact_type, @Cast("const char**") PointerPointer location); -public static native IntPointer TRITONBACKEND_ModelRepository( +public static native TRITONSERVER_Error TRITONBACKEND_ModelRepository( TRITONBACKEND_Model model, @Cast("TRITONBACKEND_ArtifactType*") IntPointer artifact_type, @Cast("const char**") @ByPtrPtr BytePointer location); -public static native IntBuffer TRITONBACKEND_ModelRepository( +public static native TRITONSERVER_Error TRITONBACKEND_ModelRepository( TRITONBACKEND_Model model, @Cast("TRITONBACKEND_ArtifactType*") IntBuffer artifact_type, @Cast("const char**") @ByPtrPtr ByteBuffer location); -public static native int[] TRITONBACKEND_ModelRepository( +public static native TRITONSERVER_Error TRITONBACKEND_ModelRepository( TRITONBACKEND_Model model, @Cast("TRITONBACKEND_ArtifactType*") int[] artifact_type, @Cast("const char**") @ByPtrPtr byte[] location); @@ -1163,10 +1164,10 @@ public static native int[] TRITONBACKEND_ModelRepository( * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native IntPointer TRITONBACKEND_ModelConfig( +public static native TRITONSERVER_Error TRITONBACKEND_ModelConfig( TRITONBACKEND_Model model, @Cast("const uint32_t") int config_version, @Cast("TRITONSERVER_Message**") PointerPointer model_config); -public static native IntPointer TRITONBACKEND_ModelConfig( +public static native TRITONSERVER_Error TRITONBACKEND_ModelConfig( TRITONBACKEND_Model model, @Cast("const uint32_t") int config_version, @ByPtrPtr TRITONSERVER_Message model_config); @@ -1182,9 +1183,9 @@ public static native IntPointer TRITONBACKEND_ModelConfig( * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native IntPointer TRITONBACKEND_ModelAutoCompleteConfig( +public static native TRITONSERVER_Error TRITONBACKEND_ModelAutoCompleteConfig( TRITONBACKEND_Model model, @Cast("bool*") BoolPointer auto_complete_config); -public static native IntBuffer TRITONBACKEND_ModelAutoCompleteConfig( +public static native TRITONSERVER_Error TRITONBACKEND_ModelAutoCompleteConfig( TRITONBACKEND_Model model, @Cast("bool*") boolean[] auto_complete_config); /** Set the model configuration in Triton server. Only the inputs, outputs, @@ -1203,7 +1204,7 @@ public static native IntBuffer TRITONBACKEND_ModelAutoCompleteConfig( * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native IntPointer TRITONBACKEND_ModelSetConfig( +public static native TRITONSERVER_Error TRITONBACKEND_ModelSetConfig( TRITONBACKEND_Model model, @Cast("const uint32_t") int config_version, TRITONSERVER_Message model_config); @@ -1215,9 +1216,9 @@ public static native IntPointer TRITONBACKEND_ModelSetConfig( * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native IntPointer TRITONBACKEND_ModelServer( +public static native TRITONSERVER_Error TRITONBACKEND_ModelServer( TRITONBACKEND_Model model, @Cast("TRITONSERVER_Server**") PointerPointer server); -public static native IntPointer TRITONBACKEND_ModelServer( +public static native TRITONSERVER_Error TRITONBACKEND_ModelServer( TRITONBACKEND_Model model, @ByPtrPtr TRITONSERVER_Server server); /** Get the backend used by the model. @@ -1227,9 +1228,9 @@ public static native IntPointer TRITONBACKEND_ModelServer( * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native IntPointer TRITONBACKEND_ModelBackend( +public static native TRITONSERVER_Error TRITONBACKEND_ModelBackend( TRITONBACKEND_Model model, @Cast("TRITONBACKEND_Backend**") PointerPointer backend); -public static native IntPointer TRITONBACKEND_ModelBackend( +public static native TRITONSERVER_Error TRITONBACKEND_ModelBackend( TRITONBACKEND_Model model, @ByPtrPtr TRITONBACKEND_Backend backend); /** Get the user-specified state associated with the model. The @@ -1240,9 +1241,9 @@ public static native IntPointer TRITONBACKEND_ModelBackend( * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native IntPointer TRITONBACKEND_ModelState( +public static native TRITONSERVER_Error TRITONBACKEND_ModelState( TRITONBACKEND_Model model, @Cast("void**") PointerPointer state); -public static native IntPointer TRITONBACKEND_ModelState( +public static native TRITONSERVER_Error TRITONBACKEND_ModelState( TRITONBACKEND_Model model, @Cast("void**") @ByPtrPtr Pointer state); /** Set the user-specified state associated with the model. The @@ -1257,7 +1258,7 @@ public static native IntPointer TRITONBACKEND_ModelState( /// /// /// -public static native IntPointer TRITONBACKEND_ModelSetState( +public static native TRITONSERVER_Error TRITONBACKEND_ModelSetState( TRITONBACKEND_Model model, Pointer state); /** @@ -1276,13 +1277,13 @@ public static native IntPointer TRITONBACKEND_ModelSetState( * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native IntPointer TRITONBACKEND_ModelInstanceName( +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceName( TRITONBACKEND_ModelInstance instance, @Cast("const char**") PointerPointer name); -public static native IntPointer TRITONBACKEND_ModelInstanceName( +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceName( TRITONBACKEND_ModelInstance instance, @Cast("const char**") @ByPtrPtr BytePointer name); -public static native IntBuffer TRITONBACKEND_ModelInstanceName( +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceName( TRITONBACKEND_ModelInstance instance, @Cast("const char**") @ByPtrPtr ByteBuffer name); -public static native int[] TRITONBACKEND_ModelInstanceName( +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceName( TRITONBACKEND_ModelInstance instance, @Cast("const char**") @ByPtrPtr byte[] name); /** Get the kind of the model instance. @@ -1292,7 +1293,7 @@ public static native int[] TRITONBACKEND_ModelInstanceName( * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native IntPointer TRITONBACKEND_ModelInstanceKind( +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceKind( TRITONBACKEND_ModelInstance instance, TRITONSERVER_InstanceGroupKind kind); @@ -1305,11 +1306,11 @@ public static native IntPointer TRITONBACKEND_ModelInstanceKind( /// /// /// -public static native IntPointer TRITONBACKEND_ModelInstanceDeviceId( +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceDeviceId( TRITONBACKEND_ModelInstance instance, IntPointer device_id); -public static native IntBuffer TRITONBACKEND_ModelInstanceDeviceId( +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceDeviceId( TRITONBACKEND_ModelInstance instance, IntBuffer device_id); -public static native int[] TRITONBACKEND_ModelInstanceDeviceId( +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceDeviceId( TRITONBACKEND_ModelInstance instance, int[] device_id); /** Get the host policy setting. The 'host_policy' message is @@ -1329,9 +1330,9 @@ public static native int[] TRITONBACKEND_ModelInstanceDeviceId( * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native IntPointer TRITONBACKEND_ModelInstanceHostPolicy( +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceHostPolicy( TRITONBACKEND_ModelInstance instance, @Cast("TRITONSERVER_Message**") PointerPointer host_policy); -public static native IntPointer TRITONBACKEND_ModelInstanceHostPolicy( +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceHostPolicy( TRITONBACKEND_ModelInstance instance, @ByPtrPtr TRITONSERVER_Message host_policy); /** Whether the model instance is passive. @@ -1341,9 +1342,9 @@ public static native IntPointer TRITONBACKEND_ModelInstanceHostPolicy( * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native IntPointer TRITONBACKEND_ModelInstanceIsPassive( +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceIsPassive( TRITONBACKEND_ModelInstance instance, @Cast("bool*") BoolPointer is_passive); -public static native IntBuffer TRITONBACKEND_ModelInstanceIsPassive( +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceIsPassive( TRITONBACKEND_ModelInstance instance, @Cast("bool*") boolean[] is_passive); /** Get the number of optimization profiles to be loaded for the instance. @@ -1353,11 +1354,11 @@ public static native IntBuffer TRITONBACKEND_ModelInstanceIsPassive( * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native IntPointer TRITONBACKEND_ModelInstanceProfileCount( +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceProfileCount( TRITONBACKEND_ModelInstance instance, @Cast("uint32_t*") IntPointer count); -public static native IntBuffer TRITONBACKEND_ModelInstanceProfileCount( +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceProfileCount( TRITONBACKEND_ModelInstance instance, @Cast("uint32_t*") IntBuffer count); -public static native int[] TRITONBACKEND_ModelInstanceProfileCount( +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceProfileCount( TRITONBACKEND_ModelInstance instance, @Cast("uint32_t*") int[] count); /** Get the name of optimization profile. The caller does not own @@ -1373,16 +1374,16 @@ public static native int[] TRITONBACKEND_ModelInstanceProfileCount( * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native IntPointer TRITONBACKEND_ModelInstanceProfileName( +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceProfileName( TRITONBACKEND_ModelInstance instance, @Cast("const uint32_t") int index, @Cast("const char**") PointerPointer profile_name); -public static native IntPointer TRITONBACKEND_ModelInstanceProfileName( +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceProfileName( TRITONBACKEND_ModelInstance instance, @Cast("const uint32_t") int index, @Cast("const char**") @ByPtrPtr BytePointer profile_name); -public static native IntBuffer TRITONBACKEND_ModelInstanceProfileName( +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceProfileName( TRITONBACKEND_ModelInstance instance, @Cast("const uint32_t") int index, @Cast("const char**") @ByPtrPtr ByteBuffer profile_name); -public static native int[] TRITONBACKEND_ModelInstanceProfileName( +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceProfileName( TRITONBACKEND_ModelInstance instance, @Cast("const uint32_t") int index, @Cast("const char**") @ByPtrPtr byte[] profile_name); @@ -1393,9 +1394,9 @@ public static native int[] TRITONBACKEND_ModelInstanceProfileName( * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native IntPointer TRITONBACKEND_ModelInstanceModel( +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceModel( TRITONBACKEND_ModelInstance instance, @Cast("TRITONBACKEND_Model**") PointerPointer model); -public static native IntPointer TRITONBACKEND_ModelInstanceModel( +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceModel( TRITONBACKEND_ModelInstance instance, @ByPtrPtr TRITONBACKEND_Model model); /** Get the user-specified state associated with the model @@ -1407,9 +1408,9 @@ public static native IntPointer TRITONBACKEND_ModelInstanceModel( * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native IntPointer TRITONBACKEND_ModelInstanceState( +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceState( TRITONBACKEND_ModelInstance instance, @Cast("void**") PointerPointer state); -public static native IntPointer TRITONBACKEND_ModelInstanceState( +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceState( TRITONBACKEND_ModelInstance instance, @Cast("void**") @ByPtrPtr Pointer state); /** Set the user-specified state associated with the model @@ -1425,7 +1426,7 @@ public static native IntPointer TRITONBACKEND_ModelInstanceState( /// /// /// -public static native IntPointer TRITONBACKEND_ModelInstanceSetState( +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceSetState( TRITONBACKEND_ModelInstance instance, Pointer state); /** Record statistics for an inference request. @@ -1477,7 +1478,7 @@ public static native IntPointer TRITONBACKEND_ModelInstanceSetState( /// /// /// -public static native IntPointer TRITONBACKEND_ModelInstanceReportStatistics( +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceReportStatistics( TRITONBACKEND_ModelInstance instance, TRITONBACKEND_Request request, @Cast("const bool") boolean success, @Cast("const uint64_t") long exec_start_ns, @Cast("const uint64_t") long compute_start_ns, @Cast("const uint64_t") long compute_end_ns, @@ -1515,7 +1516,7 @@ public static native IntPointer TRITONBACKEND_ModelInstanceReportStatistics( /// /// /// -public static native IntPointer TRITONBACKEND_ModelInstanceReportBatchStatistics( +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceReportBatchStatistics( TRITONBACKEND_ModelInstance instance, @Cast("const uint64_t") long batch_size, @Cast("const uint64_t") long exec_start_ns, @Cast("const uint64_t") long compute_start_ns, @Cast("const uint64_t") long compute_end_ns, @Cast("const uint64_t") long exec_end_ns); diff --git 
a/tritonserver/src/main/java/org/bytedeco/tritonserver/presets/tritonserver.java b/tritonserver/src/main/java/org/bytedeco/tritonserver/presets/tritonserver.java index 28cef44e409..60af6d7af8e 100644 --- a/tritonserver/src/main/java/org/bytedeco/tritonserver/presets/tritonserver.java +++ b/tritonserver/src/main/java/org/bytedeco/tritonserver/presets/tritonserver.java @@ -36,11 +36,10 @@ import org.bytedeco.cuda.presets.cublas; import org.bytedeco.cuda.presets.cudnn; import org.bytedeco.cuda.presets.nvrtc; -import org.bytedeco.tensorrt.presets.*; -//import org.bytedeco.tensorrt.presets.nvinfer; -//import org.bytedeco.tensorrt.presets.nvinfer_plugin; -//import org.bytedeco.tensorrt.presets.nvonnxparser; -//import org.bytedeco.tensorrt.presets.nvparsers; +import org.bytedeco.tensorrt.presets.nvinfer; +import org.bytedeco.tensorrt.presets.nvinfer_plugin; +import org.bytedeco.tensorrt.presets.nvonnxparser; +import org.bytedeco.tensorrt.presets.nvparsers; /** * @@ -57,12 +56,12 @@ ), @Platform( value = "linux-arm64", - includepath = {"/opt/tritonserver/include/triton/core/", "/opt/tritonserver/include/"}, + includepath = {"/opt/tritonserver/include/triton/core/"}, linkpath = {"/opt/tritonserver/lib/"} ), @Platform( value = "linux-ppc64le", - includepath = {"/opt/tritonserver/include/triton/core/", "/opt/tritonserver/include/"}, + includepath = {"/opt/tritonserver/include/triton/core/"}, linkpath = {"/opt/tritonserver/lib/"} ), @Platform( @@ -120,8 +119,11 @@ public void map(InfoMap infoMap) { infoMap.put(new Info().enumerate()) .put(new Info("TRITONSERVER_EXPORT").cppTypes().annotations()) .put(new Info("TRITONSERVER_DECLSPEC").cppTypes().annotations()) - .put(new Info("TTRITONBACKEND_DECLSPEC", "TRITONBACKEND_ISPEC").cppTypes().annotations()) - .put(new Info("TRITONREPOAGENT_DECLSPEC", "TRITONREPOAGENT_ISPEC").cppTypes().annotations()) + .put(new Info("TRITONBACKEND_DECLSPEC", "TRITONBACKEND_ISPEC").cppTypes().annotations()) + .put(new Info("TRITONREPOAGENT_DECLSPEC", 
"TRITONREPOAGENT_ISPEC").cppTypes().annotations()) + //.put(new Info("TRITONSERVER_datatype_enum").pointerTypes("TRITONSERVER_DataType")) + //.put(new Info("TRITONSERVER_DataType").valueTypes("TRITONSERVER_DataType").pointerTypes("@Cast(\"TRITONSERVER_DataType*\") PointerPointer", "@ByPtrPtr TRITONSERVER_DataType")) + ; } } From bb9f13256298948ebe52996693ebd4724ee5cd97 Mon Sep 17 00:00:00 2001 From: jackyh Date: Tue, 21 Sep 2021 03:35:58 +0000 Subject: [PATCH 05/21] change the order of include files to make enum parsed first before using --- tritonserver/pom.xml | 2 +- .../org/bytedeco/tensorrt/global/nvinfer.java | 3248 -------- .../tensorrt/global/nvinfer_plugin.java | 391 - .../tensorrt/global/nvonnxparser.java | 169 - .../bytedeco/tensorrt/global/nvparsers.java | 320 - .../org/bytedeco/tensorrt/nvinfer/Dims2.java | 59 - .../org/bytedeco/tensorrt/nvinfer/Dims3.java | 61 - .../org/bytedeco/tensorrt/nvinfer/Dims32.java | 58 - .../org/bytedeco/tensorrt/nvinfer/Dims4.java | 62 - .../bytedeco/tensorrt/nvinfer/DimsExprs.java | 50 - .../org/bytedeco/tensorrt/nvinfer/DimsHW.java | 101 - .../nvinfer/DynamicPluginTensorDesc.java | 53 - .../tensorrt/nvinfer/EnumMaxImpl.java | 43 - .../tensorrt/nvinfer/IActivationLayer.java | 115 - .../bytedeco/tensorrt/nvinfer/IAlgorithm.java | 90 - .../tensorrt/nvinfer/IAlgorithmContext.java | 72 - .../tensorrt/nvinfer/IAlgorithmIOInfo.java | 60 - .../tensorrt/nvinfer/IAlgorithmSelector.java | 76 - .../tensorrt/nvinfer/IAlgorithmVariant.java | 52 - .../bytedeco/tensorrt/nvinfer/IBuilder.java | 325 - .../tensorrt/nvinfer/IBuilderConfig.java | 769 -- .../tensorrt/nvinfer/IConcatenationLayer.java | 63 - .../tensorrt/nvinfer/IConstantLayer.java | 87 - .../tensorrt/nvinfer/IConvolutionLayer.java | 556 -- .../tensorrt/nvinfer/ICudaEngine.java | 651 -- .../tensorrt/nvinfer/IDeconvolutionLayer.java | 517 -- .../tensorrt/nvinfer/IDequantizeLayer.java | 104 - .../tensorrt/nvinfer/IDimensionExpr.java | 46 - 
.../tensorrt/nvinfer/IElementWiseLayer.java | 73 - .../tensorrt/nvinfer/IErrorRecorder.java | 231 - .../tensorrt/nvinfer/IExecutionContext.java | 654 -- .../tensorrt/nvinfer/IExprBuilder.java | 54 - .../bytedeco/tensorrt/nvinfer/IFillLayer.java | 229 - .../nvinfer/IFullyConnectedLayer.java | 152 - .../tensorrt/nvinfer/IGatherLayer.java | 76 - .../tensorrt/nvinfer/IGpuAllocator.java | 110 - .../tensorrt/nvinfer/IHostMemory.java | 72 - .../tensorrt/nvinfer/IIdentityLayer.java | 39 - .../tensorrt/nvinfer/IInt8Calibrator.java | 130 - .../nvinfer/IInt8EntropyCalibrator.java | 48 - .../nvinfer/IInt8EntropyCalibrator2.java | 48 - .../nvinfer/IInt8LegacyCalibrator.java | 111 - .../nvinfer/IInt8MinMaxCalibrator.java | 47 - .../tensorrt/nvinfer/IIteratorLayer.java | 43 - .../bytedeco/tensorrt/nvinfer/ILRNLayer.java | 134 - .../org/bytedeco/tensorrt/nvinfer/ILayer.java | 291 - .../bytedeco/tensorrt/nvinfer/ILogger.java | 80 - .../org/bytedeco/tensorrt/nvinfer/ILoop.java | 127 - .../tensorrt/nvinfer/ILoopBoundaryLayer.java | 30 - .../tensorrt/nvinfer/ILoopOutputLayer.java | 97 - .../nvinfer/IMatrixMultiplyLayer.java | 72 - .../tensorrt/nvinfer/INetworkDefinition.java | 1480 ---- .../bytedeco/tensorrt/nvinfer/INoCopy.java | 37 - .../nvinfer/IOptimizationProfile.java | 231 - .../tensorrt/nvinfer/IPaddingLayer.java | 162 - .../nvinfer/IParametricReLULayer.java | 35 - .../bytedeco/tensorrt/nvinfer/IPlugin.java | 27 - .../tensorrt/nvinfer/IPluginCreator.java | 111 - .../bytedeco/tensorrt/nvinfer/IPluginExt.java | 27 - .../tensorrt/nvinfer/IPluginFactory.java | 33 - .../tensorrt/nvinfer/IPluginLayer.java | 27 - .../tensorrt/nvinfer/IPluginRegistry.java | 135 - .../bytedeco/tensorrt/nvinfer/IPluginV2.java | 297 - .../tensorrt/nvinfer/IPluginV2DynamicExt.java | 233 - .../tensorrt/nvinfer/IPluginV2Ext.java | 193 - .../tensorrt/nvinfer/IPluginV2IOExt.java | 99 - .../tensorrt/nvinfer/IPluginV2Layer.java | 43 - .../tensorrt/nvinfer/IPoolingLayer.java | 433 -- 
.../bytedeco/tensorrt/nvinfer/IProfiler.java | 57 - .../tensorrt/nvinfer/IQuantizeLayer.java | 106 - .../tensorrt/nvinfer/IRNNv2Layer.java | 339 - .../tensorrt/nvinfer/IRaggedSoftMaxLayer.java | 42 - .../tensorrt/nvinfer/IRecurrenceLayer.java | 46 - .../tensorrt/nvinfer/IReduceLayer.java | 102 - .../bytedeco/tensorrt/nvinfer/IRefitter.java | 336 - .../tensorrt/nvinfer/IResizeLayer.java | 323 - .../bytedeco/tensorrt/nvinfer/IRuntime.java | 191 - .../tensorrt/nvinfer/IScaleLayer.java | 190 - .../tensorrt/nvinfer/ISelectLayer.java | 31 - .../tensorrt/nvinfer/IShapeLayer.java | 40 - .../tensorrt/nvinfer/IShuffleLayer.java | 225 - .../tensorrt/nvinfer/ISliceLayer.java | 213 - .../tensorrt/nvinfer/ISoftMaxLayer.java | 82 - .../bytedeco/tensorrt/nvinfer/ITensor.java | 414 -- .../tensorrt/nvinfer/ITimingCache.java | 123 - .../bytedeco/tensorrt/nvinfer/ITopKLayer.java | 105 - .../tensorrt/nvinfer/ITripLimitLayer.java | 29 - .../tensorrt/nvinfer/IUnaryLayer.java | 54 - .../tensorrt/nvinfer/Permutation.java | 48 - .../tensorrt/nvinfer/PluginField.java | 81 - .../nvinfer/PluginFieldCollection.java | 45 - .../tensorrt/nvinfer/PluginTensorDesc.java | 58 - .../tensorrt/nvinfer/VActivationLayer.java | 35 - .../bytedeco/tensorrt/nvinfer/VAlgorithm.java | 33 - .../tensorrt/nvinfer/VAlgorithmContext.java | 33 - .../tensorrt/nvinfer/VAlgorithmIOInfo.java | 31 - .../tensorrt/nvinfer/VAlgorithmVariant.java | 30 - .../bytedeco/tensorrt/nvinfer/VBuilder.java | 45 - .../tensorrt/nvinfer/VBuilderConfig.java | 83 - .../tensorrt/nvinfer/VConcatenationLayer.java | 30 - .../tensorrt/nvinfer/VConstantLayer.java | 32 - .../tensorrt/nvinfer/VConvolutionLayer.java | 59 - .../tensorrt/nvinfer/VCudaEngine.java | 63 - .../tensorrt/nvinfer/VDeconvolutionLayer.java | 57 - .../tensorrt/nvinfer/VDequantizeLayer.java | 30 - .../tensorrt/nvinfer/VDimensionExpr.java | 30 - .../tensorrt/nvinfer/VElementWiseLayer.java | 31 - .../tensorrt/nvinfer/VExecutionContext.java | 63 - 
.../tensorrt/nvinfer/VExprBuilder.java | 33 - .../bytedeco/tensorrt/nvinfer/VFillLayer.java | 37 - .../nvinfer/VFullyConnectedLayer.java | 34 - .../tensorrt/nvinfer/VGatherLayer.java | 32 - .../tensorrt/nvinfer/VHostMemory.java | 31 - .../tensorrt/nvinfer/VIdentityLayer.java | 40 - .../tensorrt/nvinfer/VIteratorLayer.java | 32 - .../bytedeco/tensorrt/nvinfer/VLRNLayer.java | 36 - .../org/bytedeco/tensorrt/nvinfer/VLayer.java | 46 - .../org/bytedeco/tensorrt/nvinfer/VLoop.java | 39 - .../tensorrt/nvinfer/VLoopBoundaryLayer.java | 29 - .../tensorrt/nvinfer/VLoopOutputLayer.java | 31 - .../nvinfer/VMatrixMultiplyLayer.java | 31 - .../tensorrt/nvinfer/VNetworkDefinition.java | 113 - .../nvinfer/VOptimizationProfile.java | 51 - .../tensorrt/nvinfer/VPaddingLayer.java | 36 - .../nvinfer/VParametricReLULayer.java | 40 - .../tensorrt/nvinfer/VPluginLayer.java | 29 - .../tensorrt/nvinfer/VPluginV2Layer.java | 29 - .../tensorrt/nvinfer/VPoolingLayer.java | 54 - .../tensorrt/nvinfer/VQuantizeLayer.java | 30 - .../tensorrt/nvinfer/VRNNv2Layer.java | 55 - .../tensorrt/nvinfer/VRaggedSoftMaxLayer.java | 40 - .../tensorrt/nvinfer/VRecurrenceLayer.java | 40 - .../tensorrt/nvinfer/VReduceLayer.java | 35 - .../bytedeco/tensorrt/nvinfer/VRefitter.java | 61 - .../tensorrt/nvinfer/VResizeLayer.java | 50 - .../org/bytedeco/tensorrt/nvinfer/VRoot.java | 48 - .../bytedeco/tensorrt/nvinfer/VRuntime.java | 36 - .../tensorrt/nvinfer/VScaleLayer.java | 39 - .../tensorrt/nvinfer/VSelectLayer.java | 27 - .../tensorrt/nvinfer/VShapeLayer.java | 40 - .../tensorrt/nvinfer/VShuffleLayer.java | 36 - .../tensorrt/nvinfer/VSliceLayer.java | 37 - .../tensorrt/nvinfer/VSoftMaxLayer.java | 30 - .../bytedeco/tensorrt/nvinfer/VTensor.java | 52 - .../tensorrt/nvinfer/VTimingCache.java | 31 - .../bytedeco/tensorrt/nvinfer/VTopKLayer.java | 35 - .../tensorrt/nvinfer/VTripLimitLayer.java | 29 - .../tensorrt/nvinfer/VUnaryLayer.java | 31 - .../bytedeco/tensorrt/nvinfer/Weights.java | 58 - 
.../tensorrt/nvinfer/cublasContext.java | 28 - .../tensorrt/nvinfer/cudnnContext.java | 28 - .../DetectionOutputParameters.java | 75 - .../nvinfer_plugin/GridAnchorParameters.java | 63 - .../nvinfer_plugin/NMSParameters.java | 67 - .../nvinfer_plugin/PriorBoxParameters.java | 77 - .../tensorrt/nvinfer_plugin/Quadruple.java | 49 - .../tensorrt/nvinfer_plugin/RPROIParams.java | 67 - .../nvinfer_plugin/RegionParameters.java | 56 - .../tensorrt/nvinfer_plugin/softmaxTree.java | 56 - .../tensorrt/nvonnxparser/IParser.java | 134 - .../tensorrt/nvonnxparser/IParserError.java | 54 - .../nvonnxparser/SubGraphCollection_t.java | 93 - .../tensorrt/nvonnxparser/SubGraph_t.java | 45 - .../tensorrt/nvparsers/FieldCollection.java | 46 - .../bytedeco/tensorrt/nvparsers/FieldMap.java | 54 - .../tensorrt/nvparsers/IBinaryProtoBlob.java | 54 - .../tensorrt/nvparsers/IBlobNameToTensor.java | 51 - .../tensorrt/nvparsers/ICaffeParser.java | 195 - .../tensorrt/nvparsers/IPluginFactoryV2.java | 66 - .../tensorrt/nvparsers/IUffParser.java | 180 - .../tritonserver/global/tritonserver.java | 6613 +++++++++-------- .../tritonserver/presets/tritonserver.java | 5 +- 172 files changed, 3315 insertions(+), 25200 deletions(-) delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/global/nvinfer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/global/nvinfer_plugin.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/global/nvonnxparser.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/global/nvparsers.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Dims2.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Dims3.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Dims32.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Dims4.java delete mode 100644 
tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/DimsExprs.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/DimsHW.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/DynamicPluginTensorDesc.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/EnumMaxImpl.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IActivationLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IAlgorithm.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IAlgorithmContext.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IAlgorithmIOInfo.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IAlgorithmSelector.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IAlgorithmVariant.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IBuilder.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IBuilderConfig.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IConcatenationLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IConstantLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IConvolutionLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ICudaEngine.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IDeconvolutionLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IDequantizeLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IDimensionExpr.java delete mode 100644 
tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IElementWiseLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IErrorRecorder.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IExecutionContext.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IExprBuilder.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IFillLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IFullyConnectedLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IGatherLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IGpuAllocator.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IHostMemory.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IIdentityLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IInt8Calibrator.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IInt8EntropyCalibrator.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IInt8EntropyCalibrator2.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IInt8LegacyCalibrator.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IInt8MinMaxCalibrator.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IIteratorLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILRNLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILogger.java delete mode 100644 
tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILoop.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILoopBoundaryLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILoopOutputLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IMatrixMultiplyLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/INetworkDefinition.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/INoCopy.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IOptimizationProfile.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPaddingLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IParametricReLULayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPlugin.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginCreator.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginExt.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginFactory.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginRegistry.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginV2.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginV2DynamicExt.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginV2Ext.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginV2IOExt.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginV2Layer.java 
delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPoolingLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IProfiler.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IQuantizeLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IRNNv2Layer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IRaggedSoftMaxLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IRecurrenceLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IReduceLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IRefitter.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IResizeLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IRuntime.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IScaleLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ISelectLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IShapeLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IShuffleLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ISliceLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ISoftMaxLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ITensor.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ITimingCache.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ITopKLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ITripLimitLayer.java delete mode 100644 
tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IUnaryLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Permutation.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/PluginField.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/PluginFieldCollection.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/PluginTensorDesc.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VActivationLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VAlgorithm.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VAlgorithmContext.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VAlgorithmIOInfo.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VAlgorithmVariant.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VBuilder.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VBuilderConfig.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VConcatenationLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VConstantLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VConvolutionLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VCudaEngine.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VDeconvolutionLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VDequantizeLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VDimensionExpr.java delete mode 100644 
tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VElementWiseLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VExecutionContext.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VExprBuilder.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VFillLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VFullyConnectedLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VGatherLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VHostMemory.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VIdentityLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VIteratorLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VLRNLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VLoop.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VLoopBoundaryLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VLoopOutputLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VMatrixMultiplyLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VNetworkDefinition.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VOptimizationProfile.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VPaddingLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VParametricReLULayer.java delete mode 100644 
tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VPluginLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VPluginV2Layer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VPoolingLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VQuantizeLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRNNv2Layer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRaggedSoftMaxLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRecurrenceLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VReduceLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRefitter.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VResizeLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRoot.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRuntime.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VScaleLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VSelectLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VShapeLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VShuffleLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VSliceLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VSoftMaxLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VTensor.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VTimingCache.java delete mode 100644 
tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VTopKLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VTripLimitLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VUnaryLayer.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Weights.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/cublasContext.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/cudnnContext.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/DetectionOutputParameters.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/GridAnchorParameters.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/NMSParameters.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/PriorBoxParameters.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/Quadruple.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/RPROIParams.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/RegionParameters.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/softmaxTree.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvonnxparser/IParser.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvonnxparser/IParserError.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvonnxparser/SubGraphCollection_t.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvonnxparser/SubGraph_t.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/FieldCollection.java delete mode 100644 
tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/FieldMap.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/IBinaryProtoBlob.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/IBlobNameToTensor.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/ICaffeParser.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/IPluginFactoryV2.java delete mode 100644 tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/IUffParser.java diff --git a/tritonserver/pom.xml b/tritonserver/pom.xml index cc647a9e871..e051310dcf2 100644 --- a/tritonserver/pom.xml +++ b/tritonserver/pom.xml @@ -11,7 +11,7 @@ org.bytedeco tritonserver - 8.0-${project.parent.version} + 2.12-${project.parent.version} JavaCPP Presets for Tritonserver diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/global/nvinfer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/global/nvinfer.java deleted file mode 100644 index 66234e0c548..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/global/nvinfer.java +++ /dev/null @@ -1,3248 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.global; - -import org.bytedeco.tensorrt.nvinfer.*; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -public class nvinfer extends org.bytedeco.tensorrt.presets.nvinfer { - static { Loader.load(); } - -// Parsed from NvInferVersion.h - -/* - 
* Copyright 1993-2021 NVIDIA Corporation. All rights reserved. - * - * NOTICE TO LICENSEE: - * - * This source code and/or documentation ("Licensed Deliverables") are - * subject to NVIDIA intellectual property rights under U.S. and - * international Copyright laws. - * - * These Licensed Deliverables contained herein is PROPRIETARY and - * CONFIDENTIAL to NVIDIA and is being provided under the terms and - * conditions of a form of NVIDIA software license agreement by and - * between NVIDIA and Licensee ("License Agreement") or electronically - * accepted by Licensee. Notwithstanding any terms or conditions to - * the contrary in the License Agreement, reproduction or disclosure - * of the Licensed Deliverables to any third party without the express - * written consent of NVIDIA is prohibited. - * - * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE - * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE - * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS - * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. - * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED - * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, - * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. - * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE - * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY - * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY - * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, - * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS - * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE - * OF THESE LICENSED DELIVERABLES. - * - * U.S. Government End Users. These Licensed Deliverables are a - * "commercial item" as that term is defined at 48 C.F.R. 
2.101 (OCT - * 1995), consisting of "commercial computer software" and "commercial - * computer software documentation" as such terms are used in 48 - * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government - * only as a commercial end item. Consistent with 48 C.F.R.12.212 and - * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all - * U.S. Government End Users acquire the Licensed Deliverables with - * only those rights set forth herein. - * - * Any use of the Licensed Deliverables in individual and commercial - * software must include, in the user documentation and internal - * comments to the code, the above Disclaimer and U.S. Government End - * Users Notice. - */ - - /** - /** \file NvInferVersion.h - /** - /** Defines the TensorRT version - /** */ - -// #ifndef NV_INFER_VERSION_H -// #define NV_INFER_VERSION_H - -/** TensorRT major version. */ -public static final int NV_TENSORRT_MAJOR = 8; -/** TensorRT minor version. */ -public static final int NV_TENSORRT_MINOR = 0; -/** TensorRT patch version. */ -public static final int NV_TENSORRT_PATCH = 1; -/** TensorRT build number. */ -public static final int NV_TENSORRT_BUILD = 6; - -/** Shared object library major version number. */ -public static final int NV_TENSORRT_SONAME_MAJOR = 8; -/** Shared object library minor version number. */ -public static final int NV_TENSORRT_SONAME_MINOR = 0; -/** Shared object library patch version number. */ -public static final int NV_TENSORRT_SONAME_PATCH = 1; - -// #endif // NV_INFER_VERSION_H - - -// Parsed from NvInferRuntimeCommon.h - -/* - * Copyright (c) 1993-2021 NVIDIA Corporation. All rights reserved. - * - * NOTICE TO LICENSEE: - * - * This source code and/or documentation ("Licensed Deliverables") are - * subject to NVIDIA intellectual property rights under U.S. and - * international Copyright laws. 
- * - * These Licensed Deliverables contained herein is PROPRIETARY and - * CONFIDENTIAL to NVIDIA and is being provided under the terms and - * conditions of a form of NVIDIA software license agreement by and - * between NVIDIA and Licensee ("License Agreement") or electronically - * accepted by Licensee. Notwithstanding any terms or conditions to - * the contrary in the License Agreement, reproduction or disclosure - * of the Licensed Deliverables to any third party without the express - * written consent of NVIDIA is prohibited. - * - * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE - * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE - * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS - * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. - * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED - * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, - * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. - * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE - * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY - * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY - * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, - * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS - * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE - * OF THESE LICENSED DELIVERABLES. - * - * U.S. Government End Users. These Licensed Deliverables are a - * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT - * 1995), consisting of "commercial computer software" and "commercial - * computer software documentation" as such terms are used in 48 - * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government - * only as a commercial end item. Consistent with 48 C.F.R.12.212 and - * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all - * U.S. 
Government End Users acquire the Licensed Deliverables with - * only those rights set forth herein. - * - * Any use of the Licensed Deliverables in individual and commercial - * software must include, in the user documentation and internal - * comments to the code, the above Disclaimer and U.S. Government End - * Users Notice. - */ - -// #ifndef NV_INFER_RUNTIME_COMMON_H -// #define NV_INFER_RUNTIME_COMMON_H - -// #include "NvInferVersion.h" -// #include - - -/** Items that are marked as deprecated will be removed in a future release. */ -// #include -// #if __cplusplus >= 201402L -// #define TRT_DEPRECATED [[deprecated]] -// #if __GNUC__ < 6 -// #define TRT_DEPRECATED_ENUM -// #else -// #define TRT_DEPRECATED_ENUM TRT_DEPRECATED -// #endif -// #ifdef _MSC_VER -// #define TRT_DEPRECATED_API __declspec(dllexport) -// #else -// #define TRT_DEPRECATED_API [[deprecated]] __attribute__((visibility("default"))) -// #endif -// #else -// #ifdef _MSC_VER -// #define TRT_DEPRECATED -// #define TRT_DEPRECATED_ENUM -// #define TRT_DEPRECATED_API __declspec(dllexport) -// #else -// #define TRT_DEPRECATED __attribute__((deprecated)) -// #define TRT_DEPRECATED_ENUM - - -/** Defines which symbols are exported */ -// #define TRT_DEPRECATED_API __attribute__((deprecated, visibility("default"))) -// #endif -// #endif -// #ifdef TENSORRT_BUILD_LIB -// #ifdef _MSC_VER -// #define TENSORRTAPI __declspec(dllexport) -// #else -// #define TENSORRTAPI __attribute__((visibility("default"))) -// #endif -// #else -// #define TENSORRTAPI -// #endif - -//! -//! -//! -// #define TRTNOEXCEPT -/** - * \file NvInferRuntimeCommon.h - * - * This is the top-level API file for TensorRT core runtime library. - * */ - -// forward declare some CUDA types to avoid an include dependency -// Targeting ../nvinfer/cublasContext.java - - -// Targeting ../nvinfer/cudnnContext.java - - - - /** Forward declaration of cudaStream_t. */ - - /** Forward declaration of cudaEvent_t. */ - - -//! -//! -//! 
-public static native @MemberGetter int NV_TENSORRT_VERSION(); -public static final int NV_TENSORRT_VERSION = NV_TENSORRT_VERSION(); -/** - * \namespace nvinfer1 - * - * \brief The TensorRT API version 1 namespace. - * */ - -@Namespace("nvinfer1") @MemberGetter public static native int kNV_TENSORRT_VERSION_IMPL(); -public static final int kNV_TENSORRT_VERSION_IMPL = kNV_TENSORRT_VERSION_IMPL(); // major, minor, patch - -/** char_t is the type used by TensorRT to represent all valid characters. */ -/** AsciiChar is the type used by TensorRT to represent valid ASCII characters. */ - -/** Forward declare IErrorRecorder for use in other interfaces. */ -/** Forward declare IGpuAllocator for use in other interfaces. */ -/** Declaration of EnumMaxImpl struct to store maximum number of elements in an enumeration type. */ - // namespace impl - -/** Maximum number of elements in an enumeration type. */ - - -/** - * \enum DataType - * \brief The type of weights and tensors. - * */ -@Namespace("nvinfer1") public enum DataType { - /** 32-bit floating point format. */ - kFLOAT(0), - - /** IEEE 16-bit floating-point format. */ - kHALF(1), - - /** 8-bit integer representing a quantized floating-point value. */ - kINT8(2), - - /** Signed 32-bit integer format. */ - kINT32(3), - - /** 8-bit boolean. 0 = false, 1 = true, other values undefined. */ - kBOOL(4); - - public final int value; - private DataType(int v) { this.value = v; } - private DataType(DataType e) { this.value = e.value; } - public DataType intern() { for (DataType e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} -// Targeting ../nvinfer/EnumMaxImpl.java - - - -// Targeting ../nvinfer/Dims32.java - - - -/** - * Alias for Dims32. - * - * \warning: This alias might change in the future. - * */ - - -//! -//! -//! -//! -//! -//! - -/** - * \enum TensorFormat - * - * \brief Format of the input/output tensors. 
- * - * This enum is extended to be used by both plugins and reformat-free network - * I/O tensors. - * - * @see IPluginV2::supportsFormat(), safe::ICudaEngine::getBindingFormat() - * - * For more information about data formats, see the topic "Data Format Description" located in the - * TensorRT Developer Guide. - * */ -@Namespace("nvinfer1") public enum TensorFormat { - /** Row major linear format. - * For a tensor with dimensions {N, C, H, W} or {numbers, channels, - * columns, rows}, the dimensional index corresponds to {3, 2, 1, 0} - * and thus the order is W minor. - * - * For DLA usage, the tensor sizes are limited to C,H,W in the range [1,8192]. - * */ - kLINEAR(0), - - /** Two wide channel vectorized row major format. This format is bound to - * FP16. It is only available for dimensions >= 3. - * For a tensor with dimensions {N, C, H, W}, - * the memory layout is equivalent to a C array with dimensions - * [N][(C+1)/2][H][W][2], with the tensor coordinates (n, c, h, w) - * mapping to array subscript [n][c/2][h][w][c%2]. */ - kCHW2(1), - - /** Eight channel format where C is padded to a multiple of 8. This format - * is bound to FP16. It is only available for dimensions >= 3. - * For a tensor with dimensions {N, C, H, W}, - * the memory layout is equivalent to the array with dimensions - * [N][H][W][(C+7)/8*8], with the tensor coordinates (n, c, h, w) - * mapping to array subscript [n][h][w][c]. */ - -//! -//! - kHWC8(2), - - /** Four wide channel vectorized row major format. This format is bound to - * INT8 or FP16. It is only available for dimensions >= 3. - * For INT8, the C dimension must be a build-time constant. - * For a tensor with dimensions {N, C, H, W}, - * the memory layout is equivalent to a C array with dimensions - * [N][(C+3)/4][H][W][4], with the tensor coordinates (n, c, h, w) - * mapping to array subscript [n][c/4][h][w][c%4]. 
- * - * Deprecated usage: - * - * If running on the DLA, this format can be used for acceleration - * with the caveat that C must be equal or lesser than 4. - * If used as DLA input with allowGPUFallback disable, it needs to meet - * line stride requirement of DLA format. Column stride in bytes should - * be multiple of 32. */ - -//! -//! - kCHW4(3), - - /** Sixteen wide channel vectorized row major format. This format is bound - * to FP16. It is only available for dimensions >= 3. - * For a tensor with dimensions {N, C, H, W}, - * the memory layout is equivalent to a C array with dimensions - * [N][(C+15)/16][H][W][16], with the tensor coordinates (n, c, h, w) - * mapping to array subscript [n][c/16][h][w][c%16]. - * - * For DLA usage, this format maps to the native image format for FP16, - * and the tensor sizes are limited to C,H,W in the range [1,8192]. - * */ - -//! - kCHW16(4), - - /** Thirty-two wide channel vectorized row major format. This format is - * only available for dimensions >= 3. - * For a tensor with dimensions {N, C, H, W}, - * the memory layout is equivalent to a C array with dimensions - * [N][(C+31)/32][H][W][32], with the tensor coordinates (n, c, h, w) - * mapping to array subscript [n][c/32][h][w][c%32]. - * - * For DLA usage, this format maps to the native image format for INT8, - * and the tensor sizes are limited to C,H,W in the range [1,8192]. */ - kCHW32(5), - - /** Eight channel format where C is padded to a multiple of 8. This format - * is bound to FP16, and it is only available for dimensions >= 4. - * For a tensor with dimensions {N, C, D, H, W}, - * the memory layout is equivalent to an array with dimensions - * [N][D][H][W][(C+7)/8*8], with the tensor coordinates (n, c, d, h, w) - * mapping to array subscript [n][d][h][w][c]. */ - kDHWC8(6), - - /** Thirty-two wide channel vectorized row major format. This format is - * bound to FP16 and INT8 and is only available for dimensions >= 4. 
- * For a tensor with dimensions {N, C, D, H, W}, - * the memory layout is equivalent to a C array with dimensions - * [N][(C+31)/32][D][H][W][32], with the tensor coordinates (n, c, d, h, w) - * mapping to array subscript [n][c/32][d][h][w][c%32]. */ - kCDHW32(7), - - /** Non-vectorized channel-last format. This format is bound to FP32 - * and is only available for dimensions >= 3. */ - -//! - kHWC(8), - - /** DLA planar format. For a tensor with dimension {N, C, H, W}, the W axis - * always has unit stride. The stride for stepping along the H axis is - * rounded up to 64 bytes. - * - * The memory layout is equivalent to a C array with dimensions - * [N][C][H][roundUp(W, 64/elementSize)] where elementSize is - * 2 for FP16 and 1 for Int8, with the tensor coordinates (n, c, h, w) - * mapping to array subscript [n][c][h][w]. */ - -//! - kDLA_LINEAR(9), - - /** DLA image format. For a tensor with dimension {N, C, H, W} the C axis - * always has unit stride. The stride for stepping along the H axis is rounded up - * to 32 bytes. C can only be 1, 3 or 4. - * If C == 1, it will map to grayscale format. - * If C == 3 or C == 4, it will map to color image format. And if C == 3, - * the stride for stepping along the W axis needs to be padded to 4 in elements. - * - * When C is {1, 3, 4}, then C' is {1, 4, 4} respectively, - * the memory layout is equivalent to a C array with dimensions - * [N][H][roundUp(W, 32/C'/elementSize)][C'] where elementSize is 2 for FP16 - * and 1 for Int8. The tensor coordinates (n, c, h, w) mapping to array - * subscript [n][h][w][c]. */ - kDLA_HWC4(10), - - /** Sixteen channel format where C is padded to a multiple of 16. This format - * is bound to FP16. It is only available for dimensions >= 3. - * For a tensor with dimensions {N, C, H, W}, - * the memory layout is equivalent to the array with dimensions - * [N][H][W][(C+15)/16*16], with the tensor coordinates (n, c, h, w) - * mapping to array subscript [n][h][w][c]. 
*/ - kHWC16(11); - - public final int value; - private TensorFormat(int v) { this.value = v; } - private TensorFormat(TensorFormat e) { this.value = e.value; } - public TensorFormat intern() { for (TensorFormat e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} - -/** - * \brief PluginFormat is reserved for backward compatibility. - * - * @see IPluginV2::supportsFormat() - * */ -/** Maximum number of elements in TensorFormat enum. @see TensorFormat */ - -// Targeting ../nvinfer/PluginTensorDesc.java - - - -/** \struct PluginVersion - * - * \brief Definition of plugin versions. - * - * Tag for plug-in versions. Used in upper byte of getTensorRTVersion(). - * */ -@Namespace("nvinfer1") public enum PluginVersion { - /** IPluginV2 */ - kV2((byte)(0)), - /** IPluginV2Ext */ - kV2_EXT((byte)(1)), - /** IPluginV2IOExt */ - kV2_IOEXT((byte)(2)), - /** IPluginV2DynamicExt */ - kV2_DYNAMICEXT((byte)(3)); - - public final byte value; - private PluginVersion(byte v) { this.value = v; } - private PluginVersion(PluginVersion e) { this.value = e.value; } - public PluginVersion intern() { for (PluginVersion e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} -// Targeting ../nvinfer/IPluginV2.java - - -// Targeting ../nvinfer/IPluginV2Ext.java - - -// Targeting ../nvinfer/IPluginV2IOExt.java - - - -/** - * \enum FieldType - * \brief The possible field types for custom layer. - * */ - -@Namespace("nvinfer1") public enum PluginFieldType { - /** FP16 field type. */ - kFLOAT16(0), - /** FP32 field type. */ - kFLOAT32(1), - /** FP64 field type. */ - kFLOAT64(2), - /** INT8 field type. */ - kINT8(3), - /** INT16 field type. */ - kINT16(4), - /** INT32 field type. */ - kINT32(5), - /** char field type. */ - kCHAR(6), - /** nvinfer1::Dims field type. */ - kDIMS(7), - /** Unknown field type. 
*/ - kUNKNOWN(8); - - public final int value; - private PluginFieldType(int v) { this.value = v; } - private PluginFieldType(PluginFieldType e) { this.value = e.value; } - public PluginFieldType intern() { for (PluginFieldType e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} -// Targeting ../nvinfer/PluginField.java - - -// Targeting ../nvinfer/PluginFieldCollection.java - - -// Targeting ../nvinfer/IPluginCreator.java - - -// Targeting ../nvinfer/IPluginRegistry.java - - - -@Namespace("nvinfer1") public enum AllocatorFlag { - /** TensorRT may call realloc() on this allocation */ - kRESIZABLE(0); - - public final int value; - private AllocatorFlag(int v) { this.value = v; } - private AllocatorFlag(AllocatorFlag e) { this.value = e.value; } - public AllocatorFlag intern() { for (AllocatorFlag e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} -/** Maximum number of elements in AllocatorFlag enum. @see AllocatorFlag */ - // namespace impl - - - -//! -//! -//! -// Targeting ../nvinfer/IGpuAllocator.java - - -// Targeting ../nvinfer/ILogger.java - - -/** Maximum number of elements in ILogger::Severity enum. @see ILogger::Severity */ - // namespace impl - -/** - * \enum ErrorCode - * - * \brief Error codes that can be returned by TensorRT during execution. - * */ -@Namespace("nvinfer1") public enum ErrorCode { - /** - * Execution completed successfully. - * */ - - -//! -//! - kSUCCESS(0), - - /** - * An error that does not fall into any other category. This error is included for forward compatibility. - * */ - - -//! -//! - kUNSPECIFIED_ERROR(1), - - /** - * A non-recoverable TensorRT error occurred. TensorRT is in an invalid internal state when this error is - * emitted and any further calls to TensorRT will result in undefined behavior. - * */ - - -//! -//! 
- kINTERNAL_ERROR(2), - - /** - * An argument passed to the function is invalid in isolation. - * This is a violation of the API contract. - * */ - - -//! -//! - kINVALID_ARGUMENT(3), - - /** - * An error occurred when comparing the state of an argument relative to other arguments. For example, the - * dimensions for concat differ between two tensors outside of the channel dimension. This error is triggered - * when an argument is correct in isolation, but not relative to other arguments. This is to help to distinguish - * from the simple errors from the more complex errors. - * This is a violation of the API contract. - * */ - - -//! -//! - kINVALID_CONFIG(4), - - /** - * An error occurred when performing an allocation of memory on the host or the device. - * A memory allocation error is normally fatal, but in the case where the application provided its own memory - * allocation routine, it is possible to increase the pool of available memory and resume execution. - * */ - - -//! -//! - kFAILED_ALLOCATION(5), - - /** - * One, or more, of the components that TensorRT relies on did not initialize correctly. - * This is a system setup issue. - * */ - - -//! -//! - kFAILED_INITIALIZATION(6), - - /** - * An error occurred during execution that caused TensorRT to end prematurely, either an asynchronous error or - * other execution errors reported by CUDA/DLA. In a dynamic system, the - * data can be thrown away and the next frame can be processed or execution can be retried. - * This is either an execution error or a memory error. - * */ - - -//! -//! - kFAILED_EXECUTION(7), - - /** - * An error occurred during execution that caused the data to become corrupted, but execution finished. Examples - * of this error are NaN squashing or integer overflow. In a dynamic system, the data can be thrown away and the - * next frame can be processed or execution can be retried. - * This is either a data corruption error, an input error, or a range error. 
- * This is not used in safety but may be used in standard. - * */ - - -//! -//! -//! - kFAILED_COMPUTATION(8), - - /** - * TensorRT was put into a bad state by incorrect sequence of function calls. An example of an invalid state is - * specifying a layer to be DLA only without GPU fallback, and that layer is not supported by DLA. This can occur - * in situations where a service is optimistically executing networks for multiple different configurations - * without checking proper error configurations, and instead throwing away bad configurations caught by TensorRT. - * This is a violation of the API contract, but can be recoverable. - * - * Example of a recovery: - * GPU fallback is disabled and conv layer with large filter(63x63) is specified to run on DLA. This will fail due - * to DLA not supporting the large kernel size. This can be recovered by either turning on GPU fallback - * or setting the layer to run on the GPU. - * */ - - -//! -//! - kINVALID_STATE(9), - - /** - * An error occurred due to the network not being supported on the device due to constraints of the hardware or - * system. An example is running a unsafe layer in a safety certified context, or a resource requirement for the - * current network is greater than the capabilities of the target device. The network is otherwise correct, but - * the network and hardware combination is problematic. This can be recoverable. - * Examples: - * * Scratch space requests larger than available device memory and can be recovered by increasing allowed - * workspace size. - * * Tensor size exceeds the maximum element count and can be recovered by reducing the maximum batch size. 
- * */ - kUNSUPPORTED_STATE(10); - - public final int value; - private ErrorCode(int v) { this.value = v; } - private ErrorCode(ErrorCode e) { this.value = e.value; } - public ErrorCode intern() { for (ErrorCode e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} -/** Maximum number of elements in ErrorCode enum. @see ErrorCode */ - -// Targeting ../nvinfer/IErrorRecorder.java - - // class IErrorRecorder - - // namespace nvinfer1 - -/** - * \brief Return the library version number. - * - * The format is as for TENSORRT_VERSION: (TENSORRT_MAJOR * 1000) + (TENSORRT_MINOR * 100) + TENSOR_PATCH. - * */ -public static native @NoException(true) int getInferLibVersion(); - -// #endif // NV_INFER_RUNTIME_COMMON_H - - -// Parsed from NvInferLegacyDims.h - -/* - * Copyright 1993-2021 NVIDIA Corporation. All rights reserved. - * - * NOTICE TO LICENSEE: - * - * This source code and/or documentation ("Licensed Deliverables") are - * subject to NVIDIA intellectual property rights under U.S. and - * international Copyright laws. - * - * These Licensed Deliverables contained herein is PROPRIETARY and - * CONFIDENTIAL to NVIDIA and is being provided under the terms and - * conditions of a form of NVIDIA software license agreement by and - * between NVIDIA and Licensee ("License Agreement") or electronically - * accepted by Licensee. Notwithstanding any terms or conditions to - * the contrary in the License Agreement, reproduction or disclosure - * of the Licensed Deliverables to any third party without the express - * written consent of NVIDIA is prohibited. - * - * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE - * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE - * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS - * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. 
- * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED - * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, - * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. - * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE - * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY - * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY - * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, - * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS - * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE - * OF THESE LICENSED DELIVERABLES. - * - * U.S. Government End Users. These Licensed Deliverables are a - * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT - * 1995), consisting of "commercial computer software" and "commercial - * computer software documentation" as such terms are used in 48 - * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government - * only as a commercial end item. Consistent with 48 C.F.R.12.212 and - * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all - * U.S. Government End Users acquire the Licensed Deliverables with - * only those rights set forth herein. - * - * Any use of the Licensed Deliverables in individual and commercial - * software must include, in the user documentation and internal - * comments to the code, the above Disclaimer and U.S. Government End - * Users Notice. - */ - -// #ifndef NV_INFER_LEGACY_DIMS_H -// #define NV_INFER_LEGACY_DIMS_H - - - -//! -//! -//! - -//! -//! -//! -// #include "NvInferRuntimeCommon.h" - -/** - * \file NvInferLegacyDims.h - * - * This file contains declarations of legacy dimensions types which use channel - * semantics in their names, and declarations on which those types rely. - * -

- * - * \namespace nvinfer1 - * - * \brief The TensorRT API version 1 namespace. - * */ -// Targeting ../nvinfer/Dims2.java - - -// Targeting ../nvinfer/DimsHW.java - - -// Targeting ../nvinfer/Dims3.java - - -// Targeting ../nvinfer/Dims4.java - - - - // namespace nvinfer1 - -// #endif // NV_INFER_LEGCY_DIMS_H - - -// Parsed from NvInferRuntime.h - -/* - * Copyright 1993-2021 NVIDIA Corporation. All rights reserved. - * - * NOTICE TO LICENSEE: - * - * This source code and/or documentation ("Licensed Deliverables") are - * subject to NVIDIA intellectual property rights under U.S. and - * international Copyright laws. - * - * These Licensed Deliverables contained herein is PROPRIETARY and - * CONFIDENTIAL to NVIDIA and is being provided under the terms and - * conditions of a form of NVIDIA software license agreement by and - * between NVIDIA and Licensee ("License Agreement") or electronically - * accepted by Licensee. Notwithstanding any terms or conditions to - * the contrary in the License Agreement, reproduction or disclosure - * of the Licensed Deliverables to any third party without the express - * written consent of NVIDIA is prohibited. - * - * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE - * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE - * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS - * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. - * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED - * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, - * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. 
- * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE - * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY - * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY - * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, - * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS - * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE - * OF THESE LICENSED DELIVERABLES. - * - * U.S. Government End Users. These Licensed Deliverables are a - * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT - * 1995), consisting of "commercial computer software" and "commercial - * computer software documentation" as such terms are used in 48 - * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government - * only as a commercial end item. Consistent with 48 C.F.R.12.212 and - * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all - * U.S. Government End Users acquire the Licensed Deliverables with - * only those rights set forth herein. - * - * Any use of the Licensed Deliverables in individual and commercial - * software must include, in the user documentation and internal - * comments to the code, the above Disclaimer and U.S. Government End - * Users Notice. - */ - -// #ifndef NV_INFER_RUNTIME_H - - -//! -//! -//! -// #define NV_INFER_RUNTIME_H - -/** - * \file NvInferRuntime.h - * - * This is the top-level API file for TensorRT extended runtime library. - * */ - -// #include "NvInferImpl.h" -// #include "NvInferRuntimeCommon.h" -// Targeting ../nvinfer/IPluginFactory.java - - -// Targeting ../nvinfer/INoCopy.java - - - -/** - * \enum EngineCapability - * - * \brief List of supported engine capability flows. - * - * \details The EngineCapability determines the restrictions of a network during build time and what runtime - * it targets. 
When BuilderFlag::kSAFETY_SCOPE is not set (by default), EngineCapability::kSTANDARD does not provide - * any restrictions on functionality and the resulting serialized engine can be executed with TensorRT's standard - * runtime APIs in the nvinfer1 namespace. EngineCapability::kSAFETY provides a restricted subset of network - * operations that are safety certified and the resulting serialized engine can be executed with TensorRT's safe - * runtime APIs in the nvinfer1::safe namespace. EngineCapability::kDLA_STANDALONE provides a restricted subset of - * network operations that are DLA compatible and the resulting serialized engine can be executed using standalone - * DLA runtime APIs. See sampleNvmedia for an example of integrating NvMediaDLA APIs with TensorRT APIs. - * */ - -@Namespace("nvinfer1") public enum EngineCapability { - /** - * Standard: TensorRT flow without targeting the safety runtime. - * This flow supports both DeviceType::kGPU and DeviceType::kDLA. - * */ - kSTANDARD(0), - - -//! -//! - kDEFAULT(kSTANDARD.value), - - /** - * Safety: TensorRT flow with restrictions targeting the safety runtime. - * See safety documentation for list of supported layers and formats. - * This flow supports only DeviceType::kGPU. - * */ - kSAFETY(1), - - -//! -//! - kSAFE_GPU(kSAFETY.value), - - /** - * DLA Standalone: TensorRT flow with restrictions targeting external, to TensorRT, DLA runtimes. - * See DLA documentation for list of supported layers and formats. - * This flow supports only DeviceType::kDLA. - * */ - kDLA_STANDALONE(2), - kSAFE_DLA(kDLA_STANDALONE.value); - - public final int value; - private EngineCapability(int v) { this.value = v; } - private EngineCapability(EngineCapability e) { this.value = e.value; } - public EngineCapability intern() { for (EngineCapability e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} -/** Maximum number of elements in EngineCapability enum. 
@see EngineCapability */ - -// Targeting ../nvinfer/Weights.java - - -// Targeting ../nvinfer/IHostMemory.java - - - -/** - * \enum DimensionOperation - * - * \brief An operation on two IDimensionExpr, which represent integer expressions used in dimension computations. - * - * For example, given two IDimensionExpr x and y and an IExprBuilder& eb, - * eb.operation(DimensionOperation::kSUM, x, y) creates a representation of x+y. - * - * @see IDimensionExpr, IExprBuilder - * */ -@Namespace("nvinfer1") public enum DimensionOperation { - /** Sum of the two operands. */ - kSUM(0), - /** Product of the two operands. */ - kPROD(1), - /** Maximum of the two operands. */ - kMAX(2), - /** Minimum of the two operands. */ - kMIN(3), - /** Substract the second element from the first. */ - kSUB(4), - /** 1 if operands are equal, 0 otherwise. */ - kEQUAL(5), - /** 1 if first operand is less than second operand, 0 otherwise. */ - kLESS(6), - /** Floor division of the first element by the second. */ - kFLOOR_DIV(7), - /** Division rounding up */ - kCEIL_DIV(8); - - public final int value; - private DimensionOperation(int v) { this.value = v; } - private DimensionOperation(DimensionOperation e) { this.value = e.value; } - public DimensionOperation intern() { for (DimensionOperation e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} - -/** Maximum number of elements in DimensionOperation enum. @see DimensionOperation */ - - -/** - * \enum TensorLocation - * \brief The location for tensor data storage, device or host. - * */ -@Namespace("nvinfer1") public enum TensorLocation { - /** Data stored on device. */ - kDEVICE(0), - /** Data stored on host. 
*/ - kHOST(1); - - public final int value; - private TensorLocation(int v) { this.value = v; } - private TensorLocation(TensorLocation e) { this.value = e.value; } - public TensorLocation intern() { for (TensorLocation e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} -/** Maximum number of elements in TensorLocation enum. @see TensorLocation */ - -// Targeting ../nvinfer/IDimensionExpr.java - - -// Targeting ../nvinfer/IExprBuilder.java - - -// Targeting ../nvinfer/DimsExprs.java - - -// Targeting ../nvinfer/DynamicPluginTensorDesc.java - - -// Targeting ../nvinfer/IPluginV2DynamicExt.java - - -// Targeting ../nvinfer/IProfiler.java - - - -/** - * \enum WeightsRole - * \brief How a layer uses particular Weights. - * - * The power weights of an IScaleLayer are omitted. Refitting those is not supported. - * */ -@Namespace("nvinfer1") public enum WeightsRole { - /** kernel for IConvolutionLayer, IDeconvolutionLayer, or IFullyConnectedLayer */ - kKERNEL(0), - /** bias for IConvolutionLayer, IDeconvolutionLayer, or IFullyConnectedLayer */ - kBIAS(1), - /** shift part of IScaleLayer */ - kSHIFT(2), - /** scale part of IScaleLayer */ - kSCALE(3), - /** weights for IConstantLayer */ - kCONSTANT(4), - /** Any other weights role */ - kANY(5); - - public final int value; - private WeightsRole(int v) { this.value = v; } - private WeightsRole(WeightsRole e) { this.value = e.value; } - public WeightsRole intern() { for (WeightsRole e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} - -/** Maximum number of elements in WeightsRole enum. @see WeightsRole */ - - -/** - * \enum DeviceType - * \brief The device that this layer/network will execute on. 
- * - * */ -@Namespace("nvinfer1") public enum DeviceType { - /** GPU Device */ - kGPU(0), - /** DLA Core */ - kDLA(1); - - public final int value; - private DeviceType(int v) { this.value = v; } - private DeviceType(DeviceType e) { this.value = e.value; } - public DeviceType intern() { for (DeviceType e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} - -/** Maximum number of elements in DeviceType enum. @see DeviceType */ - -// Targeting ../nvinfer/IRuntime.java - - -// Targeting ../nvinfer/IRefitter.java - - - -/** - * \enum OptProfileSelector - * - * \brief When setting or querying optimization profile parameters (such as shape tensor inputs or dynamic dimensions), - * select whether we are interested in the minimum, optimum, or maximum values for these parameters. - * The minimum and maximum specify the permitted range that is supported at runtime, while the optimum value - * is used for the kernel selection. This should be the "typical" value that is expected to occur at runtime. - * - * @see IOptimizationProfile::setDimensions(), IOptimizationProfile::setShapeValues() - * */ -@Namespace("nvinfer1") public enum OptProfileSelector { - /** This is used to set or get the minimum permitted value for dynamic dimensions etc. */ - kMIN(0), - /** This is used to set or get the value that is used in the optimization (kernel selection). */ - kOPT(1), - /** This is used to set or get the maximum permitted value for dynamic dimensions etc. 
*/ - kMAX(2); - - public final int value; - private OptProfileSelector(int v) { this.value = v; } - private OptProfileSelector(OptProfileSelector e) { this.value = e.value; } - public OptProfileSelector intern() { for (OptProfileSelector e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} - -// Targeting ../nvinfer/IOptimizationProfile.java - - - -/** - * \enum TacticSource - * - * \brief List of tactic sources for TensorRT. - * - * @see TacticSources, IBuilderConfig::setTacticSources(), IBuilderConfig::getTacticSources() - * */ -@Namespace("nvinfer1") public enum TacticSource { - /** \note Disabling kCUBLAS will cause the cublas handle passed to plugins in attachToContext to be null. */ - /** cuBLAS tactics. */ - kCUBLAS(0), - /** cuBLAS LT tactics */ - kCUBLAS_LT(1), - /** cuDNN tactics */ - kCUDNN(2); - - public final int value; - private TacticSource(int v) { this.value = v; } - private TacticSource(TacticSource e) { this.value = e.value; } - public TacticSource intern() { for (TacticSource e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} - - - -/** - * \brief Represents a collection of one or more TacticSource values - * combine using bitwise-OR operations. - * - * @see IBuilderConfig::setTacticSources(), IBuilderConfig::getTacticSources() - * */ - - -//! -//! -//! -//! -// Targeting ../nvinfer/ICudaEngine.java - - -// Targeting ../nvinfer/IExecutionContext.java - - // class IExecutionContext - // namespace nvinfer1 - -/** - * Internal C entry point for creating IRuntime. - * \private - * */ - - -//! -//! -public static native @NoException(true) Pointer createInferRuntime_INTERNAL(Pointer logger, int version); - -/** - * Internal C entry point for creating IRefitter. - * \private - * */ - - -//! -//! 
// NOTE(review): the declarations below look like JavaCPP-generated JNI bindings
// (see the "Parsed from" markers in this file); prefer regenerating over hand-editing.
public static native @NoException(true) Pointer createInferRefitter_INTERNAL(Pointer engine, Pointer logger, int version);

/**
 * \brief Return the plugin registry
 * */


//!
//!
public static native @NoException(true) IPluginRegistry getPluginRegistry();

/**
 * \brief Return the logger object.
 * */
public static native @NoException(true) ILogger getLogger();
/**
 * \brief Create an instance of an IRuntime class.
 *
 * This class is the logging class for the runtime.
 * */


//!
//!
//!
@Namespace("nvinfer1") public static native @NoException(true) IRuntime createInferRuntime(@ByRef ILogger logger);

/**
 * \brief Create an instance of an IRefitter class.
 *
 * This is the logging class for the refitter.
 * */
@Namespace("nvinfer1") public static native @NoException(true) IRefitter createInferRefitter(@ByRef ICudaEngine engine, @ByRef ILogger logger);

 // namespace

/**
 * \brief Register the plugin creator to the registry
 * The static registry object will be instantiated when the plugin library is
 * loaded. This static object will register all creators available in the
 * library to the registry.
 *
 * \warning Statically registering plugins should be avoided in the automotive
 * safety context as the application developer should first register an error recorder
 * with the plugin registry via IPluginRegistry::setErrorRecorder() before using
 * IPluginRegistry::registerCreator() or other methods.
 * */

 // namespace nvinfer1

// #define REGISTER_TENSORRT_PLUGIN(name)
//     static nvinfer1::PluginRegistrar pluginRegistrar##name {}
// #endif // NV_INFER_RUNTIME_H


// Parsed from NvInfer.h

/*
 * Copyright 1993-2021 NVIDIA Corporation.  All rights reserved.
 *
 * NOTICE TO LICENSEE:
 *
 * This source code and/or documentation ("Licensed Deliverables") are
 * subject to NVIDIA intellectual property rights under U.S. and
 * international Copyright laws.
- * - * These Licensed Deliverables contained herein is PROPRIETARY and - * CONFIDENTIAL to NVIDIA and is being provided under the terms and - * conditions of a form of NVIDIA software license agreement by and - * between NVIDIA and Licensee ("License Agreement") or electronically - * accepted by Licensee. Notwithstanding any terms or conditions to - * the contrary in the License Agreement, reproduction or disclosure - * of the Licensed Deliverables to any third party without the express - * written consent of NVIDIA is prohibited. - * - * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE - * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE - * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS - * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. - * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED - * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, - * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. - * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE - * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY - * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY - * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, - * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS - * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE - * OF THESE LICENSED DELIVERABLES. - * - * U.S. Government End Users. These Licensed Deliverables are a - * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT - * 1995), consisting of "commercial computer software" and "commercial - * computer software documentation" as such terms are used in 48 - * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government - * only as a commercial end item. Consistent with 48 C.F.R.12.212 and - * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all - * U.S. 
Government End Users acquire the Licensed Deliverables with - * only those rights set forth herein. - * - * Any use of the Licensed Deliverables in individual and commercial - * software must include, in the user documentation and internal - * comments to the code, the above Disclaimer and U.S. Government End - * Users Notice. - */ - -// #ifndef NV_INFER_H -// #define NV_INFER_H - -// #include "NvInferLegacyDims.h" - - -//! -//! -//! -//! - -//! -//! -//! - -//! -//! -//! -// #include "NvInferRuntime.h" - -/** - * \mainpage - * - * This is the API documentation for the NVIDIA TensorRT library. It provides information on individual - * functions, classes and methods. Use the index on the left to navigate the documentation. - * - * Please see the accompanying user guide and samples for higher-level information and general advice on - * using TensorRT. */ -// -/** TensorRT Versioning follows Semantic Versioning Guidelines specified here: https://semver.org/ -/** -

-/** -/** \file NvInfer.h -/** -/** This is the top-level API file for TensorRT. -/** -

-/** -/** \namespace nvinfer1 -/** -/** \brief The TensorRT API version 1 namespace. -/** */ - -/** - * \enum LayerType - * - * \brief The type values of layer classes. - * - * @see ILayer::getType() - * */ -@Namespace("nvinfer1") public enum LayerType { - /** Convolution layer. */ - kCONVOLUTION(0), - /** Fully connected layer. */ - kFULLY_CONNECTED(1), - /** Activation layer. */ - kACTIVATION(2), - /** Pooling layer. */ - kPOOLING(3), - /** LRN layer. */ - kLRN(4), - /** Scale layer. */ - kSCALE(5), - /** SoftMax layer. */ - kSOFTMAX(6), - /** Deconvolution layer. */ - kDECONVOLUTION(7), - /** Concatenation layer. */ - kCONCATENATION(8), - /** Elementwise layer. */ - kELEMENTWISE(9), - /** Plugin layer. */ - kPLUGIN(10), - /** UnaryOp operation Layer. */ - kUNARY(11), - /** Padding layer. */ - kPADDING(12), - /** Shuffle layer. */ - kSHUFFLE(13), - /** Reduce layer. */ - kREDUCE(14), - /** TopK layer. */ - kTOPK(15), - /** Gather layer. */ - kGATHER(16), - /** Matrix multiply layer. */ - kMATRIX_MULTIPLY(17), - /** Ragged softmax layer. */ - kRAGGED_SOFTMAX(18), - /** Constant layer. */ - kCONSTANT(19), - /** RNNv2 layer. */ - kRNN_V2(20), - /** Identity layer. */ - kIDENTITY(21), - /** PluginV2 layer. */ - kPLUGIN_V2(22), - /** Slice layer. */ - kSLICE(23), - /** Shape layer. */ - kSHAPE(24), - /** Parametric ReLU layer. */ - kPARAMETRIC_RELU(25), - /** Resize Layer. */ - kRESIZE(26), - /** Loop Trip limit layer */ - kTRIP_LIMIT(27), - /** Loop Recurrence layer */ - kRECURRENCE(28), - /** Loop Iterator layer */ - kITERATOR(29), - /** Loop output layer */ - kLOOP_OUTPUT(30), - /** Select layer. 
*/ - kSELECT(31), - /** Fill layer */ - kFILL(32), - /** Quantize layer */ - kQUANTIZE(33), - /** Dequantize layer */ - kDEQUANTIZE(34); - - public final int value; - private LayerType(int v) { this.value = v; } - private LayerType(LayerType e) { this.value = e.value; } - public LayerType intern() { for (LayerType e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} - -/** Maximum number of elements in LayerType enum. @see LayerType */ - - -/** - * \brief It is capable of representing one or more TensorFormat by binary OR - * operations, e.g., 1U << TensorFormat::kCHW4 | 1U << TensorFormat::kCHW32. - * - * @see ITensor::getAllowedFormats(), ITensor::setAllowedFormats(), - * */ - - -//! -//! -//! - -/** - * \enum ActivationType - * - * \brief Enumerates the types of activation to perform in an activation layer. - * */ -@Namespace("nvinfer1") public enum ActivationType { - /** Rectified linear activation. */ - kRELU(0), - /** Sigmoid activation. */ - kSIGMOID(1), - /** TanH activation. */ - kTANH(2), - /** LeakyRelu activation: x>=0 ? x : alpha * x. */ - kLEAKY_RELU(3), - /** Elu activation: x>=0 ? x : alpha * (exp(x) - 1). */ - kELU(4), - /** Selu activation: x>0 ? beta * x : beta * (alpha*exp(x) - alpha) */ - kSELU(5), - /** Softsign activation: x / (1+|x|) */ - kSOFTSIGN(6), - /** Parametric softplus activation: alpha*log(exp(beta*x)+1) */ - kSOFTPLUS(7), - /** Clip activation: max(alpha, min(beta, x)) */ - kCLIP(8), - /** Hard sigmoid activation: max(0, min(1, alpha*x+beta)) */ - kHARD_SIGMOID(9), - /** Scaled tanh activation: alpha*tanh(beta*x) */ - kSCALED_TANH(10), - /** Thresholded ReLU activation: x>alpha ? 
x : 0 */ - kTHRESHOLDED_RELU(11); - - public final int value; - private ActivationType(int v) { this.value = v; } - private ActivationType(ActivationType e) { this.value = e.value; } - public ActivationType intern() { for (ActivationType e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} -/** Maximum number of elements in ActivationType enum. @see ActivationType */ - -// Targeting ../nvinfer/ITensor.java - - -// Targeting ../nvinfer/ILayer.java - - - -/** - * \enum PaddingMode - * - * \brief Enumerates the modes of padding to perform in convolution, deconvolution and pooling layer, - * padding mode takes precedence if setPaddingMode() and setPrePadding() are also used. - * - * There are three padding styles, EXPLICIT, SAME, and CAFFE, with each style having two variants. - * The EXPLICIT and CAFFE styles determine if the final sampling location is used or not. - * The SAME style determine if the asymmetry in the padding is on the pre or post padding. - * - *

{@code
- *  Shorthand:
- *      I = dimensions of input image.
- *      B = prePadding, before the image data. For deconvolution, prePadding is set before output.
- *      A = postPadding, after the image data. For deconvolution, postPadding is set after output.
- *      P = delta between input and output
- *      S = stride
- *      F = filter
- *      O = output
- *      D = dilation
- *      M = I + B + A ; The image data plus any padding
- *      DK = 1 + D * (F - 1)
- *  }
- * - * Formulas for Convolution: - * - EXPLICIT_ROUND_DOWN: - *
{@code
- *          O = floor((M - DK) / S) + 1
- *  }
- * - CAFFE_ROUND_DOWN: - *
{@code
- *          O = floor((I + B * 2 - DK) / S)
- *  }
- * - EXPLICIT_ROUND_UP: - *
{@code
- *          O = ceil((M - DK) / S) + 1
- *  }
- * - CAFFE_ROUND_UP: - *
{@code
- *          O = ceil((I + B * 2 - DK) / S)
- *  }
- * - SAME_UPPER: - *
{@code
- *          O = ceil(I / S)
- *          P = floor((I - 1) / S) * S + DK - I;
- *          B = floor(P / 2)
- *          A = P - B
- *  }
- * - SAME_LOWER: - *
{@code
- *          O = ceil(I / S)
- *          P = floor((I - 1) / S) * S + DK - I;
- *          A = floor(P / 2)
- *          B = P - A
- *  }
- * - * Formulas for Deconvolution: - * - EXPLICIT_ROUND_DOWN: - * - CAFFE_ROUND_DOWN: - * - EXPLICIT_ROUND_UP: - * - CAFFE_ROUND_UP: - *
{@code
- *          O = (I - 1) * S + DK - (B + A)
- *  }
- * - SAME_UPPER: - *
{@code
- *          O = min(I * S, (I - 1) * S + DK)
- *          P = max(DK - S, 0)
- *          B = floor(P / 2)
- *          A = P - B
- *  }
- * - SAME_LOWER: - *
{@code
- *          O = min(I * S, (I - 1) * S + DK)
- *          P = max(DK - S, 0)
- *          A = floor(P / 2)
- *          B = P - A
- *  }
- * - * Formulas for Pooling: - * - EXPLICIT_ROUND_DOWN: - *
{@code
- *          O = floor((M - F) / S) + 1
- *  }
- * - EXPLICIT_ROUND_UP: - *
{@code
- *          O = ceil((M - F) / S) + 1
- *  }
- * - SAME_UPPER: - *
{@code
- *          O = ceil(I / S)
- *          P = floor((I - 1) / S) * S + F - I;
- *          B = floor(P / 2)
- *          A = P - B
- *  }
- * - SAME_LOWER: - *
{@code
- *          O = ceil(I / S)
- *          P = floor((I - 1) / S) * S + F - I;
- *          A = floor(P / 2)
- *          B = P - A
- *  }
- * - CAFFE_ROUND_DOWN: - *
{@code
- *          EXPLICIT_ROUND_DOWN - ((EXPLICIT_ROUND_DOWN - 1) * S >= I + B)
- *  }
- * - CAFFE_ROUND_UP: - *
{@code
- *          EXPLICIT_ROUND_UP - ((EXPLICIT_ROUND_UP - 1) * S >= I + B)
- *  }
- * - * Pooling Example 1: - *
{@code
- *      Given I = {6, 6}, B = {3, 3}, A = {2, 2}, S = {2, 2}, F = {3, 3}. What is O?
- *      (B, A can be calculated for SAME_UPPER and SAME_LOWER mode)
- *  }
- * - * - EXPLICIT_ROUND_DOWN: - *
{@code
- *      Computation:
- *          M = {6, 6} + {3, 3} + {2, 2} ==> {11, 11}
- *          O ==> floor((M - F) / S) + 1
- *            ==> floor(({11, 11} - {3, 3}) / {2, 2}) + {1, 1}
- *            ==> floor({8, 8} / {2, 2}) + {1, 1}
- *            ==> {5, 5}
- *  }
- * - EXPLICIT_ROUND_UP: - *
{@code
- *      Computation:
- *          M = {6, 6} + {3, 3} + {2, 2} ==> {11, 11}
- *          O ==> ceil((M - F) / S) + 1
- *            ==> ceil(({11, 11} - {3, 3}) / {2, 2}) + {1, 1}
- *            ==> ceil({8, 8} / {2, 2}) + {1, 1}
- *            ==> {5, 5}
- *  }
- * The sample points are {0, 2, 4, 6, 8} in each dimension. - * - * - SAME_UPPER: - *
{@code
- *      Computation:
- *          I = {6, 6}
- *          S = {2, 2}
- *          O = ceil(I / S) = {3, 3}
- *          P = floor((I - 1) / S) * S + F - I
- *              ==> floor(({6, 6} - {1, 1}) / {2, 2}) * {2, 2} + {3, 3} - {6, 6}
- *              ==> {4, 4} + {3, 3} - {6, 6}
- *              ==> {1, 1}
- *          B = floor({1, 1} / {2, 2})
- *              ==> {0, 0}
- *          A = {1, 1} - {0, 0}
- *              ==> {1, 1}
- *  }
- * - SAME_LOWER: - *
{@code
- *      Computation:
- *          I = {6, 6}
- *          S = {2, 2}
- *          O = ceil(I / S) = {3, 3}
- *          P = floor((I - 1) / S) * S + F - I
- *            ==> {1, 1}
- *          A = floor({1, 1} / {2, 2})
- *            ==> {0, 0}
- *          B = {1, 1} - {0, 0}
- *            ==> {1, 1}
- *  }
- * The sample points are {0, 2, 4} in each dimension. - * SAME_UPPER has {O0, O1, O2, pad} in output in each dimension. - * SAME_LOWER has {pad, O0, O1, O2} in output in each dimension. - * - * Pooling Example 2: - *
{@code
- *      Given I = {6, 6}, B = {3, 3}, A = {3, 3}, S = {2, 2}, F = {3, 3}. What is O?
- *  }
- * - * - CAFFE_ROUND_DOWN: - *
{@code
- *      Computation:
- *          M = {6, 6} + {3, 3} + {3, 3} ==> {12, 12}
- *          EXPLICIT_ROUND_DOWN ==> floor((M - F) / S) + 1
- *                              ==> floor(({12, 12} - {3, 3}) / {2, 2}) + {1, 1}
- *                              ==> {5, 5}
- *          DIFF = (((EXPLICIT_ROUND_DOWN - 1) * S >= I + B) ? {1, 1} : {0, 0})
- *            ==> ({5, 5} - {1, 1}) * {2, 2} >= {6, 6} + {3, 3} ? {1, 1} : {0,0}
- *            ==> {0, 0}
- *          O ==> EXPLICIT_ROUND_DOWN - DIFF
- *            ==> {5, 5} - {0, 0}
- *            ==> {5, 5}
- *  }
- * - CAFFE_ROUND_UP: - *
{@code
- *      Computation:
- *          M = {6, 6} + {3, 3} + {3, 3} ==> {12, 12}
- *          EXPLICIT_ROUND_UP ==> ceil((M - F) / S) + 1
- *                            ==> ceil(({12, 12} - {3, 3}) / {2, 2}) + {1, 1}
- *                            ==> {6, 6}
- *          DIFF = (((EXPLICIT_ROUND_UP - 1) * S >= I + B) ? {1, 1} : {0, 0})
- *            ==> ({6, 6} - {1, 1}) * {2, 2} >= {6, 6} + {3, 3} ? {1, 1} : {0,0}
- *            ==> {1, 1}
- *          O ==> EXPLICIT_ROUND_UP - DIFF
- *            ==> {6, 6} - {1, 1}
- *            ==> {5, 5}
- *  }
- * - * The sample points are {0, 2, 4, 6, 8} in each dimension.
- * CAFFE_ROUND_DOWN and CAFFE_ROUND_UP have two restrictions each on usage with pooling operations. - * This will cause getDimensions to return an empty dimension and also to reject the network - * at validation time.
- * For more information on original reference code, see - * https://github.com/BVLC/caffe/blob/master/src/caffe/layers/pooling_layer.cpp - * - * - Restriction 1: - *
{@code
- *      CAFFE_ROUND_DOWN: B >= F is an error if (B - S) < F
- *      CAFFE_ROUND_UP: (B + S) >= (F + 1) is an error if B < (F + 1)
- *  }
- * - * - Restriction 2: - *
{@code
- *      CAFFE_ROUND_DOWN: (B - S) >= F is an error if B >= F
- *      CAFFE_ROUND_UP: B >= (F + 1) is an error if (B + S) >= (F + 1)
- *  }
- * */ -@Namespace("nvinfer1") public enum PaddingMode { - /** Use explicit padding, rounding output size down. */ - kEXPLICIT_ROUND_DOWN(0), - /** Use explicit padding, rounding output size up. */ - kEXPLICIT_ROUND_UP(1), - /** Use SAME padding, with prePadding <= postPadding. */ - kSAME_UPPER(2), - /** Use SAME padding, with prePadding >= postPadding. */ - kSAME_LOWER(3), - /** Use CAFFE padding, rounding output size down, uses prePadding value. */ - kCAFFE_ROUND_DOWN(4), - /** Use CAFFE padding, rounding output size up, uses prePadding value. */ - kCAFFE_ROUND_UP(5); - - public final int value; - private PaddingMode(int v) { this.value = v; } - private PaddingMode(PaddingMode e) { this.value = e.value; } - public PaddingMode intern() { for (PaddingMode e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} -/** Maximum number of elements in PaddingMode enum. @see PaddingMode */ - -// Targeting ../nvinfer/IConvolutionLayer.java - - -// Targeting ../nvinfer/IFullyConnectedLayer.java - - -// Targeting ../nvinfer/IActivationLayer.java - - - -/** - * \enum PoolingType - * - * \brief The type of pooling to perform in a pooling layer. - * */ -@Namespace("nvinfer1") public enum PoolingType { - kMAX(0), // Maximum over elements - kAVERAGE(1), // Average over elements. If the tensor is padded, the count includes the padding - kMAX_AVERAGE_BLEND(2);// Blending between max and average pooling: (1-blendFactor)*maxPool + blendFactor*avgPool - - public final int value; - private PoolingType(int v) { this.value = v; } - private PoolingType(PoolingType e) { this.value = e.value; } - public PoolingType intern() { for (PoolingType e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} -/** Maximum number of elements in PoolingType enum. 
@see PoolingType */ - -// Targeting ../nvinfer/IPoolingLayer.java - - -// Targeting ../nvinfer/ILRNLayer.java - - - -/** - * \brief Controls how shift, scale and power are applied in a Scale layer. - * - * @see IScaleLayer - * */ -@Namespace("nvinfer1") public enum ScaleMode { - /** Identical coefficients across all elements of the tensor. */ - kUNIFORM(0), - /** Per-channel coefficients. */ - kCHANNEL(1), - /** Elementwise coefficients. */ - kELEMENTWISE(2); - - public final int value; - private ScaleMode(int v) { this.value = v; } - private ScaleMode(ScaleMode e) { this.value = e.value; } - public ScaleMode intern() { for (ScaleMode e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} - -/** Maximum number of elements in ScaleMode enum. @see ScaleMode */ - -// Targeting ../nvinfer/IScaleLayer.java - - -// Targeting ../nvinfer/ISoftMaxLayer.java - - -// Targeting ../nvinfer/IConcatenationLayer.java - - -// Targeting ../nvinfer/IDeconvolutionLayer.java - - - -/** - * \enum ElementWiseOperation - * - * \brief Enumerates the binary operations that may be performed by an ElementWise layer. - * - * @see IElementWiseLayer - * */ -@Namespace("nvinfer1") public enum ElementWiseOperation { - /** Sum of the two elements. */ - kSUM(0), - /** Product of the two elements. */ - kPROD(1), - /** Maximum of the two elements. */ - kMAX(2), - /** Minimum of the two elements. */ - kMIN(3), - /** Substract the second element from the first. */ - kSUB(4), - /** Divide the first element by the second. */ - kDIV(5), - /** The first element to the power of the second element. */ - kPOW(6), - /** Floor division of the first element by the second. */ - kFLOOR_DIV(7), - /** Logical AND of two elements. */ - kAND(8), - /** Logical OR of two elements. */ - kOR(9), - /** Logical XOR of two elements. */ - kXOR(10), - /** Check if two elements are equal. 
*/ - kEQUAL(11), - /** Check if element in first tensor is greater than corresponding element in second tensor. */ - kGREATER(12), - /** Check if element in first tensor is less than corresponding element in second tensor. */ - kLESS(13); - - public final int value; - private ElementWiseOperation(int v) { this.value = v; } - private ElementWiseOperation(ElementWiseOperation e) { this.value = e.value; } - public ElementWiseOperation intern() { for (ElementWiseOperation e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} -/** Maximum number of elements in ElementWiseOperation enum. @see ElementWiseOperation */ - -// Targeting ../nvinfer/IElementWiseLayer.java - - -// Targeting ../nvinfer/IGatherLayer.java - - - -/** - * \enum RNNOperation - * - * \brief Enumerates the RNN operations that may be performed by an RNN layer. - * - * __Equation definitions__ - * - * In the equations below, we use the following naming convention: - * - * ~~~ - * t := current time step - * - * i := input gate - * o := output gate - * f := forget gate - * z := update gate - * r := reset gate - * c := cell gate - * h := hidden gate - * - * g[t] denotes the output of gate g at timestep t, e.g. - * f[t] is the output of the forget gate f. - * - * X[t] := input tensor for timestep t - * C[t] := cell state for timestep t - * H[t] := hidden state for timestep t - * - * W[g] := W (input) parameter weight matrix for gate g - * R[g] := U (recurrent) parameter weight matrix for gate g - * Wb[g] := W (input) parameter bias vector for gate g - * Rb[g] := U (recurrent) parameter bias vector for gate g - * - * Unless otherwise specified, all operations apply pointwise - * to elements of each operand tensor. - * - * ReLU(X) := max(X, 0) - * tanh(X) := hyperbolic tangent of X - * sigmoid(X) := 1 / (1 + exp(-X)) - * exp(X) := e^X - * - * A.B denotes matrix multiplication of A and B. 
- * A*B denotes pointwise multiplication of A and B. - * ~~~ - * - * __Equations__ - * - * Depending on the value of RNNOperation chosen, each sub-layer of the RNN - * layer will perform one of the following operations: - * - * ~~~ - * ::kRELU - * - * H[t] := ReLU(W[i].X[t] + R[i].H[t-1] + Wb[i] + Rb[i]) - * - * ::kTANH - * - * H[t] := tanh(W[i].X[t] + R[i].H[t-1] + Wb[i] + Rb[i]) - * - * ::kLSTM - * - * i[t] := sigmoid(W[i].X[t] + R[i].H[t-1] + Wb[i] + Rb[i]) - * f[t] := sigmoid(W[f].X[t] + R[f].H[t-1] + Wb[f] + Rb[f]) - * o[t] := sigmoid(W[o].X[t] + R[o].H[t-1] + Wb[o] + Rb[o]) - * c[t] := tanh(W[c].X[t] + R[c].H[t-1] + Wb[c] + Rb[c]) - * - * C[t] := f[t]*C[t-1] + i[t]*c[t] - * H[t] := o[t]*tanh(C[t]) - * - * ::kGRU - * - * z[t] := sigmoid(W[z].X[t] + R[z].H[t-1] + Wb[z] + Rb[z]) - * r[t] := sigmoid(W[r].X[t] + R[r].H[t-1] + Wb[r] + Rb[r]) - * h[t] := tanh(W[h].X[t] + r[t]*(R[h].H[t-1] + Rb[h]) + Wb[h]) - * - * H[t] := (1 - z[t])*h[t] + z[t]*H[t-1] - * ~~~ - * - * @see IRNNv2Layer - * */ -@Namespace("nvinfer1") public enum RNNOperation { - /** Single gate RNN w/ ReLU activation function. */ - kRELU(0), - /** Single gate RNN w/ TANH activation function. */ - kTANH(1), - /** Four-gate LSTM network w/o peephole connections. */ - kLSTM(2), - /** Three-gate network consisting of Gated Recurrent Units. */ - kGRU(3); - - public final int value; - private RNNOperation(int v) { this.value = v; } - private RNNOperation(RNNOperation e) { this.value = e.value; } - public RNNOperation intern() { for (RNNOperation e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} - -/** Maximum number of elements in RNNOperation enum. @see RNNOperation */ - - -/** - * \enum RNNDirection - * - * \brief Enumerates the RNN direction that may be performed by an RNN layer. - * - * @see IRNNv2Layer - * */ -@Namespace("nvinfer1") public enum RNNDirection { - /** Network iterations from first input to last input. 
*/ - kUNIDIRECTION(0), - /** Network iterates from first to last and vice versa and outputs concatenated. */ - kBIDIRECTION(1); - - public final int value; - private RNNDirection(int v) { this.value = v; } - private RNNDirection(RNNDirection e) { this.value = e.value; } - public RNNDirection intern() { for (RNNDirection e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} - -/** Maximum number of elements in RNNDirection enum. @see RNNDirection */ - - -/** - * \enum RNNInputMode - * - * \brief Enumerates the RNN input modes that may occur with an RNN layer. - * - * If the RNN is configured with RNNInputMode::kLINEAR, then for each gate {@code g} in the first layer of the RNN, - * the input vector {@code X[t]} (length {@code E}) is left-multiplied by the gate's corresponding weight matrix {@code W[g]} - * (dimensions {@code HxE}) as usual, before being used to compute the gate output as described by \ref RNNOperation. - * - * If the RNN is configured with RNNInputMode::kSKIP, then this initial matrix multiplication is "skipped" - * and {@code W[g]} is conceptually an identity matrix. In this case, the input vector {@code X[t]} must have length {@code H} - * (the size of the hidden state). - * - * @see IRNNv2Layer - * */ -@Namespace("nvinfer1") public enum RNNInputMode { - /** Perform the normal matrix multiplication in the first recurrent layer. */ - kLINEAR(0), - /** No operation is performed on the first recurrent layer. */ - kSKIP(1); - - public final int value; - private RNNInputMode(int v) { this.value = v; } - private RNNInputMode(RNNInputMode e) { this.value = e.value; } - public RNNInputMode intern() { for (RNNInputMode e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} - -/** Maximum number of elements in RNNInputMode enum. 
@see RNNInputMode */ - - -/** - * \enum RNNGateType - * - * \brief Identifies an individual gate within an RNN cell. - * - * @see RNNOperation - * */ -@Namespace("nvinfer1") public enum RNNGateType { - /** Input gate (i). */ - kINPUT(0), - /** Output gate (o). */ - kOUTPUT(1), - /** Forget gate (f). */ - kFORGET(2), - /** Update gate (z). */ - kUPDATE(3), - /** Reset gate (r). */ - kRESET(4), - /** Cell gate (c). */ - kCELL(5), - /** Hidden gate (h). */ - kHIDDEN(6); - - public final int value; - private RNNGateType(int v) { this.value = v; } - private RNNGateType(RNNGateType e) { this.value = e.value; } - public RNNGateType intern() { for (RNNGateType e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} - - -// Targeting ../nvinfer/IRNNv2Layer.java - - -// Targeting ../nvinfer/IPluginV2Layer.java - - - -/** - * \enum UnaryOperation - * - * \brief Enumerates the unary operations that may be performed by a Unary layer. - * - * @see IUnaryLayer - * */ -@Namespace("nvinfer1") public enum UnaryOperation { - /** Exponentiation. */ - kEXP(0), - /** Log (base e). */ - kLOG(1), - /** Square root. */ - kSQRT(2), - /** Reciprocal. */ - kRECIP(3), - /** Absolute value. */ - kABS(4), - /** Negation. */ - kNEG(5), - /** Sine. */ - kSIN(6), - /** Cosine. */ - kCOS(7), - /** Tangent. */ - kTAN(8), - /** Hyperbolic sine. */ - kSINH(9), - /** Hyperbolic cosine. */ - kCOSH(10), - /** Inverse sine. */ - kASIN(11), - /** Inverse cosine. */ - kACOS(12), - /** Inverse tangent. */ - kATAN(13), - /** Inverse hyperbolic sine. */ - kASINH(14), - /** Inverse hyperbolic cosine. */ - kACOSH(15), - /** Inverse hyperbolic tangent. */ - kATANH(16), - /** Ceiling. */ - kCEIL(17), - /** Floor. */ - kFLOOR(18), - /** Gauss error function. */ - kERF(19), - /** Logical NOT. 
*/ - kNOT(20); - - public final int value; - private UnaryOperation(int v) { this.value = v; } - private UnaryOperation(UnaryOperation e) { this.value = e.value; } - public UnaryOperation intern() { for (UnaryOperation e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} - -/** Maximum number of elements in UnaryOperation enum. @see UnaryOperation */ - -// Targeting ../nvinfer/IUnaryLayer.java - - - -/** - * \enum ReduceOperation - * - * \brief Enumerates the reduce operations that may be performed by a Reduce layer. - * - * The table shows the result of reducing across an empty volume of a given type. - * - * Operation | kFLOAT and kHALF | kINT32 | kINT8 - * --------- | ----------------- | ------- | ----- - * kSUM | 0 | 0 | 0 - * kPROD | 1 | 1 | 1 - * kMAX | negative infinity | INT_MIN | -128 - * kMIN | positive infinity | INT_MAX | 127 - * kAVG | NaN | 0 | -128 - * - * The current version of TensorRT usually performs reduction for kINT8 via kFLOAT or kHALF. - * The kINT8 values show the quantized representations of the floating-point values. - * */ -@Namespace("nvinfer1") public enum ReduceOperation { - kSUM(0), - kPROD(1), - kMAX(2), - kMIN(3), - kAVG(4); - - public final int value; - private ReduceOperation(int v) { this.value = v; } - private ReduceOperation(ReduceOperation e) { this.value = e.value; } - public ReduceOperation intern() { for (ReduceOperation e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} - -/** Maximum number of elements in ReduceOperation enum. @see ReduceOperation */ - -// Targeting ../nvinfer/IReduceLayer.java - - -// Targeting ../nvinfer/IPaddingLayer.java - - -// Targeting ../nvinfer/Permutation.java - - -// Targeting ../nvinfer/IShuffleLayer.java - - - -/** - * \brief Controls how ISliceLayer handles out of bounds coordinates. 
- * - * @see ISliceLayer - * */ -@Namespace("nvinfer1") public enum SliceMode { - /** Fail with error when the coordinates are out of bounds. This is the default. */ - kDEFAULT(0), - /** Coordinates wrap around periodically. */ - kWRAP(1); - - public final int value; - private SliceMode(int v) { this.value = v; } - private SliceMode(SliceMode e) { this.value = e.value; } - public SliceMode intern() { for (SliceMode e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} - -/** Maximum number of elements in SliceMode enum. @see SliceMode */ - -// Targeting ../nvinfer/ISliceLayer.java - - -// Targeting ../nvinfer/IShapeLayer.java - - - -/** - * \enum TopKOperation - * - * \brief Enumerates the operations that may be performed by a TopK layer. - * */ -@Namespace("nvinfer1") public enum TopKOperation { - /** Maximum of the elements. */ - kMAX(0), - /** Minimum of the elements. */ - kMIN(1); - - public final int value; - private TopKOperation(int v) { this.value = v; } - private TopKOperation(TopKOperation e) { this.value = e.value; } - public TopKOperation intern() { for (TopKOperation e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} - -/** Maximum number of elements in TopKOperation enum. @see TopKOperation */ - -// Targeting ../nvinfer/ITopKLayer.java - - - -/** - * \enum MatrixOperation - * - * \brief Enumerates the operations that may be performed on a tensor - * by IMatrixMultiplyLayer before multiplication. - * */ -@Namespace("nvinfer1") public enum MatrixOperation { - /** Treat x as a matrix if it has two dimensions, or as a collection of - * matrices if x has more than two dimensions, where the last two dimensions - * are the matrix dimensions. x must have at least two dimensions. */ - kNONE(0), - - /** Like kNONE, but transpose the matrix dimensions. */ - -//! 
- kTRANSPOSE(1), - - /** Treat x as a vector if it has one dimension, or as a collection of - * vectors if x has more than one dimension. x must have at least one dimension. - * The first input tensor with dimensions [M,K] used with MatrixOperation::kVECTOR is equivalent to a tensor - * with dimensions [M, 1, K] with MatrixOperation::kNONE, i.e. is treated as M row vectors of length K. - * If MatrixOperation::kTRANSPOSE is specified, then the dimensions are [M, K, 1]. - * - * The second input tensor with dimensions [M,K] used with MatrixOperation::kVECTOR is equivalent to a tensor - * with dimensions [M, K, 1] with MatrixOperation::kNONE, i.e. is treated as M column vectors of length K. - * If MatrixOperation::kTRANSPOSE is specified, then the dimensions are [M, 1, K]. */ - kVECTOR(2); - - public final int value; - private MatrixOperation(int v) { this.value = v; } - private MatrixOperation(MatrixOperation e) { this.value = e.value; } - public MatrixOperation intern() { for (MatrixOperation e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} - -/** Maximum number of elements in MatrixOperation enum. @see DataType */ - -// Targeting ../nvinfer/IMatrixMultiplyLayer.java - - -// Targeting ../nvinfer/IRaggedSoftMaxLayer.java - - -// Targeting ../nvinfer/IIdentityLayer.java - - -// Targeting ../nvinfer/IConstantLayer.java - - -// Targeting ../nvinfer/IParametricReLULayer.java - - - -/** \enum ResizeMode - * - * \brief Enumerates various modes of resize in the resize layer. - * Resize mode set using setResizeMode(). - * */ -@Namespace("nvinfer1") public enum ResizeMode { - /** ND (0 < N <= 8) nearest neighbor resizing. */ - kNEAREST(0), - /** Can handle linear (1D), bilinear (2D), and trilinear (3D) resizing. 
*/ - kLINEAR(1); - - public final int value; - private ResizeMode(int v) { this.value = v; } - private ResizeMode(ResizeMode e) { this.value = e.value; } - public ResizeMode intern() { for (ResizeMode e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} -/** Maximum number of elements in ResizeMode enum. @see ResizeMode */ - // namespace impl - -/** - * \enum ResizeCoordinateTransformation - * - * \brief The resize coordinate transformation function. - * - * @see IResizeLayer::setCoordinateTransformation() - * */ -@Namespace("nvinfer1") public enum ResizeCoordinateTransformation { - /** We can think each value in tensor has a volume, and the coordinate is a point inside this volume. - * The coordinate point is drawn as star(*) in below diagram, and multiple values range has a length. - * Let's use x_origin as the coordinate of axis x in the input tensor, x_resized as the coordinate of axis x in the - * output tensor, length_origin as length of the input tensor in axis x, and length_resize as length of the output - * tensor in axis x. - * - * |<--------------length---------->| - * | 0 | 1 | 2 | 3 | - * * * * * - * - * x_origin = x_resized * (length_origin - 1) / (length_resize - 1) - * */ - -//! -//! - kALIGN_CORNERS(0), - - /** |<--------------length--------------------->| - * | 0 | 1 | 2 | 3 | - * * * * * - * - * x_origin = x_resized * (length_origin / length_resize) - * */ - -//! -//! 
- kASYMMETRIC(1), - - /** |<--------------length--------------------->| - * | 0 | 1 | 2 | 3 | - * * * * * - * - * x_origin = (x_resized + 0.5) * (length_origin / length_resize) - 0.5 - * */ - kHALF_PIXEL(2); - - public final int value; - private ResizeCoordinateTransformation(int v) { this.value = v; } - private ResizeCoordinateTransformation(ResizeCoordinateTransformation e) { this.value = e.value; } - public ResizeCoordinateTransformation intern() { for (ResizeCoordinateTransformation e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} -/** Maximum number of elements in ResizeCoordinateTransformation enum. @see ResizeCoordinateTransformation */ - // namespace impl - -/** - * \enum ResizeSelector - * - * \brief The coordinate selector when resize to single pixel output. - * - * @see IResizeLayer::setSelectorForSinglePixel() - * */ -@Namespace("nvinfer1") public enum ResizeSelector { - /** Use formula to map the original index. */ - kFORMULA(0), - - /** Select the upper left pixel. */ - kUPPER(1); - - public final int value; - private ResizeSelector(int v) { this.value = v; } - private ResizeSelector(ResizeSelector e) { this.value = e.value; } - public ResizeSelector intern() { for (ResizeSelector e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} -/** Maximum number of elements in ResizeSelector enum. @see ResizeSelector */ - // namespace impl - -/** - * \enum ResizeRoundMode - * - * \brief The rounding mode for nearest neighbor resize. - * - * - * @see IResizeLayer::setNearestRounding() - * */ -@Namespace("nvinfer1") public enum ResizeRoundMode { - /** Round half up. */ - kHALF_UP(0), - - /** Round half down. */ - kHALF_DOWN(1), - - /** Round to floor. */ - kFLOOR(2), - - /** Round to ceil. 
*/ - kCEIL(3); - - public final int value; - private ResizeRoundMode(int v) { this.value = v; } - private ResizeRoundMode(ResizeRoundMode e) { this.value = e.value; } - public ResizeRoundMode intern() { for (ResizeRoundMode e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} -/** Maximum number of elements in ResizeRoundMode enum. @see ResizeRoundMode */ - -// Targeting ../nvinfer/IResizeLayer.java - - - -/** Enum that describes kinds of loop outputs. */ -@Namespace("nvinfer1") public enum LoopOutput { - /** Output value is value of tensor for last iteration. */ - kLAST_VALUE(0), - - /** Output value is concatenation of values of tensor for each iteration, in forward order. */ - kCONCATENATE(1), - - /** Output value is concatenation of values of tensor for each iteration, in reverse order. */ - kREVERSE(2); - - public final int value; - private LoopOutput(int v) { this.value = v; } - private LoopOutput(LoopOutput e) { this.value = e.value; } - public LoopOutput intern() { for (LoopOutput e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} - -/** Maximum number of elements in LoopOutput enum. @see DataType */ - - -/** Enum that describes kinds of trip limits. */ -@Namespace("nvinfer1") public enum TripLimit { - - /** Tensor is scalar of type kINT32 that contains the trip count. */ - kCOUNT(0), - /** Tensor is a scalar of type kBOOL. Loop terminates when value is false. */ - kWHILE(1); - - public final int value; - private TripLimit(int v) { this.value = v; } - private TripLimit(TripLimit e) { this.value = e.value; } - public TripLimit intern() { for (TripLimit e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} - -/** Maximum number of elements in TripLimit enum. 
@see DataType */ - -// Targeting ../nvinfer/ILoopBoundaryLayer.java - - -// Targeting ../nvinfer/IRecurrenceLayer.java - - -// Targeting ../nvinfer/ILoopOutputLayer.java - - -// Targeting ../nvinfer/ITripLimitLayer.java - - -// Targeting ../nvinfer/IIteratorLayer.java - - -// Targeting ../nvinfer/ILoop.java - - -// Targeting ../nvinfer/ISelectLayer.java - - - -/** - * \enum FillOperation - * - * \brief Enumerates the tensor fill operations that may performed by a fill layer. - * - * @see IFillLayer - * */ -@Namespace("nvinfer1") public enum FillOperation { - /** Generate evenly spaced numbers over a specified interval. */ - kLINSPACE(0), - /** Generate a tensor with random values drawn from a uniform distribution. */ - kRANDOM_UNIFORM(1); - - public final int value; - private FillOperation(int v) { this.value = v; } - private FillOperation(FillOperation e) { this.value = e.value; } - public FillOperation intern() { for (FillOperation e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} - -/** Maximum number of elements in FillOperation enum. @see FillOperation */ - -// Targeting ../nvinfer/IFillLayer.java - - -// Targeting ../nvinfer/IQuantizeLayer.java - - -// Targeting ../nvinfer/IDequantizeLayer.java - - -// Targeting ../nvinfer/INetworkDefinition.java - - - -/** - * enum CalibrationAlgoType - * - * \brief Version of calibration algorithm to use. 
- * */ -@Namespace("nvinfer1") public enum CalibrationAlgoType { - kLEGACY_CALIBRATION(0), - kENTROPY_CALIBRATION(1), - kENTROPY_CALIBRATION_2(2), - kMINMAX_CALIBRATION(3); - - public final int value; - private CalibrationAlgoType(int v) { this.value = v; } - private CalibrationAlgoType(CalibrationAlgoType e) { this.value = e.value; } - public CalibrationAlgoType intern() { for (CalibrationAlgoType e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} - -/** Maximum number of elements in CalibrationAlgoType enum. @see DataType */ - -// Targeting ../nvinfer/IInt8Calibrator.java - - -// Targeting ../nvinfer/IInt8EntropyCalibrator.java - - -// Targeting ../nvinfer/IInt8EntropyCalibrator2.java - - -// Targeting ../nvinfer/IInt8MinMaxCalibrator.java - - -// Targeting ../nvinfer/IInt8LegacyCalibrator.java - - -// Targeting ../nvinfer/IAlgorithmIOInfo.java - - -// Targeting ../nvinfer/IAlgorithmVariant.java - - -// Targeting ../nvinfer/IAlgorithmContext.java - - -// Targeting ../nvinfer/IAlgorithm.java - - -// Targeting ../nvinfer/IAlgorithmSelector.java - - - -/** - * \brief Represents one or more QuantizationFlag values using binary OR - * operations. - * - * @see IBuilderConfig::getQuantizationFlags(), IBuilderConfig::setQuantizationFlags() - * */ - - -//! -//! -//! -//! - -/** - * \enum QuantizationFlag - * - * \brief List of valid flags for quantizing the network to int8 - * - * @see IBuilderConfig::setQuantizationFlag(), IBuilderConfig::getQuantizationFlag() - * */ -@Namespace("nvinfer1") public enum QuantizationFlag { - /** IInt8EntropyCalibrator. We always run int8 calibration pass before layer fusion for - * IInt8MinMaxCalibrator and IInt8EntropyCalibrator2. Disabled by default. 
*/ - kCALIBRATE_BEFORE_FUSION(0); - - public final int value; - private QuantizationFlag(int v) { this.value = v; } - private QuantizationFlag(QuantizationFlag e) { this.value = e.value; } - public QuantizationFlag intern() { for (QuantizationFlag e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} - -/** Maximum number of quantization flags in QuantizationFlag enum. @see QuantizationFlag */ - - -/** - * \brief Represents one or more QuantizationFlag values using binary OR - * operations, e.g., 1U << BuilderFlag::kFP16 | 1U << BuilderFlag::kDEBUG. - * - * @see IBuilderConfig::getFlags(), ITensor::setFlags(), - * */ - - -//! -//! -//! -//! - -/** - * \enum BuilderFlag - * - * \brief List of valid modes that the builder can enable when creating an engine from a network definition. - * - * @see IBuilderConfig::setFlag(), IBuilderConfig::getFlag() - * */ -@Namespace("nvinfer1") public enum BuilderFlag { - /** Enable FP16 layer selection, with FP32 fallback. */ - kFP16(0), - /** Enable Int8 layer selection, with FP32 fallback with FP16 fallback if kFP16 also specified. */ - kINT8(1), - /** Enable debugging of layers via synchronizing after every layer. */ - kDEBUG(2), - /** Enable layers marked to execute on GPU if layer cannot execute on DLA. */ - kGPU_FALLBACK(3), - /** Enables strict type constraints. */ - kSTRICT_TYPES(4), - /** Enable building a refittable engine. */ - kREFIT(5), - /** Disable reuse of timing information across identical layers. */ - kDISABLE_TIMING_CACHE(6), - - /** Allow (but not require) computations on tensors of type DataType::kFLOAT to use TF32. - * TF32 computes inner products by rounding the inputs to 10-bit mantissas before - * multiplying, but accumulates the sum using 23-bit mantissas. Enabled by default. */ - kTF32(7), - - /** Allow the builder to examine weights and use optimized functions when weights have suitable sparsity. */ - -//! 
- kSPARSE_WEIGHTS(8), - - /** Change the allowed parameters in the EngineCapability::kSTANDARD flow to - * match the restrictions that EngineCapability::kSAFETY check against for DeviceType::kGPU - * and EngineCapability::kDLA_STANDALONE check against the DeviceType::kDLA case. This flag - * is forced to true if EngineCapability::kSAFETY at build time if it is unset. - * */ - kSAFETY_SCOPE(9); - - public final int value; - private BuilderFlag(int v) { this.value = v; } - private BuilderFlag(BuilderFlag e) { this.value = e.value; } - public BuilderFlag intern() { for (BuilderFlag e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} - -/** Maximum number of builder flags in BuilderFlag enum. @see BuilderFlag */ - - -/** - * \enum ProfilingVerbosity - * - * \brief List of verbosity levels of layer information exposed in NVTX annotations. - * - * @see IBuilderConfig::setProfilingVerbosity(), - * IBuilderConfig::getProfilingVerbosity() - * */ -@Namespace("nvinfer1") public enum ProfilingVerbosity { - /** Register layer names in NVTX message field. */ - kDEFAULT(0), - /** Turn off NVTX traces. */ - kNONE(1), - /** Register layer names in NVTX message field and register layer detail in NVTX JSON payload field. */ - kVERBOSE(2); - - public final int value; - private ProfilingVerbosity(int v) { this.value = v; } - private ProfilingVerbosity(ProfilingVerbosity e) { this.value = e.value; } - public ProfilingVerbosity intern() { for (ProfilingVerbosity e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} - -/** Maximum number of profile verbosity levels in ProfilingVerbosity enum. @see ProfilingVerbosity */ - -// Targeting ../nvinfer/ITimingCache.java - - -// Targeting ../nvinfer/IBuilderConfig.java - - - -/** \brief Represents one or more NetworkDefinitionCreationFlag flags - * using binary OR operations. 
- * e.g., 1U << NetworkDefinitionCreationFlag::kEXPLICIT_BATCH - * - * @see IBuilder::createNetworkV2 - * */ - -//! -//! -//! - -/** \enum NetworkDefinitionCreationFlag - * - * \brief List of immutable network properties expressed at network creation time. - * NetworkDefinitionCreationFlag is used with createNetworkV2 to specify immutable properties of the network. - * The createNetwork() function always had an implicit batch dimension being specified by the - * maxBatchSize builder parameter. createNetworkV2 with kDEFAULT flag mimics that behaviour. - * - * @see IBuilder::createNetworkV2 - * */ -@Namespace("nvinfer1") public enum NetworkDefinitionCreationFlag { - /** Dynamic shape support requires that the kEXPLICIT_BATCH flag is set. - * With dynamic shapes, any of the input dimensions can vary at run-time, - * and there are no implicit dimensions in the network specification. This is specified by using the - * wildcard dimension value -1. */ - /** Mark the network to be an explicit batch network */ - kEXPLICIT_BATCH(0), - - /** Setting the network to be an explicit precision network has the following implications: - * 1) Precision of all input tensors to the network have to be specified with ITensor::setType() function - * 2) Precision of all layer output tensors in the network have to be specified using ILayer::setOutputType() - * function - * 3) The builder will not quantize the weights of any layer including those running in lower precision(INT8). It - * will - * simply cast the weights into the required precision. - * 4) Dynamic ranges must not be provided to run the network in int8 mode. Dynamic ranges of each tensor in the - * explicit - * precision network is [-127,127]. - * 5) Quantizing and dequantizing activation values between higher (FP32) and lower (INT8) precision - * will be performed using explicit Scale layers with input/output precision set appropriately. 
*/ - kEXPLICIT_PRECISION(1);/** <-- Deprecated, used for backward compatibility */ - - public final int value; - private NetworkDefinitionCreationFlag(int v) { this.value = v; } - private NetworkDefinitionCreationFlag(NetworkDefinitionCreationFlag e) { this.value = e.value; } - public NetworkDefinitionCreationFlag intern() { for (NetworkDefinitionCreationFlag e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} - -/** Maximum number of elements in NetworkDefinitionCreationFlag enum. @see NetworkDefinitionCreationFlag */ - -// Targeting ../nvinfer/IBuilder.java - - - - // namespace nvinfer1 - -/** - * Internal C entry point for creating IBuilder. - * \private - * */ -public static native @NoException(true) Pointer createInferBuilder_INTERNAL(Pointer logger, int version); - -/** - * \brief Create an instance of an IBuilder class. - * - * This is the logging class for the builder. - * - * unnamed namespace avoids linkage surprises when linking objects built with different versions of this header. - * */ -@Namespace("nvinfer1") public static native @NoException(true) IBuilder createInferBuilder(@ByRef ILogger logger); - - // namespace - // namespace nvinfer1 - -// #endif // NV_INFER_H - - -// Parsed from NvInferImpl.h - -/* - * Copyright 1993-2021 NVIDIA Corporation. All rights reserved. - * - * NOTICE TO LICENSEE: - * - * This source code and/or documentation ("Licensed Deliverables") are - * subject to NVIDIA intellectual property rights under U.S. and - * international Copyright laws. - * - * These Licensed Deliverables contained herein is PROPRIETARY and - * CONFIDENTIAL to NVIDIA and is being provided under the terms and - * conditions of a form of NVIDIA software license agreement by and - * between NVIDIA and Licensee ("License Agreement") or electronically - * accepted by Licensee. 
Notwithstanding any terms or conditions to - * the contrary in the License Agreement, reproduction or disclosure - * of the Licensed Deliverables to any third party without the express - * written consent of NVIDIA is prohibited. - * - * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE - * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE - * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS - * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. - * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED - * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, - * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. - * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE - * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY - * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY - * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, - * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS - * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE - * OF THESE LICENSED DELIVERABLES. - * - * U.S. Government End Users. These Licensed Deliverables are a - * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT - * 1995), consisting of "commercial computer software" and "commercial - * computer software documentation" as such terms are used in 48 - * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government - * only as a commercial end item. Consistent with 48 C.F.R.12.212 and - * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all - * U.S. Government End Users acquire the Licensed Deliverables with - * only those rights set forth herein. - * - * Any use of the Licensed Deliverables in individual and commercial - * software must include, in the user documentation and internal - * comments to the code, the above Disclaimer and U.S. Government End - * Users Notice. 
- */ - -// #ifndef NV_INFER_IMPL_H -// #define NV_INFER_IMPL_H - -// #include "NvInferLegacyDims.h" -// #include "NvInferRuntimeCommon.h" -// Targeting ../nvinfer/IPlugin.java - - -// Targeting ../nvinfer/IPluginExt.java - - -// Targeting ../nvinfer/IPluginLayer.java - - - -/** enum class nvinfer1::ActivationType */ -; -/** enum class nvinfer1::BuilderFlag */ -; -/** enum class nvinfer1::CalibrationAlgoType */ -; -/** enum class nvinfer1::DeviceType */ -; -/** enum class nvinfer1::DimensionOperation */ -; -/** enum class nvinfer1::ElementWiseOperation */ -; -/** enum class nvinfer1::EngineCapability */ -; -/** enum class nvinfer1::FillOperation */ -; -/** enum class nvinfer1::LayerType */ -; -/** enum class nvinfer1::LoopOutput */ -; -/** enum class nvinfer1::MatrixOperation */ -; -/** enum class nvinfer1::NetworkDefinitionCreationFlag */ -; -/** enum class nvinfer1::OptProfileSelector */ -; -/** enum class nvinfer1::PaddingMode */ -; -/** enum class nvinfer1::PoolingType */ -; -/** enum class nvinfer1::ProfilingVerbosity */ -; -/** enum class nvinfer1::QuantizationFlag */ -; -/** enum class nvinfer1::ReduceOperation */ -; -/** enum class nvinfer1::ResizeCoordinateTransformation */ -; -/** enum class nvinfer1::ResizeMode */ -; -/** enum class nvinfer1::ResizeRoundMode */ -; -/** enum class nvinfer1::ResizeSelector */ -; -/** enum class nvinfer1::RNNDirection */ -; -/** enum class nvinfer1::RNNGateType */ -; -/** enum class nvinfer1::RNNInputMode */ -; -/** enum class nvinfer1::RNNOperation */ -; -/** enum class nvinfer1::ScaleMode */ -; -/** enum class nvinfer1::SliceMode */ -; -/** enum class nvinfer1::TensorLocation */ -; -/** enum class nvinfer1::TopKOperation */ -; -/** enum class nvinfer1::TripLimit */ -; -/** enum class nvinfer1::UnaryOperation */ -; -/** enum class nvinfer1::WeightsRole */ -; - - -//! -//! -//! 
-// Targeting ../nvinfer/VRoot.java - - -// Targeting ../nvinfer/VHostMemory.java - - -// Targeting ../nvinfer/VDimensionExpr.java - - -// Targeting ../nvinfer/VExprBuilder.java - - -// Targeting ../nvinfer/VRuntime.java - - -// Targeting ../nvinfer/VRefitter.java - - -// Targeting ../nvinfer/VOptimizationProfile.java - - -// Targeting ../nvinfer/VCudaEngine.java - - -// Targeting ../nvinfer/VExecutionContext.java - - -// Targeting ../nvinfer/VTensor.java - - -// Targeting ../nvinfer/VLayer.java - - -// Targeting ../nvinfer/VConvolutionLayer.java - - -// Targeting ../nvinfer/VFullyConnectedLayer.java - - -// Targeting ../nvinfer/VActivationLayer.java - - -// Targeting ../nvinfer/VPoolingLayer.java - - -// Targeting ../nvinfer/VLRNLayer.java - - -// Targeting ../nvinfer/VScaleLayer.java - - -// Targeting ../nvinfer/VSoftMaxLayer.java - - -// Targeting ../nvinfer/VConcatenationLayer.java - - -// Targeting ../nvinfer/VDeconvolutionLayer.java - - -// Targeting ../nvinfer/VElementWiseLayer.java - - -// Targeting ../nvinfer/VGatherLayer.java - - -// Targeting ../nvinfer/VRNNv2Layer.java - - -// Targeting ../nvinfer/VPluginLayer.java - - -// Targeting ../nvinfer/VPluginV2Layer.java - - -// Targeting ../nvinfer/VUnaryLayer.java - - -// Targeting ../nvinfer/VReduceLayer.java - - -// Targeting ../nvinfer/VPaddingLayer.java - - -// Targeting ../nvinfer/VShuffleLayer.java - - -// Targeting ../nvinfer/VSliceLayer.java - - -// Targeting ../nvinfer/VShapeLayer.java - - -// Targeting ../nvinfer/VTopKLayer.java - - -// Targeting ../nvinfer/VMatrixMultiplyLayer.java - - -// Targeting ../nvinfer/VRaggedSoftMaxLayer.java - - -// Targeting ../nvinfer/VIdentityLayer.java - - -// Targeting ../nvinfer/VConstantLayer.java - - -// Targeting ../nvinfer/VParametricReLULayer.java - - -// Targeting ../nvinfer/VResizeLayer.java - - -// Targeting ../nvinfer/VLoopBoundaryLayer.java - - -// Targeting ../nvinfer/VRecurrenceLayer.java - - -// Targeting ../nvinfer/VLoopOutputLayer.java - - -// 
Targeting ../nvinfer/VTripLimitLayer.java - - -// Targeting ../nvinfer/VIteratorLayer.java - - -// Targeting ../nvinfer/VLoop.java - - -// Targeting ../nvinfer/VSelectLayer.java - - -// Targeting ../nvinfer/VFillLayer.java - - -// Targeting ../nvinfer/VQuantizeLayer.java - - -// Targeting ../nvinfer/VDequantizeLayer.java - - -// Targeting ../nvinfer/VNetworkDefinition.java - - -// Targeting ../nvinfer/VAlgorithmIOInfo.java - - -// Targeting ../nvinfer/VAlgorithmVariant.java - - -// Targeting ../nvinfer/VAlgorithmContext.java - - -// Targeting ../nvinfer/VAlgorithm.java - - -// Targeting ../nvinfer/VTimingCache.java - - -// Targeting ../nvinfer/VBuilderConfig.java - - -// Targeting ../nvinfer/VBuilder.java - - - - // namespace apiv - // namespace nvinfer1 - -// #endif // NV_INFER_RUNTIME_IMPL_H - - -// Parsed from NvUtils.h - -/* - * Copyright 1993-2021 NVIDIA Corporation. All rights reserved. - * - * NOTICE TO LICENSEE: - * - * This source code and/or documentation ("Licensed Deliverables") are - * subject to NVIDIA intellectual property rights under U.S. and - * international Copyright laws. - * - * These Licensed Deliverables contained herein is PROPRIETARY and - * CONFIDENTIAL to NVIDIA and is being provided under the terms and - * conditions of a form of NVIDIA software license agreement by and - * between NVIDIA and Licensee ("License Agreement") or electronically - * accepted by Licensee. Notwithstanding any terms or conditions to - * the contrary in the License Agreement, reproduction or disclosure - * of the Licensed Deliverables to any third party without the express - * written consent of NVIDIA is prohibited. - * - * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE - * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE - * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS - * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. 
- * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED - * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, - * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. - * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE - * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY - * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY - * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, - * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS - * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE - * OF THESE LICENSED DELIVERABLES. - * - * U.S. Government End Users. These Licensed Deliverables are a - * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT - * 1995), consisting of "commercial computer software" and "commercial - * computer software documentation" as such terms are used in 48 - * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government - * only as a commercial end item. Consistent with 48 C.F.R.12.212 and - * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all - * U.S. Government End Users acquire the Licensed Deliverables with - * only those rights set forth herein. - * - * Any use of the Licensed Deliverables in individual and commercial - * software must include, in the user documentation and internal - * comments to the code, the above Disclaimer and U.S. Government End - * Users Notice. - */ - -// #ifndef NV_UTILS_H -// #define NV_UTILS_H - - - -//! -//! -//! -// #include "NvInfer.h" - -/** - * \file NvUtils.h - * - * This file includes various utility functions - * */ - -/** - * @param input The input weights to reshape. - * @param shape The shape of the weights. - * @param shapeOrder The order of the dimensions to process for the output. - * @param data The location where the output data is placed. - * @param nbDims The number of dimensions to process. 
- * - * \brief Reformat the input weights of the given shape based on the new - * order of dimensions. - * - * Take the weights specified by \p input with the dimensions specified by - * \p shape and re-order the weights based on the new dimensions specified - * by \p shapeOrder. The size of each dimension and the input data is not - * modified. The output volume pointed to by \p data must be the same as - * he \p input volume. - * - * Example usage: - * float *out = new float[N*C*H*W]; - * Weights input{DataType::kFLOAT, {0 ... N*C*H*W-1}, N*C*H*W size}; - * int32_t order[4]{1, 0, 3, 2}; - * int32_t shape[4]{C, N, W, H}; - * reshapeWeights(input, shape, order, out, 4); - * Weights reshaped{input.type, out, input.count}; - * - * Input Matrix{3, 2, 3, 2}: - * { 0 1}, { 2 3}, { 4 5} <-- {0, 0, *, *} - * { 6 7}, { 8 9}, {10 11} <-- {0, 1, *, *} - * {12 13}, {14 15}, {16 17} <-- {1, 0, *, *} - * {18 19}, {20 21}, {22 23} <-- {1, 1, *, *} - * {24 25}, {26 27}, {28 29} <-- {2, 0, *, *} - * {30 31}, {32 33}, {34 35} <-- {2, 1, *, *} - * - * Output Matrix{2, 3, 2, 3}: - * { 0 2 4}, { 1 3 5} <-- {0, 0, *, *} - * {12 14 16}, {13 15 17} <-- {0, 1, *, *} - * {24 26 28}, {25 27 29} <-- {0, 2, *, *} - * { 6 8 10}, { 7 9 11} <-- {1, 0, *, *} - * {18 20 22}, {19 21 23} <-- {1, 1, *, *} - * {30 32 34}, {31 33 35} <-- {1, 2, *, *} - * - * @return True on success, false on failure. - * - * \warning This file will be removed in TensorRT 10.0. - * */ - - -//! -//! -//! -//! -//! -//! -//! -//! -//! -//! 
-@Namespace("nvinfer1::utils") public static native @Cast("bool") @Deprecated @NoException(true) boolean reshapeWeights( - @Const @ByRef Weights input, @Const IntPointer shape, @Const IntPointer shapeOrder, Pointer data, int nbDims); -@Namespace("nvinfer1::utils") public static native @Cast("bool") @Deprecated @NoException(true) boolean reshapeWeights( - @Const @ByRef Weights input, @Const IntBuffer shape, @Const IntBuffer shapeOrder, Pointer data, int nbDims); -@Namespace("nvinfer1::utils") public static native @Cast("bool") @Deprecated @NoException(true) boolean reshapeWeights( - @Const @ByRef Weights input, @Const int[] shape, @Const int[] shapeOrder, Pointer data, int nbDims); - -/** - * @param input The input data to re-order. - * @param order The new order of the data sub-buffers. - * @param num The number of data sub-buffers to re-order. - * @param size The size of each data sub-buffer in bytes. - * - * \brief Takes an input stream and re-orders \p num chunks of the data - * given the \p size and \p order. - * - * In some frameworks, the ordering of the sub-buffers within a dimension - * is different than the way that TensorRT expects them. - * TensorRT expects the gate/bias sub-buffers for LSTM's to be in fico order. - * TensorFlow however formats the sub-buffers in icfo order. - * This helper function solves this in a generic fashion. 
- * - * Example usage output of reshapeWeights above: - * int32_t indir[1]{1, 0} - * int32_t stride = W*H; - * for (int32_t x = 0, y = N*C; x < y; ++x) - * reorderSubBuffers(out + x * stride, indir, H, W); - * - * Input Matrix{2, 3, 2, 3}: - * { 0 2 4}, { 1 3 5} <-- {0, 0, *, *} - * {12 14 16}, {13 15 17} <-- {0, 1, *, *} - * {24 26 28}, {25 27 29} <-- {0, 2, *, *} - * { 6 8 10}, { 7 9 11} <-- {1, 0, *, *} - * {18 20 22}, {19 21 23} <-- {1, 1, *, *} - * {30 32 34}, {31 33 35} <-- {1, 2, *, *} - * - * Output Matrix{2, 3, 2, 3}: - * { 1 3 5}, { 0 2 4} <-- {0, 0, *, *} - * {13 15 17}, {12 14 16} <-- {0, 1, *, *} - * {25 27 29}, {24 26 28} <-- {0, 2, *, *} - * { 7 9 11}, { 6 8 10} <-- {1, 0, *, *} - * {19 21 23}, {18 20 22} <-- {1, 1, *, *} - * {31 33 35}, {30 32 34} <-- {1, 2, *, *} - * - * @return True on success, false on failure. - * - * @see reshapeWeights() - * - * \warning This file will be removed in TensorRT 10.0. - * */ - - -//! -//! -//! -//! -//! -@Namespace("nvinfer1::utils") public static native @Cast("bool") @Deprecated @NoException(true) boolean reorderSubBuffers( - Pointer input, @Const IntPointer order, int num, int size); -@Namespace("nvinfer1::utils") public static native @Cast("bool") @Deprecated @NoException(true) boolean reorderSubBuffers( - Pointer input, @Const IntBuffer order, int num, int size); -@Namespace("nvinfer1::utils") public static native @Cast("bool") @Deprecated @NoException(true) boolean reorderSubBuffers( - Pointer input, @Const int[] order, int num, int size); - -/** - * @param input The input data to transpose. - * @param type The type of the data to transpose. - * @param num The number of data sub-buffers to transpose. - * @param height The size of the height dimension to transpose. - * @param width The size of the width dimension to transpose. - * - * \brief Transpose \p num sub-buffers of \p height * \p width. - * - * @return True on success, false on failure. - * - * \warning This file will be removed in TensorRT 10.0. 
- * */ -@Namespace("nvinfer1::utils") public static native @Cast("bool") @Deprecated @NoException(true) boolean transposeSubBuffers( - Pointer input, DataType type, int num, int height, int width); -@Namespace("nvinfer1::utils") public static native @Cast("bool") @Deprecated @NoException(true) boolean transposeSubBuffers( - Pointer input, @Cast("nvinfer1::DataType") int type, int num, int height, int width); - - // namespace utils - // namespace nvinfer1 -// #endif // NV_UTILS_H - - -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/global/nvinfer_plugin.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/global/nvinfer_plugin.java deleted file mode 100644 index 4fed32f0113..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/global/nvinfer_plugin.java +++ /dev/null @@ -1,391 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.global; - -import org.bytedeco.tensorrt.nvinfer_plugin.*; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; -import org.bytedeco.tensorrt.nvinfer.*; -import static org.bytedeco.tensorrt.global.nvinfer.*; - -public class nvinfer_plugin extends org.bytedeco.tensorrt.presets.nvinfer_plugin { - static { Loader.load(); } - -// Parsed from NvInferPlugin.h - -/* - * Copyright 1993-2021 NVIDIA Corporation. All rights reserved. - * - * NOTICE TO LICENSEE: - * - * This source code and/or documentation ("Licensed Deliverables") are - * subject to NVIDIA intellectual property rights under U.S. 
and - * international Copyright laws. - * - * These Licensed Deliverables contained herein is PROPRIETARY and - * CONFIDENTIAL to NVIDIA and is being provided under the terms and - * conditions of a form of NVIDIA software license agreement by and - * between NVIDIA and Licensee ("License Agreement") or electronically - * accepted by Licensee. Notwithstanding any terms or conditions to - * the contrary in the License Agreement, reproduction or disclosure - * of the Licensed Deliverables to any third party without the express - * written consent of NVIDIA is prohibited. - * - * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE - * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE - * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS - * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. - * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED - * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, - * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. - * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE - * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY - * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY - * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, - * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS - * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE - * OF THESE LICENSED DELIVERABLES. - * - * U.S. Government End Users. These Licensed Deliverables are a - * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT - * 1995), consisting of "commercial computer software" and "commercial - * computer software documentation" as such terms are used in 48 - * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government - * only as a commercial end item. Consistent with 48 C.F.R.12.212 and - * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all - * U.S. 
Government End Users acquire the Licensed Deliverables with - * only those rights set forth herein. - * - * Any use of the Licensed Deliverables in individual and commercial - * software must include, in the user documentation and internal - * comments to the code, the above Disclaimer and U.S. Government End - * Users Notice. - */ - -// #ifndef NV_INFER_PLUGIN_H -// #define NV_INFER_PLUGIN_H - -// #include "NvInfer.h" - -//! -//! -//! -// #include "NvInferPluginUtils.h" -/** - * \file NvInferPlugin.h - * - * This is the API for the Nvidia provided TensorRT plugins. - * */ - /** - * \brief Create a plugin layer that fuses the RPN and ROI pooling using user-defined parameters. - * Registered plugin type "RPROI_TRT". Registered plugin version "1". - * @param featureStride Feature stride. - * @param preNmsTop Number of proposals to keep before applying NMS. - * @param nmsMaxOut Number of remaining proposals after applying NMS. - * @param iouThreshold IoU threshold. - * @param minBoxSize Minimum allowed bounding box size before scaling. - * @param spatialScale Spatial scale between the input image and the last feature map. - * @param pooling Spatial dimensions of pooled ROIs. - * @param anchorRatios Aspect ratios for generating anchor windows. - * @param anchorScales Scales for generating anchor windows. - * - * @return Returns a FasterRCNN fused RPN+ROI pooling plugin. Returns nullptr on invalid inputs. - * */ - - - //! - //! - public static native IPluginV2 createRPNROIPlugin(int featureStride, int preNmsTop, int nmsMaxOut, - float iouThreshold, float minBoxSize, float spatialScale, @ByVal DimsHW pooling, - @ByVal Weights anchorRatios, @ByVal Weights anchorScales); - - /** - * \brief The Normalize plugin layer normalizes the input to have L2 norm of 1 with scale learnable. - * Registered plugin type "Normalize_TRT". Registered plugin version "1". - * @param scales Scale weights that are applied to the output tensor. 
- * @param acrossSpatial Whether to compute the norm over adjacent channels (acrossSpatial is true) or nearby - * spatial locations (within channel in which case acrossSpatial is false). - * @param channelShared Whether the scale weight(s) is shared across channels. - * @param eps Epsilon for not dividing by zero. - * */ - - - //! - //! - public static native IPluginV2 createNormalizePlugin( - @Const Weights scales, @Cast("bool") boolean acrossSpatial, @Cast("bool") boolean channelShared, float eps); - - /** - * \brief The PriorBox plugin layer generates the prior boxes of designated sizes and aspect ratios across all - * dimensions (H x W). PriorBoxParameters defines a set of parameters for creating the PriorBox plugin layer. - * Registered plugin type "PriorBox_TRT". Registered plugin version "1". - * */ - - - //! - //! - public static native IPluginV2 createPriorBoxPlugin(@ByVal PriorBoxParameters param); - - /** - * \brief The Grid Anchor Generator plugin layer generates the prior boxes of - * designated sizes and aspect ratios across all dimensions (H x W) for all feature maps. - * GridAnchorParameters defines a set of parameters for creating the GridAnchorGenerator plugin layer. - * Registered plugin type "GridAnchor_TRT". Registered plugin version "1". - * */ - - - //! - //! - public static native IPluginV2 createAnchorGeneratorPlugin( - GridAnchorParameters param, int numLayers); - - /** - * \brief The DetectionOutput plugin layer generates the detection output based on location and confidence - * predictions by doing non maximum suppression. DetectionOutputParameters defines a set of parameters for creating - * the DetectionOutput plugin layer. Registered plugin type "NMS_TRT". Registered plugin version "1". - * */ - - - //! - //! - public static native IPluginV2 createNMSPlugin(@ByVal DetectionOutputParameters param); - - /** - * \brief The Reorg plugin reshapes input of shape CxHxW into a (C*stride*stride)x(H/stride)x(W/stride) shape, used - * in YOLOv2. 
It does that by taking 1 x stride x stride slices from tensor and flattening them into - * (stride x stride) x 1 x 1 shape. Registered plugin type "Reorg_TRT". Registered plugin version "1". - * @param stride Strides in H and W, it should divide both H and W. Also stride * stride should be less than or equal to C. - * */ - - - //! - //! - public static native IPluginV2 createReorgPlugin(int stride); - - /** - * \brief The Region plugin layer performs region proposal calculation: generate 5 bounding boxes per cell (for - * yolo9000, generate 3 bounding boxes per cell). For each box, calculating its probablities of objects detections - * from 80 pre-defined classifications (yolo9000 has 9416 pre-defined classifications, and these 9416 items are - * organized as work-tree structure). RegionParameters defines a set of parameters for creating the Region plugin - * layer. Registered plugin type "Region_TRT". Registered plugin version "1". - * */ - - - //! - //! - //! - //! - public static native IPluginV2 createRegionPlugin(@ByVal RegionParameters params); - - /** - * \brief The BatchedNMS Plugin performs non_max_suppression on the input boxes, per batch, across all classes. - * It greedily selects a subset of bounding boxes in descending order of - * score. Prunes away boxes that have a high intersection-over-union (IOU) - * overlap with previously selected boxes. Bounding boxes are supplied as [y1, x1, y2, x2], - * where (y1, x1) and (y2, x2) are the coordinates of any - * diagonal pair of box corners and the coordinates can be provided as normalized - * (i.e., lying in the interval [0, 1]) or absolute. - * The plugin expects two inputs. - * Input0 is expected to be 4-D float boxes tensor of shape [batch_size, num_boxes, - * q, 4], where q can be either 1 (if shareLocation is true) or num_classes. - * Input1 is expected to be a 3-D float scores tensor of shape [batch_size, num_boxes, num_classes] - * representing a single score corresponding to each box. 
- * The plugin returns four outputs. - * num_detections : A [batch_size] int32 tensor indicating the number of valid - * detections per batch item. Can be less than keepTopK. Only the top num_detections[i] entries in - * nmsed_boxes[i], nmsed_scores[i] and nmsed_classes[i] are valid. - * nmsed_boxes : A [batch_size, max_detections, 4] float32 tensor containing - * the co-ordinates of non-max suppressed boxes. - * nmsed_scores : A [batch_size, max_detections] float32 tensor containing the - * scores for the boxes. - * nmsed_classes : A [batch_size, max_detections] float32 tensor containing the - * classes for the boxes. - * - * Registered plugin type "BatchedNMS_TRT". Registered plugin version "1". - * - * The batched NMS plugin can require a lot of workspace due to intermediate buffer usage. To get the - * estimated workspace size for the plugin for a batch size, use the API {@code plugin->getWorkspaceSize(batchSize)}. - * */ - - - //! - //! - public static native IPluginV2 createBatchedNMSPlugin(@ByVal NMSParameters param); - - /** - * \brief The Split Plugin performs a split operation on the input tensor. It - * splits the input tensor into several output tensors, each of a length corresponding to output_lengths. - * The split occurs along the axis specified by axis. - * @param axis The axis to split on. - * @param output_lengths The lengths of the output tensors. - * @param noutput The number of output tensors. - * */ - - - //! - //! - public static native IPluginV2 createSplitPlugin(int axis, IntPointer output_lengths, int noutput); - public static native IPluginV2 createSplitPlugin(int axis, IntBuffer output_lengths, int noutput); - public static native IPluginV2 createSplitPlugin(int axis, int[] output_lengths, int noutput); - - /** - * \brief The Instance Normalization Plugin computes the instance normalization of an input tensor. - * The instance normalization is calculated as found in the paper https://arxiv.org/abs/1607.08022. 
- * The calculation is y = scale * (x - mean) / sqrt(variance + epsilon) + bias where mean and variance - * are computed per instance per channel. - * @param epsilon The epsilon value to use to avoid division by zero. - * @param scale_weights The input 1-dimensional scale weights of size C to scale. - * @param bias_weights The input 1-dimensional bias weights of size C to offset. - * */ - - - //! - //! - public static native IPluginV2 createInstanceNormalizationPlugin( - float epsilon, @ByVal Weights scale_weights, @ByVal Weights bias_weights); - - /** - * \brief Initialize and register all the existing TensorRT plugins to the Plugin Registry with an optional - * namespace. The plugin library author should ensure that this function name is unique to the library. This - * function should be called once before accessing the Plugin Registry. - * @param logger Logger object to print plugin registration information - * @param libNamespace Namespace used to register all the plugins in this library - * */ - public static native @Cast("bool") boolean initLibNvInferPlugins(Pointer logger, String libNamespace); - public static native @Cast("bool") boolean initLibNvInferPlugins(Pointer logger, @Cast("const char*") BytePointer libNamespace); // extern "C" - -// #endif // NV_INFER_PLUGIN_H - - -// Parsed from NvInferPluginUtils.h - -/* - * Copyright 1993-2021 NVIDIA Corporation. All rights reserved. - * - * NOTICE TO LICENSEE: - * - * This source code and/or documentation ("Licensed Deliverables") are - * subject to NVIDIA intellectual property rights under U.S. and - * international Copyright laws. - * - * These Licensed Deliverables contained herein is PROPRIETARY and - * CONFIDENTIAL to NVIDIA and is being provided under the terms and - * conditions of a form of NVIDIA software license agreement by and - * between NVIDIA and Licensee ("License Agreement") or electronically - * accepted by Licensee. 
Notwithstanding any terms or conditions to - * the contrary in the License Agreement, reproduction or disclosure - * of the Licensed Deliverables to any third party without the express - * written consent of NVIDIA is prohibited. - * - * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE - * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE - * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS - * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. - * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED - * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, - * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. - * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE - * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY - * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY - * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, - * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS - * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE - * OF THESE LICENSED DELIVERABLES. - * - * U.S. Government End Users. These Licensed Deliverables are a - * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT - * 1995), consisting of "commercial computer software" and "commercial - * computer software documentation" as such terms are used in 48 - * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government - * only as a commercial end item. Consistent with 48 C.F.R.12.212 and - * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all - * U.S. Government End Users acquire the Licensed Deliverables with - * only those rights set forth herein. - * - * Any use of the Licensed Deliverables in individual and commercial - * software must include, in the user documentation and internal - * comments to the code, the above Disclaimer and U.S. Government End - * Users Notice. 
- */ - -// #ifndef NV_INFER_PLUGIN_UTILS_H -// #define NV_INFER_PLUGIN_UTILS_H - - - -//! -//! -//! -// #include "NvInferRuntimeCommon.h" - -/** - * \file NvInferPluginUtils.h - * - * This is the API for the Nvidia provided TensorRT plugin utilities. - * It lists all the parameters utilized by the TensorRT plugins. - * */ -// Targeting ../nvinfer_plugin/Quadruple.java - - -// Targeting ../nvinfer_plugin/PriorBoxParameters.java - - -// Targeting ../nvinfer_plugin/RPROIParams.java - - -// Targeting ../nvinfer_plugin/GridAnchorParameters.java - - - -/** - * \enum CodeTypeSSD - * \brief The type of encoding used for decoding the bounding boxes and loc_data. - * */ -@Namespace("nvinfer1::plugin") public enum CodeTypeSSD { - /** Use box corners. */ - CORNER(0), - /** Use box centers and size. */ - CENTER_SIZE(1), - /** Use box centers and size. */ - CORNER_SIZE(2), - /** Use box centers and size but flip x and y coordinates. */ - TF_CENTER(3); - - public final int value; - private CodeTypeSSD(int v) { this.value = v; } - private CodeTypeSSD(CodeTypeSSD e) { this.value = e.value; } - public CodeTypeSSD intern() { for (CodeTypeSSD e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} -// Targeting ../nvinfer_plugin/DetectionOutputParameters.java - - -// Targeting ../nvinfer_plugin/softmaxTree.java - - -// Targeting ../nvinfer_plugin/RegionParameters.java - - -// Targeting ../nvinfer_plugin/NMSParameters.java - - - - // namespace plugin - // namespace nvinfer1 - -// #endif // NV_INFER_PLUGIN_UTILS_H - - -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/global/nvonnxparser.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/global/nvonnxparser.java deleted file mode 100644 index 10b0296119f..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/global/nvonnxparser.java +++ /dev/null @@ -1,169 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS 
FILE - -package org.bytedeco.tensorrt.global; - -import org.bytedeco.tensorrt.nvonnxparser.*; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; -import org.bytedeco.tensorrt.nvinfer.*; -import static org.bytedeco.tensorrt.global.nvinfer.*; -import org.bytedeco.tensorrt.nvinfer_plugin.*; -import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; - -public class nvonnxparser extends org.bytedeco.tensorrt.presets.nvonnxparser { - static { Loader.load(); } - -// Targeting ../nvonnxparser/SubGraphCollection_t.java - - -// Targeting ../nvonnxparser/SubGraph_t.java - - -// Parsed from NvOnnxParser.h - -/* - * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -// #ifndef NV_ONNX_PARSER_H -// #define NV_ONNX_PARSER_H - -// #include "NvInfer.h" -// #include - - -//! -//! -//! -// #include - -/** - * \file NvOnnxParser.h - * - * This is the API for the ONNX Parser - * */ - -public static final int NV_ONNX_PARSER_MAJOR = 0; -public static final int NV_ONNX_PARSER_MINOR = 1; -public static final int NV_ONNX_PARSER_PATCH = 0; - - -//! -//! -@MemberGetter public static native int NV_ONNX_PARSER_VERSION(); -public static final int NV_ONNX_PARSER_VERSION = NV_ONNX_PARSER_VERSION(); - -/** \typedef SubGraph_t - * - * \brief The data structure containing the parsing capability of - * a set of nodes in an ONNX graph. - * */ - -//! -//! - -/** \typedef SubGraphCollection_t - * - * \brief The data structure containing all SubGraph_t partitioned - * out of an ONNX graph. - * */ - - -//! -//! -//! 
- -/** - * \namespace nvonnxparser - * - * \brief The TensorRT ONNX parser API namespace - * */ - -@Namespace("nvonnxparser") public static native @Name("EnumMax") int ErrorCodeEnumMax(); - -/** \enum ErrorCode - * - * \brief the type of parser error - */ -@Namespace("nvonnxparser") public enum ErrorCode { - kSUCCESS(0), - kINTERNAL_ERROR(1), - kMEM_ALLOC_FAILED(2), - kMODEL_DESERIALIZE_FAILED(3), - kINVALID_VALUE(4), - kINVALID_GRAPH(5), - kINVALID_NODE(6), - kUNSUPPORTED_GRAPH(7), - kUNSUPPORTED_NODE(8); - - public final int value; - private ErrorCode(int v) { this.value = v; } - private ErrorCode(ErrorCode e) { this.value = e.value; } - public ErrorCode intern() { for (ErrorCode e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} -// Targeting ../nvonnxparser/IParserError.java - - -// Targeting ../nvonnxparser/IParser.java - - - - // namespace nvonnxparser - -public static native Pointer createNvOnnxParser_INTERNAL(Pointer network, Pointer logger, int version); -public static native int getNvOnnxParserVersion(); - -/** \brief Create a new parser object - * - * @param network The network definition that the parser will write to - * @param logger The logger to use - * @return a new parser object or NULL if an error occurred - * - * Any input dimensions that are constant should not be changed after parsing, - * because correctness of the translation may rely on those constants. - * Changing a dynamic input dimension, i.e. one that translates to -1 in - * TensorRT, to a constant is okay if the constant is consistent with the model. 
- * - * @see IParser - */ -@Namespace("nvonnxparser") public static native IParser createParser(@ByRef INetworkDefinition network, @ByRef ILogger logger); - - // namespace - - // namespace nvonnxparser - -// #endif // NV_ONNX_PARSER_H - - -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/global/nvparsers.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/global/nvparsers.java deleted file mode 100644 index 2dcb64d38ed..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/global/nvparsers.java +++ /dev/null @@ -1,320 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.global; - -import org.bytedeco.tensorrt.nvparsers.*; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; -import org.bytedeco.tensorrt.nvinfer.*; -import static org.bytedeco.tensorrt.global.nvinfer.*; -import org.bytedeco.tensorrt.nvinfer_plugin.*; -import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; - -public class nvparsers extends org.bytedeco.tensorrt.presets.nvparsers { - static { Loader.load(); } - -// Parsed from NvCaffeParser.h - -/* - * Copyright 1993-2021 NVIDIA Corporation. All rights reserved. - * - * NOTICE TO LICENSEE: - * - * This source code and/or documentation ("Licensed Deliverables") are - * subject to NVIDIA intellectual property rights under U.S. and - * international Copyright laws. 
- * - * These Licensed Deliverables contained herein is PROPRIETARY and - * CONFIDENTIAL to NVIDIA and is being provided under the terms and - * conditions of a form of NVIDIA software license agreement by and - * between NVIDIA and Licensee ("License Agreement") or electronically - * accepted by Licensee. Notwithstanding any terms or conditions to - * the contrary in the License Agreement, reproduction or disclosure - * of the Licensed Deliverables to any third party without the express - * written consent of NVIDIA is prohibited. - * - * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE - * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE - * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS - * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. - * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED - * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, - * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. - * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE - * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY - * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY - * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, - * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS - * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE - * OF THESE LICENSED DELIVERABLES. - * - * U.S. Government End Users. These Licensed Deliverables are a - * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT - * 1995), consisting of "commercial computer software" and "commercial - * computer software documentation" as such terms are used in 48 - * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government - * only as a commercial end item. Consistent with 48 C.F.R.12.212 and - * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all - * U.S. 
Government End Users acquire the Licensed Deliverables with - * only those rights set forth herein. - * - * Any use of the Licensed Deliverables in individual and commercial - * software must include, in the user documentation and internal - * comments to the code, the above Disclaimer and U.S. Government End - * Users Notice. - */ - -// #ifndef NV_CAFFE_PARSER_H -// #define NV_CAFFE_PARSER_H - - - -//! -//! -//! - -//! -//! -//! -// #include "NvInfer.h" - -/** - * \file NvCaffeParser.h - * - * This is the API for the Caffe Parser - * -

- * - * \namespace nvcaffeparser1 - * - * \brief The TensorRT Caffe parser API namespace. - * */ -// Targeting ../nvparsers/IBlobNameToTensor.java - - -// Targeting ../nvparsers/IBinaryProtoBlob.java - - -// Targeting ../nvparsers/IPluginFactoryV2.java - - -// Targeting ../nvparsers/ICaffeParser.java - - - -/** - * \brief Creates a ICaffeParser object. - * - * @return A pointer to the ICaffeParser object is returned. - * - * @see nvcaffeparser1::ICaffeParser - * - * @deprecated ICaffeParser will be removed in TensorRT 9.0. Plan to migrate your workflow to - * use nvonnxparser::IParser for deployment. - * */ - - -//! -//! -//! -@Namespace("nvcaffeparser1") public static native @NoException(true) ICaffeParser createCaffeParser(); - -/** - * \brief Shuts down protocol buffers library. - * - * \note No part of the protocol buffers library can be used after this function is called. - * */ -@Namespace("nvcaffeparser1") public static native @NoException(true) void shutdownProtobufLibrary(); - // namespace nvcaffeparser1 - -/** - * Internal C entry point for creating ICaffeParser. - * \private - * */ -public static native @NoException(true) Pointer createNvCaffeParser_INTERNAL(); -// #endif - - -// Parsed from NvUffParser.h - -/* - * Copyright 1993-2021 NVIDIA Corporation. All rights reserved. - * - * NOTICE TO LICENSEE: - * - * This source code and/or documentation ("Licensed Deliverables") are - * subject to NVIDIA intellectual property rights under U.S. and - * international Copyright laws. - * - * These Licensed Deliverables contained herein is PROPRIETARY and - * CONFIDENTIAL to NVIDIA and is being provided under the terms and - * conditions of a form of NVIDIA software license agreement by and - * between NVIDIA and Licensee ("License Agreement") or electronically - * accepted by Licensee. 
Notwithstanding any terms or conditions to - * the contrary in the License Agreement, reproduction or disclosure - * of the Licensed Deliverables to any third party without the express - * written consent of NVIDIA is prohibited. - * - * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE - * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE - * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS - * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND. - * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED - * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, - * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. - * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE - * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY - * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY - * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, - * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS - * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE - * OF THESE LICENSED DELIVERABLES. - * - * U.S. Government End Users. These Licensed Deliverables are a - * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT - * 1995), consisting of "commercial computer software" and "commercial - * computer software documentation" as such terms are used in 48 - * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government - * only as a commercial end item. Consistent with 48 C.F.R.12.212 and - * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all - * U.S. Government End Users acquire the Licensed Deliverables with - * only those rights set forth herein. - * - * Any use of the Licensed Deliverables in individual and commercial - * software must include, in the user documentation and internal - * comments to the code, the above Disclaimer and U.S. Government End - * Users Notice. 
- */ - -// #ifndef NV_UFF_PARSER_H -// #define NV_UFF_PARSER_H - - - -//! -//! -//! -// #include "NvInfer.h" - -/** - * \file NvUffParser.h - * - * This is the API for the UFF Parser - * */ - -// Current supported Universal Framework Format (UFF) version for the parser. -public static final int UFF_REQUIRED_VERSION_MAJOR = 0; -public static final int UFF_REQUIRED_VERSION_MINOR = 6; - - -//! -//! -//! -public static final int UFF_REQUIRED_VERSION_PATCH = 9; - -/** - * \namespace nvuffparser - * - * \brief The TensorRT UFF parser API namespace. - * */ - -/** - * \enum UffInputOrder - * \brief The different possible supported input order. - * */ -@Namespace("nvuffparser") public enum UffInputOrder { - /** NCHW order. */ - kNCHW(0), - /** NHWC order. */ - kNHWC(1), - /** NC order. */ - kNC(2); - - public final int value; - private UffInputOrder(int v) { this.value = v; } - private UffInputOrder(UffInputOrder e) { this.value = e.value; } - public UffInputOrder intern() { for (UffInputOrder e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} - -/** - * \enum FieldType - * \brief The possible field types for custom layer. - * */ - -@Namespace("nvuffparser") public enum FieldType { - /** FP32 field type. */ - kFLOAT(0), - /** INT32 field type. */ - kINT32(1), - /** char field type. String for length>1. */ - kCHAR(2), - /** nvinfer1::Dims field type. */ - kDIMS(4), - /** nvinfer1::DataType field type. 
*/ - kDATATYPE(5), - kUNKNOWN(6); - - public final int value; - private FieldType(int v) { this.value = v; } - private FieldType(FieldType e) { this.value = e.value; } - public FieldType intern() { for (FieldType e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} -// Targeting ../nvparsers/FieldMap.java - - -// Targeting ../nvparsers/FieldCollection.java - - -// Targeting ../nvparsers/IUffParser.java - - - -/** - * \brief Creates a IUffParser object. - * - * @return A pointer to the IUffParser object is returned. - * - * @see nvuffparser::IUffParser - * - * @deprecated IUffParser will be removed in TensorRT 9.0. Plan to migrate your workflow to - * use nvonnxparser::IParser for deployment. - * */ - - -//! -//! -//! -@Namespace("nvuffparser") public static native @NoException(true) IUffParser createUffParser(); - -/** - * \brief Shuts down protocol buffers library. - * - * \note No part of the protocol buffers library can be used after this function is called. 
- * */ - - // namespace nvuffparser - -/** - * Internal C entry point for creating IUffParser - * \private - * */ -public static native @NoException(true) Pointer createNvUffParser_INTERNAL(); - -// #endif /* !NV_UFF_PARSER_H */ - - -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Dims2.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Dims2.java deleted file mode 100644 index 5a9a79efcf6..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Dims2.java +++ /dev/null @@ -1,59 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - -/** - * \class Dims2 - * \brief Descriptor for two-dimensional data. - * */ -@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class Dims2 extends Dims32 { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public Dims2(Pointer p) { super(p); } - /** Native array allocator. Access with {@link Pointer#position(long)}. 
*/ - public Dims2(long size) { super((Pointer)null); allocateArray(size); } - private native void allocateArray(long size); - @Override public Dims2 position(long position) { - return (Dims2)super.position(position); - } - @Override public Dims2 getPointer(long i) { - return new Dims2((Pointer)this).offsetAddress(i); - } - - /** - * \brief Construct an empty Dims2 object. - * */ - - - //! - //! - //! - public Dims2() { super((Pointer)null); allocate(); } - private native void allocate(); - - /** - * \brief Construct a Dims2 from 2 elements. - * - * @param d0 The first element. - * @param d1 The second element. - * */ - public Dims2(int d0, int d1) { super((Pointer)null); allocate(d0, d1); } - private native void allocate(int d0, int d1); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Dims3.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Dims3.java deleted file mode 100644 index 65319fcf60e..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Dims3.java +++ /dev/null @@ -1,61 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \class Dims3 - * \brief Descriptor for three-dimensional data. 
- * */ -@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class Dims3 extends Dims32 { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public Dims3(Pointer p) { super(p); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public Dims3(long size) { super((Pointer)null); allocateArray(size); } - private native void allocateArray(long size); - @Override public Dims3 position(long position) { - return (Dims3)super.position(position); - } - @Override public Dims3 getPointer(long i) { - return new Dims3((Pointer)this).offsetAddress(i); - } - - /** - * \brief Construct an empty Dims3 object. - * */ - - - //! - //! - //! - public Dims3() { super((Pointer)null); allocate(); } - private native void allocate(); - - /** - * \brief Construct a Dims3 from 3 elements. - * - * @param d0 The first element. - * @param d1 The second element. - * @param d2 The third element. 
- * */ - public Dims3(int d0, int d1, int d2) { super((Pointer)null); allocate(d0, d1, d2); } - private native void allocate(int d0, int d1, int d2); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Dims32.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Dims32.java deleted file mode 100644 index 5a6532071af..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Dims32.java +++ /dev/null @@ -1,58 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - // namespace impl - -/** - * \class Dims - * \brief Structure to define the dimensions of a tensor. - * - * TensorRT can also return an invalid dims structure. This structure is represented by nbDims == -1 - * and d[i] == 0 for all d. - * - * TensorRT can also return an "unknown rank" dims structure. This structure is represented by nbDims == -1 - * and d[i] == -1 for all d. - * */ -@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class Dims32 extends Pointer { - static { Loader.load(); } - /** Default native constructor. */ - public Dims32() { super((Pointer)null); allocate(); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public Dims32(long size) { super((Pointer)null); allocateArray(size); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public Dims32(Pointer p) { super(p); } - private native void allocate(); - private native void allocateArray(long size); - @Override public Dims32 position(long position) { - return (Dims32)super.position(position); - } - @Override public Dims32 getPointer(long i) { - return new Dims32((Pointer)this).offsetAddress(i); - } - - /** The maximum number of dimensions supported for a tensor. */ - @MemberGetter public static native int MAX_DIMS(); - public static final int MAX_DIMS = MAX_DIMS(); - /** The number of dimensions. */ - public native int nbDims(); public native Dims32 nbDims(int setter); - /** The extent of each dimension. */ - public native int d(int i); public native Dims32 d(int i, int setter); - @MemberGetter public native IntPointer d(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Dims4.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Dims4.java deleted file mode 100644 index 368ff810bb6..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Dims4.java +++ /dev/null @@ -1,62 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \class Dims4 - * \brief Descriptor for four-dimensional data. 
- * */ -@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class Dims4 extends Dims32 { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public Dims4(Pointer p) { super(p); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public Dims4(long size) { super((Pointer)null); allocateArray(size); } - private native void allocateArray(long size); - @Override public Dims4 position(long position) { - return (Dims4)super.position(position); - } - @Override public Dims4 getPointer(long i) { - return new Dims4((Pointer)this).offsetAddress(i); - } - - /** - * \brief Construct an empty Dims4 object. - * */ - - - //! - //! - //! - public Dims4() { super((Pointer)null); allocate(); } - private native void allocate(); - - /** - * \brief Construct a Dims4 from 4 elements. - * - * @param d0 The first element. - * @param d1 The second element. - * @param d2 The third element. - * @param d3 The fourth element. 
- * */ - public Dims4(int d0, int d1, int d2, int d3) { super((Pointer)null); allocate(d0, d1, d2, d3); } - private native void allocate(int d0, int d1, int d2, int d3); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/DimsExprs.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/DimsExprs.java deleted file mode 100644 index 76cd6e7f3ec..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/DimsExprs.java +++ /dev/null @@ -1,50 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \class DimsExprs - * - * Analog of class Dims with expressions instead of constants for the dimensions. - * */ -@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class DimsExprs extends Pointer { - static { Loader.load(); } - /** Default native constructor. */ - public DimsExprs() { super((Pointer)null); allocate(); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public DimsExprs(long size) { super((Pointer)null); allocateArray(size); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public DimsExprs(Pointer p) { super(p); } - private native void allocate(); - private native void allocateArray(long size); - @Override public DimsExprs position(long position) { - return (DimsExprs)super.position(position); - } - @Override public DimsExprs getPointer(long i) { - return new DimsExprs((Pointer)this).offsetAddress(i); - } - - /** The number of dimensions. */ - public native int nbDims(); public native DimsExprs nbDims(int setter); - /** The extent of each dimension. */ - public native @Const IDimensionExpr d(int i); public native DimsExprs d(int i, IDimensionExpr setter); - @MemberGetter public native @Cast("const nvinfer1::IDimensionExpr**") PointerPointer d(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/DimsHW.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/DimsHW.java deleted file mode 100644 index ddcc05fffb7..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/DimsHW.java +++ /dev/null @@ -1,101 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \class DimsHW - * \brief Descriptor for two-dimensional spatial data. - * */ -@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class DimsHW extends Dims2 { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public DimsHW(Pointer p) { super(p); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public DimsHW(long size) { super((Pointer)null); allocateArray(size); } - private native void allocateArray(long size); - @Override public DimsHW position(long position) { - return (DimsHW)super.position(position); - } - @Override public DimsHW getPointer(long i) { - return new DimsHW((Pointer)this).offsetAddress(i); - } - - /** - * \brief Construct an empty DimsHW object. - * */ - - - //! - //! - //! - public DimsHW() { super((Pointer)null); allocate(); } - private native void allocate(); - - /** - * \brief Construct a DimsHW given height and width. - * - * @param height the height of the data - * @param width the width of the data - * */ - - - //! - //! - //! - public DimsHW(int height, int width) { super((Pointer)null); allocate(height, width); } - private native void allocate(int height, int width); - - /** - * \brief Get the height. - * - * @return The height. - * */ - - - //! - //! - //! - public native @ByRef IntPointer h(); - - /** - * \brief Get the height. - * - * @return The height. - * */ - - /** - * \brief Get the width. - * - * @return The width. - * */ - - - //! - //! - //! - public native @ByRef IntPointer w(); - - /** - * \brief Get the width. - * - * @return The width. 
- * */ -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/DynamicPluginTensorDesc.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/DynamicPluginTensorDesc.java deleted file mode 100644 index c3728e51cd4..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/DynamicPluginTensorDesc.java +++ /dev/null @@ -1,53 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \class DynamicPluginTensorDesc - * - * Summarizes tensors that a plugin might see for an input or output. - * */ -@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class DynamicPluginTensorDesc extends Pointer { - static { Loader.load(); } - /** Default native constructor. */ - public DynamicPluginTensorDesc() { super((Pointer)null); allocate(); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public DynamicPluginTensorDesc(long size) { super((Pointer)null); allocateArray(size); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public DynamicPluginTensorDesc(Pointer p) { super(p); } - private native void allocate(); - private native void allocateArray(long size); - @Override public DynamicPluginTensorDesc position(long position) { - return (DynamicPluginTensorDesc)super.position(position); - } - @Override public DynamicPluginTensorDesc getPointer(long i) { - return new DynamicPluginTensorDesc((Pointer)this).offsetAddress(i); - } - - /** Information required to interpret a pointer to tensor data, except that desc.dims has -1 in place of any runtime dimension. */ - public native @ByRef PluginTensorDesc desc(); public native DynamicPluginTensorDesc desc(PluginTensorDesc setter); - - /** Lower bounds on tensor’s dimensions */ - public native @ByRef @Cast("nvinfer1::Dims*") Dims32 min(); public native DynamicPluginTensorDesc min(Dims32 setter); - - /** Upper bounds on tensor’s dimensions */ - public native @ByRef @Cast("nvinfer1::Dims*") Dims32 max(); public native DynamicPluginTensorDesc max(Dims32 setter); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/EnumMaxImpl.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/EnumMaxImpl.java deleted file mode 100644 index 95f399a78bf..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/EnumMaxImpl.java +++ /dev/null @@ -1,43 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static 
org.bytedeco.tensorrt.global.nvinfer.*; - -/** Maximum number of elements in DataType enum. @see DataType */ -@Name("nvinfer1::impl::EnumMaxImpl") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class EnumMaxImpl extends Pointer { - static { Loader.load(); } - /** Default native constructor. */ - public EnumMaxImpl() { super((Pointer)null); allocate(); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public EnumMaxImpl(long size) { super((Pointer)null); allocateArray(size); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public EnumMaxImpl(Pointer p) { super(p); } - private native void allocate(); - private native void allocateArray(long size); - @Override public EnumMaxImpl position(long position) { - return (EnumMaxImpl)super.position(position); - } - @Override public EnumMaxImpl getPointer(long i) { - return new EnumMaxImpl((Pointer)this).offsetAddress(i); - } - - // Declaration of kVALUE that represents maximum number of elements in DataType enum - @MemberGetter public static native int kVALUE(); - public static final int kVALUE = kVALUE(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IActivationLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IActivationLayer.java deleted file mode 100644 index f6a05189adc..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IActivationLayer.java +++ /dev/null @@ -1,115 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static 
org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \class IActivationLayer - * - * \brief An Activation layer in a network definition. - * - * This layer applies a per-element activation function to its input. - * - * The output has the same shape as the input. - * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. - * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IActivationLayer extends ILayer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IActivationLayer(Pointer p) { super(p); } - - /** - * \brief Set the type of activation to be performed. - * - * On the DLA, the valid activation types are kRELU, kSIGMOID, kTANH, and kCLIP. - * - * @see getActivationType(), ActivationType - * */ - - - //! - //! - //! - public native @NoException(true) void setActivationType(ActivationType type); - public native @NoException(true) void setActivationType(@Cast("nvinfer1::ActivationType") int type); - - /** - * \brief Get the type of activation to be performed. - * - * @see setActivationType(), ActivationType - * */ - - - //! - //! - //! - //! - public native @NoException(true) ActivationType getActivationType(); - - /** - * \brief Set the alpha parameter (must be finite). - * - * This parameter is used by the following activations: - * LeakyRelu, Elu, Selu, Softplus, Clip, HardSigmoid, ScaledTanh, - * ThresholdedRelu. - * - * It is ignored by the other activations. - * - * @see getAlpha(), setBeta() */ - - - //! - //! - //! - //! - public native @NoException(true) void setAlpha(float alpha); - - /** - * \brief Set the beta parameter (must be finite). 
- * - * This parameter is used by the following activations: - * Selu, Softplus, Clip, HardSigmoid, ScaledTanh. - * - * It is ignored by the other activations. - * - * @see getBeta(), setAlpha() */ - - - //! - //! - public native @NoException(true) void setBeta(float beta); - - /** - * \brief Get the alpha parameter. - * - * @see getBeta(), setAlpha() */ - - - //! - //! - public native @NoException(true) float getAlpha(); - - /** - * \brief Get the beta parameter. - * - * @see getAlpha(), setBeta() */ - public native @NoException(true) float getBeta(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IAlgorithm.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IAlgorithm.java deleted file mode 100644 index 51ee6acea48..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IAlgorithm.java +++ /dev/null @@ -1,90 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \class IAlgorithm - * \brief Describes a variation of execution of a layer. - * An algorithm is represented by IAlgorithmVariant and the IAlgorithmIOInfo for each of its inputs and outputs. - * An algorithm can be selected or reproduced using AlgorithmSelector::selectAlgorithms()." 
- * @see IAlgorithmIOInfo, IAlgorithmVariant, IAlgorithmSelector::selectAlgorithms() - * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. - * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IAlgorithm extends INoCopy { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IAlgorithm(Pointer p) { super(p); } - - /** - * \brief Returns the format of an Algorithm input or output. Algorithm inputs are incrementally numbered first, - * followed by algorithm outputs. - * @param index Index of the input or output of the algorithm. Incremental numbers assigned to indices of inputs - * and the outputs. - * - * @return a reference to IAlgorithmIOInfo specified by index or the first algorithm if index is out of range. - * - * @deprecated API will be removed in TensorRT 10.0, use IAlgorithm::getAlgorithmIOInfoByIndex instead. - * */ - - - //! - //! - public native @Const @Deprecated @ByRef @NoException(true) IAlgorithmIOInfo getAlgorithmIOInfo(int index); - - /** - * \brief Returns the algorithm variant. - * */ - - - //! - //! - public native @Const @ByRef @NoException(true) IAlgorithmVariant getAlgorithmVariant(); - - /** - * \brief The time in milliseconds to execute the algorithm. - * */ - - - //! - //! - public native @NoException(true) float getTimingMSec(); - - /** - * \brief The size of the GPU temporary memory in bytes which the algorithm uses at execution time. - * */ - - - //! - //! - //! - public native @Cast("std::size_t") @NoException(true) long getWorkspaceSize(); - - /** - * \brief Returns the format of an Algorithm input or output. Algorithm inputs are incrementally numbered first, - * followed by algorithm outputs. - * @param index Index of the input or output of the algorithm. Incremental numbers assigned to indices of inputs - * and the outputs. 
- * - * @return a pointer to a IAlgorithmIOInfo interface or nullptr if index is out of range. - * */ - public native @Const @NoException(true) IAlgorithmIOInfo getAlgorithmIOInfoByIndex(int index); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IAlgorithmContext.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IAlgorithmContext.java deleted file mode 100644 index 66ccf379bba..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IAlgorithmContext.java +++ /dev/null @@ -1,72 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \class IAlgorithmContext - * - * \brief Describes the context and requirements, that could be fulfilled by one or more instances of IAlgorithm. - * @see IAlgorithm - * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. - * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IAlgorithmContext extends INoCopy { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IAlgorithmContext(Pointer p) { super(p); } - - /** - * \brief Return name of the algorithm node. - * This is a unique identifier for the IAlgorithmContext. - * */ - - - //! - //! 
- public native @NoException(true) String getName(); - - /** - * \brief Get the minimum / optimum / maximum dimensions for input or output tensor. - * @param index Index of the input or output of the algorithm. Incremental numbers assigned to indices of inputs - * and the outputs. - * @param select Which of the minimum, optimum, or maximum dimensions to be queried. - * */ - - - //! - //! - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getDimensions(int index, OptProfileSelector select); - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getDimensions(int index, @Cast("nvinfer1::OptProfileSelector") int select); - - /** - * \brief Return number of inputs of the algorithm. - * */ - - - //! - //! - public native @NoException(true) int getNbInputs(); - - /** - * \brief Return number of outputs of the algorithm. - * */ - public native @NoException(true) int getNbOutputs(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IAlgorithmIOInfo.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IAlgorithmIOInfo.java deleted file mode 100644 index d095cc4a020..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IAlgorithmIOInfo.java +++ /dev/null @@ -1,60 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \class IAlgorithmIOInfo - * - * \brief 
Carries information about input or output of the algorithm. - * IAlgorithmIOInfo for all the input and output along with IAlgorithmVariant denotes the variation of algorithm - * and can be used to select or reproduce an algorithm using IAlgorithmSelector::selectAlgorithms(). - * @see IAlgorithmVariant, IAlgorithm, IAlgorithmSelector::selectAlgorithms() - * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. - * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IAlgorithmIOInfo extends INoCopy { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IAlgorithmIOInfo(Pointer p) { super(p); } - - /** - * \brief Return TensorFormat of the input/output of algorithm. - * */ - - - //! - //! - public native @NoException(true) TensorFormat getTensorFormat(); - - /** - * \brief Return DataType of the input/output of algorithm. - * */ - - - //! - //! - public native @NoException(true) DataType getDataType(); - - /** - * \brief Return strides of the input/output tensor of algorithm. 
- * */ - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getStrides(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IAlgorithmSelector.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IAlgorithmSelector.java deleted file mode 100644 index 3bf2098255b..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IAlgorithmSelector.java +++ /dev/null @@ -1,76 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - // IAlgorithm - -/** - * \class IAlgorithmSelector - * - * \brief Interface implemented by application for selecting and reporting algorithms of a layer provided by the - * builder. - * \note A layer in context of algorithm selection may be different from ILayer in INetworkDefiniton. - * For example, an algorithm might be implementing a conglomeration of multiple ILayers in INetworkDefinition. - * */ -@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IAlgorithmSelector extends Pointer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IAlgorithmSelector(Pointer p) { super(p); } - - /** - * \brief Select Algorithms for a layer from the given list of algorithm choices. - * - * @return The number of choices selected from [0, nbChoices-1]. 
- * @param context The context for which the algorithm choices are valid. - * @param choices The list of algorithm choices to select for implementation of this layer. - * @param nbChoices Number of algorithm choices. - * @param selection The user writes indices of selected choices in to selection buffer which is of size nbChoices. - * - * \note TensorRT uses its default algorithm selection to choose from the list provided. - * If return value is 0, TensorRT’s default algorithm selection is used unless strict type constraints are - * set. The list of choices is valid only for this specific algorithm context. - * */ - - //! - //! - //! - //! - public native @NoException(true) int selectAlgorithms(@Const @ByRef IAlgorithmContext context, @Cast("const nvinfer1::IAlgorithm*const*") PointerPointer choices, - int nbChoices, IntPointer selection); - public native @NoException(true) int selectAlgorithms(@Const @ByRef IAlgorithmContext context, @Const @ByPtrPtr IAlgorithm choices, - int nbChoices, IntPointer selection); - public native @NoException(true) int selectAlgorithms(@Const @ByRef IAlgorithmContext context, @Const @ByPtrPtr IAlgorithm choices, - int nbChoices, IntBuffer selection); - public native @NoException(true) int selectAlgorithms(@Const @ByRef IAlgorithmContext context, @Const @ByPtrPtr IAlgorithm choices, - int nbChoices, int[] selection); - /** - * \brief Called by TensorRT to report choices it made. - * - * \note For a given optimization profile, this call comes after all calls to selectAlgorithms. - * algoChoices[i] is the choice that TensorRT made for algoContexts[i], for i in [0, nbAlgorithms-1] - * - * @param algoContexts The list of all algorithm contexts. - * @param algoChoices The list of algorithm choices made by TensorRT - * @param nbAlgorithms The size of algoContexts as well as algoChoices. 
- * */ - public native @NoException(true) void reportAlgorithms(@Cast("const nvinfer1::IAlgorithmContext*const*") PointerPointer algoContexts, @Cast("const nvinfer1::IAlgorithm*const*") PointerPointer algoChoices, - int nbAlgorithms); - public native @NoException(true) void reportAlgorithms(@Const @ByPtrPtr IAlgorithmContext algoContexts, @Const @ByPtrPtr IAlgorithm algoChoices, - int nbAlgorithms); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IAlgorithmVariant.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IAlgorithmVariant.java deleted file mode 100644 index b3cd1c32538..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IAlgorithmVariant.java +++ /dev/null @@ -1,52 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \class IAlgorithmVariant - * - * \brief provides a unique 128-bit identifier, which along with the input and output information - * denotes the variation of algorithm and can be used to select or reproduce an algorithm, - * using IAlgorithmSelector::selectAlgorithms() - * @see IAlgorithmIOInfo, IAlgorithm, IAlgorithmSelector::selectAlgorithms() - * \note A single implementation can have multiple tactics. - * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. 
- * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IAlgorithmVariant extends INoCopy { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IAlgorithmVariant(Pointer p) { super(p); } - - /** - * \brief Return implementation of the algorithm. - * */ - - - //! - //! - public native @Cast("int64_t") @NoException(true) long getImplementation(); - - /** - * \brief Return tactic of the algorithm. - * */ - public native @Cast("int64_t") @NoException(true) long getTactic(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IBuilder.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IBuilder.java deleted file mode 100644 index 29aeb415fd7..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IBuilder.java +++ /dev/null @@ -1,325 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \class IBuilder - * - * \brief Builds an engine from a network definition. - * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. 
- * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IBuilder extends INoCopy { - static { Loader.load(); } - /** Default native constructor. */ - public IBuilder() { super((Pointer)null); allocate(); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public IBuilder(long size) { super((Pointer)null); allocateArray(size); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IBuilder(Pointer p) { super(p); } - private native void allocate(); - private native void allocateArray(long size); - @Override public IBuilder position(long position) { - return (IBuilder)super.position(position); - } - @Override public IBuilder getPointer(long i) { - return new IBuilder((Pointer)this).offsetAddress(i); - } - - - /** - * \brief Set the maximum batch size. - * - * @param batchSize The maximum batch size which can be used at execution time, and also the batch size for which - * the engine will be optimized. - * - * @see getMaxBatchSize() - * */ - - - //! - //! - //! - //! - public native @NoException(true) void setMaxBatchSize(int batchSize); - - /** - * \brief Get the maximum batch size. - * - * @return The maximum batch size. - * - * @see setMaxBatchSize() - * @see getMaxDLABatchSize() - * */ - - - //! - //! - public native @NoException(true) int getMaxBatchSize(); - - /** - * \brief Determine whether the platform has fast native fp16. - * */ - - - //! - //! - public native @Cast("bool") @NoException(true) boolean platformHasFastFp16(); - - /** - * \brief Determine whether the platform has fast native int8. - * */ - - - //! - //! - //! - //! - public native @Cast("bool") @NoException(true) boolean platformHasFastInt8(); - - /** - * \brief Destroy this object. - * - * @deprecated Deprecated interface will be removed in TensorRT 10.0. - * - * \warning Calling destroy on a managed pointer will result in a double-free error. - * */ - - - //! - //! 
- //! - public native @Deprecated @NoException(true) void destroy(); - - /** - * \brief Get the maximum batch size DLA can support. - * For any tensor the total volume of index dimensions combined(dimensions other than CHW) with the requested - * batch size should not exceed the value returned by this function. - * - * \warning getMaxDLABatchSize does not work with dynamic shapes. - * */ - - - //! - //! - public native @NoException(true) int getMaxDLABatchSize(); - - /** - * \brief Return the number of DLA engines available to this builder. - * */ - - - //! - //! - //! - //! - public native @NoException(true) int getNbDLACores(); - - /** - * \brief Set the GPU allocator. - * @param allocator Set the GPU allocator to be used by the builder. All GPU memory acquired will use this - * allocator. If NULL is passed, the default allocator will be used. - * - * Default: uses cudaMalloc/cudaFree. - * - * \note This allocator will be passed to any engines created via the builder; thus the lifetime of the allocator - * must span the lifetime of those engines as - * well as that of the builder. If nullptr is passed, the default allocator will be used. - * */ - - - //! - //! - //! - public native @NoException(true) void setGpuAllocator(IGpuAllocator allocator); - - /** - * \brief Create a builder configuration object. - * - * @see IBuilderConfig - * */ - - - //! - //! - //! - //! - //! - public native @NoException(true) IBuilderConfig createBuilderConfig(); - - /** - * \brief Builds an engine for the given INetworkDefinition and given IBuilderConfig. - * - * It enables the builder to build multiple engines based on the same network definition, but with different - * builder configurations. - * - * \note This function will synchronize the cuda stream returned by \p config.getProfileStream() before returning. - * - * @deprecated API will be removed in TensorRT 10.0, use IBuilder::buildSerializedNetwork instead. - * */ - - //! - //! - //! - //! 
- public native @Deprecated @NoException(true) ICudaEngine buildEngineWithConfig( - @ByRef INetworkDefinition network, @ByRef IBuilderConfig config); - - /** \brief Create a network definition object - * - * Creates a network definition object with immutable properties specified using the flags parameter. Providing - * the kDEFAULT flag as parameter mimics the behaviour of createNetwork(). CreateNetworkV2 supports dynamic shapes - * and explicit batch dimensions when used with NetworkDefinitionCreationFlag::kEXPLICIT_BATCH flag. - * - * @param flags Bitset of NetworkDefinitionCreationFlags specifying network properties combined with bitwise OR. - * e.g., 1U << NetworkDefinitionCreationFlag::kEXPLICIT_BATCH - * - * @see INetworkDefinition, NetworkDefinitionCreationFlags - * */ - - //! - //! - //! - public native @NoException(true) INetworkDefinition createNetworkV2(@Cast("nvinfer1::NetworkDefinitionCreationFlags") int flags); - - /** \brief Create a new optimization profile. - * - * If the network has any dynamic input tensors, the appropriate calls to setDimensions() must be made. - * Likewise, if there are any shape input tensors, the appropriate calls to setShapeValues() are required. - * The builder retains ownership of the created optimization profile and returns a raw pointer, i.e. the users - * must not attempt to delete the returned pointer. - * - * @see IOptimizationProfile - * */ - - - //! - //! - //! - //! - //! - public native @NoException(true) IOptimizationProfile createOptimizationProfile(); - - /** - * \brief Set the ErrorRecorder for this interface - * - * Assigns the ErrorRecorder to this interface. The ErrorRecorder will track all errors during execution. - * This function will call incRefCount of the registered ErrorRecorder at least once. Setting - * recorder to nullptr unregisters the recorder with the interface, resulting in a call to decRefCount if - * a recorder has been registered. 
- * - * If an error recorder is not set, messages will be sent to the global log stream. - * - * @param recorder The error recorder to register with this interface. */ - // - /** @see getErrorRecorder() - /** */ - - - //! - //! - //! - //! - //! - public native @NoException(true) void setErrorRecorder(IErrorRecorder recorder); - - /** - * \brief get the ErrorRecorder assigned to this interface. - * - * Retrieves the assigned error recorder object for the given class. - * A nullptr will be returned if setErrorRecorder has not been called. - * - * @return A pointer to the IErrorRecorder object that has been registered. - * - * @see setErrorRecorder() - * */ - - - //! - //! - public native @NoException(true) IErrorRecorder getErrorRecorder(); - - /** - * \brief Resets the builder state to default values. - * */ - - - //! - //! - public native @NoException(true) void reset(); - - /** - * \brief Determine whether the platform has TF32 support. - * */ - - - //! - //! - //! - //! - //! - //! - //! - public native @Cast("bool") @NoException(true) boolean platformHasTf32(); - - /** - * \brief Builds and serializes a network for the given INetworkDefinition and IBuilderConfig. - * - * This function allows building and serialization of a network without creating an engine. - * - * @param network Network definition. - * @param config Builder configuration. - * - * @return A pointer to a IHostMemory object that contains a serialized network. - * - * \note This function will synchronize the cuda stream returned by \p config.getProfileStream() before returning. - * - * @see INetworkDefinition, IBuilderConfig, IHostMemory - * */ - - - //! - //! - //! - //! - //! - //! - public native @NoException(true) IHostMemory buildSerializedNetwork(@ByRef INetworkDefinition network, @ByRef IBuilderConfig config); - - /** - * \brief Checks that a network is within the scope of the IBuilderConfig settings. - * - * @param network The network definition to check for configuration compliance. 
- * @param config The configuration of the builder to use when checking \p network. - * - * Given an INetworkDefinition, \p network, and an IBuilderConfig, \p config, check if - * the network falls within the constraints of the builder configuration based on the - * EngineCapability, BuilderFlag, and DeviceType. If the network is within the constraints, - * then the function returns true, and false if a violation occurs. This function reports - * the conditions that are violated to the registered ErrorRecorder. - * - * @return True if network is within the scope of the restrictions specified by the builder config, - * false otherwise. - * - * \note This function will synchronize the cuda stream returned by \p config.getProfileStream() before returning. - * */ - public native @Cast("bool") @NoException(true) boolean isNetworkSupported(@Const @ByRef INetworkDefinition network, @Const @ByRef IBuilderConfig config); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IBuilderConfig.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IBuilderConfig.java deleted file mode 100644 index 18193a4675a..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IBuilderConfig.java +++ /dev/null @@ -1,769 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \class IBuilderConfig - * - * \brief Holds 
properties for configuring a builder to produce an engine. @see BuilderFlags - * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IBuilderConfig extends INoCopy { - static { Loader.load(); } - /** Default native constructor. */ - public IBuilderConfig() { super((Pointer)null); allocate(); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public IBuilderConfig(long size) { super((Pointer)null); allocateArray(size); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IBuilderConfig(Pointer p) { super(p); } - private native void allocate(); - private native void allocateArray(long size); - @Override public IBuilderConfig position(long position) { - return (IBuilderConfig)super.position(position); - } - @Override public IBuilderConfig getPointer(long i) { - return new IBuilderConfig((Pointer)this).offsetAddress(i); - } - - - /** - * \brief Set the number of minimization iterations used when timing layers. - * - * When timing layers, the builder minimizes over a set of average times for layer execution. This parameter - * controls the number of iterations used in minimization. The builder may sometimes run layers for more - * iterations to improve timing accuracy if this parameter is set to a small value and the runtime of the - * layer is short. - * - * @see getMinTimingIterations() - * */ - - - //! - //! - //! - //! - public native @NoException(true) void setMinTimingIterations(int minTiming); - - /** - * \brief Query the number of minimization iterations. - * - * By default the minimum number of iterations is 2. - * - * @see setMinTimingIterations() - * */ - - - //! - //! - //! - //! - public native @NoException(true) int getMinTimingIterations(); - - /** - * \brief Set the number of averaging iterations used when timing layers. - * - * When timing layers, the builder minimizes over a set of average times for layer execution. 
This parameter - * controls the number of iterations used in averaging. - * - * @see getAvgTimingIterations() - * */ - - - //! - //! - //! - //! - public native @NoException(true) void setAvgTimingIterations(int avgTiming); - - /** - * \brief Query the number of averaging iterations. - * - * By default the number of averaging iterations is 1. - * - * @see setAvgTimingIterations() - * */ - - - //! - //! - //! - //! - public native @NoException(true) int getAvgTimingIterations(); - - /** - * \brief Configure the builder to target specified EngineCapability flow. - * - * The flow means a sequence of API calls that allow an application to set up a runtime, engine, - * and execution context in order to run inference. - * - * The supported flows are specified in the EngineCapability enum. - * */ - - - //! - //! - //! - //! - public native @NoException(true) void setEngineCapability(EngineCapability capability); - public native @NoException(true) void setEngineCapability(@Cast("nvinfer1::EngineCapability") int capability); - - /** - * \brief Query EngineCapability flow configured for the builder. - * - * By default it returns EngineCapability::kSTANDARD. - * - * @see setEngineCapability() - * */ - - - //! - //! - //! - public native @NoException(true) EngineCapability getEngineCapability(); - - /** - * \brief Set Int8 Calibration interface. - * - * The calibrator is to minimize the information loss during the INT8 quantization process. - * */ - - - //! - //! - public native @NoException(true) void setInt8Calibrator(IInt8Calibrator calibrator); - - /** - * \brief Get Int8 Calibration interface. - * */ - - - //! - //! - //! - //! - public native @NoException(true) IInt8Calibrator getInt8Calibrator(); - - /** - * \brief Set the maximum workspace size. - * - * @param workspaceSize The maximum GPU temporary memory which the engine can use at execution time. - * - * @see getMaxWorkspaceSize() - * */ - - - //! - //! - //! - //! - //! 
- public native @NoException(true) void setMaxWorkspaceSize(@Cast("std::size_t") long workspaceSize); - - /** - * \brief Get the maximum workspace size. - * - * By default the workspace size is 0, which means there is no temporary memory. - * - * @return The maximum workspace size. - * - * @see setMaxWorkspaceSize() - * */ - - - //! - //! - //! - //! - //! - //! - public native @Cast("std::size_t") @NoException(true) long getMaxWorkspaceSize(); - - /** - * \brief Set the build mode flags to turn on builder options for this network. - * - * The flags are listed in the BuilderFlags enum. - * The flags set configuration options to build the network. - * - * @param builderFlags The build option for an engine. - * - * \note This function will override the previous set flags, rather than bitwise ORing the new flag. - * - * @see getFlags() - * */ - - - //! - //! - //! - //! - public native @NoException(true) void setFlags(@Cast("nvinfer1::BuilderFlags") int builderFlags); - - /** - * \brief Get the build mode flags for this builder config. Defaults to 0. - * - * @return The build options as a bitmask. - * - * @see setFlags() - * */ - - - //! - //! - //! - //! - public native @Cast("nvinfer1::BuilderFlags") @NoException(true) int getFlags(); - - /** - * \brief clear a single build mode flag. - * - * clears the builder mode flag from the enabled flags. - * - * @see setFlags() - * */ - - - //! - //! - //! - //! - public native @NoException(true) void clearFlag(BuilderFlag builderFlag); - public native @NoException(true) void clearFlag(@Cast("nvinfer1::BuilderFlag") int builderFlag); - - /** - * \brief Set a single build mode flag. - * - * Add the input builder mode flag to the already enabled flags. - * - * @see setFlags() - * */ - - - //! - //! - //! - //! 
- public native @NoException(true) void setFlag(BuilderFlag builderFlag); - public native @NoException(true) void setFlag(@Cast("nvinfer1::BuilderFlag") int builderFlag); - - /** - * \brief Returns true if the build mode flag is set - * - * @see getFlags() - * - * @return True if flag is set, false if unset. - * */ - - - //! - //! - //! - //! - public native @Cast("bool") @NoException(true) boolean getFlag(BuilderFlag builderFlag); - public native @Cast("bool") @NoException(true) boolean getFlag(@Cast("nvinfer1::BuilderFlag") int builderFlag); - - /** - * \brief Set the device that this layer must execute on. - * @param deviceType that this layer must execute on. - * If DeviceType is not set or is reset, TensorRT will use the default DeviceType set in the builder. - * - * \note The device type for a layer must be compatible with the safety flow (if specified). - * For example a layer cannot be marked for DLA execution while the builder is configured for kSAFETY. - * - * @see getDeviceType() - * */ - - - //! - //! - public native @NoException(true) void setDeviceType(@Const ILayer layer, DeviceType deviceType); - public native @NoException(true) void setDeviceType(@Const ILayer layer, @Cast("nvinfer1::DeviceType") int deviceType); - - /** - * \brief Get the device that this layer executes on. - * @return Returns DeviceType of the layer. - * */ - - - //! - //! - public native @NoException(true) DeviceType getDeviceType(@Const ILayer layer); - - /** - * \brief whether the DeviceType has been explicitly set for this layer - * @return true if device type is not default - * @see setDeviceType() getDeviceType() resetDeviceType() - * */ - - - //! - //! - //! - public native @Cast("bool") @NoException(true) boolean isDeviceTypeSet(@Const ILayer layer); - - /** - * \brief reset the DeviceType for this layer - * - * @see setDeviceType() getDeviceType() isDeviceTypeSet() - * */ - - - //! - //! 
- public native @NoException(true) void resetDeviceType(@Const ILayer layer); - - /** - * \brief Checks if a layer can run on DLA. - * @return status true if the layer can on DLA else returns false. - * */ - - - //! - //! - //! - //! - //! - public native @Cast("bool") @NoException(true) boolean canRunOnDLA(@Const ILayer layer); - - /** - * \brief Sets the DLA core used by the network. - * @param dlaCore The DLA core to execute the engine on (0 to N-1). Default value is 0. - * - * It can be used to specify which DLA core to use via indexing, if multiple DLA cores are available. - * - * @see IRuntime::setDLACore() getDLACore() - * - * \warning Starting with TensorRT 8, the default value will be -1 if the DLA is not specified or unused. - * */ - - - //! - //! - //! - public native @NoException(true) void setDLACore(int dlaCore); - - /** - * \brief Get the DLA core that the engine executes on. - * @return If setDLACore is called, returns DLA core from 0 to N-1, else returns 0. - * - * \warning Starting with TensorRT 8, the default value will be -1 if the DLA is not specified or unused. - * */ - - - //! - //! - public native @NoException(true) int getDLACore(); - - /** - * \brief Sets the default DeviceType to be used by the builder. It ensures that all the layers that can run on - * this device will run on it, unless setDeviceType is used to override the default DeviceType for a layer. - * @see getDefaultDeviceType() - * */ - - - //! - //! - //! - public native @NoException(true) void setDefaultDeviceType(DeviceType deviceType); - public native @NoException(true) void setDefaultDeviceType(@Cast("nvinfer1::DeviceType") int deviceType); - - /** - * \brief Get the default DeviceType which was set by setDefaultDeviceType. - * - * By default it returns DeviceType::kGPU. - * */ - - - //! - //! - //! - public native @NoException(true) DeviceType getDefaultDeviceType(); - - /** - * \brief Resets the builder configuration to defaults. 
- * - * When initializing a builder config object, we can call this function. - * */ - - - //! - //! - //! - //! - //! - public native @NoException(true) void reset(); - - /** - * \brief De-allocates any internally allocated memory. - * - * When destroying a builder config object, we can call this function. - * - * @deprecated Deprecated interface will be removed in TensorRT 10.0. - * - * \warning Calling destroy on a managed pointer will result in a double-free error. - * */ - - - //! - //! - //! - //! - public native @Deprecated @NoException(true) void destroy(); - - /** - * \brief Set the cuda stream that is used to profile this network. - * - * @param stream The cuda stream used for profiling by the builder. - * - * @see getProfileStream() - * */ - - - //! - //! - //! - //! - public native @NoException(true) void setProfileStream(CUstream_st stream); - - /** - * \brief Get the cuda stream that is used to profile this network. - * - * @return The cuda stream set by setProfileStream, nullptr if setProfileStream has not been called. - * - * @see setProfileStream() - * */ - - - //! - //! - //! - //! - public native @NoException(true) CUstream_st getProfileStream(); - - /** - * \brief Add an optimization profile. - * - * This function must be called at least once if the network has dynamic or shape input tensors. - * This function may be called at most once when building a refittable engine, as more than - * a single optimization profile are not supported for refittable engines. - * - * @param profile The new optimization profile, which must satisfy profile->isValid() == true - * @return The index of the optimization profile (starting from 0) if the input is valid, or -1 if the input is - * not valid. - * */ - - - //! - //! - //! - //! - public native @NoException(true) int addOptimizationProfile(@Const IOptimizationProfile profile); - - /** - * \brief Get number of optimization profiles. 
- * - * This is one higher than the index of the last optimization profile that has be defined (or - * zero, if none has been defined yet). - * - * @return The number of the optimization profiles. - * */ - - - //! - //! - //! - //! - public native @NoException(true) int getNbOptimizationProfiles(); - - /** - * \brief Set verbosity level of layer information exposed in NVTX annotations. - * - * Control how much layer information will be exposed in NVTX annotations. - * - * @see ProfilingVerbosity, getProfilingVerbosity() - * */ - - - //! - //! - //! - //! - public native @NoException(true) void setProfilingVerbosity(ProfilingVerbosity verbosity); - public native @NoException(true) void setProfilingVerbosity(@Cast("nvinfer1::ProfilingVerbosity") int verbosity); - - /** - * \brief Get verbosity level of layer information exposed in NVTX annotations. - * - * Get the current setting of verbosity level of layer information exposed in - * NVTX annotations. Default value is ProfilingVerbosity::kDEFAULT. - * - * @see ProfilingVerbosity, setProfilingVerbosity() - * */ - - - //! - //! - public native @NoException(true) ProfilingVerbosity getProfilingVerbosity(); - - /** - * \brief Set Algorithm Selector. - * - * @param selector The algorithm selector to be set in the build config. */ - - - //! - //! - public native @NoException(true) void setAlgorithmSelector(IAlgorithmSelector selector); - - /** - * \brief Get Algorithm Selector. - * */ - - - //! - //! - //! - //! - public native @NoException(true) IAlgorithmSelector getAlgorithmSelector(); - - /** - * \brief Add a calibration profile. - * - * Calibration optimization profile must be set if int8 calibration is used to set scales for a network with - * runtime dimensions. - * - * @param profile The new calibration profile, which must satisfy profile->isValid() == true or be nullptr. - * MIN and MAX values will be overwritten by kOPT. - * @return True if the calibration profile was set correctly. - * */ - - - //! - //! - //! 
- public native @Cast("bool") @NoException(true) boolean setCalibrationProfile(@Const IOptimizationProfile profile); - - /** - * \brief Get the current calibration profile. - * - * @return A pointer to the current calibration profile or nullptr if calibration profile is unset. - * */ - - - //! - //! - //! - //! - //! - //! - public native @Const @NoException(true) IOptimizationProfile getCalibrationProfile(); - - /** - * \brief Set the quantization flags. - * - * The flags are listed in the QuantizationFlag enum. - * The flags set configuration options to quantize the network in int8. - * - * @param flags The quantization flags. - * - * \note This function will override the previous set flags, rather than bitwise ORing the new flag. - * - * @see getQuantizationFlags() - * */ - - - //! - //! - //! - //! - public native @NoException(true) void setQuantizationFlags(@Cast("nvinfer1::QuantizationFlags") int flags); - - /** - * \brief Get the quantization flags. - * - * @return The quantization flags as a bitmask. - * - * @see setQuantizationFlag() - * */ - - - //! - //! - //! - //! - public native @Cast("nvinfer1::QuantizationFlags") @NoException(true) int getQuantizationFlags(); - - /** - * \brief clear a quantization flag. - * - * Clears the quantization flag from the enabled quantization flags. - * - * @see setQuantizationFlags() - * */ - - - //! - //! - //! - //! - public native @NoException(true) void clearQuantizationFlag(QuantizationFlag flag); - public native @NoException(true) void clearQuantizationFlag(@Cast("nvinfer1::QuantizationFlag") int flag); - - /** - * \brief Set a single quantization flag. - * - * Add the input quantization flag to the already enabled quantization flags. - * - * @see setQuantizationFlags() - * */ - - - //! - //! - //! - //! 
- public native @NoException(true) void setQuantizationFlag(QuantizationFlag flag); - public native @NoException(true) void setQuantizationFlag(@Cast("nvinfer1::QuantizationFlag") int flag); - - /** - * \brief Returns true if the quantization flag is set. - * - * @see getQuantizationFlags() - * - * @return True if quantization flag is set, false if unset. - * */ - - - //! - //! - //! - //! - //! - //! - //! - //! - public native @Cast("bool") @NoException(true) boolean getQuantizationFlag(QuantizationFlag flag); - public native @Cast("bool") @NoException(true) boolean getQuantizationFlag(@Cast("nvinfer1::QuantizationFlag") int flag); - - /** - * \brief Set tactic sources. - * - * This bitset controls which tactic sources TensorRT is allowed to use for tactic - * selection. - * - * By default, kCUBLAS and kCUDNN are always enabled. kCUBLAS_LT is enabled for x86 - * platforms as well as non-x86 platforms when CUDA >= 11.0. - * - * Multiple tactic sources may be combined with a bitwise OR operation. For example, - * to enable cublas and cublasLt as tactic sources, use a value of: - * - * 1U << static_cast(TacticSource::kCUBLAS) | 1U << - * static_cast(TacticSource::kCUBLAS_LT) - * - * @see getTacticSources - * - * @return true if the tactic sources in the build configuration were updated. - * The tactic sources in the build configuration will not be updated if the provided value is invalid. - * */ - - - //! - //! - //! - //! - //! - public native @Cast("bool") @NoException(true) boolean setTacticSources(@Cast("nvinfer1::TacticSources") int tacticSources); - - /** - * \brief Get tactic sources. - * - * Get the tactic sources currently set in the engine build - * configuration. - * - * @see setTacticSources - * - * @return tactic sources - * */ - - - //! - //! - //! - //! - //! - //! 
- public native @Cast("nvinfer1::TacticSources") @NoException(true) int getTacticSources(); - - /** - * \brief Create timing cache - * - * Create ITimingCache instance from serialized raw data. The created timing cache doesn’t belong to - * a specific IBuilderConfig. It can be shared by multiple builder instances. Call setTimingCache() - * before launching a builder to attach cache to builder instance. - * - * @param blob A pointer to the raw data that contains serialized timing cache - * @param size The size in bytes of the serialized timing cache. Size 0 means create a new cache from scratch - * - * @see setTimingCache - * - * @return the pointer to ITimingCache created - * */ - - - //! - //! - //! - //! - //! - //! - //! - public native @NoException(true) ITimingCache createTimingCache(@Const Pointer blob, @Cast("std::size_t") long size); - - /** - * \brief Attach a timing cache to IBuilderConfig - * - * The timing cache has verification header to make sure the provided cache can be used in current environment. - * A failure will be reported if the CUDA device property in the provided cache is different from current - * environment. ignoreMismatch = true skips strict verification and allows loading cache created from a different - * device. - * - * The cache must not be destroyed until after the engine is built. - * - * @param cache the timing cache to be used - * @param ignoreMismatch whether or not allow using a cache that contains different CUDA device property - * - * @return true if set successfully, false otherwise - * - * \warning Using cache generated from devices with different CUDA device properties may lead to - * functional/performance bugs. - * */ - - - //! - //! - //! 
- public native @Cast("bool") @NoException(true) boolean setTimingCache(@Const @ByRef ITimingCache cache, @Cast("bool") boolean ignoreMismatch); - - /** - * \brief Get the pointer to the timing cache from current IBuilderConfig - * - * @return pointer to the timing cache used in current IBuilderConfig - * */ - public native @Const @NoException(true) ITimingCache getTimingCache(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IConcatenationLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IConcatenationLayer.java deleted file mode 100644 index f578ba089af..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IConcatenationLayer.java +++ /dev/null @@ -1,63 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \class IConcatenationLayer - * - * \brief A concatenation layer in a network definition. - * - * The output dimension along the concatenation axis is the sum of the corresponding input dimensions. - * Every other output dimension is the same as the corresponding dimension of the inputs. - * - * \warning All tensors must have the same dimensions except along the concatenation axis. - * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. 
- * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IConcatenationLayer extends ILayer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IConcatenationLayer(Pointer p) { super(p); } - - /** - * \brief Set the axis along which concatenation occurs. - * - * 0 is the major axis (excluding the batch dimension). The default is the number of non-batch axes in the tensor - * minus three (e.g. for an NCHW input it would be 0), or 0 if there are fewer than 3 non-batch axes. - * - * When running this layer on the DLA, only concat across the Channel axis is valid. - * - * @param axis The axis along which concatenation occurs. - * */ - - - //! - //! - //! - public native @NoException(true) void setAxis(int axis); - - /** - * \brief Get the axis along which concatenation occurs. - * - * @see setAxis() - * */ - public native @NoException(true) int getAxis(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IConstantLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IConstantLayer.java deleted file mode 100644 index e4da5b8331c..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IConstantLayer.java +++ /dev/null @@ -1,87 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static 
org.bytedeco.tensorrt.global.nvinfer.*; - - -/** \class IConstantLayer - * - * \brief Layer that represents a constant value. - * \note This layer does not support boolean types. - * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. - * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IConstantLayer extends ILayer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IConstantLayer(Pointer p) { super(p); } - - /** - * \brief Set the weights for the layer. - * - * If weights.type is DataType::kINT32, the output is a tensor of 32-bit indices. - * Otherwise the output is a tensor of real values and the output type will be - * follow TensorRT's normal precision rules. - * - * @see getWeights() - * */ - - - //! - //! - //! - public native @NoException(true) void setWeights(@ByVal Weights weights); - - /** - * \brief Get the weights for the layer. - * - * @see setWeights - * */ - - - //! - //! - //! - //! - public native @ByVal @NoException(true) Weights getWeights(); - - /** - * \brief Set the dimensions for the layer. - * - * @param dimensions The dimensions of the layer - * - * @see setDimensions - * */ - - - //! - //! - //! - //! - public native @NoException(true) void setDimensions(@ByVal @Cast("nvinfer1::Dims*") Dims32 dimensions); - - /** - * \brief Get the dimensions for the layer. 
- * - * @return the dimensions for the layer - * - * @see getDimensions - * */ - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getDimensions(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IConvolutionLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IConvolutionLayer.java deleted file mode 100644 index fae11cf4fae..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IConvolutionLayer.java +++ /dev/null @@ -1,556 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - // namespace impl - -/** - * \class IConvolutionLayer - * - * \brief A convolution layer in a network definition. - * - * This layer performs a correlation operation between 3-dimensional filter with a 4-dimensional tensor to produce - * another 4-dimensional tensor. - * - * An optional bias argument is supported, which adds a per-channel constant to each value in the output. - * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. - * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IConvolutionLayer extends ILayer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public IConvolutionLayer(Pointer p) { super(p); } - - /** - * \brief Set the HW kernel size of the convolution. - * - * If executing this layer on DLA, both height and width of kernel size must be in the range [1,32]. - * - * @see getKernelSize() - * - * @deprecated Superseded by setKernelSizeNd and will be removed in TensorRT 9.0. - * */ - - - //! - //! - //! - //! - public native @Deprecated @NoException(true) void setKernelSize(@ByVal DimsHW kernelSize); - - /** - * \brief Get the HW kernel size of the convolution. - * - * @see setKernelSize() - * - * @deprecated Superseded by getKernelSizeNd and will be removed in TensorRT 9.0. - * */ - - - //! - //! - //! - //! - public native @Deprecated @ByVal @NoException(true) DimsHW getKernelSize(); - - /** - * \brief Set the number of output maps for the convolution. - * - * If executing this layer on DLA, the number of output maps must be in the range [1,8192]. - * - * @see getNbOutputMaps() - * */ - - - //! - //! - //! - public native @NoException(true) void setNbOutputMaps(int nbOutputMaps); - - /** - * \brief Get the number of output maps for the convolution. - * - * @see setNbOutputMaps() - * */ - - - //! - //! - //! - //! - //! - //! - public native @NoException(true) int getNbOutputMaps(); - - /** - * \brief Get the stride of the convolution. - * - * Default: (1,1) - * - * If executing this layer on DLA, both height and width of stride must be in the range [1,8]. - * - * @see getStride() - * - * @deprecated Superseded by setStrideNd and will be removed in TensorRT 9.0. - * */ - - - //! - //! - //! - public native @Deprecated @NoException(true) void setStride(@ByVal DimsHW stride); - - /** - * \brief Get the stride of the convolution. - * - * @deprecated Superseded by getStrideNd and will be removed in TensorRT 9.0. - * */ - - - //! - //! - //! - //! - //! - //! - //! - public native @Deprecated @ByVal @NoException(true) DimsHW getStride(); - - /** - * \brief Set the padding of the convolution. 
- * - * The input will be zero-padded by this number of elements in the height and width directions. - * Padding is symmetric. - * - * Default: (0,0) - * - * If executing this layer on DLA, both height and width of padding must be in the range [0,31], - * and the padding size must be less than the kernel size. - * - * @see getPadding() - * - * @deprecated Superseded by setPaddingNd and will be removed in TensorRT 9.0. - * */ - - - //! - //! - //! - //! - public native @Deprecated @NoException(true) void setPadding(@ByVal DimsHW padding); - - /** - * \brief Get the padding of the convolution. If the padding is asymmetric, the pre-padding is returned. - * - * @see setPadding() - * - * @deprecated Superseded by getPaddingNd and will be removed in TensorRT 9.0. - * */ - - - //! - //! - //! - //! - //! - //! - //! - public native @Deprecated @ByVal @NoException(true) DimsHW getPadding(); - - /** - * \brief Set the number of groups for a convolution. - * - * The input tensor channels are divided into \p nbGroups groups, and a convolution is executed for each group, - * using a filter per group. The results of the group convolutions are concatenated to form the output. - * - * \note When using groups in int8 mode, the size of the groups (i.e. the channel count divided by the group - * count) must be a multiple of 4 for both input and output. - * - * Default: 1 - * - * If executing this layer on DLA, the max number of groups is 8192. - * - * @see getNbGroups() - * */ - - - //! - //! - //! - public native @NoException(true) void setNbGroups(int nbGroups); - - /** - * \brief Get the number of groups of the convolution. - * - * @see setNbGroups() - * */ - - - //! - //! - //! - //! - public native @NoException(true) int getNbGroups(); - - /** - * \brief Set the kernel weights for the convolution. 
- * - * The weights are specified as a contiguous array in \p GKCRS order, where \p G is the number of groups, \p K - * the number of output feature maps, \p C the number of input channels, and \p R and \p S are the height and - * width of the filter. - * - * @see getKernelWeights() - * */ - - - //! - //! - //! - public native @NoException(true) void setKernelWeights(@ByVal Weights weights); - - /** - * \brief Get the kernel weights of the convolution. - * - * @see setKernelWeights() - * */ - - - //! - //! - //! - //! - //! - public native @ByVal @NoException(true) Weights getKernelWeights(); - - /** - * \brief Set the bias weights for the convolution. - * - * Bias is optional. To omit bias, set the count value of the weights structure to zero. - * - * The bias is applied per-channel, so the number of weights (if non-zero) must be equal to the number of output - * feature maps. - * - * @see getBiasWeights() - * */ - - - //! - //! - //! - public native @NoException(true) void setBiasWeights(@ByVal Weights weights); - - /** - * \brief Get the bias weights for the convolution. - * - * @see setBiasWeights() - * */ - - - //! - //! - //! - //! - //! - //! - public native @ByVal @NoException(true) Weights getBiasWeights(); - - /** - * \brief Set the dilation for a convolution. - * - * Default: (1,1) - * - * If executing this layer on DLA, both height and width must be in the range [1,32]. - * - * @see getDilation() - * - * @deprecated Superseded by setDilationNd and will be removed in TensorRT 9.0. - * */ - - - //! - //! - //! - //! - public native @Deprecated @NoException(true) void setDilation(@ByVal DimsHW dilation); - - /** - * \brief Get the dilation for a convolution. - * - * @see setDilation() - * - * @deprecated Superseded by getDilationNd and will be removed in TensorRT 9.0. - * */ - - - //! - //! - //! - //! - //! - //! 
- public native @Deprecated @ByVal @NoException(true) DimsHW getDilation(); - - /** - * \brief Set the multi-dimension pre-padding of the convolution. - * - * The start of the input will be zero-padded by this number of elements in each dimension. - * - * Default: (0, 0, ..., 0) - * - * If executing this layer on DLA, only support 2D padding, both height and width of padding must be in the range - * [0,31], and the padding must be less than the kernel size. - * - * @see getPrePadding() - * */ - - - //! - //! - //! - public native @NoException(true) void setPrePadding(@ByVal @Cast("nvinfer1::Dims*") Dims32 padding); - - /** - * \brief Get the pre-padding. - * - * @see setPrePadding() - * */ - - - //! - //! - //! - //! - //! - //! - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getPrePadding(); - - /** - * \brief Set the multi-dimension post-padding of the convolution. - * - * The end of the input will be zero-padded by this number of elements in each dimension. - * - * Default: (0, 0, ..., 0) - * - * If executing this layer on DLA, only support 2D padding, both height and width of padding must be in the range - * [0,31], and the padding must be less than the kernel size. - * - * @see getPostPadding() - * */ - - - //! - //! - //! - public native @NoException(true) void setPostPadding(@ByVal @Cast("nvinfer1::Dims*") Dims32 padding); - - /** - * \brief Get the post-padding. - * - * @see setPostPadding() - * */ - - - //! - //! - //! - //! - //! - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getPostPadding(); - - /** - * \brief Set the padding mode. - * - * Padding mode takes precedence if both setPaddingMode and setPre/PostPadding are used. - * - * Default: kEXPLICIT_ROUND_DOWN - * - * @see getPaddingMode() - * */ - - - //! - //! - //! - //! 
- public native @NoException(true) void setPaddingMode(PaddingMode paddingMode); - public native @NoException(true) void setPaddingMode(@Cast("nvinfer1::PaddingMode") int paddingMode); - - /** - * \brief Get the padding mode. - * - * Default: kEXPLICIT_ROUND_DOWN - * - * @see setPaddingMode() - * */ - - - //! - //! - //! - //! - public native @NoException(true) PaddingMode getPaddingMode(); - - /** - * \brief Set the multi-dimension kernel size of the convolution. - * - * If executing this layer on DLA, only support 2D kernel size, both height and width of kernel size must be in the - * range [1,32]. - * - * @see getKernelSizeNd() - * */ - - - //! - //! - //! - public native @NoException(true) void setKernelSizeNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 kernelSize); - - /** - * \brief Get the multi-dimension kernel size of the convolution. - * - * @see setKernelSizeNd() - * */ - - - //! - //! - //! - //! - //! - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getKernelSizeNd(); - - /** - * \brief Set the multi-dimension stride of the convolution. - * - * Default: (1, 1, ..., 1) - * - * If executing this layer on DLA, only support 2D stride, both height and width of stride must be in the range - * [1,8]. - * - * @see getStrideNd() setStride() getStride() - * */ - - - //! - //! - //! - public native @NoException(true) void setStrideNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 stride); - - /** - * \brief Get the multi-dimension stride of the convolution. - * - * @see setStrideNd() - * */ - - - //! - //! - //! - //! - //! - //! - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getStrideNd(); - - /** - * \brief Set the multi-dimension padding of the convolution. - * - * The input will be zero-padded by this number of elements in each dimension. - * Padding is symmetric. 
- * - * Default: (0, 0, ..., 0) - * - * If executing this layer on DLA, only support 2D padding, both height and width of padding must be in the range - * [0,31], and the padding must be less than the kernel size. - * - * @see getPaddingNd() setPadding() getPadding() - * */ - - - //! - //! - //! - //! - public native @NoException(true) void setPaddingNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 padding); - - /** - * \brief Get the multi-dimension padding of the convolution. - * - * If the padding is asymmetric, the pre-padding is returned. - * - * @see setPaddingNd() - * */ - - - //! - //! - //! - //! - //! - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getPaddingNd(); - - /** - * \brief Set the multi-dimension dilation of the convolution. - * - * Default: (1, 1, ..., 1) - * - * If executing this layer on DLA, only support 2D padding, both height and width must be in the range [1,32]. - * - * @see getDilation() - * */ - - - //! - //! - //! - public native @NoException(true) void setDilationNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 dilation); - - /** - * \brief Get the multi-dimension dilation of the convolution. - * - * @see setDilation() - * */ - - - //! - //! - //! - //! - //! - //! - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getDilationNd(); - - /** - * \brief Append or replace an input of this layer with a specific tensor - * - * @param index the index of the input to modify. - * @param tensor the new input tensor - * - * For a IConvolutionLayer, only index 0 is valid unless explicit precision mode is enabled. - * With explicit precision mode, values 0-1 are valid where value 1 overrides kernel weights. - * Kernel weights tensor (computed at build-time) must be an output of dequantize scale layer (i.e. a scale layer - * with int8 input and float output) in explicit precision network. Conversely, this input tensor can be overridden - * via appropriate set call. 
- * - * The indices are as follows: - * - * - 0: The input activation tensor. - * - 1: The kernel weights tensor (a constant tensor). - * - * If this function is called with a value greater than 0, then the function getNbInputs() changes */ -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ICudaEngine.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ICudaEngine.java deleted file mode 100644 index 1a7c8ea978a..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ICudaEngine.java +++ /dev/null @@ -1,651 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \class ICudaEngine - * - * \brief An engine for executing inference on a built network, with functionally unsafe features. - * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. - * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class ICudaEngine extends INoCopy { - static { Loader.load(); } - /** Default native constructor. */ - public ICudaEngine() { super((Pointer)null); allocate(); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public ICudaEngine(long size) { super((Pointer)null); allocateArray(size); } - /** Pointer cast constructor. 
Invokes {@link Pointer#Pointer(Pointer)}. */ - public ICudaEngine(Pointer p) { super(p); } - private native void allocate(); - private native void allocateArray(long size); - @Override public ICudaEngine position(long position) { - return (ICudaEngine)super.position(position); - } - @Override public ICudaEngine getPointer(long i) { - return new ICudaEngine((Pointer)this).offsetAddress(i); - } - - - /** - * \brief Get the number of binding indices. - * - * There are separate binding indices for each optimization profile. - * This method returns the total over all profiles. - * If the engine has been built for K profiles, the first getNbBindings() / K bindings are used by profile - * number 0, the following getNbBindings() / K bindings are used by profile number 1 etc. - * - * @see getBindingIndex(); - * */ - - - //! - //! - //! - //! - //! - //! - //! - public native @NoException(true) int getNbBindings(); - - /** - * \brief Retrieve the binding index for a named tensor. - * - * IExecutionContext::enqueue() and IExecutionContext::execute() require an array of buffers. - * - * Engine bindings map from tensor names to indices in this array. - * Binding indices are assigned at engine build time, and take values in the range [0 ... n-1] where n is the total - * number of inputs and outputs. - * - * To get the binding index of the name in an optimization profile with index k > 0, - * mangle the name by appending " [profile k]", as described for method getBindingName(). - * - * @param name The tensor name. - * @return The binding index for the named tensor, or -1 if the name is not found. - * - * @see getNbBindings() getBindingName() - * */ - - - //! - //! - //! - //! - //! - //! - public native @NoException(true) int getBindingIndex(String name); - public native @NoException(true) int getBindingIndex(@Cast("const char*") BytePointer name); - - /** - * \brief Retrieve the name corresponding to a binding index. 
- * - * This is the reverse mapping to that provided by getBindingIndex(). - * - * For optimization profiles with an index k > 0, the name is mangled by appending - * " [profile k]", with k written in decimal. For example, if the tensor in the - * INetworkDefinition had the name "foo", and bindingIndex refers to that tensor in the - * optimization profile with index 3, getBindingName returns "foo [profile 3]". - * - * @param bindingIndex The binding index. - * @return The name corresponding to the index, or nullptr if the index is out of range. - * - * @see getBindingIndex() - * */ - - - //! - //! - //! - //! - public native @NoException(true) String getBindingName(int bindingIndex); - - /** - * \brief Determine whether a binding is an input binding. - * - * @param bindingIndex The binding index. - * @return True if the index corresponds to an input binding and the index is in range. - * - * @see getBindingIndex() - * */ - - - //! - //! - //! - //! - //! - //! - public native @Cast("bool") @NoException(true) boolean bindingIsInput(int bindingIndex); - - /** - * \brief Get the dimensions of a binding. - * - * @param bindingIndex The binding index. - * @return The dimensions of the binding if the index is in range, otherwise Dims(). - * Has -1 for any dimension that varies within the optimization profile. - * - * For example, suppose an INetworkDefinition has an input with shape [-1,-1] - * that becomes a binding b in the engine. If the associated optimization profile - * specifies that b has minimum dimensions as [6,9] and maximum dimensions [7,9], - * getBindingDimensions(b) returns [-1,9], despite the second dimension being - * dynamic in the INetworkDefinition. - * - * Because each optimization profile has separate bindings, the returned value can - * differ across profiles. Consider another binding b' for the same network input, - * but for another optimization profile. 
If that other profile specifies minimum - * dimensions [5,8] and maximum dimensions [5,9], getBindingDimensions(b') returns [5,-1]. - * - * @see getBindingIndex() - * */ - - - //! - //! - //! - //! - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getBindingDimensions(int bindingIndex); - - /** - * \brief Determine the required data type for a buffer from its binding index. - * - * @param bindingIndex The binding index. - * @return The type of the data in the buffer. - * - * @see getBindingIndex() - * */ - - - //! - //! - //! - //! - public native @NoException(true) DataType getBindingDataType(int bindingIndex); - - /** - * \brief Get the maximum batch size which can be used for inference. - * - * For an engine built from an INetworkDefinition without an implicit batch dimension, this will always return 1. - * - * @return The maximum batch size for this engine. - * */ - - - //! - //! - //! - //! - public native @NoException(true) int getMaxBatchSize(); - - /** - * \brief Get the number of layers in the network. - * - * The number of layers in the network is not necessarily the number in the original network definition, as layers - * may be combined or eliminated as the engine is optimized. This value can be useful when building per-layer - * tables, such as when aggregating profiling data over a number of executions. - * - * @return The number of layers in the network. - * */ - - - //! - //! - //! - //! - //! - public native @NoException(true) int getNbLayers(); - - /** - * \brief Serialize the network to a stream. - * - * @return A IHostMemory object that contains the serialized engine. - * - * The network may be deserialized with IRuntime::deserializeCudaEngine(). - * - * @see IRuntime::deserializeCudaEngine() - * */ - - - //! - //! - //! - //! - public native @NoException(true) IHostMemory serialize(); - - /** - * \brief Create an execution context. 
- * - * If the engine supports dynamic shapes, each execution context in concurrent use must use a separate optimization - * profile. The first execution context created will call setOptimizationProfile(0) implicitly. For other execution - * contexts, setOptimizationProfile() must be called with unique profile index before calling execute or enqueue. - * If an error recorder has been set for the engine, it will also be passed to the execution context. - * - * @see IExecutionContext. - * @see IExecutionContext::setOptimizationProfile() - * */ - - - //! - //! - //! - //! - public native @NoException(true) IExecutionContext createExecutionContext(); - - /** - * \brief Destroy this object; - * - * @deprecated Deprecated interface will be removed in TensorRT 10.0. - * - * \warning Calling destroy on a managed pointer will result in a double-free error. - * */ - - - //! - //! - //! - //! - //! - public native @Deprecated @NoException(true) void destroy(); - - /** - * \brief Get location of binding - * - * This lets you know whether the binding should be a pointer to device or host memory. - * - * @see ITensor::setLocation() ITensor::getLocation() - * - * @param bindingIndex The binding index. - * @return The location of the bound tensor with given index. - * */ - - //! - //! - public native @NoException(true) TensorLocation getLocation(int bindingIndex); - - /** \brief create an execution context without any device memory allocated - * - * The memory for execution of this device context must be supplied by the application. - * */ - - - //! - //! - //! - public native @NoException(true) IExecutionContext createExecutionContextWithoutDeviceMemory(); - - /** - * \brief Return the amount of device memory required by an execution context. - * - * @see IExecutionContext::setDeviceMemory() - * */ - - - //! - //! - //! - public native @Cast("size_t") @NoException(true) long getDeviceMemorySize(); - - /** - * \brief Return true if an engine can be refit. 
- * - * @see nvinfer1::createInferRefitter() - * */ - - - //! - //! - //! - //! - //! - public native @Cast("bool") @NoException(true) boolean isRefittable(); - - /** - * \brief Return the number of bytes per component of an element. - * - * The vector component size is returned if getBindingVectorizedDim() != -1. - * - * @param bindingIndex The binding Index. - * - * @see ICudaEngine::getBindingVectorizedDim() - * */ - - - //! - //! - //! - //! - //! - public native @NoException(true) int getBindingBytesPerComponent(int bindingIndex); - - /** - * \brief Return the number of components included in one element. - * - * The number of elements in the vectors is returned if getBindingVectorizedDim() != -1. - * - * @param bindingIndex The binding Index. - * - * @see ICudaEngine::getBindingVectorizedDim() - * */ - - - //! - //! - //! - public native @NoException(true) int getBindingComponentsPerElement(int bindingIndex); - - /** - * \brief Return the binding format. - * - * @param bindingIndex The binding Index. - * */ - - - //! - //! - //! - //! - public native @NoException(true) TensorFormat getBindingFormat(int bindingIndex); - - /** - * \brief Return the human readable description of the tensor format. - * - * The description includes the order, vectorization, data type, strides, - * and etc. Examples are shown as follows: - * Example 1: kCHW + FP32 - * "Row major linear FP32 format" - * Example 2: kCHW2 + FP16 - * "Two wide channel vectorized row major FP16 format" - * Example 3: kHWC8 + FP16 + Line Stride = 32 - * "Channel major FP16 format where C % 8 == 0 and H Stride % 32 == 0" - * - * @param bindingIndex The binding Index. - * */ - - - //! - //! - //! - //! - public native @NoException(true) String getBindingFormatDesc(int bindingIndex); - - /** - * \brief Return the dimension index that the buffer is vectorized. - * - * Specifically -1 is returned if scalars per vector is 1. - * - * @param bindingIndex The binding Index. - * */ - - - //! - //! - //! - //! 
- //! - public native @NoException(true) int getBindingVectorizedDim(int bindingIndex); - - /** - * \brief Returns the name of the network associated with the engine. - * - * The name is set during network creation and is retrieved after - * building or deserialization. - * - * @see INetworkDefinition::setName(), INetworkDefinition::getName() - * - * @return A zero delimited C-style string representing the name of the network. - * */ - - - //! - //! - //! - public native @NoException(true) String getName(); - - /** - * \brief Get the number of optimization profiles defined for this engine. - * - * @return Number of optimization profiles. It is always at least 1. - * - * @see IExecutionContext::setOptimizationProfile() */ - - - //! - //! - //! - //! - //! - //! - //! - //! - //! - public native @NoException(true) int getNbOptimizationProfiles(); - - /** - * \brief Get the minimum / optimum / maximum dimensions for a particular binding under an optimization profile. - * - * @param bindingIndex The binding index, which must belong to the given profile, - * or be between 0 and bindingsPerProfile-1 as described below. - * - * @param profileIndex The profile index, which must be between 0 and getNbOptimizationProfiles()-1. - * - * @param select Whether to query the minimum, optimum, or maximum dimensions for this binding. - * - * @return The minimum / optimum / maximum dimensions for this binding in this profile. - * If the profileIndex or bindingIndex are invalid, return Dims with nbDims=-1. - * - * For backwards compatibility with earlier versions of TensorRT, if the bindingIndex - * does not belong to the current optimization profile, but is between 0 and bindingsPerProfile-1, - * where bindingsPerProfile = getNbBindings()/getNbOptimizationProfiles, - * then a corrected bindingIndex is used instead, computed by: - * - * profileIndex * bindingsPerProfile + bindingIndex % bindingsPerProfile - * - * Otherwise the bindingIndex is considered invalid. - * */ - - - //! - //! 
- //! - //! - //! - //! - //! - //! - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getProfileDimensions(int bindingIndex, int profileIndex, OptProfileSelector select); - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getProfileDimensions(int bindingIndex, int profileIndex, @Cast("nvinfer1::OptProfileSelector") int select); - - /** - * \brief Get minimum / optimum / maximum values for an input shape binding under an optimization profile. - * - * @param profileIndex The profile index (must be between 0 and getNbOptimizationProfiles()-1) - * - * @param inputIndex The input index (must be between 0 and getNbBindings() - 1) - * - * @param select Whether to query the minimum, optimum, or maximum shape values for this binding. - * - * @return If the binding is an input shape binding, return a pointer to an array that has - * the same number of elements as the corresponding tensor, i.e. 1 if dims.nbDims == 0, or dims.d[0] - * if dims.nbDims == 1, where dims = getBindingDimensions(inputIndex). The array contains - * the elementwise minimum / optimum / maximum values for this shape binding under the profile. - * If either of the indices is out of range, or if the binding is not an input shape binding, return - * nullptr. - * - * For backwards compatibility with earlier versions of TensorRT, a bindingIndex that does not belong - * to the profile is corrected as described for getProfileDimensions. - * - * @see ICudaEngine::getProfileDimensions - * */ - - - //! - //! - //! - //! - //! - //! - //! - //! - //! - //! - //! - public native @Const @NoException(true) IntPointer getProfileShapeValues(int profileIndex, int inputIndex, OptProfileSelector select); - public native @Const @NoException(true) IntBuffer getProfileShapeValues(int profileIndex, int inputIndex, @Cast("nvinfer1::OptProfileSelector") int select); - - /** - * \brief True if tensor is required as input for shape calculations or output from them. 
- * - * TensorRT evaluates a network in two phases: - * - * 1. Compute shape information required to determine memory allocation requirements - * and validate that runtime sizes make sense. - * - * 2. Process tensors on the device. - * - * Some tensors are required in phase 1. These tensors are called "shape tensors", and always - * have type Int32 and no more than one dimension. These tensors are not always shapes - * themselves, but might be used to calculate tensor shapes for phase 2. - * - * isShapeBinding(i) returns true if the tensor is a required input or an output computed in phase 1. - * isExecutionBinding(i) returns true if the tensor is a required input or an output computed in phase 2. - * - * For example, if a network uses an input tensor with binding i as an addend - * to an IElementWiseLayer that computes the "reshape dimensions" for IShuffleLayer, - * then isShapeBinding(i) == true. - * - * It's possible to have a tensor be required by both phases. For instance, a tensor - * can be used for the "reshape dimensions" and as the indices for an IGatherLayer - * collecting floating-point data. - * - * It's also possible to have a tensor be required by neither phase, but nonetheless - * shows up in the engine's inputs. For example, if an input tensor is used only - * as an input to IShapeLayer, only its shape matters and its values are irrelevant. - * - * @see isExecutionBinding() - * */ - - - //! - //! - //! - //! - public native @Cast("bool") @NoException(true) boolean isShapeBinding(int bindingIndex); - - /** - * \brief True if pointer to tensor data is required for execution phase, false if nullptr can be supplied. - * - * For example, if a network uses an input tensor with binding i ONLY as the "reshape dimensions" - * input of IShuffleLayer, then isExecutionBinding(i) is false, and a nullptr can be - * supplied for it when calling IExecutionContext::execute or IExecutionContext::enqueue. - * - * @see isShapeBinding() - * */ - - - //! - //! - //! 
- //! - public native @Cast("bool") @NoException(true) boolean isExecutionBinding(int bindingIndex); - - /** - * \brief Determine what execution capability this engine has. - * - * If the engine has EngineCapability::kSTANDARD, then all engine functionality is valid. - * If the engine has EngineCapability::kSAFETY, then only the functionality in safe engine is valid. - * If the engine has EngineCapability::kDLA_STANDALONE, then only serialize, destroy, and const-accessor functions are - * valid. - * - * @return The EngineCapability flag that the engine was built for. - * */ - - //! - //! - //! - //! - public native @NoException(true) EngineCapability getEngineCapability(); - - /** \brief Set the ErrorRecorder for this interface - * - * Assigns the ErrorRecorder to this interface. The ErrorRecorder will track all errors during execution. - * This function will call incRefCount of the registered ErrorRecorder at least once. Setting - * recorder to nullptr unregisters the recorder with the interface, resulting in a call to decRefCount if - * a recorder has been registered. - * - * If an error recorder is not set, messages will be sent to the global log stream. - * - * @param recorder The error recorder to register with this interface. */ - // - /** @see getErrorRecorder() - /** */ - - - //! - //! - //! - //! - //! - public native @NoException(true) void setErrorRecorder(IErrorRecorder recorder); - - /** - * \brief Get the ErrorRecorder assigned to this interface. - * - * Retrieves the assigned error recorder object for the given class. A nullptr will be returned if - * an error handler has not been set. - * - * @return A pointer to the IErrorRecorder object that has been registered. - * - * @see setErrorRecorder() - * */ - - - //! - //! - //! - //! - //! - //! - public native @NoException(true) IErrorRecorder getErrorRecorder(); - - /** - * \brief Query whether the engine was built with an implicit batch dimension. 
- * - * @return True if tensors have implicit batch dimension, false otherwise. - * - * This is an engine-wide property. Either all tensors in the engine - * have an implicit batch dimension or none of them do. - * - * hasImplicitBatchDimension() is true if and only if the INetworkDefinition - * from which this engine was built was created with createNetwork() or - * createNetworkV2() without NetworkDefinitionCreationFlag::kEXPLICIT_BATCH flag. - * - * @see createNetworkV2 - * */ - - //! - //! - public native @Cast("bool") @NoException(true) boolean hasImplicitBatchDimension(); - - /** \brief return the tactic sources required by this engine - * - * @see IBuilderConfig::setTacticSources() - * */ - public native @Cast("nvinfer1::TacticSources") @NoException(true) int getTacticSources(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IDeconvolutionLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IDeconvolutionLayer.java deleted file mode 100644 index 3f7e3a76abb..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IDeconvolutionLayer.java +++ /dev/null @@ -1,517 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \class IDeconvolutionLayer - * - * \brief A deconvolution layer in a network definition. 
- * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. - * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IDeconvolutionLayer extends ILayer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IDeconvolutionLayer(Pointer p) { super(p); } - - /** - * \brief Set the HW kernel size of the convolution. - * - * If executing this layer on DLA, both height and width of kernel size must be in the range [1,32], or the - * combinations of [64, 96, 128] in one dimension and 1 in the other dimensions, i.e. [1x64] or [64x1] are valid, - * but not [64x64]. - * - * @see getKernelSize() - * - * @deprecated Superseded by setKernelSizeNd and will be removed in TensorRT 9.0. - * */ - - - //! - //! - //! - //! - public native @Deprecated @NoException(true) void setKernelSize(@ByVal DimsHW kernelSize); - - /** - * \brief Get the HW kernel size of the deconvolution. - * - * @see setKernelSize() - * - * @deprecated Superseded by getKernelSizeNd and will be removed in TensorRT 9.0. - * */ - - - //! - //! - //! - //! - public native @Deprecated @ByVal @NoException(true) DimsHW getKernelSize(); - - /** - * \brief Set the number of output feature maps for the deconvolution. - * - * If executing this layer on DLA, the number of output maps must be in the range [1,8192]. - * - * @see getNbOutputMaps() - * */ - - - //! - //! - //! - public native @NoException(true) void setNbOutputMaps(int nbOutputMaps); - - /** - * \brief Get the number of output feature maps for the deconvolution. - * - * @see setNbOutputMaps() - * */ - - - //! - //! - //! - //! - //! - public native @NoException(true) int getNbOutputMaps(); - - /** - * \brief Get the stride of the deconvolution. 
- * - * If executing this layer on DLA, both height and width of stride must be in the range [1,32] or the combinations - * of [64, 96, 128] in one dimension and 1 in the other dimensions, i.e. [1x64] or [64x1] are valid, but not - * [64x64]. - * - * @see setStride() - * - * @deprecated Superseded by setStrideNd and will be removed in TensorRT 9.0. - * */ - - - //! - //! - //! - //! - public native @Deprecated @NoException(true) void setStride(@ByVal DimsHW stride); - - /** - * \brief Get the stride of the deconvolution. - * - * Default: (1,1) - * - * @deprecated Superseded by getStrideNd and will be removed in TensorRT 9.0. - * */ - - - //! - //! - //! - //! - //! - //! - //! - public native @Deprecated @ByVal @NoException(true) DimsHW getStride(); - - /** - * \brief Set the padding of the deconvolution. - * - * The output will be trimmed by this number of elements on each side in the height and width directions. - * In other words, it resembles the inverse of a convolution layer with this padding size. - * Padding is symmetric, and negative padding is not supported. - * - * Default: (0,0) - * - * If executing this layer on DLA, both height and width of padding must be 0. - * - * @see getPadding() - * - * @deprecated Superseded by setPaddingNd and will be removed in TensorRT 9.0. - * */ - - - //! - //! - //! - //! - //! - public native @Deprecated @NoException(true) void setPadding(@ByVal DimsHW padding); - - /** - * \brief Get the padding of the deconvolution. - * - * Default: (0, 0) - * - * @see setPadding() - * - * @deprecated Superseded by getPaddingNd and will be removed in TensorRT 9.0. - * */ - - - //! - //! - //! - //! - //! - //! - //! - public native @Deprecated @ByVal @NoException(true) DimsHW getPadding(); - - /** - * \brief Set the number of groups for a deconvolution. - * - * The input tensor channels are divided into \p nbGroups groups, and a deconvolution is executed for each group, - * using a filter per group. 
The results of the group convolutions are concatenated to form the output. - * - * If executing this layer on DLA, nbGroups must be one - * - * \note When using groups in int8 mode, the size of the groups (i.e. the channel count divided by the group count) - * must be a multiple of 4 for both input and output. - * - * Default: 1 - * - * @see getNbGroups() - * */ - - - //! - //! - //! - public native @NoException(true) void setNbGroups(int nbGroups); - - /** - * \brief Get the number of groups for a deconvolution. - * - * @see setNbGroups() - * */ - - - //! - //! - //! - //! - public native @NoException(true) int getNbGroups(); - - /** - * \brief Set the kernel weights for the deconvolution. - * - * The weights are specified as a contiguous array in \p CKRS order, where \p C the number of - * input channels, \p K the number of output feature maps, and \p R and \p S are the height and width - * of the filter. - * - * @see getWeights() - * */ - - - //! - //! - //! - public native @NoException(true) void setKernelWeights(@ByVal Weights weights); - - /** - * \brief Get the kernel weights for the deconvolution. - * - * @see setNbGroups() - * */ - - - //! - //! - //! - //! - //! - public native @ByVal @NoException(true) Weights getKernelWeights(); - - /** - * \brief Set the bias weights for the deconvolution. - * - * Bias is optional. To omit bias, set the count value of the weights structure to zero. - * - * The bias is applied per-feature-map, so the number of weights (if non-zero) must be equal to the number of - * output feature maps. - * - * @see getBiasWeights() - * */ - - - //! - //! - //! - public native @NoException(true) void setBiasWeights(@ByVal Weights weights); - - /** - * \brief Get the bias weights for the deconvolution. - * - * @see getBiasWeights() - * */ - - - //! - //! - //! - //! - //! - //! - public native @ByVal @NoException(true) Weights getBiasWeights(); - - /** - * \brief Set the multi-dimension pre-padding of the deconvolution. 
- * - * The output will be trimmed by this number of elements on the start of every dimension. - * In other words, it resembles the inverse of a convolution layer with this padding size. - * Negative padding is not supported. - * - * Default: (0, 0, ..., 0) - * - * If executing this layer on DLA, padding must be 0. - * - * @see getPrePadding() - * */ - - - //! - //! - //! - public native @NoException(true) void setPrePadding(@ByVal @Cast("nvinfer1::Dims*") Dims32 padding); - - /** - * \brief Get the pre-padding. - * - * @see setPrePadding() - * */ - - - //! - //! - //! - //! - //! - //! - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getPrePadding(); - - /** - * \brief Set the multi-dimension post-padding of the deconvolution. - * - * The output will be trimmed by this number of elements on the end of every dimension. - * In other words, it resembles the inverse of a convolution layer with this padding size. - * Negative padding is not supported. - * - * Default: (0, 0, ..., 0) - * - * If executing this layer on DLA, padding must be 0. - * - * @see getPostPadding() - * */ - - - //! - //! - //! - public native @NoException(true) void setPostPadding(@ByVal @Cast("nvinfer1::Dims*") Dims32 padding); - - /** - * \brief Get the padding. - * - * @see setPostPadding() - * */ - - - //! - //! - //! - //! - //! - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getPostPadding(); - - /** - * \brief Set the padding mode. - * - * Padding mode takes precedence if both setPaddingMode and setPre/PostPadding are used. - * - * Default: kEXPLICIT_ROUND_DOWN - * - * @see getPaddingMode() - * */ - - - //! - //! - //! - //! - public native @NoException(true) void setPaddingMode(PaddingMode paddingMode); - public native @NoException(true) void setPaddingMode(@Cast("nvinfer1::PaddingMode") int paddingMode); - - /** - * \brief Get the padding mode. - * - * Default: kEXPLICIT_ROUND_DOWN - * - * @see setPaddingMode() - * */ - - - //! - //! 
- //! - //! - public native @NoException(true) PaddingMode getPaddingMode(); - - /** - * \brief Set the multi-dimension kernel size of the deconvolution. - * - * If executing this layer on DLA, only support 2D kernel size, both height and width of kernel size must be in - * the range [1-32]. - * - * @see getKernelSizeNd() setKernelSize() getKernelSize() - * */ - - - //! - //! - //! - public native @NoException(true) void setKernelSizeNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 kernelSize); - - /** - * \brief Get the multi-dimension kernel size of the deconvolution. - * - * @see setKernelSizeNd() - * */ - - - //! - //! - //! - //! - //! - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getKernelSizeNd(); - - /** - * \brief Set the multi-dimension stride of the deconvolution. - * - * Default: (1, 1, ..., 1) - * - * If executing this layer on DLA, only support 2D stride, both height and width of stride must be in the range - * [1-32]. - * - * @see getStrideNd() setStride() getStride() - * */ - - - //! - //! - //! - public native @NoException(true) void setStrideNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 stride); - - /** - * \brief Get the multi-dimension stride of the deconvolution. - * - * @see setStrideNd() - * */ - - - //! - //! - //! - //! - //! - //! - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getStrideNd(); - - /** - * \brief Set the multi-dimension padding of the deconvolution. - * - * The output will be trimmed by this number of elements on both sides of every dimension. - * In other words, it resembles the inverse of a convolution layer with this padding size. - * Padding is symmetric, and negative padding is not supported. - * - * Default: (0, 0, ..., 0) - * - * If executing this layer on DLA, padding must be 0. - * - * @see getPaddingNd() setPadding() getPadding() - * */ - - - //! - //! - //! - //! 
- public native @NoException(true) void setPaddingNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 padding); - - /** - * \brief Get the multi-dimension padding of the deconvolution. - * - * If the padding is asymmetric, the pre-padding is returned. - * - * @see setPaddingNd() - * */ - - - //! - //! - //! - //! - //! - //! - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getPaddingNd(); - - /** - * \brief Append or replace an input of this layer with a specific tensor - * - * @param index the index of the input to modify. - * @param tensor the new input tensor - * - * For a IDeconvolutionLayer, only index 0 is valid unless explicit precision mode is enabled. - * With explicit precision mode, values 0-1 are valid where value 1 overrides kernel weights. - * Kernel weights tensor (computed at build-time) must be an output of dequantize scale layer (i.e. a scale layer - * with int8 input and float output) in explicit precision network. Conversely, this input tensor can be overridden - * via appropriate set call. The indices are as follows: - * - * - 0: The input activation tensor. - * - 1: The kernel weights tensor (a constant tensor). - * - * If this function is called with a value greater than 0, then the function getNbInputs() changes - * */ - - //! - //! - //! - - /** \brief Set the multi-dimension dilation of the deconvolution. - * - * Default: (1, 1, ..., 1) - * - * @see getDilationNd() - * */ - - - //! - //! - //! - public native @NoException(true) void setDilationNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 dilation); - - /** - * \brief Get the multi-dimension dilation of the deconvolution. 
- * - * @see setDilationNd() - * */ - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getDilationNd(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IDequantizeLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IDequantizeLayer.java deleted file mode 100644 index d8a4f22979d..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IDequantizeLayer.java +++ /dev/null @@ -1,104 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \class IDequantizeLayer - * - * \brief A Dequantize layer in a network definition. - * - * This layer accepts a signed 8-bit integer input tensor, and uses the configured scale and zeroPt inputs to - * dequantize the input according to: - * \p output = (\p input - \p zeroPt) * \p scale - * - * The first input (index 0) is the tensor to be quantized. - * The second (index 1) and third (index 2) are the scale and zero point respectively. - * Each of \p scale and \p zeroPt must be either a scalar, or a 1D tensor. - * - * The \p zeroPt tensor is optional, and if not set, will be assumed to be zero. Its data type must be - * DataType::kINT8. \p zeroPt must only contain zero-valued coefficients, because only symmetric quantization is - * supported. 
- * The \p scale value must be either a scalar for per-tensor quantization, or a 1D tensor for per-channel - * quantization. All \p scale coefficients must have positive values. The size of the 1-D \p scale tensor must match - * the size of the quantization axis. The size of the \p scale must match the size of the \p zeroPt. - * - * The subgraph which terminates with the \p scale tensor must be a build-time constant. The same restrictions apply - * to the \p zeroPt. - * The output type, if constrained, must be constrained to DataType::kINT8. The input type, if constrained, must be - * constrained to DataType::kFLOAT (FP16 input is not supported). - * The output size is the same as the input size. The quantization axis is in reference to the input tensor's - * dimensions. - * - * IDequantizeLayer only supports DataType::kINT8 precision and will default to this precision during instantiation. - * IDequantizeLayer only supports DataType::kFLOAT output. - * - * As an example of the operation of this layer, imagine a 4D NCHW activation input which can be quantized using a - * single scale coefficient (referred to as per-tensor quantization): - * For each n in N: - * For each c in C: - * For each h in H: - * For each w in W: - * output[n,c,h,w] = (\p input[n,c,h,w] - \p zeroPt) * \p scale - * - * Per-channel dequantization is supported only for input that is rooted at an IConstantLayer (i.e. weights). - * Activations cannot be quantized per-channel. As an example of per-channel operation, imagine a 4D KCRS weights input - * and K (dimension 0) as the quantization axis. The scale is an array of coefficients, which is the same size as the - * quantization axis. - * For each k in K: - * For each c in C: - * For each r in R: - * For each s in S: - * output[k,c,r,s] = (\p input[k,c,r,s] - \p zeroPt[k]) * \p scale[k] - * - * \note Only symmetric quantization is supported. - * \note Currently the only allowed build-time constant \p scale and \zeroPt subgraphs are: - * 1. 
Constant -> Quantize - * 2. Constant -> Cast -> Quantize - * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. - * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IDequantizeLayer extends ILayer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IDequantizeLayer(Pointer p) { super(p); } - - /** - * \brief Get the quantization axis. - * - * @return axis parameter set by setAxis(). - * The return value is the index of the quantization axis in the input tensor's dimensions. - * A value of -1 indicates per-tensor quantization. - * The default value is -1. - * */ - - //! - //! - //! - public native @NoException(true) int getAxis(); - /** - * \brief Set the quantization axis. - * - * Set the index of the quantization axis (with reference to the input tensor's dimensions). - * The axis must be a valid axis if the scale tensor has more than one coefficient. - * The axis value will be ignored if the scale tensor has exactly one coefficient (per-tensor quantization). 
- * */ - public native @NoException(true) void setAxis(int axis); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IDimensionExpr.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IDimensionExpr.java deleted file mode 100644 index 036a9c3875f..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IDimensionExpr.java +++ /dev/null @@ -1,46 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - // namespace impl - -/** - * \class IDimensionExpr - * - * An IDimensionExpr represents an integer expression constructed from constants, - * input dimensions, and binary operations. These expressions are can be used - * in overrides of IPluginV2DynamicExt::getOutputDimensions to define output - * dimensions in terms of input dimensions. - * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. - * - * @see DimensionOperation, IPluginV2DynamicExt::getOutputDimensions - * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IDimensionExpr extends INoCopy { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IDimensionExpr(Pointer p) { super(p); } - - /** Return true if expression is a build-time constant. 
*/ - public native @Cast("bool") @NoException(true) boolean isConstant(); - - /** If isConstant(), returns value of the constant. - * If !isConstant(), return std::numeric_limits::min(). */ - public native @NoException(true) int getConstantValue(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IElementWiseLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IElementWiseLayer.java deleted file mode 100644 index 56810205721..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IElementWiseLayer.java +++ /dev/null @@ -1,73 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - // namespace impl - -/** - * \class IElementWiseLayer - * - * \brief A elementwise layer in a network definition. - * - * This layer applies a per-element binary operation between corresponding elements of two tensors. - * - * The input tensors must have the same number of dimensions. For each dimension, their lengths must - * match, or one of them must be one. In the latter case, the tensor is broadcast along that axis. - * - * The output tensor has the same number of dimensions as the inputs. For each output dimension, - * its length is equal to the lengths of the corresponding input dimensions if they match, - * otherwise it is equal to the length that is not one. */ -//! 
-/** \warning When running this layer on the DLA with Int8 data type, the dynamic ranges of two input tensors shall be -/** equal. If the dynamic ranges are generated using calibrator, the largest value shall be used. -/** -/** \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. -/** */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IElementWiseLayer extends ILayer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IElementWiseLayer(Pointer p) { super(p); } - - /** - * \brief Set the binary operation for the layer. - * - * DLA supports only kSUM, kPROD, kMAX and kMIN. - * - * @see getOperation(), ElementWiseOperation - * - * @see getBiasWeights() - * */ - - - //! - //! - //! - //! - public native @NoException(true) void setOperation(ElementWiseOperation op); - public native @NoException(true) void setOperation(@Cast("nvinfer1::ElementWiseOperation") int op); - - /** - * \brief Get the binary operation for the layer. 
- * - * @see setOperation(), ElementWiseOperation - * - * @see setBiasWeights() - * */ - public native @NoException(true) ElementWiseOperation getOperation(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IErrorRecorder.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IErrorRecorder.java deleted file mode 100644 index e9b481bc821..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IErrorRecorder.java +++ /dev/null @@ -1,231 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - // namespace impl - -/** - * \class IErrorRecorder - * - * \brief Reference counted application-implemented error reporting interface for TensorRT objects. - * - * The error reporting mechanism is a user defined object that interacts with the internal state of the object - * that it is assigned to in order to determine information about abnormalities in execution. The error recorder - * gets both an error enum that is more descriptive than pass/fail and also a string description that gives more - * detail on the exact failure modes. In the safety context, the error strings are all limited to 128 characters - * in length. - * The ErrorRecorder gets passed along to any class that is created from another class that has an ErrorRecorder - * assigned to it. 
For example, assigning an ErrorRecorder to an IBuilder allows all INetwork's, ILayer's, and - * ITensor's to use the same error recorder. For functions that have their own ErrorRecorder accessor functions. - * This allows registering a different error recorder or de-registering of the error recorder for that specific - * object. - * - * The ErrorRecorder object implementation must be thread safe if the same ErrorRecorder is passed to different - * interface objects being executed in parallel in different threads. All locking and synchronization is - * pushed to the interface implementation and TensorRT does not hold any synchronization primitives when accessing - * the interface functions. - * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IErrorRecorder extends Pointer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IErrorRecorder(Pointer p) { super(p); } - - /** - * A typedef of a c-style string for reporting error descriptions. - * */ - - - //! - //! - - /** - * The length limit for an error description, excluding the '\0' string terminator. - * */ - - - //! - //! - @MemberGetter public static native @Cast("const size_t") long kMAX_DESC_LENGTH(); - public static final long kMAX_DESC_LENGTH = kMAX_DESC_LENGTH(); - - /** - * A typedef of a 32bit integer for reference counting. - * */ - - // Public API used to retrieve information from the error recorder. - - /** - * \brief Return the number of errors - * - * Determines the number of errors that occurred between the current point in execution - * and the last time that the clear() was executed. Due to the possibility of asynchronous - * errors occuring, a TensorRT API can return correct results, but still register errors - * with the Error Recorder. The value of getNbErrors must monotonically increases until clear() - * is called. 
- * - * @return Returns the number of errors detected, or 0 if there are no errors. - * - * @see clear - * */ - - - //! - //! - //! - //! - //! - //! - public native @NoException(true) int getNbErrors(); - - /** - * \brief Returns the ErrorCode enumeration. - * - * @param errorIdx A 32bit integer that indexes into the error array. - * - * The errorIdx specifies what error code from 0 to getNbErrors()-1 that the application - * wants to analyze and return the error code enum. - * - * @return Returns the enum corresponding to errorIdx. - * - * @see getErrorDesc, ErrorCode - * */ - - - //! - //! - //! - //! - //! - //! - public native @NoException(true) ErrorCode getErrorCode(int errorIdx); - - /** - * \brief Returns the c-style string description of the error. - * - * @param errorIdx A 32bit integer that indexes into the error array. - * - * For the error specified by the idx value, return the string description of the error. The - * error string is a c-style string that is zero delimited. In the safety context there is a - * constant length requirement to remove any dynamic memory allocations and the error message - * may be truncated. The format of the string is " - ". - * - * @return Returns a string representation of the error along with a description of the error. - * - * @see getErrorCode - * */ - - - //! - //! - //! - //! - public native @NoException(true) String getErrorDesc(int errorIdx); - - /** - * \brief Determine if the error stack has overflowed. - * - * In the case when the number of errors is large, this function is used to query if one or more - * errors have been dropped due to lack of storage capacity. This is especially important in the - * automotive safety case where the internal error handling mechanisms cannot allocate memory. - * - * @return true if errors have been dropped due to overflowing the error stack. - * */ - - - //! - //! - //! - //! 
- public native @Cast("bool") @NoException(true) boolean hasOverflowed(); - - /** - * \brief Clear the error stack on the error recorder. - * - * Removes all the tracked errors by the error recorder. This function must guarantee that after - * this function is called, and as long as no error occurs, the next call to getNbErrors will return - * zero. - * - * @see getNbErrors - * */ - - - //! - //! - //! - //! - //! - public native @NoException(true) void clear(); - - // API used by TensorRT to report Error information to the application. - - /** - * \brief Report an error to the error recorder with the corresponding enum and description. - * - * @param val The error code enum that is being reported. - * @param desc The string description of the error. - * - * Report an error to the user that has a given value and human readable description. The function returns false - * if processing can continue, which implies that the reported error is not fatal. This does not guarantee that - * processing continues, but provides a hint to TensorRT. - * - * @return True if the error is determined to be fatal and processing of the current function must end. - * */ - - - //! - //! - //! - //! - public native @Cast("bool") @NoException(true) boolean reportError(ErrorCode val, String desc); - public native @Cast("bool") @NoException(true) boolean reportError(@Cast("nvinfer1::ErrorCode") int val, @Cast("const char*") BytePointer desc); - - /** - * \brief Increments the refcount for the current ErrorRecorder. - * - * Increments the reference count for the object by one and returns the current value. - * This reference count allows the application to know that an object inside of TensorRT has - * taken a reference to the ErrorRecorder. If the ErrorRecorder is released before the - * reference count hits zero, then behavior in TensorRT is undefined. It is strongly recommended - * that the increment is an atomic operation. 
TensorRT guarantees that each incRefCount called on - * an objects construction is paired with a decRefCount call when an object is destructed. - * - * @return The current reference counted value. - * */ - - - //! - //! - //! - //! - public native @Cast("nvinfer1::IErrorRecorder::RefCount") @NoException(true) int incRefCount(); - - /** - * \brief Decrements the refcount for the current ErrorRecorder. - * - * Decrements the reference count for the object by one and returns the current value. It is undefined behavior - * to call decRefCount when RefCount is zero. If the ErrorRecorder is destroyed before the reference count - * hits zero, then behavior in TensorRT is undefined. It is strongly recommended that the decrement is an - * atomic operation. TensorRT guarantees that each decRefCount called when an object is destructed is - * paired with a incRefCount call when that object was constructed. - * - * @return The current reference counted value. - * */ - public native @Cast("nvinfer1::IErrorRecorder::RefCount") @NoException(true) int decRefCount(); - -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IExecutionContext.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IExecutionContext.java deleted file mode 100644 index 873202f0c03..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IExecutionContext.java +++ /dev/null @@ -1,654 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static 
org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \class IExecutionContext - * - * \brief Context for executing inference using an engine, with functionally unsafe features. - * - * Multiple execution contexts may exist for one ICudaEngine instance, allowing the same - * engine to be used for the execution of multiple batches simultaneously. If the engine supports - * dynamic shapes, each execution context in concurrent use must use a separate optimization profile. - * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IExecutionContext extends INoCopy { - static { Loader.load(); } - /** Default native constructor. */ - public IExecutionContext() { super((Pointer)null); allocate(); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public IExecutionContext(long size) { super((Pointer)null); allocateArray(size); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IExecutionContext(Pointer p) { super(p); } - private native void allocate(); - private native void allocateArray(long size); - @Override public IExecutionContext position(long position) { - return (IExecutionContext)super.position(position); - } - @Override public IExecutionContext getPointer(long i) { - return new IExecutionContext((Pointer)this).offsetAddress(i); - } - - - /** - * \brief Synchronously execute inference on a batch. - * - * This method requires an array of input and output buffers. The mapping from tensor names to indices - * can be queried using ICudaEngine::getBindingIndex() - * - * @param batchSize The batch size. This is at most the value supplied when the engine was built. - * @param bindings An array of pointers to input and output buffers for the network. 
- * - * @return True if execution succeeded. - * - * \warning This function will trigger layer resource updates if hasImplicitBatchDimension() - * returns true and batchSize changes between subsequent calls, possibly resulting - * in performance bottlenecks. - * - * @see ICudaEngine::getBindingIndex() ICudaEngine::getMaxBatchSize() - * */ - - - //! - //! - //! - //! - //! - //! - //! - //! - public native @Cast("bool") @NoException(true) boolean execute(int batchSize, @Cast("void*const*") PointerPointer bindings); - public native @Cast("bool") @NoException(true) boolean execute(int batchSize, @Cast("void*const*") @ByPtrPtr Pointer bindings); - - /** - * \brief Asynchronously execute inference on a batch. - * - * This method requires an array of input and output buffers. The mapping from tensor names to indices can be - * queried using ICudaEngine::getBindingIndex() @param batchSize The batch size. This is at most the value supplied - * when the engine was built. - * - * @param bindings An array of pointers to input and output buffers for the network. - * @param stream A cuda stream on which the inference kernels will be enqueued. - * @param inputConsumed An optional event which will be signaled when the input buffers can be refilled with new - * data. - * - * @return True if the kernels were enqueued successfully. - * - * @see ICudaEngine::getBindingIndex() ICudaEngine::getMaxBatchSize() - * - * \warning Calling enqueue() in from the same IExecutionContext object with different CUDA streams concurrently - * results in undefined behavior. To perform inference concurrently in multiple streams, use one execution - * context per stream. - * - * \warning This function will trigger layer resource updates if hasImplicitBatchDimension() - * returns true and batchSize changes between subsequent calls, possibly resulting in performance - * bottlenecks. - * */ - - - //! - //! - //! - //! 
- public native @Cast("bool") @NoException(true) boolean enqueue(int batchSize, @Cast("void*const*") PointerPointer bindings, CUstream_st stream, @ByPtrPtr CUevent_st inputConsumed); - public native @Cast("bool") @NoException(true) boolean enqueue(int batchSize, @Cast("void*const*") @ByPtrPtr Pointer bindings, CUstream_st stream, @ByPtrPtr CUevent_st inputConsumed); - - /** - * \brief Set the debug sync flag. - * - * If this flag is set to true, the engine will log the successful execution for each kernel during execute(). It - * has no effect when using enqueue(). - * - * @see getDebugSync() - * */ - - - //! - //! - //! - public native @NoException(true) void setDebugSync(@Cast("bool") boolean sync); - - /** - * \brief Get the debug sync flag. - * - * @see setDebugSync() - * */ - - - //! - //! - //! - public native @Cast("bool") @NoException(true) boolean getDebugSync(); - - /** - * \brief Set the profiler. - * - * @see IProfiler getProfiler() - * */ - - - //! - //! - //! - public native @NoException(true) void setProfiler(IProfiler profiler); - - /** - * \brief Get the profiler. - * - * @see IProfiler setProfiler() - * */ - - - //! - //! - //! - public native @NoException(true) IProfiler getProfiler(); - - /** - * \brief Get the associated engine. - * - * @see ICudaEngine - * */ - - - //! - //! - //! - //! - public native @Const @ByRef @NoException(true) ICudaEngine getEngine(); - - /** - * \brief Destroy this object. - * - * @deprecated Deprecated interface will be removed in TensorRT 10.0. - * - * \warning Calling destroy on a managed pointer will result in a double-free error. - * */ - - - //! - //! - //! - //! - public native @Deprecated @NoException(true) void destroy(); - - /** - * \brief Set the name of the execution context. - * - * This method copies the name string. - * - * @see getName() - * */ - - - //! - //! - //! 
- public native @NoException(true) void setName(String name); - public native @NoException(true) void setName(@Cast("const char*") BytePointer name); - - /** - * \brief Return the name of the execution context. - * - * @see setName() - * */ - - - //! - //! - //! - //! - public native @NoException(true) String getName(); - - /** - * \brief Set the device memory for use by this execution context. - * - * The memory must be aligned with cuda memory alignment property (using cudaGetDeviceProperties()), and its size - * must be at least that returned by getDeviceMemorySize(). Setting memory to nullptr is acceptable if - * getDeviceMemorySize() returns 0. If using enqueue() to run the network, the memory is in use from the invocation - * of enqueue() until network execution is complete. If using execute(), it is in use until execute() returns. - * Releasing or otherwise using the memory for other purposes during this time will result in undefined behavior. - * - * @see ICudaEngine::getDeviceMemorySize() ICudaEngine::createExecutionContextWithoutDeviceMemory() - * */ - - - - //! - //! - //! - //! - //! - //! - //! - public native @Deprecated @NoException(true) void setDeviceMemory(Pointer memory); - - /** - * \brief Return the strides of the buffer for the given binding. - * - * The strides are in units of elements, not components or bytes. - * For example, for TensorFormat::kHWC8, a stride of one spans 8 scalars. - * - * Note that strides can be different for different execution contexts - * with dynamic shapes. - * - * If the bindingIndex is invalid or there are dynamic dimensions that have not been - * set yet, returns Dims with Dims::nbDims = -1. - * - * @param bindingIndex The binding index. - * - * @see getBindingComponentsPerElement - * */ - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getStrides(int bindingIndex); - /** - * \brief Select an optimization profile for the current context. - * - * @param profileIndex Index of the profile. 
It must lie between 0 and - * getEngine().getNbOptimizationProfiles() - 1 - * - * The selected profile will be used in subsequent calls to execute() or enqueue(). - * - * When an optimization profile is switched via this API, TensorRT may - * enqueue GPU memory copy operations required to set up the new profile during the subsequent enqueue() - * operations. To avoid these calls during enqueue(), use setOptimizationProfileAsync() instead. - * - * If the associated CUDA engine has dynamic inputs, this method must be called at least once - * with a unique profileIndex before calling execute or enqueue (i.e. the profile index - * may not be in use by another execution context that has not been destroyed yet). - * For the first execution context that is created for an engine, setOptimizationProfile(0) - * is called implicitly. - * - * If the associated CUDA engine does not have inputs with dynamic shapes, this method need not be - * called, in which case the default profile index of 0 will be used (this is particularly - * the case for all safe engines). - * - * setOptimizationProfile() must be called before calling setBindingDimensions() and - * setInputShapeBinding() for all dynamic input tensors or input shape tensors, which in - * turn must be called before either execute() or enqueue(). - * - * \warning This function will trigger layer resource updates on the next - * call of enqueue[V2]()/execute[V2](), possibly resulting in performance bottlenecks. - * - * @return true if the call succeeded, else false (e.g. input out of range) - * - * @deprecated This API is superseded by setOptimizationProfileAsync and will be removed in TensorRT 9.0. - * - * @see ICudaEngine::getNbOptimizationProfiles() IExecutionContext::setOptimizationProfileAsync() - * */ - - - //! - //! - //! - public native @Cast("bool") @Deprecated @NoException(true) boolean setOptimizationProfile(int profileIndex); - - /** - * \brief Get the index of the currently selected optimization profile. 
- * - * If the profile index has not been set yet (implicitly to 0 for the first execution context - * to be created, or explicitly for all subsequent contexts), an invalid value of -1 will be returned - * and all calls to enqueue() or execute() will fail until a valid profile index has been set. - * */ - - - //! - //! - //! - //! - //! - //! - //! - //! - //! - public native @NoException(true) int getOptimizationProfile(); - - /** - * \brief Set the dynamic dimensions of a binding - * - * @param bindingIndex index of an input tensor whose dimensions must be compatible with - * the network definition (i.e. only the wildcard dimension -1 can be replaced with a - * new dimension >= 0). - * - * @param dimensions specifies the dimensions of the input tensor. It must be in the valid - * range for the currently selected optimization profile, and the corresponding engine must - * not be safety-certified. - * - * This method requires the engine to be built without an implicit batch dimension. - * This method will fail unless a valid optimization profile is defined for the current - * execution context (getOptimizationProfile() must not be -1). - * - * For all dynamic non-output bindings (which have at least one wildcard dimension of -1), - * this method needs to be called before either enqueue() or execute() may be called. - * This can be checked using the method allInputDimensionsSpecified(). - * - * \warning This function will trigger layer resource updates on the next - * call of enqueue[V2]()/execute[V2](), possibly resulting in performance bottlenecks, - * if the dimensions are different than the previous set dimensions. - * - * @return false if an error occurs (e.g. bindingIndex is out of range for the currently selected - * optimization profile or binding dimension is inconsistent with min-max range of the - * optimization profile), else true. Note that the network can still be invalid for certain - * combinations of input shapes that lead to invalid output shapes. 
To confirm the correctness - * of the network input shapes, check whether the output binding has valid - * dimensions using getBindingDimensions() on the output bindingIndex. - * - * @see ICudaEngine::getBindingIndex - * */ - - - //! - //! - //! - //! - //! - //! - //! - //! - //! - public native @Cast("bool") @NoException(true) boolean setBindingDimensions(int bindingIndex, @ByVal @Cast("nvinfer1::Dims*") Dims32 dimensions); - - /** - * \brief Get the dynamic dimensions of a binding - * - * If the engine was built with an implicit batch dimension, same as ICudaEngine::getBindingDimensions. - * - * If setBindingDimensions() has been called on this binding (or if there are no - * dynamic dimensions), all dimensions will be positive. Otherwise, it is necessary to - * call setBindingDimensions() before enqueue() or execute() may be called. - * - * If the bindingIndex is out of range, an invalid Dims with nbDims == -1 is returned. - * The same invalid Dims will be returned if the engine was not built with an implicit - * batch dimension and if the execution context is not currently associated with a valid - * optimization profile (i.e. if getOptimizationProfile() returns -1). - * - * If ICudaEngine::bindingIsInput(bindingIndex) is false, then both - * allInputDimensionsSpecified() and allInputShapesSpecified() must be true - * before calling this method. - * - * @return Currently selected binding dimensions - * - * For backwards compatibility with earlier versions of TensorRT, a bindingIndex that does not belong - * to the current profile is corrected as described for ICudaEngine::getProfileDimensions. - * - * @see ICudaEngine::getProfileDimensions - * */ - - - //! - //! - //! - //! - //! - //! - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getBindingDimensions(int bindingIndex); - - /** - * \brief Set values of input tensor required by shape calculations. 
- * - * @param bindingIndex index of an input tensor for which - * ICudaEngine::isShapeBinding(bindingIndex) and ICudaEngine::bindingIsInput(bindingIndex) - * are both true. - * - * @param data pointer to values of the input tensor. The number of values should be - * the product of the dimensions returned by getBindingDimensions(bindingIndex). - * - * If ICudaEngine::isShapeBinding(bindingIndex) and ICudaEngine::bindingIsInput(bindingIndex) - * are both true, this method must be called before enqueue() or execute() may be called. - * This method will fail unless a valid optimization profile is defined for the current - * execution context (getOptimizationProfile() must not be -1). - * - * \warning This function will trigger layer resource updates on the next call of - * enqueue[V2]()/execute[V2](), possibly resulting in performance bottlenecks, if the - * shapes are different than the previous set shapes. - * - * @return false if an error occurs (e.g. bindingIndex is out of range for the currently selected - * optimization profile or shape data is inconsistent with min-max range of the - * optimization profile), else true. Note that the network can still be invalid for certain - * combinations of input shapes that lead to invalid output shapes. To confirm the correctness - * of the network input shapes, check whether the output binding has valid - * dimensions using getBindingDimensions() on the output bindingIndex. */ - - - //! - //! - //! - //! - //! - //! - public native @Cast("bool") @NoException(true) boolean setInputShapeBinding(int bindingIndex, @Const IntPointer data); - public native @Cast("bool") @NoException(true) boolean setInputShapeBinding(int bindingIndex, @Const IntBuffer data); - public native @Cast("bool") @NoException(true) boolean setInputShapeBinding(int bindingIndex, @Const int[] data); - - /** - * \brief Get values of an input tensor required for shape calculations or an output tensor produced by shape - * calculations. 
- * - * @param bindingIndex index of an input or output tensor for which - * ICudaEngine::isShapeBinding(bindingIndex) is true. - * - * @param data pointer to where values will be written. The number of values written is - * the product of the dimensions returned by getBindingDimensions(bindingIndex). - * - * If ICudaEngine::bindingIsInput(bindingIndex) is false, then both - * allInputDimensionsSpecified() and allInputShapesSpecified() must be true - * before calling this method. The method will also fail if no valid optimization profile - * has been set for the current execution context, i.e. if getOptimizationProfile() returns -1. - * - * @see isShapeBinding(bindingIndex) - * */ - - - //! - //! - //! - //! - //! - public native @Cast("bool") @NoException(true) boolean getShapeBinding(int bindingIndex, IntPointer data); - public native @Cast("bool") @NoException(true) boolean getShapeBinding(int bindingIndex, IntBuffer data); - public native @Cast("bool") @NoException(true) boolean getShapeBinding(int bindingIndex, int[] data); - - /** - * \brief Whether all dynamic dimensions of input tensors have been specified - * - * @return True if all dynamic dimensions of input tensors have been specified - * by calling setBindingDimensions(). - * - * Trivially true if network has no dynamically shaped input tensors. - * - * @see setBindingDimensions(bindingIndex,dimensions) - * */ - - - //! - //! - //! - //! - //! - public native @Cast("bool") @NoException(true) boolean allInputDimensionsSpecified(); - - /** - * \brief Whether all input shape bindings have been specified - * - * @return True if all input shape bindings have been specified by setInputShapeBinding(). - * - * Trivially true if network has no input shape bindings. - * - * @see isShapeBinding(bindingIndex) - * */ - - - //! - //! - //! - //! - //! 
- public native @Cast("bool") @NoException(true) boolean allInputShapesSpecified(); - - /** - * \brief Set the ErrorRecorder for this interface - * - * Assigns the ErrorRecorder to this interface. The ErrorRecorder will track all errors during execution. - * This function will call incRefCount of the registered ErrorRecorder at least once. Setting - * recorder to nullptr unregisters the recorder with the interface, resulting in a call to decRefCount if - * a recorder has been registered. - * - * If an error recorder is not set, messages will be sent to the global log stream. - * - * @param recorder The error recorder to register with this interface. */ - // - /** @see getErrorRecorder() - /** */ - - - //! - //! - //! - //! - //! - public native @NoException(true) void setErrorRecorder(IErrorRecorder recorder); - - /** - * \brief Get the ErrorRecorder assigned to this interface. - * - * Retrieves the assigned error recorder object for the given class. A nullptr will be returned if - * an error handler has not been set. - * - * @return A pointer to the IErrorRecorder object that has been registered. - * - * @see setErrorRecorder() - * */ - - - //! - //! - //! - //! - //! - public native @NoException(true) IErrorRecorder getErrorRecorder(); - - /** - * \brief Synchronously execute inference a network. - * - * This method requires an array of input and output buffers. The mapping from tensor names to indices can be - * queried using ICudaEngine::getBindingIndex(). - * This method only works for execution contexts built with full dimension networks. - * @param bindings An array of pointers to input and output buffers for the network. - * - * @return True if execution succeeded. - * - * @see ICudaEngine::getBindingIndex() ICudaEngine::getMaxBatchSize() - * */ - - - //! - //! - //! - //! - //! - //! - //! 
- public native @Cast("bool") @NoException(true) boolean executeV2(@Cast("void*const*") PointerPointer bindings); - public native @Cast("bool") @NoException(true) boolean executeV2(@Cast("void*const*") @ByPtrPtr Pointer bindings); - - /** - * \brief Asynchronously execute inference. - * - * This method requires an array of input and output buffers. The mapping from tensor names to indices can be - * queried using ICudaEngine::getBindingIndex(). - * This method only works for execution contexts built with full dimension networks. - * @param bindings An array of pointers to input and output buffers for the network. - * @param stream A cuda stream on which the inference kernels will be enqueued - * @param inputConsumed An optional event which will be signaled when the input buffers can be refilled with new - * data - * - * @return True if the kernels were enqueued successfully. - * - * @see ICudaEngine::getBindingIndex() ICudaEngine::getMaxBatchSize() - * - * \note Calling enqueueV2() with a stream in CUDA graph capture mode has a known issue. If dynamic shapes are - * used, the first enqueueV2() call after a setInputShapeBinding() call will cause failure in stream capture - * due to resource allocation. Please call enqueueV2() once before capturing the graph. - * - * \warning Calling enqueueV2() in from the same IExecutionContext object with different CUDA streams concurrently - * results in undefined behavior. To perform inference concurrently in multiple streams, use one execution - * context per stream. - * */ - - - //! - //! - //! - //! - //! - //! - //! - //! - //! - //! - //! 
- public native @Cast("bool") @NoException(true) boolean enqueueV2(@Cast("void*const*") PointerPointer bindings, CUstream_st stream, @ByPtrPtr CUevent_st inputConsumed); - public native @Cast("bool") @NoException(true) boolean enqueueV2(@Cast("void*const*") @ByPtrPtr Pointer bindings, CUstream_st stream, @ByPtrPtr CUevent_st inputConsumed); - - /** - * \brief Select an optimization profile for the current context with async - * semantics. - * - * @param profileIndex Index of the profile. The value must lie between 0 and - * getEngine().getNbOptimizationProfiles() - 1 - * - * @param stream A cuda stream on which the cudaMemcpyAsyncs may be - * enqueued - * - * When an optimization profile is switched via this API, TensorRT may - * require that data is copied via cudaMemcpyAsync. It is the - * application’s responsibility to guarantee that synchronization between - * the profile sync stream and the enqueue stream occurs. - * - * The selected profile will be used in subsequent calls to execute() or - * enqueue(). - * If the associated CUDA engine has inputs with dynamic shapes, the - * optimization profile must be set with a unique profileIndex before - * calling execute or enqueue. - * For the first execution context that is created for an engine, - * setOptimizationProfile(0) is called implicitly. - * - * If the associated CUDA engine does not have inputs with dynamic shapes, - * this method need not be called, in which case the default profile index - * of 0 will be used. - * - * setOptimizationProfileAsync() must be called before calling - * setBindingDimensions() and setInputShapeBinding() for all dynamic input - * tensors or input shape tensors, which in turn must be called before - * either execute() or enqueue(). - * - * \warning This function will trigger layer resource updates on the next call of - * enqueue[V2]()/execute[V2](), possibly resulting in performance bottlenecks. 
- * - * \warning Not synchronizing the stream used at enqueue with the stream - * used to set optimization profile asynchronously using this API will - * result in undefined behavior. - * - * @return true if the call succeeded, else false (e.g. input out of range) - * - * @see ICudaEngine::getNbOptimizationProfiles() - * @see IExecutionContext::setOptimizationProfile() */ - public native @Cast("bool") @NoException(true) boolean setOptimizationProfileAsync(int profileIndex, CUstream_st stream); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IExprBuilder.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IExprBuilder.java deleted file mode 100644 index b9095e8392d..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IExprBuilder.java +++ /dev/null @@ -1,54 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \class IExprBuilder - * - * Object for constructing IDimensionExpr. - * - * There is no public way to construct an IExprBuilder. It appears as an argument to - * method IPluginV2DynamicExt::getOutputDimensions(). Overrides of that method can use - * that IExprBuilder argument to construct expressions that define output dimensions - * in terms of input dimensions. 
- * - * Clients should assume that any values constructed by the IExprBuilder are destroyed - * after IPluginV2DynamicExt::getOutputDimensions() returns. - * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. - * - * @see IDimensionExpr - * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IExprBuilder extends INoCopy { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IExprBuilder(Pointer p) { super(p); } - - /** Return pointer to IDimensionExp for given value. */ - public native @Const @NoException(true) IDimensionExpr constant(int value); - - /** Return pointer to IDimensionExp that represents the given operation applied to first and second. - * Returns nullptr if op is not a valid DimensionOperation. */ - public native @Const @NoException(true) IDimensionExpr operation( - DimensionOperation op, @Const @ByRef IDimensionExpr first, @Const @ByRef IDimensionExpr second); - public native @Const @NoException(true) IDimensionExpr operation( - @Cast("nvinfer1::DimensionOperation") int op, @Const @ByRef IDimensionExpr first, @Const @ByRef IDimensionExpr second); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IFillLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IFillLayer.java deleted file mode 100644 index 7dac15c60b4..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IFillLayer.java +++ /dev/null @@ -1,229 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import 
static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \brief Generate an output tensor with specified mode. - * - * The fill layer has two variants, static and dynamic. Static fill specifies its parameters - * at layer creation time via Dims and the get/set accessor functions of the IFillLayer. - * Dynamic fill specifies one or more of its parameters as ITensors, by using ILayer::setTensor to add - * a corresponding input. The corresponding static parameter is used if an input is missing or null. - * - * The shape of the output is specified by the parameter \p Dimension, or if non-null and present, - * the first input, which must be a 1D Int32 shape tensor. Thus an application can determine if the - * IFillLayer has a dynamic output shape based on whether it has a non-null first input. - * - * Alpha and Beta are treated differently based on the Fill Operation specified. See details in - * IFillLayer::setAlpha(), IFillLayer::setBeta(), and IFillLayer::setInput(). - * - * A fill layer can produce a shape tensor if the following restrictions are met: - * - * * The FillOperation is kLINSPACE. - * * The output is a 1D Int32 tensor with length not exceeding 2*Dims::MAX_DIMS. - * * There is at most one input, and if so, that input is input 0. - * * If input 0 exists, the length of the output tensor must be computable by constant folding. - * - * @see FillOperation - * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IFillLayer extends ILayer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public IFillLayer(Pointer p) { super(p); } - - /** - * \brief Set the output tensor's dimensions. - * - * @param dimensions The output tensor's dimensions. - * - * If the first input had been used to create this layer, that input is reset to null by this method. - * - * @see getDimensions */ - // - - - //! - //! - //! - //! - //! - public native @NoException(true) void setDimensions(@ByVal @Cast("nvinfer1::Dims*") Dims32 dimensions); - - /** - * \brief Get the output tensor's dimensions. - * - * @return The output tensor's dimensions, or an invalid Dims structure. - * - * If the first input is present and non-null, - * this function returns a Dims with nbDims = -1. - * - * @see setDimensions - * */ - - - //! - //! - //! - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getDimensions(); - - /** - * \brief Set the fill operation for the layer. - * - * @see getOperation(), FillOperation - * */ - - - //! - //! - //! - public native @NoException(true) void setOperation(FillOperation op); - public native @NoException(true) void setOperation(@Cast("nvinfer1::FillOperation") int op); - - /** - * \brief Get the fill operation for the layer. - * - * @see setOperation(), FillOperation - * */ - - - //! - //! - //! - //! - //! - public native @NoException(true) FillOperation getOperation(); - - /** - * \brief Set the alpha parameter. - * - * @param alpha has different meanings for each operator: - * - * Operation | Usage - * kLINSPACE | the start value; - * kRANDOMUNIFORM | the minimum value; - * - * If a second input had been used to create this layer, that input is reset to null by this method. - * - * @see getAlpha */ - // - - - //! - //! - //! - //! - //! - public native @NoException(true) void setAlpha(double alpha); - - /** - * \brief Get the value of alpha parameter. - * - * @return A double value of alpha. - * - * If the second input is present and non-null, - * this function returns a Dims with nbDims = -1. 
- * - * @see setAlpha - * */ - - - //! - //! - //! - //! - //! - //! - public native @NoException(true) double getAlpha(); - - /** - * \brief Set the beta parameter. - * - * @param beta has different meanings for each operator: - * - * Operation | Usage - * kLINSPACE | the delta value; - * kRANDOMUNIFORM | the maximal value; - * - * If a third input had been used to create this layer, that input is reset to null by this method. - * - * @see getBeta - * */ - - - //! - //! - //! - //! - //! - public native @NoException(true) void setBeta(double beta); - - /** - * \brief Get the value of beta parameter. - * - * @return A double value of beta. - * - * If the third input is present and non-null, - * this function returns a Dims with nbDims = -1. - * - * @see setBeta - * */ - - - //! - //! - //! - //! - //! - //! - //! - //! - //! - //! - public native @NoException(true) double getBeta(); - - /** - * \brief replace an input of this layer with a specific tensor. - * - * @param index the index of the input to set. - * @param tensor the new input tensor - * - * Indices for kLINSPACE are described as: - * - * - 0: Shape tensor, represents the output tensor's dimensions. - * - 1: Start, a scalar, represents the start value. - * - 2: Delta, a 1D tensor, length equals to shape tensor's nbDims, represents the delta value for each dimension. - * - * Indices for kRANDOM_UNIFORM are described as: - * - * - 0: Shape tensor, represents the output tensor's dimensions. - * - 1: Minimum, a scalar, represents the minimum random value. - * - 2: Maximum, a scalar, represents the maximal random value. - * - * Using the corresponding setter resets the input to null. - * - * If either inputs 1 or 2, is non-null, then both must be non-null and have the same data type. - * - * If this function is called for an index greater or equal to getNbInputs(), - * then afterwards getNbInputs() returns index + 1, and any missing intervening - * inputs are set to null. 
- * */ -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IFullyConnectedLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IFullyConnectedLayer.java deleted file mode 100644 index 06c60520e83..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IFullyConnectedLayer.java +++ /dev/null @@ -1,152 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** \class IFullyConnectedLayer - * - * \brief A fully connected layer in a network definition. - * This layer expects an input tensor of three or more non-batch dimensions. The input is automatically - * reshaped into an {@code MxV} tensor {@code X}, where {@code V} is a product of the last three dimensions and {@code M} - * is a product of the remaining dimensions (where the product over 0 dimensions is defined as 1). For example: - * - * - If the input tensor has shape {@code {C, H, W}}, then the tensor is reshaped into {@code {1, C*H*W}}. - * - If the input tensor has shape {@code {P, C, H, W}}, then the tensor is reshaped into {@code {P, C*H*W}}. 
- * - * The layer then performs the following operation: - * - * ~~~ - * Y := matmul(X, W^T) + bias - * ~~~ - * - * Where {@code X} is the {@code MxV} tensor defined above, {@code W} is the {@code KxV} weight tensor - * of the layer, and {@code bias} is a row vector size {@code K} that is broadcasted to - * {@code MxK}. {@code K} is the number of output channels, and configurable via - * setNbOutputChannels(). If {@code bias} is not specified, it is implicitly {@code 0}. - * - * The {@code MxK} result {@code Y} is then reshaped such that the last three dimensions are {@code {K, 1, 1}} and - * the remaining dimensions match the dimensions of the input tensor. For example: - * - * - If the input tensor has shape {@code {C, H, W}}, then the output tensor will have shape {@code {K, 1, 1}}. - * - If the input tensor has shape {@code {P, C, H, W}}, then the output tensor will have shape {@code {P, K, 1, 1}}. - * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. - * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IFullyConnectedLayer extends ILayer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IFullyConnectedLayer(Pointer p) { super(p); } - - /** - * \brief Set the number of output channels {@code K} from the fully connected layer. - * - * If executing this layer on DLA, number of output channels must in the range [1,8192]. - * - * @see getNbOutputChannels() - * */ - - - //! - //! - //! - public native @NoException(true) void setNbOutputChannels(int nbOutputs); - - /** - * \brief Get the number of output channels {@code K} from the fully connected layer. - * - * @see setNbOutputChannels() - * */ - - - //! - //! - //! - public native @NoException(true) int getNbOutputChannels(); - - /** - * \brief Set the kernel weights, given as a {@code KxC} matrix in row-major order. 
- * - * @see getKernelWeights() - * */ - - - //! - //! - //! - public native @NoException(true) void setKernelWeights(@ByVal Weights weights); - - /** - * \brief Get the kernel weights. - * - * @see setKernelWeights() - * */ - - - //! - //! - //! - //! - public native @ByVal @NoException(true) Weights getKernelWeights(); - - /** - * \brief Set the bias weights. - * - * Bias is optional. To omit bias, set the count value in the weights structure to zero. - * - * @see getBiasWeightsWeights() - * */ - - - //! - //! - //! - public native @NoException(true) void setBiasWeights(@ByVal Weights weights); - - /** - * \brief Get the bias weights. - * - * @see setBiasWeightsWeights() - * */ - - - //! - //! - //! - //! - //! - public native @ByVal @NoException(true) Weights getBiasWeights(); - - /** - * \brief Append or replace an input of this layer with a specific tensor - * - * @param index the index of the input to modify. - * @param tensor the new input tensor - * - * For a IFullyConnectedLayer, only index 0 is valid unless explicit precision mode is enabled. - * With explicit precision mode, values 0-1 are valid where value 1 overrides kernel weights. - * Kernel weights tensor (computed at build-time) must be an output of dequantize scale layer (i.e. a scale layer - * with int8 input and float output) in explicit precision network. Conversely, this input tensor can be overridden - * via appropriate set call. The indices are as follows: - * - * - 0: The input activation tensor. - * - 1: The kernel weights tensor (a constant tensor). 
- * - * If this function is called with a value greater than 0, then the function getNbInputs() changes */ -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IGatherLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IGatherLayer.java deleted file mode 100644 index 4bab3e1c1ba..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IGatherLayer.java +++ /dev/null @@ -1,76 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. - * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IGatherLayer extends ILayer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IGatherLayer(Pointer p) { super(p); } - - /** - * \brief Set the axis to gather on. - * The axis must be less than the number of dimensions in the data input. - * - * @see getGatherAxis() - * */ - - - //! - //! - //! - public native @NoException(true) void setGatherAxis(int axis); - - /** - * \brief Get the axis to gather on. - * - * @see setGatherAxis() - * */ - - - //! - //! - //! 
- public native @NoException(true) int getGatherAxis(); - - /** - * \brief Set the number of leading dimensions of indices tensor to be handled elementwise. - * k must be 0 if there is an implicit batch dimension. It can be 0 or 1 if there is not an implicit batch - * dimension. - * - * @see getNbElementWiseDims() - * */ - - - //! - //! - //! - public native @NoException(true) void setNbElementWiseDims(int k); - - /** - * \brief Get the number of leading dimensions of indices tensor to be handled elementwise. - * - * @see setNbElementWiseDims() - * */ - public native @NoException(true) int getNbElementWiseDims(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IGpuAllocator.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IGpuAllocator.java deleted file mode 100644 index 8881309101f..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IGpuAllocator.java +++ /dev/null @@ -1,110 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \class IGpuAllocator - * - * \brief Application-implemented class for controlling allocation on the GPU. - * */ -@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IGpuAllocator extends Pointer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public IGpuAllocator(Pointer p) { super(p); } - - /** - * A thread-safe callback implemented by the application to handle acquisition of GPU memory. - * - * @param size The size of the memory required. - * @param alignment The required alignment of memory. Alignment will be zero - * or a power of 2 not exceeding the alignment guaranteed by cudaMalloc. - * Thus this allocator can be safely implemented with cudaMalloc/cudaFree. - * An alignment value of zero indicates any alignment is acceptable. - * @param flags Reserved for future use. In the current release, 0 will be passed. - * - * If an allocation request of size 0 is made, nullptr should be returned. - * - * If an allocation request cannot be satisfied, nullptr should be returned. - * - * \note The implementation must guarantee thread safety for concurrent allocate/free/reallocate - * requests. - * */ - - - //! - //! - //! - //! - //! - public native @Name("allocate") @NoException(true) Pointer _allocate(@Cast("const uint64_t") long size, @Cast("const uint64_t") long alignment, @Cast("const nvinfer1::AllocatorFlags") int flags); - - /** - * A thread-safe callback implemented by the application to handle release of GPU memory. - * - * TensorRT may pass a nullptr to this function if it was previously returned by allocate(). - * - * @param memory The acquired memory. - * - * \note The implementation must guarantee thread safety for concurrent allocate/free/reallocate - * requests. - * */ - - - //! - //! - public native @Name("free") @NoException(true) void _free(Pointer memory); - - /** - * Destructor declared virtual as general good practice for a class with virtual methods. - * TensorRT never calls the destructor for an IGpuAllocator defined by the application. - * */ - - /** - * A thread-safe callback implemented by the application to resize an existing allocation. - * - * Only allocations which were allocated with AllocatorFlag::kRESIZABLE will be resized. 
- * - * Options are one of: - * * resize in place leaving min(oldSize, newSize) bytes unchanged and return the original address - * * move min(oldSize, newSize) bytes to a new location of sufficient size and return its address - * * return nullptr, to indicate that the request could not be fulfilled. - * - * If nullptr is returned, TensorRT will assume that resize() is not implemented, and that the - * allocation at baseAddr is still valid. - * - * This method is made available for use cases where delegating the resize - * strategy to the application provides an opportunity to improve memory management. - * One possible implementation is to allocate a large virtual device buffer and - * progressively commit physical memory with cuMemMap. CU_MEM_ALLOC_GRANULARITY_RECOMMENDED - * is suggested in this case. - * - * TensorRT may call realloc to increase the buffer by relatively small amounts. - * - * @param baseAddr the address of the original allocation. - * @param alignment The alignment used by the original allocation. - * @param newSize The new memory size required. - * @return the address of the reallocated memory - * - * \note The implementation must guarantee thread safety for concurrent allocate/free/reallocate - * requests. 
- * */ - public native @NoException(true) Pointer reallocate(Pointer baseAddr, @Cast("uint64_t") long alignment, @Cast("uint64_t") long newSize); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IHostMemory.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IHostMemory.java deleted file mode 100644 index 309f48751ec..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IHostMemory.java +++ /dev/null @@ -1,72 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \class IHostMemory - * - * \brief Class to handle library allocated memory that is accessible to the user. - * - * The memory allocated via the host memory object is owned by the library and will - * be de-allocated when the destroy method is called. - * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. - * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IHostMemory extends INoCopy { - static { Loader.load(); } - /** Default native constructor. */ - public IHostMemory() { super((Pointer)null); allocate(); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public IHostMemory(long size) { super((Pointer)null); allocateArray(size); } - /** Pointer cast constructor. 
Invokes {@link Pointer#Pointer(Pointer)}. */ - public IHostMemory(Pointer p) { super(p); } - private native void allocate(); - private native void allocateArray(long size); - @Override public IHostMemory position(long position) { - return (IHostMemory)super.position(position); - } - @Override public IHostMemory getPointer(long i) { - return new IHostMemory((Pointer)this).offsetAddress(i); - } - - - /** A pointer to the raw data that is owned by the library. */ - public native @NoException(true) Pointer data(); - - /** The size in bytes of the data that was allocated. */ - public native @Cast("std::size_t") @NoException(true) long size(); - - /** The type of the memory that was allocated. */ - - //! - //! - //! - //! - public native @NoException(true) DataType type(); - /** - * Destroy the allocated memory. - * - * @deprecated Deprecated interface will be removed in TensorRT 10.0. - * - * \warning Calling destroy on a managed pointer will result in a double-free error. - * */ - public native @Deprecated @NoException(true) void destroy(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IIdentityLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IIdentityLayer.java deleted file mode 100644 index f6e78ae79e0..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IIdentityLayer.java +++ /dev/null @@ -1,39 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static 
org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** \class IIdentityLayer - * - * \brief A layer that represents the identity function. - * - * If tensor precision is being explicitly specified, it can be used to convert from one precision to another. - * Other than conversion between the same precision (kFLOAT -> kFLOAT for example), the only valid - * tranformations supported are: (kHALF -> kINT32), (kHALF -> kFLOAT), (kFLOAT -> kINT32), (kINT32 -> kHALF), - * (kINT32 -> kFLOAT), (kBOOL -> kBOOL), (kBOOL -> kHALF), (kBOOL -> kFLOAT). - * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. - * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IIdentityLayer extends ILayer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IIdentityLayer(Pointer p) { super(p); } - -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IInt8Calibrator.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IInt8Calibrator.java deleted file mode 100644 index ed922916d2d..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IInt8Calibrator.java +++ /dev/null @@ -1,130 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static 
org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \class IInt8Calibrator - * - * \brief Application-implemented interface for calibration. - * - * Calibration is a step performed by the builder when deciding suitable scale factors for 8-bit inference. - * - * It must also provide a method for retrieving representative images which the calibration process can use to examine - * the distribution of activations. It may optionally implement a method for caching the calibration result for reuse - * on subsequent runs. - * */ -@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IInt8Calibrator extends Pointer { - static { Loader.load(); } - /** Default native constructor. */ - public IInt8Calibrator() { super((Pointer)null); allocate(); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public IInt8Calibrator(long size) { super((Pointer)null); allocateArray(size); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IInt8Calibrator(Pointer p) { super(p); } - private native void allocate(); - private native void allocateArray(long size); - @Override public IInt8Calibrator position(long position) { - return (IInt8Calibrator)super.position(position); - } - @Override public IInt8Calibrator getPointer(long i) { - return new IInt8Calibrator((Pointer)this).offsetAddress(i); - } - - /** - * \brief Get the batch size used for calibration batches. - * - * @return The batch size. - * */ - - - //! - //! - //! - //! - //! - @Virtual(true) public native @NoException(true) @Const({false, false, true}) int getBatchSize(); - - /** - * \brief Get a batch of input for calibration. - * - * The batch size of the input must match the batch size returned by getBatchSize(). - * - * @param bindings An array of pointers to device memory that must be updated to point to device memory - * containing each network input data. 
- * @param names The names of the network input for each pointer in the binding array. - * @param nbBindings The number of pointers in the bindings array. - * @return False if there are no more batches for calibration. - * - * @see getBatchSize() - * */ - - - //! - //! - //! - //! - //! - @Virtual(true) public native @Cast("bool") @NoException(true) boolean getBatch(@Cast("void**") PointerPointer bindings, @Cast("const char**") PointerPointer names, int nbBindings); - - /** - * \brief Load a calibration cache. - * - * Calibration is potentially expensive, so it can be useful to generate the calibration data once, then use it on - * subsequent builds of the network. The cache includes the regression cutoff and quantile values used to generate - * it, and will not be used if these do not batch the settings of the current calibrator. However, the network - * should also be recalibrated if its structure changes, or the input data set changes, and it is the - * responsibility of the application to ensure this. - * - * @param length The length of the cached data, that should be set by the called function. If there is no data, - * this should be zero. - * - * @return A pointer to the cache, or nullptr if there is no data. - * */ - - - //! - //! - //! - //! - @Virtual(true) public native @Const @NoException(true) Pointer readCalibrationCache(@Cast("std::size_t*") @ByRef LongPointer length); - - /** - * \brief Save a calibration cache. - * - * @param ptr A pointer to the data to cache. - * @param length The length in bytes of the data to cache. - * - * @see readCalibrationCache() - * */ - - - //! - //! - //! - @Virtual(true) public native @NoException(true) void writeCalibrationCache(@Const Pointer ptr, @Cast("std::size_t") long length); - - /** - * \brief Get the algorithm used by this calibrator. - * - * @return The algorithm used by the calibrator. 
- * */ - @Virtual(true) public native @NoException(true) CalibrationAlgoType getAlgorithm(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IInt8EntropyCalibrator.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IInt8EntropyCalibrator.java deleted file mode 100644 index 7664b69b916..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IInt8EntropyCalibrator.java +++ /dev/null @@ -1,48 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * Entropy calibrator. This is the Legacy Entropy calibrator. It is less complicated than the legacy calibrator and - * produces better results. - * */ -@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IInt8EntropyCalibrator extends IInt8Calibrator { - static { Loader.load(); } - /** Default native constructor. */ - public IInt8EntropyCalibrator() { super((Pointer)null); allocate(); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public IInt8EntropyCalibrator(long size) { super((Pointer)null); allocateArray(size); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public IInt8EntropyCalibrator(Pointer p) { super(p); } - private native void allocate(); - private native void allocateArray(long size); - @Override public IInt8EntropyCalibrator position(long position) { - return (IInt8EntropyCalibrator)super.position(position); - } - @Override public IInt8EntropyCalibrator getPointer(long i) { - return new IInt8EntropyCalibrator((Pointer)this).offsetAddress(i); - } - - /** - * Signal that this is the entropy calibrator. - * */ - @Virtual public native @NoException(true) CalibrationAlgoType getAlgorithm(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IInt8EntropyCalibrator2.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IInt8EntropyCalibrator2.java deleted file mode 100644 index 706e4366305..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IInt8EntropyCalibrator2.java +++ /dev/null @@ -1,48 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * Entropy calibrator 2. This is the preferred calibrator. This is the required calibrator for DLA, as it supports per - * activation tensor scaling. - * */ -@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IInt8EntropyCalibrator2 extends IInt8Calibrator { - static { Loader.load(); } - /** Default native constructor. 
*/ - public IInt8EntropyCalibrator2() { super((Pointer)null); allocate(); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public IInt8EntropyCalibrator2(long size) { super((Pointer)null); allocateArray(size); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IInt8EntropyCalibrator2(Pointer p) { super(p); } - private native void allocate(); - private native void allocateArray(long size); - @Override public IInt8EntropyCalibrator2 position(long position) { - return (IInt8EntropyCalibrator2)super.position(position); - } - @Override public IInt8EntropyCalibrator2 getPointer(long i) { - return new IInt8EntropyCalibrator2((Pointer)this).offsetAddress(i); - } - - /** - * Signal that this is the entropy calibrator 2. - * */ - @Virtual public native @NoException(true) CalibrationAlgoType getAlgorithm(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IInt8LegacyCalibrator.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IInt8LegacyCalibrator.java deleted file mode 100644 index 3894da826f7..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IInt8LegacyCalibrator.java +++ /dev/null @@ -1,111 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * Legacy calibrator left for backward compatibility with TensorRT 2.0. 
This calibrator requires user parameterization, - * and is provided as a fallback option if the other calibrators yield poor results. - * */ -@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IInt8LegacyCalibrator extends IInt8Calibrator { - static { Loader.load(); } - /** Default native constructor. */ - public IInt8LegacyCalibrator() { super((Pointer)null); allocate(); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public IInt8LegacyCalibrator(long size) { super((Pointer)null); allocateArray(size); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IInt8LegacyCalibrator(Pointer p) { super(p); } - private native void allocate(); - private native void allocateArray(long size); - @Override public IInt8LegacyCalibrator position(long position) { - return (IInt8LegacyCalibrator)super.position(position); - } - @Override public IInt8LegacyCalibrator getPointer(long i) { - return new IInt8LegacyCalibrator((Pointer)this).offsetAddress(i); - } - - /** - * Signal that this is the legacy calibrator. - * */ - - - //! - //! - //! - @Virtual public native @NoException(true) CalibrationAlgoType getAlgorithm(); - - /** - * \brief The quantile (between 0 and 1) that will be used to select the region maximum when the quantile method - * is in use. - * - * See the user guide for more details on how the quantile is used. - * */ - - - //! - //! - //! - @Virtual(true) public native @NoException(true) @Const({false, false, true}) double getQuantile(); - - /** - * \brief The fraction (between 0 and 1) of the maximum used to define the regression cutoff when using regression - * to determine the region maximum. - * - * See the user guide for more details on how the regression cutoff is used - * */ - - - //! - //! - //! - //! - //! 
- @Virtual(true) public native @NoException(true) @Const({false, false, true}) double getRegressionCutoff(); - - /** - * \brief Load a histogram. - * - * Histogram generation is potentially expensive, so it can be useful to generate the histograms once, then use - * them when exploring the space of calibrations. The histograms should be regenerated if the network structure - * changes, or the input data set changes, and it is the responsibility of the application to ensure this. - * - * @param length The length of the cached data, that should be set by the called function. If there is no data, - * this should be zero. - * - * @return A pointer to the cache, or nullptr if there is no data. - * */ - - - //! - //! - //! - //! - @Virtual(true) public native @Const @NoException(true) Pointer readHistogramCache(@Cast("std::size_t*") @ByRef LongPointer length); - - /** - * \brief Save a histogram cache. - * - * @param ptr A pointer to the data to cache. - * @param length The length in bytes of the data to cache. 
- * - * @see readHistogramCache() - * */ - @Virtual(true) public native @NoException(true) void writeHistogramCache(@Const Pointer ptr, @Cast("std::size_t") long length); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IInt8MinMaxCalibrator.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IInt8MinMaxCalibrator.java deleted file mode 100644 index ba98a49e13f..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IInt8MinMaxCalibrator.java +++ /dev/null @@ -1,47 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * MinMax Calibrator. It supports per activation tensor scaling. - * */ -@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IInt8MinMaxCalibrator extends IInt8Calibrator { - static { Loader.load(); } - /** Default native constructor. */ - public IInt8MinMaxCalibrator() { super((Pointer)null); allocate(); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public IInt8MinMaxCalibrator(long size) { super((Pointer)null); allocateArray(size); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public IInt8MinMaxCalibrator(Pointer p) { super(p); } - private native void allocate(); - private native void allocateArray(long size); - @Override public IInt8MinMaxCalibrator position(long position) { - return (IInt8MinMaxCalibrator)super.position(position); - } - @Override public IInt8MinMaxCalibrator getPointer(long i) { - return new IInt8MinMaxCalibrator((Pointer)this).offsetAddress(i); - } - - /** - * Signal that this is the MinMax Calibrator. - * */ - @Virtual public native @NoException(true) CalibrationAlgoType getAlgorithm(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IIteratorLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IIteratorLayer.java deleted file mode 100644 index 0bbcad6bc6e..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IIteratorLayer.java +++ /dev/null @@ -1,43 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IIteratorLayer extends ILoopBoundaryLayer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IIteratorLayer(Pointer p) { super(p); } - - /** Set axis to iterate over. 
*/ - public native @NoException(true) void setAxis(int axis); - - /** Get axis being iterated over. */ - public native @NoException(true) int getAxis(); - - /** For reverse=false, the layer is equivalent to addGather(tensor, I, 0) where I is a - * scalar tensor containing the loop iteration number. - * For reverse=true, the layer is equivalent to addGather(tensor, M-1-I, 0) where M is the trip count - * computed from TripLimits of kind kCOUNT. - * The default is reverse=false. */ - public native @NoException(true) void setReverse(@Cast("bool") boolean reverse); - - /** True if and only if reversing input. */ - public native @Cast("bool") @NoException(true) boolean getReverse(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILRNLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILRNLayer.java deleted file mode 100644 index 244ab953ad0..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILRNLayer.java +++ /dev/null @@ -1,134 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \class ILRNLayer - * - * \brief A LRN layer in a network definition. - * - * The output size is the same as the input size. - * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. 
- * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class ILRNLayer extends ILayer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public ILRNLayer(Pointer p) { super(p); } - - /** - * \brief Set the LRN window size. - * - * The window size must be odd and in the range of [1, 15]. - * - * If executing this layer on the DLA, only values in the set, [3, 5, 7, 9], are valid. - * - * @see setWindowStride() - * */ - - - //! - //! - //! - public native @NoException(true) void setWindowSize(int windowSize); - - /** - * \brief Get the LRN window size. - * - * @see getWindowStride() - * */ - - - //! - //! - //! - public native @NoException(true) int getWindowSize(); - - /** - * \brief Set the LRN alpha value. - * - * The valid range is [-1e20, 1e20]. - * @see getAlpha() - * */ - - - //! - //! - //! - public native @NoException(true) void setAlpha(float alpha); - - /** - * \brief Get the LRN alpha value. - * - * @see setAlpha() - * */ - - - //! - //! - //! - public native @NoException(true) float getAlpha(); - - /** - * \brief Set the LRN beta value. - * - * The valid range is [0.01, 1e5f]. - * @see getBeta() - * */ - - - //! - //! - //! - public native @NoException(true) void setBeta(float beta); - - /** - * \brief Get the LRN beta value. - * - * @see setBeta() - * */ - - - //! - //! - //! - public native @NoException(true) float getBeta(); - - /** - * \brief Set the LRN K value. - * - * The valid range is [1e-5, 1e10]. - * @see getK() - * */ - - - //! - //! - //! - public native @NoException(true) void setK(float k); - - /** - * \brief Get the LRN K value. 
- * - * @see setK() - * */ - public native @NoException(true) float getK(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILayer.java deleted file mode 100644 index c5ffeef0cf3..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILayer.java +++ /dev/null @@ -1,291 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \class ILayer - * - * \brief Base class for all layer classes in a network definition. - * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. - * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class ILayer extends INoCopy { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public ILayer(Pointer p) { super(p); } - - /** - * \brief Return the type of a layer. - * - * @see LayerType - * */ - - - //! - //! - //! - //! - public native @NoException(true) LayerType getType(); - - /** - * \brief Set the name of a layer. - * - * This method copies the name string. - * - * @see getName() - * */ - - - //! - //! - //! 
- public native @NoException(true) void setName(String name); - public native @NoException(true) void setName(@Cast("const char*") BytePointer name); - - /** - * \brief Return the name of a layer. - * -

- * @see setName() - * */ - - - //! - //! - public native @NoException(true) String getName(); - - /** - * \brief Get the number of inputs of a layer. - * */ - - - //! - //! - //! - //! - public native @NoException(true) int getNbInputs(); - - /** - * \brief Get the layer input corresponding to the given index. - * - * @param index The index of the input tensor. - * - * @return The input tensor, or nullptr if the index is out of range or the tensor is optional - * (\ref ISliceLayer and \ref IRNNv2Layer). - * */ - - - //! - //! - public native @NoException(true) ITensor getInput(int index); - - /** - * \brief Get the number of outputs of a layer. - * */ - - - //! - //! - //! - public native @NoException(true) int getNbOutputs(); - - /** - * \brief Get the layer output corresponding to the given index. - * - * @return The indexed output tensor, or nullptr if the index is out of range or the tensor is optional - * (\ref IRNNv2Layer). - * */ - - - //! - //! - //! - //! - //! - public native @NoException(true) ITensor getOutput(int index); - - /** - * \brief Replace an input of this layer with a specific tensor. - * - * @param index the index of the input to modify. - * @param tensor the new input tensor - * - * Except for IFillLayer, ILoopOutputLayer, IResizeLayer, IShuffleLayer, and ISliceLayer, - * this method cannot change the number of inputs to a layer. The index argument must be - * less than the value of getNbInputs(). - * - * See comments for overloads of setInput() for layers with special behavior. - * */ - - - //! - //! - //! - //! - //! - public native @NoException(true) void setInput(int index, @ByRef ITensor tensor); - - /** - * \brief Set the computational precision of this layer - * - * Setting the precision allows TensorRT to choose implementation which run at this computational precision. - * Layer input type would also get inferred from layer computational precision. 
TensorRT could still choose a - * non-conforming fastest implementation ignoring set layer precision. Use BuilderFlag::kSTRICT_TYPES to force - * choose implementations with requested precision. In case no implementation is found with requested precision, - * TensorRT would choose available fastest implementation. If precision is not set, TensorRT will select the layer - * computational precision and layer input type based on performance considerations and the flags specified to the - * builder. - * - * @param dataType the computational precision. - * - * @see getPrecision() precisionIsSet() resetPrecision() - * */ - - - //! - //! - //! - //! - public native @NoException(true) void setPrecision(DataType dataType); - public native @NoException(true) void setPrecision(@Cast("nvinfer1::DataType") int dataType); - - /** - * \brief get the computational precision of this layer - * - * @return the computational precision - * - * @see setPrecision() precisionIsSet() resetPrecision() - * */ - - - //! - //! - //! - //! - public native @NoException(true) DataType getPrecision(); - - /** - * \brief whether the computational precision has been set for this layer - * - * @return whether the computational precision has been explicitly set - * - * @see setPrecision() getPrecision() resetPrecision() - * */ - - - //! - //! - //! - public native @Cast("bool") @NoException(true) boolean precisionIsSet(); - - /** - * \brief reset the computational precision for this layer - * - * @see setPrecision() getPrecision() precisionIsSet() - * */ - - - //! - //! - //! - //! - //! - //! - //! - public native @NoException(true) void resetPrecision(); - - /** - * \brief Set the output type of this layer - * - * Setting the output type constrains TensorRT to choose implementations which generate output data with the - * given type. If it is not set, TensorRT will select output type based on layer computational precision. 
TensorRT - * could still choose non-conforming output type based on fastest implementation. Use BuilderFlag::kSTRICT_TYPES to - * force choose requested output type. In case layer precision is not specified, output type would depend on - * chosen implementation based on performance considerations and the flags specified to the builder. - * - * This method cannot be used to set the data type of the second output tensor of the TopK layer. The data type of - * the second output tensor of the topK layer is always Int32. Also the output type of all layers that are shape - * operations must be DataType::kINT32, and all attempts to set the output type to some other data type will be - * ignored except for issuing an error message. - * - * Note that the layer output type is generally not identical to the data type of the output tensor, as TensorRT - * may insert implicit reformatting operations to convert the former to the latter. Calling layer->setOutputType(i, - * type) has no effect on the data type of the i-th output tensor of layer, and users need to call - * layer->getOutput(i)->setType(type) to change the tensor data type. This is particularly relevant if the tensor - * is marked as a network output, since only setType() [but not setOutputType()] will affect the data - * representation in the corresponding output binding. - * - * @param index the index of the output to set - * @param dataType the type of the output - * - * @see getOutputType() outputTypeIsSet() resetOutputType() - * */ - - - //! - //! - //! - //! - public native @NoException(true) void setOutputType(int index, DataType dataType); - public native @NoException(true) void setOutputType(int index, @Cast("nvinfer1::DataType") int dataType); - - /** - * \brief get the output type of this layer - * - * @param index the index of the output - * @return the output precision. If no precision has been set, DataType::kFLOAT will be returned, - * unless the output type is inherently DataType::kINT32. 
- * - * @see getOutputType() outputTypeIsSet() resetOutputType() - * */ - - - //! - //! - //! - //! - public native @NoException(true) DataType getOutputType(int index); - - /** - * \brief whether the output type has been set for this layer - * - * @param index the index of the output - * @return whether the output type has been explicitly set - * - * @see setOutputType() getOutputType() resetOutputType() - * */ - - - //! - //! - //! - //! - public native @Cast("bool") @NoException(true) boolean outputTypeIsSet(int index); - - /** - * \brief reset the output type for this layer - * - * @param index the index of the output - * - * @see setOutputType() getOutputType() outputTypeIsSet() - * */ - public native @NoException(true) void resetOutputType(int index); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILogger.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILogger.java deleted file mode 100644 index 7a03125b900..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILogger.java +++ /dev/null @@ -1,80 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \class ILogger - * - * \brief Application-implemented logging interface for the builder, engine and runtime. 
- * - * Note that although a logger is passed on creation to each instance of a IBuilder or IRuntime interfaces, the logger - * is internally considered a singleton, and thus multiple instances of IRuntime and/or IBuilder must all use the same - * logger. - * */ -@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class ILogger extends Pointer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public ILogger(Pointer p) { super(p); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public ILogger(long size) { super((Pointer)null); allocateArray(size); } - private native void allocateArray(long size); - @Override public ILogger position(long position) { - return (ILogger)super.position(position); - } - @Override public ILogger getPointer(long i) { - return new ILogger((Pointer)this).offsetAddress(i); - } - - /** - * \enum Severity - * - * The severity corresponding to a log message. - * */ - public enum Severity { - /** An internal error has occurred. Execution is unrecoverable. */ - kINTERNAL_ERROR(0), - /** An application error has occurred. */ - kERROR(1), - /** An application error has been discovered, but TensorRT has recovered or fallen back to a default. */ - kWARNING(2), - /** Informational messages with instructional information. */ - kINFO(3), - /** Verbose messages with debugging information. */ - kVERBOSE(4); - - public final int value; - private Severity(int v) { this.value = v; } - private Severity(Severity e) { this.value = e.value; } - public Severity intern() { for (Severity e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } - } - - /** - * A callback implemented by the application to handle logging messages; - * - * @param severity The severity of the message. - * @param msg The log message, null terminated. 
- * */ - @Virtual(true) public native @NoException(true) void log(Severity severity, String msg); - - public ILogger() { super((Pointer)null); allocate(); } - private native void allocate(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILoop.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILoop.java deleted file mode 100644 index df4cdee446d..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILoop.java +++ /dev/null @@ -1,127 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * Helper for creating a recurrent subgraph. - * - * An ILoop cannot be added to an INetworkDefinition where hasImplicitBatchDimensions() returns true. - * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class ILoop extends INoCopy { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public ILoop(Pointer p) { super(p); } - - /** - * \brief Create a recurrence layer for this loop with initialValue as its first input. - * - * IRecurrenceLayer requires exactly two inputs. The 2nd input must be added, via method - * IRecurrenceLayer::setInput(1,...) before an Engine can be built. - * */ - - - //! - //! - //! - //! - //! 
- public native @NoException(true) IRecurrenceLayer addRecurrence(@ByRef ITensor initialValue); - - /** - * \brief Add a trip-count limiter, based on the given tensor. - * - * There may be at most one kCOUNT and one kWHILE limiter for a loop. - * When both trip limits exist, the loop exits when the - * count is reached or condition is falsified. - * It is an error to not add at least one trip limiter. - * - * For kCOUNT, the input tensor must be available before the loop starts. - * - * For kWHILE, the input tensor must be the output of a subgraph that contains - * only layers that are not ITripLimitLayer, IIteratorLayer or ILoopOutputLayer. - * Any IRecurrenceLayers in the subgraph must belong to the same loop as the - * ITripLimitLayer. A trivial example of this rule is that the input to the kWHILE - * is the output of an IRecurrenceLayer for the same loop. - * */ - - - //! - //! - //! - public native @NoException(true) ITripLimitLayer addTripLimit(@ByRef ITensor tensor, TripLimit _limit); - public native @NoException(true) ITripLimitLayer addTripLimit(@ByRef ITensor tensor, @Cast("nvinfer1::TripLimit") int _limit); - - /** - * \brief Return layer that subscripts tensor by loop iteration. - * - * For reverse=false, this is equivalent to addGather(tensor, I, 0) where I is a - * scalar tensor containing the loop iteration number. - * For reverse=true, this is equivalent to addGather(tensor, M-1-I, 0) where M is the trip count - * computed from TripLimits of kind kCOUNT. - * */ - - //! - //! - //! - public native @NoException(true) IIteratorLayer addIterator(@ByRef ITensor tensor, int axis/*=0*/, @Cast("bool") boolean reverse/*=false*/); - public native @NoException(true) IIteratorLayer addIterator(@ByRef ITensor tensor); - - /** \brief Make an output for this loop, based on the given tensor. - * - * axis is the axis for concatenation (if using outputKind of kCONCATENATE or kREVERSE). 
- * - * If outputKind is kCONCATENATE or kREVERSE, a second input specifying the - * concatenation dimension must be added via method ILoopOutputLayer::setInput. - * */ - - - //! - //! - //! - //! - public native @NoException(true) ILoopOutputLayer addLoopOutput(@ByRef ITensor tensor, LoopOutput outputKind, int axis/*=0*/); - public native @NoException(true) ILoopOutputLayer addLoopOutput(@ByRef ITensor tensor, LoopOutput outputKind); - public native @NoException(true) ILoopOutputLayer addLoopOutput(@ByRef ITensor tensor, @Cast("nvinfer1::LoopOutput") int outputKind, int axis/*=0*/); - public native @NoException(true) ILoopOutputLayer addLoopOutput(@ByRef ITensor tensor, @Cast("nvinfer1::LoopOutput") int outputKind); - - /** - * \brief Set the name of the loop. - * - * The name is used in error diagnostics. - * This method copies the name string. - * - * @see getName() - * */ - - - //! - //! - //! - public native @NoException(true) void setName(String name); - public native @NoException(true) void setName(@Cast("const char*") BytePointer name); - - /** - * \brief Return the name of the loop. 
- * - * @see setName() - * */ - public native @NoException(true) String getName(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILoopBoundaryLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILoopBoundaryLayer.java deleted file mode 100644 index e88e0245b37..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILoopBoundaryLayer.java +++ /dev/null @@ -1,30 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class ILoopBoundaryLayer extends ILayer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public ILoopBoundaryLayer(Pointer p) { super(p); } - - /** Return pointer to ILoop associated with this boundary layer. 
*/ - public native @NoException(true) ILoop getLoop(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILoopOutputLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILoopOutputLayer.java deleted file mode 100644 index 8a27f7357b3..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ILoopOutputLayer.java +++ /dev/null @@ -1,97 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * An ILoopOutputLayer is the sole way to get output from a loop. - * - * The first input tensor must be defined inside the loop; the output tensor is outside the loop. - * The second input tensor, if present, must be defined outside the loop. - * - * If getLoopOutput() is kLAST_VALUE, a single input must be provided, - * and that input must from a IRecurrenceLayer in the same loop. - * - * If getLoopOutput() is kCONCATENATE or kREVERSE, a second input must be provided. - * The second input must be a scalar “shape tensor”, defined before the loop commences, - * that specifies the concatenation length of the output. - * - * The output tensor has j more dimensions than the input tensor, where - * j == 0 if getLoopOutput() is kLAST_VALUE - * j == 1 if getLoopOutput() is kCONCATENATE or kREVERSE. 
- * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class ILoopOutputLayer extends ILoopBoundaryLayer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public ILoopOutputLayer(Pointer p) { super(p); } - - - - //! - //! - //! - public native @NoException(true) LoopOutput getLoopOutput(); - - /** - * \brief Set where to insert the contenation axis. Ignored if getLoopOutput() is kLAST_VALUE. - * - * For example, if the input tensor has dimensions [b,c,d], - * and getLoopOutput() is kCONCATENATE, the output has four dimensions. - * Let a be the value of the second input. - * setAxis(0) causes the output to have dimensions [a,b,c,d]. - * setAxis(1) causes the output to have dimensions [b,a,c,d]. - * setAxis(2) causes the output to have dimensions [b,c,a,d]. - * setAxis(3) causes the output to have dimensions [b,c,d,a]. - * Default is axis is 0. - * */ - public native @NoException(true) void setAxis(int axis); - - /** Get axis being concatenated over. */ - - - //! - //! - //! - //! - //! - //! - public native @NoException(true) int getAxis(); - - /** - * \brief Append or replace an input of this layer with a specific tensor - * - * @param index the index of the input to modify. - * @param tensor the new input tensor */ - // - /** Sets the input tensor for the given index. The index must be 0 for a kLAST_VALUE loop output layer. - /** Loop output layer is converted to a kCONCATENATE or kREVERSE loop output layer by calling setInput with an - /** index 1. A kCONCATENATE or kREVERSE loop output layer cannot be converted back to a kLAST_VALUE loop output - /** layer. - /** - /** For a kCONCATENATE or kREVERSE loop output layer, the values 0 and 1 are valid. - /** The indices in the kCONCATENATE or kREVERSE cases are as follows: - /** - /** - 0: Contribution to the output tensor. The contribution must come from inside the loop. 
- /** - 1: The concatenation length scalar value, must come from outside the loop, as a 0D Int32 shape tensor. - /** - /** If this function is called with a value 1, then the function getNbInputs() changes - /** from returning 1 to 2. - /** */ -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IMatrixMultiplyLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IMatrixMultiplyLayer.java deleted file mode 100644 index 4a14443dcdf..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IMatrixMultiplyLayer.java +++ /dev/null @@ -1,72 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \class IMatrixMultiplyLayer - * - * \brief Layer that represents a Matrix Multiplication. - * - * Let A be op(getInput(0)) and B be op(getInput(1)) where - * op(x) denotes the corresponding MatrixOperation. - * - * When A and B are matrices or vectors, computes the inner product A * B: - * - * matrix * matrix -> matrix - * matrix * vector -> vector - * vector * matrix -> vector - * vector * vector -> scalar - * - * Inputs of higher rank are treated as collections of matrices or vectors. - * The output will be a corresponding collection of matrices, vectors, or scalars. 
- * - * For a dimension that is not one of the matrix or vector dimensions: - * If the dimension is 1 for one of the tensors but not the other tensor, - * the former tensor is broadcast along that dimension to match the dimension of the latter tensor. - * The number of these extra dimensions for A and B must match. - * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. - * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IMatrixMultiplyLayer extends ILayer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IMatrixMultiplyLayer(Pointer p) { super(p); } - - /** - * \brief Set the operation for an input tensor. - * @param index Input tensor number (0 or 1). - * @param op New operation. - * @see getOperation() - * */ - - - //! - //! - public native @NoException(true) void setOperation(int index, MatrixOperation op); - public native @NoException(true) void setOperation(int index, @Cast("nvinfer1::MatrixOperation") int op); - - /** - * \brief Get the operation for an input tensor. - * @param index Input tensor number (0 or 1). 
- * @see setOperation() - * */ - public native @NoException(true) MatrixOperation getOperation(int index); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/INetworkDefinition.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/INetworkDefinition.java deleted file mode 100644 index 75e9db3a9a0..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/INetworkDefinition.java +++ /dev/null @@ -1,1480 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \class INetworkDefinition - * - * \brief A network definition for input to the builder. - * - * A network definition defines the structure of the network, and combined with a IBuilderConfig, is built - * into an engine using an IBuilder. An INetworkDefinition can either have an implicit batch dimensions, specified - * at runtime, or all dimensions explicit, full dims mode, in the network definition. When a network has been - * created using createNetwork(), only implicit batch size mode is supported. The function hasImplicitBatchDimension() - * is used to query the mode of the network. - * - * A network with implicit batch dimensions returns the dimensions of a layer without the implicit dimension, - * and instead the batch is specified at execute/enqueue time. 
If the network has all dimensions specified, then - * the first dimension follows elementwise broadcast rules: if it is 1 for some inputs and is some value N for all - * other inputs, then the first dimension of each outut is N, and the inputs with 1 for the first dimension are - * broadcast. Having divergent batch sizes across inputs to a layer is not supported. - * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. - * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class INetworkDefinition extends INoCopy { - static { Loader.load(); } - /** Default native constructor. */ - public INetworkDefinition() { super((Pointer)null); allocate(); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public INetworkDefinition(long size) { super((Pointer)null); allocateArray(size); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public INetworkDefinition(Pointer p) { super(p); } - private native void allocate(); - private native void allocateArray(long size); - @Override public INetworkDefinition position(long position) { - return (INetworkDefinition)super.position(position); - } - @Override public INetworkDefinition getPointer(long i) { - return new INetworkDefinition((Pointer)this).offsetAddress(i); - } - - - /** - * \brief Add an input tensor to the network. - * - * The name of the input tensor is used to find the index into the buffer array for an engine built from - * the network. The volume of the dimensions must be less than 2^30 elements. - * - * For networks with an implicit batch dimension, this volume includes the batch dimension with its length set - * to the maximum batch size. For networks with all explicit dimensions and with wildcard dimensions, the volume - * is based on the maxima specified by an IOptimizationProfile.Dimensions are normally non-negative integers. 
The - * exception is that in networks with all explicit dimensions, -1 can be used as a wildcard for a dimension to - * be specified at runtime. Input tensors with such a wildcard must have a corresponding entry in the - * IOptimizationProfiles indicating the permitted extrema, and the input dimensions must be set by - * IExecutionContext::setBindingDimensions. Different IExecutionContext instances can have different dimensions. - * Wildcard dimensions are only supported for EngineCapability::kSTANDARD. They are not - * supported in safety contexts. DLA does not support Wildcard dimensions. - * - * Tensor dimensions are specified independent of format. For example, if a - * tensor is formatted in "NHWC" or a vectorized format, the dimensions are - * still specified in the order{N, C, H, W}. For 2D images with a channel - * dimension, the last three dimensions are always {C,H,W}. For 3D images - * with a channel dimension, the last four dimensions are always {C,D,H,W}. - * - * @param name The name of the tensor. - * @param type The type of the data held in the tensor. - * @param dimensions The dimensions of the tensor. - * - * \warning It is an error to specify a wildcard value on a dimension that is determined by trained parameters. - * - * \warning If run on DLA with explicit dimensions, only leading dimension can be a wildcard. And provided profile - * must have same minimum, optimum, and maximum dimensions. - * - * @see ITensor - * - * @return The new tensor or nullptr if there is an error. - * */ - - - //! - //! - //! - //! - public native @NoException(true) ITensor addInput(String name, DataType type, @ByVal @Cast("nvinfer1::Dims*") Dims32 dimensions); - public native @NoException(true) ITensor addInput(@Cast("const char*") BytePointer name, @Cast("nvinfer1::DataType") int type, @ByVal @Cast("nvinfer1::Dims*") Dims32 dimensions); - - /** - * \brief Mark a tensor as a network output. - * - * @param tensor The tensor to mark as an output tensor. 
- * - * \warning It is an error to mark a network input as an output. - * */ - - - //! - //! - //! - //! - //! - //! - //! - public native @NoException(true) void markOutput(@ByRef ITensor tensor); - - /** - * \brief Add a convolution layer to the network. - * - * @param input The input tensor to the convolution. - * @param nbOutputMaps The number of output feature maps for the convolution. - * @param kernelSize The HW-dimensions of the convolution kernel. - * @param kernelWeights The kernel weights for the convolution. - * @param biasWeights The optional bias weights for the convolution. - * - * @see IConvolutionLayer - * - * \warning It is an error to specify a wildcard value for the 'C' dimension of the input tensor. - * \warning Int32 tensors are not valid input tensors. - * - * @return The new convolution layer, or nullptr if it could not be created. - * - * @deprecated Superseded by addConvolutionNd and will be removed in TensorRT 9.0. - * */ - - - //! - //! - //! - //! - //! - //! - public native @Deprecated @NoException(true) IConvolutionLayer addConvolution( - @ByRef ITensor input, int nbOutputMaps, @ByVal DimsHW kernelSize, @ByVal Weights kernelWeights, @ByVal Weights biasWeights); - - /** - * \brief Add a fully connected layer to the network. - * - * @param input The input tensor to the layer. - * @param nbOutputs The number of outputs of the layer. - * @param kernelWeights The kernel weights for the fully connected layer. - * @param biasWeights The optional bias weights for the fully connected layer. - * - * @see IFullyConnectedLayer - * - * \warning It is an error to specify a wildcard value for the 'C' dimension of the input tensor. - * \warning Int32 tensors are not valid input tensors. - * - * @return The new fully connected layer, or nullptr if it could not be created. - * */ - - - //! - //! - //! - //! - //! - //! 
- public native @NoException(true) IFullyConnectedLayer addFullyConnected( - @ByRef ITensor input, int nbOutputs, @ByVal Weights kernelWeights, @ByVal Weights biasWeights); - - /** - * \brief Add an activation layer to the network. - * - * @param input The input tensor to the layer. - * @param type The type of activation function to apply. - * - * Note that the setAlpha() and setBeta() methods must be used on the - * output for activations that require these parameters. - * - * @see IActivationLayer ActivationType - * \warning Int32 tensors are not valid input tensors. - * - * @return The new activation layer, or nullptr if it could not be created. - * */ - - - //! - //! - //! - //! - //! - //! - public native @NoException(true) IActivationLayer addActivation(@ByRef ITensor input, ActivationType type); - public native @NoException(true) IActivationLayer addActivation(@ByRef ITensor input, @Cast("nvinfer1::ActivationType") int type); - - /** - * \brief Add a pooling layer to the network. - * - * @param input The input tensor to the layer. - * @param type The type of pooling to apply. - * @param windowSize The size of the pooling window. - * - * @see IPoolingLayer PoolingType - * \warning Int32 tensors are not valid input tensors. - * - * @return The new pooling layer, or nullptr if it could not be created. - * - * @deprecated Superseded by addPoolingNd and will be removed in TensorRT 9.0. - * */ - - - //! - //! - //! - //! - //! - public native @Deprecated @NoException(true) IPoolingLayer addPooling(@ByRef ITensor input, PoolingType type, @ByVal DimsHW windowSize); - public native @Deprecated @NoException(true) IPoolingLayer addPooling(@ByRef ITensor input, @Cast("nvinfer1::PoolingType") int type, @ByVal DimsHW windowSize); - - /** - * \brief Add a LRN layer to the network. - * - * @param input The input tensor to the layer. - * @param window The size of the window. - * @param alpha The alpha value for the LRN computation. 
- * @param beta The beta value for the LRN computation. - * @param k The k value for the LRN computation. - * - * @see ILRNLayer - * \warning Int32 tensors are not valid input tensors. - * - * @return The new LRN layer, or nullptr if it could not be created. - * */ - - - //! - //! - //! - //! - //! - //! - public native @NoException(true) ILRNLayer addLRN(@ByRef ITensor input, int window, float alpha, float beta, float k); - - /** - * \brief Add a Scale layer to the network. - * - * @param input The input tensor to the layer. - * This tensor is required to have a minimum of 3 dimensions in implicit batch mode - * and a minimum of 4 dimensions in explicit batch mode. - * @param mode The scaling mode. - * @param shift The shift value. - * @param scale The scale value. - * @param power The power value. - * - * If the weights are available, then the size of weights are dependent on the ScaleMode. - * For ::kUNIFORM, the number of weights equals 1. - * For ::kCHANNEL, the number of weights equals the channel dimension. - * For ::kELEMENTWISE, the number of weights equals the product of the last three dimensions of the input. - * - * @see addScaleNd - * @see IScaleLayer - * \warning Int32 tensors are not valid input tensors. - * - * @return The new Scale layer, or nullptr if it could not be created. - * */ - - - //! - //! - //! - //! - public native @NoException(true) IScaleLayer addScale(@ByRef ITensor input, ScaleMode mode, @ByVal Weights shift, @ByVal Weights scale, @ByVal Weights power); - public native @NoException(true) IScaleLayer addScale(@ByRef ITensor input, @Cast("nvinfer1::ScaleMode") int mode, @ByVal Weights shift, @ByVal Weights scale, @ByVal Weights power); - - /** - * \brief Add a SoftMax layer to the network. - * - * @see ISoftMaxLayer - * \warning Int32 tensors are not valid input tensors. - * - * @return The new SoftMax layer, or nullptr if it could not be created. - * */ - - - //! - //! - //! - //! - //! - //! 
- public native @NoException(true) ISoftMaxLayer addSoftMax(@ByRef ITensor input); - - /** - * \brief Add a concatenation layer to the network. - * - * @param inputs The input tensors to the layer. - * @param nbInputs The number of input tensors. - * - * @see IConcatenationLayer - * - * @return The new concatenation layer, or nullptr if it could not be created. - * - * \warning All tensors must have the same dimensions except along the concatenation axis. - * */ - - - //! - //! - //! - //! - //! - //! - //! - public native @NoException(true) IConcatenationLayer addConcatenation(@Cast("nvinfer1::ITensor*const*") PointerPointer inputs, int nbInputs); - public native @NoException(true) IConcatenationLayer addConcatenation(@ByPtrPtr ITensor inputs, int nbInputs); - - /** - * \brief Add a deconvolution layer to the network. - * - * @param input The input tensor to the layer. - * @param nbOutputMaps The number of output feature maps. - * @param kernelSize The HW-dimensions of the deconvolution kernel. - * @param kernelWeights The kernel weights for the deconvolution. - * @param biasWeights The optional bias weights for the deconvolution. - * - * @see IDeconvolutionLayer - * - * \warning It is an error to specify a wildcard value for the 'C' dimension of the input tensor. - * \warning Int32 tensors are not valid input tensors. - * - * @return The new deconvolution layer, or nullptr if it could not be created. - * - * @deprecated Superseded by addDeconvolutionNd and will be removed in TensorRT 9.0. - * */ - - - //! - //! - //! - //! - //! - //! - //! - public native @Deprecated @NoException(true) IDeconvolutionLayer addDeconvolution( - @ByRef ITensor input, int nbOutputMaps, @ByVal DimsHW kernelSize, @ByVal Weights kernelWeights, @ByVal Weights biasWeights); - - /** - * \brief Add an elementwise layer to the network. - * - * @param input1 The first input tensor to the layer. - * @param input2 The second input tensor to the layer. 
- * @param op The binary operation that the layer applies. - * - * The input tensors must have the same number of dimensions. - * For each dimension, their lengths must match, or one of them must be one. - * In the latter case, the tensor is broadcast along that axis. - * - * The output tensor has the same number of dimensions as the inputs. - * For each dimension, its length is the maximum of the lengths of the - * corresponding input dimension. - * - * @see IElementWiseLayer - * \warning For shape tensors, ElementWiseOperation::kPOW is not a valid op. - * - * @return The new elementwise layer, or nullptr if it could not be created. - * */ - - - //! - //! - //! - //! - //! - //! - //! - public native @NoException(true) IElementWiseLayer addElementWise(@ByRef ITensor input1, @ByRef ITensor input2, ElementWiseOperation op); - public native @NoException(true) IElementWiseLayer addElementWise(@ByRef ITensor input1, @ByRef ITensor input2, @Cast("nvinfer1::ElementWiseOperation") int op); - - /** - * \brief Add a unary layer to the network. - * - * @param input The input tensor to the layer. - * @param operation The operation to apply. - * - * @see IUnaryLayer - * - * \warning Int32 tensors are not valid input tensors. - * - * \warning Shape tensors are not supported as outputs. - * - * @return The new unary layer, or nullptr if it could not be created - * */ - - //! - //! - //! - //! - //! - public native @NoException(true) IUnaryLayer addUnary(@ByRef ITensor input, UnaryOperation operation); - public native @NoException(true) IUnaryLayer addUnary(@ByRef ITensor input, @Cast("nvinfer1::UnaryOperation") int operation); - - /** \brief Add a padding layer to the network. - * - * @param input The input tensor to the layer. - * @param prePadding The padding to apply to the start of the tensor. - * @param postPadding The padding to apply to the end of the tensor. - * - * @see IPaddingLayer - * - * @return The new padding layer, or nullptr if it could not be created. 
- * - * @deprecated Superseded by addPaddingNd and will be removed in TensorRT 9.0. - * */ - - - //! - //! - //! - //! - //! - public native @Deprecated @NoException(true) IPaddingLayer addPadding(@ByRef ITensor input, @ByVal DimsHW prePadding, @ByVal DimsHW postPadding); - - /** - * \brief Add a shuffle layer to the network. - * - * @param input The input tensor to the layer. - * - * @see IShuffleLayer - * - * @return The new shuffle layer, or nullptr if it could not be created. - * */ - - - //! - //! - //! - //! - public native @NoException(true) IShuffleLayer addShuffle(@ByRef ITensor input); - - /** - * \brief Get the number of layers in the network. - * - * @return The number of layers in the network. - * - * @see getLayer() - * */ - - - //! - //! - //! - //! - //! - public native @NoException(true) int getNbLayers(); - - /** - * \brief Get the layer specified by the given index. - * - * @param index The index of the layer. - * - * @return The layer, or nullptr if the index is out of range. - * - * @see getNbLayers() - * */ - - - //! - //! - //! - //! - public native @NoException(true) ILayer getLayer(int index); - - /** - * \brief Get the number of inputs in the network. - * - * @return The number of inputs in the network. - * - * @see getInput() - * */ - - - //! - //! - //! - //! - //! - //! - public native @NoException(true) int getNbInputs(); - - /** - * \brief Get the input tensor specified by the given index. - * - * @param index The index of the input tensor. - * - * @return The input tensor, or nullptr if the index is out of range. - * - * \note adding inputs invalidates indexing here - * - * @see getNbInputs() - * */ - - - //! - //! - //! - //! - //! - public native @NoException(true) ITensor getInput(int index); - - /** - * \brief Get the number of outputs in the network. - * - * The outputs include those marked by markOutput or markOutputForShapes. - * - * @return The number of outputs in the network. - * - * @see getOutput() - * */ - - - //! - //! 
- //! - //! - //! - //! - public native @NoException(true) int getNbOutputs(); - - /** - * \brief Get the output tensor specified by the given index. - * - * @param index The index of the output tensor. - * - * @return The output tensor, or nullptr if the index is out of range. - * - * \note adding inputs invalidates indexing here - * - * @see getNbOutputs() - * */ - - - //! - //! - //! - //! - public native @NoException(true) ITensor getOutput(int index); - - /** - * \brief Destroy this INetworkDefinition object. - * - * @deprecated Deprecated interface will be removed in TensorRT 10.0. - * - * \warning Calling destroy on a managed pointer will result in a double-free error. - * */ - - - //! - //! - //! - //! - //! - //! - //! - //! - public native @Deprecated @NoException(true) void destroy(); - - /** - * \brief Add a reduce layer to the network. - * - * @param input The input tensor to the layer. - * @param operation The reduction operation to perform. - * @param reduceAxes The reduction dimensions. - * The bit in position i of bitmask reduceAxes corresponds to explicit dimension i if result. - * E.g., the least significant bit corresponds to the first explicit dimension and the next to least - * significant bit corresponds to the second explicit dimension. - * - * @param keepDimensions The boolean that specifies whether or not to keep the reduced dimensions in the - * output of the layer. - * - * The reduce layer works by performing an operation specified by \p operation to reduce the tensor \p input across - * the - * axes specified by \p reduceAxes. - * - * @see IReduceLayer - * - * \warning If output is a shape tensor, ReduceOperation::kAVG is unsupported. - * - * @return The new reduce layer, or nullptr if it could not be created. - * */ - - - //! - //! - //! - //! - //! - //! - //! - //! - //! - //! - //! - //! 
- public native @NoException(true) IReduceLayer addReduce( - @ByRef ITensor input, ReduceOperation operation, @Cast("uint32_t") int reduceAxes, @Cast("bool") boolean keepDimensions); - public native @NoException(true) IReduceLayer addReduce( - @ByRef ITensor input, @Cast("nvinfer1::ReduceOperation") int operation, @Cast("uint32_t") int reduceAxes, @Cast("bool") boolean keepDimensions); - - /** - * \brief Add a TopK layer to the network. - * - * The TopK layer has two outputs of the same dimensions. The first contains data values, - * the second contains index positions for the values. Output values are sorted, largest first - * for operation kMAX and smallest first for operation kMIN. - * - * Currently only values of K up to 1024 are supported. - * - * @param input The input tensor to the layer. - * - * @param op Operation to perform. - * - * @param k Number of elements to keep. - * - * @param reduceAxes The reduction dimensions. - * The bit in position i of bitmask reduceAxes corresponds to explicit dimension i of the result. - * E.g., the least significant bit corresponds to the first explicit dimension and the next to least - * significant bit corresponds to the second explicit dimension. - * - * Currently reduceAxes must specify exactly one dimension, and it must be one of the last four dimensions. - * - * @see ITopKLayer - * - * \warning Int32 tensors are not valid input tensors. - * - * @return The new TopK layer, or nullptr if it could not be created. - * */ - - - //! - //! - //! - //! - //! - public native @NoException(true) ITopKLayer addTopK(@ByRef ITensor input, TopKOperation op, int k, @Cast("uint32_t") int reduceAxes); - public native @NoException(true) ITopKLayer addTopK(@ByRef ITensor input, @Cast("nvinfer1::TopKOperation") int op, int k, @Cast("uint32_t") int reduceAxes); - - /** - * \brief Add a gather layer to the network. - * - * @param data The tensor to gather values from. 
- * @param indices The tensor to get indices from to populate the output tensor. - * @param axis The axis in the data tensor to gather on. - * - * @see IGatherLayer - * - * @return The new gather layer, or nullptr if it could not be created. - * */ - - - //! - //! - //! - //! - //! - //! - public native @NoException(true) IGatherLayer addGather(@ByRef ITensor data, @ByRef ITensor indices, int axis); - - /** - * \brief Add a RaggedSoftMax layer to the network. - * - * @param input The ZxS input tensor. - * @param bounds The Zx1 bounds tensor. - * - * @see IRaggedSoftMaxLayer - * - * \warning The bounds tensor cannot have the last dimension be the wildcard character. - * \warning Int32 tensors are not valid input tensors. - * - * @return The new RaggedSoftMax layer, or nullptr if it could not be created. - * */ - - - //! - //! - //! - //! - //! - //! - public native @NoException(true) IRaggedSoftMaxLayer addRaggedSoftMax(@ByRef ITensor input, @ByRef ITensor bounds); - - /** - * \brief Add a MatrixMultiply layer to the network. - * - * @param input0 The first input tensor (commonly A). - * @param op0 The operation to apply to input0. - * @param input1 The second input tensor (commonly B). - * @param op1 The operation to apply to input1. - * - * @see IMatrixMultiplyLayer - * - * \warning Int32 tensors are not valid input tensors. - * - * @return The new matrix multiply layer, or nullptr if it could not be created. - * */ - - - //! - //! - //! - //! - //! - //! - //! - //! - public native @NoException(true) IMatrixMultiplyLayer addMatrixMultiply( - @ByRef ITensor input0, MatrixOperation op0, @ByRef ITensor input1, MatrixOperation op1); - public native @NoException(true) IMatrixMultiplyLayer addMatrixMultiply( - @ByRef ITensor input0, @Cast("nvinfer1::MatrixOperation") int op0, @ByRef ITensor input1, @Cast("nvinfer1::MatrixOperation") int op1); - - /** - * \brief Add a constant layer to the network. - * - * @param dimensions The dimensions of the constant. 
- * @param weights The constant value, represented as weights. - * - * @see IConstantLayer - * - * @return The new constant layer, or nullptr if it could not be created. - * - * If weights.type is DataType::kINT32, the output is a tensor of 32-bit indices. - * Otherwise the output is a tensor of real values and the output type will be - * follow TensorRT's normal precision rules. - * - * If tensors in the network have an implicit batch dimension, the constant - * is broadcast over that dimension. - * - * If a wildcard dimension is used, the volume of the runtime dimensions must equal - * the number of weights specified. - * */ - - - //! - //! - //! - //! - //! - //! - //! - //! - //! - //! - //! - //! - //! - //! - //! - //! - //! - //! - public native @NoException(true) IConstantLayer addConstant(@ByVal @Cast("nvinfer1::Dims*") Dims32 dimensions, @ByVal Weights weights); - - /** - * \brief Add an \p layerCount deep RNN layer to the network with \p hiddenSize internal states that can - * take a batch with fixed or variable sequence lengths. - * - * @param input The input tensor to the layer (see below). - * @param layerCount The number of layers in the RNN. - * @param hiddenSize Size of the internal hidden state for each layer. - * @param maxSeqLen Maximum sequence length for the input. - * @param op The type of RNN to execute. - * - * By default, the layer is configured with RNNDirection::kUNIDIRECTION and RNNInputMode::kLINEAR. - * To change these settings, use IRNNv2Layer::setDirection() and IRNNv2Layer::setInputMode(). - * - * %Weights and biases for the added layer should be set using - * IRNNv2Layer::setWeightsForGate() and IRNNv2Layer::setBiasForGate() prior - * to building an engine using this network. - * - * The input tensors must be of the type DataType::kFLOAT or DataType::kHALF. - * The layout of the weights is row major and must be the same datatype as the input tensor. - * \p weights contain 8 matrices and \p bias contains 8 vectors. 
- * - * See IRNNv2Layer::setWeightsForGate() and IRNNv2Layer::setBiasForGate() for details on the required input - * format for \p weights and \p bias. - * - * The \p input ITensor should contain zero or more index dimensions {@code {N1, ..., Np}}, followed by - * two dimensions, defined as follows: - * - {@code S_max} is the maximum allowed sequence length (number of RNN iterations) - * - {@code E} specifies the embedding length (unless ::kSKIP is set, in which case it should match - * getHiddenSize()). - * - * By default, all sequences in the input are assumed to be size \p maxSeqLen. To provide explicit sequence - * lengths for each input sequence in the batch, use IRNNv2Layer::setSequenceLengths(). - * - * The RNN layer outputs up to three tensors. - * - * The first output tensor is the output of the final RNN layer across all timesteps, with dimensions - * {@code {N1, ..., Np, S_max, H}}: - * - * - {@code N1..Np} are the index dimensions specified by the input tensor - * - {@code S_max} is the maximum allowed sequence length (number of RNN iterations) - * - {@code H} is an output hidden state (equal to getHiddenSize() or 2x getHiddenSize()) - * - * The second tensor is the final hidden state of the RNN across all layers, and if the RNN - * is an LSTM (i.e. getOperation() is ::kLSTM), then the third tensor is the final cell state - * of the RNN across all layers. Both the second and third output tensors have dimensions - * {@code {N1, ..., Np, L, H}}: - * - * - {@code N1..Np} are the index dimensions specified by the input tensor - * - {@code L} is the number of layers in the RNN, equal to getLayerCount() if getDirection is ::kUNIDIRECTION, - * and 2x getLayerCount() if getDirection is ::kBIDIRECTION. In the bi-directional - * case, layer {@code l}'s final forward hidden state is stored in {@code L = 2*l}, and - * final backward hidden state is stored in {@code L= 2*l + 1}. - * - {@code H} is the hidden state for each layer, equal to getHiddenSize(). 
- * - * @see IRNNv2Layer - * - * @deprecated Superseded by INetworkDefinition::addLoop and will be removed in TensorRT 9.0. - * - * \warning RNN inputs do not support wildcard dimensions or explicit batch size networks. - * \warning Int32 tensors are not valid input tensors, only for sequence lengths. - * - * @return The new RNN layer, or nullptr if it could not be created. - * */ - - - //! - //! - //! - //! - //! - //! - public native @Deprecated @NoException(true) IRNNv2Layer addRNNv2( - @ByRef ITensor input, int layerCount, int hiddenSize, int maxSeqLen, RNNOperation op); - public native @Deprecated @NoException(true) IRNNv2Layer addRNNv2( - @ByRef ITensor input, int layerCount, int hiddenSize, int maxSeqLen, @Cast("nvinfer1::RNNOperation") int op); - - /** - * \brief Add an identity layer. - * - * @param input The input tensor to the layer. - * - * @see IIdentityLayer - * - * \warning Int32 tensors are not valid input tensors. - * - * @return The new identity layer, or nullptr if it could not be created. - * */ - - - //! - //! - //! - //! - public native @NoException(true) IIdentityLayer addIdentity(@ByRef ITensor input); - - /** - * \brief remove a tensor from the network definition. - * - * @param tensor the tensor to remove - * - * It is illegal to remove a tensor that is the input or output of a layer. - * if this method is called with such a tensor, a warning will be emitted on the log - * and the call will be ignored. Its intended use is to remove detached tensors after - * e.g. concatenating two networks with Layer::setInput(). - * */ - - - //! - //! - //! - //! - public native @NoException(true) void removeTensor(@ByRef ITensor tensor); - - /** - * \brief unmark a tensor as a network output. - * - * @param tensor The tensor to unmark as an output tensor. - * - * see markOutput() - * */ - - - //! - //! - //! - //! - //! - //! 
- public native @NoException(true) void unmarkOutput(@ByRef ITensor tensor); - - /** - * \brief Add a plugin layer to the network using the IPluginV2 interface. - * - * @param inputs The input tensors to the layer. - * @param nbInputs The number of input tensors. - * @param plugin The layer plugin. - * - * @see IPluginV2Layer - * - * \warning Dimension wildcard are only supported with IPluginV2DynamicExt or IPluginV2IOExt plugins. - * \warning Int32 tensors are not valid input tensors. - * - * @return The new plugin layer, or nullptr if it could not be created. - * */ - - - //! - //! - //! - //! - //! - //! - public native @NoException(true) IPluginV2Layer addPluginV2(@Cast("nvinfer1::ITensor*const*") PointerPointer inputs, int nbInputs, @ByRef IPluginV2 plugin); - public native @NoException(true) IPluginV2Layer addPluginV2(@ByPtrPtr ITensor inputs, int nbInputs, @ByRef IPluginV2 plugin); - - /** - * \brief Add a slice layer to the network. - * - * @param input The input tensor to the layer. - * @param start The start offset - * @param size The output dimension - * @param stride The slicing stride - * - * Positive, negative, zero stride values, and combinations of them in different dimensions are allowed. - * - * @see ISliceLayer - * - * @return The new slice layer, or nullptr if it could not be created. - * */ - - - //! - //! - //! - //! - //! - //! - //! - public native @NoException(true) ISliceLayer addSlice(@ByRef ITensor input, @ByVal @Cast("nvinfer1::Dims*") Dims32 start, @ByVal @Cast("nvinfer1::Dims*") Dims32 size, @ByVal @Cast("nvinfer1::Dims*") Dims32 stride); - - /** - * \brief Sets the name of the network. - * - * @param name The name to assign to this network. - * - * Set the name of the network so that it can be associated with a built - * engine. The \p name must be a zero delimited C-style string of length - * no greater than 128 characters. 
TensorRT makes no use of this string - * except storing it as part of the engine so that it may be retrieved at - * runtime. A name unique to the builder will be generated by default. - * - * This method copies the name string. - * - * @see INetworkDefinition::getName(), ISafeCudaEngine::getName() - * - * @return none - * */ - - - //! - //! - //! - //! - //! - public native @NoException(true) void setName(String name); - public native @NoException(true) void setName(@Cast("const char*") BytePointer name); - - /** - * \brief Returns the name associated with the network. - * - * The memory pointed to by getName() is owned by the INetworkDefinition object. - * - * @see INetworkDefinition::setName() - * - * @return A zero delimited C-style string representing the name of the network. - * */ - - - //! - //! - //! - //! - //! - //! - //! - public native @NoException(true) String getName(); - - /** - * \brief Add a shape layer to the network. - * - * @param input The input tensor to the layer. - * - * @see IShapeLayer - * - * \warning addShape is only supported when hasImplicitBatchDimensions is false. - * - * \warning input to addShape cannot contain wildcard dimension values. - * - * @return The new shape layer, or nullptr if it could not be created. - * */ - - - //! - //! - //! - //! - //! - //! - public native @NoException(true) IShapeLayer addShape(@ByRef ITensor input); - - /** - * \brief Query whether the network was created with an implicit batch dimension. - * - * @return True if tensors have implicit batch dimension, false otherwise. - * - * This is a network-wide property. Either all tensors in the network - * have an implicit batch dimension or none of them do. - * - * hasImplicitBatchDimension() is true if and only if this INetworkDefinition - * was created with createNetwork() or createNetworkV2() without - * NetworkDefinitionCreationFlag::kEXPLICIT_BATCH flag. - * - * @see createNetworkV2 - * */ - - - //! - //! - //! - //! - //! - //! - //! 
- public native @Cast("bool") @NoException(true) boolean hasImplicitBatchDimension(); - - /** - * \brief Enable tensor's value to be computed by IExecutionContext::getShapeBinding. - * - * @return True if successful, false if tensor is already marked as an output. - * - * The tensor must be of type DataType::kINT32 and have no more than one dimension. - * - * \warning The tensor must have dimensions that can be determined to be constants at build time. - * - * \warning It is an error to mark a network input as a shape output. - * - * @see isShapeBinding(), getShapeBinding() - * */ - - - //! - //! - //! - //! - public native @Cast("bool") @NoException(true) boolean markOutputForShapes(@ByRef ITensor tensor); - - /** - * \brief Undo markOutputForShapes. - * - * \warning inputs to addShape cannot contain wildcard dimension values. - * - * @return True if successful, false if tensor is not marked as an output. - * */ - - - //! - //! - //! - //! - //! - //! - public native @Cast("bool") @NoException(true) boolean unmarkOutputForShapes(@ByRef ITensor tensor); - - /** - * \brief Add a parametric ReLU layer to the network. - * - * @param input The input tensor to the layer. - * @param slope The slope tensor to the layer. This tensor should be unidirectionally broadcastable - * to the input tensor. - * - * @see IParametricReLULayer - * - * \warning Int32 tensors are not valid input tensors. - * - * @return The new parametric ReLU layer, or nullptr if it could not be created. - * */ - - - //! - //! - //! - //! - //! - //! - public native @NoException(true) IParametricReLULayer addParametricReLU(@ByRef ITensor input, @ByRef ITensor slope); - - /** - * \brief Add a multi-dimension convolution layer to the network. - * - * @param input The input tensor to the convolution. - * @param nbOutputMaps The number of output feature maps for the convolution. - * @param kernelSize The multi-dimensions of the convolution kernel. 
- * @param kernelWeights The kernel weights for the convolution. - * @param biasWeights The optional bias weights for the convolution. - * - * @see IConvolutionLayer - * - * \warning It is an error to specify a wildcard value for the 'C' dimension of the input tensor. - * \warning Int32 tensors are not valid input tensors. - * \warning Only 2D or 3D convolution is supported. - * - * @return The new convolution layer, or nullptr if it could not be created. - * */ - - - //! - //! - //! - //! - //! - //! - public native @NoException(true) IConvolutionLayer addConvolutionNd( - @ByRef ITensor input, int nbOutputMaps, @ByVal @Cast("nvinfer1::Dims*") Dims32 kernelSize, @ByVal Weights kernelWeights, @ByVal Weights biasWeights); - - /** - * \brief Add a multi-dimension pooling layer to the network. - * - * @param input The input tensor to the layer. - * @param type The type of pooling to apply. - * @param windowSize The size of the pooling window. - * - * @see IPoolingLayer PoolingType - * - * \warning Int32 tensors are not valid input tensors. - * \warning Only 2D or 3D pooling is supported. - * - * @return The new pooling layer, or nullptr if it could not be created. - * */ - - - //! - //! - //! - //! - //! - public native @NoException(true) IPoolingLayer addPoolingNd(@ByRef ITensor input, PoolingType type, @ByVal @Cast("nvinfer1::Dims*") Dims32 windowSize); - public native @NoException(true) IPoolingLayer addPoolingNd(@ByRef ITensor input, @Cast("nvinfer1::PoolingType") int type, @ByVal @Cast("nvinfer1::Dims*") Dims32 windowSize); - - /** - * \brief Add a multi-dimension deconvolution layer to the network. - * - * @param input The input tensor to the layer. - * @param nbOutputMaps The number of output feature maps. - * @param kernelSize The multi-dimensions of the deconvolution kernel. - * @param kernelWeights The kernel weights for the deconvolution. - * @param biasWeights The optional bias weights for the deconvolution. 
- * - * @see IDeconvolutionLayer - * - * \warning It is an error to specify a wildcard value for the 'C' dimension of the input tensor. - * \warning Int32 tensors are not valid input tensors. - * \warning Only 2D or 3D deconvolution is supported. */ - // - /** @return The new deconvolution layer, or nullptr if it could not be created. - /** */ - - - //! - //! - //! - //! - //! - //! - //! - //! - //! - public native @NoException(true) IDeconvolutionLayer addDeconvolutionNd( - @ByRef ITensor input, int nbOutputMaps, @ByVal @Cast("nvinfer1::Dims*") Dims32 kernelSize, @ByVal Weights kernelWeights, @ByVal Weights biasWeights); - - /** - * \brief Add a multi-dimension scale layer to the network. - * - * @param input The input tensor to the layer. - * @param mode The scaling mode. - * @param shift The shift value. - * @param scale The scale value. - * @param power The power value. - * @param channelAxis The channel axis. - * - * If the weights are available, then the size of weights are dependent on the ScaleMode. - * For ::kUNIFORM, the number of weights equals 1. - * For ::kCHANNEL, the number of weights equals the channel dimension. - * For ::kELEMENTWISE, the number of weights equals the product of all input dimensions at channelAxis and beyond. - * - * For example, if the inputs dimensions are [A,B,C,D,E,F], and channelAxis=2: - * For ::kUNIFORM, the number of weights is equal to 1. - * For ::kCHANNEL, the number of weights is C. - * For ::kELEMENTWISE, the number of weights is C*D*E*F. - * - * channelAxis can also be set explicitly using setChannelAxis(). - * - * @see IScaleLayer - * @see setChannelAxis() - * - * \warning Int32 tensors are not valid input tensors. - * \warning Only 2D or 3D scale is supported. - * - * @return The new Scale layer, or nullptr if it could not be created. - * */ - - //! - //! - //! - //! - //! 
- public native @NoException(true) IScaleLayer addScaleNd( - @ByRef ITensor input, ScaleMode mode, @ByVal Weights shift, @ByVal Weights scale, @ByVal Weights power, int channelAxis); - public native @NoException(true) IScaleLayer addScaleNd( - @ByRef ITensor input, @Cast("nvinfer1::ScaleMode") int mode, @ByVal Weights shift, @ByVal Weights scale, @ByVal Weights power, int channelAxis); - - /** \brief Add a resize layer to the network. - * - * @param input The input tensor to the layer. - * - * @see IResizeLayer - * - * \warning Int32 tensors are not valid input tensors. - * - * @return The new resize layer, or nullptr if it could not be created. - * */ - - - //! - //! - //! - //! - //! - //! - public native @NoException(true) IResizeLayer addResize(@ByRef ITensor input); - - /** - * \brief True if network is an explicit precision network - * - * @deprecated Will be removed in TensorRT 10.0. - * - * hasExplicitPrecision() is true if and only if this INetworkDefinition - * was created with createNetworkV2() with NetworkDefinitionCreationFlag::kEXPLICIT_PRECISION set. - * - * @see createNetworkV2 - * - * @return True if network has explicit precision, false otherwise. - * */ - - - //! - //! - //! - //! - //! - public native @Cast("bool") @Deprecated @NoException(true) boolean hasExplicitPrecision(); - - /** - * \brief Add a loop to the network. - * - * An ILoop provides a way to specify a recurrent subgraph. - * - * @return Pointer to ILoop that can be used to add loop boundary layers for the loop, - * or nullptr if network has an implicit batch dimension or this version - * of TensorRT does not support loops. - * - * The network must not have an implicit batch dimension. - * */ - - //! - //! - //! - //! - //! - //! - //! - //! - //! - //! - //! - public native @NoException(true) ILoop addLoop(); - - /** \brief Add a select layer to the network. - * - * @param condition The condition tensor to the layer. Must have type DataType::kBOOL. 
- * @param thenInput The "then" input tensor to the layer. - * @param elseInput The "else" input tensor to the layer. - * - * All three input tensors must have the same number of dimensions, and along each axis - * must have the same length or a length of one. If the length is one, the tensor - * is broadcast along that axis. The output tensor has the dimensions of the inputs AFTER - * the broadcast rule is applied. For example, given: - * - * dimensions of condition: [1,1,5,9] - * dimensions of thenInput: [1,1,5,9] - * dimensions of elseInput: [1,3,1,9] - * - * the output dimensions are [1,3,5,9], and the output contents are defined by: - * - * output[0,i,j,k] = condition[0,0,j,k] ? thenInput[0,0,j,k] : elseInput[0,i,0,k] - * - * The output dimensions are not necessarily the max of the input dimensions if any input - * is an empty tensor. For example, if in the preceding example, 5 is changed to 0: - * - * dimensions of condition: [1,1,0,9] - * dimensions of thenInput: [1,1,0,9] - * dimensions of elseInput: [1,3,1,9] - * - * then the output dimensions are [1,3,0,9]. - * - * The network must not have an implicit batch dimension. - * - * @see ISelectLayer - * - * @return The new select layer, or nullptr if it could not be created. */ - - //! - //! - //! - //! - //! - //! - public native @NoException(true) ISelectLayer addSelect(@ByRef ITensor condition, @ByRef ITensor thenInput, @ByRef ITensor elseInput); - - /** \brief Add a fill layer to the network. - * - * @param dimensions The output tensor dimensions. - * @param op The fill operation that the layer applies. - * - * \warning For FillOperation::kLINSPACE, dimensions.nbDims must be 1. - * - * The network must not have an implicit batch dimension. - * - * @see IFillLayer - * - * @return The new fill layer, or nullptr if it could not be created. - * */ - - //! - //! - //! - //! 
- public native @NoException(true) IFillLayer addFill(@ByVal @Cast("nvinfer1::Dims*") Dims32 dimensions, FillOperation op); - public native @NoException(true) IFillLayer addFill(@ByVal @Cast("nvinfer1::Dims*") Dims32 dimensions, @Cast("nvinfer1::FillOperation") int op); - - /** \brief Add a padding layer to the network. Only 2D padding is currently supported. - * - * @param input The input tensor to the layer. - * @param prePadding The padding to apply to the start of the tensor. - * @param postPadding The padding to apply to the end of the tensor. - * - * @see IPaddingLayer - * - * @return The new padding layer, or nullptr if it could not be created. - * */ - - //! - //! - //! - public native @NoException(true) IPaddingLayer addPaddingNd(@ByRef ITensor input, @ByVal @Cast("nvinfer1::Dims*") Dims32 prePadding, @ByVal @Cast("nvinfer1::Dims*") Dims32 postPadding); - - /** \brief Associate a name with all current uses of the given weights. - * - * The name must be set after the Weights are used in the network. - * Lookup is associative. The name applies to all Weights with matching - * type, value pointer, and count. If Weights with a matching value - * pointer, but different type or count exists in the network, an - * error message is issued, the name is rejected, and return false. - * If the name has already been used for other weights, - * return false. A nullptr causes the weights to become unnamed, - * i.e. clears any previous name. - * - * @param weights The weights to be named. - * @param name The name to associate with the weights. - * - * @return true on success. */ - - - //! - //! - //! - //! - //! - public native @Cast("bool") @NoException(true) boolean setWeightsName(@ByVal Weights weights, String name); - public native @Cast("bool") @NoException(true) boolean setWeightsName(@ByVal Weights weights, @Cast("const char*") BytePointer name); - - /** - * \brief Set the ErrorRecorder for this interface - * - * Assigns the ErrorRecorder to this interface. 
The ErrorRecorder will track all errors during execution. - * This function will call incRefCount of the registered ErrorRecorder at least once. Setting - * recorder to nullptr unregisters the recorder with the interface, resulting in a call to decRefCount if - * a recorder has been registered. - * - * If an error recorder is not set, messages will be sent to the global log stream. - * - * @param recorder The error recorder to register with this interface. */ - // - /** @see getErrorRecorder() - /** */ - - - //! - //! - //! - //! - //! - public native @NoException(true) void setErrorRecorder(IErrorRecorder recorder); - - /** - * \brief get the ErrorRecorder assigned to this interface. - * - * Retrieves the assigned error recorder object for the given class. - * A nullptr will be returned if setErrorRecorder has not been called. - * - * @return A pointer to the IErrorRecorder object that has been registered. - * - * @see setErrorRecorder() - * */ - - - //! - //! - //! - //! - //! - //! - public native @NoException(true) IErrorRecorder getErrorRecorder(); - - /** - * \brief Add a dequantization layer to the network. - * - * @param input The input tensor to be quantized. - * @param scale A tensor with the scale value. - * - * @see IDequantizeLayer - * - * \p input tensor data type must be DataType::kFLOAT. - * \p scale tensor data type must be DataType::kFLOAT. The subgraph which terminates with the \p scale tensor must - * be a build-time constant. - * - * @return The new quantization layer, or nullptr if it could not be created. - * */ - - - //! - //! - //! - //! - //! - //! - public native @NoException(true) IDequantizeLayer addDequantize(@ByRef ITensor input, @ByRef ITensor scale); - - /** - * \brief Add a quantization layer to the network. - * - * @param input The input tensor to be quantized. - * @param scale A tensor with the scale value. - * - * @see IQuantizeLayer - * - * \p input tensor data type must be DataType::kFLOAT. 
- * \p scale tensor data type must be DataType::kFLOAT. The subgraph which terminates with the \p scale tensor must - * be a build-time constant. - * - * @return The new quantization layer, or nullptr if it could not be created. - * */ - public native @NoException(true) IQuantizeLayer addQuantize(@ByRef ITensor input, @ByRef ITensor scale); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/INoCopy.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/INoCopy.java deleted file mode 100644 index 2580639b841..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/INoCopy.java +++ /dev/null @@ -1,37 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \class INoCopy - * - * \brief Base class for all TensorRT interfaces that are implemented by the TensorRT libraries - * - * Objects of such classes are not movable or copyable, and should only be manipulated - * via pointers. - * */ - -@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class INoCopy extends Pointer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public INoCopy(Pointer p) { super(p); } - -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IOptimizationProfile.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IOptimizationProfile.java deleted file mode 100644 index c76e084040c..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IOptimizationProfile.java +++ /dev/null @@ -1,231 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \class IOptimizationProfile - * \brief Optimization profile for dynamic input dimensions and shape tensors. - * - * When building an ICudaEngine from an INetworkDefinition that has dynamically resizable inputs (at least - * one input tensor has one or more of its dimensions specified as -1) or shape input tensors, users need to specify - * at least one optimization profile. Optimization profiles are numbered 0, 1, ... - * The first optimization profile that has been defined (with index 0) will be used by the ICudaEngine whenever no - * optimization profile has been selected explicitly. If none of the inputs are dynamic, the default optimization - * profile will be generated automatically unless it is explicitly provided by the user (this is possible but not - * required in this case). 
If more than a single optimization profile is defined, users may set a target how - * much additional weight space should be maximally allocated to each additional profile (as a fraction of the - * maximum, unconstrained memory). - * - * Users set optimum input tensor dimensions, as well as minimum and maximum input tensor dimensions. The builder - * selects the kernels that result in the lowest runtime for the optimum input tensor dimensions, and are valid for - * all input tensor sizes in the valid range between minimum and maximum dimensions. A runtime error will be raised - * if the input tensor dimensions fall outside the valid range for this profile. Likewise, users provide minimum, - * optimum, and maximum values for all shape tensor input values. - * - * @see IBuilderConfig::addOptimizationProfile() - * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IOptimizationProfile extends INoCopy { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IOptimizationProfile(Pointer p) { super(p); } - - /** - * \brief Set the minimum / optimum / maximum dimensions for a dynamic input tensor. - * - * This function must be called three times (for the minimum, optimum, and maximum) for any network input tensor - * that has dynamic dimensions. 
If minDims, optDims, and maxDims are the minimum, optimum, and maximum dimensions, - * and networkDims are the dimensions for this input tensor that are provided to the INetworkDefinition object, - * then the following conditions must all hold: - * - * (1) minDims.nbDims == optDims.nbDims == maxDims.nbDims == networkDims.nbDims - * (2) 0 <= minDims.d[i] <= optDims.d[i] <= maxDims.d[i] for i = 0, ..., networkDims.nbDims-1 - * (3) if networkDims.d[i] != -1, then minDims.d[i] == optDims.d[i] == maxDims.d[i] == networkDims.d[i] - * - * This function may (but need not be) called for an input tensor that does not have dynamic dimensions. In this - * case, the third argument must always equal networkDims. - * - * @param inputName The input tensor name - * @param select Whether to set the minimum, optimum, or maximum dimensions - * @param dims The minimum, optimum, or maximum dimensions for this input tensor - * - * @return false if an inconsistency was detected (e.g. the rank does not match another dimension that was - * previously set for the same input), true if no inconsistency was detected. Note that inputs can be - * validated only partially; a full validation is performed at engine build time. - * - * \warning If run on DLA, minimum, optimum, and maximum dimensions must to be the same. - * */ - - - //! - //! - //! - public native @Cast("bool") @NoException(true) boolean setDimensions(String inputName, OptProfileSelector select, @ByVal @Cast("nvinfer1::Dims*") Dims32 dims); - public native @Cast("bool") @NoException(true) boolean setDimensions(@Cast("const char*") BytePointer inputName, @Cast("nvinfer1::OptProfileSelector") int select, @ByVal @Cast("nvinfer1::Dims*") Dims32 dims); - - /** - * \brief Get the minimum / optimum / maximum dimensions for a dynamic input tensor. - * - * If the dimensions have not been previously set via setDimensions(), return an invalid Dims with nbDims == -1. - * */ - - - //! - //! - //! - //! - //! - //! - //! - //! - //! - //! - //! 
- //! - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getDimensions(String inputName, OptProfileSelector select); - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getDimensions(@Cast("const char*") BytePointer inputName, @Cast("nvinfer1::OptProfileSelector") int select); - - /** - * \brief Set the minimum / optimum / maximum values for an input shape tensor. - * - * This function must be called three times for every input tensor t that is a shape tensor (t.isShape() == true). - * This implies that the datatype of t is DataType::kINT32, the rank is either 0 or 1, and the dimensions of t - * are fixed at network definition time. This function must not be called for any input tensor that is not a - * shape tensor. - * - * Each time this function is called for the same input tensor, the same nbValues must be supplied (either 1 - * if the tensor rank is 0, or dims.d[0] if the rank is 1). Furthermore, if minVals, optVals, maxVals are the - * minimum, optimum, and maximum values, it must be true that minVals[i] <= optVals[i] <= maxVals[i] for - * i = 0, ..., nbValues - 1. Execution of the network must be valid for the optVals. - * - * Shape tensors are tensors that contribute to shape calculations in some way, and can contain - * any int32_t values appropriate for the network. Examples: - * - * * A shape tensor used as the second input to IShuffleLayer can contain a -1 wildcard. - * The corresponding minVal[i] should be -1. - * - * * A shape tensor used as the stride input to ISliceLayer can contain any valid strides. - * The values could be positive, negative, or zero. - * - * * A shape tensor subtracted from zero to compute the size input of an ISliceLayer can - * contain any non-positive values that yield a valid slice operation. - * - * Tightening the minVals and maxVals bounds to cover only values that are necessary may help optimization. 
- * - * @param inputName The input tensor name - * @param select Whether to set the minimum, optimum, or maximum input values. - * @param values An array of length nbValues containing the minimum, optimum, or maximum shape tensor elements. - * @param nbValues The length of the value array, which must equal the number of shape tensor elements (>= 1) - * - * @return false if an inconsistency was detected (e.g. nbValues does not match a previous call for the same - * tensor), else true. As for setDimensions(), a full validation can only be performed at engine build - * time. - * - * \warning If run on DLA, minimum, optimum, and maximum shape values must to be the same. - * */ - - - //! - //! - //! - public native @Cast("bool") @NoException(true) boolean setShapeValues( - String inputName, OptProfileSelector select, @Const IntPointer values, int nbValues); - public native @Cast("bool") @NoException(true) boolean setShapeValues( - @Cast("const char*") BytePointer inputName, @Cast("nvinfer1::OptProfileSelector") int select, @Const IntBuffer values, int nbValues); - public native @Cast("bool") @NoException(true) boolean setShapeValues( - String inputName, OptProfileSelector select, @Const int[] values, int nbValues); - public native @Cast("bool") @NoException(true) boolean setShapeValues( - @Cast("const char*") BytePointer inputName, @Cast("nvinfer1::OptProfileSelector") int select, @Const IntPointer values, int nbValues); - public native @Cast("bool") @NoException(true) boolean setShapeValues( - String inputName, OptProfileSelector select, @Const IntBuffer values, int nbValues); - public native @Cast("bool") @NoException(true) boolean setShapeValues( - @Cast("const char*") BytePointer inputName, @Cast("nvinfer1::OptProfileSelector") int select, @Const int[] values, int nbValues); - - /** - * \brief Get the number of values for an input shape tensor. - * - * This will return the number of shape values if setShapeValues() has been called before for this input tensor. 
- * Otherwise, return -1. - * */ - - - //! - //! - //! - public native @NoException(true) int getNbShapeValues(String inputName); - public native @NoException(true) int getNbShapeValues(@Cast("const char*") BytePointer inputName); - - /** - * \brief Get the minimum / optimum / maximum values for an input shape tensor. - * - * If the shape values have not been set previously with setShapeValues(), this returns nullptr. - * */ - - - //! - //! - //! - //! - public native @Const @NoException(true) IntPointer getShapeValues(String inputName, OptProfileSelector select); - public native @Const @NoException(true) IntBuffer getShapeValues(@Cast("const char*") BytePointer inputName, @Cast("nvinfer1::OptProfileSelector") int select); - - /** - * \brief Set a target for extra GPU memory that may be used by this profile. - * - * @param target Additional memory that the builder should aim to maximally allocate for this profile, as a - * fraction of the memory it would use if the user did not impose any constraints on memory. This - * unconstrained case is the default; it corresponds to target == 1.0. If target == 0.0, the builder - * aims to create the new optimization profile without allocating any additional weight memory. - * Valid inputs lie between 0.0 and 1.0. This parameter is only a hint, and TensorRT does not guarantee - * that the target will be reached. This parameter is ignored for the first (default) optimization profile - * that is defined. - * - * @return true if the input is in the valid range (between 0 and 1 inclusive), else false - * */ - - - //! - //! - public native @Cast("bool") @NoException(true) boolean setExtraMemoryTarget(float target); - - /** - * \brief Get the extra memory target that has been defined for this profile. - * */ - - - //! - //! - //! - //! - public native @NoException(true) float getExtraMemoryTarget(); - - /** - * \brief Check whether the optimization profile can be passed to an IBuilderConfig object. 
- * - * This function performs partial validation, by e.g. checking that whenever one of the minimum, optimum, or - * maximum dimensions of a tensor have been set, the others have also been set and have the same rank, as - * well as checking that the optimum dimensions are always as least as large as the minimum dimensions, and - * that the maximum dimensions are at least as large as the optimum dimensions. Some validation steps require - * knowledge of the network definition and are deferred to engine build time. - * - * @return true if the optimization profile is valid and may be passed to an IBuilderConfig, else false - * */ - public native @Cast("bool") @NoException(true) boolean isValid(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPaddingLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPaddingLayer.java deleted file mode 100644 index d98a844963b..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPaddingLayer.java +++ /dev/null @@ -1,162 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \class IPaddingLayer - * - * \brief Layer that represents a padding operation. - * - * The padding layer adds zero-padding at the start and end of the input tensor. It only supports padding along the two - * innermost dimensions. 
Applying negative padding results in cropping of the input. - * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. - * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IPaddingLayer extends ILayer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IPaddingLayer(Pointer p) { super(p); } - - /** - * \brief Set the padding that is applied at the start of the tensor. - * - * Negative padding results in trimming the edge by the specified amount - * - * @see getPrePadding - * - * @deprecated Superseded by setPrePaddingNd and will be removed in TensorRT 9.0. - * */ - - - //! - //! - //! - //! - public native @Deprecated @NoException(true) void setPrePadding(@ByVal DimsHW padding); - - /** - * \brief Get the padding that is applied at the start of the tensor. - * - * @see setPrePadding - * - * @deprecated Superseded by getPrePaddingNd and will be removed in TensorRT 9.0. - * */ - - - //! - //! - //! - //! - //! - public native @Deprecated @ByVal @NoException(true) DimsHW getPrePadding(); - - /** - * \brief Set the padding that is applied at the end of the tensor. - * - * Negative padding results in trimming the edge by the specified amount - * - * @see getPostPadding - * - * @deprecated Superseded by setPostPaddingNd and will be removed in TensorRT 9.0. - * */ - - - //! - //! - //! - //! - public native @Deprecated @NoException(true) void setPostPadding(@ByVal DimsHW padding); - - /** - * \brief Get the padding that is applied at the end of the tensor. - * - * @see setPostPadding - * - * @deprecated Superseded by getPostPaddingNd and will be removed in TensorRT 9.0. - * */ - - - //! - //! - //! - //! - //! - public native @Deprecated @ByVal @NoException(true) DimsHW getPostPadding(); - - /** - * \brief Set the padding that is applied at the start of the tensor. 
- * - * Negative padding results in trimming the edge by the specified amount. - * - * \warning Only 2 dimensional padding is currently supported. - * - * @see getPrePaddingNd - * */ - - - //! - //! - //! - //! - public native @NoException(true) void setPrePaddingNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 padding); - - /** - * \brief Get the padding that is applied at the start of the tensor. - * - * \warning Only 2 dimensional padding is currently supported. - * - * @see setPrePaddingNd - * */ - - - //! - //! - //! - //! - //! - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getPrePaddingNd(); - - /** - * \brief Set the padding that is applied at the end of the tensor. - * - * Negative padding results in trimming the edge by the specified amount - * - * \warning Only 2 dimensional padding is currently supported. - * - * @see getPostPaddingNd - * */ - - - //! - //! - //! - //! - public native @NoException(true) void setPostPaddingNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 padding); - - /** - * \brief Get the padding that is applied at the end of the tensor. - * - * \warning Only 2 dimensional padding is currently supported. 
- * - * @see setPostPaddingNd - * */ - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getPostPaddingNd(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IParametricReLULayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IParametricReLULayer.java deleted file mode 100644 index 8ecf2f593a9..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IParametricReLULayer.java +++ /dev/null @@ -1,35 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \class IParametricReLULayer - * - * \brief Layer that represents a parametric ReLU operation. - * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. - * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IParametricReLULayer extends ILayer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public IParametricReLULayer(Pointer p) { super(p); } - -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPlugin.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPlugin.java deleted file mode 100644 index 9371e3db003..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPlugin.java +++ /dev/null @@ -1,27 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - -@Namespace("nvinfer1") @Opaque @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IPlugin extends Pointer { - /** Empty constructor. Calls {@code super((Pointer)null)}. */ - public IPlugin() { super((Pointer)null); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public IPlugin(Pointer p) { super(p); } -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginCreator.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginCreator.java deleted file mode 100644 index 5e639796f44..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginCreator.java +++ /dev/null @@ -1,111 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \class IPluginCreator - * - * \brief Plugin creator class for user implemented layers. - * - * @see IPlugin and IPluginFactory - * */ - -@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IPluginCreator extends Pointer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IPluginCreator(Pointer p) { super(p); } - - /** - * \brief Return the version of the API the plugin creator was compiled with. - * */ - - - //! - //! - public native @NoException(true) int getTensorRTVersion(); - - /** - * \brief Return the plugin name. - * */ - - - //! - //! - public native @NoException(true) String getPluginName(); - - /** - * \brief Return the plugin version. - * */ - - - //! - //! 
- public native @NoException(true) String getPluginVersion(); - - /** - * \brief Return a list of fields that needs to be passed to createPlugin. - * @see PluginFieldCollection - * */ - - - //! - //! - public native @Const @NoException(true) PluginFieldCollection getFieldNames(); - - /** - * \brief Return a plugin object. Return nullptr in case of error. - * */ - - - //! - //! - public native @NoException(true) IPluginV2 createPlugin(String name, @Const PluginFieldCollection fc); - public native @NoException(true) IPluginV2 createPlugin(@Cast("const char*") BytePointer name, @Const PluginFieldCollection fc); - - /** - * \brief Called during deserialization of plugin layer. Return a plugin object. - * */ - - - //! - //! - //! - public native @NoException(true) IPluginV2 deserializePlugin(String name, @Const Pointer serialData, @Cast("size_t") long serialLength); - public native @NoException(true) IPluginV2 deserializePlugin(@Cast("const char*") BytePointer name, @Const Pointer serialData, @Cast("size_t") long serialLength); - - /** - * \brief Set the namespace of the plugin creator based on the plugin - * library it belongs to. This can be set while registering the plugin creator. - * - * @see IPluginRegistry::registerCreator() - * */ - - - //! - //! - public native @NoException(true) void setPluginNamespace(String pluginNamespace); - public native @NoException(true) void setPluginNamespace(@Cast("const char*") BytePointer pluginNamespace); - - /** - * \brief Return the namespace of the plugin creator object. 
- * */ - public native @NoException(true) String getPluginNamespace(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginExt.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginExt.java deleted file mode 100644 index 1d1aac43b71..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginExt.java +++ /dev/null @@ -1,27 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - -@Namespace("nvinfer1") @Opaque @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IPluginExt extends Pointer { - /** Empty constructor. Calls {@code super((Pointer)null)}. */ - public IPluginExt() { super((Pointer)null); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public IPluginExt(Pointer p) { super(p); } -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginFactory.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginFactory.java deleted file mode 100644 index ff7c9c0c161..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginFactory.java +++ /dev/null @@ -1,33 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - -/** Forward declaration of IPluginFactory for use by other interfaces. -

-//! -//! -//! -//! */ -@Namespace("nvinfer1") @Opaque @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IPluginFactory extends Pointer { - /** Empty constructor. Calls {@code super((Pointer)null)}. */ - public IPluginFactory() { super((Pointer)null); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IPluginFactory(Pointer p) { super(p); } -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginLayer.java deleted file mode 100644 index 3c1caa8ca54..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginLayer.java +++ /dev/null @@ -1,27 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - -@Namespace("nvinfer1") @Opaque @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IPluginLayer extends Pointer { - /** Empty constructor. Calls {@code super((Pointer)null)}. */ - public IPluginLayer() { super((Pointer)null); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public IPluginLayer(Pointer p) { super(p); } -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginRegistry.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginRegistry.java deleted file mode 100644 index ce49f026e24..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginRegistry.java +++ /dev/null @@ -1,135 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \class IPluginRegistry - * - * \brief Single registration point for all plugins in an application. It is - * used to find plugin implementations during engine deserialization. - * Internally, the plugin registry is considered to be a singleton so all - * plugins in an application are part of the same global registry. - * Note that the plugin registry is only supported for plugins of type - * IPluginV2 and should also have a corresponding IPluginCreator implementation. - * - * @see IPluginV2 and IPluginCreator - * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. - * - * \warning In the automotive safety context, be sure to call IPluginRegistry::setErrorRecorder() to register - * an error recorder with the registry before using other methods in the registry. 
- * */ - -@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IPluginRegistry extends Pointer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IPluginRegistry(Pointer p) { super(p); } - - /** - * \brief Register a plugin creator. Returns false if one with same type - * is already registered. - * */ - - - //! - //! - public native @Cast("bool") @NoException(true) boolean registerCreator(@ByRef IPluginCreator creator, String pluginNamespace); - public native @Cast("bool") @NoException(true) boolean registerCreator(@ByRef IPluginCreator creator, @Cast("const char*") BytePointer pluginNamespace); - - /** - * \brief Return all the registered plugin creators and the number of - * registered plugin creators. Returns nullptr if none found. - * */ - - - //! - //! - public native @Cast("nvinfer1::IPluginCreator*const*") PointerPointer getPluginCreatorList(IntPointer numCreators); - - /** - * \brief Return plugin creator based on plugin name, version, and - * namespace associated with plugin during network creation. - * */ - public native @NoException(true) IPluginCreator getPluginCreator( - String pluginName, String pluginVersion, String pluginNamespace/*=""*/); - public native @NoException(true) IPluginCreator getPluginCreator( - String pluginName, String pluginVersion); - public native @NoException(true) IPluginCreator getPluginCreator( - @Cast("const char*") BytePointer pluginName, @Cast("const char*") BytePointer pluginVersion, @Cast("const char*") BytePointer pluginNamespace/*=""*/); - public native @NoException(true) IPluginCreator getPluginCreator( - @Cast("const char*") BytePointer pluginName, @Cast("const char*") BytePointer pluginVersion); - - - - - /** - * \brief Set the ErrorRecorder for this interface - * - * Assigns the ErrorRecorder to this interface. The ErrorRecorder will track all errors during execution. 
- * This function will call incRefCount of the registered ErrorRecorder at least once. Setting - * recorder to nullptr unregisters the recorder with the interface, resulting in a call to decRefCount if - * a recorder has been registered. - * - * @param recorder The error recorder to register with this interface. */ - // - /** @see getErrorRecorder() - /** */ - - - //! - //! - //! - //! - //! - public native @NoException(true) void setErrorRecorder(IErrorRecorder recorder); - - /** - * \brief Set the ErrorRecorder assigned to this interface. - * - * Retrieves the assigned error recorder object for the given class. A default error recorder does not exist, - * so a nullptr will be returned if setErrorRecorder has not been called, or an ErrorRecorder has not been - * inherited. - * - * @return A pointer to the IErrorRecorder object that has been registered. - * - * @see setErrorRecorder() - * */ - - - //! - //! - //! - //! - public native @NoException(true) IErrorRecorder getErrorRecorder(); - - /** - * \brief Deregister a previously registered plugin creator. - * - * Since there may be a desire to limit the number of plugins, - * this function provides a mechanism for removing plugin creators registered in TensorRT. - * The plugin creator that is specified by \p creator is removed from TensorRT and no longer tracked. - * - * @return True if the plugin creator was deregistered, false if it was not found in the registry or otherwise - * could - * not be deregistered. 
- * */ - public native @Cast("bool") @NoException(true) boolean deregisterCreator(@Const @ByRef IPluginCreator creator); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginV2.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginV2.java deleted file mode 100644 index a4ea26eb3bd..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginV2.java +++ /dev/null @@ -1,297 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** \class IPluginV2 - * - * \brief Plugin class for user-implemented layers. - * - * Plugins are a mechanism for applications to implement custom layers. When - * combined with IPluginCreator it provides a mechanism to register plugins and - * look up the Plugin Registry during de-serialization. - * - * @see IPluginCreator - * @see IPluginRegistry - * */ -@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IPluginV2 extends Pointer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IPluginV2(Pointer p) { super(p); } - - /** - * \brief Return the API version with which this plugin was built. - * - * Do not override this method as it is used by the TensorRT library to maintain backwards-compatibility with plugins. - * */ - - - //! - //! 
- public native @NoException(true) int getTensorRTVersion(); - - /** - * \brief Return the plugin type. Should match the plugin name returned by the corresponding plugin creator - * @see IPluginCreator::getPluginName() - * */ - - - //! - //! - public native @NoException(true) String getPluginType(); - - /** - * \brief Return the plugin version. Should match the plugin version returned by the corresponding plugin creator - * @see IPluginCreator::getPluginVersion() - * */ - - - //! - //! - //! - //! - public native @NoException(true) String getPluginVersion(); - - /** - * \brief Get the number of outputs from the layer. - * - * @return The number of outputs. - * - * This function is called by the implementations of INetworkDefinition and IBuilder. In particular, it is called - * prior to any call to initialize(). - * */ - - - //! - //! - //! - //! - public native @NoException(true) int getNbOutputs(); - - /** - * \brief Get the dimension of an output tensor. - * - * @param index The index of the output tensor. - * @param inputs The input tensors. - * @param nbInputDims The number of input tensors. - * - * This function is called by the implementations of INetworkDefinition and IBuilder. In particular, it is called - * prior to any call to initialize(). - * */ - - - //! - //! - //! - //! - //! - //! - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getOutputDimensions(int index, @Cast("const nvinfer1::Dims*") Dims32 inputs, int nbInputDims); - - /** - * \brief Check format support. - * - * @param type DataType requested. - * @param format PluginFormat requested. - * @return true if the plugin supports the type-format combination. - * - * This function is called by the implementations of INetworkDefinition, IBuilder, and - * safe::ICudaEngine/ICudaEngine. In particular, it is called when creating an engine and when deserializing an - * engine. 
- * - * \warning for the format field, the values PluginFormat::kCHW4, PluginFormat::kCHW16, and PluginFormat::kCHW32 - * will not be passed in, this is to keep backward compatibility with TensorRT 5.x series. Use PluginV2IOExt - * or PluginV2DynamicExt for other PluginFormats. - * - * \warning DataType:kBOOL not supported. - * */ - - - //! - //! - //! - //! - //! - //! - //! - public native @Cast("bool") @NoException(true) boolean supportsFormat(DataType type, @Cast("nvinfer1::PluginFormat") TensorFormat format); - public native @Cast("bool") @NoException(true) boolean supportsFormat(@Cast("nvinfer1::DataType") int type, @Cast("nvinfer1::PluginFormat") int format); - - /** - * \brief Configure the layer. - * - * This function is called by the builder prior to initialize(). It provides an opportunity for the layer to make - * algorithm choices on the basis of its weights, dimensions, and maximum batch size. - * - * @param inputDims The input tensor dimensions. - * @param nbInputs The number of inputs. - * @param outputDims The output tensor dimensions. - * @param nbOutputs The number of outputs. - * @param type The data type selected for the engine. - * @param format The format selected for the engine. - * @param maxBatchSize The maximum batch size. - * - * The dimensions passed here do not include the outermost batch size (i.e. for 2-D image networks, they will be - * 3-dimensional CHW dimensions). - * - * \warning for the format field, the values PluginFormat::kCHW4, PluginFormat::kCHW16, and PluginFormat::kCHW32 - * will not be passed in, this is to keep backward compatibility with TensorRT 5.x series. Use PluginV2IOExt - * or PluginV2DynamicExt for other PluginFormats. - * - * \warning DataType:kBOOL not supported. - * */ - - - //! - //! - //! 
- public native @NoException(true) void configureWithFormat(@Cast("const nvinfer1::Dims*") Dims32 inputDims, int nbInputs, @Cast("const nvinfer1::Dims*") Dims32 outputDims, int nbOutputs, - DataType type, @Cast("nvinfer1::PluginFormat") TensorFormat format, int maxBatchSize); - public native @NoException(true) void configureWithFormat(@Cast("const nvinfer1::Dims*") Dims32 inputDims, int nbInputs, @Cast("const nvinfer1::Dims*") Dims32 outputDims, int nbOutputs, - @Cast("nvinfer1::DataType") int type, @Cast("nvinfer1::PluginFormat") int format, int maxBatchSize); - - /** - * \brief Initialize the layer for execution. This is called when the engine is created. - * - * @return 0 for success, else non-zero (which will cause engine termination). - * */ - - - //! - //! - public native @NoException(true) int initialize(); - - /** - * \brief Release resources acquired during plugin layer initialization. This is called when the engine is - * destroyed. - * @see initialize() - * */ - - - //! - //! - //! - //! - public native @NoException(true) void terminate(); - - /** - * \brief Find the workspace size required by the layer. - * - * This function is called during engine startup, after initialize(). The workspace size returned should be - * sufficient for any batch size up to the maximum. - * - * @return The workspace size. - * */ - - - //! - //! - //! - //! - public native @Cast("size_t") @NoException(true) long getWorkspaceSize(int maxBatchSize); - - /** - * \brief Execute the layer. - * - * @param batchSize The number of inputs in the batch. - * @param inputs The memory for the input tensors. - * @param outputs The memory for the output tensors. - * @param workspace Workspace for execution. - * @param stream The stream in which to execute the kernels. - * - * @return 0 for success, else non-zero (which will cause engine termination). - * */ - - - //! - //! - //! 
- public native @NoException(true) int enqueue(int batchSize, @Cast("const void*const*") PointerPointer inputs, @Cast("void*const*") PointerPointer outputs, Pointer workspace, - CUstream_st stream); - public native @NoException(true) int enqueue(int batchSize, @Cast("const void*const*") @ByPtrPtr Pointer inputs, @Cast("void*const*") @ByPtrPtr Pointer outputs, Pointer workspace, - CUstream_st stream); - - /** - * \brief Find the size of the serialization buffer required. - * - * @return The size of the serialization buffer. - * */ - - - //! - //! - //! - //! - public native @Cast("size_t") @NoException(true) long getSerializationSize(); - - /** - * \brief Serialize the layer. - * - * @param buffer A pointer to a buffer to serialize data. Size of buffer must be equal to value returned by - * getSerializationSize. - * - * @see getSerializationSize() - * */ - - - //! - //! - public native @NoException(true) void serialize(Pointer buffer); - - /** - * \brief Destroy the plugin object. This will be called when the network, builder or engine is destroyed. - * */ - - - //! - //! - //! - public native @NoException(true) void destroy(); - - /** - * \brief Clone the plugin object. This copies over internal plugin parameters and returns a new plugin object with - * these parameters. - * - * The TensorRT runtime calls clone() to clone the plugin when an execution context is created for an engine, - * after the engine has been created. The runtime does not call initialize() on the cloned plugin, - * so the cloned plugin should be created in an initialized state. - * */ - - - //! - //! - public native @NoException(true) IPluginV2 clone(); - - /** - * \brief Set the namespace that this plugin object belongs to. Ideally, all plugin - * objects from the same plugin library should have the same namespace. - * */ - - - //! - //! 
- public native @NoException(true) void setPluginNamespace(String pluginNamespace); - public native @NoException(true) void setPluginNamespace(@Cast("const char*") BytePointer pluginNamespace); - - /** - * \brief Return the namespace of the plugin object. - * */ - public native @NoException(true) String getPluginNamespace(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginV2DynamicExt.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginV2DynamicExt.java deleted file mode 100644 index 3ec342c80fd..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginV2DynamicExt.java +++ /dev/null @@ -1,233 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \class IPluginV2DynamicExt - * - * Similar to IPluginV2Ext, but with support for dynamic shapes. 
- * - * Clients should override the public methods, including the following inherited methods: - * - * virtual int32_t getNbOutputs() const noexcept = 0; - * virtual nvinfer1::DataType getOutputDataType(int32_t index, const nvinfer1::DataType* inputTypes, int32_t - * nbInputs) const noexcept = 0; virtual size_t getSerializationSize() const noexcept = 0; virtual void - * serialize(void* buffer) const noexcept = 0; virtual void destroy() noexcept = 0; virtual void - * setPluginNamespace(const char* pluginNamespace) noexcept = 0; virtual const char* getPluginNamespace() const - * noexcept = 0; - * - * For getOutputDataType, the inputTypes will always be DataType::kFLOAT or DataType::kINT32, - * and the returned type is canonicalized to DataType::kFLOAT if it is DataType::kHALF or DataType:kINT8. - * Details about the floating-point precision are elicited later by method supportsFormatCombination. - * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IPluginV2DynamicExt extends IPluginV2Ext { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IPluginV2DynamicExt(Pointer p) { super(p); } - - - - //! - //! - //! - //! - //! - //! - //! - //! - public native @NoException(true) IPluginV2DynamicExt clone(); - - /** - * \brief Get expressions for computing dimensions of an output tensor from dimensions of the input tensors. - * - * @param outputIndex The index of the output tensor - * @param inputs Expressions for dimensions of the input tensors - * @param nbInputs The number of input tensors - * @param exprBuilder Object for generating new expressions - * - * This function is called by the implementations of IBuilder during analysis of the network. - * - * Example #1: A plugin has a single output that transposes the last two dimensions of the plugin's single input. 
- * The body of the override of getOutputDimensions can be: - * - * DimsExprs output(inputs[0]); - * std::swap(output.d[output.nbDims-1], output.d[output.nbDims-2]); - * return output; - * - * Example #2: A plugin concatenates its two inputs along the first dimension. - * The body of the override of getOutputDimensions can be: - * - * DimsExprs output(inputs[0]); - * output.d[0] = exprBuilder.operation(DimensionOperation::kSUM, *inputs[0].d[0], *inputs[1].d[0]); - * return output; - * */ - - - //! - //! - public native @ByVal @NoException(true) DimsExprs getOutputDimensions( - int outputIndex, @Const DimsExprs inputs, int nbInputs, @ByRef IExprBuilder exprBuilder); - - /** - * Limit on number of format combinations accepted. - * */ - - - //! - //! - //! - //! - //! - //! - //! - //! - //! - //! - //! - //! - @MemberGetter public static native int kFORMAT_COMBINATION_LIMIT(); - public static final int kFORMAT_COMBINATION_LIMIT = kFORMAT_COMBINATION_LIMIT(); - - /** - * \brief Return true if plugin supports the format and datatype for the input/output indexed by pos. - * - * For this method inputs are numbered 0..(nbInputs-1) and outputs are numbered nbInputs..(nbInputs+nbOutputs-1). - * Using this numbering, pos is an index into InOut, where 0 <= pos < nbInputs+nbOutputs-1. - * - * TensorRT invokes this method to ask if the input/output indexed by pos supports the format/datatype specified - * by inOut[pos].format and inOut[pos].type. The override should return true if that format/datatype at inOut[pos] - * are supported by the plugin. If support is conditional on other input/output formats/datatypes, the plugin can - * make its result conditional on the formats/datatypes in inOut[0..pos-1], which will be set to values - * that the plugin supports. The override should not inspect inOut[pos+1..nbInputs+nbOutputs-1], - * which will have invalid values. In other words, the decision for pos must be based on inOut[0..pos] only. 
- * - * Some examples: - * - * * A definition for a plugin that supports only FP16 NCHW: - * - * return inOut.format[pos] == TensorFormat::kLINEAR && inOut.type[pos] == DataType::kHALF; - * - * * A definition for a plugin that supports only FP16 NCHW for its two inputs, - * and FP32 NCHW for its single output: - * - * return inOut.format[pos] == TensorFormat::kLINEAR && (inOut.type[pos] == pos < 2 ? DataType::kHALF : - * DataType::kFLOAT); - * - * * A definition for a "polymorphic" plugin with two inputs and one output that supports - * any format or type, but the inputs and output must have the same format and type: - * - * return pos == 0 || (inOut.format[pos] == inOut.format[0] && inOut.type[pos] == inOut.type[0]); - * - * Warning: TensorRT will stop asking for formats once it finds kFORMAT_COMBINATION_LIMIT on combinations. - * */ - - - //! - //! - //! - //! - //! - //! - public native @Cast("bool") @NoException(true) boolean supportsFormatCombination( - int pos, @Const PluginTensorDesc inOut, int nbInputs, int nbOutputs); - - /** - * \brief Configure the plugin. - * - * configurePlugin() can be called multiple times in both the build and execution phases. The build phase happens - * before initialize() is called and only occurs during creation of an engine by IBuilder. The execution phase - * happens after initialize() is called and occurs during both creation of an engine by IBuilder and execution - * of an engine by IExecutionContext. - * - * Build phase: - * IPluginV2DynamicExt->configurePlugin is called when a plugin is being prepared for profiling but not for any - * specific input size. This provides an opportunity for the plugin to make algorithmic choices on the basis of - * input and output formats, along with the bound of possible dimensions. 
The min and max value of the - * DynamicPluginTensorDesc correspond to the kMIN and kMAX value of the current profile that the plugin is being - * profiled for, with the desc.dims field corresponding to the dimensions of plugin specified at network creation. - * Wildcard dimensions will exist during this phase in the desc.dims field. - * - * Execution phase: - * IPluginV2DynamicExt->configurePlugin is called when a plugin is being prepared for executing the plugin for a - * specific dimensions. This provides an opportunity for the plugin to change algorithmic choices based on the - * explicit input dimensions stored in desc.dims field. - * * IBuilder will call this function once per profile, with desc.dims resolved to the values specified by the - * kOPT - * field of the current profile. Wildcard dimensions will not exist during this phase. - * * IExecutionContext will call this during the next subsequent instance enqueue[V2]() or execute[V2]() if: - * - The batch size is changed from previous call of execute()/enqueue() if hasImplicitBatchDimension() returns - * true. - * - The optimization profile is changed via setOptimizationProfile() or setOptimizationProfileAsync(). - * - An input shape binding is changed via setInputShapeBinding(). - * - An input execution binding is changed via setBindingDimensions(). - * \warning The execution phase is timing critical during IExecutionContext but is not part of the timing loop when - * called from IBuilder. Performance bottlenecks of configurePlugin won't show up during engine building but will - * be visible during execution after calling functions that trigger layer resource updates. - * - * @param in The input tensors attributes that are used for configuration. - * @param nbInputs Number of input tensors. - * @param out The output tensors attributes that are used for configuration. - * @param nbOutputs Number of output tensors. - * */ - - - //! - //! - //! - //! 
- public native @NoException(true) void configurePlugin(@Const DynamicPluginTensorDesc in, int nbInputs, - @Const DynamicPluginTensorDesc out, int nbOutputs); - - /** - * \brief Find the workspace size required by the layer. - * - * This function is called after the plugin is configured, and possibly during execution. - * The result should be a sufficient workspace size to deal with inputs and outputs of the given size - * or any smaller problem. - * - * @return The workspace size. - * */ - - - //! - //! - //! - //! - public native @Cast("size_t") @NoException(true) long getWorkspaceSize(@Const PluginTensorDesc inputs, int nbInputs, @Const PluginTensorDesc outputs, - int nbOutputs); - - /** - * \brief Execute the layer. - * - * @param inputDesc how to interpret the memory for the input tensors. - * @param outputDesc how to interpret the memory for the output tensors. - * @param inputs The memory for the input tensors. - * @param outputs The memory for the output tensors. - * @param workspace Workspace for execution. - * @param stream The stream in which to execute the kernels. - * - * @return 0 for success, else non-zero (which will cause engine termination). 
- * */ - public native @NoException(true) int enqueue(@Const PluginTensorDesc inputDesc, @Const PluginTensorDesc outputDesc, - @Cast("const void*const*") PointerPointer inputs, @Cast("void*const*") PointerPointer outputs, Pointer workspace, CUstream_st stream); - public native @NoException(true) int enqueue(@Const PluginTensorDesc inputDesc, @Const PluginTensorDesc outputDesc, - @Cast("const void*const*") @ByPtrPtr Pointer inputs, @Cast("void*const*") @ByPtrPtr Pointer outputs, Pointer workspace, CUstream_st stream); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginV2Ext.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginV2Ext.java deleted file mode 100644 index d9330ddd2e2..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginV2Ext.java +++ /dev/null @@ -1,193 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** \class IPluginV2Ext - * - * \brief Plugin class for user-implemented layers. - * - * Plugins are a mechanism for applications to implement custom layers. This - * interface provides additional capabilities to the IPluginV2 interface by - * supporting different output data types and broadcast across batch. 
- * - * @see IPluginV2 - * */ -@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IPluginV2Ext extends IPluginV2 { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IPluginV2Ext(Pointer p) { super(p); } - - /** - * \brief Return the DataType of the plugin output at the requested index. - * The default behavior should be to return the type of the first input, or DataType::kFLOAT if the layer has no inputs. - * The returned data type must have a format that is supported by the plugin. - * @see supportsFormat() - * - * \warning DataType:kBOOL not supported. - * */ - - //! - //! - //! - public native @NoException(true) DataType getOutputDataType( - int index, @Cast("nvinfer1::DataType*") IntPointer inputTypes, int nbInputs); - public native @NoException(true) @Cast("nvinfer1::DataType") int getOutputDataType( - int index, @Cast("nvinfer1::DataType*") IntBuffer inputTypes, int nbInputs); - public native @NoException(true) DataType getOutputDataType( - int index, @Cast("nvinfer1::DataType*") int[] inputTypes, int nbInputs); - - /** \brief Return true if output tensor is broadcast across a batch. - * - * @param outputIndex The index of the output - * @param inputIsBroadcasted The ith element is true if the tensor for the ith input is broadcast across a batch. - * @param nbInputs The number of inputs - * - * The values in inputIsBroadcasted refer to broadcasting at the semantic level, - * i.e. are unaffected by whether method canBroadcastInputAcrossBatch requests - * physical replication of the values. - * */ - - //! - //! - //! - //! 
- public native @Cast("bool") @NoException(true) boolean isOutputBroadcastAcrossBatch( - int outputIndex, @Cast("const bool*") BoolPointer inputIsBroadcasted, int nbInputs); - public native @Cast("bool") @NoException(true) boolean isOutputBroadcastAcrossBatch( - int outputIndex, @Cast("const bool*") boolean[] inputIsBroadcasted, int nbInputs); - - /** \brief Return true if plugin can use input that is broadcast across batch without replication. - * - * @param inputIndex Index of input that could be broadcast. - * - * For each input whose tensor is semantically broadcast across a batch, - * TensorRT calls this method before calling configurePlugin. - * If canBroadcastInputAcrossBatch returns true, TensorRT will not replicate the input tensor; - * i.e., there will be a single copy that the plugin should share across the batch. - * If it returns false, TensorRT will replicate the input tensor - * so that it appears like a non-broadcasted tensor. - * - * This method is called only for inputs that can be broadcast. - * */ - - - //! - //! - //! - //! - //! - //! - public native @Cast("bool") @NoException(true) boolean canBroadcastInputAcrossBatch(int inputIndex); - - /** - * \brief Configure the layer with input and output data types. - * - * This function is called by the builder prior to initialize(). It provides an opportunity for the layer to make - * algorithm choices on the basis of its weights, dimensions, data types and maximum batch size. - * - * @param inputDims The input tensor dimensions. - * @param nbInputs The number of inputs. - * @param outputDims The output tensor dimensions. - * @param nbOutputs The number of outputs. - * @param inputTypes The data types selected for the plugin inputs. - * @param outputTypes The data types selected for the plugin outputs. - * @param inputIsBroadcast True for each input that the plugin must broadcast across the batch. - * @param outputIsBroadcast True for each output that TensorRT will broadcast across the batch. 
- * @param floatFormat The format selected for the engine for the floating point inputs/outputs. - * @param maxBatchSize The maximum batch size. - * - * The dimensions passed here do not include the outermost batch size (i.e. for 2-D image networks, they will be - * 3-dimensional CHW dimensions). When inputIsBroadcast or outputIsBroadcast is true, the outermost batch size for - * that input or output should be treated as if it is one. - * \ref inputIsBroadcast[i] is true only if the input is semantically broadcast across the batch and - * \ref canBroadcastInputAcrossBatch(i) returned true. - * \ref outputIsBroadcast[i] is true only if \ref isOutputBroadcastAcrossBatch(i) returns true. - * - * \warning for the floatFormat field, the values PluginFormat::kCHW4, PluginFormat::kCHW16, and - * PluginFormat::kCHW32 will not be passed in, this is to keep backward compatibility with TensorRT 5.x series. Use - * PluginV2IOExt or PluginV2DynamicExt for other PluginFormats. - * */ - - public native @NoException(true) void configurePlugin(@Cast("const nvinfer1::Dims*") Dims32 inputDims, int nbInputs, @Cast("const nvinfer1::Dims*") Dims32 outputDims, int nbOutputs, - @Cast("nvinfer1::DataType*") IntPointer inputTypes, @Cast("nvinfer1::DataType*") IntPointer outputTypes, @Cast("const bool*") BoolPointer inputIsBroadcast, - @Cast("const bool*") BoolPointer outputIsBroadcast, @Cast("nvinfer1::PluginFormat") TensorFormat floatFormat, int maxBatchSize); - public native @NoException(true) void configurePlugin(@Cast("const nvinfer1::Dims*") Dims32 inputDims, int nbInputs, @Cast("const nvinfer1::Dims*") Dims32 outputDims, int nbOutputs, - @Cast("nvinfer1::DataType*") IntBuffer inputTypes, @Cast("nvinfer1::DataType*") IntBuffer outputTypes, @Cast("const bool*") boolean[] inputIsBroadcast, - @Cast("const bool*") boolean[] outputIsBroadcast, @Cast("nvinfer1::PluginFormat") int floatFormat, int maxBatchSize); - public native @NoException(true) void configurePlugin(@Cast("const 
nvinfer1::Dims*") Dims32 inputDims, int nbInputs, @Cast("const nvinfer1::Dims*") Dims32 outputDims, int nbOutputs, - @Cast("nvinfer1::DataType*") int[] inputTypes, @Cast("nvinfer1::DataType*") int[] outputTypes, @Cast("const bool*") BoolPointer inputIsBroadcast, - @Cast("const bool*") BoolPointer outputIsBroadcast, @Cast("nvinfer1::PluginFormat") TensorFormat floatFormat, int maxBatchSize); - public native @NoException(true) void configurePlugin(@Cast("const nvinfer1::Dims*") Dims32 inputDims, int nbInputs, @Cast("const nvinfer1::Dims*") Dims32 outputDims, int nbOutputs, - @Cast("nvinfer1::DataType*") IntPointer inputTypes, @Cast("nvinfer1::DataType*") IntPointer outputTypes, @Cast("const bool*") boolean[] inputIsBroadcast, - @Cast("const bool*") boolean[] outputIsBroadcast, @Cast("nvinfer1::PluginFormat") int floatFormat, int maxBatchSize); - public native @NoException(true) void configurePlugin(@Cast("const nvinfer1::Dims*") Dims32 inputDims, int nbInputs, @Cast("const nvinfer1::Dims*") Dims32 outputDims, int nbOutputs, - @Cast("nvinfer1::DataType*") IntBuffer inputTypes, @Cast("nvinfer1::DataType*") IntBuffer outputTypes, @Cast("const bool*") BoolPointer inputIsBroadcast, - @Cast("const bool*") BoolPointer outputIsBroadcast, @Cast("nvinfer1::PluginFormat") TensorFormat floatFormat, int maxBatchSize); - public native @NoException(true) void configurePlugin(@Cast("const nvinfer1::Dims*") Dims32 inputDims, int nbInputs, @Cast("const nvinfer1::Dims*") Dims32 outputDims, int nbOutputs, - @Cast("nvinfer1::DataType*") int[] inputTypes, @Cast("nvinfer1::DataType*") int[] outputTypes, @Cast("const bool*") boolean[] inputIsBroadcast, - @Cast("const bool*") boolean[] outputIsBroadcast, @Cast("nvinfer1::PluginFormat") int floatFormat, int maxBatchSize); - - /** - * \brief Attach the plugin object to an execution context and grant the plugin the access to some context resource. 
- * - * @param cudnn The CUDNN context handle of the execution context - * @param cublas The cublas context handle of the execution context - * @param allocator The allocator used by the execution context - * - * This function is called automatically for each plugin when a new execution context is created. If the context - * was created without resources, this method is not called until the resources are assigned. It is also called if - * new resources are assigned to the context. - * - * If the plugin needs per-context resource, it can be allocated here. - * The plugin can also get context-owned CUDNN and CUBLAS context here. - * - * \note In the automotive safety context, the CUDNN and CUBLAS parameters will be nullptr because CUDNN and CUBLAS - * is not used by the safe runtime. - * */ - - - //! - //! - //! - //! - public native @NoException(true) void attachToContext(cudnnContext arg0, cublasContext arg1, IGpuAllocator arg2); - - /** - * \brief Detach the plugin object from its execution context. - * - * This function is called automatically for each plugin when a execution context is destroyed or the context - * resources are unassigned from the context. - * - * If the plugin owns per-context resource, it can be released here. - * */ - - - //! - //! - public native @NoException(true) void detachFromContext(); - - /** - * \brief Clone the plugin object. This copies over internal plugin parameters as well and returns a new plugin object with these parameters. - * If the source plugin is pre-configured with configurePlugin(), the returned object should also be pre-configured. The returned object should allow attachToContext() with a new execution context. - * Cloned plugin objects can share the same per-engine immutable resource (e.g. weights) with the source object (e.g. via ref-counting) to avoid duplication. 
- * */ - public native @NoException(true) IPluginV2Ext clone(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginV2IOExt.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginV2IOExt.java deleted file mode 100644 index 4024615c5c1..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginV2IOExt.java +++ /dev/null @@ -1,99 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** \class IPluginV2IOExt - * - * \brief Plugin class for user-implemented layers. - * - * Plugins are a mechanism for applications to implement custom layers. This interface provides additional - * capabilities to the IPluginV2Ext interface by extending different I/O data types and tensor formats. - * - * @see IPluginV2Ext - * */ -@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IPluginV2IOExt extends IPluginV2Ext { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IPluginV2IOExt(Pointer p) { super(p); } - - /** - * \brief Configure the layer. - * - * This function is called by the builder prior to initialize(). It provides an opportunity for the layer to make - * algorithm choices on the basis of I/O PluginTensorDesc and the maximum batch size. 
- * - * @param in The input tensors attributes that are used for configuration. - * @param nbInput Number of input tensors. - * @param out The output tensors attributes that are used for configuration. - * @param nbOutput Number of output tensors. - * */ - - - //! - //! - //! - //! - //! - //! - //! - //! - //! - //! - //! - //! - public native @NoException(true) void configurePlugin( - @Const PluginTensorDesc in, int nbInput, @Const PluginTensorDesc out, int nbOutput); - - /** - * \brief Return true if plugin supports the format and datatype for the input/output indexed by pos. - * - * For this method inputs are numbered 0..(nbInputs-1) and outputs are numbered nbInputs..(nbInputs+nbOutputs-1). - * Using this numbering, pos is an index into InOut, where 0 <= pos < nbInputs+nbOutputs-1. - * - * TensorRT invokes this method to ask if the input/output indexed by pos supports the format/datatype specified - * by inOut[pos].format and inOut[pos].type. The override should return true if that format/datatype at inOut[pos] - * are supported by the plugin. If support is conditional on other input/output formats/datatypes, the plugin can - * make its result conditional on the formats/datatypes in inOut[0..pos-1], which will be set to values - * that the plugin supports. The override should not inspect inOut[pos+1..nbInputs+nbOutputs-1], - * which will have invalid values. In other words, the decision for pos must be based on inOut[0..pos] only. - * - * Some examples: - * - * * A definition for a plugin that supports only FP16 NCHW: - * - * return inOut.format[pos] == TensorFormat::kLINEAR && inOut.type[pos] == DataType::kHALF; - * - * * A definition for a plugin that supports only FP16 NCHW for its two inputs, - * and FP32 NCHW for its single output: - * - * return inOut.format[pos] == TensorFormat::kLINEAR && - * (inOut.type[pos] == pos < 2 ? 
DataType::kHALF : DataType::kFLOAT); - * - * * A definition for a "polymorphic" plugin with two inputs and one output that supports - * any format or type, but the inputs and output must have the same format and type: - * - * return pos == 0 || (inOut.format[pos] == inOut.format[0] && inOut.type[pos] == inOut.type[0]); - * - * Warning: TensorRT will stop asking for formats once it finds kFORMAT_COMBINATION_LIMIT on combinations. - * */ - public native @Cast("bool") @NoException(true) boolean supportsFormatCombination( - int pos, @Const PluginTensorDesc inOut, int nbInputs, int nbOutputs); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginV2Layer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginV2Layer.java deleted file mode 100644 index 52aa6126a81..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPluginV2Layer.java +++ /dev/null @@ -1,43 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \class IPluginV2Layer - * - * \brief Layer type for pluginV2 - * - * @see IPluginV2 - * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. 
- * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IPluginV2Layer extends ILayer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IPluginV2Layer(Pointer p) { super(p); } - - /** - * \brief Get the plugin for the layer. - * - * @see IPluginV2 - * */ - public native @ByRef @NoException(true) IPluginV2 getPlugin(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPoolingLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPoolingLayer.java deleted file mode 100644 index c3c82b8929e..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IPoolingLayer.java +++ /dev/null @@ -1,433 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - // namespace impl - -/** \class IPoolingLayer - * - * \brief A Pooling layer in a network definition. - * - * The layer applies a reduction operation within a window over the input. - * - * \warning When running pooling layer with DeviceType::kDLA in Int8 mode, the dynamic ranges - * for input and output tensors must be equal. - * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. 
- * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IPoolingLayer extends ILayer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IPoolingLayer(Pointer p) { super(p); } - - /** - * \brief Set the type of activation to be performed. - * - * DLA only supports kMAX and kAVERAGE pooling types. - * - * @see getPoolingType(), PoolingType - * */ - - - //! - //! - //! - public native @NoException(true) void setPoolingType(PoolingType type); - public native @NoException(true) void setPoolingType(@Cast("nvinfer1::PoolingType") int type); - - /** - * \brief Get the type of activation to be performed. - * - * @see setPoolingType(), PoolingType - * */ - - - //! - //! - //! - //! - //! - public native @NoException(true) PoolingType getPoolingType(); - - /** - * \brief Set the window size for pooling. - * - * If executing this layer on DLA, both height and width of window size must be in the range [1,8]. - * - * @see getWindowSize() - * - * @deprecated Superseded by setWindowSizeNd and will be removed in TensorRT 9.0. - * */ - - - //! - //! - //! - //! - public native @Deprecated @NoException(true) void setWindowSize(@ByVal DimsHW windowSize); - - /** - * \brief Get the window size for pooling. - * - * @see setWindowSize() - * - * @deprecated Superseded by getWindowSizeNd and will be removed in TensorRT 9.0. - * */ - - - //! - //! - //! - //! - //! - //! - public native @Deprecated @ByVal @NoException(true) DimsHW getWindowSize(); - - /** - * \brief Set the stride for pooling. - * - * Default: 1 - * - * If executing this layer on DLA, both height and width of stride must be in the range [1,16]. - * - * @see getStride() - * - * @deprecated Superseded by setStrideNd and will be removed in TensorRT 9.0. - * */ - - - //! - //! - //! - //! 
- public native @Deprecated @NoException(true) void setStride(@ByVal DimsHW stride); - - /** - * \brief Get the stride for pooling. - * - * @see setStride() - * - * @deprecated Superseded by getStrideNd and will be removed in TensorRT 9.0. - * */ - - - //! - //! - //! - //! - //! - //! - public native @Deprecated @ByVal @NoException(true) DimsHW getStride(); - - /** - * \brief Set the padding for pooling. - * - * Default: 0 - * - * If executing this layer on DLA, both height and width of padding must be in the range [0,7]. - * - * @see getPadding() - * - * @deprecated Superseded by setPaddingNd and will be removed in TensorRT 9.0. - * */ - - - //! - //! - //! - //! - //! - public native @Deprecated @NoException(true) void setPadding(@ByVal DimsHW padding); - - /** - * \brief Get the padding for pooling. - * - * Default: 0 - * - * @see setPadding() - * - * @deprecated Superseded by getPaddingNd and will be removed in TensorRT 9.0. - * */ - - - //! - //! - //! - //! - public native @Deprecated @ByVal @NoException(true) DimsHW getPadding(); - - /** - * \brief Set the blending factor for the max_average_blend mode: - * max_average_blendPool = (1-blendFactor)*maxPool + blendFactor*avgPool - * blendFactor is a user value in [0,1] with the default value of 0.0 - * This value only applies for the kMAX_AVERAGE_BLEND mode. - * - * Since DLA does not support kMAX_AVERAGE_BLEND, blendFactor is ignored on the DLA. - * - * @see getBlendFactor() - * */ - - - //! - //! - //! - public native @NoException(true) void setBlendFactor(float blendFactor); - - /** - * \brief Get the blending factor for the max_average_blend mode: - * max_average_blendPool = (1-blendFactor)*maxPool + blendFactor*avgPool - * blendFactor is a user value in [0,1] with the default value of 0.0 - * In modes other than kMAX_AVERAGE_BLEND, blendFactor is ignored. - * - * @see setBlendFactor() - * */ - - - //! - //! - //! - //! - //! 
- public native @NoException(true) float getBlendFactor(); - - /** - * \brief Set whether average pooling uses as a denominator the overlap area between the window - * and the unpadded input. - * If this is not set, the denominator is the overlap between the pooling window and the padded input. - * - * Default: true - * - * \note DLA supports only inclusive padding, and thus when executing this layer on DLA, this must be explicitly - * set to false. - * - * @see getAverageCountExcludesPadding() - * */ - - - //! - //! - //! - public native @NoException(true) void setAverageCountExcludesPadding(@Cast("bool") boolean exclusive); - - /** - * \brief Get whether average pooling uses as a denominator the overlap area between the window - * and the unpadded input. - * - * @see setAverageCountExcludesPadding() - * */ - - - //! - //! - //! - //! - //! - //! - public native @Cast("bool") @NoException(true) boolean getAverageCountExcludesPadding(); - - /** - * \brief Set the multi-dimension pre-padding for pooling. - * - * The start of the input will be padded by this number of elements in each dimension. - * Padding value depends on pooling type, -inf is used for max pooling and zero padding for average pooling. - * - * Default: (0, 0, ..., 0) - * - * If executing this layer on DLA, only support 2D padding, both height and width of padding must be in the range - * [0,7]. - * - * @see getPrePadding() - * */ - - - //! - //! - //! - public native @NoException(true) void setPrePadding(@ByVal @Cast("nvinfer1::Dims*") Dims32 padding); - - /** - * \brief Get the pre-padding. - * - * @see setPrePadding() - * */ - - - //! - //! - //! - //! - //! - //! - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getPrePadding(); - - /** - * \brief Set the multi-dimension post-padding for pooling. - * - * The end of the input will be padded by this number of elements in each dimension. 
- * Padding value depends on pooling type, -inf is used for max pooling and zero padding for average pooling. - * - * Default: (0, 0, ..., 0) - * - * If executing this layer on DLA, only support 2D padding, both height and width of padding must be in the range - * [0,7]. - * - * @see getPostPadding() - * */ - - - //! - //! - //! - public native @NoException(true) void setPostPadding(@ByVal @Cast("nvinfer1::Dims*") Dims32 padding); - - /** - * \brief Get the padding. - * - * @see setPostPadding() - * */ - - - //! - //! - //! - //! - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getPostPadding(); - - /** - * \brief Set the padding mode. - * - * Padding mode takes precedence if both setPaddingMode and setPre/PostPadding are used. - * - * Default: kEXPLICIT_ROUND_DOWN - * - * @see getPaddingMode() */ - - - //! - //! - //! - public native @NoException(true) void setPaddingMode(PaddingMode paddingMode); - public native @NoException(true) void setPaddingMode(@Cast("nvinfer1::PaddingMode") int paddingMode); - - /** - * \brief Get the padding mode. - * - * Default: kEXPLICIT_ROUND_DOWN - * - * @see setPaddingMode() */ - - - //! - //! - //! - //! - public native @NoException(true) PaddingMode getPaddingMode(); - - /** - * \brief Set the multi-dimension window size for pooling. - * - * If executing this layer on DLA, only support 2D window size, both height and width of window size must be in the - * range [1,8]. - * - * @see getWindowSizeNd() setWindowSize() getWindowSize() - * */ - - - //! - //! - //! - public native @NoException(true) void setWindowSizeNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 windowSize); - - /** - * \brief Get the multi-dimension window size for pooling. - * - * @see setWindowSizeNd() - * */ - - - //! - //! - //! - //! - //! - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getWindowSizeNd(); - - /** - * \brief Set the multi-dimension stride for pooling. 
- * - * Default: (1, 1, ..., 1) - * - * If executing this layer on DLA, only support 2D stride, both height and width of stride must be in the range - * [1,16]. - * - * @see getStrideNd() setStride() getStride() - * */ - - - //! - //! - //! - public native @NoException(true) void setStrideNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 stride); - - /** - * \brief Get the multi-dimension stride for pooling. - * - * @see setStrideNd() - * */ - - - //! - //! - //! - //! - //! - //! - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getStrideNd(); - - /** - * \brief Set the multi-dimension padding for pooling. - * - * The input will be padded by this number of elements in each dimension. - * Padding is symmetric. - * Padding value depends on pooling type, -inf is used for max pooling and zero padding for average pooling. - * - * Default: (0, 0, ..., 0) - * - * If executing this layer on DLA, only support 2D padding, both height and width of padding must be in the range - * [0,7]. - * - * @see getPaddingNd() setPadding() getPadding() - * */ - - - //! - //! - //! - //! - public native @NoException(true) void setPaddingNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 padding); - - /** - * \brief Get the multi-dimension padding for pooling. - * - * If the padding is asymmetric, the pre-padding is returned. 
- * - * @see setPaddingNd() - * */ - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getPaddingNd(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IProfiler.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IProfiler.java deleted file mode 100644 index 29ad4ca232a..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IProfiler.java +++ /dev/null @@ -1,57 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \class IProfiler - * - * \brief Application-implemented interface for profiling. - * - * When this class is added to an execution context, the profiler will be called once per layer for each invocation of execute(). - * Note that enqueue() does not currently support profiling. - * - * The profiler will only be called after execution is complete. It has a small impact on execution time. - * */ -@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IProfiler extends Pointer { - static { Loader.load(); } - /** Default native constructor. */ - public IProfiler() { super((Pointer)null); allocate(); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public IProfiler(long size) { super((Pointer)null); allocateArray(size); } - /** Pointer cast constructor. 
Invokes {@link Pointer#Pointer(Pointer)}. */ - public IProfiler(Pointer p) { super(p); } - private native void allocate(); - private native void allocateArray(long size); - @Override public IProfiler position(long position) { - return (IProfiler)super.position(position); - } - @Override public IProfiler getPointer(long i) { - return new IProfiler((Pointer)this).offsetAddress(i); - } - - /** - * \brief Layer time reporting callback. - * - * @param layerName The name of the layer, set when constructing the network definition. - * @param ms The time in milliseconds to execute the layer. - * */ - @Virtual(true) public native @NoException(true) void reportLayerTime(String layerName, float ms); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IQuantizeLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IQuantizeLayer.java deleted file mode 100644 index 8cbb7639197..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IQuantizeLayer.java +++ /dev/null @@ -1,106 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \class IQuantizeLayer - * - * \brief A Quantize layer in a network definition. 
- * - * This layer accepts a floating-point data input tensor, and uses the scale and zeroPt inputs to - * quantize the data to an 8-bit signed integer according to: - * \p output = clamp(round(\p input / \p scale) + \p zeroPt) - * - * Rounding type is rounding-to-nearest ties-to-even (https://en.wikipedia.org/wiki/Rounding#Round_half_to_even). - * Clamping is in the range [-128, 127]. - * - * The first input (index 0) is the tensor to be quantized. - * The second (index 1) and third (index 2) are the scale and zero point respectively. - * Each of \p scale and \p zeroPt must be either a scalar, or a 1D tensor. - * - * The \p zeroPt tensor is optional, and if not set, will be assumed to be zero. Its data type must be - * DataType::kINT8. \p zeroPt must only contain zero-valued coefficients, because only symmetric quantization is - * supported. - * The \p scale value must be either a scalar for per-tensor quantization, or a 1D tensor for per-channel - * quantization. All \p scale coefficients must have positive values. The size of the 1-D \p scale tensor must match - * the size of the quantization axis. The size of the \p scale must match the size of the \p zeroPt. - * - * The subgraph which terminates with the \p scale tensor must be a build-time constant. The same restrictions apply - * to the \p zeroPt. - * The output type, if constrained, must be constrained to DataType::kINT8. The input type, if constrained, must be - * constrained to DataType::kFLOAT (FP16 input is not supported). - * The output size is the same as the input size. The quantization axis is in reference to the input tensor's - * dimensions. - * - * IQuantizeLayer only supports DataType::kFLOAT precision and will default to this precision during instantiation. - * IQuantizeLayer only supports DataType::kINT8 output. 
- * - * As an example of the operation of this layer, imagine a 4D NCHW activation input which can be quantized using a - * single scale coefficient (referred to as per-tensor quantization): - * For each n in N: - * For each c in C: - * For each h in H: - * For each w in W: - * output[n,c,h,w] = clamp(round(\p input[n,c,h,w] / \p scale) + \p zeroPt) - * - * Per-channel quantization is supported only for weight inputs. Thus, Activations cannot be quantized per-channel. - * As an example of per-channel operation, imagine a 4D KCRS weights input and K (dimension 0) as the quantization - * axis. The scale is an array of coefficients, and must have the same size as the quantization axis. - * For each k in K: - * For each c in C: - * For each r in R: - * For each s in S: - * output[k,c,r,s] = clamp(round(\p input[k,c,r,s] / \p scale[k]) + \p zeroPt[k]) - * - * \note Only symmetric quantization is supported. - * \note Currently the only allowed build-time constant \p scale and \zeroPt subgraphs are: - * 1. Constant -> Quantize - * 2. Constant -> Cast -> Quantize - * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. - * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IQuantizeLayer extends ILayer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IQuantizeLayer(Pointer p) { super(p); } - - /** - * \brief Get the quantization axis. - * - * @return axis parameter set by setAxis(). - * The return value is the index of the quantization axis in the input tensor's dimensions. - * A value of -1 indicates per-tensor quantization. - * The default value is -1. - * */ - - //! - //! - //! - public native @NoException(true) int getAxis(); - /** - * \brief Set the quantization axis. - * - * Set the index of the quantization axis (with reference to the input tensor's dimensions). 
- * The axis must be a valid axis if the scale tensor has more than one coefficient. - * The axis value will be ignored if the scale tensor has exactly one coefficient (per-tensor quantization). - * */ - public native @NoException(true) void setAxis(int axis); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IRNNv2Layer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IRNNv2Layer.java deleted file mode 100644 index 5a29e8edb16..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IRNNv2Layer.java +++ /dev/null @@ -1,339 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \class IRNNv2Layer - * - * \brief An RNN layer in a network definition, version 2. - * - * This layer supersedes IRNNLayer. - * - * @deprecated IRNNv2Layer will be removed in TensorRT 9.0, use INetworkDefinition::addLoop instead. - * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. - * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IRNNv2Layer extends ILayer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IRNNv2Layer(Pointer p) { super(p); } - - /** Get the layer count of the RNN. 
*/ - public native @NoException(true) int getLayerCount(); - /** Get the hidden size of the RNN. */ - public native @NoException(true) int getHiddenSize(); - /** Get the maximum sequence length of the RNN. */ - public native @NoException(true) int getMaxSeqLength(); - /** Get the maximum data length of the RNN. -

- //! - //! - //! - //! - //! - //! */ - public native @NoException(true) int getDataLength(); - - /** - * \brief Specify individual sequence lengths in the batch with the ITensor pointed to by - * \p seqLengths. - * - * The \p seqLengths ITensor should be a {N1, ..., Np} tensor, where N1..Np are the index dimensions - * of the input tensor to the RNN. - * - * If this is not specified, then the RNN layer assumes all sequences are size getMaxSeqLength(). - * - * All sequence lengths in \p seqLengths should be in the range [1, getMaxSeqLength()]. Zero-length - * sequences are not supported. - * - * This tensor must be of type DataType::kINT32. - * */ - - - //! - //! - //! - //! - public native @NoException(true) void setSequenceLengths(@ByRef ITensor seqLengths); - - /** - * \brief Get the sequence lengths specified for the RNN. - * - * @return nullptr if no sequence lengths were specified, the sequence length data otherwise. - * - * @see setSequenceLengths() - * */ - - - //! - //! - public native @NoException(true) ITensor getSequenceLengths(); - - /** - * \brief Set the operation of the RNN layer. - * @see getOperation(), RNNOperation - * */ - - - //! - //! - public native @NoException(true) void setOperation(RNNOperation op); - public native @NoException(true) void setOperation(@Cast("nvinfer1::RNNOperation") int op); - - /** - * \brief Get the operation of the RNN layer. - * @see setOperation(), RNNOperation - * */ - - - //! - //! - public native @NoException(true) RNNOperation getOperation(); - - /** - * \brief Set the input mode of the RNN layer. - * @see getInputMode(), RNNInputMode - * */ - - - //! - //! - public native @NoException(true) void setInputMode(RNNInputMode op); - public native @NoException(true) void setInputMode(@Cast("nvinfer1::RNNInputMode") int op); - - /** - * \brief Get the input mode of the RNN layer. - * @see setInputMode(), RNNInputMode - * */ - - - //! - //! - //! - //! 
- public native @NoException(true) RNNInputMode getInputMode(); - - /** - * \brief Set the direction of the RNN layer. - * - * The direction determines if the RNN is run as a unidirectional(left to right) or - * bidirectional(left to right and right to left). - * In the ::kBIDIRECTION case the output is concatenated together, resulting - * in output size of 2x getHiddenSize(). - * - * @see getDirection(), RNNDirection - * */ - - - //! - //! - public native @NoException(true) void setDirection(RNNDirection op); - public native @NoException(true) void setDirection(@Cast("nvinfer1::RNNDirection") int op); - - /** - * \brief Get the direction of the RNN layer. - * @see setDirection(), RNNDirection - * */ - - - //! - //! - //! - //! - //! - //! - //! - //! - //! - public native @NoException(true) RNNDirection getDirection(); - - /** - * \brief Set the weight parameters for an individual gate in the RNN. - * - * The #DataType for this structure must be ::kFLOAT or ::kHALF, and must be the same - * datatype as the input tensor. - * - * Each parameter matrix is row-major in memory, and has the following dimensions: - * - * ~~~ - * Let K := { ::kUNIDIRECTION => 1 - * { ::kBIDIRECTION => 2 - * l := layer index (as described above) - * H := getHiddenSize() - * E := getDataLength() (the embedding length) - * isW := true if the matrix is an input (W) matrix, and false if - * the matrix is a recurrent input (R) matrix. - * - * if isW: - * if l < K and ::kSKIP: - * (numRows, numCols) := (0, 0) # input matrix is skipped - * elif l < K and ::kLINEAR: - * (numRows, numCols) := (H, E) # input matrix acts on input data size E - * elif l >= K: - * (numRows, numCols) := (H, K * H) # input matrix acts on previous hidden state - * else: # not isW - * (numRows, numCols) := (H, H) - * ~~~ - * - * In other words, the input weights of the first layer of the RNN (if - * not skipped) transform a {@code getDataLength()}-size column - * vector into a {@code getHiddenSize()}-size column vector. 
The input - * weights of subsequent layers transform a {@code K*getHiddenSize()}-size - * column vector into a {@code getHiddenSize()}-size column vector. {@code K=2} in - * the bidirectional case to account for the full hidden state being - * the concatenation of the forward and backward RNN hidden states. - * - * The recurrent weight matrices for all layers all have shape {@code (H, H)}, - * both in the unidirectional and bidirectional cases. (In the - * bidirectional case, each recurrent weight matrix for the (forward or - * backward) RNN cell operates on the previous (forward or - * backward) RNN cell's hidden state, which is size {@code H}). - * - * @param layerIndex The index of the layer that contains this gate. See the section - * @param gate The name of the gate within the RNN layer. The gate name must correspond - * to one of the gates used by this layer's #RNNOperation. - * @param isW True if the weight parameters are for the input matrix W[g] - * and false if they are for the recurrent input matrix R[g]. See - * #RNNOperation for equations showing how these matrices are used - * in the RNN gate. - * @param weights The weight structure holding the weight parameters, which are stored - * as a row-major 2D matrix. See \ref setRNNWeightsLayout "the layout of elements within a weight matrix" - * in IRNNLayer::setWeights() for documentation on the expected - * dimensions of this matrix. - * */ - - - //! - //! - public native @NoException(true) void setWeightsForGate(int layerIndex, RNNGateType gate, @Cast("bool") boolean isW, @ByVal Weights weights); - public native @NoException(true) void setWeightsForGate(int layerIndex, @Cast("nvinfer1::RNNGateType") int gate, @Cast("bool") boolean isW, @ByVal Weights weights); - - /** - * \brief Get the weight parameters for an individual gate in the RNN. - * @see setWeightsForGate() - * */ - - - //! - //! - //! - //! - //! 
- public native @ByVal @NoException(true) Weights getWeightsForGate(int layerIndex, RNNGateType gate, @Cast("bool") boolean isW); - public native @ByVal @NoException(true) Weights getWeightsForGate(int layerIndex, @Cast("nvinfer1::RNNGateType") int gate, @Cast("bool") boolean isW); - - /** - * \brief Set the bias parameters for an individual gate in the RNN. - * - * The #DataType for this structure must be ::kFLOAT or ::kHALF, and must be the same - * datatype as the input tensor. - * - * Each bias vector has a fixed size, getHiddenSize(). - * - * @param layerIndex The index of the layer that contains this gate. See the section - * \ref setRNNWeightsOrder "Order of weight matrices" in IRNNLayer::setWeights() - * for a description of the layer index. - * @param gate The name of the gate within the RNN layer. The gate name must correspond - * to one of the gates used by this layer's #RNNOperation. - * @param isW True if the bias parameters are for the input bias Wb[g] - * and false if they are for the recurrent input bias Rb[g]. See - * #RNNOperation for equations showing how these bias vectors are used - * in the RNN gate. - * @param bias The weight structure holding the bias parameters, which should be an - * array of size getHiddenSize(). - * */ - - - //! - //! - public native @NoException(true) void setBiasForGate(int layerIndex, RNNGateType gate, @Cast("bool") boolean isW, @ByVal Weights bias); - public native @NoException(true) void setBiasForGate(int layerIndex, @Cast("nvinfer1::RNNGateType") int gate, @Cast("bool") boolean isW, @ByVal Weights bias); - - /** - * \brief Get the bias parameters for an individual gate in the RNN. - * @see setBiasForGate() - * */ - - - //! - //! - //! - //! 
- public native @ByVal @NoException(true) Weights getBiasForGate(int layerIndex, RNNGateType gate, @Cast("bool") boolean isW); - public native @ByVal @NoException(true) Weights getBiasForGate(int layerIndex, @Cast("nvinfer1::RNNGateType") int gate, @Cast("bool") boolean isW); - - /** - * \brief Set the initial hidden state of the RNN with the provided \p hidden ITensor. - * - * The \p hidden ITensor should have the dimensions {@code {N1, ..., Np, L, H}}, where: - * - * - {@code N1..Np} are the index dimensions specified by the input tensor - * - {@code L} is the number of layers in the RNN, equal to getLayerCount() if getDirection is ::kUNIDIRECTION, - * and 2x getLayerCount() if getDirection is ::kBIDIRECTION. In the bi-directional - * case, layer {@code l}'s final forward hidden state is stored in {@code L = 2*l}, and - * final backward hidden state is stored in {@code L= 2*l + 1}. - * - {@code H} is the hidden state for each layer, equal to getHiddenSize(). - * */ - - - //! - //! - public native @NoException(true) void setHiddenState(@ByRef ITensor hidden); - - /** - * \brief Get the initial hidden state of the RNN. - * @see setHiddenState() - * */ - - - //! - //! - //! - //! - //! - public native @NoException(true) ITensor getHiddenState(); - - /** - * \brief Set the initial cell state of the LSTM with the provided \p cell ITensor. - * - * The \p cell ITensor should have the dimensions {@code {N1, ..., Np, L, H}}, where: - * - * - {@code N1..Np} are the index dimensions specified by the input tensor - * - {@code L} is the number of layers in the RNN, equal to getLayerCount() if getDirection is ::kUNIDIRECTION, - * and 2x getLayerCount() if getDirection is ::kBIDIRECTION. In the bi-directional - * case, layer {@code l}'s final forward hidden state is stored in {@code L = 2*l}, and - * final backward hidden state is stored in {@code L= 2*l + 1}. - * - {@code H} is the hidden state for each layer, equal to getHiddenSize(). 
- * - * It is an error to call setCellState() on an RNN layer that is not configured with RNNOperation::kLSTM. - * */ - - - //! - //! - public native @NoException(true) void setCellState(@ByRef ITensor cell); - - /** - * \brief Get the initial cell state of the RNN. - * @see setCellState() - * */ - public native @NoException(true) ITensor getCellState(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IRaggedSoftMaxLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IRaggedSoftMaxLayer.java deleted file mode 100644 index a6ad767ac64..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IRaggedSoftMaxLayer.java +++ /dev/null @@ -1,42 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \class IRaggedSoftMaxLayer - * - * \brief A RaggedSoftmax layer in a network definition. - * - * This layer takes a ZxS input tensor and an additional Zx1 bounds tensor - * holding the lengths of the Z sequences. - * - * This layer computes a softmax across each of the Z sequences. - * - * The output tensor is of the same size as the input tensor. - * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. 
- * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IRaggedSoftMaxLayer extends ILayer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IRaggedSoftMaxLayer(Pointer p) { super(p); } - -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IRecurrenceLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IRecurrenceLayer.java deleted file mode 100644 index b5f5b10c35d..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IRecurrenceLayer.java +++ /dev/null @@ -1,46 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IRecurrenceLayer extends ILoopBoundaryLayer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IRecurrenceLayer(Pointer p) { super(p); } - - /** - * \brief Append or replace an input of this layer with a specific tensor - * - * @param index the index of the input to modify. - * @param tensor the new input tensor */ - // - /** Sets the input tensor for the given index. - /** - /** For a recurrence layer, the values 0 and 1 are valid. 
- /** The indices are as follows: - /** - /** - 0: The initial value of the output tensor. The value must come from outside the loop. - /** - 1: The next value of the output tensor. The value usually comes from inside the loop, and must have the same - /** dimensions as input 0. - /** - /** If this function is called with a value 1, then the function getNbInputs() changes - /** from returning 1 to 2. - /** */ -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IReduceLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IReduceLayer.java deleted file mode 100644 index e3984032189..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IReduceLayer.java +++ /dev/null @@ -1,102 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \class IReduceLayer - * - * \brief Layer that represents a reduction operator across Shape, Int32, Float, and Half tensors. - * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. - * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IReduceLayer extends ILayer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public IReduceLayer(Pointer p) { super(p); } - - /** - * \brief Set the reduce operation for the layer. - * - * @see getOperation(), ReduceOperation - * */ - - - //! - //! - //! - public native @NoException(true) void setOperation(ReduceOperation op); - public native @NoException(true) void setOperation(@Cast("nvinfer1::ReduceOperation") int op); - - /** - * \brief Get the reduce operation for the layer. - * - * @see setOperation(), ReduceOperation - * */ - - - //! - //! - //! - public native @NoException(true) ReduceOperation getOperation(); - - /** - * \brief Set the axes over which to reduce. - * - * @see getReduceAxes - * */ - - - //! - //! - //! - public native @NoException(true) void setReduceAxes(@Cast("uint32_t") int reduceAxes); - - /** - * \brief Get the axes over which to reduce for the layer. - * - * @see setReduceAxes - * */ - - - //! - //! - //! - public native @Cast("uint32_t") @NoException(true) int getReduceAxes(); - - /** - * \brief Set the boolean that specifies whether or not to keep the reduced dimensions for the layer. - * - * @see getKeepDimensions - * */ - - - //! - //! - //! - public native @NoException(true) void setKeepDimensions(@Cast("bool") boolean keepDimensions); - - /** - * \brief Get the boolean that specifies whether or not to keep the reduced dimensions for the layer. 
- * - * @see setKeepDimensions - * */ - public native @Cast("bool") @NoException(true) boolean getKeepDimensions(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IRefitter.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IRefitter.java deleted file mode 100644 index ae46eb33e9c..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IRefitter.java +++ /dev/null @@ -1,336 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \class IRefitter - * - * \brief Updates weights in an engine. - * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. - * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IRefitter extends INoCopy { - static { Loader.load(); } - /** Default native constructor. */ - public IRefitter() { super((Pointer)null); allocate(); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public IRefitter(long size) { super((Pointer)null); allocateArray(size); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public IRefitter(Pointer p) { super(p); } - private native void allocate(); - private native void allocateArray(long size); - @Override public IRefitter position(long position) { - return (IRefitter)super.position(position); - } - @Override public IRefitter getPointer(long i) { - return new IRefitter((Pointer)this).offsetAddress(i); - } - - - /** - * \brief Specify new weights for a layer of given name. - * Returns true on success, or false if new weights are rejected. - * Possible reasons for rejection are: - * - * * There is no such layer by that name. - * * The layer does not have weights with the specified role. - * * The number of weights is inconsistent with the layer’s original specification. - * - * Modifying the weights before method refit() completes will result in undefined behavior. */ - - - //! - //! - //! - //! - //! - public native @Cast("bool") @NoException(true) boolean setWeights(String layerName, WeightsRole role, @ByVal Weights weights); - public native @Cast("bool") @NoException(true) boolean setWeights(@Cast("const char*") BytePointer layerName, @Cast("nvinfer1::WeightsRole") int role, @ByVal Weights weights); - - /** - * \brief Updates associated engine. Return true if successful. - * - * Failure occurs if getMissing() != 0 before the call. - * - * The behavior is undefined if the engine has pending enqueued work. - * - * Extant IExecutionContexts associated with the engine should not be used afterwards. - * Instead, create new IExecutionContexts after refitting. - * */ - - - //! - //! - //! - //! - //! - //! - public native @Cast("bool") @NoException(true) boolean refitCudaEngine(); - - /** - * \brief Get description of missing weights. - * - * For example, if some Weights have been set, but the engine was optimized - * in a way that combines weights, any unsupplied Weights in the combination - * are considered missing. - * - * @param size The number of items that can be safely written to a non-null layerNames or roles. 
- * @param layerNames Where to write the layer names. - * @param roles Where to write the weights roles. - * - * @return The number of missing Weights. - * - * If layerNames!=nullptr, each written pointer points to a string owned by - * the engine being refit, and becomes invalid when the engine is destroyed. - * */ - - - //! - //! - //! - //! - //! - public native @NoException(true) int getMissing(int size, @Cast("const char**") PointerPointer layerNames, @Cast("nvinfer1::WeightsRole*") IntPointer roles); - public native @NoException(true) int getMissing(int size, @Cast("const char**") @ByPtrPtr BytePointer layerNames, @Cast("nvinfer1::WeightsRole*") IntPointer roles); - public native @NoException(true) int getMissing(int size, @Cast("const char**") @ByPtrPtr ByteBuffer layerNames, @Cast("nvinfer1::WeightsRole*") IntBuffer roles); - public native @NoException(true) int getMissing(int size, @Cast("const char**") @ByPtrPtr byte[] layerNames, @Cast("nvinfer1::WeightsRole*") int[] roles); - - /** - * \brief Get description of all weights that could be refit. - * - * @param size The number of items that can be safely written to a non-null layerNames or roles. - * @param layerNames Where to write the layer names. - * @param roles Where to write the weights roles. - * - * @return The number of Weights that could be refit. - * - * If layerNames!=nullptr, each written pointer points to a string owned by - * the engine being refit, and becomes invalid when the engine is destroyed. - * */ - - - //! - //! - //! 
- public native @NoException(true) int getAll(int size, @Cast("const char**") PointerPointer layerNames, @Cast("nvinfer1::WeightsRole*") IntPointer roles); - public native @NoException(true) int getAll(int size, @Cast("const char**") @ByPtrPtr BytePointer layerNames, @Cast("nvinfer1::WeightsRole*") IntPointer roles); - public native @NoException(true) int getAll(int size, @Cast("const char**") @ByPtrPtr ByteBuffer layerNames, @Cast("nvinfer1::WeightsRole*") IntBuffer roles); - public native @NoException(true) int getAll(int size, @Cast("const char**") @ByPtrPtr byte[] layerNames, @Cast("nvinfer1::WeightsRole*") int[] roles); - - /** - * @deprecated Deprecated interface will be removed in TensorRT 10.0. - * - * \warning Calling destroy on a managed pointer will result in a double-free error. - * */ - - - //! - //! - //! - //! - public native @Deprecated @NoException(true) void destroy(); - - /** - * Update dynamic range for a tensor. - * - * @param tensorName The name of an ITensor in the network. - * @param min The minimum of the dynamic range for the tensor. - * @param max The maximum of the dynamic range for the tensor. - * - * @return True if successful; false otherwise. - * - * Returns false if there is no Int8 engine tensor derived from - * a network tensor of that name. If successful, then getMissing - * may report that some weights need to be supplied. */ - - - //! - //! - //! - //! - public native @Cast("bool") @NoException(true) boolean setDynamicRange(String tensorName, float min, float max); - public native @Cast("bool") @NoException(true) boolean setDynamicRange(@Cast("const char*") BytePointer tensorName, float min, float max); - - /** - * \brief Get minimum of dynamic range. - * - * @return Minimum of dynamic range. - * - * If the dynamic range was never set, returns the minimum computed during calibration. - * */ - - - //! - //! - //! - //! 
- public native @NoException(true) float getDynamicRangeMin(String tensorName); - public native @NoException(true) float getDynamicRangeMin(@Cast("const char*") BytePointer tensorName); - - /** - * \brief Get maximum of dynamic range. - * - * @return Maximum of dynamic range. - * - * If the dynamic range was never set, returns the maximum computed during calibration. - * */ - - - //! - //! - //! - //! - //! - public native @NoException(true) float getDynamicRangeMax(String tensorName); - public native @NoException(true) float getDynamicRangeMax(@Cast("const char*") BytePointer tensorName); - - /** - * \brief Get names of all tensors that have refittable dynamic ranges. - * - * @param size The number of items that can be safely written to a non-null tensorNames. - * @param tensorNames Where to write the layer names. - * - * @return The number of Weights that could be refit. - * - * If tensorNames!=nullptr, each written pointer points to a string owned by - * the engine being refit, and becomes invalid when the engine is destroyed. - * */ - - - //! - //! - //! - //! - //! - public native @NoException(true) int getTensorsWithDynamicRange(int size, @Cast("const char**") PointerPointer tensorNames); - public native @NoException(true) int getTensorsWithDynamicRange(int size, @Cast("const char**") @ByPtrPtr BytePointer tensorNames); - public native @NoException(true) int getTensorsWithDynamicRange(int size, @Cast("const char**") @ByPtrPtr ByteBuffer tensorNames); - public native @NoException(true) int getTensorsWithDynamicRange(int size, @Cast("const char**") @ByPtrPtr byte[] tensorNames); - - /** - * \brief Set the ErrorRecorder for this interface - * - * Assigns the ErrorRecorder to this interface. The ErrorRecorder will track all errors during execution. - * This function will call incRefCount of the registered ErrorRecorder at least once. 
Setting - * recorder to nullptr unregisters the recorder with the interface, resulting in a call to decRefCount if - * a recorder has been registered. - * - * If an error recorder is not set, messages will be sent to the global log stream. - * - * @param recorder The error recorder to register with this interface. */ - // - /** @see getErrorRecorder() - /** */ - - - //! - //! - //! - //! - //! - public native @NoException(true) void setErrorRecorder(IErrorRecorder recorder); - - /** - * \brief Get the ErrorRecorder assigned to this interface. - * - * Retrieves the assigned error recorder object for the given class. A nullptr will be returned if - * an error handler has not been set. - * - * @return A pointer to the IErrorRecorder object that has been registered. - * - * @see setErrorRecorder() - * */ - - - //! - //! - //! - //! - //! - public native @NoException(true) IErrorRecorder getErrorRecorder(); - - /** - * \brief Specify new weights of given name. - * - * @param name The name of the weights to be refit. - * @param weights The new weights to associate with the name. - * - * Returns true on success, or false if new weights are rejected. - * Possible reasons for rejection are: - * - * * The name of weights is nullptr or does not correspond to any refittable weights. - * * The number of weights is inconsistent with the original specification. - * - * Modifying the weights before method refitCudaEngine() completes will result in undefined behavior. */ - - - //! - //! - //! - //! - //! - //! - public native @Cast("bool") @NoException(true) boolean setNamedWeights(String name, @ByVal Weights weights); - public native @Cast("bool") @NoException(true) boolean setNamedWeights(@Cast("const char*") BytePointer name, @ByVal Weights weights); - - /** - * \brief Get names of missing weights. 
- * - * For example, if some Weights have been set, but the engine was optimized - * in a way that combines weights, any unsupplied Weights in the combination - * are considered missing. - * - * @param size The number of weights names that can be safely written to. - * @param weightsNames The names of the weights to be updated, or nullptr for unnamed weights. - * - * @return The number of missing Weights. - * - * If layerNames!=nullptr, each written pointer points to a string owned by - * the engine being refit, and becomes invalid when the engine is destroyed. - * */ - - - //! - //! - //! - //! - //! - public native @NoException(true) int getMissingWeights(int size, @Cast("const char**") PointerPointer weightsNames); - public native @NoException(true) int getMissingWeights(int size, @Cast("const char**") @ByPtrPtr BytePointer weightsNames); - public native @NoException(true) int getMissingWeights(int size, @Cast("const char**") @ByPtrPtr ByteBuffer weightsNames); - public native @NoException(true) int getMissingWeights(int size, @Cast("const char**") @ByPtrPtr byte[] weightsNames); - - /** - * \brief Get names of all weights that could be refit. - * - * @param size The number of weights names that can be safely written to. - * @param weightsNames The names of the weights to be updated, or nullptr for unnamed weights. - * - * @return The number of Weights that could be refit. - * - * If layerNames!=nullptr, each written pointer points to a string owned by - * the engine being refit, and becomes invalid when the engine is destroyed. 
- * */ - public native @NoException(true) int getAllWeights(int size, @Cast("const char**") PointerPointer weightsNames); - public native @NoException(true) int getAllWeights(int size, @Cast("const char**") @ByPtrPtr BytePointer weightsNames); - public native @NoException(true) int getAllWeights(int size, @Cast("const char**") @ByPtrPtr ByteBuffer weightsNames); - public native @NoException(true) int getAllWeights(int size, @Cast("const char**") @ByPtrPtr byte[] weightsNames); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IResizeLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IResizeLayer.java deleted file mode 100644 index 69533ae4a00..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IResizeLayer.java +++ /dev/null @@ -1,323 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - // namespace impl - -/** \class IResizeLayer - * - * \brief A resize layer in a network definition. - * - * Resize layer can be used for resizing a N-D tensor. - * - * Resize layer currently supports the following configurations: - * - ResizeMode::kNEAREST - resizes innermost {@code m} dimensions of N-D, where 0 < m <= min(8, N) and N > 0 - * - ResizeMode::kLINEAR - resizes innermost {@code m} dimensions of N-D, where 0 < m <= min(3, N) and N > 0 - * - * Default resize mode is ResizeMode::kNEAREST. 
- * - * Resize layer provides two ways to resize tensor dimensions. - * - Set output dimensions directly. It can be done for static as well as dynamic resize layer. - * Static resize layer requires output dimensions to be known at build-time. - * Dynamic resize layer requires output dimensions to be set as one of the input tensors. - * - Set scales for resize. Each output dimension is calculated as floor(input dimension * scale). - * Only static resize layer allows setting scales where the scales are known at build-time. - * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. - * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IResizeLayer extends ILayer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IResizeLayer(Pointer p) { super(p); } - - /** - * \brief Set the output dimensions. - * - * @param dimensions The output dimensions. Number of output dimensions must be the same as the number of input - * dimensions. - * - * If there is a second input, i.e. resize layer is dynamic, - * calling setOutputDimensions() is an error and does not update the - * dimensions. - * - * Output dimensions can be specified directly, or via scale factors relative to input dimensions. - * Scales for resize can be provided using setScales(). - * - * @see setScales - * @see getOutputDimensions - * */ - - - //! - //! - //! - public native @NoException(true) void setOutputDimensions(@ByVal @Cast("nvinfer1::Dims*") Dims32 dimensions); - - /** - * \brief Get the output dimensions. - * - * @return The output dimensions. - * */ - - - //! - //! - //! - //! - //! - //! - //! - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getOutputDimensions(); - - /** - * \brief Set the resize scales. - * - * @param scales An array of resize scales. - * @param nbScales Number of scales. 
Number of scales must be equal to the number of input dimensions. - * - * If there is a second input, i.e. resize layer is dynamic, - * calling setScales() is an error and does not update the scales. - * - * Output dimensions are calculated as follows: - * outputDims[i] = floor(inputDims[i] * scales[i]) - * - * Output dimensions can be specified directly, or via scale factors relative to input dimensions. - * Output dimensions can be provided directly using setOutputDimensions(). - * - * @see setOutputDimensions - * @see getScales - * */ - - - //! - //! - //! - //! - //! - //! - public native @NoException(true) void setScales(@Const FloatPointer scales, int nbScales); - public native @NoException(true) void setScales(@Const FloatBuffer scales, int nbScales); - public native @NoException(true) void setScales(@Const float[] scales, int nbScales); - - /** - * \brief Copies resize scales to scales[0, ..., nbScales-1], where nbScales is the number of scales that were set. - * - * @param size The number of scales to get. If size != nbScales, no scales will be copied. - * - * @param scales Pointer to where to copy the scales. Scales will be copied only if - * size == nbScales and scales != nullptr. - * - * In case the size is not known consider using size = 0 and scales = nullptr. This method will return - * the number of resize scales. - * - * @return The number of resize scales i.e. nbScales if scales were set. - * Return -1 in case no scales were set or resize layer is used in dynamic mode. - * */ - - - //! - //! - //! - //! - public native @NoException(true) int getScales(int size, FloatPointer scales); - public native @NoException(true) int getScales(int size, FloatBuffer scales); - public native @NoException(true) int getScales(int size, float[] scales); - - /** - * \brief Set resize mode for an input tensor. - * - * Supported resize modes are Nearest Neighbor and Linear. - * - * @see ResizeMode - * */ - - - //! - //! - //! 
- public native @NoException(true) void setResizeMode(ResizeMode resizeMode); - public native @NoException(true) void setResizeMode(@Cast("nvinfer1::ResizeMode") int resizeMode); - - /** - * \brief Get resize mode for an input tensor. - * - * @return The resize mode. - * */ - - - //! - //! - //! - //! - //! - public native @NoException(true) ResizeMode getResizeMode(); - - /** - * \brief Set whether to align corners while resizing. - * - * If true, the centers of the 4 corner pixels of both input and output - * tensors are aligned i.e. preserves the values of corner - * pixels. - * - * Default: false. - * - * @deprecated Superseded by IResizeLayer::setCoordinateTransformation() and - * will be removed in TensorRT 10.0. - * */ - - - //! - //! - //! - //! - public native @Deprecated @NoException(true) void setAlignCorners(@Cast("bool") boolean alignCorners); - - /** - * \brief True if align corners has been set. - * - * @return True if align corners has been set, false otherwise. - * - * @deprecated Superseded by IResizeLayer::getCoordinateTransformation() and - * will be removed in TensorRT 10.0. - * */ - - - //! - //! - //! - //! - //! - //! - //! - public native @Cast("bool") @Deprecated @NoException(true) boolean getAlignCorners(); - - /** - * \brief Append or replace an input of this layer with a specific tensor - * - * @param index the index of the input to modify. - * @param tensor the new input tensor. - * - * Sets the input tensor for the given index. The index must be 0 for a static resize layer. - * A static resize layer is converted to a dynamic resize layer by calling setInput with an index 1. - * A dynamic resize layer cannot be converted back to a static resize layer. - * - * For a dynamic resize layer, the values 0 and 1 are valid. - * The indices in the dynamic case are as follows: - * - * - 0: Data or Shape tensor to be resized. - * - 1: The output dimensions, as a 1D Int32 shape tensor. 
- * - * If this function is called with a value 1, then the function getNbInputs() changes - * from returning 1 to 2. - * */ - - - //! - //! - //! - //! - //! - - /** - * \brief Set coordinate transformation function. - * - * We have different functions mapping the coordinate in output tensor to the coordinate in input tensor. - * - * Default is ResizeCoordinateTransformation::kASYMMETRIC. - * - * @see ResizeCoordinateTransformation - * */ - - - //! - //! - //! - public native @NoException(true) void setCoordinateTransformation(ResizeCoordinateTransformation coordTransform); - public native @NoException(true) void setCoordinateTransformation(@Cast("nvinfer1::ResizeCoordinateTransformation") int coordTransform); - - /** - * \brief Get coordinate transformation function. - * - * @return The coordinate transformation function. - * */ - - - //! - //! - //! - //! - //! - public native @NoException(true) ResizeCoordinateTransformation getCoordinateTransformation(); - - /** - * \brief Set coordinate selector function when resized to single pixel. - * - * When resize to single pixel image, use this function to decide how to map the coordinate in the original - * image. - * - * Default is ResizeSelector::kFORMULA. - * - * @see ResizeSelector - * */ - - - //! - //! - //! - public native @NoException(true) void setSelectorForSinglePixel(ResizeSelector selector); - public native @NoException(true) void setSelectorForSinglePixel(@Cast("nvinfer1::ResizeSelector") int selector); - - /** - * \brief Get the coordinate selector function when resized to single pixel. - * - * @return The selector function. - * */ - - - //! - //! - //! - //! - //! - public native @NoException(true) ResizeSelector getSelectorForSinglePixel(); - - /** - * \brief Set rounding mode for nearest neighbor resize. - * - * This value is used for nearest neighbor interpolation rounding. It is applied after coordinate transformation. - * - * Default is kFLOOR. - * - * @see ResizeRoundMode - * */ - - - //! - //! 
- //! - public native @NoException(true) void setNearestRounding(ResizeRoundMode value); - public native @NoException(true) void setNearestRounding(@Cast("nvinfer1::ResizeRoundMode") int value); - - /** - * \brief Get rounding mode for nearest neighbor resize. - * - * @return The rounding mode. - * */ - public native @NoException(true) ResizeRoundMode getNearestRounding(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IRuntime.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IRuntime.java deleted file mode 100644 index 049629cab49..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IRuntime.java +++ /dev/null @@ -1,191 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \class IRuntime - * - * \brief Allows a serialized functionally unsafe engine to be deserialized. - * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. - * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IRuntime extends INoCopy { - static { Loader.load(); } - /** Default native constructor. */ - public IRuntime() { super((Pointer)null); allocate(); } - /** Native array allocator. Access with {@link Pointer#position(long)}. 
*/ - public IRuntime(long size) { super((Pointer)null); allocateArray(size); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IRuntime(Pointer p) { super(p); } - private native void allocate(); - private native void allocateArray(long size); - @Override public IRuntime position(long position) { - return (IRuntime)super.position(position); - } - @Override public IRuntime getPointer(long i) { - return new IRuntime((Pointer)this).offsetAddress(i); - } - - - /** - * \brief Deserialize an engine from a stream. - * - * If an error recorder has been set for the runtime, it will also be passed to the engine. - * - * @param blob The memory that holds the serialized engine. - * @param size The size of the memory in bytes. - * @param pluginFactory The plugin factory, if any plugins are used by the network, otherwise nullptr. - * - * @return The engine, or nullptr if it could not be deserialized. - * - * @deprecated Deprecated interface will be removed in TensorRT 10.0. - * - * \warning IPluginFactory is no longer supported, therefore pluginFactory must be a nullptr. - * */ - - - //! - //! - //! - public native @Deprecated @NoException(true) ICudaEngine deserializeCudaEngine( - @Const Pointer blob, @Cast("std::size_t") long size, IPluginFactory pluginFactory); - - /** - * \brief Set the DLA core that the deserialized engine must execute on. - * @param dlaCore The DLA core to execute the engine on (0 to N-1, where N is the maximum number of DLA's present - * on the device). Default value is 0. @see getDLACore() - * - * \warning Starting with TensorRT 8, the default value will be -1 if the DLA is not specified or unused. - * */ - - - //! - //! - //! - public native @NoException(true) void setDLACore(int dlaCore); - - /** - * \brief Get the DLA core that the engine executes on. - * @return If setDLACore is called, returns DLA core from 0 to N-1, else returns 0. 
- * - * \warning Starting with TensorRT 8, the default value will be -1 if the DLA is not specified or unused. - * */ - - - //! - //! - public native @NoException(true) int getDLACore(); - - /** - * \brief Returns number of DLA hardware cores accessible. - * */ - - - //! - //! - //! - //! - public native @NoException(true) int getNbDLACores(); - - /** - * \brief Destroy this object. - * - * @deprecated Deprecated interface will be removed in TensorRT 10.0. - * - * \warning Calling destroy on a managed pointer will result in a double-free error. - * */ - - - //! - //! - //! - //! - public native @Deprecated @NoException(true) void destroy(); - - /** - * \brief Set the GPU allocator. - * @param allocator Set the GPU allocator to be used by the runtime. All GPU memory acquired will use this - * allocator. If NULL is passed, the default allocator will be used. - * - * Default: uses cudaMalloc/cudaFree. - * - * If nullptr is passed, the default allocator will be used. - * */ - - - //! - //! - //! - //! - //! - public native @NoException(true) void setGpuAllocator(IGpuAllocator allocator); - - /** - * \brief Set the ErrorRecorder for this interface - * - * Assigns the ErrorRecorder to this interface. The ErrorRecorder will track all errors during execution. - * This function will call incRefCount of the registered ErrorRecorder at least once. Setting - * recorder to nullptr unregisters the recorder with the interface, resulting in a call to decRefCount if - * a recorder has been registered. - * - * If an error recorder is not set, messages will be sent to the global log stream. - * - * @param recorder The error recorder to register with this interface. */ - // - /** @see getErrorRecorder() - /** */ - - - //! - //! - //! - //! - //! - public native @NoException(true) void setErrorRecorder(IErrorRecorder recorder); - - /** - * \brief get the ErrorRecorder assigned to this interface. - * - * Retrieves the assigned error recorder object for the given class. 
A nullptr will be returned if - * an error handler has not been set. - * - * @return A pointer to the IErrorRecorder object that has been registered. - * - * @see setErrorRecorder() - * */ - - - //! - //! - //! - //! - public native @NoException(true) IErrorRecorder getErrorRecorder(); - - /** - * \brief Deserialize an engine from a stream. - * - * @param blob The memory that holds the serialized engine. - * @param size The size of the memory. - * - * @return The engine, or nullptr if it could not be deserialized. - * */ - public native @NoException(true) ICudaEngine deserializeCudaEngine(@Const Pointer blob, @Cast("std::size_t") long size); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IScaleLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IScaleLayer.java deleted file mode 100644 index 5f133650109..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IScaleLayer.java +++ /dev/null @@ -1,190 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \class IScaleLayer - * - * \brief A Scale layer in a network definition. - * - * This layer applies a per-element computation to its input: - * - * \p output = (\p input* \p scale + \p shift)^ \p power - * - * The coefficients can be applied on a per-tensor, per-channel, or per-element basis. 
- * - * \note If the number of weights is 0, then a default value is used for shift, power, and scale. - * The default shift is 0, the default power is 1, and the default scale is 1. - * - * The output size is the same as the input size. - * - * \note The input tensor for this layer is required to have a minimum of 3 dimensions in implicit batch mode - * and a minimum of 4 dimensions in explicit batch mode. - * - * A scale layer may be used as an INT8 quantization node in a graph, if the output is constrained to INT8 and - * the input to FP32. Quantization rounds ties to even, and clamps to [-128, 127]. - * - * @see ScaleMode - * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. - * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IScaleLayer extends ILayer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IScaleLayer(Pointer p) { super(p); } - - /** - * \brief Set the scale mode. - * - * @see getMode() - * */ - - - //! - //! - //! - public native @NoException(true) void setMode(ScaleMode mode); - public native @NoException(true) void setMode(@Cast("nvinfer1::ScaleMode") int mode); - - /** - * \brief Get the scale mode. - * - * @see setMode() - * */ - - - //! - //! - //! - public native @NoException(true) ScaleMode getMode(); - - /** - * \brief Set the shift value. - * - * @see getShift() - * */ - - - //! - //! - //! - public native @NoException(true) void setShift(@ByVal Weights shift); - - /** - * \brief Get the shift value. - * - * @see setShift() - * */ - - - //! - //! - //! - public native @ByVal @NoException(true) Weights getShift(); - - /** - * \brief Set the scale value. - * - * @see getScale() - * */ - - - //! - //! - //! - public native @NoException(true) void setScale(@ByVal Weights scale); - - /** - * \brief Get the scale value. 
- * - * @see setScale() - * */ - - - //! - //! - //! - public native @ByVal @NoException(true) Weights getScale(); - - /** - * \brief Set the power value. - * - * @see getPower() - * */ - - - //! - //! - //! - public native @NoException(true) void setPower(@ByVal Weights power); - - /** - * \brief Get the power value. - * - * @see setPower() - * */ - - - //! - //! - //! - //! - //! - public native @ByVal @NoException(true) Weights getPower(); - - /** - * \brief Get the channel axis. - * - * @return channelAxis parameter passed to addScaleNd() or set by setChannelAxis() - * - * The value is the index of the channel axis in the input tensor's dimensions. - * Scaling happens along the channel axis when ScaleMode::kCHANNEL is enabled. - * - * @see addScaleNd() - * */ - - - //! - //! - //! - //! - //! - //! - public native @NoException(true) int getChannelAxis(); - - /** - * \brief Set the channel axis. - * - * The value is the index of the channel axis in the input tensor's dimensions. - * - * For ScaleMode::kCHANNEL, there can be distinct scale, shift, and power weights for each channel coordinate. - * For ScaleMode::kELEMENTWISE, there can be distinct scale, shift, and power weights for each combination of - * coordinates from the channel axis and axes after it. - * - * For example, suppose the input tensor has dimensions [10,20,30,40] and the channel axis is 1. - * Let [n,c,h,w] denote an input coordinate. - * For ScaleMode::kCHANNEL, the scale, shift, and power weights are indexed by c. - * For ScaleMode::kELEMENTWISE, the scale, shift, and power weights are indexed by [c,h,w]. 
- * - * @see addScaleNd() - * */ - public native @NoException(true) void setChannelAxis(int channelAxis); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ISelectLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ISelectLayer.java deleted file mode 100644 index 217ac6055cb..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ISelectLayer.java +++ /dev/null @@ -1,31 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. - * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class ISelectLayer extends ILayer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public ISelectLayer(Pointer p) { super(p); } - -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IShapeLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IShapeLayer.java deleted file mode 100644 index 7be7a1a07d5..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IShapeLayer.java +++ /dev/null @@ -1,40 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** \class IShapeLayer - * - * \brief Layer type for getting shape of a tensor. - * - * This layer sets the output to a one-dimensional tensor with the dimensions of the input tensor. - * - * For example, if the input is a four-dimensional tensor (of any type) with - * dimensions [2,3,5,7], the output tensor is a one-dimensional Int32 tensor - * of length 4 containing the sequence 2, 3, 5, 7. - * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. - * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IShapeLayer extends ILayer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public IShapeLayer(Pointer p) { super(p); } - -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IShuffleLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IShuffleLayer.java deleted file mode 100644 index 205383de326..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IShuffleLayer.java +++ /dev/null @@ -1,225 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** \class IShuffleLayer - * - * \brief Layer type for shuffling data. - * - * This layer shuffles data by applying in sequence: a transpose operation, a reshape operation - * and a second transpose operation. The dimension types of the output are those of the reshape dimension. - * - * The layer has an optional second input. If present, it must be a 1D Int32 shape tensor, - * and the reshape dimensions are taken from it. - * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. - * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IShuffleLayer extends ILayer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IShuffleLayer(Pointer p) { super(p); } - - /** - * \brief Set the permutation applied by the first transpose operation. 
- * - * @param permutation The dimension permutation applied before the reshape. - * - * The default is the identity permutation. - * - * @see getFirstTranspose - * */ - - - //! - //! - //! - //! - public native @NoException(true) void setFirstTranspose(@ByVal Permutation permutation); - - /** - * \brief Get the permutation applied by the first transpose operation. - * - * @return The dimension permutation applied before the reshape. - * - * @see setFirstTranspose - * */ - - - //! - //! - //! - //! - //! - //! - //! - //! - public native @ByVal @NoException(true) Permutation getFirstTranspose(); - - /** - * \brief Set the reshaped dimensions. - * - * @param dimensions The reshaped dimensions. - * - * Two special values can be used as dimensions. - * - * Value 0 copies the corresponding dimension from input. This special value - * can be used more than once in the dimensions. If number of reshape - * dimensions is less than input, 0s are resolved by aligning the most - * significant dimensions of input. - * - * Value -1 infers that particular dimension by looking at input and rest - * of the reshape dimensions. Note that only a maximum of one dimension is - * permitted to be specified as -1. - * - * The product of the new dimensions must be equal to the product of the old. - * - * If a second input had been used to create this layer, that input is reset to null by this method. - * */ - - - //! - //! - //! - //! - public native @NoException(true) void setReshapeDimensions(@ByVal @Cast("nvinfer1::Dims*") Dims32 dimensions); - - /** - * \brief Get the reshaped dimensions. - * - * @return The reshaped dimensions. - * - * If a second input is present and non-null, or setReshapeDimensions has - * not yet been called, this function returns Dims with nbDims == -1. - * */ - - - //! - //! - //! - //! - //! - //! - //! - //! 
- public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getReshapeDimensions(); - - /** - * \brief Append or replace an input of this layer with a specific tensor - * - * @param index the index of the input to modify. - * @param tensor the new input tensor */ - // - /** Sets the input tensor for the given index. The index must be 0 for a static shuffle layer. - /** A static shuffle layer is converted to a dynamic shuffle layer by calling setInput with an index 1. - /** A dynamic shuffle layer cannot be converted back to a static shuffle layer. - /** - /** For a dynamic shuffle layer, the values 0 and 1 are valid. - /** The indices in the dynamic case are as follows: - /** - /** - 0: Data or Shape tensor to be shuffled. - /** - 1: The dimensions for the reshape operation, as a 1D Int32 shape tensor. - /** - /** If this function is called with a value 1, then the function getNbInputs() changes - /** from returning 1 to 2. - /** - /** The reshape dimensions are treated identically to how they are treated if set statically - /** via setReshapeDimensions. In particular, a -1 is treated as a wildcard even if dynamically - /** supplied at runtime, and a 0 is treated as a placeholder if getZeroIsPlaceholder() = true, - /** which is the default. If the placeholder interpretation of 0 is unwanted because the - /** runtime dimension should be 0 when the reshape dimension is 0, be sure to call - /** setZeroIsPlacholder(false) on the IShuffleLayer. - /** - /** @see setReshapeDimensions. - /** */ - - - //! - //! - //! - //! - //! - //! - - /** - * \brief Set the permutation applied by the second transpose operation. - * - * @param permutation The dimension permutation applied after the reshape. - * - * The default is the identity permutation. - * - * The permutation is applied as outputDimensionIndex = permutation.order[inputDimensionIndex], so to - * permute from CHW order to HWC order, the required permutation is [1, 2, 0]. 
- * - * @see getSecondTranspose - * */ - - - //! - //! - //! - //! - public native @NoException(true) void setSecondTranspose(@ByVal Permutation permutation); - - /** - * \brief Get the permutation applied by the second transpose operation. - * - * @return The dimension permutation applied after the reshape. - * - * @see setSecondTranspose - * */ - - - //! - //! - //! - //! - //! - public native @ByVal @NoException(true) Permutation getSecondTranspose(); - - /** - * \brief Set meaning of 0 in reshape dimensions. - * - * If true, then a 0 in the reshape dimensions denotes copying the corresponding - * dimension from the first input tensor. If false, then a 0 in the reshape - * dimensions denotes a zero-length dimension. - * - * Default: true - * - * @see getZeroIsPlaceholder(); - * */ - - - //! - //! - //! - //! - public native @NoException(true) void setZeroIsPlaceholder(@Cast("bool") boolean zeroIsPlaceholder); - - /** - * \brief Get meaning of 0 in reshape dimensions. - * - * @return true if 0 is placeholder for corresponding input dimension, - * false if 0 denotes a zero-length dimension. 
- * - * @see setZeroIsPlaceholder - * */ - public native @Cast("bool") @NoException(true) boolean getZeroIsPlaceholder(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ISliceLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ISliceLayer.java deleted file mode 100644 index 6790a570055..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ISliceLayer.java +++ /dev/null @@ -1,213 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \brief Slices an input tensor into an output tensor based on the offset and strides. - * - * The slice layer has two variants, static and dynamic. Static slice specifies the start, size, and stride - * dimensions at layer creation time via Dims and can use the get/set accessor functions of the ISliceLayer. - * Dynamic slice specifies one or more of start, size or stride as ITensors, by using ILayer::setTensor to add - * a second, third, or fourth input respectively. The corresponding Dims are used if an input - * is missing or null. - * - * An application can determine if the ISliceLayer has a dynamic output shape based on whether - * the size input (third input) is present and non-null. 
- * - * The slice layer selects for each dimension a start location from within the input tensor, and - * copies elements to the output tensor using the specified stride across the input tensor. - * Start, size, and stride tensors must be 1D Int32 shape tensors if not specified via Dims. - * - * A slice layer can produce a shape tensor if the following conditions are met: - * - * * start, size, and stride are build time constants, either as static Dims, or computable by constant folding. - * * The number of elements in the output tensor does not exceed 2*Dims::MAX_DIMS. - * - * For example using slice on a tensor: - * input = {{0, 2, 4}, {1, 3, 5}} - * start = {1, 0} - * size = {1, 2} - * stride = {1, 2} - * output = {{1, 5}} - * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. - * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class ISliceLayer extends ILayer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public ISliceLayer(Pointer p) { super(p); } - - /** - * \brief Set the start offset that the slice layer uses to create the output slice. - * - * @param start The start offset to read data from the input tensor. - * - * If a second input had been used to create this layer, that input is reset to null by this method. - * - * @see getStart - * */ - - - //! - //! - //! - //! - //! - public native @NoException(true) void setStart(@ByVal @Cast("nvinfer1::Dims*") Dims32 start); - - /** - * \brief Get the start offset for the slice layer. - * - * @return The start offset, or an invalid Dims structure. - * - * If the second input is present and non-null, - * this function returns a Dims with nbDims = -1. - * - * @see setStart - * */ - - - //! - //! - //! - //! - //! 
- public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getStart(); - - /** - * \brief Set the dimensions of the output slice. - * - * @param size The dimensions of the output slice. - * - * If a third input had been used to create this layer, that input is reset to null by this method. - * - * @see getSize - * */ - - - //! - //! - //! - //! - //! - public native @NoException(true) void setSize(@ByVal @Cast("nvinfer1::Dims*") Dims32 size); - - /** - * \brief Get dimensions of the output slice. - * - * @return The output dimension, or an invalid Dims structure. - * - * If the third input is present and non-null, - * this function returns a Dims with nbDims = -1. - * - * @see setSize - * */ - - - //! - //! - //! - //! - //! - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getSize(); - - /** - * \brief Set the stride for computing the output slice data. - * - * @param stride The dimensions of the stride to compute the values to store in the output slice. - * - * If a fourth input had been used to create this layer, that input is reset to null by this method. - * - * @see getStride - * */ - - - //! - //! - //! - //! - //! - public native @NoException(true) void setStride(@ByVal @Cast("nvinfer1::Dims*") Dims32 stride); - - /** - * \brief Get the stride for the output slice. - * - * @return The slicing stride, or an invalid Dims structure. - * - * If the fourth input is present and non-null, - * this function returns a Dims with nbDims = -1. - * - * @see setStride - * */ - - - //! - //! - //! - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getStride(); - - /** - * \brief Set the slice mode. - * - * @see getMode() - * */ - - - //! - //! - //! - public native @NoException(true) void setMode(SliceMode mode); - public native @NoException(true) void setMode(@Cast("nvinfer1::SliceMode") int mode); - - /** - * \brief Get the slice mode. - * - * @see setMode() - * */ - - - //! - //! - //! - //! - //! - //! 
- public native @NoException(true) SliceMode getMode(); - - /** - * \brief Append or replace an input of this layer with a specific tensor - * - * @param index the index of the input to modify. - * @param tensor the new input tensor - * - * For a slice layer, the values 0-3 are valid. The values 1-3 override start, size or stride - * dimensions, respectively. Conversely, this input tensor can be overridden via appropriate set call. - * The indices are as follows: - * - * - 0: Data or Shape tensor to be sliced. - * - 1: The start tensor to begin slicing, as a 1D Int32 shape tensor. - * - 2: The size tensor of the resulting slice, as a 1D Int32 shape tensor. - * - 3: The stride of the slicing operation, as a 1D Int32 shape tensor. - * - * If this function is called with a value greater than 0, then the function getNbInputs() changes - * from returning 1 to index + 1. - * */ -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ISoftMaxLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ISoftMaxLayer.java deleted file mode 100644 index 66f0458e303..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ISoftMaxLayer.java +++ /dev/null @@ -1,82 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \class ISoftMaxLayer - * - * \brief A Softmax layer in a network definition. 
- * - * This layer applies a per-channel softmax to its input. - * - * The output size is the same as the input size. - * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. - * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class ISoftMaxLayer extends ILayer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public ISoftMaxLayer(Pointer p) { super(p); } - - /** - * \brief Set the axis along which softmax is computed. Currently, only one axis can be set. - * - * The axis is specified by setting the bit corresponding to the axis to 1. - * Let's say we have an NCHW tensor as input (three non-batch dimensions). - * - * In implicit mode : - * Bit 0 corresponds to the C dimension boolean. - * Bit 1 corresponds to the H dimension boolean. - * Bit 2 corresponds to the W dimension boolean. - * By default, softmax is performed on the axis which is the number of axes minus three. It is 0 if - * there are fewer than 3 non-batch axes. For example, if the input is NCHW, the default axis is C. If the input - * is NHW, then the default axis is H. - * - * In explicit mode : - * Bit 0 corresponds to the N dimension boolean. - * Bit 1 corresponds to the C dimension boolean. - * Bit 2 corresponds to the H dimension boolean. - * Bit 3 corresponds to the W dimension boolean. - * By default, softmax is performed on the axis which is the number of axes minus three. It is 0 if - * there are fewer than 3 axes. For example, if the input is NCHW, the default axis is C. If the input - * is NHW, then the default axis is N. - * - * For example, to perform softmax on axis R of a NPQRCHW input, set bit 2 with implicit batch mode, - * set bit 3 with explicit batch mode. - * - * @param axes The axis along which softmax is computed. - * Here axes is a bitmap. 
For example, when doing softmax along axis 0, bit 0 is set to 1, axes = 1 << axis - * = 1. - * */ - - - //! - //! - //! - public native @NoException(true) void setAxes(@Cast("uint32_t") int axes); - - /** - * \brief Get the axis along which softmax occurs. - * - * @see setAxes() - * */ - public native @Cast("uint32_t") @NoException(true) int getAxes(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ITensor.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ITensor.java deleted file mode 100644 index fd6cb0282b0..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ITensor.java +++ /dev/null @@ -1,414 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - // namespace impl - -/** - * \class ITensor - * - * \brief A tensor in a network definition. - * - * To remove a tensor from a network definition, use INetworkDefinition::removeTensor(). - * - * When using the DLA, the cumulative size of all Tensors that are not marked as Network Input or Output tensors, - * must be less than 1GB in size to fit into a single subgraph. If the build option kGPU_FALLBACK is specified, then - * multiple subgraphs can be created, with each subgraph limited to less than 1GB of internal tensors data. - * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. 
- * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class ITensor extends INoCopy { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public ITensor(Pointer p) { super(p); } - - /** - * \brief Set the tensor name. - * - * For a network input, the name is assigned by the application. For tensors which are layer outputs, - * a default name is assigned consisting of the layer name followed by the index of the output in brackets. - * - * This method copies the name string. - * - * @param name The name. - * - * @see getName() - * */ - - - //! - //! - //! - //! - public native @NoException(true) void setName(String name); - public native @NoException(true) void setName(@Cast("const char*") BytePointer name); - - /** - * \brief Get the tensor name. - * - * @return The name, as a pointer to a NULL-terminated character sequence. - * - * @see setName() - * */ - - - //! - //! - //! - //! - //! - //! - public native @NoException(true) String getName(); - - /** - * \brief Set the dimensions of a tensor. - * - * For a network input, the dimensions are assigned by the application. For a network output, the dimensions are - * computed based on the layer parameters and the inputs to the layer. If a tensor size or a parameter is modified - * in the network, the dimensions of all dependent tensors will be recomputed. - * - * This call is only legal for network input tensors, since the dimensions of layer output tensors are inferred - * based on layer inputs and parameters. - * - * @param dimensions The dimensions of the tensor. - * - * @see getDimensions() - * */ - - - //! - //! - //! - //! - public native @NoException(true) void setDimensions(@ByVal @Cast("nvinfer1::Dims*") Dims32 dimensions); - - /** - * \brief Get the dimensions of a tensor. - * - * @return The dimensions of the tensor. 
- * - * \warning getDimensions() returns a -1 for dimensions that are derived from a wildcard dimension. - * @see setDimensions() - * */ - - - //! - //! - //! - //! - //! - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getDimensions(); - - /** - * \brief Set the data type of a tensor. - * - * @param type The data type of the tensor. - * - * The type is unchanged if the tensor is not a network input tensor, or marked as an output tensor or shape - * output tensor. - * - * @see getType() - * */ - - - //! - //! - //! - //! - public native @NoException(true) void setType(DataType type); - public native @NoException(true) void setType(@Cast("nvinfer1::DataType") int type); - - /** - * \brief Get the data type of a tensor. - * - * @return The data type of the tensor. - * - * @see setType() - * */ - - - //! - //! - //! - //! - //! - public native @NoException(true) DataType getType(); - - /** - * \brief Set dynamic range for the tensor - * - * Currently, only symmetric ranges are supported. - * Therefore, the larger of the absolute values of the provided bounds is used. - * - * @return Whether the dynamic range was set successfully. - * - * Requires that min and max be finite, and min <= max. - * */ - - - //! - //! - public native @Cast("bool") @NoException(true) boolean setDynamicRange(float min, float max); - - /** - * \brief Whether the tensor is a network input. - * */ - - - //! - //! - public native @Cast("bool") @NoException(true) boolean isNetworkInput(); - - /** - * \brief Whether the tensor is a network output. - * */ - - - //! - //! - //! - //! - //! - //! - //! - public native @Cast("bool") @NoException(true) boolean isNetworkOutput(); - - /** - * \brief Set whether to enable broadcast of tensor across the batch. - * - * When a tensor is broadcast across a batch, it has the same value for every member in the batch. - * Memory is only allocated once for the single member. 
- * - * This method is only valid for network input tensors, since the flags of layer output tensors are inferred based - * on layer inputs and parameters. - * If this state is modified for a tensor in the network, the states of all dependent tensors will be recomputed. - * If the tensor is for an explicit batch network, then this function does nothing. - * - * \warning The broadcast flag is ignored when using explicit batch network mode. - * - * @param broadcastAcrossBatch Whether to enable broadcast of tensor across the batch. - * - * @see getBroadcastAcrossBatch() - * */ - - - //! - //! - //! - //! - //! - public native @NoException(true) void setBroadcastAcrossBatch(@Cast("bool") boolean broadcastAcrossBatch); - - /** - * \brief Check if tensor is broadcast across the batch. - * - * When a tensor is broadcast across a batch, it has the same value for every member in the batch. - * Memory is only allocated once for the single member. If the network is in explicit batch mode, - * this function returns true if the leading dimension is 1. - * - * @return True if tensor is broadcast across the batch, false otherwise. - * - * @see setBroadcastAcrossBatch() - * */ - - - //! - //! - public native @Cast("bool") @NoException(true) boolean getBroadcastAcrossBatch(); - - /** - * \brief Get the storage location of a tensor. - * @return The location of tensor data. - * @see setLocation() - * */ - - - //! - //! - //! - //! - public native @NoException(true) TensorLocation getLocation(); - - /** - * \brief Set the storage location of a tensor - * @param location the location of tensor data - * - * Only network input tensors for storing sequence lengths for RNNv2 are supported. - * Using host storage for layers that do not support it will generate - * errors at build time. - * - * @see getLocation() - * */ - - - //! - //! - //! 
- public native @NoException(true) void setLocation(TensorLocation location); - public native @NoException(true) void setLocation(@Cast("nvinfer1::TensorLocation") int location); - - /** - * \brief Query whether dynamic range is set. - * - * @return True if dynamic range is set, false otherwise. - * */ - - - //! - //! - public native @Cast("bool") @NoException(true) boolean dynamicRangeIsSet(); - - /** - * \brief Undo effect of setDynamicRange. - * */ - - - //! - //! - //! - public native @NoException(true) void resetDynamicRange(); - - /** - * \brief Get minimum of dynamic range. - * - * @return Minimum of dynamic range, or quiet NaN if range was not set. - * */ - - - //! - //! - //! - public native @NoException(true) float getDynamicRangeMin(); - - /** - * \brief Get maximum of dynamic range. - * - * @return Maximum of dynamic range, or quiet NaN if range was not set. - * */ - - - //! - //! - //! - //! - //! - public native @NoException(true) float getDynamicRangeMax(); - - /** - * \brief Set allowed formats for this tensor. By default all formats are allowed. - * Shape tensors (for which isShapeTensor() returns true) may only have row major linear format. - * - * When running network on DLA and allowGPUFallback is disabled, if DLA format(kCHW4 with Int8, kCHW4 with - * FP16, kCHW16 with FP16, kCHW32 with Int8) is set, the input format is treated as native DLA format with - * line stride requirement. Input/output binding with these format should have correct layout during - * inference. - * - * @param formats A bitmask of TensorFormat values that are supported for this tensor. - * - * @see ITensor::getAllowedFormats() - * @see TensorFormats - * */ - - - //! - //! - //! - //! - public native @NoException(true) void setAllowedFormats(@Cast("nvinfer1::TensorFormats") int formats); - - /** - * \brief Get a bitmask of TensorFormat values that the tensor supports. - * For a shape tensor, only row major linear format is allowed. 
- * - * @return The value specified by setAllowedFormats or all possible formats. - * - * @see ITensor::setAllowedFormats() - * */ - - - //! - //! - //! - //! - //! - //! - //! - //! - //! - public native @Cast("nvinfer1::TensorFormats") @NoException(true) int getAllowedFormats(); - - /** - * \brief Whether the tensor is a shape tensor. - * - * A shape tensor is a tensor that is related to shape calculations. - * It must be 0D or 1D, have type Int32 or Bool, and its shape must be determinable at build time. - * Furthermore, it must be needed as a shape tensor, either marked as a network shape - * output via markOutputForShapes(), or as an input that is required to be a shape - * tensor, such as the second input to IShuffleLayer. Some layers are "polymorphic" in - * this respect. For example, the inputs to IElementWiseLayer must be shape tensors - * if the output is a shape tensor. - * - * The TensorRT Developer Guide give the formal rules for what tensors are shape tensors. - * - * The result of isShapeTensor() is reliable only when network construction is complete. - * For example, if a partially built network sums two tensors T1 and T2 to create - * tensor T3, and none are yet needed as shape tensors, isShapeTensor() returns false - * for all three tensors. Setting the second input of IShuffleLayer to be T3 would - * cause all three tensors to be shape tensors, because IShuffleLayer requires that its - * second optional input be a shape tensor, and IElementWiseLayer is "polymorphic". - * - * If a tensor is a shape tensor and becomes an engine input or output, - * then ICudaEngine::isShapeBinding will be true for that tensor. - * - * It is possible for a tensor to be both a shape tensor and an execution tensor. - * - * @return True if tensor is a shape tensor, false otherwise. - * - * @see INetworkDefinition::markOutputForShapes(), ICudaEngine::isShapeBinding() - * */ - - - //! - //! - //! - //! - //! - //! 
- public native @Cast("bool") @NoException(true) boolean isShapeTensor(); - - /** - * \brief Whether the tensor is an execution tensor. - * - * Tensors are usually execution tensors. The exceptions are tensors used - * solely for shape calculations or whose contents not needed to compute the outputs. - * - * The result of isExecutionTensor() is reliable only when network construction is complete. - * For example, if a partially built network has no path from a tensor to a network output, - * isExecutionTensor() returns false. Completing the path would cause it to become true. - * - * If a tensor is an execution tensor and becomes an engine input or output, - * then ICudaEngine::isExecutionBinding will be true for that tensor. - * - * A tensor with isShapeTensor() == false and isExecutionTensor() == false - * can still show up as an input to the engine if its dimensions are required. - * In that case, only its dimensions need to be set at runtime and a nullptr - * can be passed instead of a pointer to its contents. 
- * */ - public native @Cast("bool") @NoException(true) boolean isExecutionTensor(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ITimingCache.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ITimingCache.java deleted file mode 100644 index 0259fd2a6fa..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ITimingCache.java +++ /dev/null @@ -1,123 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \class ITimingCache - * - * \brief Class to handle tactic timing info collected from builder. - * - * The timing cache is created or initialized by IBuilderConfig. It can be shared across builder instances - * to accelerate the builder wallclock time. - * - * @see IBuilderConfig - * - * - * \class ITimingCache - * - * \brief Class to handle tactic timing info collected from builder. - * - * The timing cache is created or initialized by IBuilderConfig. It can be shared across builder instances - * to accelerate the builder wallclock time. - * - * @see IBuilderConfig - * - * - * \class ITimingCache - * - * \brief Class to handle tactic timing info collected from builder. - * - * The timing cache is created or initialized by IBuilderConfig. It can be shared across builder instances - * to accelerate the builder wallclock time. 
- * - * @see IBuilderConfig - * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class ITimingCache extends INoCopy { - static { Loader.load(); } - /** Default native constructor. */ - public ITimingCache() { super((Pointer)null); allocate(); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public ITimingCache(long size) { super((Pointer)null); allocateArray(size); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public ITimingCache(Pointer p) { super(p); } - private native void allocate(); - private native void allocateArray(long size); - @Override public ITimingCache position(long position) { - return (ITimingCache)super.position(position); - } - @Override public ITimingCache getPointer(long i) { - return new ITimingCache((Pointer)this).offsetAddress(i); - } - - - /** - * \brief Serialize a timing cache to IHostMemory object. - * - * This function allows serialization of current timing cache. - * - * @return A pointer to a IHostMemory object that contains a serialized timing cache. - * - * @see IHostMemory - * */ - - - //! - //! - //! - //! - //! - //! - //! - public native @NoException(true) IHostMemory serialize(); - - /** - * \brief Combine input timing cache into local instance. - * - * This function allows combining entries in the input timing cache to local cache object. - * - * @param inputCache The input timing cache. - * @param ignoreMismatch Whether or not to allow cache verification header mismatch. - * - * @return True if combined successfully, false otherwise. - * - * Append entries in input cache to local cache. Conflicting entries will be skipped - * The input cache must be generated by a TensorRT build of exact same version, otherwise - * combine will be skipped and return false. - * ignoreMismatch must be set to true if combining a timing cache created from a - * different device. 
- * - * \warning Combining caches generated from devices with different device properties may - * lead to functional/performance bugs! - * */ - - - //! - //! - //! - public native @Cast("bool") @NoException(true) boolean combine(@Const @ByRef ITimingCache inputCache, @Cast("bool") boolean ignoreMismatch); - - /** - * \brief Empty the timing cache - * - * @return True if reset successfully, false otherwise. - * */ - public native @Cast("bool") @NoException(true) boolean reset(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ITopKLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ITopKLayer.java deleted file mode 100644 index d593f4b8bd1..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ITopKLayer.java +++ /dev/null @@ -1,105 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \class ITopKLayer - * - * \brief Layer that represents a TopK reduction. - * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. - * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class ITopKLayer extends ILayer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public ITopKLayer(Pointer p) { super(p); } - - /** - * \brief Set the operation for the layer. - * - * @see getOperation(), TopKOperation - * */ - - - //! - //! - //! - public native @NoException(true) void setOperation(TopKOperation op); - public native @NoException(true) void setOperation(@Cast("nvinfer1::TopKOperation") int op); - - /** - * \brief Get the operation for the layer. - * - * @see setOperation(), TopKOperation - * */ - - - //! - //! - //! - //! - public native @NoException(true) TopKOperation getOperation(); - - /** - * \brief Set the k value for the layer. - * - * Currently only values up to 3840 are supported. - * - * @see getK() - * */ - - - //! - //! - //! - public native @NoException(true) void setK(int k); - - /** - * \brief Get the k value for the layer. - * - * @see setK() - * */ - - - //! - //! - //! - public native @NoException(true) int getK(); - - /** - * \brief Set which axes to reduce for the layer. - * - * @see getReduceAxes() - * */ - - - //! - //! - //! - public native @NoException(true) void setReduceAxes(@Cast("uint32_t") int reduceAxes); - - /** - * \brief Get the axes to reduce for the layer. 
- * - * @see setReduceAxes() - * */ - public native @Cast("uint32_t") @NoException(true) int getReduceAxes(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ITripLimitLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ITripLimitLayer.java deleted file mode 100644 index 34bd0b3683c..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/ITripLimitLayer.java +++ /dev/null @@ -1,29 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class ITripLimitLayer extends ILoopBoundaryLayer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public ITripLimitLayer(Pointer p) { super(p); } - - public native @NoException(true) TripLimit getTripLimit(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IUnaryLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IUnaryLayer.java deleted file mode 100644 index 3a9a335de5a..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/IUnaryLayer.java +++ /dev/null @@ -1,54 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \class IUnaryLayer - * - * \brief Layer that represents an unary operation. - * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. - * */ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class IUnaryLayer extends ILayer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IUnaryLayer(Pointer p) { super(p); } - - /** - * \brief Set the unary operation for the layer. - * - * @see getOperation(), UnaryOperation - * */ - - - //! - //! - //! - public native @NoException(true) void setOperation(UnaryOperation op); - public native @NoException(true) void setOperation(@Cast("nvinfer1::UnaryOperation") int op); - - /** - * \brief Get the unary operation for the layer. 
- * - * @see setOperation(), UnaryOperation - * */ - public native @NoException(true) UnaryOperation getOperation(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Permutation.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Permutation.java deleted file mode 100644 index 8c0ab84d5b8..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Permutation.java +++ /dev/null @@ -1,48 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class Permutation extends Pointer { - static { Loader.load(); } - /** Default native constructor. */ - public Permutation() { super((Pointer)null); allocate(); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public Permutation(long size) { super((Pointer)null); allocateArray(size); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public Permutation(Pointer p) { super(p); } - private native void allocate(); - private native void allocateArray(long size); - @Override public Permutation position(long position) { - return (Permutation)super.position(position); - } - @Override public Permutation getPointer(long i) { - return new Permutation((Pointer)this).offsetAddress(i); - } - - /** - * The elements of the permutation. - * The permutation is applied as outputDimensionIndex = permutation.order[inputDimensionIndex], so to - * permute from CHW order to HWC order, the required permutation is [1, 2, 0], and to permute - * from HWC to CHW, the required permutation is [2, 0, 1]. - * */ - public native int order(int i); public native Permutation order(int i, int setter); - @MemberGetter public native IntPointer order(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/PluginField.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/PluginField.java deleted file mode 100644 index 5e09fbc5198..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/PluginField.java +++ /dev/null @@ -1,81 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \class PluginField - * - * \brief Structure containing plugin attribute field names and associated data - * This information can be parsed to decode necessary plugin metadata - * - * 
*/ -@Namespace("nvinfer1") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class PluginField extends Pointer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public PluginField(Pointer p) { super(p); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public PluginField(long size) { super((Pointer)null); allocateArray(size); } - private native void allocateArray(long size); - @Override public PluginField position(long position) { - return (PluginField)super.position(position); - } - @Override public PluginField getPointer(long i) { - return new PluginField((Pointer)this).offsetAddress(i); - } - - /** - * \brief Plugin field attribute name - * */ - - //! - //! - public native String name(); public native PluginField name(String setter); - /** - * \brief Plugin field attribute data - * */ - - //! - //! - public native @Const Pointer data(); public native PluginField data(Pointer setter); - /** - * \brief Plugin field attribute type - * @see PluginFieldType - * */ - - //! - //! 
- public native PluginFieldType type(); public native PluginField type(PluginFieldType setter); - /** - * \brief Number of data entries in the Plugin attribute - * */ - public native int length(); public native PluginField length(int setter); - - public PluginField(String name_/*=nullptr*/, @Const Pointer data_/*=nullptr*/, - PluginFieldType type_/*=nvinfer1::PluginFieldType::kUNKNOWN*/, int length_/*=0*/) { super((Pointer)null); allocate(name_, data_, type_, length_); } - @NoException(true) private native void allocate(String name_/*=nullptr*/, @Const Pointer data_/*=nullptr*/, - PluginFieldType type_/*=nvinfer1::PluginFieldType::kUNKNOWN*/, int length_/*=0*/); - public PluginField() { super((Pointer)null); allocate(); } - @NoException(true) private native void allocate(); - public PluginField(@Cast("const char*") BytePointer name_/*=nullptr*/, @Const Pointer data_/*=nullptr*/, - @Cast("nvinfer1::PluginFieldType") int type_/*=nvinfer1::PluginFieldType::kUNKNOWN*/, int length_/*=0*/) { super((Pointer)null); allocate(name_, data_, type_, length_); } - @NoException(true) private native void allocate(@Cast("const char*") BytePointer name_/*=nullptr*/, @Const Pointer data_/*=nullptr*/, - @Cast("nvinfer1::PluginFieldType") int type_/*=nvinfer1::PluginFieldType::kUNKNOWN*/, int length_/*=0*/); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/PluginFieldCollection.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/PluginFieldCollection.java deleted file mode 100644 index 504b941791e..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/PluginFieldCollection.java +++ /dev/null @@ -1,45 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static 
org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** Plugin field collection struct. */ -@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class PluginFieldCollection extends Pointer { - static { Loader.load(); } - /** Default native constructor. */ - public PluginFieldCollection() { super((Pointer)null); allocate(); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public PluginFieldCollection(long size) { super((Pointer)null); allocateArray(size); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public PluginFieldCollection(Pointer p) { super(p); } - private native void allocate(); - private native void allocateArray(long size); - @Override public PluginFieldCollection position(long position) { - return (PluginFieldCollection)super.position(position); - } - @Override public PluginFieldCollection getPointer(long i) { - return new PluginFieldCollection((Pointer)this).offsetAddress(i); - } - - /** Number of PluginField entries. */ - public native int nbFields(); public native PluginFieldCollection nbFields(int setter); - /** Pointer to PluginField entries. 
*/ - public native @Const PluginField fields(); public native PluginFieldCollection fields(PluginField setter); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/PluginTensorDesc.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/PluginTensorDesc.java deleted file mode 100644 index e34ac288bf8..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/PluginTensorDesc.java +++ /dev/null @@ -1,58 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - // namespace impl - -/** \struct PluginTensorDesc - * - * \brief Fields that a plugin might see for an input or output. - * - * Scale is only valid when data type is DataType::kINT8. TensorRT will set - * the value to -1.0f if it is invalid. - * - * @see IPluginV2IOExt::supportsFormatCombination - * @see IPluginV2IOExt::configurePlugin - * */ -@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class PluginTensorDesc extends Pointer { - static { Loader.load(); } - /** Default native constructor. */ - public PluginTensorDesc() { super((Pointer)null); allocate(); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public PluginTensorDesc(long size) { super((Pointer)null); allocateArray(size); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public PluginTensorDesc(Pointer p) { super(p); } - private native void allocate(); - private native void allocateArray(long size); - @Override public PluginTensorDesc position(long position) { - return (PluginTensorDesc)super.position(position); - } - @Override public PluginTensorDesc getPointer(long i) { - return new PluginTensorDesc((Pointer)this).offsetAddress(i); - } - - /** Dimensions. */ - public native @ByRef @Cast("nvinfer1::Dims*") Dims32 dims(); public native PluginTensorDesc dims(Dims32 setter); - /** \warning DataType:kBOOL not supported. */ - public native DataType type(); public native PluginTensorDesc type(DataType setter); - /** Tensor format. */ - public native TensorFormat format(); public native PluginTensorDesc format(TensorFormat setter); - /** Scale for INT8 data type. */ - public native float scale(); public native PluginTensorDesc scale(float setter); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VActivationLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VActivationLayer.java deleted file mode 100644 index eb723e73d62..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VActivationLayer.java +++ /dev/null @@ -1,35 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = 
org.bytedeco.tensorrt.presets.nvinfer.class) -public class VActivationLayer extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public VActivationLayer(Pointer p) { super(p); } - - public native @NoException(true) void setActivationType(ActivationType type); - public native @NoException(true) void setActivationType(@Cast("nvinfer1::ActivationType") int type); - public native @NoException(true) ActivationType getActivationType(); - public native @NoException(true) void setAlpha(float alpha); - public native @NoException(true) void setBeta(float beta); - public native @NoException(true) float getAlpha(); - public native @NoException(true) float getBeta(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VAlgorithm.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VAlgorithm.java deleted file mode 100644 index e95bb61ccdf..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VAlgorithm.java +++ /dev/null @@ -1,33 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VAlgorithm extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public VAlgorithm(Pointer p) { super(p); } - - public native @Const @ByRef @NoException(true) IAlgorithmIOInfo getAlgorithmIOInfo(int index); - public native @Const @ByRef @NoException(true) IAlgorithmVariant getAlgorithmVariant(); - public native @NoException(true) float getTimingMSec(); - public native @Cast("std::size_t") @NoException(true) long getWorkspaceSize(); - public native @Const @NoException(true) IAlgorithmIOInfo getAlgorithmIOInfoByIndex(int index); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VAlgorithmContext.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VAlgorithmContext.java deleted file mode 100644 index 492cee4c440..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VAlgorithmContext.java +++ /dev/null @@ -1,33 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VAlgorithmContext extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public VAlgorithmContext(Pointer p) { super(p); } - - public native @NoException(true) String getName(); - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getDimensions(int index, OptProfileSelector select); - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getDimensions(int index, @Cast("nvinfer1::OptProfileSelector") int select); - public native @NoException(true) int getNbInputs(); - public native @NoException(true) int getNbOutputs(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VAlgorithmIOInfo.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VAlgorithmIOInfo.java deleted file mode 100644 index 5efc7c65c44..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VAlgorithmIOInfo.java +++ /dev/null @@ -1,31 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VAlgorithmIOInfo extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public VAlgorithmIOInfo(Pointer p) { super(p); } - - public native @NoException(true) TensorFormat getTensorFormat(); - public native @NoException(true) DataType getDataType(); - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getStrides(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VAlgorithmVariant.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VAlgorithmVariant.java deleted file mode 100644 index f788a26a219..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VAlgorithmVariant.java +++ /dev/null @@ -1,30 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VAlgorithmVariant extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public VAlgorithmVariant(Pointer p) { super(p); } - - public native @Cast("int64_t") @NoException(true) long getImplementation(); - public native @Cast("int64_t") @NoException(true) long getTactic(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VBuilder.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VBuilder.java deleted file mode 100644 index f3f5c2b5fc8..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VBuilder.java +++ /dev/null @@ -1,45 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VBuilder extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public VBuilder(Pointer p) { super(p); } - - public native @NoException(true) void setMaxBatchSize(int batchSize); - public native @NoException(true) int getMaxBatchSize(); - public native @Cast("bool") @NoException(true) boolean platformHasFastFp16(); - public native @Cast("bool") @NoException(true) boolean platformHasFastInt8(); - public native @NoException(true) int getMaxDLABatchSize(); - public native @NoException(true) int getNbDLACores(); - public native @NoException(true) void setGpuAllocator(IGpuAllocator allocator); - public native @NoException(true) IBuilderConfig createBuilderConfig(); - public native @NoException(true) ICudaEngine buildEngineWithConfig(@ByRef INetworkDefinition network, @ByRef IBuilderConfig config); - public native @NoException(true) INetworkDefinition createNetworkV2(@Cast("nvinfer1::NetworkDefinitionCreationFlags") int flags); - public native @NoException(true) IOptimizationProfile createOptimizationProfile(); - public native @NoException(true) void setErrorRecorder(IErrorRecorder recorder); - public native @NoException(true) IErrorRecorder getErrorRecorder(); - public native @NoException(true) void reset(); - public native @Cast("bool") @NoException(true) boolean platformHasTf32(); - public native @NoException(true) IHostMemory buildSerializedNetwork(@ByRef INetworkDefinition network, @ByRef IBuilderConfig config); - public native @Cast("bool") @NoException(true) boolean isNetworkSupported(@Const @ByRef INetworkDefinition network, @Const @ByRef IBuilderConfig config); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VBuilderConfig.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VBuilderConfig.java deleted file mode 100644 index c7dbdfb63bd..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VBuilderConfig.java +++ /dev/null @@ -1,83 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - 
-import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VBuilderConfig extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public VBuilderConfig(Pointer p) { super(p); } - - public native @NoException(true) void setMinTimingIterations(int minTiming); - public native @NoException(true) int getMinTimingIterations(); - public native @NoException(true) void setAvgTimingIterations(int avgTiming); - public native @NoException(true) int getAvgTimingIterations(); - public native @NoException(true) void setEngineCapability(EngineCapability capability); - public native @NoException(true) void setEngineCapability(@Cast("nvinfer1::EngineCapability") int capability); - public native @NoException(true) EngineCapability getEngineCapability(); - public native @NoException(true) void setInt8Calibrator(IInt8Calibrator calibrator); - public native @NoException(true) IInt8Calibrator getInt8Calibrator(); - public native @NoException(true) void setMaxWorkspaceSize(@Cast("std::size_t") long workspaceSize); - public native @Cast("std::size_t") @NoException(true) long getMaxWorkspaceSize(); - public native @NoException(true) void setFlags(@Cast("nvinfer1::BuilderFlags") int builderFlags); - public native @Cast("nvinfer1::BuilderFlags") @NoException(true) int getFlags(); - public native @NoException(true) void 
clearFlag(BuilderFlag builderFlag); - public native @NoException(true) void clearFlag(@Cast("nvinfer1::BuilderFlag") int builderFlag); - public native @NoException(true) void setFlag(BuilderFlag builderFlag); - public native @NoException(true) void setFlag(@Cast("nvinfer1::BuilderFlag") int builderFlag); - public native @Cast("bool") @NoException(true) boolean getFlag(BuilderFlag builderFlag); - public native @Cast("bool") @NoException(true) boolean getFlag(@Cast("nvinfer1::BuilderFlag") int builderFlag); - public native @NoException(true) void setDeviceType(@Const ILayer layer, DeviceType deviceType); - public native @NoException(true) void setDeviceType(@Const ILayer layer, @Cast("nvinfer1::DeviceType") int deviceType); - public native @NoException(true) DeviceType getDeviceType(@Const ILayer layer); - public native @Cast("bool") @NoException(true) boolean isDeviceTypeSet(@Const ILayer layer); - public native @NoException(true) void resetDeviceType(@Const ILayer layer); - public native @Cast("bool") @NoException(true) boolean canRunOnDLA(@Const ILayer layer); - public native @NoException(true) void setDLACore(int dlaCore); - public native @NoException(true) int getDLACore(); - public native @NoException(true) void setDefaultDeviceType(DeviceType deviceType); - public native @NoException(true) void setDefaultDeviceType(@Cast("nvinfer1::DeviceType") int deviceType); - public native @NoException(true) DeviceType getDefaultDeviceType(); - public native @NoException(true) void reset(); - public native @NoException(true) void setProfileStream(CUstream_st stream); - public native @NoException(true) CUstream_st getProfileStream(); - public native @NoException(true) int addOptimizationProfile(@Const IOptimizationProfile profile); - public native @NoException(true) int getNbOptimizationProfiles(); - public native @NoException(true) void setProfilingVerbosity(ProfilingVerbosity verbosity); - public native @NoException(true) void 
setProfilingVerbosity(@Cast("nvinfer1::ProfilingVerbosity") int verbosity); - public native @NoException(true) ProfilingVerbosity getProfilingVerbosity(); - public native @NoException(true) void setAlgorithmSelector(IAlgorithmSelector selector); - public native @NoException(true) IAlgorithmSelector getAlgorithmSelector(); - public native @Cast("bool") @NoException(true) boolean setCalibrationProfile(@Const IOptimizationProfile profile); - public native @Const @NoException(true) IOptimizationProfile getCalibrationProfile(); - public native @NoException(true) void setQuantizationFlags(@Cast("nvinfer1::QuantizationFlags") int flags); - public native @Cast("nvinfer1::QuantizationFlags") @NoException(true) int getQuantizationFlags(); - public native @NoException(true) void clearQuantizationFlag(QuantizationFlag flag); - public native @NoException(true) void clearQuantizationFlag(@Cast("nvinfer1::QuantizationFlag") int flag); - public native @NoException(true) void setQuantizationFlag(QuantizationFlag flag); - public native @NoException(true) void setQuantizationFlag(@Cast("nvinfer1::QuantizationFlag") int flag); - public native @Cast("bool") @NoException(true) boolean getQuantizationFlag(QuantizationFlag flag); - public native @Cast("bool") @NoException(true) boolean getQuantizationFlag(@Cast("nvinfer1::QuantizationFlag") int flag); - public native @Cast("bool") @NoException(true) boolean setTacticSources(@Cast("nvinfer1::TacticSources") int tacticSources); - public native @Cast("nvinfer1::TacticSources") @NoException(true) int getTacticSources(); - public native @NoException(true) ITimingCache createTimingCache(@Const Pointer blob, @Cast("std::size_t") long size); - public native @Cast("bool") @NoException(true) boolean setTimingCache(@Const @ByRef ITimingCache cache, @Cast("bool") boolean ignoreMismatch); - public native @Const @NoException(true) ITimingCache getTimingCache(); -} diff --git 
a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VConcatenationLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VConcatenationLayer.java deleted file mode 100644 index 533ed40a161..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VConcatenationLayer.java +++ /dev/null @@ -1,30 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VConcatenationLayer extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public VConcatenationLayer(Pointer p) { super(p); } - - public native @NoException(true) void setAxis(int axis); - public native @NoException(true) int getAxis(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VConstantLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VConstantLayer.java deleted file mode 100644 index c8f2ae9c965..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VConstantLayer.java +++ /dev/null @@ -1,32 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VConstantLayer extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public VConstantLayer(Pointer p) { super(p); } - - public native @NoException(true) void setWeights(@ByVal Weights weights); - public native @ByVal @NoException(true) Weights getWeights(); - public native @NoException(true) void setDimensions(@ByVal @Cast("nvinfer1::Dims*") Dims32 dimensions); - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getDimensions(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VConvolutionLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VConvolutionLayer.java deleted file mode 100644 index dd23ae4aef2..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VConvolutionLayer.java +++ /dev/null @@ -1,59 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VConvolutionLayer extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public VConvolutionLayer(Pointer p) { super(p); } - - public native @NoException(true) void setKernelSize(@ByVal DimsHW kernelSize); - public native @ByVal @NoException(true) DimsHW getKernelSize(); - public native @NoException(true) void setNbOutputMaps(int nbOutputMaps); - public native @NoException(true) int getNbOutputMaps(); - public native @NoException(true) void setStride(@ByVal DimsHW stride); - public native @ByVal @NoException(true) DimsHW getStride(); - public native @NoException(true) void setPadding(@ByVal DimsHW padding); - public native @ByVal @NoException(true) DimsHW getPadding(); - public native @NoException(true) void setNbGroups(int nbGroups); - public native @NoException(true) int getNbGroups(); - public native @NoException(true) void setKernelWeights(@ByVal Weights weights); - public native @ByVal @NoException(true) Weights getKernelWeights(); - public native @NoException(true) void setBiasWeights(@ByVal Weights weights); - public native @ByVal @NoException(true) Weights getBiasWeights(); - public native @NoException(true) void setDilation(@ByVal DimsHW dilation); - public native @ByVal @NoException(true) DimsHW getDilation(); - public native @NoException(true) void setPrePadding(@ByVal @Cast("nvinfer1::Dims*") Dims32 padding); - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getPrePadding(); - public native @NoException(true) void setPostPadding(@ByVal @Cast("nvinfer1::Dims*") Dims32 padding); - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getPostPadding(); - public native @NoException(true) void setPaddingMode(PaddingMode paddingMode); - public native @NoException(true) void setPaddingMode(@Cast("nvinfer1::PaddingMode") int paddingMode); - public native @NoException(true) PaddingMode getPaddingMode(); - public native @NoException(true) void setKernelSizeNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 kernelSize); - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 
getKernelSizeNd(); - public native @NoException(true) void setStrideNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 stride); - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getStrideNd(); - public native @NoException(true) void setPaddingNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 padding); - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getPaddingNd(); - public native @NoException(true) void setDilationNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 dilation); - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getDilationNd(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VCudaEngine.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VCudaEngine.java deleted file mode 100644 index a4122e055c3..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VCudaEngine.java +++ /dev/null @@ -1,63 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VCudaEngine extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public VCudaEngine(Pointer p) { super(p); } - - public native @NoException(true) int getNbBindings(); - public native @NoException(true) int getBindingIndex(String name); - public native @NoException(true) int getBindingIndex(@Cast("const char*") BytePointer name); - public native @NoException(true) String getBindingName(int bindingIndex); - public native @Cast("bool") @NoException(true) boolean bindingIsInput(int bindingIndex); - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getBindingDimensions(int bindingIndex); - public native @NoException(true) DataType getBindingDataType(int bindingIndex); - public native @NoException(true) int getMaxBatchSize(); - public native @NoException(true) int getNbLayers(); - public native @NoException(true) IHostMemory serialize(); - public native @NoException(true) IExecutionContext createExecutionContext(); - public native @NoException(true) TensorLocation getLocation(int bindingIndex); - public native @NoException(true) IExecutionContext createExecutionContextWithoutDeviceMemory(); - public native @Cast("size_t") @NoException(true) long getDeviceMemorySize(); - public native @Cast("bool") @NoException(true) boolean isRefittable(); - public native @NoException(true) int getBindingBytesPerComponent(int bindingIndex); - public native @NoException(true) int getBindingComponentsPerElement(int bindingIndex); - public native @NoException(true) TensorFormat getBindingFormat(int bindingIndex); - public native @NoException(true) String getBindingFormatDesc(int bindingIndex); - public native @NoException(true) int getBindingVectorizedDim(int bindingIndex); - public native @NoException(true) String getName(); - public native @NoException(true) int getNbOptimizationProfiles(); - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getProfileDimensions(int bindingIndex, int profileIndex, OptProfileSelector select); - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 
getProfileDimensions(int bindingIndex, int profileIndex, @Cast("nvinfer1::OptProfileSelector") int select); - public native @Const @NoException(true) IntPointer getProfileShapeValues( - int profileIndex, int inputIndex, OptProfileSelector select); - public native @Const @NoException(true) IntBuffer getProfileShapeValues( - int profileIndex, int inputIndex, @Cast("nvinfer1::OptProfileSelector") int select); - public native @Cast("bool") @NoException(true) boolean isShapeBinding(int bindingIndex); - public native @Cast("bool") @NoException(true) boolean isExecutionBinding(int bindingIndex); - public native @NoException(true) EngineCapability getEngineCapability(); - public native @NoException(true) void setErrorRecorder(IErrorRecorder recorder); - public native @NoException(true) IErrorRecorder getErrorRecorder(); - public native @Cast("bool") @NoException(true) boolean hasImplicitBatchDimension(); - public native @Cast("nvinfer1::TacticSources") @NoException(true) int getTacticSources(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VDeconvolutionLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VDeconvolutionLayer.java deleted file mode 100644 index 8db00f02e62..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VDeconvolutionLayer.java +++ /dev/null @@ -1,57 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static 
org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VDeconvolutionLayer extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public VDeconvolutionLayer(Pointer p) { super(p); } - - public native @NoException(true) void setKernelSize(@ByVal DimsHW kernelSize); - public native @ByVal @NoException(true) DimsHW getKernelSize(); - public native @NoException(true) void setNbOutputMaps(int nbOutputMaps); - public native @NoException(true) int getNbOutputMaps(); - public native @NoException(true) void setStride(@ByVal DimsHW stride); - public native @ByVal @NoException(true) DimsHW getStride(); - public native @NoException(true) void setPadding(@ByVal DimsHW padding); - public native @ByVal @NoException(true) DimsHW getPadding(); - public native @NoException(true) void setNbGroups(int nbGroups); - public native @NoException(true) int getNbGroups(); - public native @NoException(true) void setKernelWeights(@ByVal Weights weights); - public native @ByVal @NoException(true) Weights getKernelWeights(); - public native @NoException(true) void setBiasWeights(@ByVal Weights weights); - public native @ByVal @NoException(true) Weights getBiasWeights(); - public native @NoException(true) void setPrePadding(@ByVal @Cast("nvinfer1::Dims*") Dims32 padding); - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getPrePadding(); - public native @NoException(true) void setPostPadding(@ByVal @Cast("nvinfer1::Dims*") Dims32 padding); - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getPostPadding(); - public native @NoException(true) void setPaddingMode(PaddingMode paddingMode); - public native @NoException(true) void setPaddingMode(@Cast("nvinfer1::PaddingMode") int paddingMode); - public native @NoException(true) PaddingMode getPaddingMode(); - public native 
@NoException(true) void setKernelSizeNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 kernelSize); - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getKernelSizeNd(); - public native @NoException(true) void setStrideNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 stride); - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getStrideNd(); - public native @NoException(true) void setPaddingNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 padding); - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getPaddingNd(); - public native @NoException(true) void setDilationNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 dilation); - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getDilationNd(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VDequantizeLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VDequantizeLayer.java deleted file mode 100644 index fb9889da7a9..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VDequantizeLayer.java +++ /dev/null @@ -1,30 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VDequantizeLayer extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. 
Invokes {@link Pointer#Pointer(Pointer)}. */ - public VDequantizeLayer(Pointer p) { super(p); } - - public native @NoException(true) int getAxis(); - public native @NoException(true) void setAxis(int axis); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VDimensionExpr.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VDimensionExpr.java deleted file mode 100644 index dca88dec6cb..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VDimensionExpr.java +++ /dev/null @@ -1,30 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VDimensionExpr extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public VDimensionExpr(Pointer p) { super(p); } - - public native @Cast("bool") boolean isConstant(); - public native int getConstantValue(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VElementWiseLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VElementWiseLayer.java deleted file mode 100644 index 22f02ebcf50..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VElementWiseLayer.java +++ /dev/null @@ -1,31 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VElementWiseLayer extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public VElementWiseLayer(Pointer p) { super(p); } - - public native @NoException(true) void setOperation(ElementWiseOperation op); - public native @NoException(true) void setOperation(@Cast("nvinfer1::ElementWiseOperation") int op); - public native @NoException(true) ElementWiseOperation getOperation(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VExecutionContext.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VExecutionContext.java deleted file mode 100644 index d8422f1a895..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VExecutionContext.java +++ /dev/null @@ -1,63 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VExecutionContext extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public VExecutionContext(Pointer p) { super(p); } - - public native @Cast("bool") @NoException(true) boolean execute(int batchSize, @Cast("void*const*") PointerPointer bindings); - public native @Cast("bool") @NoException(true) boolean execute(int batchSize, @Cast("void*const*") @ByPtrPtr Pointer bindings); - public native @Cast("bool") @NoException(true) boolean enqueue( - int batchSize, @Cast("void*const*") PointerPointer bindings, CUstream_st stream, @ByPtrPtr CUevent_st inputConsumed); - public native @Cast("bool") @NoException(true) boolean enqueue( - int batchSize, @Cast("void*const*") @ByPtrPtr Pointer bindings, CUstream_st stream, @ByPtrPtr CUevent_st inputConsumed); - public native @NoException(true) void setDebugSync(@Cast("bool") boolean sync); - public native @Cast("bool") @NoException(true) boolean getDebugSync(); - public native @NoException(true) void setProfiler(IProfiler arg0); - public native @NoException(true) IProfiler getProfiler(); - public native @Const @ByRef @NoException(true) ICudaEngine getEngine(); - public native @NoException(true) void setName(String name); - public native @NoException(true) void setName(@Cast("const char*") BytePointer name); - public native @NoException(true) String getName(); - public native @NoException(true) void setDeviceMemory(Pointer memory); - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getStrides(int bindingIndex); - public native @Cast("bool") @NoException(true) boolean setOptimizationProfile(int profileIndex); - public native @NoException(true) int getOptimizationProfile(); - public native @Cast("bool") @NoException(true) boolean setBindingDimensions(int bindingIndex, @ByVal @Cast("nvinfer1::Dims*") Dims32 dimensions); - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getBindingDimensions(int bindingIndex); - public native @Cast("bool") @NoException(true) boolean setInputShapeBinding(int bindingIndex, @Const IntPointer data); - public native 
@Cast("bool") @NoException(true) boolean setInputShapeBinding(int bindingIndex, @Const IntBuffer data); - public native @Cast("bool") @NoException(true) boolean setInputShapeBinding(int bindingIndex, @Const int[] data); - public native @Cast("bool") @NoException(true) boolean getShapeBinding(int bindingIndex, IntPointer data); - public native @Cast("bool") @NoException(true) boolean getShapeBinding(int bindingIndex, IntBuffer data); - public native @Cast("bool") @NoException(true) boolean getShapeBinding(int bindingIndex, int[] data); - public native @Cast("bool") @NoException(true) boolean allInputDimensionsSpecified(); - public native @Cast("bool") @NoException(true) boolean allInputShapesSpecified(); - public native @NoException(true) void setErrorRecorder(IErrorRecorder recorder); - public native @NoException(true) IErrorRecorder getErrorRecorder(); - public native @Cast("bool") @NoException(true) boolean executeV2(@Cast("void*const*") PointerPointer bindings); - public native @Cast("bool") @NoException(true) boolean executeV2(@Cast("void*const*") @ByPtrPtr Pointer bindings); - public native @Cast("bool") @NoException(true) boolean enqueueV2(@Cast("void*const*") PointerPointer bindings, CUstream_st stream, @ByPtrPtr CUevent_st inputConsumed); - public native @Cast("bool") @NoException(true) boolean enqueueV2(@Cast("void*const*") @ByPtrPtr Pointer bindings, CUstream_st stream, @ByPtrPtr CUevent_st inputConsumed); - public native @Cast("bool") @NoException(true) boolean setOptimizationProfileAsync(int profileIndex, CUstream_st stream); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VExprBuilder.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VExprBuilder.java deleted file mode 100644 index a71ca934abb..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VExprBuilder.java +++ /dev/null @@ -1,33 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package 
org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VExprBuilder extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public VExprBuilder(Pointer p) { super(p); } - - public native @Const IDimensionExpr constant(int value); - public native @Const IDimensionExpr operation( - DimensionOperation op, @Const @ByRef IDimensionExpr first, @Const @ByRef IDimensionExpr second); - public native @Const IDimensionExpr operation( - @Cast("nvinfer1::DimensionOperation") int op, @Const @ByRef IDimensionExpr first, @Const @ByRef IDimensionExpr second); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VFillLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VFillLayer.java deleted file mode 100644 index 5d1bb1e60c8..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VFillLayer.java +++ /dev/null @@ -1,37 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static 
org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VFillLayer extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public VFillLayer(Pointer p) { super(p); } - - public native @NoException(true) void setDimensions(@ByVal @Cast("nvinfer1::Dims*") Dims32 dimensions); - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getDimensions(); - public native @NoException(true) void setOperation(FillOperation op); - public native @NoException(true) void setOperation(@Cast("nvinfer1::FillOperation") int op); - public native @NoException(true) FillOperation getOperation(); - public native @NoException(true) void setAlpha(double alpha); - public native @NoException(true) double getAlpha(); - public native @NoException(true) void setBeta(double beta); - public native @NoException(true) double getBeta(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VFullyConnectedLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VFullyConnectedLayer.java deleted file mode 100644 index 4113e407b10..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VFullyConnectedLayer.java +++ /dev/null @@ -1,34 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static 
org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VFullyConnectedLayer extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public VFullyConnectedLayer(Pointer p) { super(p); } - - public native @NoException(true) void setNbOutputChannels(int nbOutputs); - public native @NoException(true) int getNbOutputChannels(); - public native @NoException(true) void setKernelWeights(@ByVal Weights weights); - public native @ByVal @NoException(true) Weights getKernelWeights(); - public native @NoException(true) void setBiasWeights(@ByVal Weights weights); - public native @ByVal @NoException(true) Weights getBiasWeights(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VGatherLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VGatherLayer.java deleted file mode 100644 index a624cf5335a..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VGatherLayer.java +++ /dev/null @@ -1,32 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static 
org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VGatherLayer extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public VGatherLayer(Pointer p) { super(p); } - - public native @NoException(true) void setGatherAxis(int axis); - public native @NoException(true) int getGatherAxis(); - public native @NoException(true) void setNbElementWiseDims(int k); - public native @NoException(true) int getNbElementWiseDims(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VHostMemory.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VHostMemory.java deleted file mode 100644 index c1532e6c9d7..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VHostMemory.java +++ /dev/null @@ -1,31 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VHostMemory extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public VHostMemory(Pointer p) { super(p); } - - public native @NoException(true) Pointer data(); - public native @Cast("std::size_t") @NoException(true) long size(); - public native @NoException(true) DataType type(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VIdentityLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VIdentityLayer.java deleted file mode 100644 index 1d7f8c0440c..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VIdentityLayer.java +++ /dev/null @@ -1,40 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VIdentityLayer extends VRoot { - static { Loader.load(); } - /** Default native constructor. */ - public VIdentityLayer() { super((Pointer)null); allocate(); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public VIdentityLayer(long size) { super((Pointer)null); allocateArray(size); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public VIdentityLayer(Pointer p) { super(p); } - private native void allocate(); - private native void allocateArray(long size); - @Override public VIdentityLayer position(long position) { - return (VIdentityLayer)super.position(position); - } - @Override public VIdentityLayer getPointer(long i) { - return new VIdentityLayer((Pointer)this).offsetAddress(i); - } - -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VIteratorLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VIteratorLayer.java deleted file mode 100644 index 50bf6c92357..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VIteratorLayer.java +++ /dev/null @@ -1,32 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VIteratorLayer extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public VIteratorLayer(Pointer p) { super(p); } - - public native @NoException(true) void setAxis(int axis); - public native @NoException(true) int getAxis(); - public native @NoException(true) void setReverse(@Cast("bool") boolean reverse); - public native @Cast("bool") @NoException(true) boolean getReverse(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VLRNLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VLRNLayer.java deleted file mode 100644 index 6d65b4b3af0..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VLRNLayer.java +++ /dev/null @@ -1,36 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VLRNLayer extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public VLRNLayer(Pointer p) { super(p); } - - public native @NoException(true) void setWindowSize(int windowSize); - public native @NoException(true) int getWindowSize(); - public native @NoException(true) void setAlpha(float alpha); - public native @NoException(true) float getAlpha(); - public native @NoException(true) void setBeta(float beta); - public native @NoException(true) float getBeta(); - public native @NoException(true) void setK(float k); - public native @NoException(true) float getK(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VLayer.java deleted file mode 100644 index f3b92ef20df..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VLayer.java +++ /dev/null @@ -1,46 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VLayer extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public VLayer(Pointer p) { super(p); } - - public native @NoException(true) LayerType getType(); - public native @NoException(true) void setName(String name); - public native @NoException(true) void setName(@Cast("const char*") BytePointer name); - public native @NoException(true) String getName(); - public native @NoException(true) int getNbInputs(); - public native @NoException(true) ITensor getInput(int index); - public native @NoException(true) int getNbOutputs(); - public native @NoException(true) ITensor getOutput(int index); - public native @NoException(true) void setInput(int index, @ByRef ITensor tensor); - public native @NoException(true) void setPrecision(DataType dataType); - public native @NoException(true) void setPrecision(@Cast("nvinfer1::DataType") int dataType); - public native @NoException(true) DataType getPrecision(); - public native @Cast("bool") @NoException(true) boolean precisionIsSet(); - public native @NoException(true) void resetPrecision(); - public native @NoException(true) void setOutputType(int index, DataType dataType); - public native @NoException(true) void setOutputType(int index, @Cast("nvinfer1::DataType") int dataType); - public native @NoException(true) DataType getOutputType(int index); - public native @Cast("bool") @NoException(true) boolean outputTypeIsSet(int index); - public native @NoException(true) void resetOutputType(int index); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VLoop.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VLoop.java deleted file mode 100644 index fde95340760..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VLoop.java +++ /dev/null @@ -1,39 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import 
org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VLoop extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public VLoop(Pointer p) { super(p); } - - public native @NoException(true) IRecurrenceLayer addRecurrence(@ByRef ITensor initialValue); - public native @NoException(true) ITripLimitLayer addTripLimit(@ByRef ITensor tensor, TripLimit _limit); - public native @NoException(true) ITripLimitLayer addTripLimit(@ByRef ITensor tensor, @Cast("nvinfer1::TripLimit") int _limit); - public native @NoException(true) IIteratorLayer addIterator(@ByRef ITensor tensor, int axis/*=0*/, @Cast("bool") boolean reverse/*=false*/); - public native @NoException(true) IIteratorLayer addIterator(@ByRef ITensor tensor); - public native @NoException(true) ILoopOutputLayer addLoopOutput(@ByRef ITensor tensor, LoopOutput outputKind, int axis/*=0*/); - public native @NoException(true) ILoopOutputLayer addLoopOutput(@ByRef ITensor tensor, LoopOutput outputKind); - public native @NoException(true) ILoopOutputLayer addLoopOutput(@ByRef ITensor tensor, @Cast("nvinfer1::LoopOutput") int outputKind, int axis/*=0*/); - public native @NoException(true) ILoopOutputLayer addLoopOutput(@ByRef ITensor tensor, @Cast("nvinfer1::LoopOutput") int outputKind); - public native @NoException(true) void setName(String name); - public native @NoException(true) void setName(@Cast("const char*") BytePointer name); - public native @NoException(true) String getName(); -} diff --git 
a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VLoopBoundaryLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VLoopBoundaryLayer.java deleted file mode 100644 index cba6b945a82..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VLoopBoundaryLayer.java +++ /dev/null @@ -1,29 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VLoopBoundaryLayer extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public VLoopBoundaryLayer(Pointer p) { super(p); } - - public native @NoException(true) ILoop getLoop(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VLoopOutputLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VLoopOutputLayer.java deleted file mode 100644 index f4c1c6d5ed6..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VLoopOutputLayer.java +++ /dev/null @@ -1,31 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VLoopOutputLayer extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public VLoopOutputLayer(Pointer p) { super(p); } - - public native @NoException(true) LoopOutput getLoopOutput(); - public native @NoException(true) void setAxis(int axis); - public native @NoException(true) int getAxis(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VMatrixMultiplyLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VMatrixMultiplyLayer.java deleted file mode 100644 index 2adddb25764..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VMatrixMultiplyLayer.java +++ /dev/null @@ -1,31 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VMatrixMultiplyLayer extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public VMatrixMultiplyLayer(Pointer p) { super(p); } - - public native @NoException(true) void setOperation(int index, MatrixOperation op); - public native @NoException(true) void setOperation(int index, @Cast("nvinfer1::MatrixOperation") int op); - public native @NoException(true) MatrixOperation getOperation(int index); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VNetworkDefinition.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VNetworkDefinition.java deleted file mode 100644 index c5344534126..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VNetworkDefinition.java +++ /dev/null @@ -1,113 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VNetworkDefinition extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public VNetworkDefinition(Pointer p) { super(p); } - - public native @NoException(true) ITensor addInput(String name, DataType type, @ByVal @Cast("nvinfer1::Dims*") Dims32 dimensions); - public native @NoException(true) ITensor addInput(@Cast("const char*") BytePointer name, @Cast("nvinfer1::DataType") int type, @ByVal @Cast("nvinfer1::Dims*") Dims32 dimensions); - public native @NoException(true) void markOutput(@ByRef ITensor tensor); - public native @NoException(true) IConvolutionLayer addConvolution( - @ByRef ITensor input, int nbOutputMaps, @ByVal DimsHW kernelSize, @ByVal Weights kernelWeights, @ByVal Weights biasWeights); - public native @NoException(true) IFullyConnectedLayer addFullyConnected( - @ByRef ITensor input, int nbOutputs, @ByVal Weights kernelWeights, @ByVal Weights biasWeights); - public native @NoException(true) IActivationLayer addActivation(@ByRef ITensor input, ActivationType type); - public native @NoException(true) IActivationLayer addActivation(@ByRef ITensor input, @Cast("nvinfer1::ActivationType") int type); - public native @NoException(true) IPoolingLayer addPooling(@ByRef ITensor input, PoolingType type, @ByVal DimsHW windowSize); - public native @NoException(true) IPoolingLayer addPooling(@ByRef ITensor input, @Cast("nvinfer1::PoolingType") int type, @ByVal DimsHW windowSize); - public native @NoException(true) ILRNLayer addLRN(@ByRef ITensor input, int window, float alpha, float beta, float k); - public native @NoException(true) IScaleLayer addScale(@ByRef ITensor input, ScaleMode mode, @ByVal Weights shift, @ByVal Weights scale, @ByVal Weights power); - public native @NoException(true) IScaleLayer addScale(@ByRef ITensor input, @Cast("nvinfer1::ScaleMode") int mode, @ByVal Weights shift, @ByVal Weights scale, @ByVal Weights power); - public native @NoException(true) ISoftMaxLayer addSoftMax(@ByRef ITensor input); - public native @NoException(true) IConcatenationLayer addConcatenation(@Cast("nvinfer1::ITensor*const*") 
PointerPointer inputs, int nbInputs); - public native @NoException(true) IConcatenationLayer addConcatenation(@ByPtrPtr ITensor inputs, int nbInputs); - public native @NoException(true) IDeconvolutionLayer addDeconvolution( - @ByRef ITensor input, int nbOutputMaps, @ByVal DimsHW kernelSize, @ByVal Weights kernelWeights, @ByVal Weights biasWeights); - public native @NoException(true) IElementWiseLayer addElementWise(@ByRef ITensor input1, @ByRef ITensor input2, ElementWiseOperation op); - public native @NoException(true) IElementWiseLayer addElementWise(@ByRef ITensor input1, @ByRef ITensor input2, @Cast("nvinfer1::ElementWiseOperation") int op); - public native @NoException(true) IUnaryLayer addUnary(@ByRef ITensor input, UnaryOperation operation); - public native @NoException(true) IUnaryLayer addUnary(@ByRef ITensor input, @Cast("nvinfer1::UnaryOperation") int operation); - public native @NoException(true) IPaddingLayer addPadding(@ByRef ITensor input, @ByVal DimsHW prePadding, @ByVal DimsHW postPadding); - public native @NoException(true) IShuffleLayer addShuffle(@ByRef ITensor input); - public native @NoException(true) int getNbLayers(); - public native @NoException(true) ILayer getLayer(int index); - public native @NoException(true) int getNbInputs(); - public native @NoException(true) ITensor getInput(int index); - public native @NoException(true) int getNbOutputs(); - public native @NoException(true) ITensor getOutput(int index); - public native @NoException(true) IReduceLayer addReduce( - @ByRef ITensor input, ReduceOperation operation, @Cast("uint32_t") int reduceAxes, @Cast("bool") boolean keepDimensions); - public native @NoException(true) IReduceLayer addReduce( - @ByRef ITensor input, @Cast("nvinfer1::ReduceOperation") int operation, @Cast("uint32_t") int reduceAxes, @Cast("bool") boolean keepDimensions); - public native @NoException(true) ITopKLayer addTopK(@ByRef ITensor input, TopKOperation op, int k, @Cast("uint32_t") int reduceAxes); - public 
native @NoException(true) ITopKLayer addTopK(@ByRef ITensor input, @Cast("nvinfer1::TopKOperation") int op, int k, @Cast("uint32_t") int reduceAxes); - public native @NoException(true) IGatherLayer addGather(@ByRef ITensor data, @ByRef ITensor indices, int axis); - public native @NoException(true) IRaggedSoftMaxLayer addRaggedSoftMax(@ByRef ITensor input, @ByRef ITensor bounds); - public native @NoException(true) IMatrixMultiplyLayer addMatrixMultiply( - @ByRef ITensor input0, MatrixOperation op0, @ByRef ITensor input1, MatrixOperation op1); - public native @NoException(true) IMatrixMultiplyLayer addMatrixMultiply( - @ByRef ITensor input0, @Cast("nvinfer1::MatrixOperation") int op0, @ByRef ITensor input1, @Cast("nvinfer1::MatrixOperation") int op1); - public native @NoException(true) IConstantLayer addConstant(@ByVal @Cast("nvinfer1::Dims*") Dims32 dimensions, @ByVal Weights weights); - public native @NoException(true) IRNNv2Layer addRNNv2( - @ByRef ITensor input, int layerCount, int hiddenSize, int maxSeqLen, RNNOperation op); - public native @NoException(true) IRNNv2Layer addRNNv2( - @ByRef ITensor input, int layerCount, int hiddenSize, int maxSeqLen, @Cast("nvinfer1::RNNOperation") int op); - public native @NoException(true) IIdentityLayer addIdentity(@ByRef ITensor input); - public native @NoException(true) void removeTensor(@ByRef ITensor tensor); - public native @NoException(true) void unmarkOutput(@ByRef ITensor tensor); - public native @NoException(true) IPluginV2Layer addPluginV2(@Cast("nvinfer1::ITensor*const*") PointerPointer inputs, int nbInputs, @ByRef IPluginV2 plugin); - public native @NoException(true) IPluginV2Layer addPluginV2(@ByPtrPtr ITensor inputs, int nbInputs, @ByRef IPluginV2 plugin); - public native @NoException(true) ISliceLayer addSlice(@ByRef ITensor input, @ByVal @Cast("nvinfer1::Dims*") Dims32 start, @ByVal @Cast("nvinfer1::Dims*") Dims32 size, @ByVal @Cast("nvinfer1::Dims*") Dims32 stride); - public native @NoException(true) void 
setName(String name); - public native @NoException(true) void setName(@Cast("const char*") BytePointer name); - public native @NoException(true) String getName(); - public native @NoException(true) IShapeLayer addShape(@ByRef ITensor input); - public native @Cast("bool") @NoException(true) boolean hasImplicitBatchDimension(); - public native @Cast("bool") @NoException(true) boolean markOutputForShapes(@ByRef ITensor tensor); - public native @Cast("bool") @NoException(true) boolean unmarkOutputForShapes(@ByRef ITensor tensor); - public native @NoException(true) IParametricReLULayer addParametricReLU(@ByRef ITensor input, @ByRef ITensor slope); - public native @NoException(true) IConvolutionLayer addConvolutionNd( - @ByRef ITensor input, int nbOutputMaps, @ByVal @Cast("nvinfer1::Dims*") Dims32 kernelSize, @ByVal Weights kernelWeights, @ByVal Weights biasWeights); - public native @NoException(true) IPoolingLayer addPoolingNd(@ByRef ITensor input, PoolingType type, @ByVal @Cast("nvinfer1::Dims*") Dims32 windowSize); - public native @NoException(true) IPoolingLayer addPoolingNd(@ByRef ITensor input, @Cast("nvinfer1::PoolingType") int type, @ByVal @Cast("nvinfer1::Dims*") Dims32 windowSize); - public native @NoException(true) IDeconvolutionLayer addDeconvolutionNd( - @ByRef ITensor input, int nbOutputMaps, @ByVal @Cast("nvinfer1::Dims*") Dims32 kernelSize, @ByVal Weights kernelWeights, @ByVal Weights biasWeights); - public native @NoException(true) IScaleLayer addScaleNd( - @ByRef ITensor input, ScaleMode mode, @ByVal Weights shift, @ByVal Weights scale, @ByVal Weights power, int channelAxis); - public native @NoException(true) IScaleLayer addScaleNd( - @ByRef ITensor input, @Cast("nvinfer1::ScaleMode") int mode, @ByVal Weights shift, @ByVal Weights scale, @ByVal Weights power, int channelAxis); - public native @NoException(true) IResizeLayer addResize(@ByRef ITensor input); - public native @Cast("bool") @NoException(true) boolean hasExplicitPrecision(); - public native 
@NoException(true) ILoop addLoop(); - public native @NoException(true) ISelectLayer addSelect(@ByRef ITensor condition, @ByRef ITensor thenInput, @ByRef ITensor elseInput); - public native @NoException(true) IFillLayer addFill(@ByVal @Cast("nvinfer1::Dims*") Dims32 dimensions, FillOperation op); - public native @NoException(true) IFillLayer addFill(@ByVal @Cast("nvinfer1::Dims*") Dims32 dimensions, @Cast("nvinfer1::FillOperation") int op); - public native @NoException(true) IPaddingLayer addPaddingNd(@ByRef ITensor input, @ByVal @Cast("nvinfer1::Dims*") Dims32 prePadding, @ByVal @Cast("nvinfer1::Dims*") Dims32 postPadding); - public native @Cast("bool") @NoException(true) boolean setWeightsName(@ByVal Weights weights, String name); - public native @Cast("bool") @NoException(true) boolean setWeightsName(@ByVal Weights weights, @Cast("const char*") BytePointer name); - public native @NoException(true) void setErrorRecorder(IErrorRecorder recorder); - public native @NoException(true) IErrorRecorder getErrorRecorder(); - public native @NoException(true) IDequantizeLayer addDequantize(@ByRef ITensor input, @ByRef ITensor scale); - public native @NoException(true) IQuantizeLayer addQuantize(@ByRef ITensor input, @ByRef ITensor scale); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VOptimizationProfile.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VOptimizationProfile.java deleted file mode 100644 index 9a3302f4610..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VOptimizationProfile.java +++ /dev/null @@ -1,51 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import 
org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VOptimizationProfile extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public VOptimizationProfile(Pointer p) { super(p); } - - public native @Cast("bool") @NoException(true) boolean setDimensions(String inputName, OptProfileSelector select, @ByVal @Cast("nvinfer1::Dims*") Dims32 dims); - public native @Cast("bool") @NoException(true) boolean setDimensions(@Cast("const char*") BytePointer inputName, @Cast("nvinfer1::OptProfileSelector") int select, @ByVal @Cast("nvinfer1::Dims*") Dims32 dims); - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getDimensions(String inputName, OptProfileSelector select); - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getDimensions(@Cast("const char*") BytePointer inputName, @Cast("nvinfer1::OptProfileSelector") int select); - public native @Cast("bool") @NoException(true) boolean setShapeValues( - String inputName, OptProfileSelector select, @Const IntPointer values, int nbValues); - public native @Cast("bool") @NoException(true) boolean setShapeValues( - @Cast("const char*") BytePointer inputName, @Cast("nvinfer1::OptProfileSelector") int select, @Const IntBuffer values, int nbValues); - public native @Cast("bool") @NoException(true) boolean setShapeValues( - String inputName, OptProfileSelector select, @Const int[] values, int nbValues); - public native @Cast("bool") @NoException(true) boolean setShapeValues( - @Cast("const char*") BytePointer inputName, @Cast("nvinfer1::OptProfileSelector") int 
select, @Const IntPointer values, int nbValues); - public native @Cast("bool") @NoException(true) boolean setShapeValues( - String inputName, OptProfileSelector select, @Const IntBuffer values, int nbValues); - public native @Cast("bool") @NoException(true) boolean setShapeValues( - @Cast("const char*") BytePointer inputName, @Cast("nvinfer1::OptProfileSelector") int select, @Const int[] values, int nbValues); - public native @NoException(true) int getNbShapeValues(String inputName); - public native @NoException(true) int getNbShapeValues(@Cast("const char*") BytePointer inputName); - public native @Const @NoException(true) IntPointer getShapeValues(String inputName, OptProfileSelector select); - public native @Const @NoException(true) IntBuffer getShapeValues(@Cast("const char*") BytePointer inputName, @Cast("nvinfer1::OptProfileSelector") int select); - public native @Cast("bool") @NoException(true) boolean setExtraMemoryTarget(float target); - public native @NoException(true) float getExtraMemoryTarget(); - public native @Cast("bool") @NoException(true) boolean isValid(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VPaddingLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VPaddingLayer.java deleted file mode 100644 index bfdc87e0c17..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VPaddingLayer.java +++ /dev/null @@ -1,36 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; 
-import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VPaddingLayer extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public VPaddingLayer(Pointer p) { super(p); } - - public native @NoException(true) void setPrePadding(@ByVal DimsHW padding); - public native @ByVal @NoException(true) DimsHW getPrePadding(); - public native @NoException(true) void setPostPadding(@ByVal DimsHW padding); - public native @ByVal @NoException(true) DimsHW getPostPadding(); - public native @NoException(true) void setPrePaddingNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 padding); - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getPrePaddingNd(); - public native @NoException(true) void setPostPaddingNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 padding); - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getPostPaddingNd(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VParametricReLULayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VParametricReLULayer.java deleted file mode 100644 index c612b39358d..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VParametricReLULayer.java +++ /dev/null @@ -1,40 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import 
org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VParametricReLULayer extends VRoot { - static { Loader.load(); } - /** Default native constructor. */ - public VParametricReLULayer() { super((Pointer)null); allocate(); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public VParametricReLULayer(long size) { super((Pointer)null); allocateArray(size); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public VParametricReLULayer(Pointer p) { super(p); } - private native void allocate(); - private native void allocateArray(long size); - @Override public VParametricReLULayer position(long position) { - return (VParametricReLULayer)super.position(position); - } - @Override public VParametricReLULayer getPointer(long i) { - return new VParametricReLULayer((Pointer)this).offsetAddress(i); - } - -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VPluginLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VPluginLayer.java deleted file mode 100644 index ce3fd8b65c9..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VPluginLayer.java +++ /dev/null @@ -1,29 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static 
org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VPluginLayer extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public VPluginLayer(Pointer p) { super(p); } - - public native @ByRef @NoException(true) IPlugin getPlugin(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VPluginV2Layer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VPluginV2Layer.java deleted file mode 100644 index a37c9691fd2..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VPluginV2Layer.java +++ /dev/null @@ -1,29 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VPluginV2Layer extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public VPluginV2Layer(Pointer p) { super(p); } - - public native @ByRef @NoException(true) IPluginV2 getPlugin(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VPoolingLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VPoolingLayer.java deleted file mode 100644 index 36020d02e1b..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VPoolingLayer.java +++ /dev/null @@ -1,54 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VPoolingLayer extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public VPoolingLayer(Pointer p) { super(p); } - - public native @NoException(true) void setPoolingType(PoolingType type); - public native @NoException(true) void setPoolingType(@Cast("nvinfer1::PoolingType") int type); - public native @NoException(true) PoolingType getPoolingType(); - public native @NoException(true) void setWindowSize(@ByVal DimsHW windowSize); - public native @ByVal @NoException(true) DimsHW getWindowSize(); - public native @NoException(true) void setStride(@ByVal DimsHW stride); - public native @ByVal @NoException(true) DimsHW getStride(); - public native @NoException(true) void setPadding(@ByVal DimsHW padding); - public native @ByVal @NoException(true) DimsHW getPadding(); - public native @NoException(true) void setBlendFactor(float blendFactor); - public native @NoException(true) float getBlendFactor(); - public native @NoException(true) void setAverageCountExcludesPadding(@Cast("bool") boolean exclusive); - public native @Cast("bool") @NoException(true) boolean getAverageCountExcludesPadding(); - public native @NoException(true) void setPrePadding(@ByVal @Cast("nvinfer1::Dims*") Dims32 padding); - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getPrePadding(); - public native @NoException(true) void setPostPadding(@ByVal @Cast("nvinfer1::Dims*") Dims32 padding); - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getPostPadding(); - public native @NoException(true) void setPaddingMode(PaddingMode paddingMode); - public native @NoException(true) void setPaddingMode(@Cast("nvinfer1::PaddingMode") int paddingMode); - public native @NoException(true) PaddingMode getPaddingMode(); - public native @NoException(true) void setWindowSizeNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 windowSize); - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getWindowSizeNd(); - public native @NoException(true) void setStrideNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 stride); - public native 
@ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getStrideNd(); - public native @NoException(true) void setPaddingNd(@ByVal @Cast("nvinfer1::Dims*") Dims32 padding); - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getPaddingNd(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VQuantizeLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VQuantizeLayer.java deleted file mode 100644 index 6a8e3b83a7c..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VQuantizeLayer.java +++ /dev/null @@ -1,30 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VQuantizeLayer extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public VQuantizeLayer(Pointer p) { super(p); } - - public native @NoException(true) int getAxis(); - public native @NoException(true) void setAxis(int axis); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRNNv2Layer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRNNv2Layer.java deleted file mode 100644 index f13567922da..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRNNv2Layer.java +++ /dev/null @@ -1,55 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VRNNv2Layer extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public VRNNv2Layer(Pointer p) { super(p); } - - public native @NoException(true) int getLayerCount(); - public native @NoException(true) int getHiddenSize(); - public native @NoException(true) int getMaxSeqLength(); - public native @NoException(true) int getDataLength(); - public native @NoException(true) void setSequenceLengths(@ByRef ITensor seqLengths); - public native @NoException(true) ITensor getSequenceLengths(); - public native @NoException(true) void setOperation(RNNOperation op); - public native @NoException(true) void setOperation(@Cast("nvinfer1::RNNOperation") int op); - public native @NoException(true) RNNOperation getOperation(); - public native @NoException(true) void setInputMode(RNNInputMode op); - public native @NoException(true) void setInputMode(@Cast("nvinfer1::RNNInputMode") int op); - public native @NoException(true) RNNInputMode getInputMode(); - public native @NoException(true) void setDirection(RNNDirection op); - public native @NoException(true) void setDirection(@Cast("nvinfer1::RNNDirection") int op); - public native @NoException(true) RNNDirection getDirection(); - public native @NoException(true) void setWeightsForGate(int layerIndex, RNNGateType gate, @Cast("bool") boolean isW, @ByVal Weights weights); - public native @NoException(true) void setWeightsForGate(int layerIndex, @Cast("nvinfer1::RNNGateType") int gate, @Cast("bool") boolean isW, @ByVal Weights weights); - public native @ByVal @NoException(true) Weights getWeightsForGate(int layerIndex, RNNGateType gate, @Cast("bool") boolean isW); - public native @ByVal @NoException(true) Weights getWeightsForGate(int layerIndex, @Cast("nvinfer1::RNNGateType") int gate, @Cast("bool") boolean isW); - public native @NoException(true) void setBiasForGate(int layerIndex, RNNGateType gate, @Cast("bool") boolean isW, @ByVal Weights bias); - public native @NoException(true) void setBiasForGate(int layerIndex, @Cast("nvinfer1::RNNGateType") int gate, @Cast("bool") boolean isW, @ByVal 
Weights bias); - public native @ByVal @NoException(true) Weights getBiasForGate(int layerIndex, RNNGateType gate, @Cast("bool") boolean isW); - public native @ByVal @NoException(true) Weights getBiasForGate(int layerIndex, @Cast("nvinfer1::RNNGateType") int gate, @Cast("bool") boolean isW); - public native @NoException(true) void setHiddenState(@ByRef ITensor hidden); - public native @NoException(true) ITensor getHiddenState(); - public native @NoException(true) void setCellState(@ByRef ITensor cell); - public native @NoException(true) ITensor getCellState(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRaggedSoftMaxLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRaggedSoftMaxLayer.java deleted file mode 100644 index 0a32c06e847..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRaggedSoftMaxLayer.java +++ /dev/null @@ -1,40 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VRaggedSoftMaxLayer extends VRoot { - static { Loader.load(); } - /** Default native constructor. */ - public VRaggedSoftMaxLayer() { super((Pointer)null); allocate(); } - /** Native array allocator. Access with {@link Pointer#position(long)}. 
*/ - public VRaggedSoftMaxLayer(long size) { super((Pointer)null); allocateArray(size); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public VRaggedSoftMaxLayer(Pointer p) { super(p); } - private native void allocate(); - private native void allocateArray(long size); - @Override public VRaggedSoftMaxLayer position(long position) { - return (VRaggedSoftMaxLayer)super.position(position); - } - @Override public VRaggedSoftMaxLayer getPointer(long i) { - return new VRaggedSoftMaxLayer((Pointer)this).offsetAddress(i); - } - -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRecurrenceLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRecurrenceLayer.java deleted file mode 100644 index 24a7052c6dc..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRecurrenceLayer.java +++ /dev/null @@ -1,40 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VRecurrenceLayer extends VRoot { - static { Loader.load(); } - /** Default native constructor. */ - public VRecurrenceLayer() { super((Pointer)null); allocate(); } - /** Native array allocator. Access with {@link Pointer#position(long)}. 
*/ - public VRecurrenceLayer(long size) { super((Pointer)null); allocateArray(size); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public VRecurrenceLayer(Pointer p) { super(p); } - private native void allocate(); - private native void allocateArray(long size); - @Override public VRecurrenceLayer position(long position) { - return (VRecurrenceLayer)super.position(position); - } - @Override public VRecurrenceLayer getPointer(long i) { - return new VRecurrenceLayer((Pointer)this).offsetAddress(i); - } - -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VReduceLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VReduceLayer.java deleted file mode 100644 index 85979041534..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VReduceLayer.java +++ /dev/null @@ -1,35 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VReduceLayer extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public VReduceLayer(Pointer p) { super(p); } - - public native @NoException(true) void setOperation(ReduceOperation op); - public native @NoException(true) void setOperation(@Cast("nvinfer1::ReduceOperation") int op); - public native @NoException(true) ReduceOperation getOperation(); - public native @NoException(true) void setReduceAxes(@Cast("uint32_t") int reduceAxes); - public native @Cast("uint32_t") @NoException(true) int getReduceAxes(); - public native @NoException(true) void setKeepDimensions(@Cast("bool") boolean keepDimensions); - public native @Cast("bool") @NoException(true) boolean getKeepDimensions(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRefitter.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRefitter.java deleted file mode 100644 index 1b2a606d472..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRefitter.java +++ /dev/null @@ -1,61 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VRefitter extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public VRefitter(Pointer p) { super(p); } - - public native @Cast("bool") @NoException(true) boolean setWeights(String layerName, WeightsRole role, @Const @ByVal Weights weights); - public native @Cast("bool") @NoException(true) boolean setWeights(@Cast("const char*") BytePointer layerName, @Cast("nvinfer1::WeightsRole") int role, @Const @ByVal Weights weights); - public native @Cast("bool") @NoException(true) boolean refitCudaEngine(); - public native @NoException(true) int getMissing(int size, @Cast("const char**") PointerPointer layerNames, @Cast("nvinfer1::WeightsRole*") IntPointer roles); - public native @NoException(true) int getMissing(int size, @Cast("const char**") @ByPtrPtr BytePointer layerNames, @Cast("nvinfer1::WeightsRole*") IntPointer roles); - public native @NoException(true) int getMissing(int size, @Cast("const char**") @ByPtrPtr ByteBuffer layerNames, @Cast("nvinfer1::WeightsRole*") IntBuffer roles); - public native @NoException(true) int getMissing(int size, @Cast("const char**") @ByPtrPtr byte[] layerNames, @Cast("nvinfer1::WeightsRole*") int[] roles); - public native @NoException(true) int getAll(int size, @Cast("const char**") PointerPointer layerNames, @Cast("nvinfer1::WeightsRole*") IntPointer roles); - public native @NoException(true) int getAll(int size, @Cast("const char**") @ByPtrPtr BytePointer layerNames, @Cast("nvinfer1::WeightsRole*") IntPointer roles); - public native @NoException(true) int getAll(int size, @Cast("const char**") @ByPtrPtr ByteBuffer layerNames, @Cast("nvinfer1::WeightsRole*") IntBuffer roles); - public native @NoException(true) int getAll(int size, @Cast("const char**") @ByPtrPtr byte[] layerNames, @Cast("nvinfer1::WeightsRole*") int[] roles); - public native @Cast("bool") @NoException(true) boolean setDynamicRange(String tensorName, float min, float max); - public native @Cast("bool") @NoException(true) boolean setDynamicRange(@Cast("const char*") BytePointer tensorName, float min, float max); - public native 
@NoException(true) float getDynamicRangeMin(String tensorName); - public native @NoException(true) float getDynamicRangeMin(@Cast("const char*") BytePointer tensorName); - public native @NoException(true) float getDynamicRangeMax(String tensorName); - public native @NoException(true) float getDynamicRangeMax(@Cast("const char*") BytePointer tensorName); - public native @NoException(true) int getTensorsWithDynamicRange(int size, @Cast("const char**") PointerPointer tensorNames); - public native @NoException(true) int getTensorsWithDynamicRange(int size, @Cast("const char**") @ByPtrPtr BytePointer tensorNames); - public native @NoException(true) int getTensorsWithDynamicRange(int size, @Cast("const char**") @ByPtrPtr ByteBuffer tensorNames); - public native @NoException(true) int getTensorsWithDynamicRange(int size, @Cast("const char**") @ByPtrPtr byte[] tensorNames); - public native @NoException(true) void setErrorRecorder(IErrorRecorder recorder); - public native @NoException(true) IErrorRecorder getErrorRecorder(); - public native @Cast("bool") @NoException(true) boolean setNamedWeights(String name, @ByVal Weights weights); - public native @Cast("bool") @NoException(true) boolean setNamedWeights(@Cast("const char*") BytePointer name, @ByVal Weights weights); - public native @NoException(true) int getMissingWeights(int size, @Cast("const char**") PointerPointer weightsNames); - public native @NoException(true) int getMissingWeights(int size, @Cast("const char**") @ByPtrPtr BytePointer weightsNames); - public native @NoException(true) int getMissingWeights(int size, @Cast("const char**") @ByPtrPtr ByteBuffer weightsNames); - public native @NoException(true) int getMissingWeights(int size, @Cast("const char**") @ByPtrPtr byte[] weightsNames); - public native @NoException(true) int getAllWeights(int size, @Cast("const char**") PointerPointer weightsNames); - public native @NoException(true) int getAllWeights(int size, @Cast("const char**") @ByPtrPtr BytePointer 
weightsNames); - public native @NoException(true) int getAllWeights(int size, @Cast("const char**") @ByPtrPtr ByteBuffer weightsNames); - public native @NoException(true) int getAllWeights(int size, @Cast("const char**") @ByPtrPtr byte[] weightsNames); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VResizeLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VResizeLayer.java deleted file mode 100644 index bb7eaa1dc4e..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VResizeLayer.java +++ /dev/null @@ -1,50 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VResizeLayer extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public VResizeLayer(Pointer p) { super(p); } - - public native @NoException(true) void setOutputDimensions(@ByVal @Cast("nvinfer1::Dims*") Dims32 dimensions); - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getOutputDimensions(); - public native @NoException(true) void setScales(@Const FloatPointer scales, int nbScales); - public native @NoException(true) void setScales(@Const FloatBuffer scales, int nbScales); - public native @NoException(true) void setScales(@Const float[] scales, int nbScales); - public native @NoException(true) int getScales(int size, FloatPointer scales); - public native @NoException(true) int getScales(int size, FloatBuffer scales); - public native @NoException(true) int getScales(int size, float[] scales); - public native @NoException(true) void setResizeMode(ResizeMode resizeMode); - public native @NoException(true) void setResizeMode(@Cast("nvinfer1::ResizeMode") int resizeMode); - public native @NoException(true) ResizeMode getResizeMode(); - public native @NoException(true) void setAlignCorners(@Cast("bool") boolean alignCorners); - public native @Cast("bool") @NoException(true) boolean getAlignCorners(); - public native @NoException(true) void setCoordinateTransformation(ResizeCoordinateTransformation coordTransform); - public native @NoException(true) void setCoordinateTransformation(@Cast("nvinfer1::ResizeCoordinateTransformation") int coordTransform); - public native @NoException(true) ResizeCoordinateTransformation getCoordinateTransformation(); - public native @NoException(true) void setSelectorForSinglePixel(ResizeSelector selector); - public native @NoException(true) void setSelectorForSinglePixel(@Cast("nvinfer1::ResizeSelector") int selector); - public native @NoException(true) ResizeSelector getSelectorForSinglePixel(); - public native @NoException(true) void setNearestRounding(ResizeRoundMode value); - public native @NoException(true) void setNearestRounding(@Cast("nvinfer1::ResizeRoundMode") 
int value); - public native @NoException(true) ResizeRoundMode getNearestRounding(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRoot.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRoot.java deleted file mode 100644 index fa5dc93c028..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRoot.java +++ /dev/null @@ -1,48 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -/** - * \file NvInferImpl.h - * - * This file contains definitions for API methods that cross the shared library boundary. These - * methods must not be called directly by applications; they should only be called through the - * API classes. - * */ - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VRoot extends Pointer { - static { Loader.load(); } - /** Default native constructor. */ - public VRoot() { super((Pointer)null); allocate(); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public VRoot(long size) { super((Pointer)null); allocateArray(size); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public VRoot(Pointer p) { super(p); } - private native void allocate(); - private native void allocateArray(long size); - @Override public VRoot position(long position) { - return (VRoot)super.position(position); - } - @Override public VRoot getPointer(long i) { - return new VRoot((Pointer)this).offsetAddress(i); - } - -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRuntime.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRuntime.java deleted file mode 100644 index 8f0bf9484fe..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VRuntime.java +++ /dev/null @@ -1,36 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VRuntime extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public VRuntime(Pointer p) { super(p); } - - public native @NoException(true) ICudaEngine deserializeCudaEngine( - @Const Pointer blob, @Cast("std::size_t") long size, IPluginFactory pluginFactory); - public native @NoException(true) void setDLACore(int dlaCore); - public native @NoException(true) int getDLACore(); - public native @NoException(true) int getNbDLACores(); - public native @NoException(true) void setGpuAllocator(IGpuAllocator allocator); - public native @NoException(true) void setErrorRecorder(IErrorRecorder recorder); - public native @NoException(true) IErrorRecorder getErrorRecorder(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VScaleLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VScaleLayer.java deleted file mode 100644 index da9f76ffd30..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VScaleLayer.java +++ /dev/null @@ -1,39 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VScaleLayer extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public VScaleLayer(Pointer p) { super(p); } - - public native @NoException(true) void setMode(ScaleMode mode); - public native @NoException(true) void setMode(@Cast("nvinfer1::ScaleMode") int mode); - public native @NoException(true) ScaleMode getMode(); - public native @NoException(true) void setShift(@ByVal Weights shift); - public native @ByVal @NoException(true) Weights getShift(); - public native @NoException(true) void setScale(@ByVal Weights scale); - public native @ByVal @NoException(true) Weights getScale(); - public native @NoException(true) void setPower(@ByVal Weights power); - public native @ByVal @NoException(true) Weights getPower(); - public native @NoException(true) int getChannelAxis(); - public native @NoException(true) void setChannelAxis(int channelAxis); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VSelectLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VSelectLayer.java deleted file mode 100644 index 87819d1bfdd..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VSelectLayer.java +++ /dev/null @@ -1,27 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - -@Namespace("nvinfer1::apiv") @Opaque @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VSelectLayer extends VRoot { - /** Empty constructor. 
Calls {@code super((Pointer)null)}. */ - public VSelectLayer() { super((Pointer)null); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public VSelectLayer(Pointer p) { super(p); } -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VShapeLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VShapeLayer.java deleted file mode 100644 index cfddc2c83d3..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VShapeLayer.java +++ /dev/null @@ -1,40 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VShapeLayer extends VRoot { - static { Loader.load(); } - /** Default native constructor. */ - public VShapeLayer() { super((Pointer)null); allocate(); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public VShapeLayer(long size) { super((Pointer)null); allocateArray(size); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public VShapeLayer(Pointer p) { super(p); } - private native void allocate(); - private native void allocateArray(long size); - @Override public VShapeLayer position(long position) { - return (VShapeLayer)super.position(position); - } - @Override public VShapeLayer getPointer(long i) { - return new VShapeLayer((Pointer)this).offsetAddress(i); - } - -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VShuffleLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VShuffleLayer.java deleted file mode 100644 index f19ceb6538f..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VShuffleLayer.java +++ /dev/null @@ -1,36 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VShuffleLayer extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public VShuffleLayer(Pointer p) { super(p); } - - public native @NoException(true) void setFirstTranspose(@Const @ByRef Permutation permutation); - public native @Const @ByRef @NoException(true) Permutation getFirstTranspose(); - public native @NoException(true) void setReshapeDimensions(@ByVal @Cast("nvinfer1::Dims*") Dims32 dimensions); - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getReshapeDimensions(); - public native @NoException(true) void setSecondTranspose(@Const @ByRef Permutation permutation); - public native @Const @ByRef @NoException(true) Permutation getSecondTranspose(); - public native void setZeroIsPlaceholder(@Cast("bool") boolean zeroIsPlaceholder); - public native @Cast("bool") boolean getZeroIsPlaceholder(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VSliceLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VSliceLayer.java deleted file mode 100644 index 61eafa3b4fa..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VSliceLayer.java +++ /dev/null @@ -1,37 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VSliceLayer extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. 
Invokes {@link Pointer#Pointer(Pointer)}. */ - public VSliceLayer(Pointer p) { super(p); } - - public native @NoException(true) void setStart(@ByVal @Cast("nvinfer1::Dims*") Dims32 start); - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getStart(); - public native @NoException(true) void setSize(@ByVal @Cast("nvinfer1::Dims*") Dims32 size); - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getSize(); - public native @NoException(true) void setStride(@ByVal @Cast("nvinfer1::Dims*") Dims32 stride); - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getStride(); - public native @NoException(true) void setMode(SliceMode mode); - public native @NoException(true) void setMode(@Cast("nvinfer1::SliceMode") int mode); - public native @NoException(true) SliceMode getMode(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VSoftMaxLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VSoftMaxLayer.java deleted file mode 100644 index b1dab0e9cba..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VSoftMaxLayer.java +++ /dev/null @@ -1,30 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VSoftMaxLayer extends VRoot 
{ - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public VSoftMaxLayer(Pointer p) { super(p); } - - public native @NoException(true) void setAxes(@Cast("uint32_t") int axes); - public native @Cast("uint32_t") @NoException(true) int getAxes(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VTensor.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VTensor.java deleted file mode 100644 index 1bbb65af97d..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VTensor.java +++ /dev/null @@ -1,52 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VTensor extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public VTensor(Pointer p) { super(p); } - - public native @NoException(true) void setName(String name); - public native @NoException(true) void setName(@Cast("const char*") BytePointer name); - public native @NoException(true) String getName(); - public native @NoException(true) void setDimensions(@ByVal @Cast("nvinfer1::Dims*") Dims32 dimensions); - public native @ByVal @Cast("nvinfer1::Dims*") @NoException(true) Dims32 getDimensions(); - public native @NoException(true) void setType(DataType type); - public native @NoException(true) void setType(@Cast("nvinfer1::DataType") int type); - public native @NoException(true) DataType getType(); - public native @Cast("bool") @NoException(true) boolean setDynamicRange(float min, float max); - public native @Cast("bool") @NoException(true) boolean isNetworkInput(); - public native @Cast("bool") @NoException(true) boolean isNetworkOutput(); - public native @NoException(true) void setBroadcastAcrossBatch(@Cast("bool") boolean broadcastAcrossBatch); - public native @Cast("bool") @NoException(true) boolean getBroadcastAcrossBatch(); - public native @NoException(true) TensorLocation getLocation(); - public native @NoException(true) void setLocation(TensorLocation location); - public native @NoException(true) void setLocation(@Cast("nvinfer1::TensorLocation") int location); - public native @Cast("bool") @NoException(true) boolean dynamicRangeIsSet(); - public native @NoException(true) void resetDynamicRange(); - public native @NoException(true) float getDynamicRangeMin(); - public native @NoException(true) float getDynamicRangeMax(); - public native @NoException(true) void setAllowedFormats(@Cast("nvinfer1::TensorFormats") int formats); - public native @Cast("nvinfer1::TensorFormats") @NoException(true) int getAllowedFormats(); - public native @Cast("bool") @NoException(true) boolean isShapeTensor(); - public native @Cast("bool") @NoException(true) boolean isExecutionTensor(); -} diff --git 
a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VTimingCache.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VTimingCache.java deleted file mode 100644 index bf747271a0d..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VTimingCache.java +++ /dev/null @@ -1,31 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VTimingCache extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public VTimingCache(Pointer p) { super(p); } - - public native @NoException(true) IHostMemory serialize(); - public native @Cast("bool") @NoException(true) boolean combine(@Const @ByRef ITimingCache inputCache, @Cast("bool") boolean ignoreMismatch); - public native @Cast("bool") @NoException(true) boolean reset(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VTopKLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VTopKLayer.java deleted file mode 100644 index 53e8945c5c8..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VTopKLayer.java +++ /dev/null @@ -1,35 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VTopKLayer extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public VTopKLayer(Pointer p) { super(p); } - - public native @NoException(true) void setOperation(TopKOperation op); - public native @NoException(true) void setOperation(@Cast("nvinfer1::TopKOperation") int op); - public native @NoException(true) TopKOperation getOperation(); - public native @NoException(true) void setK(int k); - public native @NoException(true) int getK(); - public native @NoException(true) void setReduceAxes(@Cast("uint32_t") int reduceAxes); - public native @Cast("uint32_t") @NoException(true) int getReduceAxes(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VTripLimitLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VTripLimitLayer.java deleted file mode 100644 index 03352af105e..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VTripLimitLayer.java +++ /dev/null @@ -1,29 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VTripLimitLayer extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public VTripLimitLayer(Pointer p) { super(p); } - - public native @NoException(true) TripLimit getTripLimit(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VUnaryLayer.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VUnaryLayer.java deleted file mode 100644 index e7d4939d5fe..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/VUnaryLayer.java +++ /dev/null @@ -1,31 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - -@Namespace("nvinfer1::apiv") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class VUnaryLayer extends VRoot { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public VUnaryLayer(Pointer p) { super(p); } - - public native @NoException(true) void setOperation(UnaryOperation op); - public native @NoException(true) void setOperation(@Cast("nvinfer1::UnaryOperation") int op); - public native @NoException(true) UnaryOperation getOperation(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Weights.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Weights.java deleted file mode 100644 index fc02f313395..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/Weights.java +++ /dev/null @@ -1,58 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - // namespace impl - -/** - * \class Weights - * - * \brief An array of weights used as a layer parameter. - * - * When using the DLA, the cumulative size of all Weights used in a network - * must be less than 512MB in size. If the build option kGPU_FALLBACK is specified, - * then multiple DLA sub-networks may be generated from the single original network. - * - * The weights are held by reference until the engine has been built. Therefore the data referenced - * by \p values field should be preserved until the build is complete. 
- * */ -@Namespace("nvinfer1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class Weights extends Pointer { - static { Loader.load(); } - /** Default native constructor. */ - public Weights() { super((Pointer)null); allocate(); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public Weights(long size) { super((Pointer)null); allocateArray(size); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public Weights(Pointer p) { super(p); } - private native void allocate(); - private native void allocateArray(long size); - @Override public Weights position(long position) { - return (Weights)super.position(position); - } - @Override public Weights getPointer(long i) { - return new Weights((Pointer)this).offsetAddress(i); - } - - /** The type of the weights. */ - public native DataType type(); public native Weights type(DataType setter); - /** The weight values, in a contiguous array. */ - public native @Const Pointer values(); public native Weights values(Pointer values); - /** The number of weights in the array. 
*/ - public native @Cast("int64_t") long count(); public native Weights count(long setter); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/cublasContext.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/cublasContext.java deleted file mode 100644 index 23b6b350151..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/cublasContext.java +++ /dev/null @@ -1,28 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - /** Forward declaration of cublasContext to use in other interfaces */ - @Opaque @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class cublasContext extends Pointer { - /** Empty constructor. Calls {@code super((Pointer)null)}. */ - public cublasContext() { super((Pointer)null); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public cublasContext(Pointer p) { super(p); } - } diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/cudnnContext.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/cudnnContext.java deleted file mode 100644 index 87e352e38d0..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer/cudnnContext.java +++ /dev/null @@ -1,28 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; - -import static org.bytedeco.tensorrt.global.nvinfer.*; - - /** Forward declaration of cudnnContext to use in other interfaces */ - @Opaque @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer.class) -public class cudnnContext extends Pointer { - /** Empty constructor. Calls {@code super((Pointer)null)}. */ - public cudnnContext() { super((Pointer)null); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public cudnnContext(Pointer p) { super(p); } - } diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/DetectionOutputParameters.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/DetectionOutputParameters.java deleted file mode 100644 index eb4eb56906d..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/DetectionOutputParameters.java +++ /dev/null @@ -1,75 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer_plugin; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; -import org.bytedeco.tensorrt.nvinfer.*; -import static org.bytedeco.tensorrt.global.nvinfer.*; - -import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; - - -/** - * \brief The DetectionOutput plugin layer generates the detection output based on location and confidence predictions by doing non maximum suppression. - * This plugin first decodes the bounding boxes based on the anchors generated. It then performs non_max_suppression on the decoded bounding boxes. - * DetectionOutputParameters defines a set of parameters for creating the DetectionOutput plugin layer. - * It contains: - * @param shareLocation If true, bounding box are shared among different classes. - * @param varianceEncodedInTarget If true, variance is encoded in target. Otherwise we need to adjust the predicted offset accordingly. - * @param backgroundLabelId Background label ID. If there is no background class, set it as -1. 
- * @param numClasses Number of classes to be predicted. - * @param topK Number of boxes per image with top confidence scores that are fed into the NMS algorithm. - * @param keepTopK Number of total bounding boxes to be kept per image after NMS step. - * @param confidenceThreshold Only consider detections whose confidences are larger than a threshold. - * @param nmsThreshold Threshold to be used in NMS. - * @param codeType Type of coding method for bbox. - * @param inputOrder Specifies the order of inputs {loc_data, conf_data, priorbox_data}. - * @param confSigmoid Set to true to calculate sigmoid of confidence scores. - * @param isNormalized Set to true if bounding box data is normalized by the network. - * @param isBatchAgnostic Defaults to true. Set to false if prior boxes are unique per batch - * */ -@Namespace("nvinfer1::plugin") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer_plugin.class) -public class DetectionOutputParameters extends Pointer { - static { Loader.load(); } - /** Default native constructor. */ - public DetectionOutputParameters() { super((Pointer)null); allocate(); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public DetectionOutputParameters(long size) { super((Pointer)null); allocateArray(size); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public DetectionOutputParameters(Pointer p) { super(p); } - private native void allocate(); - private native void allocateArray(long size); - @Override public DetectionOutputParameters position(long position) { - return (DetectionOutputParameters)super.position(position); - } - @Override public DetectionOutputParameters getPointer(long i) { - return new DetectionOutputParameters((Pointer)this).offsetAddress(i); - } - - public native @Cast("bool") boolean shareLocation(); public native DetectionOutputParameters shareLocation(boolean setter); - public native @Cast("bool") boolean varianceEncodedInTarget(); public native DetectionOutputParameters varianceEncodedInTarget(boolean setter); - public native int backgroundLabelId(); public native DetectionOutputParameters backgroundLabelId(int setter); - public native int numClasses(); public native DetectionOutputParameters numClasses(int setter); - public native int topK(); public native DetectionOutputParameters topK(int setter); - public native int keepTopK(); public native DetectionOutputParameters keepTopK(int setter); - public native float confidenceThreshold(); public native DetectionOutputParameters confidenceThreshold(float setter); - public native float nmsThreshold(); public native DetectionOutputParameters nmsThreshold(float setter); - public native CodeTypeSSD codeType(); public native DetectionOutputParameters codeType(CodeTypeSSD setter); - public native int inputOrder(int i); public native DetectionOutputParameters inputOrder(int i, int setter); - @MemberGetter public native IntPointer inputOrder(); - public native @Cast("bool") boolean confSigmoid(); public native DetectionOutputParameters confSigmoid(boolean setter); - public native @Cast("bool") boolean isNormalized(); public native DetectionOutputParameters isNormalized(boolean setter); - public native @Cast("bool") boolean isBatchAgnostic(); public native DetectionOutputParameters isBatchAgnostic(boolean setter); -} diff --git 
a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/GridAnchorParameters.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/GridAnchorParameters.java deleted file mode 100644 index b0391a3d4ae..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/GridAnchorParameters.java +++ /dev/null @@ -1,63 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer_plugin; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; -import org.bytedeco.tensorrt.nvinfer.*; -import static org.bytedeco.tensorrt.global.nvinfer.*; - -import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; - - - -/** - * \brief The Anchor Generator plugin layer generates the prior boxes of designated sizes and aspect ratios across all dimensions (H x W). - * GridAnchorParameters defines a set of parameters for creating the plugin layer for all feature maps. - * It contains: - * @param minScale Scale of anchors corresponding to finest resolution. - * @param maxScale Scale of anchors corresponding to coarsest resolution. - * @param aspectRatios List of aspect ratios to place on each grid point. - * @param numAspectRatios Number of elements in aspectRatios. - * @param H Height of feature map to generate anchors for. - * @param W Width of feature map to generate anchors for. - * @param variance Variance for adjusting the prior boxes. 
- * */ -@Namespace("nvinfer1::plugin") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer_plugin.class) -public class GridAnchorParameters extends Pointer { - static { Loader.load(); } - /** Default native constructor. */ - public GridAnchorParameters() { super((Pointer)null); allocate(); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public GridAnchorParameters(long size) { super((Pointer)null); allocateArray(size); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public GridAnchorParameters(Pointer p) { super(p); } - private native void allocate(); - private native void allocateArray(long size); - @Override public GridAnchorParameters position(long position) { - return (GridAnchorParameters)super.position(position); - } - @Override public GridAnchorParameters getPointer(long i) { - return new GridAnchorParameters((Pointer)this).offsetAddress(i); - } - - public native float minSize(); public native GridAnchorParameters minSize(float setter); - public native float maxSize(); public native GridAnchorParameters maxSize(float setter); - public native FloatPointer aspectRatios(); public native GridAnchorParameters aspectRatios(FloatPointer setter); - public native int numAspectRatios(); public native GridAnchorParameters numAspectRatios(int setter); - public native int H(); public native GridAnchorParameters H(int setter); - public native int W(); public native GridAnchorParameters W(int setter); - public native float variance(int i); public native GridAnchorParameters variance(int i, float setter); - @MemberGetter public native FloatPointer variance(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/NMSParameters.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/NMSParameters.java deleted file mode 100644 index 190a633add1..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/NMSParameters.java +++ 
/dev/null @@ -1,67 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer_plugin; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; -import org.bytedeco.tensorrt.nvinfer.*; -import static org.bytedeco.tensorrt.global.nvinfer.*; - -import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; - - -/** - * \brief The NMSParameters are used by the BatchedNMSPlugin for performing - * the non_max_suppression operation over boxes for object detection networks. - * @param shareLocation If set to true, the boxes inputs are shared across all - * classes. If set to false, the boxes input should account for per class box data. - * @param backgroundLabelId Label ID for the background class. If there is no background class, set it as -1 - * @param numClasses Number of classes in the network. - * @param topK Number of bounding boxes to be fed into the NMS step. - * @param keepTopK Number of total bounding boxes to be kept per image after NMS step. - * Should be less than or equal to the topK value. - * @param scoreThreshold Scalar threshold for score (low scoring boxes are removed). - * @param iouThreshold scalar threshold for IOU (new boxes that have high IOU overlap - * with previously selected boxes are removed). - * @param isNormalized Set to false, if the box coordinates are not - * normalized, i.e. not in the range [0,1]. Defaults to false. 
- * */ - -@Namespace("nvinfer1::plugin") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer_plugin.class) -public class NMSParameters extends Pointer { - static { Loader.load(); } - /** Default native constructor. */ - public NMSParameters() { super((Pointer)null); allocate(); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public NMSParameters(long size) { super((Pointer)null); allocateArray(size); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public NMSParameters(Pointer p) { super(p); } - private native void allocate(); - private native void allocateArray(long size); - @Override public NMSParameters position(long position) { - return (NMSParameters)super.position(position); - } - @Override public NMSParameters getPointer(long i) { - return new NMSParameters((Pointer)this).offsetAddress(i); - } - - public native @Cast("bool") boolean shareLocation(); public native NMSParameters shareLocation(boolean setter); - public native int backgroundLabelId(); public native NMSParameters backgroundLabelId(int setter); - public native int numClasses(); public native NMSParameters numClasses(int setter); - public native int topK(); public native NMSParameters topK(int setter); - public native int keepTopK(); public native NMSParameters keepTopK(int setter); - public native float scoreThreshold(); public native NMSParameters scoreThreshold(float setter); - public native float iouThreshold(); public native NMSParameters iouThreshold(float setter); - public native @Cast("bool") boolean isNormalized(); public native NMSParameters isNormalized(boolean setter); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/PriorBoxParameters.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/PriorBoxParameters.java deleted file mode 100644 index 666b28a8c8b..00000000000 --- 
a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/PriorBoxParameters.java +++ /dev/null @@ -1,77 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer_plugin; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; -import org.bytedeco.tensorrt.nvinfer.*; -import static org.bytedeco.tensorrt.global.nvinfer.*; - -import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; - - -/** - * \brief The PriorBox plugin layer generates the prior boxes of designated sizes and aspect ratios across all - * dimensions (H x W). PriorBoxParameters defines a set of parameters for creating the PriorBox plugin layer. It - * contains: - * @param minSize Minimum box size in pixels. Can not be nullptr. - * @param maxSize Maximum box size in pixels. Can be nullptr. - * @param aspectRatios Aspect ratios of the boxes. Can be nullptr. - * @param numMinSize Number of elements in minSize. Must be larger than 0. - * @param numMaxSize Number of elements in maxSize. Can be 0 or same as numMinSize. - * @param numAspectRatios Number of elements in aspectRatios. Can be 0. - * @param flip If true, will flip each aspect ratio. For example, if there is an aspect ratio "r", the aspect ratio - * "1.0/r" will be generated as well. - * @param clip If true, will clip the prior so that it is within [0,1]. - * @param variance Variance for adjusting the prior boxes. - * @param imgH Image height. If 0, then the H dimension of the data tensor will be used. - * @param imgW Image width. 
If 0, then the W dimension of the data tensor will be used. - * @param stepH Step in H. If 0, then (float)imgH/h will be used where h is the H dimension of the 1st input tensor. - * @param stepW Step in W. If 0, then (float)imgW/w will be used where w is the W dimension of the 1st input tensor. - * @param offset Offset to the top left corner of each cell. - * */ -@Namespace("nvinfer1::plugin") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer_plugin.class) -public class PriorBoxParameters extends Pointer { - static { Loader.load(); } - /** Default native constructor. */ - public PriorBoxParameters() { super((Pointer)null); allocate(); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public PriorBoxParameters(long size) { super((Pointer)null); allocateArray(size); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public PriorBoxParameters(Pointer p) { super(p); } - private native void allocate(); - private native void allocateArray(long size); - @Override public PriorBoxParameters position(long position) { - return (PriorBoxParameters)super.position(position); - } - @Override public PriorBoxParameters getPointer(long i) { - return new PriorBoxParameters((Pointer)this).offsetAddress(i); - } - - public native FloatPointer minSize(); public native PriorBoxParameters minSize(FloatPointer setter); - public native FloatPointer maxSize(); public native PriorBoxParameters maxSize(FloatPointer setter); - public native FloatPointer aspectRatios(); public native PriorBoxParameters aspectRatios(FloatPointer setter); - public native int numMinSize(); public native PriorBoxParameters numMinSize(int setter); - public native int numMaxSize(); public native PriorBoxParameters numMaxSize(int setter); - public native int numAspectRatios(); public native PriorBoxParameters numAspectRatios(int setter); - public native @Cast("bool") boolean flip(); public native PriorBoxParameters flip(boolean setter); - public 
native @Cast("bool") boolean clip(); public native PriorBoxParameters clip(boolean setter); - public native float variance(int i); public native PriorBoxParameters variance(int i, float setter); - @MemberGetter public native FloatPointer variance(); - public native int imgH(); public native PriorBoxParameters imgH(int setter); - public native int imgW(); public native PriorBoxParameters imgW(int setter); - public native float stepH(); public native PriorBoxParameters stepH(float setter); - public native float stepW(); public native PriorBoxParameters stepW(float setter); - public native float offset(); public native PriorBoxParameters offset(float setter); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/Quadruple.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/Quadruple.java deleted file mode 100644 index 62f70100ec8..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/Quadruple.java +++ /dev/null @@ -1,49 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer_plugin; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; -import org.bytedeco.tensorrt.nvinfer.*; -import static org.bytedeco.tensorrt.global.nvinfer.*; - -import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; - - -/** - * \brief The Permute plugin layer permutes the input tensor by changing the memory order of the data. - * Quadruple defines a structure that contains an array of 4 integers. 
They can represent the permute orders or the - * strides in each dimension. - * */ -@Namespace("nvinfer1::plugin") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer_plugin.class) -public class Quadruple extends Pointer { - static { Loader.load(); } - /** Default native constructor. */ - public Quadruple() { super((Pointer)null); allocate(); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public Quadruple(long size) { super((Pointer)null); allocateArray(size); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public Quadruple(Pointer p) { super(p); } - private native void allocate(); - private native void allocateArray(long size); - @Override public Quadruple position(long position) { - return (Quadruple)super.position(position); - } - @Override public Quadruple getPointer(long i) { - return new Quadruple((Pointer)this).offsetAddress(i); - } - - public native int data(int i); public native Quadruple data(int i, int setter); - @MemberGetter public native IntPointer data(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/RPROIParams.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/RPROIParams.java deleted file mode 100644 index 4d3661a87c3..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/RPROIParams.java +++ /dev/null @@ -1,67 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer_plugin; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static 
org.bytedeco.cuda.global.nvrtc.*; -import org.bytedeco.tensorrt.nvinfer.*; -import static org.bytedeco.tensorrt.global.nvinfer.*; - -import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; - - -/** - * \brief RPROIParams is used to create the RPROIPlugin instance. - * It contains: - * @param poolingH Height of the output in pixels after ROI pooling on feature map. - * @param poolingW Width of the output in pixels after ROI pooling on feature map. - * @param featureStride Feature stride; ratio of input image size to feature map size. Assuming that max pooling layers - * in the neural network use square filters. - * @param preNmsTop Number of proposals to keep before applying NMS. - * @param nmsMaxOut Number of remaining proposals after applying NMS. - * @param anchorsRatioCount Number of anchor box ratios. - * @param anchorsScaleCount Number of anchor box scales. - * @param iouThreshold IoU (Intersection over Union) threshold used for the NMS step. - * @param minBoxSize Minimum allowed bounding box size before scaling, used for anchor box calculation. - * @param spatialScale Spatial scale between the input image and the last feature map. - * */ -@Namespace("nvinfer1::plugin") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer_plugin.class) -public class RPROIParams extends Pointer { - static { Loader.load(); } - /** Default native constructor. */ - public RPROIParams() { super((Pointer)null); allocate(); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public RPROIParams(long size) { super((Pointer)null); allocateArray(size); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public RPROIParams(Pointer p) { super(p); } - private native void allocate(); - private native void allocateArray(long size); - @Override public RPROIParams position(long position) { - return (RPROIParams)super.position(position); - } - @Override public RPROIParams getPointer(long i) { - return new RPROIParams((Pointer)this).offsetAddress(i); - } - - public native int poolingH(); public native RPROIParams poolingH(int setter); - public native int poolingW(); public native RPROIParams poolingW(int setter); - public native int featureStride(); public native RPROIParams featureStride(int setter); - public native int preNmsTop(); public native RPROIParams preNmsTop(int setter); - public native int nmsMaxOut(); public native RPROIParams nmsMaxOut(int setter); - public native int anchorsRatioCount(); public native RPROIParams anchorsRatioCount(int setter); - public native int anchorsScaleCount(); public native RPROIParams anchorsScaleCount(int setter); - public native float iouThreshold(); public native RPROIParams iouThreshold(float setter); - public native float minBoxSize(); public native RPROIParams minBoxSize(float setter); - public native float spatialScale(); public native RPROIParams spatialScale(float setter); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/RegionParameters.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/RegionParameters.java deleted file mode 100644 index 3d1ef39a84c..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/RegionParameters.java +++ /dev/null @@ -1,56 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer_plugin; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import 
org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; -import org.bytedeco.tensorrt.nvinfer.*; -import static org.bytedeco.tensorrt.global.nvinfer.*; - -import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; - - -/** - * \brief The Region plugin layer performs region proposal calculation: generate 5 bounding boxes per cell (for yolo9000, generate 3 bounding boxes per cell). - * For each box, calculating its probablities of objects detections from 80 pre-defined classifications (yolo9000 has 9418 pre-defined classifications, - * and these 9418 items are organized as work-tree structure). - * RegionParameters defines a set of parameters for creating the Region plugin layer. - * @param num Number of predicted bounding box for each grid cell. - * @param coords Number of coordinates for a bounding box. - * @param classes Number of classifications to be predicted. - * @param smTree Helping structure to do softmax on confidence scores. - * */ -@Namespace("nvinfer1::plugin") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer_plugin.class) -public class RegionParameters extends Pointer { - static { Loader.load(); } - /** Default native constructor. */ - public RegionParameters() { super((Pointer)null); allocate(); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public RegionParameters(long size) { super((Pointer)null); allocateArray(size); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public RegionParameters(Pointer p) { super(p); } - private native void allocate(); - private native void allocateArray(long size); - @Override public RegionParameters position(long position) { - return (RegionParameters)super.position(position); - } - @Override public RegionParameters getPointer(long i) { - return new RegionParameters((Pointer)this).offsetAddress(i); - } - - public native int num(); public native RegionParameters num(int setter); - public native int coords(); public native RegionParameters coords(int setter); - public native int classes(); public native RegionParameters classes(int setter); - public native softmaxTree smTree(); public native RegionParameters smTree(softmaxTree setter); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/softmaxTree.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/softmaxTree.java deleted file mode 100644 index a4041359efc..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvinfer_plugin/softmaxTree.java +++ /dev/null @@ -1,56 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvinfer_plugin; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; -import org.bytedeco.tensorrt.nvinfer.*; -import static org.bytedeco.tensorrt.global.nvinfer.*; - -import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; - - -/** - * \brief When performing yolo9000, softmaxTree is helping to do softmax on confidence scores, for element to get the precise 
classification through word-tree structured classification definition. - * */ -@Namespace("nvinfer1::plugin") @Properties(inherit = org.bytedeco.tensorrt.presets.nvinfer_plugin.class) -public class softmaxTree extends Pointer { - static { Loader.load(); } - /** Default native constructor. */ - public softmaxTree() { super((Pointer)null); allocate(); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public softmaxTree(long size) { super((Pointer)null); allocateArray(size); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public softmaxTree(Pointer p) { super(p); } - private native void allocate(); - private native void allocateArray(long size); - @Override public softmaxTree position(long position) { - return (softmaxTree)super.position(position); - } - @Override public softmaxTree getPointer(long i) { - return new softmaxTree((Pointer)this).offsetAddress(i); - } - - public native IntPointer leaf(); public native softmaxTree leaf(IntPointer setter); - public native int n(); public native softmaxTree n(int setter); - public native IntPointer parent(); public native softmaxTree parent(IntPointer setter); - public native IntPointer child(); public native softmaxTree child(IntPointer setter); - public native IntPointer group(); public native softmaxTree group(IntPointer setter); - public native @Cast("char*") BytePointer name(int i); public native softmaxTree name(int i, BytePointer setter); - public native @Cast("char**") PointerPointer name(); public native softmaxTree name(PointerPointer setter); - - public native int groups(); public native softmaxTree groups(int setter); - public native IntPointer groupSize(); public native softmaxTree groupSize(IntPointer setter); - public native IntPointer groupOffset(); public native softmaxTree groupOffset(IntPointer setter); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvonnxparser/IParser.java 
b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvonnxparser/IParser.java deleted file mode 100644 index 61ff91ac5f1..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvonnxparser/IParser.java +++ /dev/null @@ -1,134 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvonnxparser; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; -import org.bytedeco.tensorrt.nvinfer.*; -import static org.bytedeco.tensorrt.global.nvinfer.*; -import org.bytedeco.tensorrt.nvinfer_plugin.*; -import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; - -import static org.bytedeco.tensorrt.global.nvonnxparser.*; - - -/** \class IParser - * - * \brief an object for parsing ONNX models into a TensorRT network definition - */ -@Namespace("nvonnxparser") @Properties(inherit = org.bytedeco.tensorrt.presets.nvonnxparser.class) -public class IParser extends Pointer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IParser(Pointer p) { super(p); } - - /** \brief Parse a serialized ONNX model into the TensorRT network. - * This method has very limited diagnostics. If parsing the serialized model - * fails for any reason (e.g. unsupported IR version, unsupported opset, etc.) - * it the user responsibility to intercept and report the error. - * To obtain a better diagnostic, use the parseFromFile method below. 
- * - * @param serialized_onnx_model Pointer to the serialized ONNX model - * @param serialized_onnx_model_size Size of the serialized ONNX model - * in bytes - * @param model_path Absolute path to the model file for loading external weights if required - * @return true if the model was parsed successfully - * @see getNbErrors() getError() - */ - public native @Cast("bool") boolean parse(@Const Pointer serialized_onnx_model, - @Cast("size_t") long serialized_onnx_model_size, - String model_path/*=nullptr*/); - public native @Cast("bool") boolean parse(@Const Pointer serialized_onnx_model, - @Cast("size_t") long serialized_onnx_model_size); - public native @Cast("bool") boolean parse(@Const Pointer serialized_onnx_model, - @Cast("size_t") long serialized_onnx_model_size, - @Cast("const char*") BytePointer model_path/*=nullptr*/); - - /** \brief Parse an onnx model file, which can be a binary protobuf or a text onnx model - * calls parse method inside. - * - * @param File name - * @param Verbosity Level - * - * @return true if the model was parsed successfully - * - */ - public native @Cast("bool") boolean parseFromFile(String onnxModelFile, int verbosity); - public native @Cast("bool") boolean parseFromFile(@Cast("const char*") BytePointer onnxModelFile, int verbosity); - - /** \brief Check whether TensorRT supports a particular ONNX model - * - * @param serialized_onnx_model Pointer to the serialized ONNX model - * @param serialized_onnx_model_size Size of the serialized ONNX model - * in bytes - * @param sub_graph_collection Container to hold supported subgraphs - * @param model_path Absolute path to the model file for loading external weights if required - * @return true if the model is supported - */ - public native @Cast("bool") boolean supportsModel(@Const Pointer serialized_onnx_model, - @Cast("size_t") long serialized_onnx_model_size, - @ByRef SubGraphCollection_t sub_graph_collection, - String model_path/*=nullptr*/); - public native @Cast("bool") boolean 
supportsModel(@Const Pointer serialized_onnx_model, - @Cast("size_t") long serialized_onnx_model_size, - @ByRef SubGraphCollection_t sub_graph_collection); - public native @Cast("bool") boolean supportsModel(@Const Pointer serialized_onnx_model, - @Cast("size_t") long serialized_onnx_model_size, - @ByRef SubGraphCollection_t sub_graph_collection, - @Cast("const char*") BytePointer model_path/*=nullptr*/); - - /** \brief Parse a serialized ONNX model into the TensorRT network - * with consideration of user provided weights - * - * @param serialized_onnx_model Pointer to the serialized ONNX model - * @param serialized_onnx_model_size Size of the serialized ONNX model - * in bytes - * @return true if the model was parsed successfully - * @see getNbErrors() getError() - */ - public native @Cast("bool") boolean parseWithWeightDescriptors( - @Const Pointer serialized_onnx_model, @Cast("size_t") long serialized_onnx_model_size); - - /** \brief Returns whether the specified operator may be supported by the - * parser. - * - * Note that a result of true does not guarantee that the operator will be - * supported in all cases (i.e., this function may return false-positives). 
- * - * @param op_name The name of the ONNX operator to check for support - */ - public native @Cast("bool") boolean supportsOperator(String op_name); - public native @Cast("bool") boolean supportsOperator(@Cast("const char*") BytePointer op_name); - /** \brief destroy this object - * - * \warning deprecated and planned on being removed in TensorRT 10.0 - */ - public native @Deprecated void destroy(); - /** \brief Get the number of errors that occurred during prior calls to - * \p parse - * - * @see getError() clearErrors() IParserError - */ - public native int getNbErrors(); - /** \brief Get an error that occurred during prior calls to \p parse - * - * @see getNbErrors() clearErrors() IParserError - */ - public native @Const IParserError getError(int index); - /** \brief Clear errors from prior calls to \p parse - * - * @see getNbErrors() getError() IParserError - */ - public native void clearErrors(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvonnxparser/IParserError.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvonnxparser/IParserError.java deleted file mode 100644 index 387bd577431..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvonnxparser/IParserError.java +++ /dev/null @@ -1,54 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvonnxparser; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; -import org.bytedeco.tensorrt.nvinfer.*; -import static org.bytedeco.tensorrt.global.nvinfer.*; -import 
org.bytedeco.tensorrt.nvinfer_plugin.*; -import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; - -import static org.bytedeco.tensorrt.global.nvonnxparser.*; - - -/** \class IParserError - * - * \brief an object containing information about an error - */ -@Namespace("nvonnxparser") @Properties(inherit = org.bytedeco.tensorrt.presets.nvonnxparser.class) -public class IParserError extends Pointer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IParserError(Pointer p) { super(p); } - - /** \brief the error code - */ - public native org.bytedeco.tensorrt.global.nvonnxparser.ErrorCode code(); - /** \brief description of the error - */ - public native String desc(); - /** \brief source file in which the error occurred - */ - public native String file(); - /** \brief source line at which the error occurred - */ - public native int line(); - /** \brief source function in which the error occurred - */ - public native String func(); - /** \brief index of the ONNX model node in which the error occurred - */ - public native int node(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvonnxparser/SubGraphCollection_t.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvonnxparser/SubGraphCollection_t.java deleted file mode 100644 index 3e681c73aad..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvonnxparser/SubGraphCollection_t.java +++ /dev/null @@ -1,93 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvonnxparser; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static 
org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; -import org.bytedeco.tensorrt.nvinfer.*; -import static org.bytedeco.tensorrt.global.nvinfer.*; -import org.bytedeco.tensorrt.nvinfer_plugin.*; -import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; - -import static org.bytedeco.tensorrt.global.nvonnxparser.*; - -@Name("std::vector") @Properties(inherit = org.bytedeco.tensorrt.presets.nvonnxparser.class) -public class SubGraphCollection_t extends Pointer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public SubGraphCollection_t(Pointer p) { super(p); } - public SubGraphCollection_t(SubGraph_t value) { this(1); put(0, value); } - public SubGraphCollection_t(SubGraph_t ... array) { this(array.length); put(array); } - public SubGraphCollection_t() { allocate(); } - public SubGraphCollection_t(long n) { allocate(n); } - private native void allocate(); - private native void allocate(@Cast("size_t") long n); - public native @Name("operator =") @ByRef SubGraphCollection_t put(@ByRef SubGraphCollection_t x); - - public boolean empty() { return size() == 0; } - public native long size(); - public void clear() { resize(0); } - public native void resize(@Cast("size_t") long n); - - @Index(function = "at") public native @ByRef SubGraph_t get(@Cast("size_t") long i); - public native SubGraphCollection_t put(@Cast("size_t") long i, SubGraph_t value); - - public native @ByVal Iterator insert(@ByVal Iterator pos, @ByRef SubGraph_t value); - public native @ByVal Iterator erase(@ByVal Iterator pos); - public native @ByVal Iterator begin(); - public native @ByVal Iterator end(); - @NoOffset @Name("iterator") public static class Iterator extends Pointer { - public Iterator(Pointer p) { super(p); } - public Iterator() { } - - public native @Name("operator ++") @ByRef Iterator increment(); - public native @Name("operator ==") boolean equals(@ByRef 
Iterator it); - public native @Name("operator *") @ByRef @Const SubGraph_t get(); - } - - public SubGraph_t[] get() { - SubGraph_t[] array = new SubGraph_t[size() < Integer.MAX_VALUE ? (int)size() : Integer.MAX_VALUE]; - for (int i = 0; i < array.length; i++) { - array[i] = get(i); - } - return array; - } - @Override public String toString() { - return java.util.Arrays.toString(get()); - } - - public SubGraph_t pop_back() { - long size = size(); - SubGraph_t value = get(size - 1); - resize(size - 1); - return value; - } - public SubGraphCollection_t push_back(SubGraph_t value) { - long size = size(); - resize(size + 1); - return put(size, value); - } - public SubGraphCollection_t put(SubGraph_t value) { - if (size() != 1) { resize(1); } - return put(0, value); - } - public SubGraphCollection_t put(SubGraph_t ... array) { - if (size() != array.length) { resize(array.length); } - for (int i = 0; i < array.length; i++) { - put(i, array[i]); - } - return this; - } -} - diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvonnxparser/SubGraph_t.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvonnxparser/SubGraph_t.java deleted file mode 100644 index 57e1927cb7b..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvonnxparser/SubGraph_t.java +++ /dev/null @@ -1,45 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvonnxparser; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; -import org.bytedeco.tensorrt.nvinfer.*; -import static 
org.bytedeco.tensorrt.global.nvinfer.*; -import org.bytedeco.tensorrt.nvinfer_plugin.*; -import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; - -import static org.bytedeco.tensorrt.global.nvonnxparser.*; - -@NoOffset @Name("std::pair,bool>") @Properties(inherit = org.bytedeco.tensorrt.presets.nvonnxparser.class) -public class SubGraph_t extends Pointer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public SubGraph_t(Pointer p) { super(p); } - public SubGraph_t(SizeTPointer firstValue, boolean secondValue) { this(); put(firstValue, secondValue); } - public SubGraph_t() { allocate(); } - private native void allocate(); - public native @Name("operator =") @ByRef SubGraph_t put(@ByRef SubGraph_t x); - - - @MemberGetter public native @StdVector SizeTPointer first(); public native SubGraph_t first(SizeTPointer first); - @MemberGetter public native @Cast("bool") boolean second(); public native SubGraph_t second(boolean second); - - public SubGraph_t put(SizeTPointer firstValue, boolean secondValue) { - first(firstValue); - second(secondValue); - return this; - } -} - diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/FieldCollection.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/FieldCollection.java deleted file mode 100644 index 6e452afd6b8..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/FieldCollection.java +++ /dev/null @@ -1,46 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvparsers; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static 
org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; -import org.bytedeco.tensorrt.nvinfer.*; -import static org.bytedeco.tensorrt.global.nvinfer.*; -import org.bytedeco.tensorrt.nvinfer_plugin.*; -import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; - -import static org.bytedeco.tensorrt.global.nvparsers.*; - - -@Namespace("nvuffparser") @Properties(inherit = org.bytedeco.tensorrt.presets.nvparsers.class) -public class FieldCollection extends Pointer { - static { Loader.load(); } - /** Default native constructor. */ - public FieldCollection() { super((Pointer)null); allocate(); } - /** Native array allocator. Access with {@link Pointer#position(long)}. */ - public FieldCollection(long size) { super((Pointer)null); allocateArray(size); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public FieldCollection(Pointer p) { super(p); } - private native void allocate(); - private native void allocateArray(long size); - @Override public FieldCollection position(long position) { - return (FieldCollection)super.position(position); - } - @Override public FieldCollection getPointer(long i) { - return new FieldCollection((Pointer)this).offsetAddress(i); - } - - public native int nbFields(); public native FieldCollection nbFields(int setter); - public native @Const FieldMap fields(); public native FieldCollection fields(FieldMap setter); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/FieldMap.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/FieldMap.java deleted file mode 100644 index 2902faaba83..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/FieldMap.java +++ /dev/null @@ -1,54 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvparsers; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; 
- -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; -import org.bytedeco.tensorrt.nvinfer.*; -import static org.bytedeco.tensorrt.global.nvinfer.*; -import org.bytedeco.tensorrt.nvinfer_plugin.*; -import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; - -import static org.bytedeco.tensorrt.global.nvparsers.*; - - -/** - * \class FieldMap - * - * \brief An array of field params used as a layer parameter for plugin layers. - * - * The node fields are passed by the parser to the API through the plugin - * constructor. The implementation of the plugin should parse the contents of - * the fieldMap as part of the plugin constructor - * */ -@Namespace("nvuffparser") @NoOffset @Properties(inherit = org.bytedeco.tensorrt.presets.nvparsers.class) -public class FieldMap extends Pointer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public FieldMap(Pointer p) { super(p); } - - public native String name(); public native FieldMap name(String setter); - public native @Const Pointer data(); public native FieldMap data(Pointer setter); - public native FieldType type(); public native FieldMap type(FieldType setter); - public native int length(); public native FieldMap length(int setter); - - public FieldMap(String name, @Const Pointer data, FieldType type, int length/*=1*/) { super((Pointer)null); allocate(name, data, type, length); } - private native void allocate(String name, @Const Pointer data, FieldType type, int length/*=1*/); - public FieldMap(String name, @Const Pointer data, FieldType type) { super((Pointer)null); allocate(name, data, type); } - private native void allocate(String name, @Const Pointer data, FieldType type); - public FieldMap(@Cast("const char*") BytePointer name, @Const Pointer data, @Cast("nvuffparser::FieldType") int type, int length/*=1*/) { super((Pointer)null); allocate(name, data, type, length); } - private native void allocate(@Cast("const char*") BytePointer name, @Const Pointer data, @Cast("nvuffparser::FieldType") int type, int length/*=1*/); - public FieldMap(@Cast("const char*") BytePointer name, @Const Pointer data, @Cast("nvuffparser::FieldType") int type) { super((Pointer)null); allocate(name, data, type); } - private native void allocate(@Cast("const char*") BytePointer name, @Const Pointer data, @Cast("nvuffparser::FieldType") int type); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/IBinaryProtoBlob.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/IBinaryProtoBlob.java deleted file mode 100644 index 7efdc74ee6c..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/IBinaryProtoBlob.java +++ /dev/null @@ -1,54 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvparsers; - -import java.nio.*; -import 
org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; -import org.bytedeco.tensorrt.nvinfer.*; -import static org.bytedeco.tensorrt.global.nvinfer.*; -import org.bytedeco.tensorrt.nvinfer_plugin.*; -import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; - -import static org.bytedeco.tensorrt.global.nvparsers.*; - - -/** - * \class IBinaryProtoBlob - * - * \brief Object used to store and query data extracted from a binaryproto file using the ICaffeParser. - * - * @see nvcaffeparser1::ICaffeParser - * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. - * */ -@Namespace("nvcaffeparser1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvparsers.class) -public class IBinaryProtoBlob extends Pointer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IBinaryProtoBlob(Pointer p) { super(p); } - - public native @Const @NoException(true) Pointer getData(); - public native @ByVal @NoException(true) Dims4 getDimensions(); - - //! - //! - //! - public native @NoException(true) DataType getDataType(); - /** - * @deprecated Deprecated interface will be removed in TensorRT 10.0. - * - * \warning Calling destroy on a managed pointer will result in a double-free error. 
- * */ - public native @Deprecated @NoException(true) void destroy(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/IBlobNameToTensor.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/IBlobNameToTensor.java deleted file mode 100644 index 30d6017a14e..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/IBlobNameToTensor.java +++ /dev/null @@ -1,51 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvparsers; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; -import org.bytedeco.tensorrt.nvinfer.*; -import static org.bytedeco.tensorrt.global.nvinfer.*; -import org.bytedeco.tensorrt.nvinfer_plugin.*; -import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; - -import static org.bytedeco.tensorrt.global.nvparsers.*; - - -/** - * \class IBlobNameToTensor - * - * \brief Object used to store and query Tensors after they have been extracted from a Caffe model using the ICaffeParser. - * - * \note The lifetime of IBlobNameToTensor is the same as the lifetime of its parent ICaffeParser. - * - * @see nvcaffeparser1::ICaffeParser - * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. - * */ -@Namespace("nvcaffeparser1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvparsers.class) -public class IBlobNameToTensor extends Pointer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ - public IBlobNameToTensor(Pointer p) { super(p); } - - /** \brief Given a blob name, returns a pointer to a ITensor object. - * - * @param name Caffe blob name for which the user wants the corresponding ITensor. - * - * @return ITensor* corresponding to the queried name. If no such ITensor exists, then nullptr is returned. - * */ - public native @NoException(true) ITensor find(String name); - public native @NoException(true) ITensor find(@Cast("const char*") BytePointer name); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/ICaffeParser.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/ICaffeParser.java deleted file mode 100644 index 2e8b185168f..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/ICaffeParser.java +++ /dev/null @@ -1,195 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvparsers; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; -import org.bytedeco.tensorrt.nvinfer.*; -import static org.bytedeco.tensorrt.global.nvinfer.*; -import org.bytedeco.tensorrt.nvinfer_plugin.*; -import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; - -import static org.bytedeco.tensorrt.global.nvparsers.*; - -/** - * \class ICaffeParser - * - * \brief Class used for parsing Caffe models. - * - * Allows users to export models trained using Caffe to TRT. - * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. 
- * */ -@Namespace("nvcaffeparser1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvparsers.class) -public class ICaffeParser extends Pointer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public ICaffeParser(Pointer p) { super(p); } - - /** - * \brief Parse a prototxt file and a binaryproto Caffe model to extract - * network definition and weights associated with the network, respectively. - * - * @param deploy The plain text, prototxt file used to define the network definition. - * @param model The binaryproto Caffe model that contains the weights associated with the network. - * @param network Network in which the CaffeParser will fill the layers. - * @param weightType The type to which the weights will transformed. - * - * @return A pointer to an IBlobNameToTensor object that contains the extracted data. - * - * @see nvcaffeparser1::IBlobNameToTensor - * */ - - - //! - //! - //! - //! - //! - public native @Const @NoException(true) IBlobNameToTensor parse(String deploy, String model, @ByRef INetworkDefinition network, - DataType weightType); - public native @Const @NoException(true) IBlobNameToTensor parse(@Cast("const char*") BytePointer deploy, @Cast("const char*") BytePointer model, @ByRef INetworkDefinition network, - @Cast("nvinfer1::DataType") int weightType); - - /** - * \brief Parse a deploy prototxt and a binaryproto Caffe model from memory buffers to extract - * network definition and weights associated with the network, respectively. - * - * @param deployBuffer The plain text deploy prototxt used to define the network definition. - * @param deployLength The length of the deploy buffer. - * @param modelBuffer The binaryproto Caffe memory buffer that contains the weights associated with the network. - * @param modelLength The length of the model buffer. - * @param network Network in which the CaffeParser will fill the layers. 
- * @param weightType The type to which the weights will transformed. - * - * @return A pointer to an IBlobNameToTensor object that contains the extracted data. - * - * @see nvcaffeparser1::IBlobNameToTensor - * */ - - - //! - //! - //! - //! - //! - //! - public native @Const @NoException(true) IBlobNameToTensor parseBuffers(String deployBuffer, @Cast("std::size_t") long deployLength, - String modelBuffer, @Cast("std::size_t") long modelLength, @ByRef INetworkDefinition network, - DataType weightType); - public native @Const @NoException(true) IBlobNameToTensor parseBuffers(@Cast("const char*") BytePointer deployBuffer, @Cast("std::size_t") long deployLength, - @Cast("const char*") BytePointer modelBuffer, @Cast("std::size_t") long modelLength, @ByRef INetworkDefinition network, - @Cast("nvinfer1::DataType") int weightType); - - /** - * \brief Parse and extract data stored in binaryproto file. - * - * The binaryproto file contains data stored in a binary blob. parseBinaryProto() converts it - * to an IBinaryProtoBlob object which gives the user access to the data and meta-data about data. - * - * @param fileName Path to file containing binary proto. - * - * @return A pointer to an IBinaryProtoBlob object that contains the extracted data. - * - * @see nvcaffeparser1::IBinaryProtoBlob - * */ - - - //! - //! - //! - //! - public native @NoException(true) IBinaryProtoBlob parseBinaryProto(String fileName); - public native @NoException(true) IBinaryProtoBlob parseBinaryProto(@Cast("const char*") BytePointer fileName); - - /** - * \brief Set buffer size for the parsing and storage of the learned model. - * - * @param size The size of the buffer specified as the number of bytes. - * - * \note Default size is 2^30 bytes. - * */ - - - //! - //! - //! - //! - public native @NoException(true) void setProtobufBufferSize(@Cast("size_t") long size); - - /** - * \brief Destroy this ICaffeParser object. - * - * @deprecated Deprecated interface will be removed in TensorRT 10.0. 
- * - * \warning Calling destroy on a managed pointer will result in a double-free error. - * */ - - - //! - //! - //! - public native @Deprecated @NoException(true) void destroy(); - - /** - * \brief Set the IPluginFactoryV2 used to create the user defined pluginV2 objects. - * - * @param factory Pointer to an instance of the user implementation of IPluginFactoryV2. - * */ - - - //! - //! - public native @NoException(true) void setPluginFactoryV2(IPluginFactoryV2 factory); - - /** - * \brief Set the namespace used to lookup and create plugins in the network. - * */ - public native @NoException(true) void setPluginNamespace(String libNamespace); - public native @NoException(true) void setPluginNamespace(@Cast("const char*") BytePointer libNamespace); - /** - * \brief Set the ErrorRecorder for this interface - * - * Assigns the ErrorRecorder to this interface. The ErrorRecorder will track all errors during execution. - * This function will call incRefCount of the registered ErrorRecorder at least once. Setting - * recorder to nullptr unregisters the recorder with the interface, resulting in a call to decRefCount if - * a recorder has been registered. - * - * If an error recorder is not set, messages will be sent to the global log stream. - * - * @param recorder The error recorder to register with this interface. - * - * @see getErrorRecorder() - * */ - - - //! - //! - //! - //! - //! - public native @NoException(true) void setErrorRecorder(IErrorRecorder recorder); - - /** - * \brief get the ErrorRecorder assigned to this interface. - * - * Retrieves the assigned error recorder object for the given class. A - * nullptr will be returned if setErrorRecorder has not been called. - * - * @return A pointer to the IErrorRecorder object that has been registered. 
- * - * @see setErrorRecorder() - * */ - public native @NoException(true) IErrorRecorder getErrorRecorder(); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/IPluginFactoryV2.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/IPluginFactoryV2.java deleted file mode 100644 index 2f7209e4965..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/IPluginFactoryV2.java +++ /dev/null @@ -1,66 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvparsers; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static org.bytedeco.cuda.global.nvrtc.*; -import org.bytedeco.tensorrt.nvinfer.*; -import static org.bytedeco.tensorrt.global.nvinfer.*; -import org.bytedeco.tensorrt.nvinfer_plugin.*; -import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; - -import static org.bytedeco.tensorrt.global.nvparsers.*; - - -/** - * \class IPluginFactoryV2 - * - * \brief Plugin factory used to configure plugins. - * */ -@Namespace("nvcaffeparser1") @Properties(inherit = org.bytedeco.tensorrt.presets.nvparsers.class) -public class IPluginFactoryV2 extends Pointer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IPluginFactoryV2(Pointer p) { super(p); } - - /** - * \brief A user implemented function that determines if a layer configuration is provided by an IPluginV2. - * - * @param layerName Name of the layer which the user wishes to validate. - * */ - - - //! - //! - //! 
- public native @Cast("bool") @NoException(true) boolean isPluginV2(String layerName); - public native @Cast("bool") @NoException(true) boolean isPluginV2(@Cast("const char*") BytePointer layerName); - - /** - * \brief Creates a plugin. - * - * @param layerName Name of layer associated with the plugin. - * @param weights Weights used for the layer. - * @param nbWeights Number of weights. - * @param libNamespace Library Namespace associated with the plugin object - * */ - public native @NoException(true) IPluginV2 createPlugin(String layerName, @Const Weights weights, - int nbWeights, String libNamespace/*=""*/); - public native @NoException(true) IPluginV2 createPlugin(String layerName, @Const Weights weights, - int nbWeights); - public native @NoException(true) IPluginV2 createPlugin(@Cast("const char*") BytePointer layerName, @Const Weights weights, - int nbWeights, @Cast("const char*") BytePointer libNamespace/*=""*/); - public native @NoException(true) IPluginV2 createPlugin(@Cast("const char*") BytePointer layerName, @Const Weights weights, - int nbWeights); -} diff --git a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/IUffParser.java b/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/IUffParser.java deleted file mode 100644 index 47016d71da1..00000000000 --- a/tritonserver/src/gen.nouse/java/org/bytedeco/tensorrt/nvparsers/IUffParser.java +++ /dev/null @@ -1,180 +0,0 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE - -package org.bytedeco.tensorrt.nvparsers; - -import java.nio.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.annotation.*; - -import static org.bytedeco.javacpp.presets.javacpp.*; -import org.bytedeco.cuda.cudart.*; -import static org.bytedeco.cuda.global.cudart.*; -import org.bytedeco.cuda.cublas.*; -import static org.bytedeco.cuda.global.cublas.*; -import org.bytedeco.cuda.cudnn.*; -import static org.bytedeco.cuda.global.cudnn.*; -import org.bytedeco.cuda.nvrtc.*; -import static 
org.bytedeco.cuda.global.nvrtc.*; -import org.bytedeco.tensorrt.nvinfer.*; -import static org.bytedeco.tensorrt.global.nvinfer.*; -import org.bytedeco.tensorrt.nvinfer_plugin.*; -import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; - -import static org.bytedeco.tensorrt.global.nvparsers.*; - - -/** - * \class IUffParser - * - * \brief Class used for parsing models described using the UFF format. - * - * \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. - * */ -@Namespace("nvuffparser") @Properties(inherit = org.bytedeco.tensorrt.presets.nvparsers.class) -public class IUffParser extends Pointer { - static { Loader.load(); } - /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ - public IUffParser(Pointer p) { super(p); } - - /** - * \brief Register an input name of a UFF network with the associated Dimensions. - * - * @param inputName Input name. - * @param inputDims Input dimensions. - * @param inputOrder Input order on which the framework input was originally. - * */ - - - //! - //! - //! - public native @Cast("bool") @NoException(true) boolean registerInput(String inputName, @ByVal @Cast("nvinfer1::Dims*") Dims32 inputDims, UffInputOrder inputOrder); - public native @Cast("bool") @NoException(true) boolean registerInput(@Cast("const char*") BytePointer inputName, @ByVal @Cast("nvinfer1::Dims*") Dims32 inputDims, @Cast("nvuffparser::UffInputOrder") int inputOrder); - - /** - * \brief Register an output name of a UFF network. - * - * @param outputName Output name. - * */ - - - //! - //! - //! - public native @Cast("bool") @NoException(true) boolean registerOutput(String outputName); - public native @Cast("bool") @NoException(true) boolean registerOutput(@Cast("const char*") BytePointer outputName); - - /** - * \brief Parse a UFF file. - * - * @param file File name of the UFF file. - * @param network Network in which the UFFParser will fill the layers. 
- * @param weightsType The type on which the weights will transformed in. - * */ - - - //! - //! - //! - public native @Cast("bool") @NoException(true) boolean parse(String file, @ByRef INetworkDefinition network, - DataType weightsType/*=nvinfer1::DataType::kFLOAT*/); - public native @Cast("bool") @NoException(true) boolean parse(String file, @ByRef INetworkDefinition network); - public native @Cast("bool") @NoException(true) boolean parse(@Cast("const char*") BytePointer file, @ByRef INetworkDefinition network, - @Cast("nvinfer1::DataType") int weightsType/*=nvinfer1::DataType::kFLOAT*/); - public native @Cast("bool") @NoException(true) boolean parse(@Cast("const char*") BytePointer file, @ByRef INetworkDefinition network); - - /** - * \brief Parse a UFF buffer, useful if the file already live in memory. - * - * @param buffer Buffer of the UFF file. - * @param size Size of buffer of the UFF file. - * @param network Network in which the UFFParser will fill the layers. - * @param weightsType The type on which the weights will transformed in. - * */ - - - //! - //! - public native @Cast("bool") @NoException(true) boolean parseBuffer(String buffer, @Cast("std::size_t") long size, @ByRef INetworkDefinition network, - DataType weightsType/*=nvinfer1::DataType::kFLOAT*/); - public native @Cast("bool") @NoException(true) boolean parseBuffer(String buffer, @Cast("std::size_t") long size, @ByRef INetworkDefinition network); - public native @Cast("bool") @NoException(true) boolean parseBuffer(@Cast("const char*") BytePointer buffer, @Cast("std::size_t") long size, @ByRef INetworkDefinition network, - @Cast("nvinfer1::DataType") int weightsType/*=nvinfer1::DataType::kFLOAT*/); - public native @Cast("bool") @NoException(true) boolean parseBuffer(@Cast("const char*") BytePointer buffer, @Cast("std::size_t") long size, @ByRef INetworkDefinition network); - - /** - * @deprecated Deprecated interface will be removed in TensorRT 10.0. - * */ - - - //! - //! 
- public native @Deprecated @NoException(true) void destroy(); - - /** - * \brief Return Version Major of the UFF. - * */ - - - //! - //! - public native @NoException(true) int getUffRequiredVersionMajor(); - - /** - * \brief Return Version Minor of the UFF. - * */ - - - //! - //! - public native @NoException(true) int getUffRequiredVersionMinor(); - - /** - * \brief Return Patch Version of the UFF. - * */ - - - //! - //! - public native @NoException(true) int getUffRequiredVersionPatch(); - - /** - * \brief Set the namespace used to lookup and create plugins in the network. - * */ - public native @NoException(true) void setPluginNamespace(String libNamespace); - public native @NoException(true) void setPluginNamespace(@Cast("const char*") BytePointer libNamespace); - /** - * \brief Set the ErrorRecorder for this interface - * - * Assigns the ErrorRecorder to this interface. The ErrorRecorder will track all errors during execution. - * This function will call incRefCount of the registered ErrorRecorder at least once. Setting - * recorder to nullptr unregisters the recorder with the interface, resulting in a call to decRefCount if - * a recorder has been registered. - * - * If an error recorder is not set, messages will be sent to the global log stream. - * - * @param recorder The error recorder to register with this interface. */ - // - /** @see getErrorRecorder() - /** */ - - - //! - //! - //! - //! - //! - public native @NoException(true) void setErrorRecorder(IErrorRecorder recorder); - - /** - * \brief get the ErrorRecorder assigned to this interface. - * - * Retrieves the assigned error recorder object for the given class. A - * nullptr will be returned if setErrorRecorder has not been called. - * - * @return A pointer to the IErrorRecorder object that has been registered. 
- * - * @see setErrorRecorder() - * */ - public native @NoException(true) IErrorRecorder getErrorRecorder(); -} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/global/tritonserver.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/global/tritonserver.java index d4f1977d529..97859a1fe6d 100644 --- a/tritonserver/src/gen/java/org/bytedeco/tritonserver/global/tritonserver.java +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/global/tritonserver.java @@ -29,7 +29,7 @@ public class tritonserver extends org.bytedeco.tritonserver.presets.tritonserver { static { Loader.load(); } -// Parsed from tritonbackend.h +// Parsed from tritonserver.h // Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. // @@ -58,2127 +58,2198 @@ public class tritonserver extends org.bytedeco.tritonserver.presets.tritonserver // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // #pragma once +/** \file */ + +// #include // #include // #include -// #include "triton/core/tritonserver.h" // #ifdef __cplusplus // #endif -// #ifdef _COMPILING_TRITONBACKEND +// #ifdef _COMPILING_TRITONSERVER // #if defined(_MSC_VER) -// #define TRITONBACKEND_DECLSPEC __declspec(dllexport) -// #define TRITONBACKEND_ISPEC __declspec(dllimport) +// #define TRITONSERVER_DECLSPEC __declspec(dllexport) // #elif defined(__GNUC__) -// #define TRITONBACKEND_DECLSPEC __attribute__((__visibility__("default"))) -// #define TRITONBACKEND_ISPEC +// #define TRITONSERVER_DECLSPEC __attribute__((__visibility__("default"))) // #else -// #define TRITONBACKEND_DECLSPEC -// #define TRITONBACKEND_ISPEC +// #define TRITONSERVER_DECLSPEC // #endif // #else // #if defined(_MSC_VER) -// #define TRITONBACKEND_DECLSPEC __declspec(dllimport) -// #define TRITONBACKEND_ISPEC __declspec(dllexport) +// #define TRITONSERVER_DECLSPEC __declspec(dllimport) // #else -// #define TRITONBACKEND_DECLSPEC -// #define TRITONBACKEND_ISPEC -// Targeting ../tritonserver/TRITONBACKEND_MemoryManager.java 
+// #define TRITONSERVER_DECLSPEC +// Targeting ../tritonserver/TRITONSERVER_Error.java -// Targeting ../tritonserver/TRITONBACKEND_Input.java +// Targeting ../tritonserver/TRITONSERVER_InferenceRequest.java -// Targeting ../tritonserver/TRITONBACKEND_Output.java +// Targeting ../tritonserver/TRITONSERVER_InferenceResponse.java -// Targeting ../tritonserver/TRITONBACKEND_Request.java +// Targeting ../tritonserver/TRITONSERVER_InferenceTrace.java -// Targeting ../tritonserver/TRITONBACKEND_ResponseFactory.java +// Targeting ../tritonserver/TRITONSERVER_Message.java -// Targeting ../tritonserver/TRITONBACKEND_Response.java +// Targeting ../tritonserver/TRITONSERVER_Metrics.java -// Targeting ../tritonserver/TRITONBACKEND_Backend.java +// Targeting ../tritonserver/TRITONSERVER_ResponseAllocator.java -// Targeting ../tritonserver/TRITONBACKEND_Model.java +// Targeting ../tritonserver/TRITONSERVER_Server.java -// Targeting ../tritonserver/TRITONBACKEND_ModelInstance.java +// Targeting ../tritonserver/TRITONSERVER_ServerOptions.java /** - * TRITONBACKEND API Version + * TRITONSERVER API Version * - * The TRITONBACKEND API is versioned with major and minor version + * The TRITONSERVER API is versioned with major and minor version * numbers. Any change to the API that does not impact backwards * compatibility (for example, adding a non-required function) * increases the minor version number. Any change that breaks * backwards compatibility (for example, deleting or changing the * behavior of a function) increases the major version number. A - * backend should check that the API version used to compile the - * backend is compatible with the API version of the Triton server - * that it is running in. This is typically done by code similar to - * the following which makes sure that the major versions are equal - * and that the minor version of Triton is >= the minor version used - * to build the backend. 
+ * client should check that the API version used to compile the + * client is compatible with the API version of the Triton shared + * library that it is linking against. This is typically done by code + * similar to the following which makes sure that the major versions + * are equal and that the minor version of the Triton shared library + * is >= the minor version used to build the client. * * uint32_t api_version_major, api_version_minor; - * TRITONBACKEND_ApiVersion(&api_version_major, &api_version_minor); - * if ((api_version_major != TRITONBACKEND_API_VERSION_MAJOR) || - * (api_version_minor < TRITONBACKEND_API_VERSION_MINOR)) { + * TRITONSERVER_ApiVersion(&api_version_major, &api_version_minor); + * if ((api_version_major != TRITONSERVER_API_VERSION_MAJOR) || + * (api_version_minor < TRITONSERVER_API_VERSION_MINOR)) { * return TRITONSERVER_ErrorNew( * TRITONSERVER_ERROR_UNSUPPORTED, - * "triton backend API version does not support this backend"); + * "triton server API version does not support this client"); * } * */ -public static final int TRITONBACKEND_API_VERSION_MAJOR = 1; +public static final int TRITONSERVER_API_VERSION_MAJOR = 1; /// -public static final int TRITONBACKEND_API_VERSION_MINOR = 4; +public static final int TRITONSERVER_API_VERSION_MINOR = 3; -/** Get the TRITONBACKEND API version supported by Triton. This value - * can be compared against the TRITONBACKEND_API_VERSION_MAJOR and - * TRITONBACKEND_API_VERSION_MINOR used to build the backend to - * ensure that Triton is compatible with the backend. +/** Get the TRITONBACKEND API version supported by the Triton shared + * library. This value can be compared against the + * TRITONSERVER_API_VERSION_MAJOR and TRITONSERVER_API_VERSION_MINOR + * used to build the client to ensure that Triton shared library is + * compatible with the client. * - * @param major Returns the TRITONBACKEND API major version supported + * @param major Returns the TRITONSERVER API major version supported * by Triton. 
- * @param minor Returns the TRITONBACKEND API minor version supported + * @param minor Returns the TRITONSERVER API minor version supported * by Triton. * @return a TRITONSERVER_Error indicating success or failure. */ /// /// -/// -public static native TRITONSERVER_Error TRITONBACKEND_ApiVersion( +public static native TRITONSERVER_Error TRITONSERVER_ApiVersion( @Cast("uint32_t*") IntPointer major, @Cast("uint32_t*") IntPointer minor); -public static native TRITONSERVER_Error TRITONBACKEND_ApiVersion( +public static native TRITONSERVER_Error TRITONSERVER_ApiVersion( @Cast("uint32_t*") IntBuffer major, @Cast("uint32_t*") IntBuffer minor); -public static native TRITONSERVER_Error TRITONBACKEND_ApiVersion( +public static native TRITONSERVER_Error TRITONSERVER_ApiVersion( @Cast("uint32_t*") int[] major, @Cast("uint32_t*") int[] minor); -/** TRITONBACKEND_ArtifactType - * - * The ways that the files that make up a backend or model are - * communicated to the backend. +/** TRITONSERVER_DataType * - * TRITONBACKEND_ARTIFACT_FILESYSTEM: The model or backend - * artifacts are made available to Triton via a locally - * accessible filesystem. The backend can access these files - * using an appropriate system API. + * Tensor data types recognized by TRITONSERVER. 
* */ -public enum TRITONBACKEND_ArtifactType { - TRITONBACKEND_ARTIFACT_FILESYSTEM(0); +public enum TRITONSERVER_DataType { + TRITONSERVER_TYPE_INVALID(0), + TRITONSERVER_TYPE_BOOL(1), + TRITONSERVER_TYPE_UINT8(2), + TRITONSERVER_TYPE_UINT16(3), + TRITONSERVER_TYPE_UINT32(4), + TRITONSERVER_TYPE_UINT64(5), + TRITONSERVER_TYPE_INT8(6), + TRITONSERVER_TYPE_INT16(7), + TRITONSERVER_TYPE_INT32(8), + TRITONSERVER_TYPE_INT64(9), + TRITONSERVER_TYPE_FP16(10), + TRITONSERVER_TYPE_FP32(11), + TRITONSERVER_TYPE_FP64(12), + TRITONSERVER_TYPE_BYTES(13); public final int value; - private TRITONBACKEND_ArtifactType(int v) { this.value = v; } - private TRITONBACKEND_ArtifactType(TRITONBACKEND_ArtifactType e) { this.value = e.value; } - public TRITONBACKEND_ArtifactType intern() { for (TRITONBACKEND_ArtifactType e : values()) if (e.value == value) return e; return this; } + private TRITONSERVER_DataType(int v) { this.value = v; } + private TRITONSERVER_DataType(TRITONSERVER_DataType e) { this.value = e.value; } + public TRITONSERVER_DataType intern() { for (TRITONSERVER_DataType e : values()) if (e.value == value) return e; return this; } @Override public String toString() { return intern().name(); } } - -/** - * TRITONBACKEND_MemoryManager - * - * Object representing an memory manager that is capable of - * allocating and otherwise managing different memory types. For - * improved performance Triton maintains pools for GPU and CPU-pinned - * memory and the memory manager allows backends to access those - * pools. - * -

- * Allocate a contiguous block of memory of a specific type using a - * memory manager. Two error codes have specific interpretations for - * this function: +/** Get the string representation of a data type. The returned string + * is not owned by the caller and so should not be modified or freed. * - * TRITONSERVER_ERROR_UNSUPPORTED: Indicates that Triton is - * incapable of allocating the requested memory type and memory - * type ID. Requests for the memory type and ID will always fail - * no matter 'byte_size' of the request. + * @param datatype The data type. + * @return The string representation of the data type. */ + +/// +public static native String TRITONSERVER_DataTypeString( + TRITONSERVER_DataType datatype); +public static native @Cast("const char*") BytePointer TRITONSERVER_DataTypeString( + @Cast("TRITONSERVER_DataType") int datatype); + +/** Get the Triton datatype corresponding to a string representation + * of a datatype. * - * TRITONSERVER_ERROR_UNAVAILABLE: Indicates that Triton can - * allocate the memory type and ID but that currently it cannot - * allocate a contiguous block of memory of the requested - * 'byte_size'. + * @param dtype The datatype string representation. + * @return The Triton data type or TRITONSERVER_TYPE_INVALID if the + * string does not represent a data type. */ + +/// +public static native TRITONSERVER_DataType TRITONSERVER_StringToDataType(String dtype); +public static native @Cast("TRITONSERVER_DataType") int TRITONSERVER_StringToDataType(@Cast("const char*") BytePointer dtype); + +/** Get the size of a Triton datatype in bytes. Zero is returned for + * TRITONSERVER_TYPE_BYTES because it have variable size. Zero is + * returned for TRITONSERVER_TYPE_INVALID. * - * @param manager The memory manager. - * @param buffer Returns the allocated memory. - * @param memory_type The type of memory to allocate. - * @param memory_type_id The ID associated with the memory type to - * allocate. 
For GPU memory this indicates the device ID of the GPU - * to allocate from. - * @param byte_size The size of memory to allocate, in bytes. - * @return a TRITONSERVER_Error indicating success or failure. */ + * @param dtype The datatype. + * @return The size of the datatype. */ /// -public static native TRITONSERVER_Error TRITONBACKEND_MemoryManagerAllocate( - TRITONBACKEND_MemoryManager manager, @Cast("void**") PointerPointer buffer, - @Const @ByVal TRITONSERVER_MemoryType memory_type, @Cast("const int64_t") long memory_type_id, - @Cast("const uint64_t") long byte_size); -public static native TRITONSERVER_Error TRITONBACKEND_MemoryManagerAllocate( - TRITONBACKEND_MemoryManager manager, @Cast("void**") @ByPtrPtr Pointer buffer, - @Const @ByVal TRITONSERVER_MemoryType memory_type, @Cast("const int64_t") long memory_type_id, - @Cast("const uint64_t") long byte_size); +/// +public static native @Cast("uint32_t") int TRITONSERVER_DataTypeByteSize(TRITONSERVER_DataType datatype); +public static native @Cast("uint32_t") int TRITONSERVER_DataTypeByteSize(@Cast("TRITONSERVER_DataType") int datatype); -/** Free a buffer that was previously allocated with - * TRITONBACKEND_MemoryManagerAllocate. The call must provide the - * same values for 'memory_type' and 'memory_type_id' as were used - * when the buffer was allocate or else the behavior is undefined. +/** TRITONSERVER_MemoryType * - * @param manager The memory manager. - * @param buffer The allocated memory buffer to free. - * @param memory_type The type of memory of the buffer. - * @param memory_type_id The ID associated with the memory type of - * the buffer. - * @return a TRITONSERVER_Error indicating success or failure. */ + * Types of memory recognized by TRITONSERVER. 
+ * */ +public enum TRITONSERVER_MemoryType { + TRITONSERVER_MEMORY_CPU(0), + TRITONSERVER_MEMORY_CPU_PINNED(1), + TRITONSERVER_MEMORY_GPU(2); + + public final int value; + private TRITONSERVER_MemoryType(int v) { this.value = v; } + private TRITONSERVER_MemoryType(TRITONSERVER_MemoryType e) { this.value = e.value; } + public TRITONSERVER_MemoryType intern() { for (TRITONSERVER_MemoryType e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} +/** Get the string representation of a memory type. The returned + * string is not owned by the caller and so should not be modified or + * freed. + * + * @param memtype The memory type. + * @return The string representation of the memory type. */ /// /// +public static native String TRITONSERVER_MemoryTypeString( + TRITONSERVER_MemoryType memtype); +public static native @Cast("const char*") BytePointer TRITONSERVER_MemoryTypeString( + @Cast("TRITONSERVER_MemoryType") int memtype); + +/** TRITONSERVER_ParameterType + * + * Types of parameters recognized by TRITONSERVER. + * */ +public enum TRITONSERVER_ParameterType { + TRITONSERVER_PARAMETER_STRING(0), + TRITONSERVER_PARAMETER_INT(1), + TRITONSERVER_PARAMETER_BOOL(2); + + public final int value; + private TRITONSERVER_ParameterType(int v) { this.value = v; } + private TRITONSERVER_ParameterType(TRITONSERVER_ParameterType e) { this.value = e.value; } + public TRITONSERVER_ParameterType intern() { for (TRITONSERVER_ParameterType e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + +/** Get the string representation of a parmeter type. The returned + * string is not owned by the caller and so should not be modified or + * freed. + * + * @param paramtype The parameter type. + * @return The string representation of the parameter type. 
*/ + /// /// -public static native TRITONSERVER_Error TRITONBACKEND_MemoryManagerFree( - TRITONBACKEND_MemoryManager manager, Pointer buffer, - @Const @ByVal TRITONSERVER_MemoryType memory_type, @Cast("const int64_t") long memory_type_id); +public static native String TRITONSERVER_ParameterTypeString( + TRITONSERVER_ParameterType paramtype); +public static native @Cast("const char*") BytePointer TRITONSERVER_ParameterTypeString( + @Cast("TRITONSERVER_ParameterType") int paramtype); -/** - * TRITONBACKEND_Input - * - * Object representing an input tensor. +/** TRITONSERVER_InstanceGroupKind * -

- * Get the name and properties of an input tensor. The returned - * strings and other properties are owned by the input, not the - * caller, and so should not be modified or freed. + * Kinds of instance groups recognized by TRITONSERVER. + * */ +public enum TRITONSERVER_InstanceGroupKind { + TRITONSERVER_INSTANCEGROUPKIND_AUTO(0), + TRITONSERVER_INSTANCEGROUPKIND_CPU(1), + TRITONSERVER_INSTANCEGROUPKIND_GPU(2), + TRITONSERVER_INSTANCEGROUPKIND_MODEL(3); + + public final int value; + private TRITONSERVER_InstanceGroupKind(int v) { this.value = v; } + private TRITONSERVER_InstanceGroupKind(TRITONSERVER_InstanceGroupKind e) { this.value = e.value; } + public TRITONSERVER_InstanceGroupKind intern() { for (TRITONSERVER_InstanceGroupKind e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + +/** Get the string representation of an instance-group kind. The + * returned string is not owned by the caller and so should not be + * modified or freed. * - * @param input The input tensor. - * @param name If non-nullptr, returns the tensor name. - * @param datatype If non-nullptr, returns the tensor datatype. - * @param shape If non-nullptr, returns the tensor shape. - * @param dim_count If non-nullptr, returns the number of dimensions - * in the tensor shape. - * @param byte_size If non-nullptr, returns the size of the available - * data for the tensor, in bytes. This size reflects the actual data - * available, and does not necessarily match what is - * expected/required for the tensor given its shape and datatype. It - * is the responsibility of the backend to handle mismatches in these - * sizes appropriately. - * @param buffer_count If non-nullptr, returns the number of buffers - * holding the contents of the tensor. These buffers are accessed - * using TRITONBACKEND_InputBuffer. - * @return a TRITONSERVER_Error indicating success or failure. */ + * @param kind The instance-group kind. 
+ * @return The string representation of the kind. */ /// -public static native TRITONSERVER_Error TRITONBACKEND_InputProperties( - TRITONBACKEND_Input input, @Cast("const char**") PointerPointer name, - TRITONSERVER_DataType datatype, @Cast("const int64_t**") PointerPointer shape, - @Cast("uint32_t*") IntPointer dims_count, @Cast("uint64_t*") LongPointer byte_size, @Cast("uint32_t*") IntPointer buffer_count); -public static native TRITONSERVER_Error TRITONBACKEND_InputProperties( - TRITONBACKEND_Input input, @Cast("const char**") @ByPtrPtr BytePointer name, - TRITONSERVER_DataType datatype, @Cast("const int64_t**") @ByPtrPtr LongPointer shape, - @Cast("uint32_t*") IntPointer dims_count, @Cast("uint64_t*") LongPointer byte_size, @Cast("uint32_t*") IntPointer buffer_count); -public static native TRITONSERVER_Error TRITONBACKEND_InputProperties( - TRITONBACKEND_Input input, @Cast("const char**") @ByPtrPtr ByteBuffer name, - TRITONSERVER_DataType datatype, @Cast("const int64_t**") @ByPtrPtr LongBuffer shape, - @Cast("uint32_t*") IntBuffer dims_count, @Cast("uint64_t*") LongBuffer byte_size, @Cast("uint32_t*") IntBuffer buffer_count); -public static native TRITONSERVER_Error TRITONBACKEND_InputProperties( - TRITONBACKEND_Input input, @Cast("const char**") @ByPtrPtr byte[] name, - TRITONSERVER_DataType datatype, @Cast("const int64_t**") @ByPtrPtr long[] shape, - @Cast("uint32_t*") int[] dims_count, @Cast("uint64_t*") long[] byte_size, @Cast("uint32_t*") int[] buffer_count); +/// +public static native String TRITONSERVER_InstanceGroupKindString( + TRITONSERVER_InstanceGroupKind kind); +public static native @Cast("const char*") BytePointer TRITONSERVER_InstanceGroupKindString( + @Cast("TRITONSERVER_InstanceGroupKind") int kind); -/** Get the name and properties of an input tensor associated with a given - * host policy. If there are no input buffers for the specified host policy, - * the properties of the fallback input buffers are returned. 
The returned - * strings and other properties are owned by the input, not the caller, and so - * should not be modified or freed. +/** TRITONSERVER_Logging * - * @param input The input tensor. - * @param host_policy_name The host policy name. Fallback input properties - * will be return if nullptr is provided. - * @param name If non-nullptr, returns the tensor name. - * @param datatype If non-nullptr, returns the tensor datatype. - * @param shape If non-nullptr, returns the tensor shape. - * @param dim_count If non-nullptr, returns the number of dimensions - * in the tensor shape. - * @param byte_size If non-nullptr, returns the size of the available - * data for the tensor, in bytes. This size reflects the actual data - * available, and does not necessarily match what is - * expected/required for the tensor given its shape and datatype. It - * is the responsibility of the backend to handle mismatches in these - * sizes appropriately. - * @param buffer_count If non-nullptr, returns the number of buffers - * holding the contents of the tensor. These buffers are accessed - * using TRITONBACKEND_InputBufferForHostPolicy. - * @return a TRITONSERVER_Error indicating success or failure. */ + * Types/levels of logging. 
+ * */ +public enum TRITONSERVER_LogLevel { + TRITONSERVER_LOG_INFO(0), + TRITONSERVER_LOG_WARN(1), + TRITONSERVER_LOG_ERROR(2), + TRITONSERVER_LOG_VERBOSE(3); -/// -public static native TRITONSERVER_Error TRITONBACKEND_InputPropertiesForHostPolicy( - TRITONBACKEND_Input input, String host_policy_name, @Cast("const char**") PointerPointer name, - TRITONSERVER_DataType datatype, @Cast("const int64_t**") PointerPointer shape, - @Cast("uint32_t*") IntPointer dims_count, @Cast("uint64_t*") LongPointer byte_size, @Cast("uint32_t*") IntPointer buffer_count); -public static native TRITONSERVER_Error TRITONBACKEND_InputPropertiesForHostPolicy( - TRITONBACKEND_Input input, String host_policy_name, @Cast("const char**") @ByPtrPtr BytePointer name, - TRITONSERVER_DataType datatype, @Cast("const int64_t**") @ByPtrPtr LongPointer shape, - @Cast("uint32_t*") IntPointer dims_count, @Cast("uint64_t*") LongPointer byte_size, @Cast("uint32_t*") IntPointer buffer_count); -public static native TRITONSERVER_Error TRITONBACKEND_InputPropertiesForHostPolicy( - TRITONBACKEND_Input input, @Cast("const char*") BytePointer host_policy_name, @Cast("const char**") @ByPtrPtr ByteBuffer name, - TRITONSERVER_DataType datatype, @Cast("const int64_t**") @ByPtrPtr LongBuffer shape, - @Cast("uint32_t*") IntBuffer dims_count, @Cast("uint64_t*") LongBuffer byte_size, @Cast("uint32_t*") IntBuffer buffer_count); -public static native TRITONSERVER_Error TRITONBACKEND_InputPropertiesForHostPolicy( - TRITONBACKEND_Input input, String host_policy_name, @Cast("const char**") @ByPtrPtr byte[] name, - TRITONSERVER_DataType datatype, @Cast("const int64_t**") @ByPtrPtr long[] shape, - @Cast("uint32_t*") int[] dims_count, @Cast("uint64_t*") long[] byte_size, @Cast("uint32_t*") int[] buffer_count); -public static native TRITONSERVER_Error TRITONBACKEND_InputPropertiesForHostPolicy( - TRITONBACKEND_Input input, @Cast("const char*") BytePointer host_policy_name, @Cast("const char**") @ByPtrPtr BytePointer name, - 
TRITONSERVER_DataType datatype, @Cast("const int64_t**") @ByPtrPtr LongPointer shape, - @Cast("uint32_t*") IntPointer dims_count, @Cast("uint64_t*") LongPointer byte_size, @Cast("uint32_t*") IntPointer buffer_count); -public static native TRITONSERVER_Error TRITONBACKEND_InputPropertiesForHostPolicy( - TRITONBACKEND_Input input, String host_policy_name, @Cast("const char**") @ByPtrPtr ByteBuffer name, - TRITONSERVER_DataType datatype, @Cast("const int64_t**") @ByPtrPtr LongBuffer shape, - @Cast("uint32_t*") IntBuffer dims_count, @Cast("uint64_t*") LongBuffer byte_size, @Cast("uint32_t*") IntBuffer buffer_count); -public static native TRITONSERVER_Error TRITONBACKEND_InputPropertiesForHostPolicy( - TRITONBACKEND_Input input, @Cast("const char*") BytePointer host_policy_name, @Cast("const char**") @ByPtrPtr byte[] name, - TRITONSERVER_DataType datatype, @Cast("const int64_t**") @ByPtrPtr long[] shape, - @Cast("uint32_t*") int[] dims_count, @Cast("uint64_t*") long[] byte_size, @Cast("uint32_t*") int[] buffer_count); + public final int value; + private TRITONSERVER_LogLevel(int v) { this.value = v; } + private TRITONSERVER_LogLevel(TRITONSERVER_LogLevel e) { this.value = e.value; } + public TRITONSERVER_LogLevel intern() { for (TRITONSERVER_LogLevel e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} -/** Get a buffer holding (part of) the tensor data for an input. For a - * given input the number of buffers composing the input are found - * from 'buffer_count' returned by TRITONBACKEND_InputProperties. The - * returned buffer is owned by the input and so should not be - * modified or freed by the caller. The lifetime of the buffer - * matches that of the input and so the buffer should not be accessed - * after the input tensor object is released. +/** Is a log level enabled? * - * @param input The input tensor. - * @param index The index of the buffer. 
Must be 0 <= index < - * buffer_count, where buffer_count is the value returned by - * TRITONBACKEND_InputProperties. - * @param buffer Returns a pointer to a contiguous block of data for - * the named input. - * @param buffer_byte_size Returns the size, in bytes, of 'buffer'. - * @param memory_type Acts as both input and output. On input gives - * the buffer memory type preferred by the function caller. Returns - * the actual memory type of 'buffer'. - * @param memory_type_id Acts as both input and output. On input - * gives the buffer memory type id preferred by the function caller. - * Returns the actual memory type id of 'buffer'. - * @return a TRITONSERVER_Error indicating success or failure. */ + * @param level The log level. + * @return True if the log level is enabled, false if not enabled. */ /// -public static native TRITONSERVER_Error TRITONBACKEND_InputBuffer( - TRITONBACKEND_Input input, @Cast("const uint32_t") int index, @Cast("const void**") PointerPointer buffer, - @Cast("uint64_t*") LongPointer buffer_byte_size, TRITONSERVER_MemoryType memory_type, - @Cast("int64_t*") LongPointer memory_type_id); -public static native TRITONSERVER_Error TRITONBACKEND_InputBuffer( - TRITONBACKEND_Input input, @Cast("const uint32_t") int index, @Cast("const void**") @ByPtrPtr Pointer buffer, - @Cast("uint64_t*") LongPointer buffer_byte_size, TRITONSERVER_MemoryType memory_type, - @Cast("int64_t*") LongPointer memory_type_id); -public static native TRITONSERVER_Error TRITONBACKEND_InputBuffer( - TRITONBACKEND_Input input, @Cast("const uint32_t") int index, @Cast("const void**") @ByPtrPtr Pointer buffer, - @Cast("uint64_t*") LongBuffer buffer_byte_size, TRITONSERVER_MemoryType memory_type, - @Cast("int64_t*") LongBuffer memory_type_id); -public static native TRITONSERVER_Error TRITONBACKEND_InputBuffer( - TRITONBACKEND_Input input, @Cast("const uint32_t") int index, @Cast("const void**") @ByPtrPtr Pointer buffer, - @Cast("uint64_t*") long[] buffer_byte_size, 
TRITONSERVER_MemoryType memory_type, - @Cast("int64_t*") long[] memory_type_id); +public static native @Cast("bool") boolean TRITONSERVER_LogIsEnabled( + TRITONSERVER_LogLevel level); +public static native @Cast("bool") boolean TRITONSERVER_LogIsEnabled( + @Cast("TRITONSERVER_LogLevel") int level); -/** Get a buffer holding (part of) the tensor data for an input for a specific - * host policy. If there are no input buffers specified for this host policy, - * the fallback input buffer is returned. - * For a given input the number of buffers composing the input are found - * from 'buffer_count' returned by TRITONBACKEND_InputPropertiesForHostPolicy. - * The returned buffer is owned by the input and so should not be modified or - * freed by the caller. The lifetime of the buffer matches that of the input - * and so the buffer should not be accessed after the input tensor object is - * released. +/** Log a message at a given log level if that level is enabled. * - * @param input The input tensor. - * @param host_policy_name The host policy name. Fallback input buffer - * will be return if nullptr is provided. - * @param index The index of the buffer. Must be 0 <= index < - * buffer_count, where buffer_count is the value returned by - * TRITONBACKEND_InputPropertiesForHostPolicy. - * @param buffer Returns a pointer to a contiguous block of data for - * the named input. - * @param buffer_byte_size Returns the size, in bytes, of 'buffer'. - * @param memory_type Acts as both input and output. On input gives - * the buffer memory type preferred by the function caller. Returns - * the actual memory type of 'buffer'. - * @param memory_type_id Acts as both input and output. On input - * gives the buffer memory type id preferred by the function caller. - * Returns the actual memory type id of 'buffer'. + * @param level The log level. + * @param filename The file name of the location of the log message. + * @param line The line number of the log message. 
+ * @param msg The log message. * @return a TRITONSERVER_Error indicating success or failure. */ - -/// /// /// /// -public static native TRITONSERVER_Error TRITONBACKEND_InputBufferForHostPolicy( - TRITONBACKEND_Input input, String host_policy_name, - @Cast("const uint32_t") int index, @Cast("const void**") PointerPointer buffer, @Cast("uint64_t*") LongPointer buffer_byte_size, - TRITONSERVER_MemoryType memory_type, @Cast("int64_t*") LongPointer memory_type_id); -public static native TRITONSERVER_Error TRITONBACKEND_InputBufferForHostPolicy( - TRITONBACKEND_Input input, String host_policy_name, - @Cast("const uint32_t") int index, @Cast("const void**") @ByPtrPtr Pointer buffer, @Cast("uint64_t*") LongPointer buffer_byte_size, - TRITONSERVER_MemoryType memory_type, @Cast("int64_t*") LongPointer memory_type_id); -public static native TRITONSERVER_Error TRITONBACKEND_InputBufferForHostPolicy( - TRITONBACKEND_Input input, @Cast("const char*") BytePointer host_policy_name, - @Cast("const uint32_t") int index, @Cast("const void**") @ByPtrPtr Pointer buffer, @Cast("uint64_t*") LongBuffer buffer_byte_size, - TRITONSERVER_MemoryType memory_type, @Cast("int64_t*") LongBuffer memory_type_id); -public static native TRITONSERVER_Error TRITONBACKEND_InputBufferForHostPolicy( - TRITONBACKEND_Input input, String host_policy_name, - @Cast("const uint32_t") int index, @Cast("const void**") @ByPtrPtr Pointer buffer, @Cast("uint64_t*") long[] buffer_byte_size, - TRITONSERVER_MemoryType memory_type, @Cast("int64_t*") long[] memory_type_id); -public static native TRITONSERVER_Error TRITONBACKEND_InputBufferForHostPolicy( - TRITONBACKEND_Input input, @Cast("const char*") BytePointer host_policy_name, - @Cast("const uint32_t") int index, @Cast("const void**") @ByPtrPtr Pointer buffer, @Cast("uint64_t*") LongPointer buffer_byte_size, - TRITONSERVER_MemoryType memory_type, @Cast("int64_t*") LongPointer memory_type_id); -public static native TRITONSERVER_Error 
TRITONBACKEND_InputBufferForHostPolicy( - TRITONBACKEND_Input input, String host_policy_name, - @Cast("const uint32_t") int index, @Cast("const void**") @ByPtrPtr Pointer buffer, @Cast("uint64_t*") LongBuffer buffer_byte_size, - TRITONSERVER_MemoryType memory_type, @Cast("int64_t*") LongBuffer memory_type_id); -public static native TRITONSERVER_Error TRITONBACKEND_InputBufferForHostPolicy( - TRITONBACKEND_Input input, @Cast("const char*") BytePointer host_policy_name, - @Cast("const uint32_t") int index, @Cast("const void**") @ByPtrPtr Pointer buffer, @Cast("uint64_t*") long[] buffer_byte_size, - TRITONSERVER_MemoryType memory_type, @Cast("int64_t*") long[] memory_type_id); +public static native TRITONSERVER_Error TRITONSERVER_LogMessage( + TRITONSERVER_LogLevel level, String filename, int line, + String msg); +public static native TRITONSERVER_Error TRITONSERVER_LogMessage( + @Cast("TRITONSERVER_LogLevel") int level, @Cast("const char*") BytePointer filename, int line, + @Cast("const char*") BytePointer msg); -/** - * TRITONBACKEND_Output +/** TRITONSERVER_Error * - * Object representing a response output tensor. + * Errors are reported by a TRITONSERVER_Error object. A NULL + * TRITONSERVER_Error indicates no error, a non-NULL TRITONSERVER_Error + * indicates error and the code and message for the error can be + * retrieved from the object. * -

- * Get a buffer to use to hold the tensor data for the output. The - * returned buffer is owned by the output and so should not be freed - * by the caller. The caller can and should fill the buffer with the - * output data for the tensor. The lifetime of the buffer matches - * that of the output and so the buffer should not be accessed after - * the output tensor object is released. + * The caller takes ownership of a TRITONSERVER_Error object returned by + * the API and must call TRITONSERVER_ErrorDelete to release the object. * - * @param buffer Returns a pointer to a buffer where the contents of - * the output tensor should be placed. - * @param buffer_byte_size The size, in bytes, of the buffer required - * by the caller. - * @param memory_type Acts as both input and output. On input gives - * the buffer memory type preferred by the caller. Returns the - * actual memory type of 'buffer'. - * @param memory_type_id Acts as both input and output. On input - * gives the buffer memory type id preferred by the caller. Returns - * the actual memory type id of 'buffer'. - * @return a TRITONSERVER_Error indicating success or failure. */ +

+ * The TRITONSERVER_Error error codes */ +public enum TRITONSERVER_Error_Code { + TRITONSERVER_ERROR_UNKNOWN(0), + TRITONSERVER_ERROR_INTERNAL(1), + TRITONSERVER_ERROR_NOT_FOUND(2), + TRITONSERVER_ERROR_INVALID_ARG(3), + TRITONSERVER_ERROR_UNAVAILABLE(4), + TRITONSERVER_ERROR_UNSUPPORTED(5), + TRITONSERVER_ERROR_ALREADY_EXISTS(6); + public final int value; + private TRITONSERVER_Error_Code(int v) { this.value = v; } + private TRITONSERVER_Error_Code(TRITONSERVER_Error_Code e) { this.value = e.value; } + public TRITONSERVER_Error_Code intern() { for (TRITONSERVER_Error_Code e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + +/** Create a new error object. The caller takes ownership of the + * TRITONSERVER_Error object and must call TRITONSERVER_ErrorDelete to + * release the object. + * + * @param code The error code. + * @param msg The error message. + * @return A new TRITONSERVER_Error object. */ /// -/// -/// -/// -public static native TRITONSERVER_Error TRITONBACKEND_OutputBuffer( - TRITONBACKEND_Output output, @Cast("void**") PointerPointer buffer, - @Cast("const uint64_t") long buffer_byte_size, TRITONSERVER_MemoryType memory_type, - @Cast("int64_t*") LongPointer memory_type_id); -public static native TRITONSERVER_Error TRITONBACKEND_OutputBuffer( - TRITONBACKEND_Output output, @Cast("void**") @ByPtrPtr Pointer buffer, - @Cast("const uint64_t") long buffer_byte_size, TRITONSERVER_MemoryType memory_type, - @Cast("int64_t*") LongPointer memory_type_id); -public static native TRITONSERVER_Error TRITONBACKEND_OutputBuffer( - TRITONBACKEND_Output output, @Cast("void**") @ByPtrPtr Pointer buffer, - @Cast("const uint64_t") long buffer_byte_size, TRITONSERVER_MemoryType memory_type, - @Cast("int64_t*") LongBuffer memory_type_id); -public static native TRITONSERVER_Error TRITONBACKEND_OutputBuffer( - TRITONBACKEND_Output output, @Cast("void**") @ByPtrPtr Pointer buffer, - @Cast("const 
uint64_t") long buffer_byte_size, TRITONSERVER_MemoryType memory_type, - @Cast("int64_t*") long[] memory_type_id); +public static native TRITONSERVER_Error TRITONSERVER_ErrorNew( + TRITONSERVER_Error_Code code, String msg); +public static native TRITONSERVER_Error TRITONSERVER_ErrorNew( + @Cast("TRITONSERVER_Error_Code") int code, @Cast("const char*") BytePointer msg); -/** - * TRITONBACKEND_Request +/** Delete an error object. * - * Object representing an inference request. + * @param error The error object. */ + +/// +public static native void TRITONSERVER_ErrorDelete(TRITONSERVER_Error error); + +/** Get the error code. * -

- * Get the ID of the request. Can be nullptr if request doesn't have - * an ID. The returned string is owned by the request, not the - * caller, and so should not be modified or freed. + * @param error The error object. + * @return The error code. */ + +/// +public static native TRITONSERVER_Error_Code TRITONSERVER_ErrorCode(TRITONSERVER_Error error); + +/** Get the string representation of an error code. The returned + * string is not owned by the caller and so should not be modified or + * freed. The lifetime of the returned string extends only as long as + * 'error' and must not be accessed once 'error' is deleted. * - * @param request The inference request. - * @param id Returns the ID. - * @return a TRITONSERVER_Error indicating success or failure. */ + * @param error The error object. + * @return The string representation of the error code. */ /// -public static native TRITONSERVER_Error TRITONBACKEND_RequestId( - TRITONBACKEND_Request request, @Cast("const char**") PointerPointer id); -public static native TRITONSERVER_Error TRITONBACKEND_RequestId( - TRITONBACKEND_Request request, @Cast("const char**") @ByPtrPtr BytePointer id); -public static native TRITONSERVER_Error TRITONBACKEND_RequestId( - TRITONBACKEND_Request request, @Cast("const char**") @ByPtrPtr ByteBuffer id); -public static native TRITONSERVER_Error TRITONBACKEND_RequestId( - TRITONBACKEND_Request request, @Cast("const char**") @ByPtrPtr byte[] id); +public static native String TRITONSERVER_ErrorCodeString( + TRITONSERVER_Error error); -/** Get the correlation ID of the request. Zero indicates that the - * request does not have a correlation ID. +/** Get the error message. The returned string is not owned by the + * caller and so should not be modified or freed. The lifetime of the + * returned string extends only as long as 'error' and must not be + * accessed once 'error' is deleted. * - * @param request The inference request. - * @param id Returns the correlation ID. 
- * @return a TRITONSERVER_Error indicating success or failure. */ + * @param error The error object. + * @return The error message. */ /// -public static native TRITONSERVER_Error TRITONBACKEND_RequestCorrelationId( - TRITONBACKEND_Request request, @Cast("uint64_t*") LongPointer id); -public static native TRITONSERVER_Error TRITONBACKEND_RequestCorrelationId( - TRITONBACKEND_Request request, @Cast("uint64_t*") LongBuffer id); -public static native TRITONSERVER_Error TRITONBACKEND_RequestCorrelationId( - TRITONBACKEND_Request request, @Cast("uint64_t*") long[] id); +/// +/// +public static native String TRITONSERVER_ErrorMessage( + TRITONSERVER_Error error); +// Targeting ../tritonserver/TRITONSERVER_ResponseAllocatorAllocFn_t.java -/** Get the number of input tensors specified in the request. + +// Targeting ../tritonserver/TRITONSERVER_ResponseAllocatorReleaseFn_t.java + + +// Targeting ../tritonserver/TRITONSERVER_ResponseAllocatorStartFn_t.java + + + +/** Create a new response allocator object. + * + * The response allocator object is used by Triton to allocate + * buffers to hold the output tensors in inference responses. Most + * models generate a single response for each inference request + * (TRITONSERVER_TXN_ONE_TO_ONE). 
For these models the order of + * callbacks will be: + * + * TRITONSERVER_ServerInferAsync called + * - start_fn : optional (and typically not required) + * - alloc_fn : called once for each output tensor in response + * TRITONSERVER_InferenceResponseDelete called + * - release_fn: called once for each output tensor in response + * + * For models that generate multiple responses for each inference + * request (TRITONSERVER_TXN_DECOUPLED), the start_fn callback can be + * used to determine sets of alloc_fn callbacks that belong to the + * same response: + * + * TRITONSERVER_ServerInferAsync called + * - start_fn + * - alloc_fn : called once for each output tensor in response + * - start_fn + * - alloc_fn : called once for each output tensor in response + * ... + * For each response, TRITONSERVER_InferenceResponseDelete called + * - release_fn: called once for each output tensor in the response + * + * In all cases the start_fn, alloc_fn and release_fn callback + * functions must be thread-safe. Typically making these functions + * thread-safe does not require explicit locking. The recommended way + * to implement these functions is to have each inference request + * provide a 'response_allocator_userp' object that is unique to that + * request with TRITONSERVER_InferenceRequestSetResponseCallback. The + * callback functions then operate only on this unique state. Locking + * is required only when the callback function needs to access state + * that is shared across inference requests (for example, a common + * allocation pool). * - * @param request The inference request. - * @param count Returns the number of input tensors. + * @param allocator Returns the new response allocator object. + * @param alloc_fn The function to call to allocate buffers for result + * tensors. + * @param release_fn The function to call when the server no longer + * holds a reference to an allocated buffer. 
+ * @param start_fn The function to call to indicate that the + * subsequent 'alloc_fn' calls are for a new response. This callback + * is optional (use nullptr to indicate that it should not be + * invoked). +

* @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONBACKEND_RequestInputCount( - TRITONBACKEND_Request request, @Cast("uint32_t*") IntPointer count); -public static native TRITONSERVER_Error TRITONBACKEND_RequestInputCount( - TRITONBACKEND_Request request, @Cast("uint32_t*") IntBuffer count); -public static native TRITONSERVER_Error TRITONBACKEND_RequestInputCount( - TRITONBACKEND_Request request, @Cast("uint32_t*") int[] count); +public static native TRITONSERVER_Error TRITONSERVER_ResponseAllocatorNew( + @Cast("TRITONSERVER_ResponseAllocator**") PointerPointer allocator, + TRITONSERVER_ResponseAllocatorAllocFn_t alloc_fn, + TRITONSERVER_ResponseAllocatorReleaseFn_t release_fn, + TRITONSERVER_ResponseAllocatorStartFn_t start_fn); +public static native TRITONSERVER_Error TRITONSERVER_ResponseAllocatorNew( + @ByPtrPtr TRITONSERVER_ResponseAllocator allocator, + TRITONSERVER_ResponseAllocatorAllocFn_t alloc_fn, + TRITONSERVER_ResponseAllocatorReleaseFn_t release_fn, + TRITONSERVER_ResponseAllocatorStartFn_t start_fn); -/** Get the name of an input tensor. The caller does not own - * the returned string and must not modify or delete it. The lifetime - * of the returned string extends only as long as 'request'. +/** Delete a response allocator. * - * @param request The inference request. - * @param index The index of the input tensor. Must be 0 <= index < - * count, where count is the value returned by - * TRITONBACKEND_RequestInputCount. - * @param input_name Returns the name of the input tensor - * corresponding to the index. + * @param allocator The response allocator object. * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native TRITONSERVER_Error TRITONBACKEND_RequestInputName( - TRITONBACKEND_Request request, @Cast("const uint32_t") int index, - @Cast("const char**") PointerPointer input_name); -public static native TRITONSERVER_Error TRITONBACKEND_RequestInputName( - TRITONBACKEND_Request request, @Cast("const uint32_t") int index, - @Cast("const char**") @ByPtrPtr BytePointer input_name); -public static native TRITONSERVER_Error TRITONBACKEND_RequestInputName( - TRITONBACKEND_Request request, @Cast("const uint32_t") int index, - @Cast("const char**") @ByPtrPtr ByteBuffer input_name); -public static native TRITONSERVER_Error TRITONBACKEND_RequestInputName( - TRITONBACKEND_Request request, @Cast("const uint32_t") int index, - @Cast("const char**") @ByPtrPtr byte[] input_name); - -/** Get a named request input. The lifetime of the returned input - * object matches that of the request and so the input object should - * not be accessed after the request object is released. - * - * @param request The inference request. - * @param name The name of the input. - * @param input Returns the input corresponding to the name. - * @return a TRITONSERVER_Error indicating success or failure. */ - /// /// -public static native TRITONSERVER_Error TRITONBACKEND_RequestInput( - TRITONBACKEND_Request request, String name, - @Cast("TRITONBACKEND_Input**") PointerPointer input); -public static native TRITONSERVER_Error TRITONBACKEND_RequestInput( - TRITONBACKEND_Request request, String name, - @ByPtrPtr TRITONBACKEND_Input input); -public static native TRITONSERVER_Error TRITONBACKEND_RequestInput( - TRITONBACKEND_Request request, @Cast("const char*") BytePointer name, - @ByPtrPtr TRITONBACKEND_Input input); +public static native TRITONSERVER_Error TRITONSERVER_ResponseAllocatorDelete( + TRITONSERVER_ResponseAllocator allocator); -/** Get a request input by index. 
The order of inputs in a given - * request is not necessarily consistent with other requests, even if - * the requests are in the same batch. As a result, you can not - * assume that an index obtained from one request will point to the - * same input in a different request. +/** TRITONSERVER_Message * - * The lifetime of the returned input object matches that of the - * request and so the input object should not be accessed after the - * request object is released. + * Object representing a Triton Server message. * - * @param request The inference request. - * @param index The index of the input tensor. Must be 0 <= index < - * count, where count is the value returned by - * TRITONBACKEND_RequestInputCount. - * @param input Returns the input corresponding to the index. +

+ * Create a new message object from serialized JSON string. + * + * @param message The message object. + * @param base The base of the serialized JSON. + * @param byte_size The size, in bytes, of the serialized message. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONBACKEND_RequestInputByIndex( - TRITONBACKEND_Request request, @Cast("const uint32_t") int index, - @Cast("TRITONBACKEND_Input**") PointerPointer input); -public static native TRITONSERVER_Error TRITONBACKEND_RequestInputByIndex( - TRITONBACKEND_Request request, @Cast("const uint32_t") int index, - @ByPtrPtr TRITONBACKEND_Input input); +public static native TRITONSERVER_Error TRITONSERVER_MessageNewFromSerializedJson( + @Cast("TRITONSERVER_Message**") PointerPointer message, String base, @Cast("size_t") long byte_size); +public static native TRITONSERVER_Error TRITONSERVER_MessageNewFromSerializedJson( + @ByPtrPtr TRITONSERVER_Message message, String base, @Cast("size_t") long byte_size); +public static native TRITONSERVER_Error TRITONSERVER_MessageNewFromSerializedJson( + @ByPtrPtr TRITONSERVER_Message message, @Cast("const char*") BytePointer base, @Cast("size_t") long byte_size); -/** Get the number of output tensors requested to be returned in the - * request. +/** Delete a message object. * - * @param request The inference request. - * @param count Returns the number of output tensors. + * @param message The message object. * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native TRITONSERVER_Error TRITONBACKEND_RequestOutputCount( - TRITONBACKEND_Request request, @Cast("uint32_t*") IntPointer count); -public static native TRITONSERVER_Error TRITONBACKEND_RequestOutputCount( - TRITONBACKEND_Request request, @Cast("uint32_t*") IntBuffer count); -public static native TRITONSERVER_Error TRITONBACKEND_RequestOutputCount( - TRITONBACKEND_Request request, @Cast("uint32_t*") int[] count); +public static native TRITONSERVER_Error TRITONSERVER_MessageDelete( + TRITONSERVER_Message message); -/** Get the name of a requested output tensor. The caller does not own - * the returned string and must not modify or delete it. The lifetime - * of the returned string extends only as long as 'request'. +/** Get the base and size of the buffer containing the serialized + * message in JSON format. The buffer is owned by the + * TRITONSERVER_Message object and should not be modified or freed by + * the caller. The lifetime of the buffer extends only as long as + * 'message' and must not be accessed once 'message' is deleted. * - * @param request The inference request. - * @param index The index of the requested output tensor. Must be 0 - * <= index < count, where count is the value returned by - * TRITONBACKEND_RequestOutputCount. - * @param output_name Returns the name of the requested output tensor - * corresponding to the index. + * @param message The message object. + * @param base Returns the base of the serialized message. + * @param byte_size Returns the size, in bytes, of the serialized + * message. * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native TRITONSERVER_Error TRITONBACKEND_RequestOutputName( - TRITONBACKEND_Request request, @Cast("const uint32_t") int index, - @Cast("const char**") PointerPointer output_name); -public static native TRITONSERVER_Error TRITONBACKEND_RequestOutputName( - TRITONBACKEND_Request request, @Cast("const uint32_t") int index, - @Cast("const char**") @ByPtrPtr BytePointer output_name); -public static native TRITONSERVER_Error TRITONBACKEND_RequestOutputName( - TRITONBACKEND_Request request, @Cast("const uint32_t") int index, - @Cast("const char**") @ByPtrPtr ByteBuffer output_name); -public static native TRITONSERVER_Error TRITONBACKEND_RequestOutputName( - TRITONBACKEND_Request request, @Cast("const uint32_t") int index, - @Cast("const char**") @ByPtrPtr byte[] output_name); +/// +public static native TRITONSERVER_Error TRITONSERVER_MessageSerializeToJson( + TRITONSERVER_Message message, @Cast("const char**") PointerPointer base, @Cast("size_t*") SizeTPointer byte_size); +public static native TRITONSERVER_Error TRITONSERVER_MessageSerializeToJson( + TRITONSERVER_Message message, @Cast("const char**") @ByPtrPtr BytePointer base, @Cast("size_t*") SizeTPointer byte_size); +public static native TRITONSERVER_Error TRITONSERVER_MessageSerializeToJson( + TRITONSERVER_Message message, @Cast("const char**") @ByPtrPtr ByteBuffer base, @Cast("size_t*") SizeTPointer byte_size); +public static native TRITONSERVER_Error TRITONSERVER_MessageSerializeToJson( + TRITONSERVER_Message message, @Cast("const char**") @ByPtrPtr byte[] base, @Cast("size_t*") SizeTPointer byte_size); -/** Release the request. The request should be released when it is no - * longer needed by the backend. If this call returns with an error - * (i.e. non-nullptr) then the request was not released and ownership - * remains with the backend. If this call returns with success, the - * 'request' object is no longer owned by the backend and must not be - * used. 
Any tensor names, data types, shapes, input tensors, - * etc. returned by TRITONBACKEND_Request* functions for this request - * are no longer valid. If a persistent copy of that data is required - * it must be created before calling this function. +/** TRITONSERVER_Metrics * - * @param request The inference request. - * @param release_flags Flags indicating what type of request release - * should be performed. @see TRITONSERVER_RequestReleaseFlag. @see - * TRITONSERVER_InferenceRequestReleaseFn_t. - * @return a TRITONSERVER_Error indicating success or failure. */ + * Object representing metrics. + * +

+ * Metric format types */ +public enum TRITONSERVER_MetricFormat { + TRITONSERVER_METRIC_PROMETHEUS(0); + + public final int value; + private TRITONSERVER_MetricFormat(int v) { this.value = v; } + private TRITONSERVER_MetricFormat(TRITONSERVER_MetricFormat e) { this.value = e.value; } + public TRITONSERVER_MetricFormat intern() { for (TRITONSERVER_MetricFormat e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} +/** Delete a metrics object. + * + * @param metrics The metrics object. + * @return a TRITONSERVER_Error indicating success or failure. */ /// /// /// -public static native TRITONSERVER_Error TRITONBACKEND_RequestRelease( - TRITONBACKEND_Request request, @Cast("uint32_t") int release_flags); +public static native TRITONSERVER_Error TRITONSERVER_MetricsDelete( + TRITONSERVER_Metrics metrics); -/** - * TRITONBACKEND_ResponseFactory +/** Get a buffer containing the metrics in the specified format. For + * each format the buffer contains the following: * - * Object representing an inference response factory. Using a - * response factory is not required; instead a response can be - * generated directly from a TRITONBACKEND_Request object using - * TRITONBACKEND_ResponseNew(). A response factory allows a request - * to be released before all responses have been sent. Releasing a - * request as early as possible releases all input tensor data and - * therefore may be desirable in some cases. -

- * Create the response factory associated with a request. + * TRITONSERVER_METRIC_PROMETHEUS: 'base' points to a single multiline + * string (char*) that gives a text representation of the metrics in + * prometheus format. 'byte_size' returns the length of the string + * in bytes. * - * @param factory Returns the new response factory. - * @param request The inference request. + * The buffer is owned by the 'metrics' object and should not be + * modified or freed by the caller. The lifetime of the buffer + * extends only as long as 'metrics' and must not be accessed once + * 'metrics' is deleted. + * + * @param metrics The metrics object. + * @param format The format to use for the returned metrics. + * @param base Returns a pointer to the base of the formatted + * metrics, as described above. + * @param byte_size Returns the size, in bytes, of the formatted + * metrics. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONBACKEND_ResponseFactoryNew( - @Cast("TRITONBACKEND_ResponseFactory**") PointerPointer factory, TRITONBACKEND_Request request); -public static native TRITONSERVER_Error TRITONBACKEND_ResponseFactoryNew( - @ByPtrPtr TRITONBACKEND_ResponseFactory factory, TRITONBACKEND_Request request); +/// +public static native TRITONSERVER_Error TRITONSERVER_MetricsFormatted( + TRITONSERVER_Metrics metrics, TRITONSERVER_MetricFormat format, + @Cast("const char**") PointerPointer base, @Cast("size_t*") SizeTPointer byte_size); +public static native TRITONSERVER_Error TRITONSERVER_MetricsFormatted( + TRITONSERVER_Metrics metrics, TRITONSERVER_MetricFormat format, + @Cast("const char**") @ByPtrPtr BytePointer base, @Cast("size_t*") SizeTPointer byte_size); +public static native TRITONSERVER_Error TRITONSERVER_MetricsFormatted( + TRITONSERVER_Metrics metrics, @Cast("TRITONSERVER_MetricFormat") int format, + @Cast("const char**") @ByPtrPtr ByteBuffer base, @Cast("size_t*") SizeTPointer byte_size); +public 
static native TRITONSERVER_Error TRITONSERVER_MetricsFormatted( + TRITONSERVER_Metrics metrics, TRITONSERVER_MetricFormat format, + @Cast("const char**") @ByPtrPtr byte[] base, @Cast("size_t*") SizeTPointer byte_size); +public static native TRITONSERVER_Error TRITONSERVER_MetricsFormatted( + TRITONSERVER_Metrics metrics, @Cast("TRITONSERVER_MetricFormat") int format, + @Cast("const char**") @ByPtrPtr BytePointer base, @Cast("size_t*") SizeTPointer byte_size); +public static native TRITONSERVER_Error TRITONSERVER_MetricsFormatted( + TRITONSERVER_Metrics metrics, TRITONSERVER_MetricFormat format, + @Cast("const char**") @ByPtrPtr ByteBuffer base, @Cast("size_t*") SizeTPointer byte_size); +public static native TRITONSERVER_Error TRITONSERVER_MetricsFormatted( + TRITONSERVER_Metrics metrics, @Cast("TRITONSERVER_MetricFormat") int format, + @Cast("const char**") @ByPtrPtr byte[] base, @Cast("size_t*") SizeTPointer byte_size); -/** Destroy a response factory. +/** TRITONSERVER_InferenceTrace * - * @param factory The response factory. - * @return a TRITONSERVER_Error indicating success or failure. */ + * Object that represents tracing for an inference request. + * +

+ * Trace levels */ +public enum TRITONSERVER_InferenceTraceLevel { + TRITONSERVER_TRACE_LEVEL_DISABLED(0), + TRITONSERVER_TRACE_LEVEL_MIN(1), + TRITONSERVER_TRACE_LEVEL_MAX(2); -/// -public static native TRITONSERVER_Error TRITONBACKEND_ResponseFactoryDelete( - TRITONBACKEND_ResponseFactory factory); + public final int value; + private TRITONSERVER_InferenceTraceLevel(int v) { this.value = v; } + private TRITONSERVER_InferenceTraceLevel(TRITONSERVER_InferenceTraceLevel e) { this.value = e.value; } + public TRITONSERVER_InferenceTraceLevel intern() { for (TRITONSERVER_InferenceTraceLevel e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} -/** Send response flags without a corresponding response. +/** Get the string representation of a trace level. The returned + * string is not owned by the caller and so should not be modified or + * freed. * - * @param factory The response factory. - * @param send_flags Flags to send. @see - * TRITONSERVER_ResponseCompleteFlag. @see - * TRITONSERVER_InferenceResponseCompleteFn_t. - * @return a TRITONSERVER_Error indicating success or failure. */ + * @param level The trace level. + * @return The string representation of the trace level. 
*/ +public static native String TRITONSERVER_InferenceTraceLevelString( + TRITONSERVER_InferenceTraceLevel level); +public static native @Cast("const char*") BytePointer TRITONSERVER_InferenceTraceLevelString( + @Cast("TRITONSERVER_InferenceTraceLevel") int level); +// Trace activities +public enum TRITONSERVER_InferenceTraceActivity { + TRITONSERVER_TRACE_REQUEST_START(0), + TRITONSERVER_TRACE_QUEUE_START(1), + TRITONSERVER_TRACE_COMPUTE_START(2), + TRITONSERVER_TRACE_COMPUTE_INPUT_END(3), + TRITONSERVER_TRACE_COMPUTE_OUTPUT_START(4), + TRITONSERVER_TRACE_COMPUTE_END(5), + TRITONSERVER_TRACE_REQUEST_END(6); -/// -/// -/// -/// -/// -public static native TRITONSERVER_Error TRITONBACKEND_ResponseFactorySendFlags( - TRITONBACKEND_ResponseFactory factory, @Cast("const uint32_t") int send_flags); + public final int value; + private TRITONSERVER_InferenceTraceActivity(int v) { this.value = v; } + private TRITONSERVER_InferenceTraceActivity(TRITONSERVER_InferenceTraceActivity e) { this.value = e.value; } + public TRITONSERVER_InferenceTraceActivity intern() { for (TRITONSERVER_InferenceTraceActivity e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} -/** - * TRITONBACKEND_Response - * - * Object representing an inference response. For a given request, - * the backend must carefully manage the lifecycle of responses - * generated for that request to ensure that the output tensor - * buffers are allocated correctly. When a response is created with - * TRITONBACKEND_ResponseNew or TRITONBACKEND_ResponseNewFromFactory, - * all the outputs and corresponding buffers must be created for that - * response using TRITONBACKEND_ResponseOutput and - * TRITONBACKEND_OutputBuffer *before* another response is created - * for the request. 
For a given response, outputs can be created in - * any order but they must be created sequentially/sychronously (for - * example, the backend cannot use multiple threads to simultaneously - * add multiple outputs to a response). - * - * The above requirement applies only to responses being generated - * for a given request. The backend may generate responses in - * parallel on multiple threads as long as those responses are for - * different requests. - * - * This order of response creation must be strictly followed. But, - * once response(s) are created they do not need to be sent - * immediately, nor do they need to be sent in the order they were - * created. The backend may even delete a created response instead of - * sending it by using TRITONBACKEND_ResponseDelete. -

- * Create a response for a request. +/** Get the string representation of a trace activity. The returned + * string is not owned by the caller and so should not be modified or + * freed. * - * @param response Returns the new response. - * @param request The request. - * @return a TRITONSERVER_Error indicating success or failure. */ + * @param activity The trace activity. + * @return The string representation of the trace activity. */ +public static native String TRITONSERVER_InferenceTraceActivityString( + TRITONSERVER_InferenceTraceActivity activity); +public static native @Cast("const char*") BytePointer TRITONSERVER_InferenceTraceActivityString( + @Cast("TRITONSERVER_InferenceTraceActivity") int activity); +// Targeting ../tritonserver/TRITONSERVER_InferenceTraceActivityFn_t.java -/// -public static native TRITONSERVER_Error TRITONBACKEND_ResponseNew( - @Cast("TRITONBACKEND_Response**") PointerPointer response, TRITONBACKEND_Request request); -public static native TRITONSERVER_Error TRITONBACKEND_ResponseNew( - @ByPtrPtr TRITONBACKEND_Response response, TRITONBACKEND_Request request); -/** Create a response using a factory. - * - * @param response Returns the new response. - * @param factory The response factory. - * @return a TRITONSERVER_Error indicating success or failure. */ +// Targeting ../tritonserver/TRITONSERVER_InferenceTraceReleaseFn_t.java -/// -public static native TRITONSERVER_Error TRITONBACKEND_ResponseNewFromFactory( - @Cast("TRITONBACKEND_Response**") PointerPointer response, TRITONBACKEND_ResponseFactory factory); -public static native TRITONSERVER_Error TRITONBACKEND_ResponseNewFromFactory( - @ByPtrPtr TRITONBACKEND_Response response, TRITONBACKEND_ResponseFactory factory); -/** Destroy a response. It is not necessary to delete a response if - * TRITONBACKEND_ResponseSend is called as that function transfers - * ownership of the response object to Triton. + +/** Create a new inference trace object. 
The caller takes ownership of + * the TRITONSERVER_InferenceTrace object and must call + * TRITONSERVER_InferenceTraceDelete to release the object. * - * @param response The response. + * The activity callback function will be called to report activity + * for 'trace' as well as for any child traces that are spawned by + * 'trace', and so the activity callback must check the trace object + * to determine specifically what activity is being reported. + * + * The release callback is called for both 'trace' and for any child + * traces spawned by 'trace'. + * + * @param trace Returns the new inference trace object. + * @param level The tracing level. + * @param parent_id The parent trace id for this trace. A value of 0 + * indicates that there is not parent trace. + * @param activity_fn The callback function where activity for the + * trace is reported. + * @param release_fn The callback function called when all activity + * is complete for the trace. + * @param trace_userp User-provided pointer that is delivered to + * the activity and release callback functions. * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native TRITONSERVER_Error TRITONBACKEND_ResponseDelete( - TRITONBACKEND_Response response); +public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceNew( + @Cast("TRITONSERVER_InferenceTrace**") PointerPointer trace, TRITONSERVER_InferenceTraceLevel level, + @Cast("uint64_t") long parent_id, TRITONSERVER_InferenceTraceActivityFn_t activity_fn, + TRITONSERVER_InferenceTraceReleaseFn_t release_fn, Pointer trace_userp); +public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceNew( + @ByPtrPtr TRITONSERVER_InferenceTrace trace, TRITONSERVER_InferenceTraceLevel level, + @Cast("uint64_t") long parent_id, TRITONSERVER_InferenceTraceActivityFn_t activity_fn, + TRITONSERVER_InferenceTraceReleaseFn_t release_fn, Pointer trace_userp); +public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceNew( + @ByPtrPtr TRITONSERVER_InferenceTrace trace, @Cast("TRITONSERVER_InferenceTraceLevel") int level, + @Cast("uint64_t") long parent_id, TRITONSERVER_InferenceTraceActivityFn_t activity_fn, + TRITONSERVER_InferenceTraceReleaseFn_t release_fn, Pointer trace_userp); -/** Set a string parameter in the response. +/** Delete a trace object. * - * @param response The response. - * @param name The name of the parameter. - * @param value The value of the parameter. + * @param trace The trace object. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONBACKEND_ResponseSetStringParameter( - TRITONBACKEND_Response response, String name, String value); -public static native TRITONSERVER_Error TRITONBACKEND_ResponseSetStringParameter( - TRITONBACKEND_Response response, @Cast("const char*") BytePointer name, @Cast("const char*") BytePointer value); +public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceDelete( + TRITONSERVER_InferenceTrace trace); -/** Set an integer parameter in the response. +/** Get the id associated with a trace. 
Every trace is assigned an id + * that is unique across all traces created for a Triton server. * - * @param response The response. - * @param name The name of the parameter. - * @param value The value of the parameter. + * @param trace The trace. + * @param id Returns the id associated with the trace. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONBACKEND_ResponseSetIntParameter( - TRITONBACKEND_Response response, String name, @Cast("const int64_t") long value); -public static native TRITONSERVER_Error TRITONBACKEND_ResponseSetIntParameter( - TRITONBACKEND_Response response, @Cast("const char*") BytePointer name, @Cast("const int64_t") long value); +public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceId( + TRITONSERVER_InferenceTrace trace, @Cast("uint64_t*") LongPointer id); +public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceId( + TRITONSERVER_InferenceTrace trace, @Cast("uint64_t*") LongBuffer id); +public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceId( + TRITONSERVER_InferenceTrace trace, @Cast("uint64_t*") long[] id); -/** Set an boolean parameter in the response. +/** Get the parent id associated with a trace. The parent id indicates + * a parent-child relationship between two traces. A parent id value + * of 0 indicates that there is no parent trace. * - * @param response The response. - * @param name The name of the parameter. - * @param value The value of the parameter. + * @param trace The trace. + * @param id Returns the parent id associated with the trace. * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native TRITONSERVER_Error TRITONBACKEND_ResponseSetBoolParameter( - TRITONBACKEND_Response response, String name, @Cast("const bool") boolean value); -public static native TRITONSERVER_Error TRITONBACKEND_ResponseSetBoolParameter( - TRITONBACKEND_Response response, @Cast("const char*") BytePointer name, @Cast("const bool") boolean value); +public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceParentId( + TRITONSERVER_InferenceTrace trace, @Cast("uint64_t*") LongPointer parent_id); +public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceParentId( + TRITONSERVER_InferenceTrace trace, @Cast("uint64_t*") LongBuffer parent_id); +public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceParentId( + TRITONSERVER_InferenceTrace trace, @Cast("uint64_t*") long[] parent_id); -/** Create an output tensor in the response. The lifetime of the - * returned output tensor object matches that of the response and so - * the output tensor object should not be accessed after the response - * object is deleted. +/** Get the name of the model associated with a trace. The caller does + * not own the returned string and must not modify or delete it. The + * lifetime of the returned string extends only as long as 'trace'. * - * @param response The response. - * @param output Returns the new response output. - * @param name The name of the output tensor. - * @param datatype The datatype of the output tensor. - * @param shape The shape of the output tensor. - * @param dims_count The number of dimensions in the output tensor - * shape. + * @param trace The trace. + * @param model_name Returns the name of the model associated with + * the trace. * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native TRITONSERVER_Error TRITONBACKEND_ResponseOutput( - TRITONBACKEND_Response response, @Cast("TRITONBACKEND_Output**") PointerPointer output, - String name, @Const @ByVal TRITONSERVER_DataType datatype, - @Cast("const int64_t*") LongPointer shape, @Cast("const uint32_t") int dims_count); -public static native TRITONSERVER_Error TRITONBACKEND_ResponseOutput( - TRITONBACKEND_Response response, @ByPtrPtr TRITONBACKEND_Output output, - String name, @Const @ByVal TRITONSERVER_DataType datatype, - @Cast("const int64_t*") LongPointer shape, @Cast("const uint32_t") int dims_count); -public static native TRITONSERVER_Error TRITONBACKEND_ResponseOutput( - TRITONBACKEND_Response response, @ByPtrPtr TRITONBACKEND_Output output, - @Cast("const char*") BytePointer name, @Const @ByVal TRITONSERVER_DataType datatype, - @Cast("const int64_t*") LongBuffer shape, @Cast("const uint32_t") int dims_count); -public static native TRITONSERVER_Error TRITONBACKEND_ResponseOutput( - TRITONBACKEND_Response response, @ByPtrPtr TRITONBACKEND_Output output, - String name, @Const @ByVal TRITONSERVER_DataType datatype, - @Cast("const int64_t*") long[] shape, @Cast("const uint32_t") int dims_count); -public static native TRITONSERVER_Error TRITONBACKEND_ResponseOutput( - TRITONBACKEND_Response response, @ByPtrPtr TRITONBACKEND_Output output, - @Cast("const char*") BytePointer name, @Const @ByVal TRITONSERVER_DataType datatype, - @Cast("const int64_t*") LongPointer shape, @Cast("const uint32_t") int dims_count); -public static native TRITONSERVER_Error TRITONBACKEND_ResponseOutput( - TRITONBACKEND_Response response, @ByPtrPtr TRITONBACKEND_Output output, - String name, @Const @ByVal TRITONSERVER_DataType datatype, - @Cast("const int64_t*") LongBuffer shape, @Cast("const uint32_t") int dims_count); -public static native TRITONSERVER_Error TRITONBACKEND_ResponseOutput( - TRITONBACKEND_Response response, @ByPtrPtr TRITONBACKEND_Output output, - @Cast("const char*") BytePointer 
name, @Const @ByVal TRITONSERVER_DataType datatype, - @Cast("const int64_t*") long[] shape, @Cast("const uint32_t") int dims_count); - -/** Send a response. Calling this function transfers ownership of the - * response object to Triton. The caller must not access or delete - * the response object after calling this function. - * - * @param response The response. - * @param send_flags Flags associated with the response. @see - * TRITONSERVER_ResponseCompleteFlag. @see - * TRITONSERVER_InferenceResponseCompleteFn_t. - * @param error The TRITONSERVER_Error to send if the response is an - * error, or nullptr if the response is successful. - * @return a TRITONSERVER_Error indicating success or failure. */ +public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceModelName( + TRITONSERVER_InferenceTrace trace, @Cast("const char**") PointerPointer model_name); +public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceModelName( + TRITONSERVER_InferenceTrace trace, @Cast("const char**") @ByPtrPtr BytePointer model_name); +public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceModelName( + TRITONSERVER_InferenceTrace trace, @Cast("const char**") @ByPtrPtr ByteBuffer model_name); +public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceModelName( + TRITONSERVER_InferenceTrace trace, @Cast("const char**") @ByPtrPtr byte[] model_name); +/** Get the version of the model associated with a trace. + * + * @param trace The trace. + * @param model_version Returns the version of the model associated + * with the trace. + * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// /// -/// -/// -/// -/// -public static native TRITONSERVER_Error TRITONBACKEND_ResponseSend( - TRITONBACKEND_Response response, @Cast("const uint32_t") int send_flags, - TRITONSERVER_Error error); +public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceModelVersion( + TRITONSERVER_InferenceTrace trace, @Cast("int64_t*") LongPointer model_version); +public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceModelVersion( + TRITONSERVER_InferenceTrace trace, @Cast("int64_t*") LongBuffer model_version); +public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceModelVersion( + TRITONSERVER_InferenceTrace trace, @Cast("int64_t*") long[] model_version); -/** - * TRITONBACKEND_Backend +/** TRITONSERVER_InferenceRequest * - * Object representing a backend. + * Object representing an inference request. The inference request + * provides the meta-data and input tensor values needed for an + * inference and returns the inference result meta-data and output + * tensors. An inference request object can be modified and reused + * multiple times. *

- * TRITONBACKEND_ExecutionPolicy - * - * Types of execution policy that can be implemented by a backend. - * - * TRITONBACKEND_EXECUTION_BLOCKING: An instance of the model - * blocks in TRITONBACKEND_ModelInstanceExecute until it is ready - * to handle another inference. Upon returning from - * TRITONBACKEND_ModelInstanceExecute, Triton may immediately - * call TRITONBACKEND_ModelInstanceExecute for the same instance - * to execute a new batch of requests. Thus, most backends using - * this policy will not return from - * TRITONBACKEND_ModelInstanceExecute until all responses have - * been sent and all requests have been released. This is the - * default execution policy. - * */ -public enum TRITONBACKEND_ExecutionPolicy { - TRITONBACKEND_EXECUTION_BLOCKING(0); + * Inference request flags. The enum values must be power-of-2 values. */ +public enum TRITONSERVER_RequestFlag { + TRITONSERVER_REQUEST_FLAG_SEQUENCE_START(1), + TRITONSERVER_REQUEST_FLAG_SEQUENCE_END(2); public final int value; - private TRITONBACKEND_ExecutionPolicy(int v) { this.value = v; } - private TRITONBACKEND_ExecutionPolicy(TRITONBACKEND_ExecutionPolicy e) { this.value = e.value; } - public TRITONBACKEND_ExecutionPolicy intern() { for (TRITONBACKEND_ExecutionPolicy e : values()) if (e.value == value) return e; return this; } + private TRITONSERVER_RequestFlag(int v) { this.value = v; } + private TRITONSERVER_RequestFlag(TRITONSERVER_RequestFlag e) { this.value = e.value; } + public TRITONSERVER_RequestFlag intern() { for (TRITONSERVER_RequestFlag e : values()) if (e.value == value) return e; return this; } @Override public String toString() { return intern().name(); } } -/** Get the name of the backend. The caller does not own the returned - * string and must not modify or delete it. The lifetime of the - * returned string extends only as long as 'backend'. +/** Inference request release flags. The enum values must be + * power-of-2 values. 
*/ +public enum TRITONSERVER_RequestReleaseFlag { + TRITONSERVER_REQUEST_RELEASE_ALL(1); + + public final int value; + private TRITONSERVER_RequestReleaseFlag(int v) { this.value = v; } + private TRITONSERVER_RequestReleaseFlag(TRITONSERVER_RequestReleaseFlag e) { this.value = e.value; } + public TRITONSERVER_RequestReleaseFlag intern() { for (TRITONSERVER_RequestReleaseFlag e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + +/** Inference response complete flags. The enum values must be + * power-of-2 values. */ +public enum TRITONSERVER_ResponseCompleteFlag { + TRITONSERVER_RESPONSE_COMPLETE_FINAL(1); + + public final int value; + private TRITONSERVER_ResponseCompleteFlag(int v) { this.value = v; } + private TRITONSERVER_ResponseCompleteFlag(TRITONSERVER_ResponseCompleteFlag e) { this.value = e.value; } + public TRITONSERVER_ResponseCompleteFlag intern() { for (TRITONSERVER_ResponseCompleteFlag e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} +// Targeting ../tritonserver/TRITONSERVER_InferenceRequestReleaseFn_t.java + + +// Targeting ../tritonserver/TRITONSERVER_InferenceResponseCompleteFn_t.java + + + +/** Create a new inference request object. * - * @param backend The backend. - * @param name Returns the name of the backend. + * @param inference_request Returns the new request object. + * @param server the inference server object. + * @param model_name The name of the model to use for the request. + * @param model_version The version of the model to use for the + * request. If -1 then the server will choose a version based on the + * model's policy. * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -/// -/// -public static native TRITONSERVER_Error TRITONBACKEND_BackendName( - TRITONBACKEND_Backend backend, @Cast("const char**") PointerPointer name); -public static native TRITONSERVER_Error TRITONBACKEND_BackendName( - TRITONBACKEND_Backend backend, @Cast("const char**") @ByPtrPtr BytePointer name); -public static native TRITONSERVER_Error TRITONBACKEND_BackendName( - TRITONBACKEND_Backend backend, @Cast("const char**") @ByPtrPtr ByteBuffer name); -public static native TRITONSERVER_Error TRITONBACKEND_BackendName( - TRITONBACKEND_Backend backend, @Cast("const char**") @ByPtrPtr byte[] name); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestNew( + @Cast("TRITONSERVER_InferenceRequest**") PointerPointer inference_request, + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestNew( + @ByPtrPtr TRITONSERVER_InferenceRequest inference_request, + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestNew( + @ByPtrPtr TRITONSERVER_InferenceRequest inference_request, + TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name, + @Cast("const int64_t") long model_version); -/** Get the backend configuration. The 'backend_config' message is - * owned by Triton and should not be modified or freed by the caller. - * - * The backend configuration, as JSON, is: - * - * { - * "cmdline" : { - * "" : "", - * ... - * } - * } +/** Delete an inference request object. * - * @param backend The backend. - * @param backend_config Returns the backend configuration as a message. + * @param inference_request The request object. * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native TRITONSERVER_Error TRITONBACKEND_BackendConfig( - TRITONBACKEND_Backend backend, @Cast("TRITONSERVER_Message**") PointerPointer backend_config); -public static native TRITONSERVER_Error TRITONBACKEND_BackendConfig( - TRITONBACKEND_Backend backend, @ByPtrPtr TRITONSERVER_Message backend_config); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestDelete( + TRITONSERVER_InferenceRequest inference_request); -/** Get the execution policy for this backend. By default the - * execution policy is TRITONBACKEND_EXECUTION_BLOCKING. +/** Get the ID for a request. The returned ID is owned by + * 'inference_request' and must not be modified or freed by the + * caller. * - * @param backend The backend. - * @param policy Returns the execution policy. + * @param inference_request The request object. + * @param id Returns the ID. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONBACKEND_BackendExecutionPolicy( - TRITONBACKEND_Backend backend, @Cast("TRITONBACKEND_ExecutionPolicy*") IntPointer policy); -public static native TRITONSERVER_Error TRITONBACKEND_BackendExecutionPolicy( - TRITONBACKEND_Backend backend, @Cast("TRITONBACKEND_ExecutionPolicy*") IntBuffer policy); -public static native TRITONSERVER_Error TRITONBACKEND_BackendExecutionPolicy( - TRITONBACKEND_Backend backend, @Cast("TRITONBACKEND_ExecutionPolicy*") int[] policy); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestId( + TRITONSERVER_InferenceRequest inference_request, @Cast("const char**") PointerPointer id); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestId( + TRITONSERVER_InferenceRequest inference_request, @Cast("const char**") @ByPtrPtr BytePointer id); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestId( + TRITONSERVER_InferenceRequest inference_request, @Cast("const char**") @ByPtrPtr ByteBuffer id); +public static native 
TRITONSERVER_Error TRITONSERVER_InferenceRequestId( + TRITONSERVER_InferenceRequest inference_request, @Cast("const char**") @ByPtrPtr byte[] id); -/** Set the execution policy for this backend. By default the - * execution policy is TRITONBACKEND_EXECUTION_BLOCKING. Triton reads - * the backend's execution policy after calling - * TRITONBACKEND_Initialize, so to be recognized changes to the - * execution policy must be made in TRITONBACKEND_Initialize. +/** Set the ID for a request. * - * @param backend The backend. - * @param policy The execution policy. + * @param inference_request The request object. + * @param id The ID. * @return a TRITONSERVER_Error indicating success or failure. */ /// +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestSetId( + TRITONSERVER_InferenceRequest inference_request, String id); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestSetId( + TRITONSERVER_InferenceRequest inference_request, @Cast("const char*") BytePointer id); + +/** Get the flag(s) associated with a request. On return 'flags' holds + * a bitwise-or of all flag values, see TRITONSERVER_RequestFlag for + * available flags. + * + * @param inference_request The request object. + * @param flags Returns the flags. + * @return a TRITONSERVER_Error indicating success or failure. 
 */ + /// -public static native TRITONSERVER_Error TRITONBACKEND_BackendSetExecutionPolicy( - TRITONBACKEND_Backend backend, TRITONBACKEND_ExecutionPolicy policy); -public static native TRITONSERVER_Error TRITONBACKEND_BackendSetExecutionPolicy( - TRITONBACKEND_Backend backend, @Cast("TRITONBACKEND_ExecutionPolicy") int policy); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestFlags( + TRITONSERVER_InferenceRequest inference_request, @Cast("uint32_t*") IntPointer flags); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestFlags( + TRITONSERVER_InferenceRequest inference_request, @Cast("uint32_t*") IntBuffer flags); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestFlags( + TRITONSERVER_InferenceRequest inference_request, @Cast("uint32_t*") int[] flags); -/** Get the location of the files that make up the backend - * implementation. This location contains the backend shared library - * and any other files located with the shared library. The - * 'location' communicated depends on how the backend is being - * communicated to Triton as indicated by 'artifact_type'. +/** Set the flag(s) associated with a request. 'flags' should hold a + * bitwise-or of all flag values, see TRITONSERVER_RequestFlag for + * available flags. * - * TRITONBACKEND_ARTIFACT_FILESYSTEM: The backend artifacts are - * made available to Triton via the local filesytem. 'location' - * returns the full path to the directory containing this - * backend's artifacts. The returned string is owned by Triton, - * not the caller, and so should not be modified or freed. + * @param inference_request The request object. + * @param flags The flags. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestSetFlags( + TRITONSERVER_InferenceRequest inference_request, @Cast("uint32_t") int flags); + +/** Get the correlation ID of the inference request. 
Default is 0, + * which indicates that the request has no correlation ID. The + * correlation ID is used to indicate two or more inference requests + * are related to each other. How this relationship is handled by the + * inference server is determined by the model's scheduling + * policy. * - * @param backend The backend. - * @param artifact_type Returns the artifact type for the backend. - * @param path Returns the location. + * @param inference_request The request object. + * @param correlation_id Returns the correlation ID. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONBACKEND_BackendArtifacts( - TRITONBACKEND_Backend backend, @Cast("TRITONBACKEND_ArtifactType*") IntPointer artifact_type, - @Cast("const char**") PointerPointer location); -public static native TRITONSERVER_Error TRITONBACKEND_BackendArtifacts( - TRITONBACKEND_Backend backend, @Cast("TRITONBACKEND_ArtifactType*") IntPointer artifact_type, - @Cast("const char**") @ByPtrPtr BytePointer location); -public static native TRITONSERVER_Error TRITONBACKEND_BackendArtifacts( - TRITONBACKEND_Backend backend, @Cast("TRITONBACKEND_ArtifactType*") IntBuffer artifact_type, - @Cast("const char**") @ByPtrPtr ByteBuffer location); -public static native TRITONSERVER_Error TRITONBACKEND_BackendArtifacts( - TRITONBACKEND_Backend backend, @Cast("TRITONBACKEND_ArtifactType*") int[] artifact_type, - @Cast("const char**") @ByPtrPtr byte[] location); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestCorrelationId( + TRITONSERVER_InferenceRequest inference_request, @Cast("uint64_t*") LongPointer correlation_id); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestCorrelationId( + TRITONSERVER_InferenceRequest inference_request, @Cast("uint64_t*") LongBuffer correlation_id); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestCorrelationId( + TRITONSERVER_InferenceRequest inference_request, 
@Cast("uint64_t*") long[] correlation_id); + +/** Set the correlation ID of the inference request. Default is 0, which + * indictes that the request has no correlation ID. The correlation ID + * is used to indicate two or more inference request are related to + * each other. How this relationship is handled by the inference + * server is determined by the model's scheduling policy. + * + * @param inference_request The request object. + * @param correlation_id The correlation ID. + * @return a TRITONSERVER_Error indicating success or failure. */ -/** Get the memory manager associated with a backend. +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestSetCorrelationId( + TRITONSERVER_InferenceRequest inference_request, @Cast("uint64_t") long correlation_id); + +/** Get the priority for a request. The default is 0 indicating that + * the request does not specify a priority and so will use the + * model's default priority. * - * @param backend The backend. - * @param manager Returns the memory manager. + * @param inference_request The request object. + * @param priority Returns the priority level. * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native TRITONSERVER_Error TRITONBACKEND_BackendMemoryManager( - TRITONBACKEND_Backend backend, @Cast("TRITONBACKEND_MemoryManager**") PointerPointer manager); -public static native TRITONSERVER_Error TRITONBACKEND_BackendMemoryManager( - TRITONBACKEND_Backend backend, @ByPtrPtr TRITONBACKEND_MemoryManager manager); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestPriority( + TRITONSERVER_InferenceRequest inference_request, @Cast("uint32_t*") IntPointer priority); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestPriority( + TRITONSERVER_InferenceRequest inference_request, @Cast("uint32_t*") IntBuffer priority); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestPriority( + TRITONSERVER_InferenceRequest inference_request, @Cast("uint32_t*") int[] priority); -/** Get the user-specified state associated with the backend. The - * state is completely owned and managed by the backend. +/** Set the priority for a request. The default is 0 indicating that + * the request does not specify a priority and so will use the + * model's default priority. * - * @param backend The backend. - * @param state Returns the user state, or nullptr if no user state. + * @param inference_request The request object. + * @param priority The priority level. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONBACKEND_BackendState( - TRITONBACKEND_Backend backend, @Cast("void**") PointerPointer state); -public static native TRITONSERVER_Error TRITONBACKEND_BackendState( - TRITONBACKEND_Backend backend, @Cast("void**") @ByPtrPtr Pointer state); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestSetPriority( + TRITONSERVER_InferenceRequest inference_request, @Cast("uint32_t") int priority); -/** Set the user-specified state associated with the backend. The - * state is completely owned and managed by the backend. 
+/** Get the timeout for a request, in microseconds. The default is 0 + * which indicates that the request has no timeout. * - * @param backend The backend. - * @param state The user state, or nullptr if no user state. + * @param inference_request The request object. + * @param timeout_us Returns the timeout, in microseconds. * @return a TRITONSERVER_Error indicating success or failure. */ - -/// /// -/// -/// -public static native TRITONSERVER_Error TRITONBACKEND_BackendSetState( - TRITONBACKEND_Backend backend, Pointer state); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestTimeoutMicroseconds( + TRITONSERVER_InferenceRequest inference_request, @Cast("uint64_t*") LongPointer timeout_us); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestTimeoutMicroseconds( + TRITONSERVER_InferenceRequest inference_request, @Cast("uint64_t*") LongBuffer timeout_us); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestTimeoutMicroseconds( + TRITONSERVER_InferenceRequest inference_request, @Cast("uint64_t*") long[] timeout_us); -/** - * TRITONBACKEND_Model - * - * Object representing a model implemented using the backend. - * -

- * Get the name of the model. The returned string is owned by the - * model object, not the caller, and so should not be modified or - * freed. +/** Set the timeout for a request, in microseconds. The default is 0 + * which indicates that the request has no timeout. * - * @param model The model. - * @param name Returns the model name. + * @param inference_request The request object. + * @param timeout_us The timeout, in microseconds. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONBACKEND_ModelName( - TRITONBACKEND_Model model, @Cast("const char**") PointerPointer name); -public static native TRITONSERVER_Error TRITONBACKEND_ModelName( - TRITONBACKEND_Model model, @Cast("const char**") @ByPtrPtr BytePointer name); -public static native TRITONSERVER_Error TRITONBACKEND_ModelName( - TRITONBACKEND_Model model, @Cast("const char**") @ByPtrPtr ByteBuffer name); -public static native TRITONSERVER_Error TRITONBACKEND_ModelName( - TRITONBACKEND_Model model, @Cast("const char**") @ByPtrPtr byte[] name); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestSetTimeoutMicroseconds( + TRITONSERVER_InferenceRequest inference_request, @Cast("uint64_t") long timeout_us); -/** Get the version of the model. +/** Add an input to a request. * - * @param model The model. - * @param version Returns the model version. + * @param inference_request The request object. + * @param name The name of the input. + * @param datatype The type of the input. Valid type names are BOOL, + * UINT8, UINT16, UINT32, UINT64, INT8, INT16, INT32, INT64, FP16, + * FP32, FP64, and BYTES. + * @param shape The shape of the input. + * @param dim_count The number of dimensions of 'shape'. * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -/// -public static native TRITONSERVER_Error TRITONBACKEND_ModelVersion( - TRITONBACKEND_Model model, @Cast("uint64_t*") LongPointer version); -public static native TRITONSERVER_Error TRITONBACKEND_ModelVersion( - TRITONBACKEND_Model model, @Cast("uint64_t*") LongBuffer version); -public static native TRITONSERVER_Error TRITONBACKEND_ModelVersion( - TRITONBACKEND_Model model, @Cast("uint64_t*") long[] version); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAddInput( + TRITONSERVER_InferenceRequest inference_request, String name, + TRITONSERVER_DataType datatype, @Cast("const int64_t*") LongPointer shape, + @Cast("uint64_t") long dim_count); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAddInput( + TRITONSERVER_InferenceRequest inference_request, @Cast("const char*") BytePointer name, + @Cast("TRITONSERVER_DataType") int datatype, @Cast("const int64_t*") LongBuffer shape, + @Cast("uint64_t") long dim_count); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAddInput( + TRITONSERVER_InferenceRequest inference_request, String name, + TRITONSERVER_DataType datatype, @Cast("const int64_t*") long[] shape, + @Cast("uint64_t") long dim_count); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAddInput( + TRITONSERVER_InferenceRequest inference_request, @Cast("const char*") BytePointer name, + @Cast("TRITONSERVER_DataType") int datatype, @Cast("const int64_t*") LongPointer shape, + @Cast("uint64_t") long dim_count); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAddInput( + TRITONSERVER_InferenceRequest inference_request, String name, + TRITONSERVER_DataType datatype, @Cast("const int64_t*") LongBuffer shape, + @Cast("uint64_t") long dim_count); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAddInput( + TRITONSERVER_InferenceRequest inference_request, @Cast("const char*") BytePointer name, + @Cast("TRITONSERVER_DataType") int 
datatype, @Cast("const int64_t*") long[] shape, + @Cast("uint64_t") long dim_count); -/** Get the location of the files that make up the model. The - * 'location' communicated depends on how the model is being - * communicated to Triton as indicated by 'artifact_type'. - * - * TRITONBACKEND_ARTIFACT_FILESYSTEM: The model artifacts are made - * available to Triton via the local filesytem. 'location' - * returns the full path to the directory in the model repository - * that contains this model's artifacts. The returned string is - * owned by Triton, not the caller, and so should not be modified - * or freed. +/** Remove an input from a request. * - * @param model The model. - * @param artifact_type Returns the artifact type for the model. - * @param path Returns the location. + * @param inference_request The request object. + * @param name The name of the input. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONBACKEND_ModelRepository( - TRITONBACKEND_Model model, @Cast("TRITONBACKEND_ArtifactType*") IntPointer artifact_type, - @Cast("const char**") PointerPointer location); -public static native TRITONSERVER_Error TRITONBACKEND_ModelRepository( - TRITONBACKEND_Model model, @Cast("TRITONBACKEND_ArtifactType*") IntPointer artifact_type, - @Cast("const char**") @ByPtrPtr BytePointer location); -public static native TRITONSERVER_Error TRITONBACKEND_ModelRepository( - TRITONBACKEND_Model model, @Cast("TRITONBACKEND_ArtifactType*") IntBuffer artifact_type, - @Cast("const char**") @ByPtrPtr ByteBuffer location); -public static native TRITONSERVER_Error TRITONBACKEND_ModelRepository( - TRITONBACKEND_Model model, @Cast("TRITONBACKEND_ArtifactType*") int[] artifact_type, - @Cast("const char**") @ByPtrPtr byte[] location); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestRemoveInput( + TRITONSERVER_InferenceRequest inference_request, String name); +public static native TRITONSERVER_Error 
TRITONSERVER_InferenceRequestRemoveInput( + TRITONSERVER_InferenceRequest inference_request, @Cast("const char*") BytePointer name); -/** Get the model configuration. The caller takes ownership of the - * message object and must call TRITONSERVER_MessageDelete to release - * the object. The configuration is available via this call even - * before the model is loaded and so can be used in - * TRITONBACKEND_ModelInitialize. TRITONSERVER_ServerModelConfig - * returns equivalent information but is not useable until after the - * model loads. +/** Remove all inputs from a request. * - * @param model The model. - * @param config_version The model configuration will be returned in - * a format matching this version. If the configuration cannot be - * represented in the requested version's format then an error will - * be returned. Currently only version 1 is supported. - * @param model_config Returns the model configuration as a message. + * @param inference_request The request object. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONBACKEND_ModelConfig( - TRITONBACKEND_Model model, @Cast("const uint32_t") int config_version, - @Cast("TRITONSERVER_Message**") PointerPointer model_config); -public static native TRITONSERVER_Error TRITONBACKEND_ModelConfig( - TRITONBACKEND_Model model, @Cast("const uint32_t") int config_version, - @ByPtrPtr TRITONSERVER_Message model_config); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestRemoveAllInputs( + TRITONSERVER_InferenceRequest inference_request); -/** Whether the backend should attempt to auto-complete the model configuration. - * If true, the model should fill the inputs, outputs, and max batch size in - * the model configuration if incomplete. If the model configuration is - * changed, the new configuration must be reported to Triton using - * TRITONBACKEND_ModelSetConfig. +/** Assign a buffer of data to an input. 
The buffer will be appended + * to any existing buffers for that input. The 'inference_request' + * object takes ownership of the buffer and so the caller should not + * modify or free the buffer until that ownership is released by + * 'inference_request' being deleted or by the input being removed + * from 'inference_request'. * - * @param model The model. - * @param auto_complete_config Returns whether the backend should auto-complete - * the model configuration. + * @param inference_request The request object. + * @param name The name of the input. + * @param base The base address of the input data. + * @param byte_size The size, in bytes, of the input data. + * @param memory_type The memory type of the input data. + * @param memory_type_id The memory type id of the input data. * @return a TRITONSERVER_Error indicating success or failure. */ +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAppendInputData( + TRITONSERVER_InferenceRequest inference_request, String name, + @Const Pointer base, @Cast("size_t") long byte_size, TRITONSERVER_MemoryType memory_type, + @Cast("int64_t") long memory_type_id); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAppendInputData( + TRITONSERVER_InferenceRequest inference_request, @Cast("const char*") BytePointer name, + @Const Pointer base, @Cast("size_t") long byte_size, @Cast("TRITONSERVER_MemoryType") int memory_type, + @Cast("int64_t") long memory_type_id); -/// -public static native TRITONSERVER_Error TRITONBACKEND_ModelAutoCompleteConfig( - TRITONBACKEND_Model model, @Cast("bool*") BoolPointer auto_complete_config); -public static native TRITONSERVER_Error TRITONBACKEND_ModelAutoCompleteConfig( - TRITONBACKEND_Model model, @Cast("bool*") boolean[] auto_complete_config); +/** Assign a buffer of data to an input for execution on all model instances + * with the specified host policy. 
The buffer will be appended to any existing + * buffers for that input on all devices with this host policy. The + * 'inference_request' object takes ownership of the buffer and so the caller + * should not modify or free the buffer until that ownership is released by + * 'inference_request' being deleted or by the input being removed from + * 'inference_request'. If the execution is scheduled on a device that does not + * have a input buffer specified using this function, then the input buffer + * specified with TRITONSERVER_InferenceRequestAppendInputData will be used so + * a non-host policy specific version of data must be added using that API. + * @param inference_request The request object. + * @param name The name of the input. + * @param base The base address of the input data. + * @param byte_size The size, in bytes, of the input data. + * @param memory_type The memory type of the input data. + * @param memory_type_id The memory type id of the input data. + * @param host_policy_name All model instances executing with this host_policy + * will use this input buffer for execution. + * @return a TRITONSERVER_Error indicating success or failure. */ +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy( + TRITONSERVER_InferenceRequest inference_request, String name, + @Const Pointer base, @Cast("size_t") long byte_size, TRITONSERVER_MemoryType memory_type, + @Cast("int64_t") long memory_type_id, String host_policy_name); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy( + TRITONSERVER_InferenceRequest inference_request, @Cast("const char*") BytePointer name, + @Const Pointer base, @Cast("size_t") long byte_size, @Cast("TRITONSERVER_MemoryType") int memory_type, + @Cast("int64_t") long memory_type_id, @Cast("const char*") BytePointer host_policy_name); -/** Set the model configuration in Triton server. Only the inputs, outputs, - * and max batch size can be changed. 
Any other changes to the model - * configuration will be ignored by Triton. This function can only be called - * from TRITONBACKEND_ModelInitialize, calling in any other context will result - * in an error being returned. The function does not take ownership of the - * message object and so the caller should call TRITONSERVER_MessageDelete to - * release the object once the function returns. +/** Clear all input data from an input, releasing ownership of the + * buffer(s) that were appended to the input with + * TRITONSERVER_InferenceRequestAppendInputData or + * TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy + * @param inference_request The request object. + * @param name The name of the input. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestRemoveAllInputData( + TRITONSERVER_InferenceRequest inference_request, String name); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestRemoveAllInputData( + TRITONSERVER_InferenceRequest inference_request, @Cast("const char*") BytePointer name); + +/** Add an output request to an inference request. * - * @param model The model. - * @param config_version The format version of the model configuration. - * If the configuration is not represented in the version's format - * then an error will be returned. Currently only version 1 is supported. - * @param model_config The updated model configuration as a message. + * @param inference_request The request object. + * @param name The name of the output. * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native TRITONSERVER_Error TRITONBACKEND_ModelSetConfig( - TRITONBACKEND_Model model, @Cast("const uint32_t") int config_version, - TRITONSERVER_Message model_config); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAddRequestedOutput( + TRITONSERVER_InferenceRequest inference_request, String name); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAddRequestedOutput( + TRITONSERVER_InferenceRequest inference_request, @Cast("const char*") BytePointer name); -/** Get the TRITONSERVER_Server object that this model is being served - * by. +/** Remove an output request from an inference request. * - * @param model The model. - * @param server Returns the server. + * @param inference_request The request object. + * @param name The name of the output. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONBACKEND_ModelServer( - TRITONBACKEND_Model model, @Cast("TRITONSERVER_Server**") PointerPointer server); -public static native TRITONSERVER_Error TRITONBACKEND_ModelServer( - TRITONBACKEND_Model model, @ByPtrPtr TRITONSERVER_Server server); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestRemoveRequestedOutput( + TRITONSERVER_InferenceRequest inference_request, String name); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestRemoveRequestedOutput( + TRITONSERVER_InferenceRequest inference_request, @Cast("const char*") BytePointer name); -/** Get the backend used by the model. +/** Remove all output requests from an inference request. * - * @param model The model. - * @param model Returns the backend object. + * @param inference_request The request object. * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native TRITONSERVER_Error TRITONBACKEND_ModelBackend( - TRITONBACKEND_Model model, @Cast("TRITONBACKEND_Backend**") PointerPointer backend); -public static native TRITONSERVER_Error TRITONBACKEND_ModelBackend( - TRITONBACKEND_Model model, @ByPtrPtr TRITONBACKEND_Backend backend); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestRemoveAllRequestedOutputs( + TRITONSERVER_InferenceRequest inference_request); -/** Get the user-specified state associated with the model. The - * state is completely owned and managed by the backend. +/** Set the release callback for an inference request. The release + * callback is called by Triton to return ownership of the request + * object. * - * @param model The model. - * @param state Returns the user state, or nullptr if no user state. + * @param inference_request The request object. + * @param request_release_fn The function called to return ownership + * of the 'inference_request' object. + * @param request_release_userp User-provided pointer that is + * delivered to the 'request_release_fn' callback. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONBACKEND_ModelState( - TRITONBACKEND_Model model, @Cast("void**") PointerPointer state); -public static native TRITONSERVER_Error TRITONBACKEND_ModelState( - TRITONBACKEND_Model model, @Cast("void**") @ByPtrPtr Pointer state); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestSetReleaseCallback( + TRITONSERVER_InferenceRequest inference_request, + TRITONSERVER_InferenceRequestReleaseFn_t request_release_fn, + Pointer request_release_userp); -/** Set the user-specified state associated with the model. The - * state is completely owned and managed by the backend. +/** Set the allocator and response callback for an inference + * request. 
The allocator is used to allocate buffers for any output + * tensors included in responses that are produced for this + * request. The response callback is called to return response + * objects representing responses produced for this request. * - * @param model The model. - * @param state The user state, or nullptr if no user state. + * @param inference_request The request object. + * @param response_allocator The TRITONSERVER_ResponseAllocator to use + * to allocate buffers to hold inference results. + * @param response_allocator_userp User-provided pointer that is + * delivered to the response allocator's start and allocation functions. + * @param response_fn The function called to deliver an inference + * response for this request. + * @param response_userp User-provided pointer that is delivered to + * the 'response_fn' callback. * @return a TRITONSERVER_Error indicating success or failure. */ - /// /// /// -/// -public static native TRITONSERVER_Error TRITONBACKEND_ModelSetState( - TRITONBACKEND_Model model, Pointer state); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestSetResponseCallback( + TRITONSERVER_InferenceRequest inference_request, + TRITONSERVER_ResponseAllocator response_allocator, + Pointer response_allocator_userp, + TRITONSERVER_InferenceResponseCompleteFn_t response_fn, + Pointer response_userp); -/** - * TRITONBACKEND_ModelInstance +/** TRITONSERVER_InferenceResponse * - * Object representing a model instance implemented using the - * backend. + * Object representing an inference response. The inference response + * provides the meta-data and output tensor values calculated by the + * inference. *

- * Get the name of the model instance. The returned string is owned by the - * model object, not the caller, and so should not be modified or - * freed. + * Delete an inference response object. * - * @param instance The model instance. - * @param name Returns the instance name. + * @param inference_response The response object. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceName( - TRITONBACKEND_ModelInstance instance, @Cast("const char**") PointerPointer name); -public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceName( - TRITONBACKEND_ModelInstance instance, @Cast("const char**") @ByPtrPtr BytePointer name); -public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceName( - TRITONBACKEND_ModelInstance instance, @Cast("const char**") @ByPtrPtr ByteBuffer name); -public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceName( - TRITONBACKEND_ModelInstance instance, @Cast("const char**") @ByPtrPtr byte[] name); +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseDelete( + TRITONSERVER_InferenceResponse inference_response); -/** Get the kind of the model instance. +/** Return the error status of an inference response. Return a + * TRITONSERVER_Error object on failure, return nullptr on success. + * The returned error object is owned by 'inference_response' and so + * should not be deleted by the caller. * - * @param instance The model instance. - * @param kind Returns the instance kind. + * @param inference_response The response object. + * @return a TRITONSERVER_Error indicating the success or failure + * status of the response. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseError( + TRITONSERVER_InferenceResponse inference_response); + +/** Get model used to produce a response. The caller does not own the + * returned model name value and must not modify or delete it. 
The + * lifetime of all returned values extends until 'inference_response' + * is deleted. + * + * @param inference_response The response object. + * @param model_name Returns the name of the model. + * @param model_version Returns the version of the model. + * this response. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceKind( - TRITONBACKEND_ModelInstance instance, - TRITONSERVER_InstanceGroupKind kind); +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseModel( + TRITONSERVER_InferenceResponse inference_response, @Cast("const char**") PointerPointer model_name, + @Cast("int64_t*") LongPointer model_version); +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseModel( + TRITONSERVER_InferenceResponse inference_response, @Cast("const char**") @ByPtrPtr BytePointer model_name, + @Cast("int64_t*") LongPointer model_version); +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseModel( + TRITONSERVER_InferenceResponse inference_response, @Cast("const char**") @ByPtrPtr ByteBuffer model_name, + @Cast("int64_t*") LongBuffer model_version); +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseModel( + TRITONSERVER_InferenceResponse inference_response, @Cast("const char**") @ByPtrPtr byte[] model_name, + @Cast("int64_t*") long[] model_version); -/** Get the device ID of the model instance. +/** Get the ID of the request corresponding to a response. The caller + * does not own the returned ID and must not modify or delete it. The + * lifetime of all returned values extends until 'inference_response' + * is deleted. * - * @param instance The model instance. - * @param device_id Returns the instance device ID. + * @param inference_response The response object. + * @param request_id Returns the ID of the request corresponding to + * this response. * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseId( + TRITONSERVER_InferenceResponse inference_response, + @Cast("const char**") PointerPointer request_id); +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseId( + TRITONSERVER_InferenceResponse inference_response, + @Cast("const char**") @ByPtrPtr BytePointer request_id); +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseId( + TRITONSERVER_InferenceResponse inference_response, + @Cast("const char**") @ByPtrPtr ByteBuffer request_id); +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseId( + TRITONSERVER_InferenceResponse inference_response, + @Cast("const char**") @ByPtrPtr byte[] request_id); + +/** Get the number of parameters available in the response. + * + * @param inference_response The response object. + * @param count Returns the number of parameters. + * @return a TRITONSERVER_Error indicating success or failure. */ + /// /// -public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceDeviceId( - TRITONBACKEND_ModelInstance instance, IntPointer device_id); -public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceDeviceId( - TRITONBACKEND_ModelInstance instance, IntBuffer device_id); -public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceDeviceId( - TRITONBACKEND_ModelInstance instance, int[] device_id); +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseParameterCount( + TRITONSERVER_InferenceResponse inference_response, @Cast("uint32_t*") IntPointer count); +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseParameterCount( + TRITONSERVER_InferenceResponse inference_response, @Cast("uint32_t*") IntBuffer count); +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseParameterCount( + TRITONSERVER_InferenceResponse inference_response, @Cast("uint32_t*") int[] count); -/** Get the host policy setting. 
The 'host_policy' message is - * owned by Triton and should not be modified or freed by the caller. +/** Get all information about a parameter. The caller does not own any + * of the returned values and must not modify or delete them. The + * lifetime of all returned values extends until 'inference_response' + * is deleted. * - * The host policy setting, as JSON, is: + * The 'vvalue' returns a void* pointer that must be cast + * appropriately based on 'type'. For example: * - * { - * "" : { - * "" : "", + * void* vvalue; + * TRITONSERVER_ParameterType type; + * TRITONSERVER_InferenceResponseParameter( + * response, index, &name, &type, &vvalue); + * switch (type) { + * case TRITONSERVER_PARAMETER_BOOL: + * bool value = *(reinterpret_cast(vvalue)); + * ... + * case TRITONSERVER_PARAMETER_INT: + * int64_t value = *(reinterpret_cast(vvalue)); + * ... + * case TRITONSERVER_PARAMETER_STRING: + * const char* value = reinterpret_cast(vvalue); * ... - * } - * } * - * @param instance The model instance. - * @param host_policy Returns the host policy setting as a message. + * @param inference_response The response object. + * @param index The index of the parameter, must be 0 <= index < + * count, where 'count' is the value returned by + * TRITONSERVER_InferenceResponseParameterCount. + * @param name Returns the name of the parameter. + * @param type Returns the type of the parameter. + * @param vvalue Returns a pointer to the parameter value. * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceHostPolicy( - TRITONBACKEND_ModelInstance instance, @Cast("TRITONSERVER_Message**") PointerPointer host_policy); -public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceHostPolicy( - TRITONBACKEND_ModelInstance instance, @ByPtrPtr TRITONSERVER_Message host_policy); +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseParameter( + TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index, + @Cast("const char**") PointerPointer name, @Cast("TRITONSERVER_ParameterType*") IntPointer type, @Cast("const void**") PointerPointer vvalue); +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseParameter( + TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index, + @Cast("const char**") @ByPtrPtr BytePointer name, @Cast("TRITONSERVER_ParameterType*") IntPointer type, @Cast("const void**") @ByPtrPtr Pointer vvalue); +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseParameter( + TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index, + @Cast("const char**") @ByPtrPtr ByteBuffer name, @Cast("TRITONSERVER_ParameterType*") IntBuffer type, @Cast("const void**") @ByPtrPtr Pointer vvalue); +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseParameter( + TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index, + @Cast("const char**") @ByPtrPtr byte[] name, @Cast("TRITONSERVER_ParameterType*") int[] type, @Cast("const void**") @ByPtrPtr Pointer vvalue); -/** Whether the model instance is passive. - * - * @param instance The model instance. - * @param is_passive Returns true if the instance is passive, false otherwise +/** Get the number of outputs available in the response. + * + * @param inference_response The response object. + * @param count Returns the number of output tensors. 
* @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceIsPassive( - TRITONBACKEND_ModelInstance instance, @Cast("bool*") BoolPointer is_passive); -public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceIsPassive( - TRITONBACKEND_ModelInstance instance, @Cast("bool*") boolean[] is_passive); +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseOutputCount( + TRITONSERVER_InferenceResponse inference_response, @Cast("uint32_t*") IntPointer count); +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseOutputCount( + TRITONSERVER_InferenceResponse inference_response, @Cast("uint32_t*") IntBuffer count); +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseOutputCount( + TRITONSERVER_InferenceResponse inference_response, @Cast("uint32_t*") int[] count); -/** Get the number of optimization profiles to be loaded for the instance. +/** Get all information about an output tensor. The tensor data is + * returned as the base pointer to the data and the size, in bytes, + * of the data. The caller does not own any of the returned values + * and must not modify or delete them. The lifetime of all returned + * values extends until 'inference_response' is deleted. * - * @param instance The model instance. - * @param count Returns the number of optimization profiles. + * @param inference_response The response object. + * @param index The index of the output tensor, must be 0 <= index < + * count, where 'count' is the value returned by + * TRITONSERVER_InferenceResponseOutputCount. + * @param name Returns the name of the output. + * @param datatype Returns the type of the output. + * @param shape Returns the shape of the output. + * @param dim_count Returns the number of dimensions of the returned + * shape. + * @param base Returns the tensor data for the output. + * @param byte_size Returns the size, in bytes, of the data. 
+ * @param memory_type Returns the memory type of the data. + * @param memory_type_id Returns the memory type id of the data. + * @param userp The user-specified value associated with the buffer + * in TRITONSERVER_ResponseAllocatorAllocFn_t. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceProfileCount( - TRITONBACKEND_ModelInstance instance, @Cast("uint32_t*") IntPointer count); -public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceProfileCount( - TRITONBACKEND_ModelInstance instance, @Cast("uint32_t*") IntBuffer count); -public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceProfileCount( - TRITONBACKEND_ModelInstance instance, @Cast("uint32_t*") int[] count); +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseOutput( + TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index, + @Cast("const char**") PointerPointer name, @Cast("TRITONSERVER_DataType*") IntPointer datatype, @Cast("const int64_t**") PointerPointer shape, + @Cast("uint64_t*") LongPointer dim_count, @Cast("const void**") PointerPointer base, @Cast("size_t*") SizeTPointer byte_size, + @Cast("TRITONSERVER_MemoryType*") IntPointer memory_type, @Cast("int64_t*") LongPointer memory_type_id, + @Cast("void**") PointerPointer userp); +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseOutput( + TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index, + @Cast("const char**") @ByPtrPtr BytePointer name, @Cast("TRITONSERVER_DataType*") IntPointer datatype, @Cast("const int64_t**") @ByPtrPtr LongPointer shape, + @Cast("uint64_t*") LongPointer dim_count, @Cast("const void**") @ByPtrPtr Pointer base, @Cast("size_t*") SizeTPointer byte_size, + @Cast("TRITONSERVER_MemoryType*") IntPointer memory_type, @Cast("int64_t*") LongPointer memory_type_id, + @Cast("void**") @ByPtrPtr Pointer userp); +public static 
native TRITONSERVER_Error TRITONSERVER_InferenceResponseOutput( + TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index, + @Cast("const char**") @ByPtrPtr ByteBuffer name, @Cast("TRITONSERVER_DataType*") IntBuffer datatype, @Cast("const int64_t**") @ByPtrPtr LongBuffer shape, + @Cast("uint64_t*") LongBuffer dim_count, @Cast("const void**") @ByPtrPtr Pointer base, @Cast("size_t*") SizeTPointer byte_size, + @Cast("TRITONSERVER_MemoryType*") IntBuffer memory_type, @Cast("int64_t*") LongBuffer memory_type_id, + @Cast("void**") @ByPtrPtr Pointer userp); +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseOutput( + TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index, + @Cast("const char**") @ByPtrPtr byte[] name, @Cast("TRITONSERVER_DataType*") int[] datatype, @Cast("const int64_t**") @ByPtrPtr long[] shape, + @Cast("uint64_t*") long[] dim_count, @Cast("const void**") @ByPtrPtr Pointer base, @Cast("size_t*") SizeTPointer byte_size, + @Cast("TRITONSERVER_MemoryType*") int[] memory_type, @Cast("int64_t*") long[] memory_type_id, + @Cast("void**") @ByPtrPtr Pointer userp); -/** Get the name of optimization profile. The caller does not own - * the returned string and must not modify or delete it. The lifetime - * of the returned string extends only as long as 'instance'. +/** Get a classification label associated with an output for a given + * index. The caller does not own the returned label and must not + * modify or delete it. The lifetime of all returned label extends + * until 'inference_response' is deleted. * - * @param instance The model instance. - * @param index The index of the optimization profile. Must be 0 - * <= index < count, where count is the value returned by - * TRITONBACKEND_ModelInstanceProfileCount. - * @param profile_name Returns the name of the optimization profile - * corresponding to the index. + * @param inference_response The response object. 
+ * @param index The index of the output tensor, must be 0 <= index < + * count, where 'count' is the value returned by + * TRITONSERVER_InferenceResponseOutputCount. + * @param class_index The index of the class. + * @param name Returns the label corresponding to 'class_index' or + * nullptr if no label. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceProfileName( - TRITONBACKEND_ModelInstance instance, @Cast("const uint32_t") int index, - @Cast("const char**") PointerPointer profile_name); -public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceProfileName( - TRITONBACKEND_ModelInstance instance, @Cast("const uint32_t") int index, - @Cast("const char**") @ByPtrPtr BytePointer profile_name); -public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceProfileName( - TRITONBACKEND_ModelInstance instance, @Cast("const uint32_t") int index, - @Cast("const char**") @ByPtrPtr ByteBuffer profile_name); -public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceProfileName( - TRITONBACKEND_ModelInstance instance, @Cast("const uint32_t") int index, - @Cast("const char**") @ByPtrPtr byte[] profile_name); +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseOutputClassificationLabel( + TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index, + @Cast("const size_t") long class_index, @Cast("const char**") PointerPointer label); +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseOutputClassificationLabel( + TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index, + @Cast("const size_t") long class_index, @Cast("const char**") @ByPtrPtr BytePointer label); +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseOutputClassificationLabel( + TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index, + @Cast("const size_t") 
long class_index, @Cast("const char**") @ByPtrPtr ByteBuffer label); +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseOutputClassificationLabel( + TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index, + @Cast("const size_t") long class_index, @Cast("const char**") @ByPtrPtr byte[] label); -/** Get the model associated with a model instance. + +/** TRITONSERVER_ServerOptions * - * @param instance The model instance. - * @param backend Returns the model object. - * @return a TRITONSERVER_Error indicating success or failure. */ + * Options to use when creating an inference server. + * +

+ * Model control modes */ +public enum TRITONSERVER_ModelControlMode { + TRITONSERVER_MODEL_CONTROL_NONE(0), + TRITONSERVER_MODEL_CONTROL_POLL(1), + TRITONSERVER_MODEL_CONTROL_EXPLICIT(2); -/// -public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceModel( - TRITONBACKEND_ModelInstance instance, @Cast("TRITONBACKEND_Model**") PointerPointer model); -public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceModel( - TRITONBACKEND_ModelInstance instance, @ByPtrPtr TRITONBACKEND_Model model); + public final int value; + private TRITONSERVER_ModelControlMode(int v) { this.value = v; } + private TRITONSERVER_ModelControlMode(TRITONSERVER_ModelControlMode e) { this.value = e.value; } + public TRITONSERVER_ModelControlMode intern() { for (TRITONSERVER_ModelControlMode e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} -/** Get the user-specified state associated with the model - * instance. The state is completely owned and managed by the - * backend. +/** Create a new server options object. The caller takes ownership of + * the TRITONSERVER_ServerOptions object and must call + * TRITONSERVER_ServerOptionsDelete to release the object. * - * @param instance The model instance. - * @param state Returns the user state, or nullptr if no user state. + * @param options Returns the new server options object. * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceState( - TRITONBACKEND_ModelInstance instance, @Cast("void**") PointerPointer state); -public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceState( - TRITONBACKEND_ModelInstance instance, @Cast("void**") @ByPtrPtr Pointer state); +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsNew( + @Cast("TRITONSERVER_ServerOptions**") PointerPointer options); +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsNew( + @ByPtrPtr TRITONSERVER_ServerOptions options); -/** Set the user-specified state associated with the model - * instance. The state is completely owned and managed by the - * backend. +/** Delete a server options object. * - * @param instance The model instance. - * @param state The user state, or nullptr if no user state. + * @param options The server options object. * @return a TRITONSERVER_Error indicating success or failure. */ /// -/// -/// -/// -/// -public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceSetState( - TRITONBACKEND_ModelInstance instance, Pointer state); +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsDelete( + TRITONSERVER_ServerOptions options); -/** Record statistics for an inference request. - * - * Set 'success' true to indicate that the inference request - * completed successfully. In this case all timestamps should be - * non-zero values reported in nanoseconds and should be collected - * using std::chrono::steady_clock::now().time_since_epoch() or the equivalent. - * Set 'success' to false to indicate that the inference request failed - * to complete successfully. In this case all timestamps values are - * ignored. - * - * For consistency of measurement across different backends, the - * timestamps should be collected at the following points during - * TRITONBACKEND_ModelInstanceExecute. 
- * - * TRITONBACKEND_ModelInstanceExecute() - * CAPTURE TIMESPACE (exec_start_ns) - * < process input tensors to prepare them for inference - * execution, including copying the tensors to/from GPU if - * necessary> - * CAPTURE TIMESPACE (compute_start_ns) - * < perform inference computations to produce outputs > - * CAPTURE TIMESPACE (compute_end_ns) - * < allocate output buffers and extract output tensors, including - * copying the tensors to/from GPU if necessary> - * CAPTURE TIMESPACE (exec_end_ns) - * return - * - * Note that these statistics are associated with a valid - * TRITONBACKEND_Request object and so must be reported before the - * request is released. For backends that release the request before - * all response(s) are sent, these statistics cannot capture - * information about the time required to produce the response. +/** Set the textual ID for the server in a server options. The ID is a + * name that identifies the server. * - * @param instance The model instance. - * @param request The inference request that statistics are being - * reported for. - * @param success True if the inference request completed - * successfully, false if it failed to complete. - * @param exec_start_ns Timestamp for the start of execution. - * @param compute_start_ns Timestamp for the start of execution - * computations. - * @param compute_end_ns Timestamp for the end of execution - * computations. - * @param exec_end_ns Timestamp for the end of execution. + * @param options The server options object. + * @param server_id The server identifier. * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -/// -/// -public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceReportStatistics( - TRITONBACKEND_ModelInstance instance, TRITONBACKEND_Request request, - @Cast("const bool") boolean success, @Cast("const uint64_t") long exec_start_ns, - @Cast("const uint64_t") long compute_start_ns, @Cast("const uint64_t") long compute_end_ns, - @Cast("const uint64_t") long exec_end_ns); +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetServerId( + TRITONSERVER_ServerOptions options, String server_id); +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetServerId( + TRITONSERVER_ServerOptions options, @Cast("const char*") BytePointer server_id); -/** Record statistics for the execution of an entire batch of - * inference requests. - * - * All timestamps should be non-zero values reported in nanoseconds - * and should be collected using - * std::chrono::steady_clock::now().time_since_epoch() or the equivalent. - * See TRITONBACKEND_ModelInstanceReportStatistics for more information about - * the timestamps. - * - * 'batch_size' is the sum of the batch sizes for the individual - * requests that were delivered together in the call to - * TRITONBACKEND_ModelInstanceExecute. For example, if three requests - * are passed to TRITONBACKEND_ModelInstanceExecute and those - * requests have batch size 1, 2, and 3; then 'batch_size' should be - * set to 6. - * - * @param instance The model instance. - * @param batch_size Combined batch size of all the individual - * requests executed in the batch. - * @param exec_start_ns Timestamp for the start of execution. - * @param compute_start_ns Timestamp for the start of execution - * computations. - * @param compute_end_ns Timestamp for the end of execution - * computations. - * @param exec_end_ns Timestamp for the end of execution. +/** Set the model repository path in a server options. The path must be + * the full absolute path to the model repository. 
This function can be called + * multiple times with different paths to set multiple model repositories. + * Note that if a model is not unique across all model repositories + * at any time, the model will not be available. + * + * @param options The server options object. + * @param model_repository_path The full path to the model repository. * @return a TRITONSERVER_Error indicating success or failure. */ - - /// /// /// -public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceReportBatchStatistics( - TRITONBACKEND_ModelInstance instance, @Cast("const uint64_t") long batch_size, - @Cast("const uint64_t") long exec_start_ns, @Cast("const uint64_t") long compute_start_ns, - @Cast("const uint64_t") long compute_end_ns, @Cast("const uint64_t") long exec_end_ns); - +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetModelRepositoryPath( + TRITONSERVER_ServerOptions options, String model_repository_path); +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetModelRepositoryPath( + TRITONSERVER_ServerOptions options, @Cast("const char*") BytePointer model_repository_path); -/** - * The following functions can be implemented by a backend. Functions - * indicated as required must be implemented or the backend will fail - * to load. +/** Set the model control mode in a server options. For each mode the models + * will be managed as the following: * -

- * Initialize a backend. This function is optional, a backend is not - * required to implement it. This function is called once when a - * backend is loaded to allow the backend to initialize any state - * associated with the backend. A backend has a single state that is - * shared across all models that use the backend. + * TRITONSERVER_MODEL_CONTROL_NONE: the models in model repository will be + * loaded on startup. After startup any changes to the model repository will + * be ignored. Calling TRITONSERVER_ServerPollModelRepository will result in + * an error. * - * @param backend The backend. + * TRITONSERVER_MODEL_CONTROL_POLL: the models in model repository will be + * loaded on startup. The model repository can be polled periodically using + * TRITONSERVER_ServerPollModelRepository and the server will load, unload, + * and updated models according to changes in the model repository. + * + * TRITONSERVER_MODEL_CONTROL_EXPLICIT: the models in model repository will + * not be loaded on startup. The corresponding model control APIs must be + * called to load / unload a model in the model repository. + * + * @param options The server options object. + * @param mode The mode to use for the model control. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONBACKEND_Initialize( - TRITONBACKEND_Backend backend); +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetModelControlMode( + TRITONSERVER_ServerOptions options, TRITONSERVER_ModelControlMode mode); +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetModelControlMode( + TRITONSERVER_ServerOptions options, @Cast("TRITONSERVER_ModelControlMode") int mode); -/** Finalize for a backend. This function is optional, a backend is - * not required to implement it. This function is called once, just - * before the backend is unloaded. 
All state associated with the - * backend should be freed and any threads created for the backend - * should be exited/joined before returning from this function. +/** Set the model to be loaded at startup in a server options. The model must be + * present in one, and only one, of the specified model repositories. + * This function can be called multiple times with different model name + * to set multiple startup models. + * Note that it only takes affect on TRITONSERVER_MODEL_CONTROL_EXPLICIT mode. * - * @param backend The backend. + * @param options The server options object. + * @param mode_name The name of the model to load on startup. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONBACKEND_Finalize( - TRITONBACKEND_Backend backend); +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetStartupModel( + TRITONSERVER_ServerOptions options, String model_name); +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetStartupModel( + TRITONSERVER_ServerOptions options, @Cast("const char*") BytePointer model_name); -/** Initialize for a model. This function is optional, a backend is - * not required to implement it. This function is called once when a - * model that uses the backend is loaded to allow the backend to - * initialize any state associated with the model. The backend should - * also examine the model configuration to determine if the - * configuration is suitable for the backend. Any errors reported by - * this function will prevent the model from loading. +/** Enable or disable strict model configuration handling in a server + * options. * - * @param model The model. + * @param options The server options object. + * @param strict True to enable strict model configuration handling, + * false to disable. * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native TRITONSERVER_Error TRITONBACKEND_ModelInitialize( - TRITONBACKEND_Model model); +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetStrictModelConfig( + TRITONSERVER_ServerOptions options, @Cast("bool") boolean strict); -/** Finalize for a model. This function is optional, a backend is not - * required to implement it. This function is called once for a - * model, just before the model is unloaded from Triton. All state - * associated with the model should be freed and any threads created - * for the model should be exited/joined before returning from this - * function. +/** Set the total pinned memory byte size that the server can allocate + * in a server options. The pinned memory pool will be shared across + * Triton itself and the backends that use + * TRITONBACKEND_MemoryManager to allocate memory. * - * @param model The model. + * @param options The server options object. + * @param size The pinned memory pool byte size. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONBACKEND_ModelFinalize( - TRITONBACKEND_Model model); +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetPinnedMemoryPoolByteSize( + TRITONSERVER_ServerOptions options, @Cast("uint64_t") long size); -/** Initialize for a model instance. This function is optional, a - * backend is not required to implement it. This function is called - * once when a model instance is created to allow the backend to - * initialize any state associated with the instance. +/** Set the total CUDA memory byte size that the server can allocate + * on given GPU device in a server options. The pinned memory pool + * will be shared across Triton itself and the backends that use + * TRITONBACKEND_MemoryManager to allocate memory. * - * @param instance The model instance. + * @param options The server options object. + * @param gpu_device The GPU device to allocate the memory pool. 
+ * @param size The CUDA memory pool byte size. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceInitialize( - TRITONBACKEND_ModelInstance instance); +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetCudaMemoryPoolByteSize( + TRITONSERVER_ServerOptions options, int gpu_device, @Cast("uint64_t") long size); -/** Finalize for a model instance. This function is optional, a - * backend is not required to implement it. This function is called - * once for an instance, just before the corresponding model is - * unloaded from Triton. All state associated with the instance - * should be freed and any threads created for the instance should be - * exited/joined before returning from this function. +/** Set the minimum support CUDA compute capability in a server + * options. * - * @param instance The model instance. + * @param options The server options object. + * @param cc The minimum CUDA compute capability. * @return a TRITONSERVER_Error indicating success or failure. */ /// -/// -/// -public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceFinalize( - TRITONBACKEND_ModelInstance instance); +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetMinSupportedComputeCapability( + TRITONSERVER_ServerOptions options, double cc); -/** Execute a batch of one or more requests on a model instance. This - * function is required. Triton will not perform multiple - * simultaneous calls to this function for a given model 'instance'; - * however, there may be simultaneous calls for different model - * instances (for the same or different models). - * - * If an error is returned the ownership of the request objects - * remains with Triton and the backend must not retain references to - * the request objects or access them in any way. 
- * - * If success is returned, ownership of the request objects is - * transferred to the backend and it is then responsible for creating - * responses and releasing the request objects. +/** Enable or disable exit-on-error in a server options. * - * @param instance The model instance. - * @param requests The requests. - * @param request_count The number of requests in the batch. + * @param options The server options object. + * @param exit True to enable exiting on intialization error, false + * to continue. * @return a TRITONSERVER_Error indicating success or failure. */ -public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceExecute( - TRITONBACKEND_ModelInstance instance, @Cast("TRITONBACKEND_Request**") PointerPointer requests, - @Cast("const uint32_t") int request_count); -public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceExecute( - TRITONBACKEND_ModelInstance instance, @ByPtrPtr TRITONBACKEND_Request requests, - @Cast("const uint32_t") int request_count); +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetExitOnError( + TRITONSERVER_ServerOptions options, @Cast("bool") boolean exit); -// #ifdef __cplusplus -// #endif +/** Enable or disable strict readiness handling in a server options. + * + * @param options The server options object. + * @param strict True to enable strict readiness handling, false to + * disable. + * @return a TRITONSERVER_Error indicating success or failure. */ +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetStrictReadiness( + TRITONSERVER_ServerOptions options, @Cast("bool") boolean strict); -// Parsed from tritonrepoagent.h +/** Set the exit timeout, in seconds, for the server in a server + * options. + * + * @param options The server options object. + * @param timeout The exit timeout, in seconds. + * @return a TRITONSERVER_Error indicating success or failure. */ -// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// #pragma once +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetExitTimeout( + TRITONSERVER_ServerOptions options, @Cast("unsigned int") int timeout); -// #include -// #include -// #include "triton/core/tritonserver.h" +/** Set the number of threads used in buffer manager in a server options. + * + * @param thread_count The number of threads. + * @return a TRITONSERVER_Error indicating success or failure. 
*/ -// #ifdef __cplusplus -// #endif +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetBufferManagerThreadCount( + TRITONSERVER_ServerOptions options, @Cast("unsigned int") int thread_count); -// #ifdef _COMPILING_TRITONREPOAGENT -// #if defined(_MSC_VER) -// #define TRITONREPOAGENT_DECLSPEC __declspec(dllexport) -// #define TRITONREPOAGENT_ISPEC __declspec(dllimport) -// #elif defined(__GNUC__) -// #define TRITONREPOAGENT_DECLSPEC __attribute__((__visibility__("default"))) -// #define TRITONREPOAGENT_ISPEC -// #else -// #define TRITONREPOAGENT_DECLSPEC -// #define TRITONREPOAGENT_ISPEC -// #endif -// #else -// #if defined(_MSC_VER) -// #define TRITONREPOAGENT_DECLSPEC __declspec(dllimport) -// #define TRITONREPOAGENT_ISPEC __declspec(dllexport) -// #else -// #define TRITONREPOAGENT_DECLSPEC -// #define TRITONREPOAGENT_ISPEC -// Targeting ../tritonserver/TRITONREPOAGENT_Agent.java +/** Enable or disable info level logging. + * + * @param options The server options object. + * @param log True to enable info logging, false to disable. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetLogInfo( + TRITONSERVER_ServerOptions options, @Cast("bool") boolean log); +/** Enable or disable warning level logging. + * + * @param options The server options object. + * @param log True to enable warning logging, false to disable. + * @return a TRITONSERVER_Error indicating success or failure. */ -// Targeting ../tritonserver/TRITONREPOAGENT_AgentModel.java +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetLogWarn( + TRITONSERVER_ServerOptions options, @Cast("bool") boolean log); +/** Enable or disable error level logging. + * + * @param options The server options object. + * @param log True to enable error logging, false to disable. + * @return a TRITONSERVER_Error indicating success or failure. 
*/ +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetLogError( + TRITONSERVER_ServerOptions options, @Cast("bool") boolean log); -/** - * TRITONREPOAGENT API Version +/** Set verbose logging level. Level zero disables verbose logging. * - * The TRITONREPOAGENT API is versioned with major and minor version - * numbers. Any change to the API that does not impact backwards - * compatibility (for example, adding a non-required function) - * increases the minor version number. Any change that breaks - * backwards compatibility (for example, deleting or changing the - * behavior of a function) increases the major version number. A - * repository agent should check that the API version used to compile - * the agent is compatible with the API version of the Triton server - * that it is running in. This is typically done by code similar to - * the following which makes sure that the major versions are equal - * and that the minor version of Triton is >= the minor version used - * to build the agent. + * @param options The server options object. + * @param level The verbose logging level. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetLogVerbose( + TRITONSERVER_ServerOptions options, int level); + +/** Enable or disable metrics collection in a server options. * - * uint32_t api_version_major, api_version_minor; - * TRITONREPOAGENT_ApiVersion(&api_version_major, &api_version_minor); - * if ((api_version_major != TRITONREPOAGENT_API_VERSION_MAJOR) || - * (api_version_minor < TRITONREPOAGENT_API_VERSION_MINOR)) { - * return TRITONSERVER_ErrorNew( - * TRITONSERVER_ERROR_UNSUPPORTED, - * "triton repository agent API version does not support this agent"); - * } - * */ -public static final int TRITONREPOAGENT_API_VERSION_MAJOR = 0; + * @param options The server options object. + * @param metrics True to enable metrics, false to disable. 
+ * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static final int TRITONREPOAGENT_API_VERSION_MINOR = 1; +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetMetrics( + TRITONSERVER_ServerOptions options, @Cast("bool") boolean metrics); -/** Get the TRITONREPOAGENT API version supported by Triton. This - * value can be compared against the - * TRITONREPOAGENT_API_VERSION_MAJOR and - * TRITONREPOAGENT_API_VERSION_MINOR used to build the agent to - * ensure that Triton is compatible with the agent. +/** Enable or disable GPU metrics collection in a server options. GPU + * metrics are collected if both this option and + * TRITONSERVER_ServerOptionsSetMetrics are true. * - * @param major Returns the TRITONREPOAGENT API major version supported - * by Triton. - * @param minor Returns the TRITONREPOAGENT API minor version supported - * by Triton. + * @param options The server options object. + * @param gpu_metrics True to enable GPU metrics, false to disable. * @return a TRITONSERVER_Error indicating success or failure. */ /// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetGpuMetrics( + TRITONSERVER_ServerOptions options, @Cast("bool") boolean gpu_metrics); + +/** Set the directory containing backend shared libraries. This + * directory is searched last after the version and model directory + * in the model repository when looking for the backend shared + * library for a model. If the backend is named 'be' the directory + * searched is 'backend_dir'/be/libtriton_be.so. + * + * @param options The server options object. + * @param backend_dir The full path of the backend directory. + * @return a TRITONSERVER_Error indicating success or failure. 
*/ + /// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetBackendDirectory( + TRITONSERVER_ServerOptions options, String backend_dir); +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetBackendDirectory( + TRITONSERVER_ServerOptions options, @Cast("const char*") BytePointer backend_dir); + +/** Set the directory containing repository agent shared libraries. This + * directory is searched when looking for the repository agent shared + * library for a model. If the backend is named 'ra' the directory + * searched is 'repoagent_dir'/ra/libtritonrepoagent_ra.so. + * + * @param options The server options object. + * @param repoagent_dir The full path of the repository agent directory. + * @return a TRITONSERVER_Error indicating success or failure. */ + /// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetRepoAgentDirectory( + TRITONSERVER_ServerOptions options, String repoagent_dir); +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetRepoAgentDirectory( + TRITONSERVER_ServerOptions options, @Cast("const char*") BytePointer repoagent_dir); + +/** Set a configuration setting for a named backend in a server + * options. + * + * @param options The server options object. + * @param backend_name The name of the backend. + * @param setting The name of the setting. + * @param value The setting value. + * @return a TRITONSERVER_Error indicating success or failure. 
*/ + /// -public static native TRITONSERVER_Error TRITONREPOAGENT_ApiVersion( - @Cast("uint32_t*") IntPointer major, @Cast("uint32_t*") IntPointer minor); -public static native TRITONSERVER_Error TRITONREPOAGENT_ApiVersion( - @Cast("uint32_t*") IntBuffer major, @Cast("uint32_t*") IntBuffer minor); -public static native TRITONSERVER_Error TRITONREPOAGENT_ApiVersion( - @Cast("uint32_t*") int[] major, @Cast("uint32_t*") int[] minor); +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetBackendConfig( + TRITONSERVER_ServerOptions options, String backend_name, + String setting, String value); +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetBackendConfig( + TRITONSERVER_ServerOptions options, @Cast("const char*") BytePointer backend_name, + @Cast("const char*") BytePointer setting, @Cast("const char*") BytePointer value); -/** TRITONREPOAGENT_ArtifactType +/** Set a host policy setting for a given policy name in a server options. * - * The ways that the files that make up a model's repository content - * are communicated between Triton and the agent. + * @param options The server options object. + * @param policy_name The name of the policy. + * @param setting The name of the setting. + * @param value The setting value. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetHostPolicy( + TRITONSERVER_ServerOptions options, String policy_name, + String setting, String value); +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetHostPolicy( + TRITONSERVER_ServerOptions options, @Cast("const char*") BytePointer policy_name, + @Cast("const char*") BytePointer setting, @Cast("const char*") BytePointer value); + +/** TRITONSERVER_Server * - * TRITONREPOAGENT_ARTIFACT_FILESYSTEM: The model artifacts are - * communicated to and from the repository agent via a locally - * accessible filesystem. 
The agent can access these files using - * an appropriate filesystem API. + * An inference server. * - * TRITONREPOAGENT_ARTIFACT_REMOTE_FILESYSTEM: The model artifacts are - * communicated to and from the repository agent via a remote filesystem. - * The remote filesystem path follows the same convention as is used for - * repository paths, for example, "s3://" prefix indicates an S3 path. - * */ -public enum TRITONREPOAGENT_ArtifactType { - TRITONREPOAGENT_ARTIFACT_FILESYSTEM(0), - TRITONREPOAGENT_ARTIFACT_REMOTE_FILESYSTEM(1); +

+ * Model batch flags. The enum values must be power-of-2 values. */ +public enum TRITONSERVER_ModelBatchFlag { + TRITONSERVER_BATCH_UNKNOWN(1), + TRITONSERVER_BATCH_FIRST_DIM(2); public final int value; - private TRITONREPOAGENT_ArtifactType(int v) { this.value = v; } - private TRITONREPOAGENT_ArtifactType(TRITONREPOAGENT_ArtifactType e) { this.value = e.value; } - public TRITONREPOAGENT_ArtifactType intern() { for (TRITONREPOAGENT_ArtifactType e : values()) if (e.value == value) return e; return this; } + private TRITONSERVER_ModelBatchFlag(int v) { this.value = v; } + private TRITONSERVER_ModelBatchFlag(TRITONSERVER_ModelBatchFlag e) { this.value = e.value; } + public TRITONSERVER_ModelBatchFlag intern() { for (TRITONSERVER_ModelBatchFlag e : values()) if (e.value == value) return e; return this; } @Override public String toString() { return intern().name(); } } -/** TRITONREPOAGENT_ActionType - * - * Types of repository actions that can be handled by an agent. - * The lifecycle of a TRITONREPOAGENT_AgentModel begins with a call to - * TRITONREPOAGENT_ModelInitialize and ends with a call to - * TRITONREPOAGENT_ModelFinalize. Between those calls the current lifecycle - * state of the model is communicated by calls to TRITONREPOAGENT_ModelAction. - * Possible lifecycles are: - * - * LOAD -> LOAD_COMPLETE -> UNLOAD -> UNLOAD_COMPLETE - * LOAD -> LOAD_FAIL - * - * TRITONREPOAGENT_ACTION_LOAD: A model is being loaded. - * - * TRITONREPOAGENT_ACTION_LOAD_COMPLETE: The model load completed - * successfully and the model is now loaded. - * - * TRITONREPOAGENT_ACTION_LOAD_FAIL: The model load did not complete - * successfully. The model is not loaded. - * - * TRITONREPOAGENT_ACTION_UNLOAD: The model is being unloaded. - * - * TRITONREPOAGENT_ACTION_UNLOAD_COMPLETE: The model unload is complete. 
- * */ -public enum TRITONREPOAGENT_ActionType { - TRITONREPOAGENT_ACTION_LOAD(0), - TRITONREPOAGENT_ACTION_LOAD_COMPLETE(1), - TRITONREPOAGENT_ACTION_LOAD_FAIL(2), - TRITONREPOAGENT_ACTION_UNLOAD(3), - TRITONREPOAGENT_ACTION_UNLOAD_COMPLETE(4); +/** Model index flags. The enum values must be power-of-2 values. */ +public enum TRITONSERVER_ModelIndexFlag { + TRITONSERVER_INDEX_FLAG_READY(1); public final int value; - private TRITONREPOAGENT_ActionType(int v) { this.value = v; } - private TRITONREPOAGENT_ActionType(TRITONREPOAGENT_ActionType e) { this.value = e.value; } - public TRITONREPOAGENT_ActionType intern() { for (TRITONREPOAGENT_ActionType e : values()) if (e.value == value) return e; return this; } + private TRITONSERVER_ModelIndexFlag(int v) { this.value = v; } + private TRITONSERVER_ModelIndexFlag(TRITONSERVER_ModelIndexFlag e) { this.value = e.value; } + public TRITONSERVER_ModelIndexFlag intern() { for (TRITONSERVER_ModelIndexFlag e : values()) if (e.value == value) return e; return this; } @Override public String toString() { return intern().name(); } } -/** Get the location of the files that make up the model. The - * 'location' communicated depends on how the model is being - * communicated to the agent as indicated by 'artifact_type'. +/** Model transaction policy flags. The enum values must be + * power-of-2 values. */ +public enum TRITONSERVER_ModelTxnPropertyFlag { + TRITONSERVER_TXN_ONE_TO_ONE(1), + TRITONSERVER_TXN_DECOUPLED(2); + + public final int value; + private TRITONSERVER_ModelTxnPropertyFlag(int v) { this.value = v; } + private TRITONSERVER_ModelTxnPropertyFlag(TRITONSERVER_ModelTxnPropertyFlag e) { this.value = e.value; } + public TRITONSERVER_ModelTxnPropertyFlag intern() { for (TRITONSERVER_ModelTxnPropertyFlag e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + +/** Create a new server object. 
The caller takes ownership of the + * TRITONSERVER_Server object and must call TRITONSERVER_ServerDelete + * to release the object. * - * TRITONREPOAGENT_ARTIFACT_FILESYSTEM: The model artifacts are - * made available to the agent via the local - * filesytem. 'location' returns the full path to the directory - * in the model repository that contains the model's - * artifacts. The returned location string is owned by Triton, - * not the caller, and so should not be modified or freed. The - * contents of the directory are owned by Triton, not the agent, - * and so the agent should not delete or modify the contents. Use - * TRITONREPOAGENT_RepositoryAcquire to get a location that can be - * used to modify the model repository contents. + * @param server Returns the new inference server object. + * @param options The inference server options object. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerNew( + @Cast("TRITONSERVER_Server**") PointerPointer server, TRITONSERVER_ServerOptions options); +public static native TRITONSERVER_Error TRITONSERVER_ServerNew( + @ByPtrPtr TRITONSERVER_Server server, TRITONSERVER_ServerOptions options); + +/** Delete a server object. If server is not already stopped it is + * stopped before being deleted. * - * TRITONREPOAGENT_ARTIFACT_REMOTE_FILESYSTEM: The model artifacts are - * made available to the agent via a remote filesystem. - * 'location' returns the full path to the remote directory that contains - * the model's artifacts. The returned location string is owned by Triton, - * not the caller, and so should not be modified or freed. The contents of - * the remote directory are owned by Triton, not the agent, - * and so the agent should not delete or modify the contents. - * Use TRITONREPOAGENT_ModelRepositoryLocationAcquire to get a location - * that can be used to write updated model repository contents. 
+ * @param server The inference server object. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerDelete( + TRITONSERVER_Server server); + +/** Stop a server object. A server can't be restarted once it is + * stopped. * - * @param agent The agent. - * @param model The model. - * @param artifact_type Returns the artifact type for the location. - * @param path Returns the location. + * @param server The inference server object. * @return a TRITONSERVER_Error indicating success or failure. */ /// +public static native TRITONSERVER_Error TRITONSERVER_ServerStop( + TRITONSERVER_Server server); + +/** Check the model repository for changes and update server state + * based on those changes. + * + * @param server The inference server object. + * @return a TRITONSERVER_Error indicating success or failure. */ + /// -public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocation( - TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, - @Cast("TRITONREPOAGENT_ArtifactType*") IntPointer artifact_type, @Cast("const char**") PointerPointer location); -public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocation( - TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, - @Cast("TRITONREPOAGENT_ArtifactType*") IntPointer artifact_type, @Cast("const char**") @ByPtrPtr BytePointer location); -public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocation( - TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, - @Cast("TRITONREPOAGENT_ArtifactType*") IntBuffer artifact_type, @Cast("const char**") @ByPtrPtr ByteBuffer location); -public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocation( - TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, - @Cast("TRITONREPOAGENT_ArtifactType*") int[] artifact_type, @Cast("const char**") @ByPtrPtr byte[] location); +public static native TRITONSERVER_Error 
TRITONSERVER_ServerPollModelRepository(TRITONSERVER_Server server); -/** Acquire a location where the agent can produce a new version of - * the model repository files. This is a convenience method to create - * a temporary directory for the agent. The agent is responsible for - * calling TRITONREPOAGENT_ModelRepositoryLocationDelete in - * TRITONREPOAGENT_ModelFinalize to delete the location. Initially the - * acquired location is empty. The 'location' communicated depends on - * the requested 'artifact_type'. +/** Is the server live? * - * TRITONREPOAGENT_ARTIFACT_FILESYSTEM: The location is a directory - * on the local filesystem. 'location' returns the full path to - * an empty directory that the agent should populate with the - * model's artifacts. The returned location string is owned by - * Triton, not the agent, and so should not be modified or freed. + * @param server The inference server object. + * @param live Returns true if server is live, false otherwise. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerIsLive( + TRITONSERVER_Server server, @Cast("bool*") BoolPointer live); +public static native TRITONSERVER_Error TRITONSERVER_ServerIsLive( + TRITONSERVER_Server server, @Cast("bool*") boolean[] live); + +/** Is the server ready? * - * @param agent The agent. - * @param model The model. - * @param artifact_type The artifact type for the location. - * @param path Returns the location. + * @param server The inference server object. + * @param ready Returns true if server is ready, false otherwise. * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocationAcquire( - TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, - TRITONREPOAGENT_ArtifactType artifact_type, @Cast("const char**") PointerPointer location); -public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocationAcquire( - TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, - TRITONREPOAGENT_ArtifactType artifact_type, @Cast("const char**") @ByPtrPtr BytePointer location); -public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocationAcquire( - TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, - @Cast("TRITONREPOAGENT_ArtifactType") int artifact_type, @Cast("const char**") @ByPtrPtr ByteBuffer location); -public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocationAcquire( - TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, - TRITONREPOAGENT_ArtifactType artifact_type, @Cast("const char**") @ByPtrPtr byte[] location); -public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocationAcquire( - TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, - @Cast("TRITONREPOAGENT_ArtifactType") int artifact_type, @Cast("const char**") @ByPtrPtr BytePointer location); -public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocationAcquire( - TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, - TRITONREPOAGENT_ArtifactType artifact_type, @Cast("const char**") @ByPtrPtr ByteBuffer location); -public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocationAcquire( - TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, - @Cast("TRITONREPOAGENT_ArtifactType") int artifact_type, @Cast("const char**") @ByPtrPtr byte[] location); +public static native TRITONSERVER_Error TRITONSERVER_ServerIsReady( + TRITONSERVER_Server server, @Cast("bool*") BoolPointer ready); +public static native TRITONSERVER_Error 
 TRITONSERVER_ServerIsReady( + TRITONSERVER_Server server, @Cast("bool*") boolean[] ready); -/** Discard and release ownership of a previously acquired location - * and its contents. The agent must not access or modify the location - * or its contents after this call. +/** Is the model ready? * - * @param agent The agent. - * @param model The model. - * @param path The location to release. + * @param server The inference server object. + * @param model_name The name of the model to get readiness for. + * @param model_version The version of the model to get readiness + * for. If -1 then the server will choose a version based on the + * model's policy. + * @param ready Returns true if the model is ready, false otherwise. * @return a TRITONSERVER_Error indicating success or failure. */ /// /// /// -public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocationRelease( - TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, - String location); -public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocationRelease( - TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, - @Cast("const char*") BytePointer location); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelIsReady( + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version, @Cast("bool*") BoolPointer ready); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelIsReady( + TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name, + @Cast("const int64_t") long model_version, @Cast("bool*") boolean[] ready); -/** Inform Triton that the specified repository location should be used for - * the model in place of the original model repository. This method can only be - * called when TRITONREPOAGENT_ModelAction is invoked with - * TRITONREPOAGENT_ACTION_LOAD. 
The 'location' The 'location' - * communicated depends on how the repository is being - * communicated to Triton as indicated by 'artifact_type'. +/** Get the batch properties of the model. The properties are + * communicated by a flags value and an (optional) object returned by + * 'voidp'. * - * TRITONREPOAGENT_ARTIFACT_FILESYSTEM: The model artifacts are - * made available to Triton via the local filesytem. 'location' returns - * the full path to the directory. Ownership of the contents of the - * returned directory are transferred to Triton and the agent should not - * modified or freed the contents until TRITONREPOAGENT_ModelFinalize. - * The local filesystem directory can be created using - * TRITONREPOAGENT_ModelReopsitroyLocationAcquire or the agent can use - * its own local filesystem API. + * - TRITONSERVER_BATCH_UNKNOWN: Triton cannot determine the + * batching properties of the model. This means that the model + * does not support batching in any way that is useable by + * Triton. The returned 'voidp' value is nullptr. * - * TRITONREPOAGENT_ARTIFACT_REMOTE_FILESYSTEM: The model artifacts are - * made available to Triton via a remote filesystem. 'location' returns - * the full path to the remote filesystem directory. Ownership of the - * contents of the returned directory are transferred to Triton and - * the agent should not modified or freed the contents until - * TRITONREPOAGENT_ModelFinalize. + * - TRITONSERVER_BATCH_FIRST_DIM: The model supports batching + * along the first dimension of every input and output + * tensor. Triton schedulers that perform batching can + * automatically batch inference requests along this dimension. + * The returned 'voidp' value is nullptr. * - * @param agent The agent. - * @param model The model. - * @param artifact_type The artifact type for the location. - * @param path Returns the location. + * @param server The inference server object. + * @param model_name The name of the model. 
+ * @param model_version The version of the model. If -1 then the + * server will choose a version based on the model's policy. + * @param flags Returns flags indicating the batch properties of the + * model. + * @param voidp If non-nullptr, returns a pointer specific to the + * 'flags' value. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryUpdate( - TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, - TRITONREPOAGENT_ArtifactType artifact_type, String location); -public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryUpdate( - TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, - @Cast("TRITONREPOAGENT_ArtifactType") int artifact_type, @Cast("const char*") BytePointer location); - -/** Get the number of agent parameters defined for a model. - * - * @param agent The agent. - * @param model The model. - * @param count Returns the number of input tensors. - * @return a TRITONSERVER_Error indicating success or failure. 
*/ - /// -public static native TRITONSERVER_Error TRITONREPOAGENT_ModelParameterCount( - TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, - @Cast("uint32_t*") IntPointer count); -public static native TRITONSERVER_Error TRITONREPOAGENT_ModelParameterCount( - TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, - @Cast("uint32_t*") IntBuffer count); -public static native TRITONSERVER_Error TRITONREPOAGENT_ModelParameterCount( - TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, - @Cast("uint32_t*") int[] count); +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerModelBatchProperties( + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version, @Cast("uint32_t*") IntPointer flags, @Cast("void**") PointerPointer voidp); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelBatchProperties( + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version, @Cast("uint32_t*") IntPointer flags, @Cast("void**") @ByPtrPtr Pointer voidp); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelBatchProperties( + TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name, + @Cast("const int64_t") long model_version, @Cast("uint32_t*") IntBuffer flags, @Cast("void**") @ByPtrPtr Pointer voidp); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelBatchProperties( + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version, @Cast("uint32_t*") int[] flags, @Cast("void**") @ByPtrPtr Pointer voidp); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelBatchProperties( + TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name, + @Cast("const int64_t") long model_version, @Cast("uint32_t*") IntPointer flags, @Cast("void**") @ByPtrPtr Pointer voidp); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelBatchProperties( + TRITONSERVER_Server server, String 
 model_name, + @Cast("const int64_t") long model_version, @Cast("uint32_t*") IntBuffer flags, @Cast("void**") @ByPtrPtr Pointer voidp); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelBatchProperties( + TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name, + @Cast("const int64_t") long model_version, @Cast("uint32_t*") int[] flags, @Cast("void**") @ByPtrPtr Pointer voidp); -/** Get a parameter name and value. The caller does not own the - * returned strings and must not modify or delete them. +/** Get the transaction policy of the model. The policy is + * communicated by a flags value. * - * @param agent The agent. - * @param model The model. - * @param index The index of the parameter. Must be 0 <= index < - * count, where count is the value returned by - * TRITONREPOAGENT_ModelParameterCount. - * @param parameter_name Returns the name of the parameter. - * @param parameter_value Returns the value of the parameter. + * - TRITONSERVER_TXN_ONE_TO_ONE: The model generates exactly + * one response per request. + * + * - TRITONSERVER_TXN_DECOUPLED: The model may generate zero + * to many responses per request. + * + * @param server The inference server object. + * @param model_name The name of the model. + * @param model_version The version of the model. If -1 then the + * server will choose a version based on the model's policy. + * @param txn_flags Returns flags indicating the transaction policy of the + * model. + * @param voidp If non-nullptr, returns a pointer specific to the 'flags' value. * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native TRITONSERVER_Error TRITONREPOAGENT_ModelParameter( - TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, - @Cast("const uint32_t") int index, @Cast("const char**") PointerPointer parameter_name, - @Cast("const char**") PointerPointer parameter_value); -public static native TRITONSERVER_Error TRITONREPOAGENT_ModelParameter( - TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, - @Cast("const uint32_t") int index, @Cast("const char**") @ByPtrPtr BytePointer parameter_name, - @Cast("const char**") @ByPtrPtr BytePointer parameter_value); -public static native TRITONSERVER_Error TRITONREPOAGENT_ModelParameter( - TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, - @Cast("const uint32_t") int index, @Cast("const char**") @ByPtrPtr ByteBuffer parameter_name, - @Cast("const char**") @ByPtrPtr ByteBuffer parameter_value); -public static native TRITONSERVER_Error TRITONREPOAGENT_ModelParameter( - TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, - @Cast("const uint32_t") int index, @Cast("const char**") @ByPtrPtr byte[] parameter_name, - @Cast("const char**") @ByPtrPtr byte[] parameter_value); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelTransactionProperties( + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version, @Cast("uint32_t*") IntPointer txn_flags, @Cast("void**") PointerPointer voidp); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelTransactionProperties( + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version, @Cast("uint32_t*") IntPointer txn_flags, @Cast("void**") @ByPtrPtr Pointer voidp); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelTransactionProperties( + TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name, + @Cast("const int64_t") long model_version, @Cast("uint32_t*") IntBuffer txn_flags, @Cast("void**") @ByPtrPtr Pointer voidp); 
+public static native TRITONSERVER_Error TRITONSERVER_ServerModelTransactionProperties( + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version, @Cast("uint32_t*") int[] txn_flags, @Cast("void**") @ByPtrPtr Pointer voidp); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelTransactionProperties( + TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name, + @Cast("const int64_t") long model_version, @Cast("uint32_t*") IntPointer txn_flags, @Cast("void**") @ByPtrPtr Pointer voidp); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelTransactionProperties( + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version, @Cast("uint32_t*") IntBuffer txn_flags, @Cast("void**") @ByPtrPtr Pointer voidp); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelTransactionProperties( + TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name, + @Cast("const int64_t") long model_version, @Cast("uint32_t*") int[] txn_flags, @Cast("void**") @ByPtrPtr Pointer voidp); -/** Get the model configuration. The caller takes ownership of the - * message object and must call TRITONSERVER_MessageDelete to release - * the object. If the model repository does not contain a - * config.pbtxt file then 'model_config' is returned as nullptr. +/** Get the metadata of the server as a TRITONSERVER_Message object. + * The caller takes ownership of the message object and must call + * TRITONSERVER_MessageDelete to release the object. * - * @param agent The agent. - * @param model The model. - * @param config_version The model configuration will be returned in - * a format matching this version. If the configuration cannot be - * represented in the requested version's format then an error will - * be returned. Currently only version 1 is supported. - * @param model_config Returns the model configuration as a message. + * @param server The inference server object. 
+ * @param server_metadata Returns the server metadata message. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONREPOAGENT_ModelConfig( - TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, - @Cast("const uint32_t") int config_version, @Cast("TRITONSERVER_Message**") PointerPointer model_config); -public static native TRITONSERVER_Error TRITONREPOAGENT_ModelConfig( - TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, - @Cast("const uint32_t") int config_version, @ByPtrPtr TRITONSERVER_Message model_config); +public static native TRITONSERVER_Error TRITONSERVER_ServerMetadata( + TRITONSERVER_Server server, @Cast("TRITONSERVER_Message**") PointerPointer server_metadata); +public static native TRITONSERVER_Error TRITONSERVER_ServerMetadata( + TRITONSERVER_Server server, @ByPtrPtr TRITONSERVER_Message server_metadata); -/** Get the user-specified state associated with the model. +/** Get the metadata of a model as a TRITONSERVER_Message + * object. The caller takes ownership of the message object and must + * call TRITONSERVER_MessageDelete to release the object. * - * @param model The agent model. - * @param state Returns the user state, or nullptr if no user state. + * @param server The inference server object. + * @param model_name The name of the model. + * @param model_version The version of the model. + * If -1 then the server will choose a version based on the model's + * policy. + * @param model_metadata Returns the model metadata message. * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native TRITONSERVER_Error TRITONREPOAGENT_ModelState( - TRITONREPOAGENT_AgentModel model, @Cast("void**") PointerPointer state); -public static native TRITONSERVER_Error TRITONREPOAGENT_ModelState( - TRITONREPOAGENT_AgentModel model, @Cast("void**") @ByPtrPtr Pointer state); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelMetadata( + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version, @Cast("TRITONSERVER_Message**") PointerPointer model_metadata); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelMetadata( + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version, @ByPtrPtr TRITONSERVER_Message model_metadata); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelMetadata( + TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name, + @Cast("const int64_t") long model_version, @ByPtrPtr TRITONSERVER_Message model_metadata); -/** Set the user-specified state associated with the model. +/** Get the statistics of a model as a TRITONSERVER_Message + * object. The caller takes ownership of the object and must call + * TRITONSERVER_MessageDelete to release the object. * - * @param model The agent model. - * @param state The user state, or nullptr if no user state. + * @param server The inference server object. + * @param model_name The name of the model. + * If empty, then statistics for all available models will be returned, + * and the server will choose a version based on those models' policies. + * @param model_version The version of the model. If -1 then the + * server will choose a version based on the model's policy. + * @param model_stats Returns the model statistics message. * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native TRITONSERVER_Error TRITONREPOAGENT_ModelSetState( - TRITONREPOAGENT_AgentModel model, Pointer state); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelStatistics( + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version, @Cast("TRITONSERVER_Message**") PointerPointer model_stats); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelStatistics( + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version, @ByPtrPtr TRITONSERVER_Message model_stats); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelStatistics( + TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name, + @Cast("const int64_t") long model_version, @ByPtrPtr TRITONSERVER_Message model_stats); -/** Get the user-specified state associated with the agent. +/** Get the configuration of a model as a TRITONSERVER_Message object. + * The caller takes ownership of the message object and must call + * TRITONSERVER_MessageDelete to release the object. * - * @param agent The agent. - * @param state Returns the user state, or nullptr if no user state. + * @param server The inference server object. + * @param model_name The name of the model. + * @param model_version The version of the model. If -1 then the + * server will choose a version based on the model's policy. + * @param config_version The model configuration will be returned in + * a format matching this version. If the configuration cannot be + * represented in the requested version's format then an error will + * be returned. Currently only version 1 is supported. + * @param model_config Returns the model config message. * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native TRITONSERVER_Error TRITONREPOAGENT_State( - TRITONREPOAGENT_Agent agent, @Cast("void**") PointerPointer state); -public static native TRITONSERVER_Error TRITONREPOAGENT_State( - TRITONREPOAGENT_Agent agent, @Cast("void**") @ByPtrPtr Pointer state); +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerModelConfig( + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version, @Cast("const uint32_t") int config_version, + @Cast("TRITONSERVER_Message**") PointerPointer model_config); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelConfig( + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version, @Cast("const uint32_t") int config_version, + @ByPtrPtr TRITONSERVER_Message model_config); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelConfig( + TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name, + @Cast("const int64_t") long model_version, @Cast("const uint32_t") int config_version, + @ByPtrPtr TRITONSERVER_Message model_config); -/** Set the user-specified state associated with the agent. +/** Get the index of all unique models in the model repositories as a + * TRITONSERVER_Message object. The caller takes ownership of the + * message object and must call TRITONSERVER_MessageDelete to release + * the object. * - * @param agent The agent. - * @param state The user state, or nullptr if no user state. + * If TRITONSERVER_INDEX_FLAG_READY is set in 'flags' only the models + * that are loaded into the server and ready for inferencing are + * returned. + * + * @param server The inference server object. + * @param flags TRITONSERVER_ModelIndexFlag flags that control how to + * collect the index. + * @param model_index Return the model index message that holds the + * index of all models contained in the server's model repository(s). * @return a TRITONSERVER_Error indicating success or failure. 
*/ - -/// -/// /// -public static native TRITONSERVER_Error TRITONREPOAGENT_SetState( - TRITONREPOAGENT_Agent agent, Pointer state); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelIndex( + TRITONSERVER_Server server, @Cast("uint32_t") int flags, + @Cast("TRITONSERVER_Message**") PointerPointer model_index); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelIndex( + TRITONSERVER_Server server, @Cast("uint32_t") int flags, + @ByPtrPtr TRITONSERVER_Message model_index); -/** - * The following functions can be implemented by an agent. Functions - * indicated as required must be implemented or the agent will fail - * to load. - * -

- * Initialize an agent. This function is optional. This function is - * called once when an agent is loaded to allow the agent to - * initialize any state associated with the agent. An agent has a - * single state that is shared across all invocations of the agent. +/** Load the requested model or reload the model if it is already + * loaded. The function does not return until the model is loaded or + * fails to load. Returned error indicates if model loaded + * successfully or not. * - * @param agent The agent. + * @param server The inference server object. + * @param model_name The name of the model. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONREPOAGENT_Initialize( - TRITONREPOAGENT_Agent agent); +public static native TRITONSERVER_Error TRITONSERVER_ServerLoadModel( + TRITONSERVER_Server server, String model_name); +public static native TRITONSERVER_Error TRITONSERVER_ServerLoadModel( + TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name); -/** Finalize for an agent. This function is optional. This function is - * called once, just before the agent is unloaded. All state - * associated with the agent should be freed and any threads created - * for the agent should be exited/joined before returning from this - * function. +/** Unload the requested model. Unloading a model that is not loaded + * on server has no effect and success code will be returned. + * The function does not wait for the requested model to be fully unloaded + * and success code will be returned. + * Returned error indicates if model unloaded successfully or not. * - * @param agent The agent. + * @param server The inference server object. + * @param model_name The name of the model. * @return a TRITONSERVER_Error indicating success or failure. 
 */ /// -public static native TRITONSERVER_Error TRITONREPOAGENT_Finalize( - TRITONREPOAGENT_Agent agent); +public static native TRITONSERVER_Error TRITONSERVER_ServerUnloadModel( + TRITONSERVER_Server server, String model_name); +public static native TRITONSERVER_Error TRITONSERVER_ServerUnloadModel( + TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name); -/** Initialize a model associated with an agent. This function is optional. - * This function is called once when an agent model's lifecycle begins to allow - * the agent model to initialize any state associated with it. An agent model - * has a single state that is shared across all the lifecycle of the agent - * model. +/** Unload the requested model, and also unload any dependent model that + * was loaded along with the requested model (for example, the models composing + * an ensemble). Unloading a model that is not loaded + * on server has no effect and success code will be returned. + * The function does not wait for the requested model and all dependent + * models to be fully unloaded and success code will be returned. + * Returned error indicates if model unloaded successfully or not. * - * @param agent The agent to be associated with the model. - * @param model The model. + * @param server The inference server object. + * @param model_name The name of the model. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONREPOAGENT_ModelInitialize( - TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model); +public static native TRITONSERVER_Error TRITONSERVER_ServerUnloadModelAndDependents( + TRITONSERVER_Server server, String model_name); +public static native TRITONSERVER_Error TRITONSERVER_ServerUnloadModelAndDependents( + TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name); -/** Finalize for a model. This function is optional. 
This function is - * called once, just before the end of the agent model's lifecycle. All state - * associated with the agent model should be freed and any threads created - * for the agent model should be exited/joined before returning from this - * function. If the model acquired a model location using - * TRITONREPOAGENT_ModelRepositoryLocationAcquire, it must call - * TRITONREPOAGENT_ModelRepositoryLocationRelease to release that location. +/** Get the current metrics for the server. The caller takes ownership + * of the metrics object and must call TRITONSERVER_MetricsDelete to + * release the object. * - * @param agent The agent associated with the model. - * @param model The model. + * @param server The inference server object. + * @param metrics Returns the metrics. * @return a TRITONSERVER_Error indicating success or failure. */ /// /// /// -public static native TRITONSERVER_Error TRITONREPOAGENT_ModelFinalize( - TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model); +public static native TRITONSERVER_Error TRITONSERVER_ServerMetrics( + TRITONSERVER_Server server, @Cast("TRITONSERVER_Metrics**") PointerPointer metrics); +public static native TRITONSERVER_Error TRITONSERVER_ServerMetrics( + TRITONSERVER_Server server, @ByPtrPtr TRITONSERVER_Metrics metrics); -/** Handle an action for a specified model. This function is - * required. Triton will not perform multiple simultaneous calls to - * this function for a given agent and model; however, there may be - * simultaneous calls for the agent for different models. - * - * If the agent does not handle the action the agent should - * immediately return success (nullptr). +/** Perform inference using the meta-data and inputs supplied by the + * 'inference_request'. 
If the function returns success, then the + * caller releases ownership of 'inference_request' and must not + * access it in any way after this call, until ownership is returned + * via the 'request_release_fn' callback registered in the request + * object with TRITONSERVER_InferenceRequestSetReleaseCallback. * - * Any modification to the model's repository must be made when 'action_type' - * is TRITONREPOAGENT_ACTION_LOAD. - * To modify the model's repository the agent must either acquire a mutable - * location via TRITONREPOAGENT_ModelRepositoryLocationAcquire - * or its own managed location, report the location to Triton via - * TRITONREPOAGENT_ModelRepositoryUpdate, and then return - * success (nullptr). If the agent does not need to make any changes - * to the model repository it should not call - * TRITONREPOAGENT_ModelRepositoryUpdate and then return success. - * To indicate that a model load should fail return a non-success status. + * The function unconditionally takes ownership of 'trace' and so the + * caller must not access it in any way after this call (except in + * the trace id callback) until ownership is returned via the trace's + * release_fn callback. * - * @param agent The agent. - * @param model The model that is the target of the action. - * \action_type The type of action the agent should handle for the model. + * Responses produced for this request are returned using the + * allocator and callback registered with the request by + * TRITONSERVER_InferenceRequestSetResponseCallback. + * + * @param server The inference server object. + * @param inference_request The request object. + * @param trace The trace object for this request, or nullptr if no + * tracing. * @return a TRITONSERVER_Error indicating success or failure. 
*/ -public static native TRITONSERVER_Error TRITONREPOAGENT_ModelAction( - TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, - TRITONREPOAGENT_ActionType action_type); -public static native TRITONSERVER_Error TRITONREPOAGENT_ModelAction( - TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, - @Cast("TRITONREPOAGENT_ActionType") int action_type); +public static native TRITONSERVER_Error TRITONSERVER_ServerInferAsync( + TRITONSERVER_Server server, + TRITONSERVER_InferenceRequest inference_request, + TRITONSERVER_InferenceTrace trace); + // #ifdef __cplusplus // #endif -// Parsed from tritonserver.h +// Parsed from tritonbackend.h // Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. // @@ -2207,2192 +2278,2134 @@ public static native TRITONSERVER_Error TRITONREPOAGENT_ModelAction( // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // #pragma once -/** \file */ - -// #include // #include // #include +// #include "triton/core/tritonserver.h" // #ifdef __cplusplus // #endif -// #ifdef _COMPILING_TRITONSERVER +// #ifdef _COMPILING_TRITONBACKEND // #if defined(_MSC_VER) -// #define TRITONSERVER_DECLSPEC __declspec(dllexport) +// #define TRITONBACKEND_DECLSPEC __declspec(dllexport) +// #define TRITONBACKEND_ISPEC __declspec(dllimport) // #elif defined(__GNUC__) -// #define TRITONSERVER_DECLSPEC __attribute__((__visibility__("default"))) +// #define TRITONBACKEND_DECLSPEC __attribute__((__visibility__("default"))) +// #define TRITONBACKEND_ISPEC // #else -// #define TRITONSERVER_DECLSPEC +// #define TRITONBACKEND_DECLSPEC +// #define TRITONBACKEND_ISPEC // #endif // #else // #if defined(_MSC_VER) -// #define TRITONSERVER_DECLSPEC __declspec(dllimport) +// #define TRITONBACKEND_DECLSPEC __declspec(dllimport) +// #define TRITONBACKEND_ISPEC __declspec(dllexport) // #else -// #define TRITONSERVER_DECLSPEC -// Targeting ../tritonserver/TRITONSERVER_Error.java - - -// Targeting 
../tritonserver/TRITONSERVER_InferenceRequest.java - - -// Targeting ../tritonserver/TRITONSERVER_InferenceResponse.java - - -// Targeting ../tritonserver/TRITONSERVER_InferenceTrace.java - - -// Targeting ../tritonserver/TRITONSERVER_Message.java - - -// Targeting ../tritonserver/TRITONSERVER_Metrics.java - - -// Targeting ../tritonserver/TRITONSERVER_ResponseAllocator.java - - -// Targeting ../tritonserver/TRITONSERVER_Server.java - - -// Targeting ../tritonserver/TRITONSERVER_ServerOptions.java - - - -/** - * TRITONSERVER API Version - * - * The TRITONSERVER API is versioned with major and minor version - * numbers. Any change to the API that does not impact backwards - * compatibility (for example, adding a non-required function) - * increases the minor version number. Any change that breaks - * backwards compatibility (for example, deleting or changing the - * behavior of a function) increases the major version number. A - * client should check that the API version used to compile the - * client is compatible with the API version of the Triton shared - * library that it is linking against. This is typically done by code - * similar to the following which makes sure that the major versions - * are equal and that the minor version of the Triton shared library - * is >= the minor version used to build the client. - * - * uint32_t api_version_major, api_version_minor; - * TRITONSERVER_ApiVersion(&api_version_major, &api_version_minor); - * if ((api_version_major != TRITONSERVER_API_VERSION_MAJOR) || - * (api_version_minor < TRITONSERVER_API_VERSION_MINOR)) { - * return TRITONSERVER_ErrorNew( - * TRITONSERVER_ERROR_UNSUPPORTED, - * "triton server API version does not support this client"); - * } - * */ -public static final int TRITONSERVER_API_VERSION_MAJOR = 1; - -/// -public static final int TRITONSERVER_API_VERSION_MINOR = 3; - -/** Get the TRITONBACKEND API version supported by the Triton shared - * library. 
This value can be compared against the - * TRITONSERVER_API_VERSION_MAJOR and TRITONSERVER_API_VERSION_MINOR - * used to build the client to ensure that Triton shared library is - * compatible with the client. - * - * @param major Returns the TRITONSERVER API major version supported - * by Triton. - * @param minor Returns the TRITONSERVER API minor version supported - * by Triton. - * @return a TRITONSERVER_Error indicating success or failure. */ - -/// -/// -public static native TRITONSERVER_Error TRITONSERVER_ApiVersion( - @Cast("uint32_t*") IntPointer major, @Cast("uint32_t*") IntPointer minor); -public static native TRITONSERVER_Error TRITONSERVER_ApiVersion( - @Cast("uint32_t*") IntBuffer major, @Cast("uint32_t*") IntBuffer minor); -public static native TRITONSERVER_Error TRITONSERVER_ApiVersion( - @Cast("uint32_t*") int[] major, @Cast("uint32_t*") int[] minor); - -/** TRITONSERVER_DataType - * - * Tensor data types recognized by TRITONSERVER. - * */ -public enum TRITONSERVER_DataType { - TRITONSERVER_TYPE_INVALID(0), - TRITONSERVER_TYPE_BOOL(1), - TRITONSERVER_TYPE_UINT8(2), - TRITONSERVER_TYPE_UINT16(3), - TRITONSERVER_TYPE_UINT32(4), - TRITONSERVER_TYPE_UINT64(5), - TRITONSERVER_TYPE_INT8(6), - TRITONSERVER_TYPE_INT16(7), - TRITONSERVER_TYPE_INT32(8), - TRITONSERVER_TYPE_INT64(9), - TRITONSERVER_TYPE_FP16(10), - TRITONSERVER_TYPE_FP32(11), - TRITONSERVER_TYPE_FP64(12), - TRITONSERVER_TYPE_BYTES(13); - - public final int value; - private TRITONSERVER_DataType(int v) { this.value = v; } - private TRITONSERVER_DataType(TRITONSERVER_DataType e) { this.value = e.value; } - public TRITONSERVER_DataType intern() { for (TRITONSERVER_DataType e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} - -/** Get the string representation of a data type. The returned string - * is not owned by the caller and so should not be modified or freed. - * - * @param datatype The data type. 
- * @return The string representation of the data type. */ - -/// -public static native String TRITONSERVER_DataTypeString( - TRITONSERVER_DataType datatype); -public static native @Cast("const char*") BytePointer TRITONSERVER_DataTypeString( - @Cast("TRITONSERVER_DataType") int datatype); - -/** Get the Triton datatype corresponding to a string representation - * of a datatype. - * - * @param dtype The datatype string representation. - * @return The Triton data type or TRITONSERVER_TYPE_INVALID if the - * string does not represent a data type. */ - -/// -public static native TRITONSERVER_DataType TRITONSERVER_StringToDataType(String dtype); -public static native @Cast("TRITONSERVER_DataType") int TRITONSERVER_StringToDataType(@Cast("const char*") BytePointer dtype); - -/** Get the size of a Triton datatype in bytes. Zero is returned for - * TRITONSERVER_TYPE_BYTES because it have variable size. Zero is - * returned for TRITONSERVER_TYPE_INVALID. - * - * @param dtype The datatype. - * @return The size of the datatype. */ - -/// -/// -public static native @Cast("uint32_t") int TRITONSERVER_DataTypeByteSize(TRITONSERVER_DataType datatype); -public static native @Cast("uint32_t") int TRITONSERVER_DataTypeByteSize(@Cast("TRITONSERVER_DataType") int datatype); - -/** TRITONSERVER_MemoryType - * - * Types of memory recognized by TRITONSERVER. - * */ -public enum TRITONSERVER_MemoryType { - TRITONSERVER_MEMORY_CPU(0), - TRITONSERVER_MEMORY_CPU_PINNED(1), - TRITONSERVER_MEMORY_GPU(2); - - public final int value; - private TRITONSERVER_MemoryType(int v) { this.value = v; } - private TRITONSERVER_MemoryType(TRITONSERVER_MemoryType e) { this.value = e.value; } - public TRITONSERVER_MemoryType intern() { for (TRITONSERVER_MemoryType e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} - -/** Get the string representation of a memory type. 
The returned - * string is not owned by the caller and so should not be modified or - * freed. - * - * @param memtype The memory type. - * @return The string representation of the memory type. */ - -/// -/// -public static native String TRITONSERVER_MemoryTypeString( - TRITONSERVER_MemoryType memtype); -public static native @Cast("const char*") BytePointer TRITONSERVER_MemoryTypeString( - @Cast("TRITONSERVER_MemoryType") int memtype); - -/** TRITONSERVER_ParameterType - * - * Types of parameters recognized by TRITONSERVER. - * */ -public enum TRITONSERVER_ParameterType { - TRITONSERVER_PARAMETER_STRING(0), - TRITONSERVER_PARAMETER_INT(1), - TRITONSERVER_PARAMETER_BOOL(2); - - public final int value; - private TRITONSERVER_ParameterType(int v) { this.value = v; } - private TRITONSERVER_ParameterType(TRITONSERVER_ParameterType e) { this.value = e.value; } - public TRITONSERVER_ParameterType intern() { for (TRITONSERVER_ParameterType e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} - -/** Get the string representation of a parmeter type. The returned - * string is not owned by the caller and so should not be modified or - * freed. - * - * @param paramtype The parameter type. - * @return The string representation of the parameter type. */ - -/// -/// -public static native String TRITONSERVER_ParameterTypeString( - TRITONSERVER_ParameterType paramtype); -public static native @Cast("const char*") BytePointer TRITONSERVER_ParameterTypeString( - @Cast("TRITONSERVER_ParameterType") int paramtype); +// #define TRITONBACKEND_DECLSPEC +// #define TRITONBACKEND_ISPEC +// Targeting ../tritonserver/TRITONBACKEND_MemoryManager.java -/** TRITONSERVER_InstanceGroupKind - * - * Kinds of instance groups recognized by TRITONSERVER. 
- * */ -public enum TRITONSERVER_InstanceGroupKind { - TRITONSERVER_INSTANCEGROUPKIND_AUTO(0), - TRITONSERVER_INSTANCEGROUPKIND_CPU(1), - TRITONSERVER_INSTANCEGROUPKIND_GPU(2), - TRITONSERVER_INSTANCEGROUPKIND_MODEL(3); - public final int value; - private TRITONSERVER_InstanceGroupKind(int v) { this.value = v; } - private TRITONSERVER_InstanceGroupKind(TRITONSERVER_InstanceGroupKind e) { this.value = e.value; } - public TRITONSERVER_InstanceGroupKind intern() { for (TRITONSERVER_InstanceGroupKind e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} +// Targeting ../tritonserver/TRITONBACKEND_Input.java -/** Get the string representation of an instance-group kind. The - * returned string is not owned by the caller and so should not be - * modified or freed. - * - * @param kind The instance-group kind. - * @return The string representation of the kind. */ -/// -/// -public static native String TRITONSERVER_InstanceGroupKindString( - TRITONSERVER_InstanceGroupKind kind); -public static native @Cast("const char*") BytePointer TRITONSERVER_InstanceGroupKindString( - @Cast("TRITONSERVER_InstanceGroupKind") int kind); +// Targeting ../tritonserver/TRITONBACKEND_Output.java -/** TRITONSERVER_Logging - * - * Types/levels of logging. - * */ -public enum TRITONSERVER_LogLevel { - TRITONSERVER_LOG_INFO(0), - TRITONSERVER_LOG_WARN(1), - TRITONSERVER_LOG_ERROR(2), - TRITONSERVER_LOG_VERBOSE(3); - public final int value; - private TRITONSERVER_LogLevel(int v) { this.value = v; } - private TRITONSERVER_LogLevel(TRITONSERVER_LogLevel e) { this.value = e.value; } - public TRITONSERVER_LogLevel intern() { for (TRITONSERVER_LogLevel e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} +// Targeting ../tritonserver/TRITONBACKEND_Request.java -/** Is a log level enabled? - * - * @param level The log level. 
- * @return True if the log level is enabled, false if not enabled. */ -/// -public static native @Cast("bool") boolean TRITONSERVER_LogIsEnabled( - TRITONSERVER_LogLevel level); -public static native @Cast("bool") boolean TRITONSERVER_LogIsEnabled( - @Cast("TRITONSERVER_LogLevel") int level); +// Targeting ../tritonserver/TRITONBACKEND_ResponseFactory.java -/** Log a message at a given log level if that level is enabled. - * - * @param level The log level. - * @param filename The file name of the location of the log message. - * @param line The line number of the log message. - * @param msg The log message. - * @return a TRITONSERVER_Error indicating success or failure. */ -/// -/// -/// -public static native TRITONSERVER_Error TRITONSERVER_LogMessage( - TRITONSERVER_LogLevel level, String filename, int line, - String msg); -public static native TRITONSERVER_Error TRITONSERVER_LogMessage( - @Cast("TRITONSERVER_LogLevel") int level, @Cast("const char*") BytePointer filename, int line, - @Cast("const char*") BytePointer msg); +// Targeting ../tritonserver/TRITONBACKEND_Response.java -/** TRITONSERVER_Error - * - * Errors are reported by a TRITONSERVER_Error object. A NULL - * TRITONSERVER_Error indicates no error, a non-NULL TRITONSERVER_Error - * indicates error and the code and message for the error can be - * retrieved from the object. - * - * The caller takes ownership of a TRITONSERVER_Error object returned by - * the API and must call TRITONSERVER_ErrorDelete to release the object. - * -

- * The TRITONSERVER_Error error codes */ -public enum TRITONSERVER_Error_Code { - TRITONSERVER_ERROR_UNKNOWN(0), - TRITONSERVER_ERROR_INTERNAL(1), - TRITONSERVER_ERROR_NOT_FOUND(2), - TRITONSERVER_ERROR_INVALID_ARG(3), - TRITONSERVER_ERROR_UNAVAILABLE(4), - TRITONSERVER_ERROR_UNSUPPORTED(5), - TRITONSERVER_ERROR_ALREADY_EXISTS(6); - public final int value; - private TRITONSERVER_Error_Code(int v) { this.value = v; } - private TRITONSERVER_Error_Code(TRITONSERVER_Error_Code e) { this.value = e.value; } - public TRITONSERVER_Error_Code intern() { for (TRITONSERVER_Error_Code e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} +// Targeting ../tritonserver/TRITONBACKEND_Backend.java -/** Create a new error object. The caller takes ownership of the - * TRITONSERVER_Error object and must call TRITONSERVER_ErrorDelete to - * release the object. - * - * @param code The error code. - * @param msg The error message. - * @return A new TRITONSERVER_Error object. */ -/// -public static native TRITONSERVER_Error TRITONSERVER_ErrorNew( - TRITONSERVER_Error_Code code, String msg); -public static native TRITONSERVER_Error TRITONSERVER_ErrorNew( - @Cast("TRITONSERVER_Error_Code") int code, @Cast("const char*") BytePointer msg); +// Targeting ../tritonserver/TRITONBACKEND_Model.java -/** Delete an error object. - * - * @param error The error object. */ -/// -public static native void TRITONSERVER_ErrorDelete(TRITONSERVER_Error error); +// Targeting ../tritonserver/TRITONBACKEND_ModelInstance.java -/** Get the error code. - * - * @param error The error object. - * @return The error code. */ -/// -public static native TRITONSERVER_Error_Code TRITONSERVER_ErrorCode(TRITONSERVER_Error error); -/** Get the string representation of an error code. The returned - * string is not owned by the caller and so should not be modified or - * freed. 
The lifetime of the returned string extends only as long as - * 'error' and must not be accessed once 'error' is deleted. +/** + * TRITONBACKEND API Version * - * @param error The error object. - * @return The string representation of the error code. */ + * The TRITONBACKEND API is versioned with major and minor version + * numbers. Any change to the API that does not impact backwards + * compatibility (for example, adding a non-required function) + * increases the minor version number. Any change that breaks + * backwards compatibility (for example, deleting or changing the + * behavior of a function) increases the major version number. A + * backend should check that the API version used to compile the + * backend is compatible with the API version of the Triton server + * that it is running in. This is typically done by code similar to + * the following which makes sure that the major versions are equal + * and that the minor version of Triton is >= the minor version used + * to build the backend. + * + * uint32_t api_version_major, api_version_minor; + * TRITONBACKEND_ApiVersion(&api_version_major, &api_version_minor); + * if ((api_version_major != TRITONBACKEND_API_VERSION_MAJOR) || + * (api_version_minor < TRITONBACKEND_API_VERSION_MINOR)) { + * return TRITONSERVER_ErrorNew( + * TRITONSERVER_ERROR_UNSUPPORTED, + * "triton backend API version does not support this backend"); + * } + * */ +public static final int TRITONBACKEND_API_VERSION_MAJOR = 1; /// -public static native String TRITONSERVER_ErrorCodeString( - TRITONSERVER_Error error); +public static final int TRITONBACKEND_API_VERSION_MINOR = 4; -/** Get the error message. The returned string is not owned by the - * caller and so should not be modified or freed. The lifetime of the - * returned string extends only as long as 'error' and must not be - * accessed once 'error' is deleted. +/** Get the TRITONBACKEND API version supported by Triton. 
This value + * can be compared against the TRITONBACKEND_API_VERSION_MAJOR and + * TRITONBACKEND_API_VERSION_MINOR used to build the backend to + * ensure that Triton is compatible with the backend. * - * @param error The error object. - * @return The error message. */ + * @param major Returns the TRITONBACKEND API major version supported + * by Triton. + * @param minor Returns the TRITONBACKEND API minor version supported + * by Triton. + * @return a TRITONSERVER_Error indicating success or failure. */ /// /// /// -public static native String TRITONSERVER_ErrorMessage( - TRITONSERVER_Error error); -// Targeting ../tritonserver/TRITONSERVER_ResponseAllocatorAllocFn_t.java - - -// Targeting ../tritonserver/TRITONSERVER_ResponseAllocatorReleaseFn_t.java - +public static native TRITONSERVER_Error TRITONBACKEND_ApiVersion( + @Cast("uint32_t*") IntPointer major, @Cast("uint32_t*") IntPointer minor); +public static native TRITONSERVER_Error TRITONBACKEND_ApiVersion( + @Cast("uint32_t*") IntBuffer major, @Cast("uint32_t*") IntBuffer minor); +public static native TRITONSERVER_Error TRITONBACKEND_ApiVersion( + @Cast("uint32_t*") int[] major, @Cast("uint32_t*") int[] minor); -// Targeting ../tritonserver/TRITONSERVER_ResponseAllocatorStartFn_t.java +/** TRITONBACKEND_ArtifactType + * + * The ways that the files that make up a backend or model are + * communicated to the backend. + * + * TRITONBACKEND_ARTIFACT_FILESYSTEM: The model or backend + * artifacts are made available to Triton via a locally + * accessible filesystem. The backend can access these files + * using an appropriate system API. 
+ * */ +public enum TRITONBACKEND_ArtifactType { + TRITONBACKEND_ARTIFACT_FILESYSTEM(0); + public final int value; + private TRITONBACKEND_ArtifactType(int v) { this.value = v; } + private TRITONBACKEND_ArtifactType(TRITONBACKEND_ArtifactType e) { this.value = e.value; } + public TRITONBACKEND_ArtifactType intern() { for (TRITONBACKEND_ArtifactType e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} -/** Create a new response allocator object. - * - * The response allocator object is used by Triton to allocate - * buffers to hold the output tensors in inference responses. Most - * models generate a single response for each inference request - * (TRITONSERVER_TXN_ONE_TO_ONE). For these models the order of - * callbacks will be: +/** + * TRITONBACKEND_MemoryManager * - * TRITONSERVER_ServerInferAsync called - * - start_fn : optional (and typically not required) - * - alloc_fn : called once for each output tensor in response - * TRITONSERVER_InferenceResponseDelete called - * - release_fn: called once for each output tensor in response + * Object representing an memory manager that is capable of + * allocating and otherwise managing different memory types. For + * improved performance Triton maintains pools for GPU and CPU-pinned + * memory and the memory manager allows backends to access those + * pools. * - * For models that generate multiple responses for each inference - * request (TRITONSERVER_TXN_DECOUPLED), the start_fn callback can be - * used to determine sets of alloc_fn callbacks that belong to the - * same response: +

+ * Allocate a contiguous block of memory of a specific type using a + * memory manager. Two error codes have specific interpretations for + * this function: * - * TRITONSERVER_ServerInferAsync called - * - start_fn - * - alloc_fn : called once for each output tensor in response - * - start_fn - * - alloc_fn : called once for each output tensor in response - * ... - * For each response, TRITONSERVER_InferenceResponseDelete called - * - release_fn: called once for each output tensor in the response + * TRITONSERVER_ERROR_UNSUPPORTED: Indicates that Triton is + * incapable of allocating the requested memory type and memory + * type ID. Requests for the memory type and ID will always fail + * no matter 'byte_size' of the request. * - * In all cases the start_fn, alloc_fn and release_fn callback - * functions must be thread-safe. Typically making these functions - * thread-safe does not require explicit locking. The recommended way - * to implement these functions is to have each inference request - * provide a 'response_allocator_userp' object that is unique to that - * request with TRITONSERVER_InferenceRequestSetResponseCallback. The - * callback functions then operate only on this unique state. Locking - * is required only when the callback function needs to access state - * that is shared across inference requests (for example, a common - * allocation pool). + * TRITONSERVER_ERROR_UNAVAILABLE: Indicates that Triton can + * allocate the memory type and ID but that currently it cannot + * allocate a contiguous block of memory of the requested + * 'byte_size'. * - * @param allocator Returns the new response allocator object. - * @param alloc_fn The function to call to allocate buffers for result - * tensors. - * @param release_fn The function to call when the server no longer - * holds a reference to an allocated buffer. - * @param start_fn The function to call to indicate that the - * subsequent 'alloc_fn' calls are for a new response. 
This callback - * is optional (use nullptr to indicate that it should not be - * invoked). -

+ * @param manager The memory manager. + * @param buffer Returns the allocated memory. + * @param memory_type The type of memory to allocate. + * @param memory_type_id The ID associated with the memory type to + * allocate. For GPU memory this indicates the device ID of the GPU + * to allocate from. + * @param byte_size The size of memory to allocate, in bytes. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONSERVER_ResponseAllocatorNew( - @Cast("TRITONSERVER_ResponseAllocator**") PointerPointer allocator, - TRITONSERVER_ResponseAllocatorAllocFn_t alloc_fn, - TRITONSERVER_ResponseAllocatorReleaseFn_t release_fn, - TRITONSERVER_ResponseAllocatorStartFn_t start_fn); -public static native TRITONSERVER_Error TRITONSERVER_ResponseAllocatorNew( - @ByPtrPtr TRITONSERVER_ResponseAllocator allocator, - TRITONSERVER_ResponseAllocatorAllocFn_t alloc_fn, - TRITONSERVER_ResponseAllocatorReleaseFn_t release_fn, - TRITONSERVER_ResponseAllocatorStartFn_t start_fn); +public static native TRITONSERVER_Error TRITONBACKEND_MemoryManagerAllocate( + TRITONBACKEND_MemoryManager manager, @Cast("void**") PointerPointer buffer, + TRITONSERVER_MemoryType memory_type, @Cast("const int64_t") long memory_type_id, + @Cast("const uint64_t") long byte_size); +public static native TRITONSERVER_Error TRITONBACKEND_MemoryManagerAllocate( + TRITONBACKEND_MemoryManager manager, @Cast("void**") @ByPtrPtr Pointer buffer, + TRITONSERVER_MemoryType memory_type, @Cast("const int64_t") long memory_type_id, + @Cast("const uint64_t") long byte_size); +public static native TRITONSERVER_Error TRITONBACKEND_MemoryManagerAllocate( + TRITONBACKEND_MemoryManager manager, @Cast("void**") @ByPtrPtr Pointer buffer, + @Cast("TRITONSERVER_MemoryType") int memory_type, @Cast("const int64_t") long memory_type_id, + @Cast("const uint64_t") long byte_size); -/** Delete a response allocator. 
+/** Free a buffer that was previously allocated with + * TRITONBACKEND_MemoryManagerAllocate. The call must provide the + * same values for 'memory_type' and 'memory_type_id' as were used + * when the buffer was allocate or else the behavior is undefined. * - * @param allocator The response allocator object. + * @param manager The memory manager. + * @param buffer The allocated memory buffer to free. + * @param memory_type The type of memory of the buffer. + * @param memory_type_id The ID associated with the memory type of + * the buffer. * @return a TRITONSERVER_Error indicating success or failure. */ + /// /// /// -public static native TRITONSERVER_Error TRITONSERVER_ResponseAllocatorDelete( - TRITONSERVER_ResponseAllocator allocator); +/// +public static native TRITONSERVER_Error TRITONBACKEND_MemoryManagerFree( + TRITONBACKEND_MemoryManager manager, Pointer buffer, + TRITONSERVER_MemoryType memory_type, @Cast("const int64_t") long memory_type_id); +public static native TRITONSERVER_Error TRITONBACKEND_MemoryManagerFree( + TRITONBACKEND_MemoryManager manager, Pointer buffer, + @Cast("TRITONSERVER_MemoryType") int memory_type, @Cast("const int64_t") long memory_type_id); -/** TRITONSERVER_Message +/** + * TRITONBACKEND_Input * - * Object representing a Triton Server message. + * Object representing an input tensor. *

- * Create a new message object from serialized JSON string. + * Get the name and properties of an input tensor. The returned + * strings and other properties are owned by the input, not the + * caller, and so should not be modified or freed. * - * @param message The message object. - * @param base The base of the serialized JSON. - * @param byte_size The size, in bytes, of the serialized message. + * @param input The input tensor. + * @param name If non-nullptr, returns the tensor name. + * @param datatype If non-nullptr, returns the tensor datatype. + * @param shape If non-nullptr, returns the tensor shape. + * @param dim_count If non-nullptr, returns the number of dimensions + * in the tensor shape. + * @param byte_size If non-nullptr, returns the size of the available + * data for the tensor, in bytes. This size reflects the actual data + * available, and does not necessarily match what is + * expected/required for the tensor given its shape and datatype. It + * is the responsibility of the backend to handle mismatches in these + * sizes appropriately. + * @param buffer_count If non-nullptr, returns the number of buffers + * holding the contents of the tensor. These buffers are accessed + * using TRITONBACKEND_InputBuffer. * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native TRITONSERVER_Error TRITONSERVER_MessageNewFromSerializedJson( - @Cast("TRITONSERVER_Message**") PointerPointer message, String base, @Cast("size_t") long byte_size); -public static native TRITONSERVER_Error TRITONSERVER_MessageNewFromSerializedJson( - @ByPtrPtr TRITONSERVER_Message message, String base, @Cast("size_t") long byte_size); -public static native TRITONSERVER_Error TRITONSERVER_MessageNewFromSerializedJson( - @ByPtrPtr TRITONSERVER_Message message, @Cast("const char*") BytePointer base, @Cast("size_t") long byte_size); +public static native TRITONSERVER_Error TRITONBACKEND_InputProperties( + TRITONBACKEND_Input input, @Cast("const char**") PointerPointer name, + @Cast("TRITONSERVER_DataType*") IntPointer datatype, @Cast("const int64_t**") PointerPointer shape, + @Cast("uint32_t*") IntPointer dims_count, @Cast("uint64_t*") LongPointer byte_size, @Cast("uint32_t*") IntPointer buffer_count); +public static native TRITONSERVER_Error TRITONBACKEND_InputProperties( + TRITONBACKEND_Input input, @Cast("const char**") @ByPtrPtr BytePointer name, + @Cast("TRITONSERVER_DataType*") IntPointer datatype, @Cast("const int64_t**") @ByPtrPtr LongPointer shape, + @Cast("uint32_t*") IntPointer dims_count, @Cast("uint64_t*") LongPointer byte_size, @Cast("uint32_t*") IntPointer buffer_count); +public static native TRITONSERVER_Error TRITONBACKEND_InputProperties( + TRITONBACKEND_Input input, @Cast("const char**") @ByPtrPtr ByteBuffer name, + @Cast("TRITONSERVER_DataType*") IntBuffer datatype, @Cast("const int64_t**") @ByPtrPtr LongBuffer shape, + @Cast("uint32_t*") IntBuffer dims_count, @Cast("uint64_t*") LongBuffer byte_size, @Cast("uint32_t*") IntBuffer buffer_count); +public static native TRITONSERVER_Error TRITONBACKEND_InputProperties( + TRITONBACKEND_Input input, @Cast("const char**") @ByPtrPtr byte[] name, + @Cast("TRITONSERVER_DataType*") int[] datatype, @Cast("const int64_t**") @ByPtrPtr long[] shape, + @Cast("uint32_t*") int[] 
dims_count, @Cast("uint64_t*") long[] byte_size, @Cast("uint32_t*") int[] buffer_count); -/** Delete a message object. +/** Get the name and properties of an input tensor associated with a given + * host policy. If there are no input buffers for the specified host policy, + * the properties of the fallback input buffers are returned. The returned + * strings and other properties are owned by the input, not the caller, and so + * should not be modified or freed. * - * @param message The message object. + * @param input The input tensor. + * @param host_policy_name The host policy name. Fallback input properties + * will be return if nullptr is provided. + * @param name If non-nullptr, returns the tensor name. + * @param datatype If non-nullptr, returns the tensor datatype. + * @param shape If non-nullptr, returns the tensor shape. + * @param dim_count If non-nullptr, returns the number of dimensions + * in the tensor shape. + * @param byte_size If non-nullptr, returns the size of the available + * data for the tensor, in bytes. This size reflects the actual data + * available, and does not necessarily match what is + * expected/required for the tensor given its shape and datatype. It + * is the responsibility of the backend to handle mismatches in these + * sizes appropriately. + * @param buffer_count If non-nullptr, returns the number of buffers + * holding the contents of the tensor. These buffers are accessed + * using TRITONBACKEND_InputBufferForHostPolicy. * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native TRITONSERVER_Error TRITONSERVER_MessageDelete( - TRITONSERVER_Message message); +public static native TRITONSERVER_Error TRITONBACKEND_InputPropertiesForHostPolicy( + TRITONBACKEND_Input input, String host_policy_name, @Cast("const char**") PointerPointer name, + @Cast("TRITONSERVER_DataType*") IntPointer datatype, @Cast("const int64_t**") PointerPointer shape, + @Cast("uint32_t*") IntPointer dims_count, @Cast("uint64_t*") LongPointer byte_size, @Cast("uint32_t*") IntPointer buffer_count); +public static native TRITONSERVER_Error TRITONBACKEND_InputPropertiesForHostPolicy( + TRITONBACKEND_Input input, String host_policy_name, @Cast("const char**") @ByPtrPtr BytePointer name, + @Cast("TRITONSERVER_DataType*") IntPointer datatype, @Cast("const int64_t**") @ByPtrPtr LongPointer shape, + @Cast("uint32_t*") IntPointer dims_count, @Cast("uint64_t*") LongPointer byte_size, @Cast("uint32_t*") IntPointer buffer_count); +public static native TRITONSERVER_Error TRITONBACKEND_InputPropertiesForHostPolicy( + TRITONBACKEND_Input input, @Cast("const char*") BytePointer host_policy_name, @Cast("const char**") @ByPtrPtr ByteBuffer name, + @Cast("TRITONSERVER_DataType*") IntBuffer datatype, @Cast("const int64_t**") @ByPtrPtr LongBuffer shape, + @Cast("uint32_t*") IntBuffer dims_count, @Cast("uint64_t*") LongBuffer byte_size, @Cast("uint32_t*") IntBuffer buffer_count); +public static native TRITONSERVER_Error TRITONBACKEND_InputPropertiesForHostPolicy( + TRITONBACKEND_Input input, String host_policy_name, @Cast("const char**") @ByPtrPtr byte[] name, + @Cast("TRITONSERVER_DataType*") int[] datatype, @Cast("const int64_t**") @ByPtrPtr long[] shape, + @Cast("uint32_t*") int[] dims_count, @Cast("uint64_t*") long[] byte_size, @Cast("uint32_t*") int[] buffer_count); +public static native TRITONSERVER_Error TRITONBACKEND_InputPropertiesForHostPolicy( + TRITONBACKEND_Input input, @Cast("const char*") BytePointer host_policy_name, @Cast("const char**") @ByPtrPtr 
BytePointer name, + @Cast("TRITONSERVER_DataType*") IntPointer datatype, @Cast("const int64_t**") @ByPtrPtr LongPointer shape, + @Cast("uint32_t*") IntPointer dims_count, @Cast("uint64_t*") LongPointer byte_size, @Cast("uint32_t*") IntPointer buffer_count); +public static native TRITONSERVER_Error TRITONBACKEND_InputPropertiesForHostPolicy( + TRITONBACKEND_Input input, String host_policy_name, @Cast("const char**") @ByPtrPtr ByteBuffer name, + @Cast("TRITONSERVER_DataType*") IntBuffer datatype, @Cast("const int64_t**") @ByPtrPtr LongBuffer shape, + @Cast("uint32_t*") IntBuffer dims_count, @Cast("uint64_t*") LongBuffer byte_size, @Cast("uint32_t*") IntBuffer buffer_count); +public static native TRITONSERVER_Error TRITONBACKEND_InputPropertiesForHostPolicy( + TRITONBACKEND_Input input, @Cast("const char*") BytePointer host_policy_name, @Cast("const char**") @ByPtrPtr byte[] name, + @Cast("TRITONSERVER_DataType*") int[] datatype, @Cast("const int64_t**") @ByPtrPtr long[] shape, + @Cast("uint32_t*") int[] dims_count, @Cast("uint64_t*") long[] byte_size, @Cast("uint32_t*") int[] buffer_count); -/** Get the base and size of the buffer containing the serialized - * message in JSON format. The buffer is owned by the - * TRITONSERVER_Message object and should not be modified or freed by - * the caller. The lifetime of the buffer extends only as long as - * 'message' and must not be accessed once 'message' is deleted. +/** Get a buffer holding (part of) the tensor data for an input. For a + * given input the number of buffers composing the input are found + * from 'buffer_count' returned by TRITONBACKEND_InputProperties. The + * returned buffer is owned by the input and so should not be + * modified or freed by the caller. The lifetime of the buffer + * matches that of the input and so the buffer should not be accessed + * after the input tensor object is released. * - * @param message The message object. - * @param base Returns the base of the serialized message. 
- * @param byte_size Returns the size, in bytes, of the serialized - * message. + * @param input The input tensor. + * @param index The index of the buffer. Must be 0 <= index < + * buffer_count, where buffer_count is the value returned by + * TRITONBACKEND_InputProperties. + * @param buffer Returns a pointer to a contiguous block of data for + * the named input. + * @param buffer_byte_size Returns the size, in bytes, of 'buffer'. + * @param memory_type Acts as both input and output. On input gives + * the buffer memory type preferred by the function caller. Returns + * the actual memory type of 'buffer'. + * @param memory_type_id Acts as both input and output. On input + * gives the buffer memory type id preferred by the function caller. + * Returns the actual memory type id of 'buffer'. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONBACKEND_InputBuffer( + TRITONBACKEND_Input input, @Cast("const uint32_t") int index, @Cast("const void**") PointerPointer buffer, + @Cast("uint64_t*") LongPointer buffer_byte_size, @Cast("TRITONSERVER_MemoryType*") IntPointer memory_type, + @Cast("int64_t*") LongPointer memory_type_id); +public static native TRITONSERVER_Error TRITONBACKEND_InputBuffer( + TRITONBACKEND_Input input, @Cast("const uint32_t") int index, @Cast("const void**") @ByPtrPtr Pointer buffer, + @Cast("uint64_t*") LongPointer buffer_byte_size, @Cast("TRITONSERVER_MemoryType*") IntPointer memory_type, + @Cast("int64_t*") LongPointer memory_type_id); +public static native TRITONSERVER_Error TRITONBACKEND_InputBuffer( + TRITONBACKEND_Input input, @Cast("const uint32_t") int index, @Cast("const void**") @ByPtrPtr Pointer buffer, + @Cast("uint64_t*") LongBuffer buffer_byte_size, @Cast("TRITONSERVER_MemoryType*") IntBuffer memory_type, + @Cast("int64_t*") LongBuffer memory_type_id); +public static native TRITONSERVER_Error TRITONBACKEND_InputBuffer( + TRITONBACKEND_Input input, @Cast("const 
uint32_t") int index, @Cast("const void**") @ByPtrPtr Pointer buffer, + @Cast("uint64_t*") long[] buffer_byte_size, @Cast("TRITONSERVER_MemoryType*") int[] memory_type, + @Cast("int64_t*") long[] memory_type_id); + +/** Get a buffer holding (part of) the tensor data for an input for a specific + * host policy. If there are no input buffers specified for this host policy, + * the fallback input buffer is returned. + * For a given input the number of buffers composing the input are found + * from 'buffer_count' returned by TRITONBACKEND_InputPropertiesForHostPolicy. + * The returned buffer is owned by the input and so should not be modified or + * freed by the caller. The lifetime of the buffer matches that of the input + * and so the buffer should not be accessed after the input tensor object is + * released. + * + * @param input The input tensor. + * @param host_policy_name The host policy name. Fallback input buffer + * will be returned if nullptr is provided. + * @param index The index of the buffer. Must be 0 <= index < + * buffer_count, where buffer_count is the value returned by + * TRITONBACKEND_InputPropertiesForHostPolicy. + * @param buffer Returns a pointer to a contiguous block of data for + * the named input. + * @param buffer_byte_size Returns the size, in bytes, of 'buffer'. + * @param memory_type Acts as both input and output. On input gives + * the buffer memory type preferred by the function caller. Returns + * the actual memory type of 'buffer'. + * @param memory_type_id Acts as both input and output. On input + * gives the buffer memory type id preferred by the function caller. + * Returns the actual memory type id of 'buffer'. * @return a TRITONSERVER_Error indicating success or failure. 
*/ + /// /// -public static native TRITONSERVER_Error TRITONSERVER_MessageSerializeToJson( - TRITONSERVER_Message message, @Cast("const char**") PointerPointer base, @Cast("size_t*") SizeTPointer byte_size); -public static native TRITONSERVER_Error TRITONSERVER_MessageSerializeToJson( - TRITONSERVER_Message message, @Cast("const char**") @ByPtrPtr BytePointer base, @Cast("size_t*") SizeTPointer byte_size); -public static native TRITONSERVER_Error TRITONSERVER_MessageSerializeToJson( - TRITONSERVER_Message message, @Cast("const char**") @ByPtrPtr ByteBuffer base, @Cast("size_t*") SizeTPointer byte_size); -public static native TRITONSERVER_Error TRITONSERVER_MessageSerializeToJson( - TRITONSERVER_Message message, @Cast("const char**") @ByPtrPtr byte[] base, @Cast("size_t*") SizeTPointer byte_size); +/// +/// +public static native TRITONSERVER_Error TRITONBACKEND_InputBufferForHostPolicy( + TRITONBACKEND_Input input, String host_policy_name, + @Cast("const uint32_t") int index, @Cast("const void**") PointerPointer buffer, @Cast("uint64_t*") LongPointer buffer_byte_size, + @Cast("TRITONSERVER_MemoryType*") IntPointer memory_type, @Cast("int64_t*") LongPointer memory_type_id); +public static native TRITONSERVER_Error TRITONBACKEND_InputBufferForHostPolicy( + TRITONBACKEND_Input input, String host_policy_name, + @Cast("const uint32_t") int index, @Cast("const void**") @ByPtrPtr Pointer buffer, @Cast("uint64_t*") LongPointer buffer_byte_size, + @Cast("TRITONSERVER_MemoryType*") IntPointer memory_type, @Cast("int64_t*") LongPointer memory_type_id); +public static native TRITONSERVER_Error TRITONBACKEND_InputBufferForHostPolicy( + TRITONBACKEND_Input input, @Cast("const char*") BytePointer host_policy_name, + @Cast("const uint32_t") int index, @Cast("const void**") @ByPtrPtr Pointer buffer, @Cast("uint64_t*") LongBuffer buffer_byte_size, + @Cast("TRITONSERVER_MemoryType*") IntBuffer memory_type, @Cast("int64_t*") LongBuffer memory_type_id); +public static native 
TRITONSERVER_Error TRITONBACKEND_InputBufferForHostPolicy( + TRITONBACKEND_Input input, String host_policy_name, + @Cast("const uint32_t") int index, @Cast("const void**") @ByPtrPtr Pointer buffer, @Cast("uint64_t*") long[] buffer_byte_size, + @Cast("TRITONSERVER_MemoryType*") int[] memory_type, @Cast("int64_t*") long[] memory_type_id); +public static native TRITONSERVER_Error TRITONBACKEND_InputBufferForHostPolicy( + TRITONBACKEND_Input input, @Cast("const char*") BytePointer host_policy_name, + @Cast("const uint32_t") int index, @Cast("const void**") @ByPtrPtr Pointer buffer, @Cast("uint64_t*") LongPointer buffer_byte_size, + @Cast("TRITONSERVER_MemoryType*") IntPointer memory_type, @Cast("int64_t*") LongPointer memory_type_id); +public static native TRITONSERVER_Error TRITONBACKEND_InputBufferForHostPolicy( + TRITONBACKEND_Input input, String host_policy_name, + @Cast("const uint32_t") int index, @Cast("const void**") @ByPtrPtr Pointer buffer, @Cast("uint64_t*") LongBuffer buffer_byte_size, + @Cast("TRITONSERVER_MemoryType*") IntBuffer memory_type, @Cast("int64_t*") LongBuffer memory_type_id); +public static native TRITONSERVER_Error TRITONBACKEND_InputBufferForHostPolicy( + TRITONBACKEND_Input input, @Cast("const char*") BytePointer host_policy_name, + @Cast("const uint32_t") int index, @Cast("const void**") @ByPtrPtr Pointer buffer, @Cast("uint64_t*") long[] buffer_byte_size, + @Cast("TRITONSERVER_MemoryType*") int[] memory_type, @Cast("int64_t*") long[] memory_type_id); -/** TRITONSERVER_Metrics +/** + * TRITONBACKEND_Output * - * Object representing metrics. + * Object representing a response output tensor. *

- * Metric format types */ -public enum TRITONSERVER_MetricFormat { - TRITONSERVER_METRIC_PROMETHEUS(0); - - public final int value; - private TRITONSERVER_MetricFormat(int v) { this.value = v; } - private TRITONSERVER_MetricFormat(TRITONSERVER_MetricFormat e) { this.value = e.value; } - public TRITONSERVER_MetricFormat intern() { for (TRITONSERVER_MetricFormat e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} - -/** Delete a metrics object. + * Get a buffer to use to hold the tensor data for the output. The + * returned buffer is owned by the output and so should not be freed + * by the caller. The caller can and should fill the buffer with the + * output data for the tensor. The lifetime of the buffer matches + * that of the output and so the buffer should not be accessed after + * the output tensor object is released. * - * @param metrics The metrics object. + * @param buffer Returns a pointer to a buffer where the contents of + * the output tensor should be placed. + * @param buffer_byte_size The size, in bytes, of the buffer required + * by the caller. + * @param memory_type Acts as both input and output. On input gives + * the buffer memory type preferred by the caller. Returns the + * actual memory type of 'buffer'. + * @param memory_type_id Acts as both input and output. On input + * gives the buffer memory type id preferred by the caller. Returns + * the actual memory type id of 'buffer'. * @return a TRITONSERVER_Error indicating success or failure. 
*/ + /// /// /// -public static native TRITONSERVER_Error TRITONSERVER_MetricsDelete( - TRITONSERVER_Metrics metrics); +/// +public static native TRITONSERVER_Error TRITONBACKEND_OutputBuffer( + TRITONBACKEND_Output output, @Cast("void**") PointerPointer buffer, + @Cast("const uint64_t") long buffer_byte_size, @Cast("TRITONSERVER_MemoryType*") IntPointer memory_type, + @Cast("int64_t*") LongPointer memory_type_id); +public static native TRITONSERVER_Error TRITONBACKEND_OutputBuffer( + TRITONBACKEND_Output output, @Cast("void**") @ByPtrPtr Pointer buffer, + @Cast("const uint64_t") long buffer_byte_size, @Cast("TRITONSERVER_MemoryType*") IntPointer memory_type, + @Cast("int64_t*") LongPointer memory_type_id); +public static native TRITONSERVER_Error TRITONBACKEND_OutputBuffer( + TRITONBACKEND_Output output, @Cast("void**") @ByPtrPtr Pointer buffer, + @Cast("const uint64_t") long buffer_byte_size, @Cast("TRITONSERVER_MemoryType*") IntBuffer memory_type, + @Cast("int64_t*") LongBuffer memory_type_id); +public static native TRITONSERVER_Error TRITONBACKEND_OutputBuffer( + TRITONBACKEND_Output output, @Cast("void**") @ByPtrPtr Pointer buffer, + @Cast("const uint64_t") long buffer_byte_size, @Cast("TRITONSERVER_MemoryType*") int[] memory_type, + @Cast("int64_t*") long[] memory_type_id); -/** Get a buffer containing the metrics in the specified format. For - * each format the buffer contains the following: +/** + * TRITONBACKEND_Request * - * TRITONSERVER_METRIC_PROMETHEUS: 'base' points to a single multiline - * string (char*) that gives a text representation of the metrics in - * prometheus format. 'byte_size' returns the length of the string - * in bytes. + * Object representing an inference request. * - * The buffer is owned by the 'metrics' object and should not be - * modified or freed by the caller. The lifetime of the buffer - * extends only as long as 'metrics' and must not be accessed once - * 'metrics' is deleted. +

+ * Get the ID of the request. Can be nullptr if request doesn't have + * an ID. The returned string is owned by the request, not the + * caller, and so should not be modified or freed. * - * @param metrics The metrics object. - * @param format The format to use for the returned metrics. - * @param base Returns a pointer to the base of the formatted - * metrics, as described above. - * @param byte_size Returns the size, in bytes, of the formatted - * metrics. + * @param request The inference request. + * @param id Returns the ID. * @return a TRITONSERVER_Error indicating success or failure. */ /// -/// -public static native TRITONSERVER_Error TRITONSERVER_MetricsFormatted( - TRITONSERVER_Metrics metrics, TRITONSERVER_MetricFormat format, - @Cast("const char**") PointerPointer base, @Cast("size_t*") SizeTPointer byte_size); -public static native TRITONSERVER_Error TRITONSERVER_MetricsFormatted( - TRITONSERVER_Metrics metrics, TRITONSERVER_MetricFormat format, - @Cast("const char**") @ByPtrPtr BytePointer base, @Cast("size_t*") SizeTPointer byte_size); -public static native TRITONSERVER_Error TRITONSERVER_MetricsFormatted( - TRITONSERVER_Metrics metrics, @Cast("TRITONSERVER_MetricFormat") int format, - @Cast("const char**") @ByPtrPtr ByteBuffer base, @Cast("size_t*") SizeTPointer byte_size); -public static native TRITONSERVER_Error TRITONSERVER_MetricsFormatted( - TRITONSERVER_Metrics metrics, TRITONSERVER_MetricFormat format, - @Cast("const char**") @ByPtrPtr byte[] base, @Cast("size_t*") SizeTPointer byte_size); -public static native TRITONSERVER_Error TRITONSERVER_MetricsFormatted( - TRITONSERVER_Metrics metrics, @Cast("TRITONSERVER_MetricFormat") int format, - @Cast("const char**") @ByPtrPtr BytePointer base, @Cast("size_t*") SizeTPointer byte_size); -public static native TRITONSERVER_Error TRITONSERVER_MetricsFormatted( - TRITONSERVER_Metrics metrics, TRITONSERVER_MetricFormat format, - @Cast("const char**") @ByPtrPtr ByteBuffer base, @Cast("size_t*") 
SizeTPointer byte_size); -public static native TRITONSERVER_Error TRITONSERVER_MetricsFormatted( - TRITONSERVER_Metrics metrics, @Cast("TRITONSERVER_MetricFormat") int format, - @Cast("const char**") @ByPtrPtr byte[] base, @Cast("size_t*") SizeTPointer byte_size); - -/** TRITONSERVER_InferenceTrace - * - * Object that represents tracing for an inference request. - * -

- * Trace levels */ -public enum TRITONSERVER_InferenceTraceLevel { - TRITONSERVER_TRACE_LEVEL_DISABLED(0), - TRITONSERVER_TRACE_LEVEL_MIN(1), - TRITONSERVER_TRACE_LEVEL_MAX(2); - - public final int value; - private TRITONSERVER_InferenceTraceLevel(int v) { this.value = v; } - private TRITONSERVER_InferenceTraceLevel(TRITONSERVER_InferenceTraceLevel e) { this.value = e.value; } - public TRITONSERVER_InferenceTraceLevel intern() { for (TRITONSERVER_InferenceTraceLevel e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} +public static native TRITONSERVER_Error TRITONBACKEND_RequestId( + TRITONBACKEND_Request request, @Cast("const char**") PointerPointer id); +public static native TRITONSERVER_Error TRITONBACKEND_RequestId( + TRITONBACKEND_Request request, @Cast("const char**") @ByPtrPtr BytePointer id); +public static native TRITONSERVER_Error TRITONBACKEND_RequestId( + TRITONBACKEND_Request request, @Cast("const char**") @ByPtrPtr ByteBuffer id); +public static native TRITONSERVER_Error TRITONBACKEND_RequestId( + TRITONBACKEND_Request request, @Cast("const char**") @ByPtrPtr byte[] id); -/** Get the string representation of a trace level. The returned - * string is not owned by the caller and so should not be modified or - * freed. +/** Get the correlation ID of the request. Zero indicates that the + * request does not have a correlation ID. * - * @param level The trace level. - * @return The string representation of the trace level. 
*/ -public static native String TRITONSERVER_InferenceTraceLevelString( - TRITONSERVER_InferenceTraceLevel level); -public static native @Cast("const char*") BytePointer TRITONSERVER_InferenceTraceLevelString( - @Cast("TRITONSERVER_InferenceTraceLevel") int level); - -// Trace activities -public enum TRITONSERVER_InferenceTraceActivity { - TRITONSERVER_TRACE_REQUEST_START(0), - TRITONSERVER_TRACE_QUEUE_START(1), - TRITONSERVER_TRACE_COMPUTE_START(2), - TRITONSERVER_TRACE_COMPUTE_INPUT_END(3), - TRITONSERVER_TRACE_COMPUTE_OUTPUT_START(4), - TRITONSERVER_TRACE_COMPUTE_END(5), - TRITONSERVER_TRACE_REQUEST_END(6); + * @param request The inference request. + * @param id Returns the correlation ID. + * @return a TRITONSERVER_Error indicating success or failure. */ - public final int value; - private TRITONSERVER_InferenceTraceActivity(int v) { this.value = v; } - private TRITONSERVER_InferenceTraceActivity(TRITONSERVER_InferenceTraceActivity e) { this.value = e.value; } - public TRITONSERVER_InferenceTraceActivity intern() { for (TRITONSERVER_InferenceTraceActivity e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} +/// +public static native TRITONSERVER_Error TRITONBACKEND_RequestCorrelationId( + TRITONBACKEND_Request request, @Cast("uint64_t*") LongPointer id); +public static native TRITONSERVER_Error TRITONBACKEND_RequestCorrelationId( + TRITONBACKEND_Request request, @Cast("uint64_t*") LongBuffer id); +public static native TRITONSERVER_Error TRITONBACKEND_RequestCorrelationId( + TRITONBACKEND_Request request, @Cast("uint64_t*") long[] id); -/** Get the string representation of a trace activity. The returned - * string is not owned by the caller and so should not be modified or - * freed. +/** Get the number of input tensors specified in the request. * - * @param activity The trace activity. - * @return The string representation of the trace activity. 
*/ -public static native String TRITONSERVER_InferenceTraceActivityString( - TRITONSERVER_InferenceTraceActivity activity); -public static native @Cast("const char*") BytePointer TRITONSERVER_InferenceTraceActivityString( - @Cast("TRITONSERVER_InferenceTraceActivity") int activity); -// Targeting ../tritonserver/TRITONSERVER_InferenceTraceActivityFn_t.java - - -// Targeting ../tritonserver/TRITONSERVER_InferenceTraceReleaseFn_t.java - + * @param request The inference request. + * @param count Returns the number of input tensors. + * @return a TRITONSERVER_Error indicating success or failure. */ +/// +public static native TRITONSERVER_Error TRITONBACKEND_RequestInputCount( + TRITONBACKEND_Request request, @Cast("uint32_t*") IntPointer count); +public static native TRITONSERVER_Error TRITONBACKEND_RequestInputCount( + TRITONBACKEND_Request request, @Cast("uint32_t*") IntBuffer count); +public static native TRITONSERVER_Error TRITONBACKEND_RequestInputCount( + TRITONBACKEND_Request request, @Cast("uint32_t*") int[] count); -/** Create a new inference trace object. The caller takes ownership of - * the TRITONSERVER_InferenceTrace object and must call - * TRITONSERVER_InferenceTraceDelete to release the object. - * - * The activity callback function will be called to report activity - * for 'trace' as well as for any child traces that are spawned by - * 'trace', and so the activity callback must check the trace object - * to determine specifically what activity is being reported. - * - * The release callback is called for both 'trace' and for any child - * traces spawned by 'trace'. +/** Get the name of an input tensor. The caller does not own + * the returned string and must not modify or delete it. The lifetime + * of the returned string extends only as long as 'request'. * - * @param trace Returns the new inference trace object. - * @param level The tracing level. - * @param parent_id The parent trace id for this trace. 
A value of 0 - * indicates that there is not parent trace. - * @param activity_fn The callback function where activity for the - * trace is reported. - * @param release_fn The callback function called when all activity - * is complete for the trace. - * @param trace_userp User-provided pointer that is delivered to - * the activity and release callback functions. + * @param request The inference request. + * @param index The index of the input tensor. Must be 0 <= index < + * count, where count is the value returned by + * TRITONBACKEND_RequestInputCount. + * @param input_name Returns the name of the input tensor + * corresponding to the index. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceNew( - @Cast("TRITONSERVER_InferenceTrace**") PointerPointer trace, TRITONSERVER_InferenceTraceLevel level, - @Cast("uint64_t") long parent_id, TRITONSERVER_InferenceTraceActivityFn_t activity_fn, - TRITONSERVER_InferenceTraceReleaseFn_t release_fn, Pointer trace_userp); -public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceNew( - @ByPtrPtr TRITONSERVER_InferenceTrace trace, TRITONSERVER_InferenceTraceLevel level, - @Cast("uint64_t") long parent_id, TRITONSERVER_InferenceTraceActivityFn_t activity_fn, - TRITONSERVER_InferenceTraceReleaseFn_t release_fn, Pointer trace_userp); -public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceNew( - @ByPtrPtr TRITONSERVER_InferenceTrace trace, @Cast("TRITONSERVER_InferenceTraceLevel") int level, - @Cast("uint64_t") long parent_id, TRITONSERVER_InferenceTraceActivityFn_t activity_fn, - TRITONSERVER_InferenceTraceReleaseFn_t release_fn, Pointer trace_userp); +public static native TRITONSERVER_Error TRITONBACKEND_RequestInputName( + TRITONBACKEND_Request request, @Cast("const uint32_t") int index, + @Cast("const char**") PointerPointer input_name); +public static native TRITONSERVER_Error TRITONBACKEND_RequestInputName( + 
TRITONBACKEND_Request request, @Cast("const uint32_t") int index, + @Cast("const char**") @ByPtrPtr BytePointer input_name); +public static native TRITONSERVER_Error TRITONBACKEND_RequestInputName( + TRITONBACKEND_Request request, @Cast("const uint32_t") int index, + @Cast("const char**") @ByPtrPtr ByteBuffer input_name); +public static native TRITONSERVER_Error TRITONBACKEND_RequestInputName( + TRITONBACKEND_Request request, @Cast("const uint32_t") int index, + @Cast("const char**") @ByPtrPtr byte[] input_name); -/** Delete a trace object. +/** Get a named request input. The lifetime of the returned input + * object matches that of the request and so the input object should + * not be accessed after the request object is released. * - * @param trace The trace object. + * @param request The inference request. + * @param name The name of the input. + * @param input Returns the input corresponding to the name. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceDelete( - TRITONSERVER_InferenceTrace trace); +/// +public static native TRITONSERVER_Error TRITONBACKEND_RequestInput( + TRITONBACKEND_Request request, String name, + @Cast("TRITONBACKEND_Input**") PointerPointer input); +public static native TRITONSERVER_Error TRITONBACKEND_RequestInput( + TRITONBACKEND_Request request, String name, + @ByPtrPtr TRITONBACKEND_Input input); +public static native TRITONSERVER_Error TRITONBACKEND_RequestInput( + TRITONBACKEND_Request request, @Cast("const char*") BytePointer name, + @ByPtrPtr TRITONBACKEND_Input input); -/** Get the id associated with a trace. Every trace is assigned an id - * that is unique across all traces created for a Triton server. +/** Get a request input by index. The order of inputs in a given + * request is not necessarily consistent with other requests, even if + * the requests are in the same batch. 
As a result, you can not + * assume that an index obtained from one request will point to the + * same input in a different request. * - * @param trace The trace. - * @param id Returns the id associated with the trace. + * The lifetime of the returned input object matches that of the + * request and so the input object should not be accessed after the + * request object is released. + * + * @param request The inference request. + * @param index The index of the input tensor. Must be 0 <= index < + * count, where count is the value returned by + * TRITONBACKEND_RequestInputCount. + * @param input Returns the input corresponding to the index. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceId( - TRITONSERVER_InferenceTrace trace, @Cast("uint64_t*") LongPointer id); -public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceId( - TRITONSERVER_InferenceTrace trace, @Cast("uint64_t*") LongBuffer id); -public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceId( - TRITONSERVER_InferenceTrace trace, @Cast("uint64_t*") long[] id); +public static native TRITONSERVER_Error TRITONBACKEND_RequestInputByIndex( + TRITONBACKEND_Request request, @Cast("const uint32_t") int index, + @Cast("TRITONBACKEND_Input**") PointerPointer input); +public static native TRITONSERVER_Error TRITONBACKEND_RequestInputByIndex( + TRITONBACKEND_Request request, @Cast("const uint32_t") int index, + @ByPtrPtr TRITONBACKEND_Input input); -/** Get the parent id associated with a trace. The parent id indicates - * a parent-child relationship between two traces. A parent id value - * of 0 indicates that there is no parent trace. +/** Get the number of output tensors requested to be returned in the + * request. * - * @param trace The trace. - * @param id Returns the parent id associated with the trace. + * @param request The inference request. + * @param count Returns the number of output tensors. 
* @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceParentId( - TRITONSERVER_InferenceTrace trace, @Cast("uint64_t*") LongPointer parent_id); -public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceParentId( - TRITONSERVER_InferenceTrace trace, @Cast("uint64_t*") LongBuffer parent_id); -public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceParentId( - TRITONSERVER_InferenceTrace trace, @Cast("uint64_t*") long[] parent_id); +public static native TRITONSERVER_Error TRITONBACKEND_RequestOutputCount( + TRITONBACKEND_Request request, @Cast("uint32_t*") IntPointer count); +public static native TRITONSERVER_Error TRITONBACKEND_RequestOutputCount( + TRITONBACKEND_Request request, @Cast("uint32_t*") IntBuffer count); +public static native TRITONSERVER_Error TRITONBACKEND_RequestOutputCount( + TRITONBACKEND_Request request, @Cast("uint32_t*") int[] count); -/** Get the name of the model associated with a trace. The caller does - * not own the returned string and must not modify or delete it. The - * lifetime of the returned string extends only as long as 'trace'. +/** Get the name of a requested output tensor. The caller does not own + * the returned string and must not modify or delete it. The lifetime + * of the returned string extends only as long as 'request'. * - * @param trace The trace. - * @param model_name Returns the name of the model associated with - * the trace. + * @param request The inference request. + * @param index The index of the requested output tensor. Must be 0 + * <= index < count, where count is the value returned by + * TRITONBACKEND_RequestOutputCount. + * @param output_name Returns the name of the requested output tensor + * corresponding to the index. * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceModelName( - TRITONSERVER_InferenceTrace trace, @Cast("const char**") PointerPointer model_name); -public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceModelName( - TRITONSERVER_InferenceTrace trace, @Cast("const char**") @ByPtrPtr BytePointer model_name); -public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceModelName( - TRITONSERVER_InferenceTrace trace, @Cast("const char**") @ByPtrPtr ByteBuffer model_name); -public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceModelName( - TRITONSERVER_InferenceTrace trace, @Cast("const char**") @ByPtrPtr byte[] model_name); +public static native TRITONSERVER_Error TRITONBACKEND_RequestOutputName( + TRITONBACKEND_Request request, @Cast("const uint32_t") int index, + @Cast("const char**") PointerPointer output_name); +public static native TRITONSERVER_Error TRITONBACKEND_RequestOutputName( + TRITONBACKEND_Request request, @Cast("const uint32_t") int index, + @Cast("const char**") @ByPtrPtr BytePointer output_name); +public static native TRITONSERVER_Error TRITONBACKEND_RequestOutputName( + TRITONBACKEND_Request request, @Cast("const uint32_t") int index, + @Cast("const char**") @ByPtrPtr ByteBuffer output_name); +public static native TRITONSERVER_Error TRITONBACKEND_RequestOutputName( + TRITONBACKEND_Request request, @Cast("const uint32_t") int index, + @Cast("const char**") @ByPtrPtr byte[] output_name); -/** Get the version of the model associated with a trace. +/** Release the request. The request should be released when it is no + * longer needed by the backend. If this call returns with an error + * (i.e. non-nullptr) then the request was not released and ownership + * remains with the backend. If this call returns with success, the + * 'request' object is no longer owned by the backend and must not be + * used. Any tensor names, data types, shapes, input tensors, + * etc. 
returned by TRITONBACKEND_Request* functions for this request + * are no longer valid. If a persistent copy of that data is required + * it must be created before calling this function. * - * @param trace The trace. - * @param model_version Returns the version of the model associated - * with the trace. + * @param request The inference request. + * @param release_flags Flags indicating what type of request release + * should be performed. @see TRITONSERVER_RequestReleaseFlag. @see + * TRITONSERVER_InferenceRequestReleaseFn_t. * @return a TRITONSERVER_Error indicating success or failure. */ + /// /// -public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceModelVersion( - TRITONSERVER_InferenceTrace trace, @Cast("int64_t*") LongPointer model_version); -public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceModelVersion( - TRITONSERVER_InferenceTrace trace, @Cast("int64_t*") LongBuffer model_version); -public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceModelVersion( - TRITONSERVER_InferenceTrace trace, @Cast("int64_t*") long[] model_version); +/// +public static native TRITONSERVER_Error TRITONBACKEND_RequestRelease( + TRITONBACKEND_Request request, @Cast("uint32_t") int release_flags); -/** TRITONSERVER_InferenceRequest - * - * Object representing an inference request. The inference request - * provides the meta-data and input tensor values needed for an - * inference and returns the inference result meta-data and output - * tensors. An inference request object can be modified and reused - * multiple times. +/** + * TRITONBACKEND_ResponseFactory * + * Object representing an inference response factory. Using a + * response factory is not required; instead a response can be + * generated directly from a TRITONBACKEND_Request object using + * TRITONBACKEND_ResponseNew(). A response factory allows a request + * to be released before all responses have been sent. 
Releasing a + * request as early as possible releases all input tensor data and + * therefore may be desirable in some cases.

- * Inference request flags. The enum values must be power-of-2 values. */ -public enum TRITONSERVER_RequestFlag { - TRITONSERVER_REQUEST_FLAG_SEQUENCE_START(1), - TRITONSERVER_REQUEST_FLAG_SEQUENCE_END(2); - - public final int value; - private TRITONSERVER_RequestFlag(int v) { this.value = v; } - private TRITONSERVER_RequestFlag(TRITONSERVER_RequestFlag e) { this.value = e.value; } - public TRITONSERVER_RequestFlag intern() { for (TRITONSERVER_RequestFlag e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} - -/** Inference request release flags. The enum values must be - * power-of-2 values. */ -public enum TRITONSERVER_RequestReleaseFlag { - TRITONSERVER_REQUEST_RELEASE_ALL(1); - - public final int value; - private TRITONSERVER_RequestReleaseFlag(int v) { this.value = v; } - private TRITONSERVER_RequestReleaseFlag(TRITONSERVER_RequestReleaseFlag e) { this.value = e.value; } - public TRITONSERVER_RequestReleaseFlag intern() { for (TRITONSERVER_RequestReleaseFlag e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} - -/** Inference response complete flags. The enum values must be - * power-of-2 values. 
*/ -public enum TRITONSERVER_ResponseCompleteFlag { - TRITONSERVER_RESPONSE_COMPLETE_FINAL(1); - - public final int value; - private TRITONSERVER_ResponseCompleteFlag(int v) { this.value = v; } - private TRITONSERVER_ResponseCompleteFlag(TRITONSERVER_ResponseCompleteFlag e) { this.value = e.value; } - public TRITONSERVER_ResponseCompleteFlag intern() { for (TRITONSERVER_ResponseCompleteFlag e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} -// Targeting ../tritonserver/TRITONSERVER_InferenceRequestReleaseFn_t.java - - -// Targeting ../tritonserver/TRITONSERVER_InferenceResponseCompleteFn_t.java - - - -/** Create a new inference request object. + * Create the response factory associated with a request. * - * @param inference_request Returns the new request object. - * @param server the inference server object. - * @param model_name The name of the model to use for the request. - * @param model_version The version of the model to use for the - * request. If -1 then the server will choose a version based on the - * model's policy. + * @param factory Returns the new response factory. + * @param request The inference request. * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestNew( - @Cast("TRITONSERVER_InferenceRequest**") PointerPointer inference_request, - TRITONSERVER_Server server, String model_name, - @Cast("const int64_t") long model_version); -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestNew( - @ByPtrPtr TRITONSERVER_InferenceRequest inference_request, - TRITONSERVER_Server server, String model_name, - @Cast("const int64_t") long model_version); -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestNew( - @ByPtrPtr TRITONSERVER_InferenceRequest inference_request, - TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name, - @Cast("const int64_t") long model_version); +public static native TRITONSERVER_Error TRITONBACKEND_ResponseFactoryNew( + @Cast("TRITONBACKEND_ResponseFactory**") PointerPointer factory, TRITONBACKEND_Request request); +public static native TRITONSERVER_Error TRITONBACKEND_ResponseFactoryNew( + @ByPtrPtr TRITONBACKEND_ResponseFactory factory, TRITONBACKEND_Request request); -/** Delete an inference request object. +/** Destroy a response factory. * - * @param inference_request The request object. + * @param factory The response factory. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestDelete( - TRITONSERVER_InferenceRequest inference_request); +public static native TRITONSERVER_Error TRITONBACKEND_ResponseFactoryDelete( + TRITONBACKEND_ResponseFactory factory); -/** Get the ID for a request. The returned ID is owned by - * 'inference_request' and must not be modified or freed by the - * caller. +/** Send response flags without a corresponding response. * - * @param inference_request The request object. - * @param id Returns the ID. + * @param factory The response factory. + * @param send_flags Flags to send. @see + * TRITONSERVER_ResponseCompleteFlag. 
@see + * TRITONSERVER_InferenceResponseCompleteFn_t. * @return a TRITONSERVER_Error indicating success or failure. */ -/// -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestId( - TRITONSERVER_InferenceRequest inference_request, @Cast("const char**") PointerPointer id); -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestId( - TRITONSERVER_InferenceRequest inference_request, @Cast("const char**") @ByPtrPtr BytePointer id); -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestId( - TRITONSERVER_InferenceRequest inference_request, @Cast("const char**") @ByPtrPtr ByteBuffer id); -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestId( - TRITONSERVER_InferenceRequest inference_request, @Cast("const char**") @ByPtrPtr byte[] id); - -/** Set the ID for a request. - * - * @param inference_request The request object. - * @param id The ID. - * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestSetId( - TRITONSERVER_InferenceRequest inference_request, String id); -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestSetId( - TRITONSERVER_InferenceRequest inference_request, @Cast("const char*") BytePointer id); +/// +/// +/// +/// +public static native TRITONSERVER_Error TRITONBACKEND_ResponseFactorySendFlags( + TRITONBACKEND_ResponseFactory factory, @Cast("const uint32_t") int send_flags); -/** Get the flag(s) associated with a request. On return 'flags' holds - * a bitwise-or of all flag values, see TRITONSERVER_RequestFlag for - * available flags. +/** + * TRITONBACKEND_Response * - * @param inference_request The request object. - * @param flags Returns the flags. + * Object representing an inference response. For a given request, + * the backend must carefully manage the lifecycle of responses + * generated for that request to ensure that the output tensor + * buffers are allocated correctly. 
When a response is created with
+ * TRITONBACKEND_ResponseNew or TRITONBACKEND_ResponseNewFromFactory,
+ * all the outputs and corresponding buffers must be created for that
+ * response using TRITONBACKEND_ResponseOutput and
+ * TRITONBACKEND_OutputBuffer *before* another response is created
+ * for the request. For a given response, outputs can be created in
+ * any order but they must be created sequentially/synchronously (for
+ * example, the backend cannot use multiple threads to simultaneously
+ * add multiple outputs to a response).
+ *
+ * The above requirement applies only to responses being generated
+ * for a given request. The backend may generate responses in
+ * parallel on multiple threads as long as those responses are for
+ * different requests.
+ *
+ * This order of response creation must be strictly followed. But,
+ * once response(s) are created they do not need to be sent
+ * immediately, nor do they need to be sent in the order they were
+ * created. The backend may even delete a created response instead of
+ * sending it by using TRITONBACKEND_ResponseDelete.
+

+ * Create a response for a request. + * + * @param response Returns the new response. + * @param request The request. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestFlags( - TRITONSERVER_InferenceRequest inference_request, @Cast("uint32_t*") IntPointer flags); -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestFlags( - TRITONSERVER_InferenceRequest inference_request, @Cast("uint32_t*") IntBuffer flags); -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestFlags( - TRITONSERVER_InferenceRequest inference_request, @Cast("uint32_t*") int[] flags); +public static native TRITONSERVER_Error TRITONBACKEND_ResponseNew( + @Cast("TRITONBACKEND_Response**") PointerPointer response, TRITONBACKEND_Request request); +public static native TRITONSERVER_Error TRITONBACKEND_ResponseNew( + @ByPtrPtr TRITONBACKEND_Response response, TRITONBACKEND_Request request); -/** Set the flag(s) associated with a request. 'flags' should holds a - * bitwise-or of all flag values, see TRITONSERVER_RequestFlag for - * available flags. +/** Create a response using a factory. * - * @param inference_request The request object. - * @param flags The flags. + * @param response Returns the new response. + * @param factory The response factory. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestSetFlags( - TRITONSERVER_InferenceRequest inference_request, @Cast("uint32_t") int flags); +public static native TRITONSERVER_Error TRITONBACKEND_ResponseNewFromFactory( + @Cast("TRITONBACKEND_Response**") PointerPointer response, TRITONBACKEND_ResponseFactory factory); +public static native TRITONSERVER_Error TRITONBACKEND_ResponseNewFromFactory( + @ByPtrPtr TRITONBACKEND_Response response, TRITONBACKEND_ResponseFactory factory); -/** Get the correlation ID of the inference request. 
Default is 0, - * which indictes that the request has no correlation ID. The - * correlation ID is used to indicate two or more inference request - * are related to each other. How this relationship is handled by the - * inference server is determined by the model's scheduling - * policy. +/** Destroy a response. It is not necessary to delete a response if + * TRITONBACKEND_ResponseSend is called as that function transfers + * ownership of the response object to Triton. * - * @param inference_request The request object. - * @param correlation_id Returns the correlation ID. + * @param response The response. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestCorrelationId( - TRITONSERVER_InferenceRequest inference_request, @Cast("uint64_t*") LongPointer correlation_id); -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestCorrelationId( - TRITONSERVER_InferenceRequest inference_request, @Cast("uint64_t*") LongBuffer correlation_id); -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestCorrelationId( - TRITONSERVER_InferenceRequest inference_request, @Cast("uint64_t*") long[] correlation_id); +public static native TRITONSERVER_Error TRITONBACKEND_ResponseDelete( + TRITONBACKEND_Response response); -/** Set the correlation ID of the inference request. Default is 0, which - * indictes that the request has no correlation ID. The correlation ID - * is used to indicate two or more inference request are related to - * each other. How this relationship is handled by the inference - * server is determined by the model's scheduling policy. +/** Set a string parameter in the response. * - * @param inference_request The request object. - * @param correlation_id The correlation ID. + * @param response The response. + * @param name The name of the parameter. + * @param value The value of the parameter. 
* @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestSetCorrelationId( - TRITONSERVER_InferenceRequest inference_request, @Cast("uint64_t") long correlation_id); +public static native TRITONSERVER_Error TRITONBACKEND_ResponseSetStringParameter( + TRITONBACKEND_Response response, String name, String value); +public static native TRITONSERVER_Error TRITONBACKEND_ResponseSetStringParameter( + TRITONBACKEND_Response response, @Cast("const char*") BytePointer name, @Cast("const char*") BytePointer value); -/** Get the priority for a request. The default is 0 indicating that - * the request does not specify a priority and so will use the - * model's default priority. +/** Set an integer parameter in the response. * - * @param inference_request The request object. - * @param priority Returns the priority level. + * @param response The response. + * @param name The name of the parameter. + * @param value The value of the parameter. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestPriority( - TRITONSERVER_InferenceRequest inference_request, @Cast("uint32_t*") IntPointer priority); -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestPriority( - TRITONSERVER_InferenceRequest inference_request, @Cast("uint32_t*") IntBuffer priority); -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestPriority( - TRITONSERVER_InferenceRequest inference_request, @Cast("uint32_t*") int[] priority); +public static native TRITONSERVER_Error TRITONBACKEND_ResponseSetIntParameter( + TRITONBACKEND_Response response, String name, @Cast("const int64_t") long value); +public static native TRITONSERVER_Error TRITONBACKEND_ResponseSetIntParameter( + TRITONBACKEND_Response response, @Cast("const char*") BytePointer name, @Cast("const int64_t") long value); -/** Set the priority for a request. 
The default is 0 indicating that - * the request does not specify a priority and so will use the - * model's default priority. +/** Set an boolean parameter in the response. * - * @param inference_request The request object. - * @param priority The priority level. + * @param response The response. + * @param name The name of the parameter. + * @param value The value of the parameter. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestSetPriority( - TRITONSERVER_InferenceRequest inference_request, @Cast("uint32_t") int priority); +public static native TRITONSERVER_Error TRITONBACKEND_ResponseSetBoolParameter( + TRITONBACKEND_Response response, String name, @Cast("const bool") boolean value); +public static native TRITONSERVER_Error TRITONBACKEND_ResponseSetBoolParameter( + TRITONBACKEND_Response response, @Cast("const char*") BytePointer name, @Cast("const bool") boolean value); -/** Get the timeout for a request, in microseconds. The default is 0 - * which indicates that the request has no timeout. +/** Create an output tensor in the response. The lifetime of the + * returned output tensor object matches that of the response and so + * the output tensor object should not be accessed after the response + * object is deleted. * - * @param inference_request The request object. - * @param timeout_us Returns the timeout, in microseconds. + * @param response The response. + * @param output Returns the new response output. + * @param name The name of the output tensor. + * @param datatype The datatype of the output tensor. + * @param shape The shape of the output tensor. + * @param dims_count The number of dimensions in the output tensor + * shape. * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestTimeoutMicroseconds( - TRITONSERVER_InferenceRequest inference_request, @Cast("uint64_t*") LongPointer timeout_us); -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestTimeoutMicroseconds( - TRITONSERVER_InferenceRequest inference_request, @Cast("uint64_t*") LongBuffer timeout_us); -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestTimeoutMicroseconds( - TRITONSERVER_InferenceRequest inference_request, @Cast("uint64_t*") long[] timeout_us); +public static native TRITONSERVER_Error TRITONBACKEND_ResponseOutput( + TRITONBACKEND_Response response, @Cast("TRITONBACKEND_Output**") PointerPointer output, + String name, TRITONSERVER_DataType datatype, + @Cast("const int64_t*") LongPointer shape, @Cast("const uint32_t") int dims_count); +public static native TRITONSERVER_Error TRITONBACKEND_ResponseOutput( + TRITONBACKEND_Response response, @ByPtrPtr TRITONBACKEND_Output output, + String name, TRITONSERVER_DataType datatype, + @Cast("const int64_t*") LongPointer shape, @Cast("const uint32_t") int dims_count); +public static native TRITONSERVER_Error TRITONBACKEND_ResponseOutput( + TRITONBACKEND_Response response, @ByPtrPtr TRITONBACKEND_Output output, + @Cast("const char*") BytePointer name, @Cast("TRITONSERVER_DataType") int datatype, + @Cast("const int64_t*") LongBuffer shape, @Cast("const uint32_t") int dims_count); +public static native TRITONSERVER_Error TRITONBACKEND_ResponseOutput( + TRITONBACKEND_Response response, @ByPtrPtr TRITONBACKEND_Output output, + String name, TRITONSERVER_DataType datatype, + @Cast("const int64_t*") long[] shape, @Cast("const uint32_t") int dims_count); +public static native TRITONSERVER_Error TRITONBACKEND_ResponseOutput( + TRITONBACKEND_Response response, @ByPtrPtr TRITONBACKEND_Output output, + @Cast("const char*") BytePointer name, @Cast("TRITONSERVER_DataType") int datatype, + @Cast("const int64_t*") LongPointer shape, 
@Cast("const uint32_t") int dims_count); +public static native TRITONSERVER_Error TRITONBACKEND_ResponseOutput( + TRITONBACKEND_Response response, @ByPtrPtr TRITONBACKEND_Output output, + String name, TRITONSERVER_DataType datatype, + @Cast("const int64_t*") LongBuffer shape, @Cast("const uint32_t") int dims_count); +public static native TRITONSERVER_Error TRITONBACKEND_ResponseOutput( + TRITONBACKEND_Response response, @ByPtrPtr TRITONBACKEND_Output output, + @Cast("const char*") BytePointer name, @Cast("TRITONSERVER_DataType") int datatype, + @Cast("const int64_t*") long[] shape, @Cast("const uint32_t") int dims_count); -/** Set the timeout for a request, in microseconds. The default is 0 - * which indicates that the request has no timeout. +/** Send a response. Calling this function transfers ownership of the + * response object to Triton. The caller must not access or delete + * the response object after calling this function. * - * @param inference_request The request object. - * @param timeout_us The timeout, in microseconds. + * @param response The response. + * @param send_flags Flags associated with the response. @see + * TRITONSERVER_ResponseCompleteFlag. @see + * TRITONSERVER_InferenceResponseCompleteFn_t. + * @param error The TRITONSERVER_Error to send if the response is an + * error, or nullptr if the response is successful. * @return a TRITONSERVER_Error indicating success or failure. */ + /// -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestSetTimeoutMicroseconds( - TRITONSERVER_InferenceRequest inference_request, @Cast("uint64_t") long timeout_us); +/// +/// +/// +/// +/// +public static native TRITONSERVER_Error TRITONBACKEND_ResponseSend( + TRITONBACKEND_Response response, @Cast("const uint32_t") int send_flags, + TRITONSERVER_Error error); -/** Add an input to a request. +/** + * TRITONBACKEND_Backend * - * @param inference_request The request object. - * @param name The name of the input. 
- * @param datatype The type of the input. Valid type names are BOOL, - * UINT8, UINT16, UINT32, UINT64, INT8, INT16, INT32, INT64, FP16, - * FP32, FP64, and BYTES. - * @param shape The shape of the input. - * @param dim_count The number of dimensions of 'shape'. + * Object representing a backend. + * +

+ * TRITONBACKEND_ExecutionPolicy + * + * Types of execution policy that can be implemented by a backend. + * + * TRITONBACKEND_EXECUTION_BLOCKING: An instance of the model + * blocks in TRITONBACKEND_ModelInstanceExecute until it is ready + * to handle another inference. Upon returning from + * TRITONBACKEND_ModelInstanceExecute, Triton may immediately + * call TRITONBACKEND_ModelInstanceExecute for the same instance + * to execute a new batch of requests. Thus, most backends using + * this policy will not return from + * TRITONBACKEND_ModelInstanceExecute until all responses have + * been sent and all requests have been released. This is the + * default execution policy. + * */ +public enum TRITONBACKEND_ExecutionPolicy { + TRITONBACKEND_EXECUTION_BLOCKING(0); + + public final int value; + private TRITONBACKEND_ExecutionPolicy(int v) { this.value = v; } + private TRITONBACKEND_ExecutionPolicy(TRITONBACKEND_ExecutionPolicy e) { this.value = e.value; } + public TRITONBACKEND_ExecutionPolicy intern() { for (TRITONBACKEND_ExecutionPolicy e : values()) if (e.value == value) return e; return this; } + @Override public String toString() { return intern().name(); } +} + +/** Get the name of the backend. The caller does not own the returned + * string and must not modify or delete it. The lifetime of the + * returned string extends only as long as 'backend'. + * + * @param backend The backend. + * @param name Returns the name of the backend. * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAddInput( - TRITONSERVER_InferenceRequest inference_request, String name, - TRITONSERVER_DataType datatype, @Cast("const int64_t*") LongPointer shape, - @Cast("uint64_t") long dim_count); -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAddInput( - TRITONSERVER_InferenceRequest inference_request, @Cast("const char*") BytePointer name, - @Cast("TRITONSERVER_DataType") int datatype, @Cast("const int64_t*") LongBuffer shape, - @Cast("uint64_t") long dim_count); -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAddInput( - TRITONSERVER_InferenceRequest inference_request, String name, - TRITONSERVER_DataType datatype, @Cast("const int64_t*") long[] shape, - @Cast("uint64_t") long dim_count); -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAddInput( - TRITONSERVER_InferenceRequest inference_request, @Cast("const char*") BytePointer name, - @Cast("TRITONSERVER_DataType") int datatype, @Cast("const int64_t*") LongPointer shape, - @Cast("uint64_t") long dim_count); -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAddInput( - TRITONSERVER_InferenceRequest inference_request, String name, - TRITONSERVER_DataType datatype, @Cast("const int64_t*") LongBuffer shape, - @Cast("uint64_t") long dim_count); -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAddInput( - TRITONSERVER_InferenceRequest inference_request, @Cast("const char*") BytePointer name, - @Cast("TRITONSERVER_DataType") int datatype, @Cast("const int64_t*") long[] shape, - @Cast("uint64_t") long dim_count); +/// +/// +public static native TRITONSERVER_Error TRITONBACKEND_BackendName( + TRITONBACKEND_Backend backend, @Cast("const char**") PointerPointer name); +public static native TRITONSERVER_Error TRITONBACKEND_BackendName( + TRITONBACKEND_Backend backend, @Cast("const char**") @ByPtrPtr BytePointer name); +public static native 
TRITONSERVER_Error TRITONBACKEND_BackendName( + TRITONBACKEND_Backend backend, @Cast("const char**") @ByPtrPtr ByteBuffer name); +public static native TRITONSERVER_Error TRITONBACKEND_BackendName( + TRITONBACKEND_Backend backend, @Cast("const char**") @ByPtrPtr byte[] name); -/** Remove an input from a request. +/** Get the backend configuration. The 'backend_config' message is + * owned by Triton and should not be modified or freed by the caller. * - * @param inference_request The request object. - * @param name The name of the input. + * The backend configuration, as JSON, is: + * + * { + * "cmdline" : { + * "" : "", + * ... + * } + * } + * + * @param backend The backend. + * @param backend_config Returns the backend configuration as a message. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestRemoveInput( - TRITONSERVER_InferenceRequest inference_request, String name); -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestRemoveInput( - TRITONSERVER_InferenceRequest inference_request, @Cast("const char*") BytePointer name); +public static native TRITONSERVER_Error TRITONBACKEND_BackendConfig( + TRITONBACKEND_Backend backend, @Cast("TRITONSERVER_Message**") PointerPointer backend_config); +public static native TRITONSERVER_Error TRITONBACKEND_BackendConfig( + TRITONBACKEND_Backend backend, @ByPtrPtr TRITONSERVER_Message backend_config); -/** Remove all inputs from a request. +/** Get the execution policy for this backend. By default the + * execution policy is TRITONBACKEND_EXECUTION_BLOCKING. * - * @param inference_request The request object. + * @param backend The backend. + * @param policy Returns the execution policy. * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestRemoveAllInputs( - TRITONSERVER_InferenceRequest inference_request); +public static native TRITONSERVER_Error TRITONBACKEND_BackendExecutionPolicy( + TRITONBACKEND_Backend backend, @Cast("TRITONBACKEND_ExecutionPolicy*") IntPointer policy); +public static native TRITONSERVER_Error TRITONBACKEND_BackendExecutionPolicy( + TRITONBACKEND_Backend backend, @Cast("TRITONBACKEND_ExecutionPolicy*") IntBuffer policy); +public static native TRITONSERVER_Error TRITONBACKEND_BackendExecutionPolicy( + TRITONBACKEND_Backend backend, @Cast("TRITONBACKEND_ExecutionPolicy*") int[] policy); -/** Assign a buffer of data to an input. The buffer will be appended - * to any existing buffers for that input. The 'inference_request' - * object takes ownership of the buffer and so the caller should not - * modify or free the buffer until that ownership is released by - * 'inference_request' being deleted or by the input being removed - * from 'inference_request'. +/** Set the execution policy for this backend. By default the + * execution policy is TRITONBACKEND_EXECUTION_BLOCKING. Triton reads + * the backend's execution policy after calling + * TRITONBACKEND_Initialize, so to be recognized changes to the + * execution policy must be made in TRITONBACKEND_Initialize. * - * @param inference_request The request object. - * @param name The name of the input. - * @param base The base address of the input data. - * @param byte_size The size, in bytes, of the input data. - * @param memory_type The memory type of the input data. - * @param memory_type_id The memory type id of the input data. + * @param backend The backend. + * @param policy The execution policy. * @return a TRITONSERVER_Error indicating success or failure. 
*/ -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAppendInputData( - TRITONSERVER_InferenceRequest inference_request, String name, - @Const Pointer base, @Cast("size_t") long byte_size, TRITONSERVER_MemoryType memory_type, - @Cast("int64_t") long memory_type_id); -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAppendInputData( - TRITONSERVER_InferenceRequest inference_request, @Cast("const char*") BytePointer name, - @Const Pointer base, @Cast("size_t") long byte_size, @Cast("TRITONSERVER_MemoryType") int memory_type, - @Cast("int64_t") long memory_type_id); -/** Assign a buffer of data to an input for execution on all model instances - * with the specified host policy. The buffer will be appended to any existing - * buffers for that input on all devices with this host policy. The - * 'inference_request' object takes ownership of the buffer and so the caller - * should not modify or free the buffer until that ownership is released by - * 'inference_request' being deleted or by the input being removed from - * 'inference_request'. If the execution is scheduled on a device that does not - * have a input buffer specified using this function, then the input buffer - * specified with TRITONSERVER_InferenceRequestAppendInputData will be used so - * a non-host policy specific version of data must be added using that API. - * @param inference_request The request object. - * @param name The name of the input. - * @param base The base address of the input data. - * @param byte_size The size, in bytes, of the input data. - * @param memory_type The memory type of the input data. - * @param memory_type_id The memory type id of the input data. - * @param host_policy_name All model instances executing with this host_policy - * will use this input buffer for execution. - * @return a TRITONSERVER_Error indicating success or failure. 
*/ -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy( - TRITONSERVER_InferenceRequest inference_request, String name, - @Const Pointer base, @Cast("size_t") long byte_size, TRITONSERVER_MemoryType memory_type, - @Cast("int64_t") long memory_type_id, String host_policy_name); -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy( - TRITONSERVER_InferenceRequest inference_request, @Cast("const char*") BytePointer name, - @Const Pointer base, @Cast("size_t") long byte_size, @Cast("TRITONSERVER_MemoryType") int memory_type, - @Cast("int64_t") long memory_type_id, @Cast("const char*") BytePointer host_policy_name); +/// +/// +public static native TRITONSERVER_Error TRITONBACKEND_BackendSetExecutionPolicy( + TRITONBACKEND_Backend backend, TRITONBACKEND_ExecutionPolicy policy); +public static native TRITONSERVER_Error TRITONBACKEND_BackendSetExecutionPolicy( + TRITONBACKEND_Backend backend, @Cast("TRITONBACKEND_ExecutionPolicy") int policy); -/** Clear all input data from an input, releasing ownership of the - * buffer(s) that were appended to the input with - * TRITONSERVER_InferenceRequestAppendInputData or - * TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy - * @param inference_request The request object. - * @param name The name of the input. */ +/** Get the location of the files that make up the backend + * implementation. This location contains the backend shared library + * and any other files located with the shared library. The + * 'location' communicated depends on how the backend is being + * communicated to Triton as indicated by 'artifact_type'. + * + * TRITONBACKEND_ARTIFACT_FILESYSTEM: The backend artifacts are + * made available to Triton via the local filesytem. 'location' + * returns the full path to the directory containing this + * backend's artifacts. 
The returned string is owned by Triton, + * not the caller, and so should not be modified or freed. + * + * @param backend The backend. + * @param artifact_type Returns the artifact type for the backend. + * @param path Returns the location. + * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestRemoveAllInputData( - TRITONSERVER_InferenceRequest inference_request, String name); -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestRemoveAllInputData( - TRITONSERVER_InferenceRequest inference_request, @Cast("const char*") BytePointer name); +public static native TRITONSERVER_Error TRITONBACKEND_BackendArtifacts( + TRITONBACKEND_Backend backend, @Cast("TRITONBACKEND_ArtifactType*") IntPointer artifact_type, + @Cast("const char**") PointerPointer location); +public static native TRITONSERVER_Error TRITONBACKEND_BackendArtifacts( + TRITONBACKEND_Backend backend, @Cast("TRITONBACKEND_ArtifactType*") IntPointer artifact_type, + @Cast("const char**") @ByPtrPtr BytePointer location); +public static native TRITONSERVER_Error TRITONBACKEND_BackendArtifacts( + TRITONBACKEND_Backend backend, @Cast("TRITONBACKEND_ArtifactType*") IntBuffer artifact_type, + @Cast("const char**") @ByPtrPtr ByteBuffer location); +public static native TRITONSERVER_Error TRITONBACKEND_BackendArtifacts( + TRITONBACKEND_Backend backend, @Cast("TRITONBACKEND_ArtifactType*") int[] artifact_type, + @Cast("const char**") @ByPtrPtr byte[] location); -/** Add an output request to an inference request. +/** Get the memory manager associated with a backend. * - * @param inference_request The request object. - * @param name The name of the output. + * @param backend The backend. + * @param manager Returns the memory manager. * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAddRequestedOutput( - TRITONSERVER_InferenceRequest inference_request, String name); -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAddRequestedOutput( - TRITONSERVER_InferenceRequest inference_request, @Cast("const char*") BytePointer name); +public static native TRITONSERVER_Error TRITONBACKEND_BackendMemoryManager( + TRITONBACKEND_Backend backend, @Cast("TRITONBACKEND_MemoryManager**") PointerPointer manager); +public static native TRITONSERVER_Error TRITONBACKEND_BackendMemoryManager( + TRITONBACKEND_Backend backend, @ByPtrPtr TRITONBACKEND_MemoryManager manager); -/** Remove an output request from an inference request. +/** Get the user-specified state associated with the backend. The + * state is completely owned and managed by the backend. * - * @param inference_request The request object. - * @param name The name of the output. + * @param backend The backend. + * @param state Returns the user state, or nullptr if no user state. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestRemoveRequestedOutput( - TRITONSERVER_InferenceRequest inference_request, String name); -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestRemoveRequestedOutput( - TRITONSERVER_InferenceRequest inference_request, @Cast("const char*") BytePointer name); +public static native TRITONSERVER_Error TRITONBACKEND_BackendState( + TRITONBACKEND_Backend backend, @Cast("void**") PointerPointer state); +public static native TRITONSERVER_Error TRITONBACKEND_BackendState( + TRITONBACKEND_Backend backend, @Cast("void**") @ByPtrPtr Pointer state); -/** Remove all output requests from an inference request. +/** Set the user-specified state associated with the backend. The + * state is completely owned and managed by the backend. * - * @param inference_request The request object. 
+ * @param backend The backend. + * @param state The user state, or nullptr if no user state. * @return a TRITONSERVER_Error indicating success or failure. */ + /// -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestRemoveAllRequestedOutputs( - TRITONSERVER_InferenceRequest inference_request); +/// +/// +/// +public static native TRITONSERVER_Error TRITONBACKEND_BackendSetState( + TRITONBACKEND_Backend backend, Pointer state); -/** Set the release callback for an inference request. The release - * callback is called by Triton to return ownership of the request - * object. +/** + * TRITONBACKEND_Model * - * @param inference_request The request object. - * @param request_release_fn The function called to return ownership - * of the 'inference_request' object. - * @param request_release_userp User-provided pointer that is - * delivered to the 'request_release_fn' callback. + * Object representing a model implemented using the backend. + * +

+ * Get the name of the model. The returned string is owned by the + * model object, not the caller, and so should not be modified or + * freed. + * + * @param model The model. + * @param name Returns the model name. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestSetReleaseCallback( - TRITONSERVER_InferenceRequest inference_request, - TRITONSERVER_InferenceRequestReleaseFn_t request_release_fn, - Pointer request_release_userp); +public static native TRITONSERVER_Error TRITONBACKEND_ModelName( + TRITONBACKEND_Model model, @Cast("const char**") PointerPointer name); +public static native TRITONSERVER_Error TRITONBACKEND_ModelName( + TRITONBACKEND_Model model, @Cast("const char**") @ByPtrPtr BytePointer name); +public static native TRITONSERVER_Error TRITONBACKEND_ModelName( + TRITONBACKEND_Model model, @Cast("const char**") @ByPtrPtr ByteBuffer name); +public static native TRITONSERVER_Error TRITONBACKEND_ModelName( + TRITONBACKEND_Model model, @Cast("const char**") @ByPtrPtr byte[] name); -/** Set the allocator and response callback for an inference - * request. The allocator is used to allocate buffers for any output - * tensors included in responses that are produced for this - * request. The response callback is called to return response - * objects representing responses produced for this request. +/** Get the version of the model. * - * @param inference_request The request object. - * @param response_allocator The TRITONSERVER_ResponseAllocator to use - * to allocate buffers to hold inference results. - * @param response_allocator_userp User-provided pointer that is - * delivered to the response allocator's start and allocation functions. - * @param response_fn The function called to deliver an inference - * response for this request. - * @param response_userp User-provided pointer that is delivered to - * the 'response_fn' callback. + * @param model The model. 
+ * @param version Returns the model version. * @return a TRITONSERVER_Error indicating success or failure. */ /// /// -/// -public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestSetResponseCallback( - TRITONSERVER_InferenceRequest inference_request, - TRITONSERVER_ResponseAllocator response_allocator, - Pointer response_allocator_userp, - TRITONSERVER_InferenceResponseCompleteFn_t response_fn, - Pointer response_userp); +public static native TRITONSERVER_Error TRITONBACKEND_ModelVersion( + TRITONBACKEND_Model model, @Cast("uint64_t*") LongPointer version); +public static native TRITONSERVER_Error TRITONBACKEND_ModelVersion( + TRITONBACKEND_Model model, @Cast("uint64_t*") LongBuffer version); +public static native TRITONSERVER_Error TRITONBACKEND_ModelVersion( + TRITONBACKEND_Model model, @Cast("uint64_t*") long[] version); -/** TRITONSERVER_InferenceResponse +/** Get the location of the files that make up the model. The + * 'location' communicated depends on how the model is being + * communicated to Triton as indicated by 'artifact_type'. * - * Object representing an inference response. The inference response - * provides the meta-data and output tensor values calculated by the - * inference. + * TRITONBACKEND_ARTIFACT_FILESYSTEM: The model artifacts are made + * available to Triton via the local filesystem. 'location' + * returns the full path to the directory in the model repository + * that contains this model's artifacts. The returned string is + * owned by Triton, not the caller, and so should not be modified + * or freed. * -

- * Delete an inference response object. + * @param model The model. + * @param artifact_type Returns the artifact type for the model. + * @param path Returns the location. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONBACKEND_ModelRepository( + TRITONBACKEND_Model model, @Cast("TRITONBACKEND_ArtifactType*") IntPointer artifact_type, + @Cast("const char**") PointerPointer location); +public static native TRITONSERVER_Error TRITONBACKEND_ModelRepository( + TRITONBACKEND_Model model, @Cast("TRITONBACKEND_ArtifactType*") IntPointer artifact_type, + @Cast("const char**") @ByPtrPtr BytePointer location); +public static native TRITONSERVER_Error TRITONBACKEND_ModelRepository( + TRITONBACKEND_Model model, @Cast("TRITONBACKEND_ArtifactType*") IntBuffer artifact_type, + @Cast("const char**") @ByPtrPtr ByteBuffer location); +public static native TRITONSERVER_Error TRITONBACKEND_ModelRepository( + TRITONBACKEND_Model model, @Cast("TRITONBACKEND_ArtifactType*") int[] artifact_type, + @Cast("const char**") @ByPtrPtr byte[] location); + +/** Get the model configuration. The caller takes ownership of the + * message object and must call TRITONSERVER_MessageDelete to release + * the object. The configuration is available via this call even + * before the model is loaded and so can be used in + * TRITONBACKEND_ModelInitialize. TRITONSERVER_ServerModelConfig + * returns equivalent information but is not useable until after the + * model loads. * - * @param inference_response The response object. + * @param model The model. + * @param config_version The model configuration will be returned in + * a format matching this version. If the configuration cannot be + * represented in the requested version's format then an error will + * be returned. Currently only version 1 is supported. + * @param model_config Returns the model configuration as a message. * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseDelete( - TRITONSERVER_InferenceResponse inference_response); +public static native TRITONSERVER_Error TRITONBACKEND_ModelConfig( + TRITONBACKEND_Model model, @Cast("const uint32_t") int config_version, + @Cast("TRITONSERVER_Message**") PointerPointer model_config); +public static native TRITONSERVER_Error TRITONBACKEND_ModelConfig( + TRITONBACKEND_Model model, @Cast("const uint32_t") int config_version, + @ByPtrPtr TRITONSERVER_Message model_config); -/** Return the error status of an inference response. Return a - * TRITONSERVER_Error object on failure, return nullptr on success. - * The returned error object is owned by 'inference_response' and so - * should not be deleted by the caller. +/** Whether the backend should attempt to auto-complete the model configuration. + * If true, the model should fill the inputs, outputs, and max batch size in + * the model configuration if incomplete. If the model configuration is + * changed, the new configuration must be reported to Triton using + * TRITONBACKEND_ModelSetConfig. * - * @param inference_response The response object. - * @return a TRITONSERVER_Error indicating the success or failure - * status of the response. */ + * @param model The model. + * @param auto_complete_config Returns whether the backend should auto-complete + * the model configuration. + * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseError( - TRITONSERVER_InferenceResponse inference_response); +public static native TRITONSERVER_Error TRITONBACKEND_ModelAutoCompleteConfig( + TRITONBACKEND_Model model, @Cast("bool*") BoolPointer auto_complete_config); +public static native TRITONSERVER_Error TRITONBACKEND_ModelAutoCompleteConfig( + TRITONBACKEND_Model model, @Cast("bool*") boolean[] auto_complete_config); -/** Get model used to produce a response. 
The caller does not own the - * returned model name value and must not modify or delete it. The - * lifetime of all returned values extends until 'inference_response' - * is deleted. +/** Set the model configuration in Triton server. Only the inputs, outputs, + * and max batch size can be changed. Any other changes to the model + * configuration will be ignored by Triton. This function can only be called + * from TRITONBACKEND_ModelInitialize, calling in any other context will result + * in an error being returned. The function does not take ownership of the + * message object and so the caller should call TRITONSERVER_MessageDelete to + * release the object once the function returns. * - * @param inference_response The response object. - * @param model_name Returns the name of the model. - * @param model_version Returns the version of the model. - * this response. + * @param model The model. + * @param config_version The format version of the model configuration. + * If the configuration is not represented in the version's format + * then an error will be returned. Currently only version 1 is supported. + * @param model_config The updated model configuration as a message. * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseModel( - TRITONSERVER_InferenceResponse inference_response, @Cast("const char**") PointerPointer model_name, - @Cast("int64_t*") LongPointer model_version); -public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseModel( - TRITONSERVER_InferenceResponse inference_response, @Cast("const char**") @ByPtrPtr BytePointer model_name, - @Cast("int64_t*") LongPointer model_version); -public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseModel( - TRITONSERVER_InferenceResponse inference_response, @Cast("const char**") @ByPtrPtr ByteBuffer model_name, - @Cast("int64_t*") LongBuffer model_version); -public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseModel( - TRITONSERVER_InferenceResponse inference_response, @Cast("const char**") @ByPtrPtr byte[] model_name, - @Cast("int64_t*") long[] model_version); +public static native TRITONSERVER_Error TRITONBACKEND_ModelSetConfig( + TRITONBACKEND_Model model, @Cast("const uint32_t") int config_version, + TRITONSERVER_Message model_config); -/** Get the ID of the request corresponding to a response. The caller - * does not own the returned ID and must not modify or delete it. The - * lifetime of all returned values extends until 'inference_response' - * is deleted. +/** Get the TRITONSERVER_Server object that this model is being served + * by. * - * @param inference_response The response object. - * @param request_id Returns the ID of the request corresponding to - * this response. + * @param model The model. + * @param server Returns the server. * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseId( - TRITONSERVER_InferenceResponse inference_response, - @Cast("const char**") PointerPointer request_id); -public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseId( - TRITONSERVER_InferenceResponse inference_response, - @Cast("const char**") @ByPtrPtr BytePointer request_id); -public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseId( - TRITONSERVER_InferenceResponse inference_response, - @Cast("const char**") @ByPtrPtr ByteBuffer request_id); -public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseId( - TRITONSERVER_InferenceResponse inference_response, - @Cast("const char**") @ByPtrPtr byte[] request_id); +public static native TRITONSERVER_Error TRITONBACKEND_ModelServer( + TRITONBACKEND_Model model, @Cast("TRITONSERVER_Server**") PointerPointer server); +public static native TRITONSERVER_Error TRITONBACKEND_ModelServer( + TRITONBACKEND_Model model, @ByPtrPtr TRITONSERVER_Server server); -/** Get the number of parameters available in the response. +/** Get the backend used by the model. * - * @param inference_response The response object. - * @param count Returns the number of parameters. + * @param model The model. + * @param model Returns the backend object. * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -/// -/// -public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseParameterCount( - TRITONSERVER_InferenceResponse inference_response, @Cast("uint32_t*") IntPointer count); -public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseParameterCount( - TRITONSERVER_InferenceResponse inference_response, @Cast("uint32_t*") IntBuffer count); -public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseParameterCount( - TRITONSERVER_InferenceResponse inference_response, @Cast("uint32_t*") int[] count); +public static native TRITONSERVER_Error TRITONBACKEND_ModelBackend( + TRITONBACKEND_Model model, @Cast("TRITONBACKEND_Backend**") PointerPointer backend); +public static native TRITONSERVER_Error TRITONBACKEND_ModelBackend( + TRITONBACKEND_Model model, @ByPtrPtr TRITONBACKEND_Backend backend); -/** Get all information about a parameter. The caller does not own any - * of the returned values and must not modify or delete them. The - * lifetime of all returned values extends until 'inference_response' - * is deleted. - * - * The 'vvalue' returns a void* pointer that must be cast - * appropriately based on 'type'. For example: - * - * void* vvalue; - * TRITONSERVER_ParameterType type; - * TRITONSERVER_InferenceResponseParameter( - * response, index, &name, &type, &vvalue); - * switch (type) { - * case TRITONSERVER_PARAMETER_BOOL: - * bool value = *(reinterpret_cast(vvalue)); - * ... - * case TRITONSERVER_PARAMETER_INT: - * int64_t value = *(reinterpret_cast(vvalue)); - * ... - * case TRITONSERVER_PARAMETER_STRING: - * const char* value = reinterpret_cast(vvalue); - * ... +/** Get the user-specified state associated with the model. The + * state is completely owned and managed by the backend. * - * @param inference_response The response object. - * @param index The index of the parameter, must be 0 <= index < - * count, where 'count' is the value returned by - * TRITONSERVER_InferenceResponseParameterCount. 
- * @param name Returns the name of the parameter. - * @param type Returns the type of the parameter. - * @param vvalue Returns a pointer to the parameter value. + * @param model The model. + * @param state Returns the user state, or nullptr if no user state. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseParameter( - TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index, - @Cast("const char**") PointerPointer name, @Cast("TRITONSERVER_ParameterType*") IntPointer type, @Cast("const void**") PointerPointer vvalue); -public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseParameter( - TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index, - @Cast("const char**") @ByPtrPtr BytePointer name, @Cast("TRITONSERVER_ParameterType*") IntPointer type, @Cast("const void**") @ByPtrPtr Pointer vvalue); -public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseParameter( - TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index, - @Cast("const char**") @ByPtrPtr ByteBuffer name, @Cast("TRITONSERVER_ParameterType*") IntBuffer type, @Cast("const void**") @ByPtrPtr Pointer vvalue); -public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseParameter( - TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index, - @Cast("const char**") @ByPtrPtr byte[] name, @Cast("TRITONSERVER_ParameterType*") int[] type, @Cast("const void**") @ByPtrPtr Pointer vvalue); +public static native TRITONSERVER_Error TRITONBACKEND_ModelState( + TRITONBACKEND_Model model, @Cast("void**") PointerPointer state); +public static native TRITONSERVER_Error TRITONBACKEND_ModelState( + TRITONBACKEND_Model model, @Cast("void**") @ByPtrPtr Pointer state); -/** Get the number of outputs available in the response. +/** Set the user-specified state associated with the model. 
The + * state is completely owned and managed by the backend. * - * @param inference_response The response object. - * @param count Returns the number of output tensors. + * @param model The model. + * @param state The user state, or nullptr if no user state. * @return a TRITONSERVER_Error indicating success or failure. */ + /// -public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseOutputCount( - TRITONSERVER_InferenceResponse inference_response, @Cast("uint32_t*") IntPointer count); -public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseOutputCount( - TRITONSERVER_InferenceResponse inference_response, @Cast("uint32_t*") IntBuffer count); -public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseOutputCount( - TRITONSERVER_InferenceResponse inference_response, @Cast("uint32_t*") int[] count); +/// +/// +/// +public static native TRITONSERVER_Error TRITONBACKEND_ModelSetState( + TRITONBACKEND_Model model, Pointer state); -/** Get all information about an output tensor. The tensor data is - * returned as the base pointer to the data and the size, in bytes, - * of the data. The caller does not own any of the returned values - * and must not modify or delete them. The lifetime of all returned - * values extends until 'inference_response' is deleted. +/** + * TRITONBACKEND_ModelInstance * - * @param inference_response The response object. - * @param index The index of the output tensor, must be 0 <= index < - * count, where 'count' is the value returned by - * TRITONSERVER_InferenceResponseOutputCount. - * @param name Returns the name of the output. - * @param datatype Returns the type of the output. - * @param shape Returns the shape of the output. - * @param dim_count Returns the number of dimensions of the returned - * shape. - * @param base Returns the tensor data for the output. - * @param byte_size Returns the size, in bytes, of the data. - * @param memory_type Returns the memory type of the data. 
- * @param memory_type_id Returns the memory type id of the data. - * @param userp The user-specified value associated with the buffer - * in TRITONSERVER_ResponseAllocatorAllocFn_t. + * Object representing a model instance implemented using the + * backend. + * +

+ * Get the name of the model instance. The returned string is owned by the + * model object, not the caller, and so should not be modified or + * freed. + * + * @param instance The model instance. + * @param name Returns the instance name. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseOutput( - TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index, - @Cast("const char**") PointerPointer name, @Cast("TRITONSERVER_DataType*") IntPointer datatype, @Cast("const int64_t**") PointerPointer shape, - @Cast("uint64_t*") LongPointer dim_count, @Cast("const void**") PointerPointer base, @Cast("size_t*") SizeTPointer byte_size, - @Cast("TRITONSERVER_MemoryType*") IntPointer memory_type, @Cast("int64_t*") LongPointer memory_type_id, - @Cast("void**") PointerPointer userp); -public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseOutput( - TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index, - @Cast("const char**") @ByPtrPtr BytePointer name, @Cast("TRITONSERVER_DataType*") IntPointer datatype, @Cast("const int64_t**") @ByPtrPtr LongPointer shape, - @Cast("uint64_t*") LongPointer dim_count, @Cast("const void**") @ByPtrPtr Pointer base, @Cast("size_t*") SizeTPointer byte_size, - @Cast("TRITONSERVER_MemoryType*") IntPointer memory_type, @Cast("int64_t*") LongPointer memory_type_id, - @Cast("void**") @ByPtrPtr Pointer userp); -public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseOutput( - TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index, - @Cast("const char**") @ByPtrPtr ByteBuffer name, @Cast("TRITONSERVER_DataType*") IntBuffer datatype, @Cast("const int64_t**") @ByPtrPtr LongBuffer shape, - @Cast("uint64_t*") LongBuffer dim_count, @Cast("const void**") @ByPtrPtr Pointer base, @Cast("size_t*") SizeTPointer byte_size, - @Cast("TRITONSERVER_MemoryType*") 
IntBuffer memory_type, @Cast("int64_t*") LongBuffer memory_type_id, - @Cast("void**") @ByPtrPtr Pointer userp); -public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseOutput( - TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index, - @Cast("const char**") @ByPtrPtr byte[] name, @Cast("TRITONSERVER_DataType*") int[] datatype, @Cast("const int64_t**") @ByPtrPtr long[] shape, - @Cast("uint64_t*") long[] dim_count, @Cast("const void**") @ByPtrPtr Pointer base, @Cast("size_t*") SizeTPointer byte_size, - @Cast("TRITONSERVER_MemoryType*") int[] memory_type, @Cast("int64_t*") long[] memory_type_id, - @Cast("void**") @ByPtrPtr Pointer userp); +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceName( + TRITONBACKEND_ModelInstance instance, @Cast("const char**") PointerPointer name); +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceName( + TRITONBACKEND_ModelInstance instance, @Cast("const char**") @ByPtrPtr BytePointer name); +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceName( + TRITONBACKEND_ModelInstance instance, @Cast("const char**") @ByPtrPtr ByteBuffer name); +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceName( + TRITONBACKEND_ModelInstance instance, @Cast("const char**") @ByPtrPtr byte[] name); -/** Get a classification label associated with an output for a given - * index. The caller does not own the returned label and must not - * modify or delete it. The lifetime of all returned label extends - * until 'inference_response' is deleted. +/** Get the kind of the model instance. * - * @param inference_response The response object. - * @param index The index of the output tensor, must be 0 <= index < - * count, where 'count' is the value returned by - * TRITONSERVER_InferenceResponseOutputCount. - * @param class_index The index of the class. - * @param name Returns the label corresponding to 'class_index' or - * nullptr if no label. 
+ * @param instance The model instance. + * @param kind Returns the instance kind. * @return a TRITONSERVER_Error indicating success or failure. */ /// -/// -public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseOutputClassificationLabel( - TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index, - @Cast("const size_t") long class_index, @Cast("const char**") PointerPointer label); -public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseOutputClassificationLabel( - TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index, - @Cast("const size_t") long class_index, @Cast("const char**") @ByPtrPtr BytePointer label); -public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseOutputClassificationLabel( - TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index, - @Cast("const size_t") long class_index, @Cast("const char**") @ByPtrPtr ByteBuffer label); -public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseOutputClassificationLabel( - TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index, - @Cast("const size_t") long class_index, @Cast("const char**") @ByPtrPtr byte[] label); +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceKind( + TRITONBACKEND_ModelInstance instance, + @Cast("TRITONSERVER_InstanceGroupKind*") IntPointer kind); +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceKind( + TRITONBACKEND_ModelInstance instance, + @Cast("TRITONSERVER_InstanceGroupKind*") IntBuffer kind); +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceKind( + TRITONBACKEND_ModelInstance instance, + @Cast("TRITONSERVER_InstanceGroupKind*") int[] kind); +/** Get the device ID of the model instance. + * + * @param instance The model instance. + * @param device_id Returns the instance device ID. + * @return a TRITONSERVER_Error indicating success or failure. 
*/ -/** TRITONSERVER_ServerOptions +/// +/// +/// +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceDeviceId( + TRITONBACKEND_ModelInstance instance, IntPointer device_id); +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceDeviceId( + TRITONBACKEND_ModelInstance instance, IntBuffer device_id); +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceDeviceId( + TRITONBACKEND_ModelInstance instance, int[] device_id); + +/** Get the host policy setting. The 'host_policy' message is + * owned by Triton and should not be modified or freed by the caller. * - * Options to use when creating an inference server. + * The host policy setting, as JSON, is: * -

- * Model control modes */ -public enum TRITONSERVER_ModelControlMode { - TRITONSERVER_MODEL_CONTROL_NONE(0), - TRITONSERVER_MODEL_CONTROL_POLL(1), - TRITONSERVER_MODEL_CONTROL_EXPLICIT(2); + * { + * "" : { + * "" : "", + * ... + * } + * } + * + * @param instance The model instance. + * @param host_policy Returns the host policy setting as a message. + * @return a TRITONSERVER_Error indicating success or failure. */ - public final int value; - private TRITONSERVER_ModelControlMode(int v) { this.value = v; } - private TRITONSERVER_ModelControlMode(TRITONSERVER_ModelControlMode e) { this.value = e.value; } - public TRITONSERVER_ModelControlMode intern() { for (TRITONSERVER_ModelControlMode e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} +/// +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceHostPolicy( + TRITONBACKEND_ModelInstance instance, @Cast("TRITONSERVER_Message**") PointerPointer host_policy); +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceHostPolicy( + TRITONBACKEND_ModelInstance instance, @ByPtrPtr TRITONSERVER_Message host_policy); -/** Create a new server options object. The caller takes ownership of - * the TRITONSERVER_ServerOptions object and must call - * TRITONSERVER_ServerOptionsDelete to release the object. +/** Whether the model instance is passive. * - * @param options Returns the new server options object. + * @param instance The model instance. + * @param is_passive Returns true if the instance is passive, false otherwise * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsNew( - @Cast("TRITONSERVER_ServerOptions**") PointerPointer options); -public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsNew( - @ByPtrPtr TRITONSERVER_ServerOptions options); +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceIsPassive( + TRITONBACKEND_ModelInstance instance, @Cast("bool*") BoolPointer is_passive); +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceIsPassive( + TRITONBACKEND_ModelInstance instance, @Cast("bool*") boolean[] is_passive); -/** Delete a server options object. +/** Get the number of optimization profiles to be loaded for the instance. * - * @param options The server options object. + * @param instance The model instance. + * @param count Returns the number of optimization profiles. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsDelete( - TRITONSERVER_ServerOptions options); +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceProfileCount( + TRITONBACKEND_ModelInstance instance, @Cast("uint32_t*") IntPointer count); +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceProfileCount( + TRITONBACKEND_ModelInstance instance, @Cast("uint32_t*") IntBuffer count); +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceProfileCount( + TRITONBACKEND_ModelInstance instance, @Cast("uint32_t*") int[] count); -/** Set the textual ID for the server in a server options. The ID is a - * name that identifies the server. +/** Get the name of optimization profile. The caller does not own + * the returned string and must not modify or delete it. The lifetime + * of the returned string extends only as long as 'instance'. * - * @param options The server options object. - * @param server_id The server identifier. + * @param instance The model instance. + * @param index The index of the optimization profile. 
Must be 0 + * <= index < count, where count is the value returned by + * TRITONBACKEND_ModelInstanceProfileCount. + * @param profile_name Returns the name of the optimization profile + * corresponding to the index. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetServerId( - TRITONSERVER_ServerOptions options, String server_id); -public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetServerId( - TRITONSERVER_ServerOptions options, @Cast("const char*") BytePointer server_id); +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceProfileName( + TRITONBACKEND_ModelInstance instance, @Cast("const uint32_t") int index, + @Cast("const char**") PointerPointer profile_name); +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceProfileName( + TRITONBACKEND_ModelInstance instance, @Cast("const uint32_t") int index, + @Cast("const char**") @ByPtrPtr BytePointer profile_name); +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceProfileName( + TRITONBACKEND_ModelInstance instance, @Cast("const uint32_t") int index, + @Cast("const char**") @ByPtrPtr ByteBuffer profile_name); +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceProfileName( + TRITONBACKEND_ModelInstance instance, @Cast("const uint32_t") int index, + @Cast("const char**") @ByPtrPtr byte[] profile_name); -/** Set the model repository path in a server options. The path must be - * the full absolute path to the model repository. This function can be called - * multiple times with different paths to set multiple model repositories. - * Note that if a model is not unique across all model repositories - * at any time, the model will not be available. +/** Get the model associated with a model instance. * - * @param options The server options object. - * @param model_repository_path The full path to the model repository. + * @param instance The model instance. 
+ * @param model Returns the model object. * @return a TRITONSERVER_Error indicating success or failure. */ /// -/// -/// -/// -public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetModelRepositoryPath( - TRITONSERVER_ServerOptions options, String model_repository_path); -public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetModelRepositoryPath( - TRITONSERVER_ServerOptions options, @Cast("const char*") BytePointer model_repository_path); +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceModel( + TRITONBACKEND_ModelInstance instance, @Cast("TRITONBACKEND_Model**") PointerPointer model); +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceModel( + TRITONBACKEND_ModelInstance instance, @ByPtrPtr TRITONBACKEND_Model model); -/** Set the model control mode in a server options. For each mode the models - * will be managed as the following: - * - * TRITONSERVER_MODEL_CONTROL_NONE: the models in model repository will be - * loaded on startup. After startup any changes to the model repository will - * be ignored. Calling TRITONSERVER_ServerPollModelRepository will result in - * an error. - * - * TRITONSERVER_MODEL_CONTROL_POLL: the models in model repository will be - * loaded on startup. The model repository can be polled periodically using - * TRITONSERVER_ServerPollModelRepository and the server will load, unload, - * and updated models according to changes in the model repository. - * - * TRITONSERVER_MODEL_CONTROL_EXPLICIT: the models in model repository will - * not be loaded on startup. The corresponding model control APIs must be - * called to load / unload a model in the model repository. +/** Get the user-specified state associated with the model + * instance. The state is completely owned and managed by the + * backend. * - * @param instance The model instance. 
+ * @param state Returns the user state, or nullptr if no user state. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetModelControlMode( - TRITONSERVER_ServerOptions options, TRITONSERVER_ModelControlMode mode); -public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetModelControlMode( - TRITONSERVER_ServerOptions options, @Cast("TRITONSERVER_ModelControlMode") int mode); +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceState( + TRITONBACKEND_ModelInstance instance, @Cast("void**") PointerPointer state); +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceState( + TRITONBACKEND_ModelInstance instance, @Cast("void**") @ByPtrPtr Pointer state); -/** Set the model to be loaded at startup in a server options. The model must be - * present in one, and only one, of the specified model repositories. - * This function can be called multiple times with different model name - * to set multiple startup models. - * Note that it only takes affect on TRITONSERVER_MODEL_CONTROL_EXPLICIT mode. +/** Set the user-specified state associated with the model + * instance. The state is completely owned and managed by the + * backend. * - * @param options The server options object. - * @param mode_name The name of the model to load on startup. + * @param instance The model instance. + * @param state The user state, or nullptr if no user state. * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetStartupModel( - TRITONSERVER_ServerOptions options, String model_name); -public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetStartupModel( - TRITONSERVER_ServerOptions options, @Cast("const char*") BytePointer model_name); +/// +/// +/// +/// +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceSetState( + TRITONBACKEND_ModelInstance instance, Pointer state); -/** Enable or disable strict model configuration handling in a server - * options. +/** Record statistics for an inference request. * - * @param options The server options object. - * @param strict True to enable strict model configuration handling, - * false to disable. + * Set 'success' true to indicate that the inference request + * completed successfully. In this case all timestamps should be + * non-zero values reported in nanoseconds and should be collected + * using std::chrono::steady_clock::now().time_since_epoch() or the equivalent. + * Set 'success' to false to indicate that the inference request failed + * to complete successfully. In this case all timestamps values are + * ignored. + * + * For consistency of measurement across different backends, the + * timestamps should be collected at the following points during + * TRITONBACKEND_ModelInstanceExecute. 
+ * + *   TRITONBACKEND_ModelInstanceExecute() + *     CAPTURE TIMESTAMP (exec_start_ns) + *     < process input tensors to prepare them for inference + *       execution, including copying the tensors to/from GPU if + *       necessary> + *     CAPTURE TIMESTAMP (compute_start_ns) + *     < perform inference computations to produce outputs > + *     CAPTURE TIMESTAMP (compute_end_ns) + *     < allocate output buffers and extract output tensors, including + *       copying the tensors to/from GPU if necessary> + *     CAPTURE TIMESTAMP (exec_end_ns) + *     return + * + * Note that these statistics are associated with a valid + * TRITONBACKEND_Request object and so must be reported before the + * request is released. For backends that release the request before + * all response(s) are sent, these statistics cannot capture + * information about the time required to produce the response. + * + * @param instance The model instance. + * @param request The inference request that statistics are being + * reported for. + * @param success True if the inference request completed + * successfully, false if it failed to complete. + * @param exec_start_ns Timestamp for the start of execution. + * @param compute_start_ns Timestamp for the start of execution + * computations. + * @param compute_end_ns Timestamp for the end of execution + * computations. + * @param exec_end_ns Timestamp for the end of execution. * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetStrictModelConfig( - TRITONSERVER_ServerOptions options, @Cast("bool") boolean strict); +/// +/// +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceReportStatistics( + TRITONBACKEND_ModelInstance instance, TRITONBACKEND_Request request, + @Cast("const bool") boolean success, @Cast("const uint64_t") long exec_start_ns, + @Cast("const uint64_t") long compute_start_ns, @Cast("const uint64_t") long compute_end_ns, + @Cast("const uint64_t") long exec_end_ns); -/** Set the total pinned memory byte size that the server can allocate - * in a server options. The pinned memory pool will be shared across - * Triton itself and the backends that use - * TRITONBACKEND_MemoryManager to allocate memory. +/** Record statistics for the execution of an entire batch of + * inference requests. * - * @param options The server options object. - * @param size The pinned memory pool byte size. + * All timestamps should be non-zero values reported in nanoseconds + * and should be collected using + * std::chrono::steady_clock::now().time_since_epoch() or the equivalent. + * See TRITONBACKEND_ModelInstanceReportStatistics for more information about + * the timestamps. + * + * 'batch_size' is the sum of the batch sizes for the individual + * requests that were delivered together in the call to + * TRITONBACKEND_ModelInstanceExecute. For example, if three requests + * are passed to TRITONBACKEND_ModelInstanceExecute and those + * requests have batch size 1, 2, and 3; then 'batch_size' should be + * set to 6. + * + * @param instance The model instance. + * @param batch_size Combined batch size of all the individual + * requests executed in the batch. + * @param exec_start_ns Timestamp for the start of execution. + * @param compute_start_ns Timestamp for the start of execution + * computations. + * @param compute_end_ns Timestamp for the end of execution + * computations. 
+ * @param exec_end_ns Timestamp for the end of execution. * @return a TRITONSERVER_Error indicating success or failure. */ -/// -public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetPinnedMemoryPoolByteSize( - TRITONSERVER_ServerOptions options, @Cast("uint64_t") long size); -/** Set the total CUDA memory byte size that the server can allocate - * on given GPU device in a server options. The pinned memory pool - * will be shared across Triton itself and the backends that use - * TRITONBACKEND_MemoryManager to allocate memory. - * - * @param options The server options object. - * @param gpu_device The GPU device to allocate the memory pool. - * @param size The CUDA memory pool byte size. - * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetCudaMemoryPoolByteSize( - TRITONSERVER_ServerOptions options, int gpu_device, @Cast("uint64_t") long size); +/// +/// +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceReportBatchStatistics( + TRITONBACKEND_ModelInstance instance, @Cast("const uint64_t") long batch_size, + @Cast("const uint64_t") long exec_start_ns, @Cast("const uint64_t") long compute_start_ns, + @Cast("const uint64_t") long compute_end_ns, @Cast("const uint64_t") long exec_end_ns); -/** Set the minimum support CUDA compute capability in a server - * options. + +/** + * The following functions can be implemented by a backend. Functions + * indicated as required must be implemented or the backend will fail + * to load. * - * @param options The server options object. - * @param cc The minimum CUDA compute capability. +

+ * Initialize a backend. This function is optional, a backend is not + * required to implement it. This function is called once when a + * backend is loaded to allow the backend to initialize any state + * associated with the backend. A backend has a single state that is + * shared across all models that use the backend. + * + * @param backend The backend. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetMinSupportedComputeCapability( - TRITONSERVER_ServerOptions options, double cc); +public static native TRITONSERVER_Error TRITONBACKEND_Initialize( + TRITONBACKEND_Backend backend); -/** Enable or disable exit-on-error in a server options. +/** Finalize for a backend. This function is optional, a backend is + * not required to implement it. This function is called once, just + * before the backend is unloaded. All state associated with the + * backend should be freed and any threads created for the backend + * should be exited/joined before returning from this function. * - * @param options The server options object. - * @param exit True to enable exiting on intialization error, false - * to continue. + * @param backend The backend. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetExitOnError( - TRITONSERVER_ServerOptions options, @Cast("bool") boolean exit); +public static native TRITONSERVER_Error TRITONBACKEND_Finalize( + TRITONBACKEND_Backend backend); -/** Enable or disable strict readiness handling in a server options. +/** Initialize for a model. This function is optional, a backend is + * not required to implement it. This function is called once when a + * model that uses the backend is loaded to allow the backend to + * initialize any state associated with the model. 
The backend should + * also examine the model configuration to determine if the + * configuration is suitable for the backend. Any errors reported by + * this function will prevent the model from loading. * - * @param options The server options object. - * @param strict True to enable strict readiness handling, false to - * disable. + * @param model The model. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetStrictReadiness( - TRITONSERVER_ServerOptions options, @Cast("bool") boolean strict); +public static native TRITONSERVER_Error TRITONBACKEND_ModelInitialize( + TRITONBACKEND_Model model); -/** Set the exit timeout, in seconds, for the server in a server - * options. +/** Finalize for a model. This function is optional, a backend is not + * required to implement it. This function is called once for a + * model, just before the model is unloaded from Triton. All state + * associated with the model should be freed and any threads created + * for the model should be exited/joined before returning from this + * function. * - * @param options The server options object. - * @param timeout The exit timeout, in seconds. + * @param model The model. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetExitTimeout( - TRITONSERVER_ServerOptions options, @Cast("unsigned int") int timeout); +public static native TRITONSERVER_Error TRITONBACKEND_ModelFinalize( + TRITONBACKEND_Model model); -/** Set the number of threads used in buffer manager in a server options. +/** Initialize for a model instance. This function is optional, a + * backend is not required to implement it. This function is called + * once when a model instance is created to allow the backend to + * initialize any state associated with the instance. * - * @param thread_count The number of threads. + * @param instance The model instance. 
* @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetBufferManagerThreadCount( - TRITONSERVER_ServerOptions options, @Cast("unsigned int") int thread_count); +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceInitialize( + TRITONBACKEND_ModelInstance instance); -/** Enable or disable info level logging. +/** Finalize for a model instance. This function is optional, a + * backend is not required to implement it. This function is called + * once for an instance, just before the corresponding model is + * unloaded from Triton. All state associated with the instance + * should be freed and any threads created for the instance should be + * exited/joined before returning from this function. * - * @param options The server options object. - * @param log True to enable info logging, false to disable. + * @param instance The model instance. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetLogInfo( - TRITONSERVER_ServerOptions options, @Cast("bool") boolean log); +/// +/// +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceFinalize( + TRITONBACKEND_ModelInstance instance); -/** Enable or disable warning level logging. +/** Execute a batch of one or more requests on a model instance. This + * function is required. Triton will not perform multiple + * simultaneous calls to this function for a given model 'instance'; + * however, there may be simultaneous calls for different model + * instances (for the same or different models). * - * @param options The server options object. - * @param log True to enable warning logging, false to disable. + * If an error is returned the ownership of the request objects + * remains with Triton and the backend must not retain references to + * the request objects or access them in any way. 
+ * + * If success is returned, ownership of the request objects is + * transferred to the backend and it is then responsible for creating + * responses and releasing the request objects. + * + * @param instance The model instance. + * @param requests The requests. + * @param request_count The number of requests in the batch. * @return a TRITONSERVER_Error indicating success or failure. */ +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceExecute( + TRITONBACKEND_ModelInstance instance, @Cast("TRITONBACKEND_Request**") PointerPointer requests, + @Cast("const uint32_t") int request_count); +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceExecute( + TRITONBACKEND_ModelInstance instance, @ByPtrPtr TRITONBACKEND_Request requests, + @Cast("const uint32_t") int request_count); -/// -public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetLogWarn( - TRITONSERVER_ServerOptions options, @Cast("bool") boolean log); -/** Enable or disable error level logging. - * - * @param options The server options object. - * @param log True to enable error logging, false to disable. - * @return a TRITONSERVER_Error indicating success or failure. */ +// #ifdef __cplusplus +// #endif -/// -public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetLogError( - TRITONSERVER_ServerOptions options, @Cast("bool") boolean log); -/** Set verbose logging level. Level zero disables verbose logging. - * - * @param options The server options object. - * @param level The verbose logging level. - * @return a TRITONSERVER_Error indicating success or failure. */ +// Parsed from tritonrepoagent.h -/// -public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetLogVerbose( - TRITONSERVER_ServerOptions options, int level); +// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of NVIDIA CORPORATION nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// #pragma once + +// #include +// #include +// #include "triton/core/tritonserver.h" + +// #ifdef __cplusplus +// #endif + +// #ifdef _COMPILING_TRITONREPOAGENT +// #if defined(_MSC_VER) +// #define TRITONREPOAGENT_DECLSPEC __declspec(dllexport) +// #define TRITONREPOAGENT_ISPEC __declspec(dllimport) +// #elif defined(__GNUC__) +// #define TRITONREPOAGENT_DECLSPEC __attribute__((__visibility__("default"))) +// #define TRITONREPOAGENT_ISPEC +// #else +// #define TRITONREPOAGENT_DECLSPEC +// #define TRITONREPOAGENT_ISPEC +// #endif +// #else +// #if defined(_MSC_VER) +// #define TRITONREPOAGENT_DECLSPEC __declspec(dllimport) +// #define TRITONREPOAGENT_ISPEC __declspec(dllexport) +// #else +// #define TRITONREPOAGENT_DECLSPEC +// #define TRITONREPOAGENT_ISPEC +// Targeting ../tritonserver/TRITONREPOAGENT_Agent.java -/** Enable or disable metrics collection in a server options. - * - * @param options The server options object. - * @param metrics True to enable metrics, false to disable. - * @return a TRITONSERVER_Error indicating success or failure. */ -/// -public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetMetrics( - TRITONSERVER_ServerOptions options, @Cast("bool") boolean metrics); +// Targeting ../tritonserver/TRITONREPOAGENT_AgentModel.java -/** Enable or disable GPU metrics collection in a server options. GPU - * metrics are collected if both this option and - * TRITONSERVER_ServerOptionsSetMetrics are true. - * - * @param options The server options object. - * @param gpu_metrics True to enable GPU metrics, false to disable. - * @return a TRITONSERVER_Error indicating success or failure. */ -/// -public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetGpuMetrics( - TRITONSERVER_ServerOptions options, @Cast("bool") boolean gpu_metrics); -/** Set the directory containing backend shared libraries. 
This - * directory is searched last after the version and model directory - * in the model repository when looking for the backend shared - * library for a model. If the backend is named 'be' the directory - * searched is 'backend_dir'/be/libtriton_be.so. +/** + * TRITONREPOAGENT API Version * - * @param options The server options object. - * @param backend_dir The full path of the backend directory. - * @return a TRITONSERVER_Error indicating success or failure. */ - -/// -public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetBackendDirectory( - TRITONSERVER_ServerOptions options, String backend_dir); -public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetBackendDirectory( - TRITONSERVER_ServerOptions options, @Cast("const char*") BytePointer backend_dir); - -/** Set the directory containing repository agent shared libraries. This - * directory is searched when looking for the repository agent shared - * library for a model. If the backend is named 'ra' the directory - * searched is 'repoagent_dir'/ra/libtritonrepoagent_ra.so. + * The TRITONREPOAGENT API is versioned with major and minor version + * numbers. Any change to the API that does not impact backwards + * compatibility (for example, adding a non-required function) + * increases the minor version number. Any change that breaks + * backwards compatibility (for example, deleting or changing the + * behavior of a function) increases the major version number. A + * repository agent should check that the API version used to compile + * the agent is compatible with the API version of the Triton server + * that it is running in. This is typically done by code similar to + * the following which makes sure that the major versions are equal + * and that the minor version of Triton is >= the minor version used + * to build the agent. * - * @param options The server options object. - * @param repoagent_dir The full path of the repository agent directory. 
- * @return a TRITONSERVER_Error indicating success or failure. */ + * uint32_t api_version_major, api_version_minor; + * TRITONREPOAGENT_ApiVersion(&api_version_major, &api_version_minor); + * if ((api_version_major != TRITONREPOAGENT_API_VERSION_MAJOR) || + * (api_version_minor < TRITONREPOAGENT_API_VERSION_MINOR)) { + * return TRITONSERVER_ErrorNew( + * TRITONSERVER_ERROR_UNSUPPORTED, + * "triton repository agent API version does not support this agent"); + * } + * */ +public static final int TRITONREPOAGENT_API_VERSION_MAJOR = 0; /// -public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetRepoAgentDirectory( - TRITONSERVER_ServerOptions options, String repoagent_dir); -public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetRepoAgentDirectory( - TRITONSERVER_ServerOptions options, @Cast("const char*") BytePointer repoagent_dir); +public static final int TRITONREPOAGENT_API_VERSION_MINOR = 1; -/** Set a configuration setting for a named backend in a server - * options. +/** Get the TRITONREPOAGENT API version supported by Triton. This + * value can be compared against the + * TRITONREPOAGENT_API_VERSION_MAJOR and + * TRITONREPOAGENT_API_VERSION_MINOR used to build the agent to + * ensure that Triton is compatible with the agent. * - * @param options The server options object. - * @param backend_name The name of the backend. - * @param setting The name of the setting. - * @param value The setting value. + * @param major Returns the TRITONREPOAGENT API major version supported + * by Triton. + * @param minor Returns the TRITONREPOAGENT API minor version supported + * by Triton. * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetBackendConfig( - TRITONSERVER_ServerOptions options, String backend_name, - String setting, String value); -public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetBackendConfig( - TRITONSERVER_ServerOptions options, @Cast("const char*") BytePointer backend_name, - @Cast("const char*") BytePointer setting, @Cast("const char*") BytePointer value); - -/** Set a host policy setting for a given policy name in a server options. - * - * @param options The server options object. - * @param policy_name The name of the policy. - * @param setting The name of the setting. - * @param value The setting value. - * @return a TRITONSERVER_Error indicating success or failure. */ - /// /// -public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetHostPolicy( - TRITONSERVER_ServerOptions options, String policy_name, - String setting, String value); -public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetHostPolicy( - TRITONSERVER_ServerOptions options, @Cast("const char*") BytePointer policy_name, - @Cast("const char*") BytePointer setting, @Cast("const char*") BytePointer value); +/// +public static native TRITONSERVER_Error TRITONREPOAGENT_ApiVersion( + @Cast("uint32_t*") IntPointer major, @Cast("uint32_t*") IntPointer minor); +public static native TRITONSERVER_Error TRITONREPOAGENT_ApiVersion( + @Cast("uint32_t*") IntBuffer major, @Cast("uint32_t*") IntBuffer minor); +public static native TRITONSERVER_Error TRITONREPOAGENT_ApiVersion( + @Cast("uint32_t*") int[] major, @Cast("uint32_t*") int[] minor); -/** TRITONSERVER_Server +/** TRITONREPOAGENT_ArtifactType * - * An inference server. + * The ways that the files that make up a model's repository content + * are communicated between Triton and the agent. * -

- * Model batch flags. The enum values must be power-of-2 values. */ -public enum TRITONSERVER_ModelBatchFlag { - TRITONSERVER_BATCH_UNKNOWN(1), - TRITONSERVER_BATCH_FIRST_DIM(2); - - public final int value; - private TRITONSERVER_ModelBatchFlag(int v) { this.value = v; } - private TRITONSERVER_ModelBatchFlag(TRITONSERVER_ModelBatchFlag e) { this.value = e.value; } - public TRITONSERVER_ModelBatchFlag intern() { for (TRITONSERVER_ModelBatchFlag e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} - -/** Model index flags. The enum values must be power-of-2 values. */ -public enum TRITONSERVER_ModelIndexFlag { - TRITONSERVER_INDEX_FLAG_READY(1); + * TRITONREPOAGENT_ARTIFACT_FILESYSTEM: The model artifacts are + * communicated to and from the repository agent via a locally + * accessible filesystem. The agent can access these files using + * an appropriate filesystem API. + * + * TRITONREPOAGENT_ARTIFACT_REMOTE_FILESYSTEM: The model artifacts are + * communicated to and from the repository agent via a remote filesystem. + * The remote filesystem path follows the same convention as is used for + * repository paths, for example, "s3://" prefix indicates an S3 path. 
+ * */ +public enum TRITONREPOAGENT_ArtifactType { + TRITONREPOAGENT_ARTIFACT_FILESYSTEM(0), + TRITONREPOAGENT_ARTIFACT_REMOTE_FILESYSTEM(1); public final int value; - private TRITONSERVER_ModelIndexFlag(int v) { this.value = v; } - private TRITONSERVER_ModelIndexFlag(TRITONSERVER_ModelIndexFlag e) { this.value = e.value; } - public TRITONSERVER_ModelIndexFlag intern() { for (TRITONSERVER_ModelIndexFlag e : values()) if (e.value == value) return e; return this; } + private TRITONREPOAGENT_ArtifactType(int v) { this.value = v; } + private TRITONREPOAGENT_ArtifactType(TRITONREPOAGENT_ArtifactType e) { this.value = e.value; } + public TRITONREPOAGENT_ArtifactType intern() { for (TRITONREPOAGENT_ArtifactType e : values()) if (e.value == value) return e; return this; } @Override public String toString() { return intern().name(); } } -/** Model transaction policy flags. The enum values must be - * power-of-2 values. */ -public enum TRITONSERVER_ModelTxnPropertyFlag { - TRITONSERVER_TXN_ONE_TO_ONE(1), - TRITONSERVER_TXN_DECOUPLED(2); +/** TRITONREPOAGENT_ActionType + * + * Types of repository actions that can be handled by an agent. + * The lifecycle of a TRITONREPOAGENT_AgentModel begins with a call to + * TRITONREPOAGENT_ModelInitialize and ends with a call to + * TRITONREPOAGENT_ModelFinalize. Between those calls the current lifecycle + * state of the model is communicated by calls to TRITONREPOAGENT_ModelAction. + * Possible lifecycles are: + * + * LOAD -> LOAD_COMPLETE -> UNLOAD -> UNLOAD_COMPLETE + * LOAD -> LOAD_FAIL + * + * TRITONREPOAGENT_ACTION_LOAD: A model is being loaded. + * + * TRITONREPOAGENT_ACTION_LOAD_COMPLETE: The model load completed + * successfully and the model is now loaded. + * + * TRITONREPOAGENT_ACTION_LOAD_FAIL: The model load did not complete + * successfully. The model is not loaded. + * + * TRITONREPOAGENT_ACTION_UNLOAD: The model is being unloaded. + * + * TRITONREPOAGENT_ACTION_UNLOAD_COMPLETE: The model unload is complete. 
+ * */ +public enum TRITONREPOAGENT_ActionType { + TRITONREPOAGENT_ACTION_LOAD(0), + TRITONREPOAGENT_ACTION_LOAD_COMPLETE(1), + TRITONREPOAGENT_ACTION_LOAD_FAIL(2), + TRITONREPOAGENT_ACTION_UNLOAD(3), + TRITONREPOAGENT_ACTION_UNLOAD_COMPLETE(4); public final int value; - private TRITONSERVER_ModelTxnPropertyFlag(int v) { this.value = v; } - private TRITONSERVER_ModelTxnPropertyFlag(TRITONSERVER_ModelTxnPropertyFlag e) { this.value = e.value; } - public TRITONSERVER_ModelTxnPropertyFlag intern() { for (TRITONSERVER_ModelTxnPropertyFlag e : values()) if (e.value == value) return e; return this; } + private TRITONREPOAGENT_ActionType(int v) { this.value = v; } + private TRITONREPOAGENT_ActionType(TRITONREPOAGENT_ActionType e) { this.value = e.value; } + public TRITONREPOAGENT_ActionType intern() { for (TRITONREPOAGENT_ActionType e : values()) if (e.value == value) return e; return this; } @Override public String toString() { return intern().name(); } } -/** Create a new server object. The caller takes ownership of the - * TRITONSERVER_Server object and must call TRITONSERVER_ServerDelete - * to release the object. - * - * @param server Returns the new inference server object. - * @param options The inference server options object. - * @return a TRITONSERVER_Error indicating success or failure. */ - -/// -public static native TRITONSERVER_Error TRITONSERVER_ServerNew( - @Cast("TRITONSERVER_Server**") PointerPointer server, TRITONSERVER_ServerOptions options); -public static native TRITONSERVER_Error TRITONSERVER_ServerNew( - @ByPtrPtr TRITONSERVER_Server server, TRITONSERVER_ServerOptions options); - -/** Delete a server object. If server is not already stopped it is - * stopped before being deleted. - * - * @param server The inference server object. - * @return a TRITONSERVER_Error indicating success or failure. */ - -/// -public static native TRITONSERVER_Error TRITONSERVER_ServerDelete( - TRITONSERVER_Server server); - -/** Stop a server object. 
A server can't be restarted once it is - * stopped. +/** Get the location of the files that make up the model. The + * 'location' communicated depends on how the model is being + * communicated to the agent as indicated by 'artifact_type'. * - * @param server The inference server object. - * @return a TRITONSERVER_Error indicating success or failure. */ - -/// -public static native TRITONSERVER_Error TRITONSERVER_ServerStop( - TRITONSERVER_Server server); - -/** Check the model repository for changes and update server state - * based on those changes. + * TRITONREPOAGENT_ARTIFACT_FILESYSTEM: The model artifacts are + * made available to the agent via the local + * filesystem. 'location' returns the full path to the directory + * in the model repository that contains the model's + * artifacts. The returned location string is owned by Triton, + * not the caller, and so should not be modified or freed. The + * contents of the directory are owned by Triton, not the agent, + * and so the agent should not delete or modify the contents. Use + * TRITONREPOAGENT_ModelRepositoryLocationAcquire to get a location that can be + * used to modify the model repository contents. * - * @param server The inference server object. - * @return a TRITONSERVER_Error indicating success or failure. */ - -/// -public static native TRITONSERVER_Error TRITONSERVER_ServerPollModelRepository(TRITONSERVER_Server server); - -/** Is the server live? + * TRITONREPOAGENT_ARTIFACT_REMOTE_FILESYSTEM: The model artifacts are + * made available to the agent via a remote filesystem. + * 'location' returns the full path to the remote directory that contains + * the model's artifacts. The returned location string is owned by Triton, + * not the caller, and so should not be modified or freed. The contents of + * the remote directory are owned by Triton, not the agent, + * and so the agent should not delete or modify the contents. 
+ * Use TRITONREPOAGENT_ModelRepositoryLocationAcquire to get a location + * that can be used to write updated model repository contents. * - * @param server The inference server object. - * @param live Returns true if server is live, false otherwise. + * @param agent The agent. + * @param model The model. + * @param artifact_type Returns the artifact type for the location. + * @param path Returns the location. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONSERVER_ServerIsLive( - TRITONSERVER_Server server, @Cast("bool*") BoolPointer live); -public static native TRITONSERVER_Error TRITONSERVER_ServerIsLive( - TRITONSERVER_Server server, @Cast("bool*") boolean[] live); +/// +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocation( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + @Cast("TRITONREPOAGENT_ArtifactType*") IntPointer artifact_type, @Cast("const char**") PointerPointer location); +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocation( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + @Cast("TRITONREPOAGENT_ArtifactType*") IntPointer artifact_type, @Cast("const char**") @ByPtrPtr BytePointer location); +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocation( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + @Cast("TRITONREPOAGENT_ArtifactType*") IntBuffer artifact_type, @Cast("const char**") @ByPtrPtr ByteBuffer location); +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocation( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + @Cast("TRITONREPOAGENT_ArtifactType*") int[] artifact_type, @Cast("const char**") @ByPtrPtr byte[] location); -/** Is the server ready? +/** Acquire a location where the agent can produce a new version of + * the model repository files. 
This is a convenience method to create + * a temporary directory for the agent. The agent is responsible for + * calling TRITONREPOAGENT_ModelRepositoryLocationDelete in + * TRITONREPOAGENT_ModelFinalize to delete the location. Initially the + * acquired location is empty. The 'location' communicated depends on + * the requested 'artifact_type'. * - * @param server The inference server object. - * @param ready Returns true if server is ready, false otherwise. + * TRITONREPOAGENT_ARTIFACT_FILESYSTEM: The location is a directory + * on the local filesystem. 'location' returns the full path to + * an empty directory that the agent should populate with the + * model's artifacts. The returned location string is owned by + * Triton, not the agent, and so should not be modified or freed. + * + * @param agent The agent. + * @param model The model. + * @param artifact_type The artifact type for the location. + * @param path Returns the location. * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native TRITONSERVER_Error TRITONSERVER_ServerIsReady( - TRITONSERVER_Server server, @Cast("bool*") BoolPointer ready); -public static native TRITONSERVER_Error TRITONSERVER_ServerIsReady( - TRITONSERVER_Server server, @Cast("bool*") boolean[] ready); +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocationAcquire( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + TRITONREPOAGENT_ArtifactType artifact_type, @Cast("const char**") PointerPointer location); +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocationAcquire( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + TRITONREPOAGENT_ArtifactType artifact_type, @Cast("const char**") @ByPtrPtr BytePointer location); +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocationAcquire( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + @Cast("TRITONREPOAGENT_ArtifactType") int artifact_type, @Cast("const char**") @ByPtrPtr ByteBuffer location); +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocationAcquire( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + TRITONREPOAGENT_ArtifactType artifact_type, @Cast("const char**") @ByPtrPtr byte[] location); +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocationAcquire( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + @Cast("TRITONREPOAGENT_ArtifactType") int artifact_type, @Cast("const char**") @ByPtrPtr BytePointer location); +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocationAcquire( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + TRITONREPOAGENT_ArtifactType artifact_type, @Cast("const char**") @ByPtrPtr ByteBuffer location); +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocationAcquire( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + 
@Cast("TRITONREPOAGENT_ArtifactType") int artifact_type, @Cast("const char**") @ByPtrPtr byte[] location); -/** Is the model ready? +/** Discard and release ownership of a previously acquired location + * and its contents. The agent must not access or modify the location + * or its contents after this call. * - * @param server The inference server object. - * @param model_name The name of the model to get readiness for. - * @param model_version The version of the model to get readiness - * for. If -1 then the server will choose a version based on the - * model's policy. - * @param ready Returns true if server is ready, false otherwise. + * @param agent The agent. + * @param model The model. + * @param path The location to release. * @return a TRITONSERVER_Error indicating success or failure. */ /// /// /// -public static native TRITONSERVER_Error TRITONSERVER_ServerModelIsReady( - TRITONSERVER_Server server, String model_name, - @Cast("const int64_t") long model_version, @Cast("bool*") BoolPointer ready); -public static native TRITONSERVER_Error TRITONSERVER_ServerModelIsReady( - TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name, - @Cast("const int64_t") long model_version, @Cast("bool*") boolean[] ready); +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocationRelease( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + String location); +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocationRelease( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + @Cast("const char*") BytePointer location); -/** Get the batch properties of the model. The properties are - * communicated by a flags value and an (optional) object returned by - * 'voidp'. +/** Inform Triton that the specified repository location should be used for + * the model in place of the original model repository. 
This method can only be + * called when TRITONREPOAGENT_ModelAction is invoked with + * TRITONREPOAGENT_ACTION_LOAD. The 'location' + * communicated depends on how the repository is being + * communicated to Triton as indicated by 'artifact_type'. * - * - TRITONSERVER_BATCH_UNKNOWN: Triton cannot determine the - * batching properties of the model. This means that the model - * does not support batching in any way that is useable by - * Triton. The returned 'voidp' value is nullptr. + * TRITONREPOAGENT_ARTIFACT_FILESYSTEM: The model artifacts are + * made available to Triton via the local filesystem. 'location' returns + * the full path to the directory. Ownership of the contents of the + * returned directory are transferred to Triton and the agent should not + * modify or free the contents until TRITONREPOAGENT_ModelFinalize. + * The local filesystem directory can be created using + * TRITONREPOAGENT_ModelRepositoryLocationAcquire or the agent can use + * its own local filesystem API. * - * - TRITONSERVER_BATCH_FIRST_DIM: The model supports batching - * along the first dimension of every input and output - * tensor. Triton schedulers that perform batching can - * automatically batch inference requests along this dimension. - * The returned 'voidp' value is nullptr. + * TRITONREPOAGENT_ARTIFACT_REMOTE_FILESYSTEM: The model artifacts are + * made available to Triton via a remote filesystem. 'location' returns + * the full path to the remote filesystem directory. Ownership of the + * contents of the returned directory are transferred to Triton and + * the agent should not modify or free the contents until + * TRITONREPOAGENT_ModelFinalize. * - * @param server The inference server object. - * @param model_name The name of the model. - * @param model_version The version of the model. If -1 then the - * server will choose a version based on the model's policy. - * @param flags Returns flags indicating the batch properties of the - * model. 
- * @param voidp If non-nullptr, returns a point specific to the - * 'flags' value. + * @param agent The agent. + * @param model The model. + * @param artifact_type The artifact type for the location. + * @param path Returns the location. * @return a TRITONSERVER_Error indicating success or failure. */ /// -/// -/// -public static native TRITONSERVER_Error TRITONSERVER_ServerModelBatchProperties( - TRITONSERVER_Server server, String model_name, - @Cast("const int64_t") long model_version, @Cast("uint32_t*") IntPointer flags, @Cast("void**") PointerPointer voidp); -public static native TRITONSERVER_Error TRITONSERVER_ServerModelBatchProperties( - TRITONSERVER_Server server, String model_name, - @Cast("const int64_t") long model_version, @Cast("uint32_t*") IntPointer flags, @Cast("void**") @ByPtrPtr Pointer voidp); -public static native TRITONSERVER_Error TRITONSERVER_ServerModelBatchProperties( - TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name, - @Cast("const int64_t") long model_version, @Cast("uint32_t*") IntBuffer flags, @Cast("void**") @ByPtrPtr Pointer voidp); -public static native TRITONSERVER_Error TRITONSERVER_ServerModelBatchProperties( - TRITONSERVER_Server server, String model_name, - @Cast("const int64_t") long model_version, @Cast("uint32_t*") int[] flags, @Cast("void**") @ByPtrPtr Pointer voidp); -public static native TRITONSERVER_Error TRITONSERVER_ServerModelBatchProperties( - TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name, - @Cast("const int64_t") long model_version, @Cast("uint32_t*") IntPointer flags, @Cast("void**") @ByPtrPtr Pointer voidp); -public static native TRITONSERVER_Error TRITONSERVER_ServerModelBatchProperties( - TRITONSERVER_Server server, String model_name, - @Cast("const int64_t") long model_version, @Cast("uint32_t*") IntBuffer flags, @Cast("void**") @ByPtrPtr Pointer voidp); -public static native TRITONSERVER_Error TRITONSERVER_ServerModelBatchProperties( - TRITONSERVER_Server 
server, @Cast("const char*") BytePointer model_name, - @Cast("const int64_t") long model_version, @Cast("uint32_t*") int[] flags, @Cast("void**") @ByPtrPtr Pointer voidp); +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryUpdate( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + TRITONREPOAGENT_ArtifactType artifact_type, String location); +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryUpdate( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + @Cast("TRITONREPOAGENT_ArtifactType") int artifact_type, @Cast("const char*") BytePointer location); -/** Get the transaction policy of the model. The policy is - * communicated by a flags value. - * - * - TRITONSERVER_TXN_ONE_TO_ONE: The model generates exactly - * one response per request. +/** Get the number of agent parameters defined for a model. * - * - TRITONSERVER_TXN_DECOUPLED: The model may generate zero - * to many responses per request. + * @param agent The agent. + * @param model The model. + * @param count Returns the number of parameters. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelParameterCount( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + @Cast("uint32_t*") IntPointer count); +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelParameterCount( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + @Cast("uint32_t*") IntBuffer count); +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelParameterCount( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + @Cast("uint32_t*") int[] count); + +/** Get a parameter name and value. The caller does not own the + * returned strings and must not modify or delete them. * - * @param server The inference server object. - * @param model_name The name of the model. - * @param model_version The version of the model. 
If -1 then the - * server will choose a version based on the model's policy. - * @param txn_flags Returns flags indicating the transaction policy of the - * model. - * @param voidp If non-nullptr, returns a point specific to the 'flags' value. + * @param agent The agent. + * @param model The model. + * @param index The index of the parameter. Must be 0 <= index < + * count, where count is the value returned by + * TRITONREPOAGENT_ModelParameterCount. + * @param parameter_name Returns the name of the parameter. + * @param parameter_value Returns the value of the parameter. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONSERVER_ServerModelTransactionProperties( - TRITONSERVER_Server server, String model_name, - @Cast("const int64_t") long model_version, @Cast("uint32_t*") IntPointer txn_flags, @Cast("void**") PointerPointer voidp); -public static native TRITONSERVER_Error TRITONSERVER_ServerModelTransactionProperties( - TRITONSERVER_Server server, String model_name, - @Cast("const int64_t") long model_version, @Cast("uint32_t*") IntPointer txn_flags, @Cast("void**") @ByPtrPtr Pointer voidp); -public static native TRITONSERVER_Error TRITONSERVER_ServerModelTransactionProperties( - TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name, - @Cast("const int64_t") long model_version, @Cast("uint32_t*") IntBuffer txn_flags, @Cast("void**") @ByPtrPtr Pointer voidp); -public static native TRITONSERVER_Error TRITONSERVER_ServerModelTransactionProperties( - TRITONSERVER_Server server, String model_name, - @Cast("const int64_t") long model_version, @Cast("uint32_t*") int[] txn_flags, @Cast("void**") @ByPtrPtr Pointer voidp); -public static native TRITONSERVER_Error TRITONSERVER_ServerModelTransactionProperties( - TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name, - @Cast("const int64_t") long model_version, @Cast("uint32_t*") IntPointer txn_flags, @Cast("void**") @ByPtrPtr 
Pointer voidp); -public static native TRITONSERVER_Error TRITONSERVER_ServerModelTransactionProperties( - TRITONSERVER_Server server, String model_name, - @Cast("const int64_t") long model_version, @Cast("uint32_t*") IntBuffer txn_flags, @Cast("void**") @ByPtrPtr Pointer voidp); -public static native TRITONSERVER_Error TRITONSERVER_ServerModelTransactionProperties( - TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name, - @Cast("const int64_t") long model_version, @Cast("uint32_t*") int[] txn_flags, @Cast("void**") @ByPtrPtr Pointer voidp); +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelParameter( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + @Cast("const uint32_t") int index, @Cast("const char**") PointerPointer parameter_name, + @Cast("const char**") PointerPointer parameter_value); +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelParameter( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + @Cast("const uint32_t") int index, @Cast("const char**") @ByPtrPtr BytePointer parameter_name, + @Cast("const char**") @ByPtrPtr BytePointer parameter_value); +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelParameter( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + @Cast("const uint32_t") int index, @Cast("const char**") @ByPtrPtr ByteBuffer parameter_name, + @Cast("const char**") @ByPtrPtr ByteBuffer parameter_value); +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelParameter( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + @Cast("const uint32_t") int index, @Cast("const char**") @ByPtrPtr byte[] parameter_name, + @Cast("const char**") @ByPtrPtr byte[] parameter_value); -/** Get the metadata of the server as a TRITONSERVER_Message object. - * The caller takes ownership of the message object and must call - * TRITONSERVER_MessageDelete to release the object. +/** Get the model configuration. 
The caller takes ownership of the + * message object and must call TRITONSERVER_MessageDelete to release + * the object. If the model repository does not contain a + * config.pbtxt file then 'model_config' is returned as nullptr. * - * @param server The inference server object. - * @param server_metadata Returns the server metadata message. + * @param agent The agent. + * @param model The model. + * @param config_version The model configuration will be returned in + * a format matching this version. If the configuration cannot be + * represented in the requested version's format then an error will + * be returned. Currently only version 1 is supported. + * @param model_config Returns the model configuration as a message. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONSERVER_ServerMetadata( - TRITONSERVER_Server server, @Cast("TRITONSERVER_Message**") PointerPointer server_metadata); -public static native TRITONSERVER_Error TRITONSERVER_ServerMetadata( - TRITONSERVER_Server server, @ByPtrPtr TRITONSERVER_Message server_metadata); +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelConfig( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + @Cast("const uint32_t") int config_version, @Cast("TRITONSERVER_Message**") PointerPointer model_config); +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelConfig( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + @Cast("const uint32_t") int config_version, @ByPtrPtr TRITONSERVER_Message model_config); -/** Get the metadata of a model as a TRITONSERVER_Message - * object. The caller takes ownership of the message object and must - * call TRITONSERVER_MessageDelete to release the object. +/** Get the user-specified state associated with the model. * - * @param server The inference server object. - * @param model_name The name of the model. - * @param model_version The version of the model. 
- * If -1 then the server will choose a version based on the model's - * policy. - * @param model_metadata Returns the model metadata message. + * @param model The agent model. + * @param state Returns the user state, or nullptr if no user state. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONSERVER_ServerModelMetadata( - TRITONSERVER_Server server, String model_name, - @Cast("const int64_t") long model_version, @Cast("TRITONSERVER_Message**") PointerPointer model_metadata); -public static native TRITONSERVER_Error TRITONSERVER_ServerModelMetadata( - TRITONSERVER_Server server, String model_name, - @Cast("const int64_t") long model_version, @ByPtrPtr TRITONSERVER_Message model_metadata); -public static native TRITONSERVER_Error TRITONSERVER_ServerModelMetadata( - TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name, - @Cast("const int64_t") long model_version, @ByPtrPtr TRITONSERVER_Message model_metadata); +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelState( + TRITONREPOAGENT_AgentModel model, @Cast("void**") PointerPointer state); +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelState( + TRITONREPOAGENT_AgentModel model, @Cast("void**") @ByPtrPtr Pointer state); -/** Get the statistics of a model as a TRITONSERVER_Message - * object. The caller takes ownership of the object and must call - * TRITONSERVER_MessageDelete to release the object. +/** Set the user-specified state associated with the model. * - * @param server The inference server object. - * @param model_name The name of the model. - * If empty, then statistics for all available models will be returned, - * and the server will choose a version based on those models' policies. - * @param model_version The version of the model. If -1 then the - * server will choose a version based on the model's policy. - * @param model_stats Returns the model statistics message. 
+ * @param model The agent model. + * @param state The user state, or nullptr if no user state. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONSERVER_ServerModelStatistics( - TRITONSERVER_Server server, String model_name, - @Cast("const int64_t") long model_version, @Cast("TRITONSERVER_Message**") PointerPointer model_stats); -public static native TRITONSERVER_Error TRITONSERVER_ServerModelStatistics( - TRITONSERVER_Server server, String model_name, - @Cast("const int64_t") long model_version, @ByPtrPtr TRITONSERVER_Message model_stats); -public static native TRITONSERVER_Error TRITONSERVER_ServerModelStatistics( - TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name, - @Cast("const int64_t") long model_version, @ByPtrPtr TRITONSERVER_Message model_stats); +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelSetState( + TRITONREPOAGENT_AgentModel model, Pointer state); -/** Get the configuration of a model as a TRITONSERVER_Message object. - * The caller takes ownership of the message object and must call - * TRITONSERVER_MessageDelete to release the object. +/** Get the user-specified state associated with the agent. * - * @param server The inference server object. - * @param model_name The name of the model. - * @param model_version The version of the model. If -1 then the - * server will choose a version based on the model's policy. - * @param config_version The model configuration will be returned in - * a format matching this version. If the configuration cannot be - * represented in the requested version's format then an error will - * be returned. Currently only version 1 is supported. - * @param model_config Returns the model config message. + * @param agent The agent. + * @param state Returns the user state, or nullptr if no user state. * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -/// -public static native TRITONSERVER_Error TRITONSERVER_ServerModelConfig( - TRITONSERVER_Server server, String model_name, - @Cast("const int64_t") long model_version, @Cast("const uint32_t") int config_version, - @Cast("TRITONSERVER_Message**") PointerPointer model_config); -public static native TRITONSERVER_Error TRITONSERVER_ServerModelConfig( - TRITONSERVER_Server server, String model_name, - @Cast("const int64_t") long model_version, @Cast("const uint32_t") int config_version, - @ByPtrPtr TRITONSERVER_Message model_config); -public static native TRITONSERVER_Error TRITONSERVER_ServerModelConfig( - TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name, - @Cast("const int64_t") long model_version, @Cast("const uint32_t") int config_version, - @ByPtrPtr TRITONSERVER_Message model_config); +public static native TRITONSERVER_Error TRITONREPOAGENT_State( + TRITONREPOAGENT_Agent agent, @Cast("void**") PointerPointer state); +public static native TRITONSERVER_Error TRITONREPOAGENT_State( + TRITONREPOAGENT_Agent agent, @Cast("void**") @ByPtrPtr Pointer state); -/** Get the index of all unique models in the model repositories as a - * TRITONSERVER_Message object. The caller takes ownership of the - * message object and must call TRITONSERVER_MessageDelete to release - * the object. - * - * If TRITONSERVER_INDEX_FLAG_READY is set in 'flags' only the models - * that are loaded into the server and ready for inferencing are - * returned. +/** Set the user-specified state associated with the agent. * - * @param server The inference server object. - * @param flags TRITONSERVER_ModelIndexFlag flags that control how to - * collect the index. - * @param model_index Return the model index message that holds the - * index of all models contained in the server's model repository(s). + * @param agent The agent. + * @param state The user state, or nullptr if no user state. * @return a TRITONSERVER_Error indicating success or failure. 
*/ + /// -public static native TRITONSERVER_Error TRITONSERVER_ServerModelIndex( - TRITONSERVER_Server server, @Cast("uint32_t") int flags, - @Cast("TRITONSERVER_Message**") PointerPointer model_index); -public static native TRITONSERVER_Error TRITONSERVER_ServerModelIndex( - TRITONSERVER_Server server, @Cast("uint32_t") int flags, - @ByPtrPtr TRITONSERVER_Message model_index); +/// +/// +public static native TRITONSERVER_Error TRITONREPOAGENT_SetState( + TRITONREPOAGENT_Agent agent, Pointer state); -/** Load the requested model or reload the model if it is already - * loaded. The function does not return until the model is loaded or - * fails to load. Returned error indicates if model loaded - * successfully or not. +/** + * The following functions can be implemented by an agent. Functions + * indicated as required must be implemented or the agent will fail + * to load. * - * @param server The inference server object. - * @param model_name The name of the model. +

+ * Initialize an agent. This function is optional. This function is + * called once when an agent is loaded to allow the agent to + * initialize any state associated with the agent. An agent has a + * single state that is shared across all invocations of the agent. + * + * @param agent The agent. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONSERVER_ServerLoadModel( - TRITONSERVER_Server server, String model_name); -public static native TRITONSERVER_Error TRITONSERVER_ServerLoadModel( - TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name); +public static native TRITONSERVER_Error TRITONREPOAGENT_Initialize( + TRITONREPOAGENT_Agent agent); -/** Unload the requested model. Unloading a model that is not loaded - * on server has no affect and success code will be returned. - * The function does not wait for the requested model to be fully unload - * and success code will be returned. - * Returned error indicates if model unloaded successfully or not. +/** Finalize for an agent. This function is optional. This function is + * called once, just before the agent is unloaded. All state + * associated with the agent should be freed and any threads created + * for the agent should be exited/joined before returning from this + * function. * - * @param server The inference server object. - * @param model_name The name of the model. + * @param agent The agent. * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native TRITONSERVER_Error TRITONSERVER_ServerUnloadModel( - TRITONSERVER_Server server, String model_name); -public static native TRITONSERVER_Error TRITONSERVER_ServerUnloadModel( - TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name); +public static native TRITONSERVER_Error TRITONREPOAGENT_Finalize( + TRITONREPOAGENT_Agent agent); -/** Unload the requested model, and also unload any dependent model that - * was loaded along with the requested model (for example, the models composing - * an ensemble). Unloading a model that is not loaded - * on server has no affect and success code will be returned. - * The function does not wait for the requested model and all dependent - * models to be fully unload and success code will be returned. - * Returned error indicates if model unloaded successfully or not. +/** Initialize a model associated with an agent. This function is optional. + * This function is called once when an agent model's lifecycle begins to allow + * the agent model to initialize any state associated with it. An agent model + * has a single state that is shared across all the lifecycle of the agent + * model. * - * @param server The inference server object. - * @param model_name The name of the model. + * @param agent The agent to be associated with the model. + * @param model The model. * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONSERVER_ServerUnloadModelAndDependents( - TRITONSERVER_Server server, String model_name); -public static native TRITONSERVER_Error TRITONSERVER_ServerUnloadModelAndDependents( - TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name); +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelInitialize( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model); -/** Get the current metrics for the server. 
The caller takes ownership - * of the metrics object and must call TRITONSERVER_MetricsDelete to - * release the object. +/** Finalize for a model. This function is optional. This function is + * called once, just before the end of the agent model's lifecycle. All state + * associated with the agent model should be freed and any threads created + * for the agent model should be exited/joined before returning from this + * function. If the model acquired a model location using + * TRITONREPOAGENT_ModelRepositoryLocationAcquire, it must call + * TRITONREPOAGENT_ModelRepositoryLocationRelease to release that location. * - * @param server The inference server object. - * @param metrics Returns the metrics. + * @param agent The agent associated with the model. + * @param model The model. * @return a TRITONSERVER_Error indicating success or failure. */ /// /// /// -public static native TRITONSERVER_Error TRITONSERVER_ServerMetrics( - TRITONSERVER_Server server, @Cast("TRITONSERVER_Metrics**") PointerPointer metrics); -public static native TRITONSERVER_Error TRITONSERVER_ServerMetrics( - TRITONSERVER_Server server, @ByPtrPtr TRITONSERVER_Metrics metrics); +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelFinalize( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model); -/** Perform inference using the meta-data and inputs supplied by the - * 'inference_request'. If the function returns success, then the - * caller releases ownership of 'inference_request' and must not - * access it in any way after this call, until ownership is returned - * via the 'request_release_fn' callback registered in the request - * object with TRITONSERVER_InferenceRequestSetReleaseCallback. +/** Handle an action for a specified model. This function is + * required. Triton will not perform multiple simultaneous calls to + * this function for a given agent and model; however, there may be + * simultaneous calls for the agent for different models. 
 * - * The function unconditionally takes ownership of 'trace' and so the - * caller must not access it in any way after this call (except in - * the trace id callback) until ownership is returned via the trace's - * release_fn callback. + * If the agent does not handle the action the agent should + * immediately return success (nullptr). * - * Responses produced for this request are returned using the - * allocator and callback registered with the request by - * TRITONSERVER_InferenceRequestSetResponseCallback. + * Any modification to the model's repository must be made when 'action_type' + * is TRITONREPOAGENT_ACTION_LOAD. + * To modify the model's repository the agent must either acquire a mutable + * location via TRITONREPOAGENT_ModelRepositoryLocationAcquire + * or its own managed location, report the location to Triton via + * TRITONREPOAGENT_ModelRepositoryUpdate, and then return + * success (nullptr). If the agent does not need to make any changes + * to the model repository it should not call + * TRITONREPOAGENT_ModelRepositoryUpdate and then return success. + * To indicate that a model load should fail return a non-success status. * - * @param server The inference server object. - * @param inference_request The request object. - * @param trace The trace object for this request, or nullptr if no - * tracing. + * @param agent The agent. + * @param model The model that is the target of the action. + * @param action_type The type of action the agent should handle for the model. * @return a TRITONSERVER_Error indicating success or failure. 
*/ -public static native TRITONSERVER_Error TRITONSERVER_ServerInferAsync( - TRITONSERVER_Server server, - TRITONSERVER_InferenceRequest inference_request, - TRITONSERVER_InferenceTrace trace); - +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelAction( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + TRITONREPOAGENT_ActionType action_type); +public static native TRITONSERVER_Error TRITONREPOAGENT_ModelAction( + TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, + @Cast("TRITONREPOAGENT_ActionType") int action_type); // #ifdef __cplusplus // #endif diff --git a/tritonserver/src/main/java/org/bytedeco/tritonserver/presets/tritonserver.java b/tritonserver/src/main/java/org/bytedeco/tritonserver/presets/tritonserver.java index 60af6d7af8e..87049e6ed6c 100644 --- a/tritonserver/src/main/java/org/bytedeco/tritonserver/presets/tritonserver.java +++ b/tritonserver/src/main/java/org/bytedeco/tritonserver/presets/tritonserver.java @@ -51,7 +51,7 @@ @Platform( value = {"linux-arm64", "linux-ppc64le", "linux-x86_64", "windows-x86_64"}, compiler = "cpp11", - include = {"tritonbackend.h", "tritonrepoagent.h", "tritonserver.h"}, + include = {"tritonserver.h", "tritonbackend.h", "tritonrepoagent.h"}, link = "tritonserver" ), @Platform( @@ -121,9 +121,6 @@ public void map(InfoMap infoMap) { .put(new Info("TRITONSERVER_DECLSPEC").cppTypes().annotations()) .put(new Info("TRITONBACKEND_DECLSPEC", "TRITONBACKEND_ISPEC").cppTypes().annotations()) .put(new Info("TRITONREPOAGENT_DECLSPEC", "TRITONREPOAGENT_ISPEC").cppTypes().annotations()) - //.put(new Info("TRITONSERVER_datatype_enum").pointerTypes("TRITONSERVER_DataType")) - //.put(new Info("TRITONSERVER_DataType").valueTypes("TRITONSERVER_DataType").pointerTypes("@Cast(\"TRITONSERVER_DataType*\") PointerPointer", "@ByPtrPtr TRITONSERVER_DataType")) - ; } } From f802266b5a67357d1be9a8e068d3e53ccd330be3 Mon Sep 17 00:00:00 2001 From: jackhe Date: Mon, 11 Oct 2021 20:19:42 +0800 Subject: [PATCH 
06/21] added sample --- tritonserver/samples/simple.cc | 928 ++++++++++++++++++++++++++++++ tritonserver/samples/simple.java | 956 +++++++++++++++++++++++++++++++ 2 files changed, 1884 insertions(+) create mode 100644 tritonserver/samples/simple.cc create mode 100644 tritonserver/samples/simple.java diff --git a/tritonserver/samples/simple.cc b/tritonserver/samples/simple.cc new file mode 100644 index 00000000000..41538abe35f --- /dev/null +++ b/tritonserver/samples/simple.cc @@ -0,0 +1,928 @@ +// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of NVIDIA CORPORATION nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "src/servers/common.h" +#include "triton/core/tritonserver.h" + +#ifdef TRITON_ENABLE_GPU +#include +#endif // TRITON_ENABLE_GPU + +namespace ni = nvidia::inferenceserver; + +namespace { + +bool enforce_memory_type = false; +TRITONSERVER_MemoryType requested_memory_type; + +#ifdef TRITON_ENABLE_GPU +static auto cuda_data_deleter = [](void* data) { + if (data != nullptr) { + cudaPointerAttributes attr; + auto cuerr = cudaPointerGetAttributes(&attr, data); + if (cuerr != cudaSuccess) { + std::cerr << "error: failed to get CUDA pointer attribute of " << data + << ": " << cudaGetErrorString(cuerr) << std::endl; + } + if (attr.type == cudaMemoryTypeDevice) { + cuerr = cudaFree(data); + } else if (attr.type == cudaMemoryTypeHost) { + cuerr = cudaFreeHost(data); + } + if (cuerr != cudaSuccess) { + std::cerr << "error: failed to release CUDA pointer " << data << ": " + << cudaGetErrorString(cuerr) << std::endl; + } + } +}; +#endif // TRITON_ENABLE_GPU + +void +Usage(char** argv, const std::string& msg = std::string()) +{ + if (!msg.empty()) { + std::cerr << msg << std::endl; + } + + std::cerr << "Usage: " << argv[0] << " [options]" << std::endl; + std::cerr << "\t-m <\"system\"|\"pinned\"|gpu>" + << " Enforce the memory type for input and output tensors." 
+ << " If not specified, inputs will be in system memory and outputs" + << " will be based on the model's preferred type." << std::endl; + std::cerr << "\t-v Enable verbose logging" << std::endl; + std::cerr << "\t-r [model repository absolute path]" << std::endl; + + exit(1); +} + +TRITONSERVER_Error* +ResponseAlloc( + TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name, + size_t byte_size, TRITONSERVER_MemoryType preferred_memory_type, + int64_t preferred_memory_type_id, void* userp, void** buffer, + void** buffer_userp, TRITONSERVER_MemoryType* actual_memory_type, + int64_t* actual_memory_type_id) +{ + // Initially attempt to make the actual memory type and id that we + // allocate be the same as preferred memory type + *actual_memory_type = preferred_memory_type; + *actual_memory_type_id = preferred_memory_type_id; + + // If 'byte_size' is zero just return 'buffer' == nullptr, we don't + // need to do any other book-keeping. + if (byte_size == 0) { + *buffer = nullptr; + *buffer_userp = nullptr; + std::cout << "allocated " << byte_size << " bytes for result tensor " + << tensor_name << std::endl; + } else { + void* allocated_ptr = nullptr; + if (enforce_memory_type) { + *actual_memory_type = requested_memory_type; + } + + switch (*actual_memory_type) { +#ifdef TRITON_ENABLE_GPU + case TRITONSERVER_MEMORY_CPU_PINNED: { + auto err = cudaSetDevice(*actual_memory_type_id); + if ((err != cudaSuccess) && (err != cudaErrorNoDevice) && + (err != cudaErrorInsufficientDriver)) { + return TRITONSERVER_ErrorNew( + TRITONSERVER_ERROR_INTERNAL, + std::string( + "unable to recover current CUDA device: " + + std::string(cudaGetErrorString(err))) + .c_str()); + } + + err = cudaHostAlloc(&allocated_ptr, byte_size, cudaHostAllocPortable); + if (err != cudaSuccess) { + return TRITONSERVER_ErrorNew( + TRITONSERVER_ERROR_INTERNAL, + std::string( + "cudaHostAlloc failed: " + + std::string(cudaGetErrorString(err))) + .c_str()); + } + break; + } + + case 
TRITONSERVER_MEMORY_GPU: { + auto err = cudaSetDevice(*actual_memory_type_id); + if ((err != cudaSuccess) && (err != cudaErrorNoDevice) && + (err != cudaErrorInsufficientDriver)) { + return TRITONSERVER_ErrorNew( + TRITONSERVER_ERROR_INTERNAL, + std::string( + "unable to recover current CUDA device: " + + std::string(cudaGetErrorString(err))) + .c_str()); + } + + err = cudaMalloc(&allocated_ptr, byte_size); + if (err != cudaSuccess) { + return TRITONSERVER_ErrorNew( + TRITONSERVER_ERROR_INTERNAL, + std::string( + "cudaMalloc failed: " + std::string(cudaGetErrorString(err))) + .c_str()); + } + break; + } +#endif // TRITON_ENABLE_GPU + + // Use CPU memory if the requested memory type is unknown + // (default case). + case TRITONSERVER_MEMORY_CPU: + default: { + *actual_memory_type = TRITONSERVER_MEMORY_CPU; + allocated_ptr = malloc(byte_size); + break; + } + } + + // Pass the tensor name with buffer_userp so we can show it when + // releasing the buffer. + if (allocated_ptr != nullptr) { + *buffer = allocated_ptr; + *buffer_userp = new std::string(tensor_name); + std::cout << "allocated " << byte_size << " bytes in " + << TRITONSERVER_MemoryTypeString(*actual_memory_type) + << " for result tensor " << tensor_name << std::endl; + } + } + + return nullptr; // Success +} + +TRITONSERVER_Error* +ResponseRelease( + TRITONSERVER_ResponseAllocator* allocator, void* buffer, void* buffer_userp, + size_t byte_size, TRITONSERVER_MemoryType memory_type, + int64_t memory_type_id) +{ + std::string* name = nullptr; + if (buffer_userp != nullptr) { + name = reinterpret_cast(buffer_userp); + } else { + name = new std::string(""); + } + + std::cout << "Releasing buffer " << buffer << " of size " << byte_size + << " in " << TRITONSERVER_MemoryTypeString(memory_type) + << " for result '" << *name << "'" << std::endl; + switch (memory_type) { + case TRITONSERVER_MEMORY_CPU: + free(buffer); + break; +#ifdef TRITON_ENABLE_GPU + case TRITONSERVER_MEMORY_CPU_PINNED: { + auto err = 
cudaSetDevice(memory_type_id); + if (err == cudaSuccess) { + err = cudaFreeHost(buffer); + } + if (err != cudaSuccess) { + std::cerr << "error: failed to cudaFree " << buffer << ": " + << cudaGetErrorString(err) << std::endl; + } + break; + } + case TRITONSERVER_MEMORY_GPU: { + auto err = cudaSetDevice(memory_type_id); + if (err == cudaSuccess) { + err = cudaFree(buffer); + } + if (err != cudaSuccess) { + std::cerr << "error: failed to cudaFree " << buffer << ": " + << cudaGetErrorString(err) << std::endl; + } + break; + } +#endif // TRITON_ENABLE_GPU + default: + std::cerr << "error: unexpected buffer allocated in CUDA managed memory" + << std::endl; + break; + } + + delete name; + + return nullptr; // Success +} + +void +InferRequestComplete( + TRITONSERVER_InferenceRequest* request, const uint32_t flags, void* userp) +{ + // We reuse the request so we don't delete it here. +} + +void +InferResponseComplete( + TRITONSERVER_InferenceResponse* response, const uint32_t flags, void* userp) +{ + if (response != nullptr) { + // Send 'response' to the future. 
+ std::promise* p = + reinterpret_cast*>(userp); + p->set_value(response); + delete p; + } +} + +TRITONSERVER_Error* +ParseModelMetadata( + const rapidjson::Document& model_metadata, bool* is_int, + bool* is_torch_model) +{ + std::string seen_data_type; + for (const auto& input : model_metadata["inputs"].GetArray()) { + if (strcmp(input["datatype"].GetString(), "INT32") && + strcmp(input["datatype"].GetString(), "FP32")) { + return TRITONSERVER_ErrorNew( + TRITONSERVER_ERROR_UNSUPPORTED, + "simple lib example only supports model with data type INT32 or " + "FP32"); + } + if (seen_data_type.empty()) { + seen_data_type = input["datatype"].GetString(); + } else if (strcmp(seen_data_type.c_str(), input["datatype"].GetString())) { + return TRITONSERVER_ErrorNew( + TRITONSERVER_ERROR_INVALID_ARG, + "the inputs and outputs of 'simple' model must have the data type"); + } + } + for (const auto& output : model_metadata["outputs"].GetArray()) { + if (strcmp(output["datatype"].GetString(), "INT32") && + strcmp(output["datatype"].GetString(), "FP32")) { + return TRITONSERVER_ErrorNew( + TRITONSERVER_ERROR_UNSUPPORTED, + "simple lib example only supports model with data type INT32 or " + "FP32"); + } else if (strcmp(seen_data_type.c_str(), output["datatype"].GetString())) { + return TRITONSERVER_ErrorNew( + TRITONSERVER_ERROR_INVALID_ARG, + "the inputs and outputs of 'simple' model must have the data type"); + } + } + + *is_int = (strcmp(seen_data_type.c_str(), "INT32") == 0); + *is_torch_model = + (strcmp(model_metadata["platform"].GetString(), "pytorch_libtorch") == 0); + return nullptr; +} + +template +void +GenerateInputData( + std::vector* input0_data, std::vector* input1_data) +{ + input0_data->resize(16 * sizeof(T)); + input1_data->resize(16 * sizeof(T)); + for (size_t i = 0; i < 16; ++i) { + ((T*)input0_data->data())[i] = i; + ((T*)input1_data->data())[i] = 1; + } +} + +template +void +CompareResult( + const std::string& output0_name, const std::string& output1_name, + 
const void* input0, const void* input1, const char* output0, + const char* output1) +{ + for (size_t i = 0; i < 16; ++i) { + std::cout << ((T*)input0)[i] << " + " << ((T*)input1)[i] << " = " + << ((T*)output0)[i] << std::endl; + std::cout << ((T*)input0)[i] << " - " << ((T*)input1)[i] << " = " + << ((T*)output1)[i] << std::endl; + + if ((((T*)input0)[i] + ((T*)input1)[i]) != ((T*)output0)[i]) { + FAIL("incorrect sum in " + output0_name); + } + if ((((T*)input0)[i] - ((T*)input1)[i]) != ((T*)output1)[i]) { + FAIL("incorrect difference in " + output1_name); + } + } +} + +void +Check( + TRITONSERVER_InferenceResponse* response, + const std::vector& input0_data, const std::vector& input1_data, + const std::string& output0, const std::string& output1, + const size_t expected_byte_size, + const TRITONSERVER_DataType expected_datatype, const bool is_int) +{ + std::unordered_map> output_data; + + uint32_t output_count; + FAIL_IF_ERR( + TRITONSERVER_InferenceResponseOutputCount(response, &output_count), + "getting number of response outputs"); + if (output_count != 2) { + FAIL("expecting 2 response outputs, got " + std::to_string(output_count)); + } + + for (uint32_t idx = 0; idx < output_count; ++idx) { + const char* cname; + TRITONSERVER_DataType datatype; + const int64_t* shape; + uint64_t dim_count; + const void* base; + size_t byte_size; + TRITONSERVER_MemoryType memory_type; + int64_t memory_type_id; + void* userp; + + FAIL_IF_ERR( + TRITONSERVER_InferenceResponseOutput( + response, idx, &cname, &datatype, &shape, &dim_count, &base, + &byte_size, &memory_type, &memory_type_id, &userp), + "getting output info"); + + if (cname == nullptr) { + FAIL("unable to get output name"); + } + + std::string name(cname); + if ((name != output0) && (name != output1)) { + FAIL("unexpected output '" + name + "'"); + } + + if ((dim_count != 2) || (shape[0] != 1) || (shape[1] != 16)) { + FAIL("unexpected shape for '" + name + "'"); + } + + if (datatype != expected_datatype) { + FAIL( + 
"unexpected datatype '" + + std::string(TRITONSERVER_DataTypeString(datatype)) + "' for '" + + name + "'"); + } + + if (byte_size != expected_byte_size) { + FAIL( + "unexpected byte-size, expected " + + std::to_string(expected_byte_size) + ", got " + + std::to_string(byte_size) + " for " + name); + } + + if (enforce_memory_type && (memory_type != requested_memory_type)) { + FAIL( + "unexpected memory type, expected to be allocated in " + + std::string(TRITONSERVER_MemoryTypeString(requested_memory_type)) + + ", got " + std::string(TRITONSERVER_MemoryTypeString(memory_type)) + + ", id " + std::to_string(memory_type_id) + " for " + name); + } + + // We make a copy of the data here... which we could avoid for + // performance reasons but ok for this simple example. + std::vector& odata = output_data[name]; + switch (memory_type) { + case TRITONSERVER_MEMORY_CPU: { + std::cout << name << " is stored in system memory" << std::endl; + const char* cbase = reinterpret_cast(base); + odata.assign(cbase, cbase + byte_size); + break; + } + + case TRITONSERVER_MEMORY_CPU_PINNED: { + std::cout << name << " is stored in pinned memory" << std::endl; + const char* cbase = reinterpret_cast(base); + odata.assign(cbase, cbase + byte_size); + break; + } + +#ifdef TRITON_ENABLE_GPU + case TRITONSERVER_MEMORY_GPU: { + std::cout << name << " is stored in GPU memory" << std::endl; + odata.reserve(byte_size); + FAIL_IF_CUDA_ERR( + cudaMemcpy(&odata[0], base, byte_size, cudaMemcpyDeviceToHost), + "getting " + name + " data from GPU memory"); + break; + } +#endif + + default: + FAIL("unexpected memory type"); + } + } + + if (is_int) { + CompareResult( + output0, output1, &input0_data[0], &input1_data[0], + output_data[output0].data(), output_data[output1].data()); + } else { + CompareResult( + output0, output1, &input0_data[0], &input1_data[0], + output_data[output0].data(), output_data[output1].data()); + } +} + +} // namespace + +int +main(int argc, char** argv) +{ + std::string 
model_repository_path; + int verbose_level = 0; + + // Parse commandline... + int opt; + while ((opt = getopt(argc, argv, "vm:r:")) != -1) { + switch (opt) { + case 'm': { + enforce_memory_type = true; + if (!strcmp(optarg, "system")) { + requested_memory_type = TRITONSERVER_MEMORY_CPU; + } else if (!strcmp(optarg, "pinned")) { + requested_memory_type = TRITONSERVER_MEMORY_CPU_PINNED; + } else if (!strcmp(optarg, "gpu")) { + requested_memory_type = TRITONSERVER_MEMORY_GPU; + } else { + Usage( + argv, + "-m must be used to specify one of the following types:" + " <\"system\"|\"pinned\"|gpu>"); + } + break; + } + case 'r': + model_repository_path = optarg; + break; + case 'v': + verbose_level = 1; + break; + case '?': + Usage(argv); + break; + } + } + + if (model_repository_path.empty()) { + Usage(argv, "-r must be used to specify model repository path"); + } +#ifndef TRITON_ENABLE_GPU + if (enforce_memory_type && requested_memory_type != TRITONSERVER_MEMORY_CPU) { + Usage(argv, "-m can only be set to \"system\" without enabling GPU"); + } +#endif // TRITON_ENABLE_GPU + + // Check API version. + uint32_t api_version_major, api_version_minor; + FAIL_IF_ERR( + TRITONSERVER_ApiVersion(&api_version_major, &api_version_minor), + "getting Triton API version"); + if ((TRITONSERVER_API_VERSION_MAJOR != api_version_major) || + (TRITONSERVER_API_VERSION_MINOR > api_version_minor)) { + FAIL("triton server API version mismatch"); + } + + // Create the server... 
+ TRITONSERVER_ServerOptions* server_options = nullptr; + FAIL_IF_ERR( + TRITONSERVER_ServerOptionsNew(&server_options), + "creating server options"); + FAIL_IF_ERR( + TRITONSERVER_ServerOptionsSetModelRepositoryPath( + server_options, model_repository_path.c_str()), + "setting model repository path"); + FAIL_IF_ERR( + TRITONSERVER_ServerOptionsSetLogVerbose(server_options, verbose_level), + "setting verbose logging level"); + FAIL_IF_ERR( + TRITONSERVER_ServerOptionsSetBackendDirectory( + server_options, "/opt/tritonserver/backends"), + "setting backend directory"); + FAIL_IF_ERR( + TRITONSERVER_ServerOptionsSetRepoAgentDirectory( + server_options, "/opt/tritonserver/repoagents"), + "setting repository agent directory"); + FAIL_IF_ERR( + TRITONSERVER_ServerOptionsSetStrictModelConfig(server_options, true), + "setting strict model configuration"); +#ifdef TRITON_ENABLE_GPU + double min_compute_capability = TRITON_MIN_COMPUTE_CAPABILITY; +#else + double min_compute_capability = 0; +#endif // TRITON_ENABLE_GPU + FAIL_IF_ERR( + TRITONSERVER_ServerOptionsSetMinSupportedComputeCapability( + server_options, min_compute_capability), + "setting minimum supported CUDA compute capability"); + + TRITONSERVER_Server* server_ptr = nullptr; + FAIL_IF_ERR( + TRITONSERVER_ServerNew(&server_ptr, server_options), "creating server"); + FAIL_IF_ERR( + TRITONSERVER_ServerOptionsDelete(server_options), + "deleting server options"); + + std::shared_ptr server( + server_ptr, TRITONSERVER_ServerDelete); + + // Wait until the server is both live and ready. 
+ size_t health_iters = 0; + while (true) { + bool live, ready; + FAIL_IF_ERR( + TRITONSERVER_ServerIsLive(server.get(), &live), + "unable to get server liveness"); + FAIL_IF_ERR( + TRITONSERVER_ServerIsReady(server.get(), &ready), + "unable to get server readiness"); + std::cout << "Server Health: live " << live << ", ready " << ready + << std::endl; + if (live && ready) { + break; + } + + if (++health_iters >= 10) { + FAIL("failed to find healthy inference server"); + } + + std::this_thread::sleep_for(std::chrono::milliseconds(500)); + } + + // Print status of the server. + { + TRITONSERVER_Message* server_metadata_message; + FAIL_IF_ERR( + TRITONSERVER_ServerMetadata(server.get(), &server_metadata_message), + "unable to get server metadata message"); + const char* buffer; + size_t byte_size; + FAIL_IF_ERR( + TRITONSERVER_MessageSerializeToJson( + server_metadata_message, &buffer, &byte_size), + "unable to serialize server metadata message"); + + std::cout << "Server Status:" << std::endl; + std::cout << std::string(buffer, byte_size) << std::endl; + + FAIL_IF_ERR( + TRITONSERVER_MessageDelete(server_metadata_message), + "deleting status metadata"); + } + + const std::string model_name("simple"); + + // Wait for the model to become available. 
+ bool is_torch_model = false; + bool is_int = true; + bool is_ready = false; + health_iters = 0; + while (!is_ready) { + FAIL_IF_ERR( + TRITONSERVER_ServerModelIsReady( + server.get(), model_name.c_str(), 1, &is_ready), + "unable to get model readiness"); + if (!is_ready) { + if (++health_iters >= 10) { + FAIL("model failed to be ready in 10 iterations"); + } + std::this_thread::sleep_for(std::chrono::milliseconds(500)); + continue; + } + + TRITONSERVER_Message* model_metadata_message; + FAIL_IF_ERR( + TRITONSERVER_ServerModelMetadata( + server.get(), model_name.c_str(), 1, &model_metadata_message), + "unable to get model metadata message"); + const char* buffer; + size_t byte_size; + FAIL_IF_ERR( + TRITONSERVER_MessageSerializeToJson( + model_metadata_message, &buffer, &byte_size), + "unable to serialize model status protobuf"); + + rapidjson::Document model_metadata; + model_metadata.Parse(buffer, byte_size); + if (model_metadata.HasParseError()) { + FAIL( + "error: failed to parse model metadata from JSON: " + + std::string(GetParseError_En(model_metadata.GetParseError())) + + " at " + std::to_string(model_metadata.GetErrorOffset())); + } + + FAIL_IF_ERR( + TRITONSERVER_MessageDelete(model_metadata_message), + "deleting status protobuf"); + + if (strcmp(model_metadata["name"].GetString(), model_name.c_str())) { + FAIL("unable to find metadata for model"); + } + + bool found_version = false; + if (model_metadata.HasMember("versions")) { + for (const auto& version : model_metadata["versions"].GetArray()) { + if (strcmp(version.GetString(), "1") == 0) { + found_version = true; + break; + } + } + } + if (!found_version) { + FAIL("unable to find version 1 status for model"); + } + + FAIL_IF_ERR( + ParseModelMetadata(model_metadata, &is_int, &is_torch_model), + "parsing model metadata"); + } + + // Create the allocator that will be used to allocate buffers for + // the result tensors. 
+ TRITONSERVER_ResponseAllocator* allocator = nullptr; + FAIL_IF_ERR( + TRITONSERVER_ResponseAllocatorNew( + &allocator, ResponseAlloc, ResponseRelease, nullptr /* start_fn */), + "creating response allocator"); + + // Inference + TRITONSERVER_InferenceRequest* irequest = nullptr; + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestNew( + &irequest, server.get(), model_name.c_str(), -1 /* model_version */), + "creating inference request"); + + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestSetId(irequest, "my_request_id"), + "setting ID for the request"); + + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestSetReleaseCallback( + irequest, InferRequestComplete, nullptr /* request_release_userp */), + "setting request release callback"); + + // Inputs + auto input0 = is_torch_model ? "INPUT__0" : "INPUT0"; + auto input1 = is_torch_model ? "INPUT__1" : "INPUT1"; + + std::vector input0_shape({1, 16}); + std::vector input1_shape({1, 16}); + + const TRITONSERVER_DataType datatype = + (is_int) ? TRITONSERVER_TYPE_INT32 : TRITONSERVER_TYPE_FP32; + + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestAddInput( + irequest, input0, datatype, &input0_shape[0], input0_shape.size()), + "setting input 0 meta-data for the request"); + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestAddInput( + irequest, input1, datatype, &input1_shape[0], input1_shape.size()), + "setting input 1 meta-data for the request"); + + auto output0 = is_torch_model ? "OUTPUT__0" : "OUTPUT0"; + auto output1 = is_torch_model ? "OUTPUT__1" : "OUTPUT1"; + + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestAddRequestedOutput(irequest, output0), + "requesting output 0 for the request"); + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestAddRequestedOutput(irequest, output1), + "requesting output 1 for the request"); + + // Create the data for the two input tensors. Initialize the first + // to unique values and the second to all ones. 
+ std::vector input0_data; + std::vector input1_data; + if (is_int) { + GenerateInputData(&input0_data, &input1_data); + } else { + GenerateInputData(&input0_data, &input1_data); + } + + size_t input0_size = input0_data.size(); + size_t input1_size = input1_data.size(); + + const void* input0_base = &input0_data[0]; + const void* input1_base = &input1_data[0]; +#ifdef TRITON_ENABLE_GPU + std::unique_ptr input0_gpu( + nullptr, cuda_data_deleter); + std::unique_ptr input1_gpu( + nullptr, cuda_data_deleter); + bool use_cuda_memory = + (enforce_memory_type && + (requested_memory_type != TRITONSERVER_MEMORY_CPU)); + if (use_cuda_memory) { + FAIL_IF_CUDA_ERR(cudaSetDevice(0), "setting CUDA device to device 0"); + if (requested_memory_type != TRITONSERVER_MEMORY_CPU_PINNED) { + void* dst; + FAIL_IF_CUDA_ERR( + cudaMalloc(&dst, input0_size), + "allocating GPU memory for INPUT0 data"); + input0_gpu.reset(dst); + FAIL_IF_CUDA_ERR( + cudaMemcpy(dst, &input0_data[0], input0_size, cudaMemcpyHostToDevice), + "setting INPUT0 data in GPU memory"); + FAIL_IF_CUDA_ERR( + cudaMalloc(&dst, input1_size), + "allocating GPU memory for INPUT1 data"); + input1_gpu.reset(dst); + FAIL_IF_CUDA_ERR( + cudaMemcpy(dst, &input1_data[0], input1_size, cudaMemcpyHostToDevice), + "setting INPUT1 data in GPU memory"); + } else { + void* dst; + FAIL_IF_CUDA_ERR( + cudaHostAlloc(&dst, input0_size, cudaHostAllocPortable), + "allocating pinned memory for INPUT0 data"); + input0_gpu.reset(dst); + FAIL_IF_CUDA_ERR( + cudaMemcpy(dst, &input0_data[0], input0_size, cudaMemcpyHostToHost), + "setting INPUT0 data in pinned memory"); + FAIL_IF_CUDA_ERR( + cudaHostAlloc(&dst, input1_size, cudaHostAllocPortable), + "allocating pinned memory for INPUT1 data"); + input1_gpu.reset(dst); + FAIL_IF_CUDA_ERR( + cudaMemcpy(dst, &input1_data[0], input1_size, cudaMemcpyHostToHost), + "setting INPUT1 data in pinned memory"); + } + } + + input0_base = use_cuda_memory ? 
input0_gpu.get() : &input0_data[0]; + input1_base = use_cuda_memory ? input1_gpu.get() : &input1_data[0]; +#endif // TRITON_ENABLE_GPU + + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestAppendInputData( + irequest, input0, input0_base, input0_size, requested_memory_type, + 0 /* memory_type_id */), + "assigning INPUT0 data"); + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestAppendInputData( + irequest, input1, input1_base, input1_size, requested_memory_type, + 0 /* memory_type_id */), + "assigning INPUT1 data"); + + // Perform inference... + { + auto p = new std::promise(); + std::future completed = p->get_future(); + + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestSetResponseCallback( + irequest, allocator, nullptr /* response_allocator_userp */, + InferResponseComplete, reinterpret_cast(p)), + "setting response callback"); + + FAIL_IF_ERR( + TRITONSERVER_ServerInferAsync( + server.get(), irequest, nullptr /* trace */), + "running inference"); + + // Wait for the inference to complete. + TRITONSERVER_InferenceResponse* completed_response = completed.get(); + + FAIL_IF_ERR( + TRITONSERVER_InferenceResponseError(completed_response), + "response status"); + + Check( + completed_response, input0_data, input1_data, output0, output1, + input0_size, datatype, is_int); + + FAIL_IF_ERR( + TRITONSERVER_InferenceResponseDelete(completed_response), + "deleting inference response"); + } + + // Modify some input data in place and then reuse the request + // object. For simplicity we only do this when the input tensors are + // in non-pinned system memory. 
+ if (!enforce_memory_type || + (requested_memory_type == TRITONSERVER_MEMORY_CPU)) { + if (is_int) { + int32_t* input0_base = reinterpret_cast(&input0_data[0]); + input0_base[0] = 27; + } else { + float* input0_base = reinterpret_cast(&input0_data[0]); + input0_base[0] = 27.0; + } + + auto p = new std::promise(); + std::future completed = p->get_future(); + + // Using a new promise so have to re-register the callback to set + // the promise as the userp. + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestSetResponseCallback( + irequest, allocator, nullptr /* response_allocator_userp */, + InferResponseComplete, reinterpret_cast(p)), + "setting response callback"); + + FAIL_IF_ERR( + TRITONSERVER_ServerInferAsync( + server.get(), irequest, nullptr /* trace */), + "running inference"); + + // Wait for the inference to complete. + TRITONSERVER_InferenceResponse* completed_response = completed.get(); + FAIL_IF_ERR( + TRITONSERVER_InferenceResponseError(completed_response), + "response status"); + + Check( + completed_response, input0_data, input1_data, output0, output1, + input0_size, datatype, is_int); + + FAIL_IF_ERR( + TRITONSERVER_InferenceResponseDelete(completed_response), + "deleting inference response"); + } + + // Remove input data and then add back different data. + { + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestRemoveAllInputData(irequest, input0), + "removing INPUT0 data"); + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestAppendInputData( + irequest, input0, input1_base, input1_size, requested_memory_type, + 0 /* memory_type_id */), + "assigning INPUT1 data to INPUT0"); + + auto p = new std::promise(); + std::future completed = p->get_future(); + + // Using a new promise so have to re-register the callback to set + // the promise as the userp. 
+ FAIL_IF_ERR( + TRITONSERVER_InferenceRequestSetResponseCallback( + irequest, allocator, nullptr /* response_allocator_userp */, + InferResponseComplete, reinterpret_cast(p)), + "setting response callback"); + + FAIL_IF_ERR( + TRITONSERVER_ServerInferAsync( + server.get(), irequest, nullptr /* trace */), + "running inference"); + + // Wait for the inference to complete. + TRITONSERVER_InferenceResponse* completed_response = completed.get(); + FAIL_IF_ERR( + TRITONSERVER_InferenceResponseError(completed_response), + "response status"); + + // Both inputs are using input1_data... + Check( + completed_response, input1_data, input1_data, output0, output1, + input0_size, datatype, is_int); + + FAIL_IF_ERR( + TRITONSERVER_InferenceResponseDelete(completed_response), + "deleting inference response"); + } + + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestDelete(irequest), + "deleting inference request"); + + FAIL_IF_ERR( + TRITONSERVER_ResponseAllocatorDelete(allocator), + "deleting response allocator"); + + return 0; +} diff --git a/tritonserver/samples/simple.java b/tritonserver/samples/simple.java new file mode 100644 index 00000000000..b2b543f017e --- /dev/null +++ b/tritonserver/samples/simple.java @@ -0,0 +1,956 @@ +import java.io.*; +import java.util.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.BytePointer; + +import org.bytedeco.cuda.cudart.*; +import org.bytedeco.tritonserver.tritonserver.*; +import static org.bytedeco.cuda.global.cudart.*; +import static org.bytedeco.tensorrt.global.tritonserver.*; + +public class Simple { + + static void FAIL(String msg) + { + System.err.println("Cuda failure: " + msg); + System.exit(1); + } + + static void FAIL_IF_ERR(TRITONSERVER_Error X, String MSG) + { + TRITONSERVER_Error err_ = X; + if (err__ != null) { + System.err.println("error: " + MSG + + ":" + TRITONSERVER_ErrorCodeString(err__) + " - " + + TRITONSERVER_ErrorMessage(err__)); + TRITONSERVER_ErrorDelete(err__); + System.exit(1); + } + } + + static 
void FAIL_IF_CUDA_ERR(cudaError_t X, String MSG) + { + cudaError_t err__ = X; + if (err__ != cudaSuccess) { + System.err.println("error: " + MSG + + ":" + TRITONSERVER_ErrorCodeString(err__) + " - " + + cudaGetErrorString(err__)); + System.exit(1); + } + } + + boolean enforce_memory_type = false; + TRITONSERVER_MemoryType requested_memory_type; + + final boolean triton_enable_gpu = false; + if (triton_enable_gpu) + { + public static class cuda_data_deleter extends FunctionPointer { + public void call(Pointer data) { + if (data != null) { + cudaPointerAttributes attr; + auto cuerr = cudaPointerGetAttributes(attr, data); + if (cuerr != cudaSuccess) { + //std::cerr << "error: failed to get CUDA pointer attribute of " << data + // << ": " << cudaGetErrorString(cuerr) << std::endl; + //jack: how to print "Pointer data" here, %what? + System.err.printf("error: failed to get CUDA pointer attribute of %?: %s\n", data, cudaGetErrorString(cuerr)); + } + if (attr.type == cudaMemoryTypeDevice) { + cuerr = cudaFree(data); + } else if (attr.type == cudaMemoryTypeHost) { + cuerr = cudaFreeHost(data); + } + if (cuerr != cudaSuccess) { + //std::cerr << "error: failed to release CUDA pointer " << data << ": " + // << cudaGetErrorString(cuerr) << std::endl; + //jack: how to print "Pointer data" here, %what? + System.err.printf("error: failed to release CUDA pointer %?: %s\n", data, cudaGetErrorString(cuerr)); ?? 
+ } + } + + } + } + + } + + void Usage(String[] args, String msg = String) + { + if (!msg.isEmpty()) { + System.err.printf("%s\n", msg); + } + + System.err.printf("Usage: %s [options]\n", argv[0].get()); + System.err.printf("\t-m <\"system\"|\"pinned\"|gpu>\n"); + System.err.printf("Enforce the memory type for input and output tensors.\n"); + System.err.printf("If not specified, inputs will be in system memory and outputs\n"); + System.err.printf("will be based on the model's preferred type.\n"); + System.err.printf("\t-v Enable verbose logging\n"); + System.err.printf("\t-r [model repository absolute path]\n"); + System.err.printf("\t-c Enable web camera input.\n"); + + System.exit(1); + } + + TRITONSERVER_Error ResponseAlloc(TRITONSERVER_ResponseAllocator allocator, + char tensor_name, long byte_size, TRITONSERVER_MemoryType preferred_memory_type, + long preferred_memory_type_id, Pointer userp, PointerPointer buffer, + PointerPointer buffer_userp, TRITONSERVER_MemoryType actual_memory_type, + long actual_memory_type_id) + { + // Initially attempt to make the actual memory type and id that we + // allocate be the same as preferred memory type + actual_memory_type = preferred_memory_type; + actual_memory_type_id = preferred_memory_type_id; + + // If 'byte_size' is zero just return 'buffer' == nullptr, we don't + // need to do any other book-keeping. 
+ if (byte_size == 0) { + buffer = null; + buffer_userp = null; + System.out.printf("allocated %d %s\n", byte_size, tensor_name); + } else { + Pointer allocated_ptr = null; + if (enforce_memory_type) { + actual_memory_type = requested_memory_type; + } + + switch (actual_memory_type) { + if (triton_enable_gpu) + { + case TRITONSERVER_MEMORY_CPU_PINNED: { + int err = cudaSetDevice(actual_memory_type_id); + if ((err != cudaSuccess) && (err != cudaErrorNoDevice) && + (err != cudaErrorInsufficientDriver)) { + return TRITONSERVER_ErrorNew( + TRITONSERVER_ERROR_INTERNAL, + new BytePointer("unable to recover current CUDA device: cudaGetErrorString(err)")); + } + + err = cudaHostAlloc(allocated_ptr, byte_size, cudaHostAllocPortable); + if (err != cudaSuccess) { + return TRITONSERVER_ErrorNew( + TRITONSERVER_ERROR_INTERNAL, + new BytePointer("cudaHostAlloc failed: cudaGetErrorString(err)")); + } + break; + } + + case TRITONSERVER_MEMORY_GPU: { + int err = cudaSetDevice(actual_memory_type_id); + if ((err != cudaSuccess) && (err != cudaErrorNoDevice) && + (err != cudaErrorInsufficientDriver)) { + return TRITONSERVER_ErrorNew( + TRITONSERVER_ERROR_INTERNAL, + new BytePointer("unable to recover current CUDA device: cudaGetErrorString(err)")); + } + + err = cudaMalloc(allocated_ptr, byte_size); + if (err != cudaSuccess) { + return TRITONSERVER_ErrorNew( + TRITONSERVER_ERROR_INTERNAL, + new BytePointer("cudaMalloc failed: cudaGetErrorString(err)")); + } + break; + } + } + + // Use CPU memory if the requested memory type is unknown + // (default case). + case TRITONSERVER_MEMORY_CPU: + default: { + actual_memory_type = TRITONSERVER_MEMORY_CPU; + allocated_ptr = new byte[byte_size]; + break; + } + } + + // Pass the tensor name with buffer_userp so we can show it when + // releasing the buffer. 
+ if (allocated_ptr != null) { + buffer = allocated_ptr; + buffer_userp = new String(tensor_name); + System.out.printf("allocated %d bytes in %s for result tensor %s\n", byte_size, + TRITONSERVER_MemoryTypeString(actual_memory_type), tensor_name); + } + } + + return null; // Success + } + + TRITONSERVER_Error ResponseRelease(TRITONSERVER_ResponseAllocator allocator, + Pointer buffer, Pointer buffer_userp, long byte_size, TRITONSERVER_MemoryType memory_type, + long memory_type_id) + { + String name = null; + if (buffer_userp != null) { + name = (String)(buffer_userp); + } else { + name = new String(""); + } + + System.out.printf("Releasing buffer of size %d in %s for result %s\n", byte_size, + TRITONSERVER_MemoryTypeString(memory_type), name); + switch (memory_type) { + case TRITONSERVER_MEMORY_CPU: + //jack: for c++ free, I just use "= null", is this correct? + //free(buffer); + buffer = null; + break; + + if (triton_enable_gpu){ + case TRITONSERVER_MEMORY_CPU_PINNED: { + int err = cudaSetDevice(memory_type_id); + if (err == cudaSuccess) { + err = cudaFreeHost(buffer); + } + if (err != cudaSuccess) { + System.err.printf("error: failed to cudaFree: %s.\n", cudaGetErrorString(err)); + } + break; + } + case TRITONSERVER_MEMORY_GPU: { + int err = cudaSetDevice(memory_type_id); + if (err == cudaSuccess) { + err = cudaFree(buffer); + } + if (err != cudaSuccess) { + System.err.printf("error: failed to cudaFree: %s.\n", cudaGetErrorString(err)); + } + break; + } + } + + default: + System.err.printf("error: unexpected buffer allocated in CUDA managed memory.\n"); + break; + } + + name = null; + + return null; // Success + } + + void + InferRequestComplete( + TRITONSERVER_InferenceRequest request, int flags, Pointer userp) + { + // We reuse the request so we don't delete it here. + } + + void + InferResponseComplete( + TRITONSERVER_InferenceResponse response, long flags, Pointer userp) + { + if (response != null) { + // Send 'response' to the future. 
+ //jack: how to do with std::promise? and which java object can do with .set_value? + std::promise* p = + reinterpret_cast*>(userp); + p->set_value(response); + p = null; + } + } + + void + InferResponseComplete( + TRITONSERVER_InferenceResponse response, int flags, Pointer userp) + { + if (response != null) { + // Send 'response' to the future. + //jack: how to do with std::promise, set_value can be replaced by which java func? for reinterpret_cast, should be replaced by which one? + std::promise* p = + reinterpret_cast*>(userp); + p->set_value(response); + p = null; + } + } + + TRITONSERVER_Error + ParseModelMetadata( + const rapidjson::Document& model_metadata, boolean is_int, + boolean is_torch_model) + { + String seen_data_type; + for (const auto& input : model_metadata["inputs"].GetArray()) { + if (strcmp(input["datatype"].GetString(), "INT32") && + strcmp(input["datatype"].GetString(), "FP32")) { + return TRITONSERVER_ErrorNew( + TRITONSERVER_ERROR_UNSUPPORTED, + new BytePointer("simple lib example only supports model with data type INT32 or FP32")); + } + if (seen_data_type.isEmpty()) { + seen_data_type = input["datatype"].GetString(); + } else if (strcmp(seen_data_type.c_str(), input["datatype"].GetString())) { + return TRITONSERVER_ErrorNew( + TRITONSERVER_ERROR_INVALID_ARG, + new BytePointer("the inputs and outputs of 'simple' model must have the data type")); + } + } + for (const auto& output : model_metadata["outputs"].GetArray()) { + if (strcmp(output["datatype"].GetString(), "INT32") && + strcmp(output["datatype"].GetString(), "FP32")) { + return TRITONSERVER_ErrorNew( + TRITONSERVER_ERROR_UNSUPPORTED, + new BytePointer("simple lib example only supports model with data type INT32 or FP32")); + } else if (strcmp(seen_data_type.c_str(), output["datatype"].GetString())) { + return TRITONSERVER_ErrorNew( + TRITONSERVER_ERROR_INVALID_ARG, + new BytePointer("the inputs and outputs of 'simple' model must have the data type")); + } + } + //jack: check about 
c_str and strcmp + is_int = (strcmp(seen_data_type.c_str(), "INT32") == 0); + is_torch_model = + (strcmp(model_metadata["platform"].GetString(), "pytorch_libtorch") == 0); + return null; + } + + //jack: how to do with template? how to do with resize? + template + void + GenerateInputData( + std::vector* input0_data, std::vector* input1_data) + { + input0_data->resize(16 * sizeof(T)); + input1_data->resize(16 * sizeof(T)); + for (size_t i = 0; i < 16; ++i) { + ((T*)input0_data->data())[i] = i; + ((T*)input1_data->data())[i] = 1; + } + } + + template + void + CompareResult( + String output0_name, String output1_name, + Pointer input0, Pointer input1, Pointer output0, + Pointer output1) + { + for (size_t i = 0; i < 16; ++i) { + std::cout << ((T*)input0)[i] << " + " << ((T*)input1)[i] << " = " + << ((T*)output0)[i] << std::endl; + std::cout << ((T*)input0)[i] << " - " << ((T*)input1)[i] << " = " + << ((T*)output1)[i] << std::endl; + + if ((((T*)input0)[i] + ((T*)input1)[i]) != ((T*)output0)[i]) { + FAIL("incorrect sum in " + output0_name); + } + if ((((T*)input0)[i] - ((T*)input1)[i]) != ((T*)output1)[i]) { + FAIL("incorrect difference in " + output1_name); + } + } + } + + void + Check( + TRITONSERVER_InferenceResponse response, + char[] input0_data, char[] input1_data, + String output0, String output1, + long expected_byte_size, + TRITONSERVER_DataType expected_datatype, boolean is_int) + { + //jack: how to do with unordered_map? + std::unordered_map> output_data; + + long output_count; + FAIL_IF_ERR( + TRITONSERVER_InferenceResponseOutputCount(response, output_count), + "getting number of response outputs"); + if (output_count != 2) { + FAIL("expecting 2 response outputs, got " + String(output_count)); + } + + for (long idx = 0; idx < output_count; ++idx) { + BytePointer cname; + TRITONSERVER_DataType datatype; + //jack: is there PointerLong? int64 should be long, right? 
+ const int64_t* shape; + long dim_count; + Pointer base; + long byte_size; + TRITONSERVER_MemoryType memory_type; + long memory_type_id; + Pointer userp; + + FAIL_IF_ERR( + TRITONSERVER_InferenceResponseOutput( + response, idx, cname, datatype, shape, dim_count, base, + byte_size, memory_type, memory_type_id, userp), + "getting output info"); + + if (cname == null) { + FAIL("unable to get output name"); + } + + String name(cname); + if ((name != output0) && (name != output1)) { + FAIL("unexpected output '" + name + "'"); + } + //jack: when the above shape issue fixed, will change this to some position stuff + if ((dim_count != 2) || (shape[0] != 1) || (shape[1] != 16)) { + FAIL("unexpected shape for '" + name + "'"); + } + + if (datatype != expected_datatype) { + FAIL( + "unexpected datatype '" + + String(TRITONSERVER_DataTypeString(datatype)) + "' for '" + + name + "'"); + } + + if (byte_size != expected_byte_size) { + FAIL( + "unexpected byte-size, expected " + + String(expected_byte_size) + ", got " + + String(byte_size) + " for " + name); + } + + if (enforce_memory_type && (memory_type != requested_memory_type)) { + FAIL( + "unexpected memory type, expected to be allocated in " + + String(TRITONSERVER_MemoryTypeString(requested_memory_type)) + + ", got " + String(TRITONSERVER_MemoryTypeString(memory_type)) + + ", id " + String(memory_type_id) + " for " + name); + } + + // We make a copy of the data here... which we could avoid for + // performance reasons but ok for this simple example. + //jack: change this when unordered_map is fixed + char[] odata = output_data[name]; + //jack: how to do with std::vector func of assign? 
+ switch (memory_type) { + case TRITONSERVER_MEMORY_CPU: { + std::cout << name << " is stored in system memory" << std::endl; + const char* cbase = reinterpret_cast(base); + odata.assign(cbase, cbase + byte_size); + break; + } + + case TRITONSERVER_MEMORY_CPU_PINNED: { + std::cout << name << " is stored in pinned memory" << std::endl; + const char* cbase = reinterpret_cast(base); + odata.assign(cbase, cbase + byte_size); + break; + } + + if (triton_enable_gpu) + { + case TRITONSERVER_MEMORY_GPU: { + std::cout << name << " is stored in GPU memory" << std::endl; + odata.reserve(byte_size); + FAIL_IF_CUDA_ERR( + cudaMemcpy(&odata[0], base, byte_size, cudaMemcpyDeviceToHost), + "getting " + name + " data from GPU memory"); + break; + } + } + + default: + FAIL("unexpected memory type"); + } + } + + if (is_int) { + CompareResult( + output0, output1, &input0_data[0], &input1_data[0], + output_data[output0].data(), output_data[output1].data()); + } else { + CompareResult( + output0, output1, &input0_data[0], &input1_data[0], + output_data[output0].data(), output_data[output1].data()); + } + } + + + public static void main(String[] args) + { + String model_repository_path; + int verbose_level = 0; + + // Parse commandline... + //jack: how to do arg check in java, any reference? 
+ int opt; + while ((opt = getopt(argc, argv, "vm:r:")) != -1) { + switch (opt) { + case 'm': { + enforce_memory_type = true; + if (!strcmp(optarg, "system")) { + requested_memory_type = TRITONSERVER_MEMORY_CPU; + } else if (!strcmp(optarg, "pinned")) { + requested_memory_type = TRITONSERVER_MEMORY_CPU_PINNED; + } else if (!strcmp(optarg, "gpu")) { + requested_memory_type = TRITONSERVER_MEMORY_GPU; + } else { + Usage( + argv, + "-m must be used to specify one of the following types:" + " <\"system\"|\"pinned\"|gpu>"); + } + break; + } + case 'r': + model_repository_path = optarg; + break; + case 'v': + verbose_level = 1; + break; + case '?': + Usage(argv); + break; + } + } + + if (model_repository_path.isEmpty()) { + Usage(argv, "-r must be used to specify model repository path"); + } + if (triton_enable_gpu) + { + if (enforce_memory_type && requested_memory_type != TRITONSERVER_MEMORY_CPU) { + Usage(argv, "-m can only be set to \"system\" without enabling GPU"); + } + } + + // Check API version. + long api_version_major, api_version_minor; + FAIL_IF_ERR( + TRITONSERVER_ApiVersion(api_version_major, api_version_minor), + "getting Triton API version"); + if ((TRITONSERVER_API_VERSION_MAJOR != api_version_major) || + (TRITONSERVER_API_VERSION_MINOR > api_version_minor)) { + FAIL("triton server API version mismatch"); + } + + // Create the server... 
+ TRITONSERVER_ServerOptions server_options = null; + FAIL_IF_ERR( + TRITONSERVER_ServerOptionsNew(server_options), + "creating server options"); + FAIL_IF_ERR( + TRITONSERVER_ServerOptionsSetModelRepositoryPath( + server_options, model_repository_path.c_str()), + "setting model repository path"); + FAIL_IF_ERR( + TRITONSERVER_ServerOptionsSetLogVerbose(server_options, verbose_level), + "setting verbose logging level"); + FAIL_IF_ERR( + TRITONSERVER_ServerOptionsSetBackendDirectory( + server_options, "/opt/tritonserver/backends"), + "setting backend directory"); + FAIL_IF_ERR( + TRITONSERVER_ServerOptionsSetRepoAgentDirectory( + server_options, "/opt/tritonserver/repoagents"), + "setting repository agent directory"); + FAIL_IF_ERR( + TRITONSERVER_ServerOptionsSetStrictModelConfig(server_options, true), + "setting strict model configuration"); + if (triton_enable_gpu) + { + double min_compute_capability = TRITON_MIN_COMPUTE_CAPABILITY; + } + else + { + double min_compute_capability = 0; + } + FAIL_IF_ERR( + TRITONSERVER_ServerOptionsSetMinSupportedComputeCapability( + server_options, min_compute_capability), + "setting minimum supported CUDA compute capability"); + + TRITONSERVER_Server server_ptr = null; + FAIL_IF_ERR( + TRITONSERVER_ServerNew(server_ptr, server_options), "creating server"); + FAIL_IF_ERR( + TRITONSERVER_ServerOptionsDelete(server_options), + "deleting server options"); + //jack: how to do with shared_ptr here? + std::shared_ptr server( + server_ptr, TRITONSERVER_ServerDelete); + + // Wait until the server is both live and ready. + long health_iters = 0; + while (true) { + boolean live, ready; + FAIL_IF_ERR( + //jack: how to do with get func of shared_ptr? 
+ TRITONSERVER_ServerIsLive(server.get(), &live), + "unable to get server liveness"); + FAIL_IF_ERR( + TRITONSERVER_ServerIsReady(server.get(), &ready), + "unable to get server readiness"); + System.out.println("Server Health: live" + ", ready"); + + if (live && ready) { + break; + } + + if (++health_iters >= 10) { + FAIL("failed to find healthy inference server"); + } + + Thread.sleep(500); + } + + // Print status of the server. + { + TRITONSERVER_Message server_metadata_message; + FAIL_IF_ERR( + TRITONSERVER_ServerMetadata(server.get(), server_metadata_message), + "unable to get server metadata message"); + Pointer buffer; + long byte_size; + FAIL_IF_ERR( + TRITONSERVER_MessageSerializeToJson( + server_metadata_message, buffer, byte_size), + "unable to serialize server metadata message"); + + System.out.println("Server Status: "); + System.out.println(String(buffer, byte_size)); + + FAIL_IF_ERR( + TRITONSERVER_MessageDelete(server_metadata_message), + "deleting status metadata"); + } + //jack: is this right?? + String model_name = "simple"; + + // Wait for the model to become available. 
+ boolean is_torch_model = false; + boolean is_int = true; + boolean is_ready = false; + health_iters = 0; + while (!is_ready) { + FAIL_IF_ERR( + TRITONSERVER_ServerModelIsReady( + server.get(), model_name.c_str(), 1, &is_ready), + "unable to get model readiness"); + if (!is_ready) { + if (++health_iters >= 10) { + FAIL("model failed to be ready in 10 iterations"); + } + Thread.sleep(500); + continue; + } + + TRITONSERVER_Message model_metadata_message; + FAIL_IF_ERR( + TRITONSERVER_ServerModelMetadata( + server.get(), model_name.c_str(), 1, model_metadata_message), + "unable to get model metadata message"); + Pointer buffer; + long byte_size; + FAIL_IF_ERR( + TRITONSERVER_MessageSerializeToJson( + model_metadata_message, buffer, byte_size), + "unable to serialize model status protobuf"); + + rapidjson::Document model_metadata; + model_metadata.Parse(buffer, byte_size); + if (model_metadata.HasParseError()) { + FAIL( + "error: failed to parse model metadata from JSON: " + + String(GetParseError_En(model_metadata.GetParseError())) + + " at " + String(model_metadata.GetErrorOffset())); + } + + FAIL_IF_ERR( + TRITONSERVER_MessageDelete(model_metadata_message), + "deleting status protobuf"); + //jack: how to do with strcmp? + if (strcmp(model_metadata["name"].GetString(), model_name.c_str())) { + FAIL("unable to find metadata for model"); + } + + boolean found_version = false; + if (model_metadata.HasMember("versions")) { + //jack: how to set type for auto here? + for (const auto& version : model_metadata["versions"].GetArray()) { + if (strcmp(version.GetString(), "1") == 0) { + found_version = true; + break; + } + } + } + if (!found_version) { + FAIL("unable to find version 1 status for model"); + } + + FAIL_IF_ERR( + ParseModelMetadata(model_metadata, is_int, is_torch_model), + "parsing model metadata"); + } + + // Create the allocator that will be used to allocate buffers for + // the result tensors. 
+ TRITONSERVER_ResponseAllocator allocator = null; + FAIL_IF_ERR( + TRITONSERVER_ResponseAllocatorNew( + allocator, ResponseAlloc, ResponseRelease, null /* start_fn */), + "creating response allocator"); + + // Inference + TRITONSERVER_InferenceRequest irequest = null; + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestNew( + irequest, server.get(), model_name.c_str(), -1 /* model_version */), + "creating inference request"); + + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestSetId(irequest, "my_request_id"), + "setting ID for the request"); + + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestSetReleaseCallback( + irequest, InferRequestComplete, null /* request_release_userp */), + "setting request release callback"); + + // Inputs + //jack: dont know how to do with this + auto input0 = is_torch_model ? "INPUT__0" : "INPUT0"; + auto input1 = is_torch_model ? "INPUT__1" : "INPUT1"; + //jack: how to do this with long []? + std::vector input0_shape({1, 16}); + std::vector input1_shape({1, 16}); + + TRITONSERVER_DataType datatype = + (is_int) ? TRITONSERVER_TYPE_INT32 : TRITONSERVER_TYPE_FP32; + + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestAddInput( + irequest, input0, datatype, &input0_shape[0], input0_shape.size()), + "setting input 0 meta-data for the request"); + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestAddInput( + irequest, input1, datatype, &input1_shape[0], input1_shape.size()), + "setting input 1 meta-data for the request"); + //jack: how to set this auto? + auto output0 = is_torch_model ? "OUTPUT__0" : "OUTPUT0"; + auto output1 = is_torch_model ? "OUTPUT__1" : "OUTPUT1"; + + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestAddRequestedOutput(irequest, output0), + "requesting output 0 for the request"); + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestAddRequestedOutput(irequest, output1), + "requesting output 1 for the request"); + + // Create the data for the two input tensors. Initialize the first + // to unique values and the second to all ones. 
+ std::vector input0_data; + std::vector input1_data; + char[] input0_data; + char[] input1_data; + //jack: will do this if template is fixed + if (is_int) { + GenerateInputData(&input0_data, &input1_data); + } else { + GenerateInputData(&input0_data, &input1_data); + } + //jack: how to do size of char[]? + size_t input0_size = input0_data.size(); + size_t input1_size = input1_data.size(); + + const void* input0_base = &input0_data[0]; + const void* input1_base = &input1_data[0]; + + if (triton_enable_gpu) + { + //jack: how to do with this? + std::unique_ptr input0_gpu( + nullptr, cuda_data_deleter); + std::unique_ptr input1_gpu( + nullptr, cuda_data_deleter); + boolean use_cuda_memory = + (enforce_memory_type && + (requested_memory_type != TRITONSERVER_MEMORY_CPU)); + if (use_cuda_memory) { + FAIL_IF_CUDA_ERR(cudaSetDevice(0), "setting CUDA device to device 0"); + if (requested_memory_type != TRITONSERVER_MEMORY_CPU_PINNED) { + Pointer dst; + FAIL_IF_CUDA_ERR( + cudaMalloc(dst, input0_size), + "allocating GPU memory for INPUT0 data"); + input0_gpu.reset(dst); + FAIL_IF_CUDA_ERR( + cudaMemcpy(dst, &input0_data[0], input0_size, cudaMemcpyHostToDevice), + "setting INPUT0 data in GPU memory"); + FAIL_IF_CUDA_ERR( + cudaMalloc(&dst, input1_size), + "allocating GPU memory for INPUT1 data"); + input1_gpu.reset(dst); + FAIL_IF_CUDA_ERR( + cudaMemcpy(dst, &input1_data[0], input1_size, cudaMemcpyHostToDevice), + "setting INPUT1 data in GPU memory"); + } else { + Pointer dst; + FAIL_IF_CUDA_ERR( + cudaHostAlloc(dst, input0_size, cudaHostAllocPortable), + "allocating pinned memory for INPUT0 data"); + input0_gpu.reset(dst); + FAIL_IF_CUDA_ERR( + cudaMemcpy(dst, &input0_data[0], input0_size, cudaMemcpyHostToHost), + "setting INPUT0 data in pinned memory"); + FAIL_IF_CUDA_ERR( + cudaHostAlloc(dst, input1_size, cudaHostAllocPortable), + "allocating pinned memory for INPUT1 data"); + input1_gpu.reset(dst); + FAIL_IF_CUDA_ERR( + cudaMemcpy(dst, &input1_data[0], input1_size, 
cudaMemcpyHostToHost), + "setting INPUT1 data in pinned memory"); + } + } + + input0_base = use_cuda_memory ? input0_gpu.get() : &input0_data[0]; + input1_base = use_cuda_memory ? input1_gpu.get() : &input1_data[0]; + } + + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestAppendInputData( + irequest, input0, input0_base, input0_size, requested_memory_type, + 0 /* memory_type_id */), + "assigning INPUT0 data"); + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestAppendInputData( + irequest, input1, input1_base, input1_size, requested_memory_type, + 0 /* memory_type_id */), + "assigning INPUT1 data"); + + // Perform inference... + { + //jack: how to do with std::promise + auto p = new std::promise(); + //jack: how to do with std::future + std::future completed = p->get_future(); + + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestSetResponseCallback( + irequest, allocator, null /* response_allocator_userp */, + InferResponseComplete, reinterpret_cast(p)), + "setting response callback"); + + FAIL_IF_ERR( + TRITONSERVER_ServerInferAsync( + server.get(), irequest, null /* trace */), + "running inference"); + + // Wait for the inference to complete. + TRITONSERVER_InferenceResponse completed_response = completed.get(); + + FAIL_IF_ERR( + TRITONSERVER_InferenceResponseError(completed_response), + "response status"); + + Check( + completed_response, input0_data, input1_data, output0, output1, + input0_size, datatype, is_int); + + FAIL_IF_ERR( + TRITONSERVER_InferenceResponseDelete(completed_response), + "deleting inference response"); + } + + // Modify some input data in place and then reuse the request + // object. For simplicity we only do this when the input tensors are + // in non-pinned system memory. + if (!enforce_memory_type || + (requested_memory_type == TRITONSERVER_MEMORY_CPU)) { + if (is_int) { + //jack: how to do with reinterpret_cast? 
+ int32_t* input0_base = reinterpret_cast(&input0_data[0]); + input0_base[0] = 27; + } else { + float* input0_base = reinterpret_cast(&input0_data[0]); + input0_base[0] = 27.0; + } + //jack: promise and future + auto p = new std::promise(); + std::future completed = p->get_future(); + + // Using a new promise so have to re-register the callback to set + // the promise as the userp. + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestSetResponseCallback( + irequest, allocator, null /* response_allocator_userp */, + InferResponseComplete, reinterpret_cast(p)), + "setting response callback"); + + FAIL_IF_ERR( + TRITONSERVER_ServerInferAsync( + server.get(), irequest, null /* trace */), + "running inference"); + + // Wait for the inference to complete. + TRITONSERVER_InferenceResponse completed_response = completed.get(); + FAIL_IF_ERR( + TRITONSERVER_InferenceResponseError(completed_response), + "response status"); + + Check( + completed_response, input0_data, input1_data, output0, output1, + input0_size, datatype, is_int); + + FAIL_IF_ERR( + TRITONSERVER_InferenceResponseDelete(completed_response), + "deleting inference response"); + } + + // Remove input data and then add back different data. + { + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestRemoveAllInputData(irequest, input0), + "removing INPUT0 data"); + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestAppendInputData( + irequest, input0, input1_base, input1_size, requested_memory_type, + 0 /* memory_type_id */), + "assigning INPUT1 data to INPUT0"); + + auto p = new std::promise(); + std::future completed = p->get_future(); + + // Using a new promise so have to re-register the callback to set + // the promise as the userp. 
+ FAIL_IF_ERR( + TRITONSERVER_InferenceRequestSetResponseCallback( + irequest, allocator, null /* response_allocator_userp */, + InferResponseComplete, reinterpret_cast(p)), + "setting response callback"); + + FAIL_IF_ERR( + TRITONSERVER_ServerInferAsync( + server.get(), irequest, null /* trace */), + "running inference"); + + // Wait for the inference to complete. + TRITONSERVER_InferenceResponse completed_response = completed.get(); + FAIL_IF_ERR( + TRITONSERVER_InferenceResponseError(completed_response), + "response status"); + + // Both inputs are using input1_data... + Check( + completed_response, input1_data, input1_data, output0, output1, + input0_size, datatype, is_int); + + FAIL_IF_ERR( + TRITONSERVER_InferenceResponseDelete(completed_response), + "deleting inference response"); + } + + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestDelete(irequest), + "deleting inference request"); + + FAIL_IF_ERR( + TRITONSERVER_ResponseAllocatorDelete(allocator), + "deleting response allocator"); + + System.exit(0); + } + + + + +} + + From 93f737adde569045466110f528bd2ef1bd2b4dad Mon Sep 17 00:00:00 2001 From: Samuel Audet Date: Wed, 13 Oct 2021 12:20:13 +0000 Subject: [PATCH 07/21] Add initial version of tritonserver/samples/Simple.java --- tritonserver/platform/pom.xml | 7 +- tritonserver/platform/redist/pom.xml | 2 +- tritonserver/pom.xml | 2 +- tritonserver/samples/Simple.java | 970 ++++++++++++++++++ tritonserver/samples/pom.xml | 26 + tritonserver/samples/simple.cc | 928 ----------------- tritonserver/samples/simple.java | 956 ----------------- .../tritonserver/global/tritonserver.java | 592 +++++------ ...ITONSERVER_InferenceTraceActivityFn_t.java | 2 +- ...ITONSERVER_ResponseAllocatorAllocFn_t.java | 2 +- ...ONSERVER_ResponseAllocatorReleaseFn_t.java | 2 +- .../tritonserver/presets/tritonserver.java | 24 +- 12 files changed, 1287 insertions(+), 2226 deletions(-) create mode 100644 tritonserver/samples/Simple.java create mode 100644 tritonserver/samples/pom.xml 
delete mode 100644 tritonserver/samples/simple.cc delete mode 100644 tritonserver/samples/simple.java diff --git a/tritonserver/platform/pom.xml b/tritonserver/platform/pom.xml index 9ef9770c490..329ed1da3b5 100644 --- a/tritonserver/platform/pom.xml +++ b/tritonserver/platform/pom.xml @@ -12,7 +12,7 @@ org.bytedeco tritonserver-platform - 8.0-${project.parent.version} + 2.12-${project.parent.version} JavaCPP Presets Platform for Tritonserver @@ -25,6 +25,11 @@ cuda-platform 11.4-8.2-${project.parent.version} + + org.bytedeco + tensorrt-platform + 8.0-${project.parent.version} + ${project.groupId} ${javacpp.moduleId} diff --git a/tritonserver/platform/redist/pom.xml b/tritonserver/platform/redist/pom.xml index cf514cb4214..81064b97268 100644 --- a/tritonserver/platform/redist/pom.xml +++ b/tritonserver/platform/redist/pom.xml @@ -12,7 +12,7 @@ org.bytedeco tritonserver-platform-redist - 8.0-${project.parent.version} + 2.12-${project.parent.version} JavaCPP Presets Platform Redist for Tritonserver diff --git a/tritonserver/pom.xml b/tritonserver/pom.xml index e051310dcf2..92fa782a462 100644 --- a/tritonserver/pom.xml +++ b/tritonserver/pom.xml @@ -24,7 +24,7 @@ org.bytedeco tensorrt 8.0-${project.parent.version} - + org.bytedeco javacpp diff --git a/tritonserver/samples/Simple.java b/tritonserver/samples/Simple.java new file mode 100644 index 00000000000..f3e854228a7 --- /dev/null +++ b/tritonserver/samples/Simple.java @@ -0,0 +1,970 @@ +// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of NVIDIA CORPORATION nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +import java.io.*; +import java.util.*; +import java.util.concurrent.*; +import com.google.gson.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import org.bytedeco.tritonserver.tritonserver.*; +import static org.bytedeco.cuda.global.cudart.*; +import static org.bytedeco.tritonserver.global.tritonserver.*; + +public class Simple { + static final double TRITON_MIN_COMPUTE_CAPABILITY = 6.0; + + static void FAIL(String MSG) { + System.err.println("Cuda failure: " + MSG); + System.exit(1); + } + + static void FAIL_IF_ERR(TRITONSERVER_Error err__, String MSG) { + if (err__ != null) { + System.err.println("error: " + MSG + ":" + + TRITONSERVER_ErrorCodeString(err__) + " - " + + TRITONSERVER_ErrorMessage(err__)); + TRITONSERVER_ErrorDelete(err__); + System.exit(1); + } + } + + static void FAIL_IF_CUDA_ERR(int err__, String MSG) { + if (err__ != cudaSuccess) { + System.err.println("error: " + MSG + ": " + cudaGetErrorString(err__)); + System.exit(1); + } + } + + static boolean enforce_memory_type = false; + static int requested_memory_type; + + static class CudaDataDeleter extends Pointer { + public CudaDataDeleter() { super((Pointer)null); } + public void reset(Pointer p) { + this.address = p.address(); + this.deallocator(new FreeDeallocator(this)); + } + protected static class FreeDeallocator extends Pointer implements Deallocator { + FreeDeallocator(Pointer p) { super(p); } + @Override public void deallocate() { + if (!isNull()) { + cudaPointerAttributes attr = new cudaPointerAttributes(null); + int cuerr = cudaPointerGetAttributes(attr, this); + if (cuerr != cudaSuccess) { + System.err.println("error: failed to get CUDA pointer attribute of " + this + + ": " + cudaGetErrorString(cuerr).getString()); + } + if (attr.type() == cudaMemoryTypeDevice) { + cuerr = cudaFree(this); + } else if (attr.type() == cudaMemoryTypeHost) { + cuerr = cudaFreeHost(this); + } + if (cuerr != cudaSuccess) { + System.err.println("error: failed to release CUDA pointer 
" + this + + ": " + cudaGetErrorString(cuerr).getString()); + } + } + } + } + } + + static class TRITONSERVER_ServerDeleter extends TRITONSERVER_Server { + public TRITONSERVER_ServerDeleter(TRITONSERVER_Server p) { super(p); deallocator(new DeleteDeallocator(this)); } + protected static class DeleteDeallocator extends TRITONSERVER_Server implements Deallocator { + DeleteDeallocator(Pointer p) { super(p); } + @Override public void deallocate() { TRITONSERVER_ServerDelete(this); } + } + } + + static void + Usage(String msg) + { + if (msg != null) { + System.err.println(msg); + } + + System.err.println("Usage: java " + Simple.class.getSimpleName() + " [options]"); + System.err.println("\t-m <\"system\"|\"pinned\"|gpu>" + + " Enforce the memory type for input and output tensors." + + " If not specified, inputs will be in system memory and outputs" + + " will be based on the model's preferred type."); + System.err.println("\t-v Enable verbose logging"); + System.err.println("\t-r [model repository absolute path]"); + + System.exit(1); + } + + static class ResponseAlloc extends TRITONSERVER_ResponseAllocatorAllocFn_t { + @Override public TRITONSERVER_Error call ( + TRITONSERVER_ResponseAllocator allocator, String tensor_name, + long byte_size, int preferred_memory_type, + long preferred_memory_type_id, Pointer userp, PointerPointer buffer, + PointerPointer buffer_userp, IntPointer actual_memory_type, + LongPointer actual_memory_type_id) + { + // Initially attempt to make the actual memory type and id that we + // allocate be the same as preferred memory type + actual_memory_type.put(0, preferred_memory_type); + actual_memory_type_id.put(0, preferred_memory_type_id); + + // If 'byte_size' is zero just return 'buffer' == nullptr, we don't + // need to do any other book-keeping. 
+ if (byte_size == 0) { + buffer.put(0, null); + buffer_userp.put(0, null); + System.out.println("allocated " + byte_size + " bytes for result tensor " + tensor_name); + } else { + Pointer allocated_ptr = new Pointer(); + if (enforce_memory_type) { + actual_memory_type.put(0, requested_memory_type); + } + + switch (actual_memory_type.get()) { + case TRITONSERVER_MEMORY_CPU_PINNED: { + int err = cudaSetDevice((int)actual_memory_type_id.get()); + if ((err != cudaSuccess) && (err != cudaErrorNoDevice) && + (err != cudaErrorInsufficientDriver)) { + return TRITONSERVER_ErrorNew( + TRITONSERVER_ERROR_INTERNAL, + "unable to recover current CUDA device: " + + cudaGetErrorString(err).getString()); + } + + err = cudaHostAlloc(allocated_ptr, byte_size, cudaHostAllocPortable); + if (err != cudaSuccess) { + return TRITONSERVER_ErrorNew( + TRITONSERVER_ERROR_INTERNAL, + "cudaHostAlloc failed: " + + cudaGetErrorString(err).getString()); + } + break; + } + + case TRITONSERVER_MEMORY_GPU: { + int err = cudaSetDevice((int)actual_memory_type_id.get()); + if ((err != cudaSuccess) && (err != cudaErrorNoDevice) && + (err != cudaErrorInsufficientDriver)) { + return TRITONSERVER_ErrorNew( + TRITONSERVER_ERROR_INTERNAL, + "unable to recover current CUDA device: " + + cudaGetErrorString(err).getString()); + } + + err = cudaMalloc(allocated_ptr, byte_size); + if (err != cudaSuccess) { + return TRITONSERVER_ErrorNew( + TRITONSERVER_ERROR_INTERNAL, + "cudaMalloc failed: " + cudaGetErrorString(err).getString()); + } + break; + } + + // Use CPU memory if the requested memory type is unknown + // (default case). + case TRITONSERVER_MEMORY_CPU: + default: { + actual_memory_type.put(0, TRITONSERVER_MEMORY_CPU); + allocated_ptr = Pointer.malloc(byte_size); + break; + } + } + + // Pass the tensor name with buffer_userp so we can show it when + // releasing the buffer. 
+ if (!allocated_ptr.isNull()) { + buffer.put(0, allocated_ptr); + buffer_userp.put(0, new BytePointer(tensor_name)); + System.out.println("allocated " + byte_size + " bytes in " + + TRITONSERVER_MemoryTypeString(actual_memory_type.get()) + + " for result tensor " + tensor_name); + } + } + + return null; // Success + } + } + + static class ResponseRelease extends TRITONSERVER_ResponseAllocatorReleaseFn_t { + @Override public TRITONSERVER_Error call ( + TRITONSERVER_ResponseAllocator allocator, Pointer buffer, Pointer buffer_userp, + long byte_size, int memory_type, long memory_type_id) + { + BytePointer name = null; + if (buffer_userp != null) { + name = new BytePointer(buffer_userp); + } else { + name = new BytePointer(""); + } + + System.out.println("Releasing buffer " + buffer + " of size " + byte_size + + " in " + TRITONSERVER_MemoryTypeString(memory_type) + + " for result '" + name.getString() + "'"); + switch (memory_type) { + case TRITONSERVER_MEMORY_CPU: + Pointer.free(buffer); + break; + case TRITONSERVER_MEMORY_CPU_PINNED: { + int err = cudaSetDevice((int)memory_type_id); + if (err == cudaSuccess) { + err = cudaFreeHost(buffer); + } + if (err != cudaSuccess) { + System.err.println("error: failed to cudaFree " + buffer + ": " + + cudaGetErrorString(err)); + } + break; + } + case TRITONSERVER_MEMORY_GPU: { + int err = cudaSetDevice((int)memory_type_id); + if (err == cudaSuccess) { + err = cudaFree(buffer); + } + if (err != cudaSuccess) { + System.err.println("error: failed to cudaFree " + buffer + ": " + + cudaGetErrorString(err)); + } + break; + } + default: + System.err.println("error: unexpected buffer allocated in CUDA managed memory"); + break; + } + + name.deallocate(); + + return null; // Success + } + } + + static class InferRequestComplete extends TRITONSERVER_InferenceRequestReleaseFn_t { + @Override public void call ( + TRITONSERVER_InferenceRequest request, int flags, Pointer userp) + { + // We reuse the request so we don't delete it here. 
+ } + } + + static class InferResponseComplete extends TRITONSERVER_InferenceResponseCompleteFn_t { + @Override public void call ( + TRITONSERVER_InferenceResponse response, int flags, Pointer userp) + { + if (response != null) { + // Send 'response' to the future. + futures.get(userp).complete(response); + } + } + } + + static ConcurrentHashMap> futures = new ConcurrentHashMap<>(); + static ResponseAlloc responseAlloc = new ResponseAlloc(); + static ResponseRelease responseRelease = new ResponseRelease(); + static InferRequestComplete inferRequestComplete = new InferRequestComplete(); + static InferResponseComplete inferResponseComplete = new InferResponseComplete(); + + static TRITONSERVER_Error + ParseModelMetadata( + JsonObject model_metadata, boolean[] is_int, + boolean[] is_torch_model) + { + String seen_data_type = null; + for (JsonElement input_element : model_metadata.get("inputs").getAsJsonArray()) { + JsonObject input = input_element.getAsJsonObject(); + if (!input.get("datatype").getAsString().equals("INT32") && + !input.get("datatype").getAsString().equals("FP32")) { + return TRITONSERVER_ErrorNew( + TRITONSERVER_ERROR_UNSUPPORTED, + "simple lib example only supports model with data type INT32 or " + + "FP32"); + } + if (seen_data_type == null) { + seen_data_type = input.get("datatype").getAsString(); + } else if (!seen_data_type.equals(input.get("datatype").getAsString())) { + return TRITONSERVER_ErrorNew( + TRITONSERVER_ERROR_INVALID_ARG, + "the inputs and outputs of 'simple' model must have the data type"); + } + } + for (JsonElement output_element : model_metadata.get("outputs").getAsJsonArray()) { + JsonObject output = output_element.getAsJsonObject(); + if (!output.get("datatype").getAsString().equals("INT32") && + !output.get("datatype").getAsString().equals("FP32")) { + return TRITONSERVER_ErrorNew( + TRITONSERVER_ERROR_UNSUPPORTED, + "simple lib example only supports model with data type INT32 or " + + "FP32"); + } else if 
(!seen_data_type.equals(output.get("datatype").getAsString())) { + return TRITONSERVER_ErrorNew( + TRITONSERVER_ERROR_INVALID_ARG, + "the inputs and outputs of 'simple' model must have the data type"); + } + } + + is_int[0] = seen_data_type.equals("INT32"); + is_torch_model[0] = + model_metadata.get("platform").getAsString().equals("pytorch_libtorch"); + return null; + } + + static void + GenerateInputData( + IntPointer[] input0_data, IntPointer[] input1_data) + { + input0_data[0] = new IntPointer(16); + input1_data[0] = new IntPointer(16); + for (int i = 0; i < 16; ++i) { + input0_data[0].put(i, i); + input1_data[0].put(i, 1); + } + } + + static void + GenerateInputData( + FloatPointer[] input0_data, FloatPointer[] input1_data) + { + input0_data[0] = new FloatPointer(16); + input1_data[0] = new FloatPointer(16); + for (int i = 0; i < 16; ++i) { + input0_data[0].put(i, i); + input1_data[0].put(i, 1); + } + } + + static void + CompareResult( + String output0_name, String output1_name, + IntPointer input0, IntPointer input1, IntPointer output0, + IntPointer output1) + { + for (int i = 0; i < 16; ++i) { + System.out.println(input0.get(i) + " + " + input1.get(i) + " = " + + output0.get(i)); + System.out.println(input0.get(i) + " - " + input1.get(i) + " = " + + output1.get(i)); + + if ((input0.get(i) + input1.get(i)) != output0.get(i)) { + FAIL("incorrect sum in " + output0_name); + } + if ((input0.get(i) - input1.get(i)) != output1.get(i)) { + FAIL("incorrect difference in " + output1_name); + } + } + } + + static void + CompareResult( + String output0_name, String output1_name, + FloatPointer input0, FloatPointer input1, FloatPointer output0, + FloatPointer output1) + { + for (int i = 0; i < 16; ++i) { + System.out.println(input0.get(i) + " + " + input1.get(i) + " = " + + output0.get(i)); + System.out.println(input0.get(i) + " - " + input1.get(i) + " = " + + output1.get(i)); + + if ((input0.get(i) + input1.get(i)) != output0.get(i)) { + FAIL("incorrect sum in " + 
output0_name); + } + if ((input0.get(i) - input1.get(i)) != output1.get(i)) { + FAIL("incorrect difference in " + output1_name); + } + } + } + + static void + Check( + TRITONSERVER_InferenceResponse response, + Pointer input0_data, Pointer input1_data, + String output0, String output1, + long expected_byte_size, + int expected_datatype, boolean is_int) + { + HashMap output_data = new HashMap<>(); + + int[] output_count = {0}; + FAIL_IF_ERR( + TRITONSERVER_InferenceResponseOutputCount(response, output_count), + "getting number of response outputs"); + if (output_count[0] != 2) { + FAIL("expecting 2 response outputs, got " + output_count[0]); + } + + for (int idx = 0; idx < output_count[0]; ++idx) { + BytePointer cname = new BytePointer((Pointer)null); + IntPointer datatype = new IntPointer(1); + LongPointer shape = new LongPointer((Pointer)null); + LongPointer dim_count = new LongPointer(1); + Pointer base = new Pointer(); + SizeTPointer byte_size = new SizeTPointer(1); + IntPointer memory_type = new IntPointer(1); + LongPointer memory_type_id = new LongPointer(1); + Pointer userp = new Pointer(); + + FAIL_IF_ERR( + TRITONSERVER_InferenceResponseOutput( + response, idx, cname, datatype, shape, dim_count, base, + byte_size, memory_type, memory_type_id, userp), + "getting output info"); + + if (cname.isNull()) { + FAIL("unable to get output name"); + } + + String name = cname.getString(); + if ((!name.equals(output0)) && (!name.equals(output1))) { + FAIL("unexpected output '" + name + "'"); + } + + if ((dim_count.get() != 2) || (shape.get(0) != 1) || (shape.get(1) != 16)) { + FAIL("unexpected shape for '" + name + "'"); + } + + if (datatype.get() != expected_datatype) { + FAIL( + "unexpected datatype '" + + TRITONSERVER_DataTypeString(datatype.get()) + "' for '" + + name + "'"); + } + + if (byte_size.get() != expected_byte_size) { + FAIL( + "unexpected byte-size, expected " + + expected_byte_size + ", got " + + byte_size.get() + " for " + name); + } + + if 
(enforce_memory_type && (memory_type.get() != requested_memory_type)) { + FAIL( + "unexpected memory type, expected to be allocated in " + + TRITONSERVER_MemoryTypeString(requested_memory_type) + + ", got " + TRITONSERVER_MemoryTypeString(memory_type.get()) + + ", id " + memory_type_id.get() + " for " + name); + } + + // We make a copy of the data here... which we could avoid for + // performance reasons but ok for this simple example. + BytePointer odata = new BytePointer(byte_size); + output_data.put(name, odata); + switch (memory_type.get()) { + case TRITONSERVER_MEMORY_CPU: { + System.out.println(name + " is stored in system memory"); + odata.put(base.limit(byte_size.get())); + break; + } + + case TRITONSERVER_MEMORY_CPU_PINNED: { + System.out.println(name + " is stored in pinned memory"); + odata.put(base.limit(byte_size.get())); + break; + } + + case TRITONSERVER_MEMORY_GPU: { + System.out.println(name + " is stored in GPU memory"); + FAIL_IF_CUDA_ERR( + cudaMemcpy(odata, base, byte_size.get(), cudaMemcpyDeviceToHost), + "getting " + name + " data from GPU memory"); + break; + } + + default: + FAIL("unexpected memory type"); + } + } + + if (is_int) { + CompareResult( + output0, output1, new IntPointer(input0_data), new IntPointer(input1_data), + new IntPointer(output_data.get(output0)), new IntPointer(output_data.get(output1))); + } else { + CompareResult( + output0, output1, new FloatPointer(input0_data), new FloatPointer(input1_data), + new FloatPointer(output_data.get(output0)), new FloatPointer(output_data.get(output1))); + } + } + + public static void + main(String[] args) throws Exception + { + String model_repository_path = null; + int verbose_level = 0; + + // Parse commandline... 
+ for (int i = 0; i < args.length; i++) { + switch (args[i]) { + case "-m": { + enforce_memory_type = true; + i++; + if (args[i].equals("system")) { + requested_memory_type = TRITONSERVER_MEMORY_CPU; + } else if (args[i].equals("pinned")) { + requested_memory_type = TRITONSERVER_MEMORY_CPU_PINNED; + } else if (args[i].equals("gpu")) { + requested_memory_type = TRITONSERVER_MEMORY_GPU; + } else { + Usage( + "-m must be used to specify one of the following types:" + + " <\"system\"|\"pinned\"|gpu>"); + } + break; + } + case "-r": + model_repository_path = args[++i]; + break; + case "-v": + verbose_level = 1; + break; + case "-?": + Usage(null); + break; + } + } + + if (model_repository_path == null) { + Usage("-r must be used to specify model repository path"); + } + if (enforce_memory_type && requested_memory_type != TRITONSERVER_MEMORY_CPU) { + Usage("-m can only be set to \"system\" without enabling GPU"); + } + + // Check API version. + int[] api_version_major = {0}, api_version_minor = {0}; + FAIL_IF_ERR( + TRITONSERVER_ApiVersion(api_version_major, api_version_minor), + "getting Triton API version"); + if ((TRITONSERVER_API_VERSION_MAJOR != api_version_major[0]) || + (TRITONSERVER_API_VERSION_MINOR > api_version_minor[0])) { + FAIL("triton server API version mismatch"); + } + + // Create the server... 
+ TRITONSERVER_ServerOptions server_options = new TRITONSERVER_ServerOptions(null); + FAIL_IF_ERR( + TRITONSERVER_ServerOptionsNew(server_options), + "creating server options"); + FAIL_IF_ERR( + TRITONSERVER_ServerOptionsSetModelRepositoryPath( + server_options, model_repository_path), + "setting model repository path"); + FAIL_IF_ERR( + TRITONSERVER_ServerOptionsSetLogVerbose(server_options, verbose_level), + "setting verbose logging level"); + FAIL_IF_ERR( + TRITONSERVER_ServerOptionsSetBackendDirectory( + server_options, "/opt/tritonserver/backends"), + "setting backend directory"); + FAIL_IF_ERR( + TRITONSERVER_ServerOptionsSetRepoAgentDirectory( + server_options, "/opt/tritonserver/repoagents"), + "setting repository agent directory"); + FAIL_IF_ERR( + TRITONSERVER_ServerOptionsSetStrictModelConfig(server_options, true), + "setting strict model configuration"); + double min_compute_capability = TRITON_MIN_COMPUTE_CAPABILITY; + FAIL_IF_ERR( + TRITONSERVER_ServerOptionsSetMinSupportedComputeCapability( + server_options, min_compute_capability), + "setting minimum supported CUDA compute capability"); + + TRITONSERVER_Server server_ptr = new TRITONSERVER_Server(null); + FAIL_IF_ERR( + TRITONSERVER_ServerNew(server_ptr, server_options), "creating server"); + FAIL_IF_ERR( + TRITONSERVER_ServerOptionsDelete(server_options), + "deleting server options"); + + TRITONSERVER_ServerDeleter server = new TRITONSERVER_ServerDeleter(server_ptr); + + // Wait until the server is both live and ready. 
+ int health_iters = 0; + while (true) { + boolean[] live = {false}, ready = {false}; + FAIL_IF_ERR( + TRITONSERVER_ServerIsLive(server, live), + "unable to get server liveness"); + FAIL_IF_ERR( + TRITONSERVER_ServerIsReady(server, ready), + "unable to get server readiness"); + System.out.println("Server Health: live " + live[0] + ", ready " + ready[0]); + if (live[0] && ready[0]) { + break; + } + + if (++health_iters >= 10) { + FAIL("failed to find healthy inference server"); + } + + Thread.sleep(500); + } + + // Print status of the server. + { + TRITONSERVER_Message server_metadata_message = new TRITONSERVER_Message(null); + FAIL_IF_ERR( + TRITONSERVER_ServerMetadata(server, server_metadata_message), + "unable to get server metadata message"); + BytePointer buffer = new BytePointer((Pointer)null); + SizeTPointer byte_size = new SizeTPointer(1); + FAIL_IF_ERR( + TRITONSERVER_MessageSerializeToJson( + server_metadata_message, buffer, byte_size), + "unable to serialize server metadata message"); + + System.out.println("Server Status:"); + System.out.println(buffer.limit(byte_size.get()).getString()); + + FAIL_IF_ERR( + TRITONSERVER_MessageDelete(server_metadata_message), + "deleting status metadata"); + } + + String model_name = "simple"; + + // Wait for the model to become available. 
+ boolean[] is_torch_model = {false}; + boolean[] is_int = {true}; + boolean[] is_ready = {false}; + health_iters = 0; + while (!is_ready[0]) { + FAIL_IF_ERR( + TRITONSERVER_ServerModelIsReady( + server, model_name, 1, is_ready), + "unable to get model readiness"); + if (!is_ready[0]) { + if (++health_iters >= 10) { + FAIL("model failed to be ready in 10 iterations"); + } + Thread.sleep(500); + continue; + } + + TRITONSERVER_Message model_metadata_message = new TRITONSERVER_Message(null); + FAIL_IF_ERR( + TRITONSERVER_ServerModelMetadata( + server, model_name, 1, model_metadata_message), + "unable to get model metadata message"); + BytePointer buffer = new BytePointer((Pointer)null); + SizeTPointer byte_size = new SizeTPointer(1); + FAIL_IF_ERR( + TRITONSERVER_MessageSerializeToJson( + model_metadata_message, buffer, byte_size), + "unable to serialize model status protobuf"); + + JsonParser parser = new JsonParser(); + JsonObject model_metadata = null; + try { + model_metadata = parser.parse(buffer.limit(byte_size.get()).getString()).getAsJsonObject(); + } catch (Exception e) { + FAIL("error: failed to parse model metadata from JSON: " + e); + } + + FAIL_IF_ERR( + TRITONSERVER_MessageDelete(model_metadata_message), + "deleting status protobuf"); + + if (!model_metadata.get("name").getAsString().equals(model_name)) { + FAIL("unable to find metadata for model"); + } + + boolean found_version = false; + if (model_metadata.has("versions")) { + for (JsonElement version : model_metadata.get("versions").getAsJsonArray()) { + if (version.getAsString().equals("1")) { + found_version = true; + break; + } + } + } + if (!found_version) { + FAIL("unable to find version 1 status for model"); + } + + FAIL_IF_ERR( + ParseModelMetadata(model_metadata, is_int, is_torch_model), + "parsing model metadata"); + } + + // Create the allocator that will be used to allocate buffers for + // the result tensors. 
+ TRITONSERVER_ResponseAllocator allocator = new TRITONSERVER_ResponseAllocator(null); + FAIL_IF_ERR( + TRITONSERVER_ResponseAllocatorNew( + allocator, responseAlloc, responseRelease, null /* start_fn */), + "creating response allocator"); + + // Inference + TRITONSERVER_InferenceRequest irequest = new TRITONSERVER_InferenceRequest(null); + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestNew( + irequest, server, model_name, -1 /* model_version */), + "creating inference request"); + + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestSetId(irequest, "my_request_id"), + "setting ID for the request"); + + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestSetReleaseCallback( + irequest, inferRequestComplete, null /* request_release_userp */), + "setting request release callback"); + + // Inputs + String input0 = is_torch_model[0] ? "INPUT__0" : "INPUT0"; + String input1 = is_torch_model[0] ? "INPUT__1" : "INPUT1"; + + long[] input0_shape = {1, 16}; + long[] input1_shape = {1, 16}; + + int datatype = + (is_int[0]) ? TRITONSERVER_TYPE_INT32 : TRITONSERVER_TYPE_FP32; + + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestAddInput( + irequest, input0, datatype, input0_shape, input0_shape.length), + "setting input 0 meta-data for the request"); + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestAddInput( + irequest, input1, datatype, input1_shape, input1_shape.length), + "setting input 1 meta-data for the request"); + + String output0 = is_torch_model[0] ? "OUTPUT__0" : "OUTPUT0"; + String output1 = is_torch_model[0] ? "OUTPUT__1" : "OUTPUT1"; + + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestAddRequestedOutput(irequest, output0), + "requesting output 0 for the request"); + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestAddRequestedOutput(irequest, output1), + "requesting output 1 for the request"); + + // Create the data for the two input tensors. Initialize the first + // to unique values and the second to all ones. 
+ BytePointer input0_data; + BytePointer input1_data; + if (is_int[0]) { + IntPointer[] p0 = {null}, p1 = {null}; + GenerateInputData(p0, p1); + input0_data = p0[0].getPointer(BytePointer.class); + input1_data = p1[0].getPointer(BytePointer.class); + } else { + FloatPointer[] p0 = {null}, p1 = {null}; + GenerateInputData(p0, p1); + input0_data = p0[0].getPointer(BytePointer.class); + input1_data = p1[0].getPointer(BytePointer.class); + } + + long input0_size = input0_data.limit(); + long input1_size = input1_data.limit(); + + Pointer input0_base = input0_data; + Pointer input1_base = input1_data; + CudaDataDeleter input0_gpu = new CudaDataDeleter(); + CudaDataDeleter input1_gpu = new CudaDataDeleter(); + boolean use_cuda_memory = + (enforce_memory_type && + (requested_memory_type != TRITONSERVER_MEMORY_CPU)); + if (use_cuda_memory) { + FAIL_IF_CUDA_ERR(cudaSetDevice(0), "setting CUDA device to device 0"); + if (requested_memory_type != TRITONSERVER_MEMORY_CPU_PINNED) { + Pointer dst = new Pointer(); + FAIL_IF_CUDA_ERR( + cudaMalloc(dst, input0_size), + "allocating GPU memory for INPUT0 data"); + input0_gpu.reset(dst); + FAIL_IF_CUDA_ERR( + cudaMemcpy(dst, input0_data, input0_size, cudaMemcpyHostToDevice), + "setting INPUT0 data in GPU memory"); + FAIL_IF_CUDA_ERR( + cudaMalloc(dst, input1_size), + "allocating GPU memory for INPUT1 data"); + input1_gpu.reset(dst); + FAIL_IF_CUDA_ERR( + cudaMemcpy(dst, input1_data, input1_size, cudaMemcpyHostToDevice), + "setting INPUT1 data in GPU memory"); + } else { + Pointer dst = new Pointer(); + FAIL_IF_CUDA_ERR( + cudaHostAlloc(dst, input0_size, cudaHostAllocPortable), + "allocating pinned memory for INPUT0 data"); + input0_gpu.reset(dst); + FAIL_IF_CUDA_ERR( + cudaMemcpy(dst, input0_data, input0_size, cudaMemcpyHostToHost), + "setting INPUT0 data in pinned memory"); + FAIL_IF_CUDA_ERR( + cudaHostAlloc(dst, input1_size, cudaHostAllocPortable), + "allocating pinned memory for INPUT1 data"); + input1_gpu.reset(dst); + 
FAIL_IF_CUDA_ERR( + cudaMemcpy(dst, input1_data, input1_size, cudaMemcpyHostToHost), + "setting INPUT1 data in pinned memory"); + } + } + + input0_base = use_cuda_memory ? input0_gpu : input0_data; + input1_base = use_cuda_memory ? input1_gpu : input1_data; + + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestAppendInputData( + irequest, input0, input0_base, input0_size, requested_memory_type, + 0 /* memory_type_id */), + "assigning INPUT0 data"); + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestAppendInputData( + irequest, input1, input1_base, input1_size, requested_memory_type, + 0 /* memory_type_id */), + "assigning INPUT1 data"); + + // Perform inference... + { + CompletableFuture completed = new CompletableFuture<>(); + futures.put(irequest, completed); + + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestSetResponseCallback( + irequest, allocator, null /* response_allocator_userp */, + inferResponseComplete, irequest), + "setting response callback"); + + FAIL_IF_ERR( + TRITONSERVER_ServerInferAsync( + server, irequest, null /* trace */), + "running inference"); + + // Wait for the inference to complete. + TRITONSERVER_InferenceResponse completed_response = completed.get(); + futures.remove(irequest); + + FAIL_IF_ERR( + TRITONSERVER_InferenceResponseError(completed_response), + "response status"); + + Check( + completed_response, input0_data, input1_data, output0, output1, + input0_size, datatype, is_int[0]); + + FAIL_IF_ERR( + TRITONSERVER_InferenceResponseDelete(completed_response), + "deleting inference response"); + } + + // Modify some input data in place and then reuse the request + // object. For simplicity we only do this when the input tensors are + // in non-pinned system memory. 
+ if (!enforce_memory_type || + (requested_memory_type == TRITONSERVER_MEMORY_CPU)) { + if (is_int[0]) { + new IntPointer(input0_data).put(0, 27); + } else { + new FloatPointer(input0_data).put(0, 27.0f); + } + + CompletableFuture completed = new CompletableFuture<>(); + futures.put(irequest, completed); + + // Using a new promise so have to re-register the callback to set + // the promise as the userp. + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestSetResponseCallback( + irequest, allocator, null /* response_allocator_userp */, + inferResponseComplete, irequest), + "setting response callback"); + + FAIL_IF_ERR( + TRITONSERVER_ServerInferAsync( + server, irequest, null /* trace */), + "running inference"); + + // Wait for the inference to complete. + TRITONSERVER_InferenceResponse completed_response = completed.get(); + futures.remove(irequest); + FAIL_IF_ERR( + TRITONSERVER_InferenceResponseError(completed_response), + "response status"); + + Check( + completed_response, input0_data, input1_data, output0, output1, + input0_size, datatype, is_int[0]); + + FAIL_IF_ERR( + TRITONSERVER_InferenceResponseDelete(completed_response), + "deleting inference response"); + } + + // Remove input data and then add back different data. + { + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestRemoveAllInputData(irequest, input0), + "removing INPUT0 data"); + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestAppendInputData( + irequest, input0, input1_base, input1_size, requested_memory_type, + 0 /* memory_type_id */), + "assigning INPUT1 data to INPUT0"); + + CompletableFuture completed = new CompletableFuture<>(); + futures.put(irequest, completed); + + // Using a new promise so have to re-register the callback to set + // the promise as the userp. 
+ FAIL_IF_ERR( + TRITONSERVER_InferenceRequestSetResponseCallback( + irequest, allocator, null /* response_allocator_userp */, + inferResponseComplete, irequest), + "setting response callback"); + + FAIL_IF_ERR( + TRITONSERVER_ServerInferAsync( + server, irequest, null /* trace */), + "running inference"); + + // Wait for the inference to complete. + TRITONSERVER_InferenceResponse completed_response = completed.get(); + futures.remove(irequest); + FAIL_IF_ERR( + TRITONSERVER_InferenceResponseError(completed_response), + "response status"); + + // Both inputs are using input1_data... + Check( + completed_response, input1_data, input1_data, output0, output1, + input0_size, datatype, is_int[0]); + + FAIL_IF_ERR( + TRITONSERVER_InferenceResponseDelete(completed_response), + "deleting inference response"); + } + + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestDelete(irequest), + "deleting inference request"); + + FAIL_IF_ERR( + TRITONSERVER_ResponseAllocatorDelete(allocator), + "deleting response allocator"); + + System.exit(0); + } +} diff --git a/tritonserver/samples/pom.xml b/tritonserver/samples/pom.xml new file mode 100644 index 00000000000..73741806ea4 --- /dev/null +++ b/tritonserver/samples/pom.xml @@ -0,0 +1,26 @@ + + 4.0.0 + org.bytedeco.tritonserver + simple + 1.5.6 + + Simple + 1.8 + 1.8 + + + + org.bytedeco + tritonserver-platform + 2.12-1.5.6 + + + com.google.code.gson + gson + 2.8.8 + + + + . + + diff --git a/tritonserver/samples/simple.cc b/tritonserver/samples/simple.cc deleted file mode 100644 index 41538abe35f..00000000000 --- a/tritonserver/samples/simple.cc +++ /dev/null @@ -1,928 +0,0 @@ -// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
-// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "src/servers/common.h" -#include "triton/core/tritonserver.h" - -#ifdef TRITON_ENABLE_GPU -#include -#endif // TRITON_ENABLE_GPU - -namespace ni = nvidia::inferenceserver; - -namespace { - -bool enforce_memory_type = false; -TRITONSERVER_MemoryType requested_memory_type; - -#ifdef TRITON_ENABLE_GPU -static auto cuda_data_deleter = [](void* data) { - if (data != nullptr) { - cudaPointerAttributes attr; - auto cuerr = cudaPointerGetAttributes(&attr, data); - if (cuerr != cudaSuccess) { - std::cerr << "error: failed to get CUDA pointer attribute of " << data - << ": " << cudaGetErrorString(cuerr) << std::endl; - } - if (attr.type == cudaMemoryTypeDevice) { - cuerr = cudaFree(data); - } else if (attr.type == cudaMemoryTypeHost) { - cuerr = cudaFreeHost(data); - } - if (cuerr != cudaSuccess) { - std::cerr << "error: failed to release CUDA pointer " << data << ": " - << cudaGetErrorString(cuerr) << std::endl; - } - } -}; -#endif // TRITON_ENABLE_GPU - -void -Usage(char** argv, const std::string& msg = std::string()) -{ - if (!msg.empty()) { - std::cerr << msg << std::endl; - } - - std::cerr << "Usage: " << argv[0] << " [options]" << std::endl; - std::cerr << "\t-m <\"system\"|\"pinned\"|gpu>" - << " Enforce the memory type for input and output tensors." - << " If not specified, inputs will be in system memory and outputs" - << " will be based on the model's preferred type." 
<< std::endl; - std::cerr << "\t-v Enable verbose logging" << std::endl; - std::cerr << "\t-r [model repository absolute path]" << std::endl; - - exit(1); -} - -TRITONSERVER_Error* -ResponseAlloc( - TRITONSERVER_ResponseAllocator* allocator, const char* tensor_name, - size_t byte_size, TRITONSERVER_MemoryType preferred_memory_type, - int64_t preferred_memory_type_id, void* userp, void** buffer, - void** buffer_userp, TRITONSERVER_MemoryType* actual_memory_type, - int64_t* actual_memory_type_id) -{ - // Initially attempt to make the actual memory type and id that we - // allocate be the same as preferred memory type - *actual_memory_type = preferred_memory_type; - *actual_memory_type_id = preferred_memory_type_id; - - // If 'byte_size' is zero just return 'buffer' == nullptr, we don't - // need to do any other book-keeping. - if (byte_size == 0) { - *buffer = nullptr; - *buffer_userp = nullptr; - std::cout << "allocated " << byte_size << " bytes for result tensor " - << tensor_name << std::endl; - } else { - void* allocated_ptr = nullptr; - if (enforce_memory_type) { - *actual_memory_type = requested_memory_type; - } - - switch (*actual_memory_type) { -#ifdef TRITON_ENABLE_GPU - case TRITONSERVER_MEMORY_CPU_PINNED: { - auto err = cudaSetDevice(*actual_memory_type_id); - if ((err != cudaSuccess) && (err != cudaErrorNoDevice) && - (err != cudaErrorInsufficientDriver)) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, - std::string( - "unable to recover current CUDA device: " + - std::string(cudaGetErrorString(err))) - .c_str()); - } - - err = cudaHostAlloc(&allocated_ptr, byte_size, cudaHostAllocPortable); - if (err != cudaSuccess) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, - std::string( - "cudaHostAlloc failed: " + - std::string(cudaGetErrorString(err))) - .c_str()); - } - break; - } - - case TRITONSERVER_MEMORY_GPU: { - auto err = cudaSetDevice(*actual_memory_type_id); - if ((err != cudaSuccess) && (err != cudaErrorNoDevice) && 
- (err != cudaErrorInsufficientDriver)) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, - std::string( - "unable to recover current CUDA device: " + - std::string(cudaGetErrorString(err))) - .c_str()); - } - - err = cudaMalloc(&allocated_ptr, byte_size); - if (err != cudaSuccess) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, - std::string( - "cudaMalloc failed: " + std::string(cudaGetErrorString(err))) - .c_str()); - } - break; - } -#endif // TRITON_ENABLE_GPU - - // Use CPU memory if the requested memory type is unknown - // (default case). - case TRITONSERVER_MEMORY_CPU: - default: { - *actual_memory_type = TRITONSERVER_MEMORY_CPU; - allocated_ptr = malloc(byte_size); - break; - } - } - - // Pass the tensor name with buffer_userp so we can show it when - // releasing the buffer. - if (allocated_ptr != nullptr) { - *buffer = allocated_ptr; - *buffer_userp = new std::string(tensor_name); - std::cout << "allocated " << byte_size << " bytes in " - << TRITONSERVER_MemoryTypeString(*actual_memory_type) - << " for result tensor " << tensor_name << std::endl; - } - } - - return nullptr; // Success -} - -TRITONSERVER_Error* -ResponseRelease( - TRITONSERVER_ResponseAllocator* allocator, void* buffer, void* buffer_userp, - size_t byte_size, TRITONSERVER_MemoryType memory_type, - int64_t memory_type_id) -{ - std::string* name = nullptr; - if (buffer_userp != nullptr) { - name = reinterpret_cast(buffer_userp); - } else { - name = new std::string(""); - } - - std::cout << "Releasing buffer " << buffer << " of size " << byte_size - << " in " << TRITONSERVER_MemoryTypeString(memory_type) - << " for result '" << *name << "'" << std::endl; - switch (memory_type) { - case TRITONSERVER_MEMORY_CPU: - free(buffer); - break; -#ifdef TRITON_ENABLE_GPU - case TRITONSERVER_MEMORY_CPU_PINNED: { - auto err = cudaSetDevice(memory_type_id); - if (err == cudaSuccess) { - err = cudaFreeHost(buffer); - } - if (err != cudaSuccess) { - std::cerr << "error: 
failed to cudaFree " << buffer << ": " - << cudaGetErrorString(err) << std::endl; - } - break; - } - case TRITONSERVER_MEMORY_GPU: { - auto err = cudaSetDevice(memory_type_id); - if (err == cudaSuccess) { - err = cudaFree(buffer); - } - if (err != cudaSuccess) { - std::cerr << "error: failed to cudaFree " << buffer << ": " - << cudaGetErrorString(err) << std::endl; - } - break; - } -#endif // TRITON_ENABLE_GPU - default: - std::cerr << "error: unexpected buffer allocated in CUDA managed memory" - << std::endl; - break; - } - - delete name; - - return nullptr; // Success -} - -void -InferRequestComplete( - TRITONSERVER_InferenceRequest* request, const uint32_t flags, void* userp) -{ - // We reuse the request so we don't delete it here. -} - -void -InferResponseComplete( - TRITONSERVER_InferenceResponse* response, const uint32_t flags, void* userp) -{ - if (response != nullptr) { - // Send 'response' to the future. - std::promise* p = - reinterpret_cast*>(userp); - p->set_value(response); - delete p; - } -} - -TRITONSERVER_Error* -ParseModelMetadata( - const rapidjson::Document& model_metadata, bool* is_int, - bool* is_torch_model) -{ - std::string seen_data_type; - for (const auto& input : model_metadata["inputs"].GetArray()) { - if (strcmp(input["datatype"].GetString(), "INT32") && - strcmp(input["datatype"].GetString(), "FP32")) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_UNSUPPORTED, - "simple lib example only supports model with data type INT32 or " - "FP32"); - } - if (seen_data_type.empty()) { - seen_data_type = input["datatype"].GetString(); - } else if (strcmp(seen_data_type.c_str(), input["datatype"].GetString())) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - "the inputs and outputs of 'simple' model must have the data type"); - } - } - for (const auto& output : model_metadata["outputs"].GetArray()) { - if (strcmp(output["datatype"].GetString(), "INT32") && - strcmp(output["datatype"].GetString(), "FP32")) { - return 
TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_UNSUPPORTED, - "simple lib example only supports model with data type INT32 or " - "FP32"); - } else if (strcmp(seen_data_type.c_str(), output["datatype"].GetString())) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - "the inputs and outputs of 'simple' model must have the data type"); - } - } - - *is_int = (strcmp(seen_data_type.c_str(), "INT32") == 0); - *is_torch_model = - (strcmp(model_metadata["platform"].GetString(), "pytorch_libtorch") == 0); - return nullptr; -} - -template -void -GenerateInputData( - std::vector* input0_data, std::vector* input1_data) -{ - input0_data->resize(16 * sizeof(T)); - input1_data->resize(16 * sizeof(T)); - for (size_t i = 0; i < 16; ++i) { - ((T*)input0_data->data())[i] = i; - ((T*)input1_data->data())[i] = 1; - } -} - -template -void -CompareResult( - const std::string& output0_name, const std::string& output1_name, - const void* input0, const void* input1, const char* output0, - const char* output1) -{ - for (size_t i = 0; i < 16; ++i) { - std::cout << ((T*)input0)[i] << " + " << ((T*)input1)[i] << " = " - << ((T*)output0)[i] << std::endl; - std::cout << ((T*)input0)[i] << " - " << ((T*)input1)[i] << " = " - << ((T*)output1)[i] << std::endl; - - if ((((T*)input0)[i] + ((T*)input1)[i]) != ((T*)output0)[i]) { - FAIL("incorrect sum in " + output0_name); - } - if ((((T*)input0)[i] - ((T*)input1)[i]) != ((T*)output1)[i]) { - FAIL("incorrect difference in " + output1_name); - } - } -} - -void -Check( - TRITONSERVER_InferenceResponse* response, - const std::vector& input0_data, const std::vector& input1_data, - const std::string& output0, const std::string& output1, - const size_t expected_byte_size, - const TRITONSERVER_DataType expected_datatype, const bool is_int) -{ - std::unordered_map> output_data; - - uint32_t output_count; - FAIL_IF_ERR( - TRITONSERVER_InferenceResponseOutputCount(response, &output_count), - "getting number of response outputs"); - if 
(output_count != 2) { - FAIL("expecting 2 response outputs, got " + std::to_string(output_count)); - } - - for (uint32_t idx = 0; idx < output_count; ++idx) { - const char* cname; - TRITONSERVER_DataType datatype; - const int64_t* shape; - uint64_t dim_count; - const void* base; - size_t byte_size; - TRITONSERVER_MemoryType memory_type; - int64_t memory_type_id; - void* userp; - - FAIL_IF_ERR( - TRITONSERVER_InferenceResponseOutput( - response, idx, &cname, &datatype, &shape, &dim_count, &base, - &byte_size, &memory_type, &memory_type_id, &userp), - "getting output info"); - - if (cname == nullptr) { - FAIL("unable to get output name"); - } - - std::string name(cname); - if ((name != output0) && (name != output1)) { - FAIL("unexpected output '" + name + "'"); - } - - if ((dim_count != 2) || (shape[0] != 1) || (shape[1] != 16)) { - FAIL("unexpected shape for '" + name + "'"); - } - - if (datatype != expected_datatype) { - FAIL( - "unexpected datatype '" + - std::string(TRITONSERVER_DataTypeString(datatype)) + "' for '" + - name + "'"); - } - - if (byte_size != expected_byte_size) { - FAIL( - "unexpected byte-size, expected " + - std::to_string(expected_byte_size) + ", got " + - std::to_string(byte_size) + " for " + name); - } - - if (enforce_memory_type && (memory_type != requested_memory_type)) { - FAIL( - "unexpected memory type, expected to be allocated in " + - std::string(TRITONSERVER_MemoryTypeString(requested_memory_type)) + - ", got " + std::string(TRITONSERVER_MemoryTypeString(memory_type)) + - ", id " + std::to_string(memory_type_id) + " for " + name); - } - - // We make a copy of the data here... which we could avoid for - // performance reasons but ok for this simple example. 
- std::vector& odata = output_data[name]; - switch (memory_type) { - case TRITONSERVER_MEMORY_CPU: { - std::cout << name << " is stored in system memory" << std::endl; - const char* cbase = reinterpret_cast(base); - odata.assign(cbase, cbase + byte_size); - break; - } - - case TRITONSERVER_MEMORY_CPU_PINNED: { - std::cout << name << " is stored in pinned memory" << std::endl; - const char* cbase = reinterpret_cast(base); - odata.assign(cbase, cbase + byte_size); - break; - } - -#ifdef TRITON_ENABLE_GPU - case TRITONSERVER_MEMORY_GPU: { - std::cout << name << " is stored in GPU memory" << std::endl; - odata.reserve(byte_size); - FAIL_IF_CUDA_ERR( - cudaMemcpy(&odata[0], base, byte_size, cudaMemcpyDeviceToHost), - "getting " + name + " data from GPU memory"); - break; - } -#endif - - default: - FAIL("unexpected memory type"); - } - } - - if (is_int) { - CompareResult( - output0, output1, &input0_data[0], &input1_data[0], - output_data[output0].data(), output_data[output1].data()); - } else { - CompareResult( - output0, output1, &input0_data[0], &input1_data[0], - output_data[output0].data(), output_data[output1].data()); - } -} - -} // namespace - -int -main(int argc, char** argv) -{ - std::string model_repository_path; - int verbose_level = 0; - - // Parse commandline... 
- int opt; - while ((opt = getopt(argc, argv, "vm:r:")) != -1) { - switch (opt) { - case 'm': { - enforce_memory_type = true; - if (!strcmp(optarg, "system")) { - requested_memory_type = TRITONSERVER_MEMORY_CPU; - } else if (!strcmp(optarg, "pinned")) { - requested_memory_type = TRITONSERVER_MEMORY_CPU_PINNED; - } else if (!strcmp(optarg, "gpu")) { - requested_memory_type = TRITONSERVER_MEMORY_GPU; - } else { - Usage( - argv, - "-m must be used to specify one of the following types:" - " <\"system\"|\"pinned\"|gpu>"); - } - break; - } - case 'r': - model_repository_path = optarg; - break; - case 'v': - verbose_level = 1; - break; - case '?': - Usage(argv); - break; - } - } - - if (model_repository_path.empty()) { - Usage(argv, "-r must be used to specify model repository path"); - } -#ifndef TRITON_ENABLE_GPU - if (enforce_memory_type && requested_memory_type != TRITONSERVER_MEMORY_CPU) { - Usage(argv, "-m can only be set to \"system\" without enabling GPU"); - } -#endif // TRITON_ENABLE_GPU - - // Check API version. - uint32_t api_version_major, api_version_minor; - FAIL_IF_ERR( - TRITONSERVER_ApiVersion(&api_version_major, &api_version_minor), - "getting Triton API version"); - if ((TRITONSERVER_API_VERSION_MAJOR != api_version_major) || - (TRITONSERVER_API_VERSION_MINOR > api_version_minor)) { - FAIL("triton server API version mismatch"); - } - - // Create the server... 
- TRITONSERVER_ServerOptions* server_options = nullptr; - FAIL_IF_ERR( - TRITONSERVER_ServerOptionsNew(&server_options), - "creating server options"); - FAIL_IF_ERR( - TRITONSERVER_ServerOptionsSetModelRepositoryPath( - server_options, model_repository_path.c_str()), - "setting model repository path"); - FAIL_IF_ERR( - TRITONSERVER_ServerOptionsSetLogVerbose(server_options, verbose_level), - "setting verbose logging level"); - FAIL_IF_ERR( - TRITONSERVER_ServerOptionsSetBackendDirectory( - server_options, "/opt/tritonserver/backends"), - "setting backend directory"); - FAIL_IF_ERR( - TRITONSERVER_ServerOptionsSetRepoAgentDirectory( - server_options, "/opt/tritonserver/repoagents"), - "setting repository agent directory"); - FAIL_IF_ERR( - TRITONSERVER_ServerOptionsSetStrictModelConfig(server_options, true), - "setting strict model configuration"); -#ifdef TRITON_ENABLE_GPU - double min_compute_capability = TRITON_MIN_COMPUTE_CAPABILITY; -#else - double min_compute_capability = 0; -#endif // TRITON_ENABLE_GPU - FAIL_IF_ERR( - TRITONSERVER_ServerOptionsSetMinSupportedComputeCapability( - server_options, min_compute_capability), - "setting minimum supported CUDA compute capability"); - - TRITONSERVER_Server* server_ptr = nullptr; - FAIL_IF_ERR( - TRITONSERVER_ServerNew(&server_ptr, server_options), "creating server"); - FAIL_IF_ERR( - TRITONSERVER_ServerOptionsDelete(server_options), - "deleting server options"); - - std::shared_ptr server( - server_ptr, TRITONSERVER_ServerDelete); - - // Wait until the server is both live and ready. 
- size_t health_iters = 0; - while (true) { - bool live, ready; - FAIL_IF_ERR( - TRITONSERVER_ServerIsLive(server.get(), &live), - "unable to get server liveness"); - FAIL_IF_ERR( - TRITONSERVER_ServerIsReady(server.get(), &ready), - "unable to get server readiness"); - std::cout << "Server Health: live " << live << ", ready " << ready - << std::endl; - if (live && ready) { - break; - } - - if (++health_iters >= 10) { - FAIL("failed to find healthy inference server"); - } - - std::this_thread::sleep_for(std::chrono::milliseconds(500)); - } - - // Print status of the server. - { - TRITONSERVER_Message* server_metadata_message; - FAIL_IF_ERR( - TRITONSERVER_ServerMetadata(server.get(), &server_metadata_message), - "unable to get server metadata message"); - const char* buffer; - size_t byte_size; - FAIL_IF_ERR( - TRITONSERVER_MessageSerializeToJson( - server_metadata_message, &buffer, &byte_size), - "unable to serialize server metadata message"); - - std::cout << "Server Status:" << std::endl; - std::cout << std::string(buffer, byte_size) << std::endl; - - FAIL_IF_ERR( - TRITONSERVER_MessageDelete(server_metadata_message), - "deleting status metadata"); - } - - const std::string model_name("simple"); - - // Wait for the model to become available. 
- bool is_torch_model = false; - bool is_int = true; - bool is_ready = false; - health_iters = 0; - while (!is_ready) { - FAIL_IF_ERR( - TRITONSERVER_ServerModelIsReady( - server.get(), model_name.c_str(), 1, &is_ready), - "unable to get model readiness"); - if (!is_ready) { - if (++health_iters >= 10) { - FAIL("model failed to be ready in 10 iterations"); - } - std::this_thread::sleep_for(std::chrono::milliseconds(500)); - continue; - } - - TRITONSERVER_Message* model_metadata_message; - FAIL_IF_ERR( - TRITONSERVER_ServerModelMetadata( - server.get(), model_name.c_str(), 1, &model_metadata_message), - "unable to get model metadata message"); - const char* buffer; - size_t byte_size; - FAIL_IF_ERR( - TRITONSERVER_MessageSerializeToJson( - model_metadata_message, &buffer, &byte_size), - "unable to serialize model status protobuf"); - - rapidjson::Document model_metadata; - model_metadata.Parse(buffer, byte_size); - if (model_metadata.HasParseError()) { - FAIL( - "error: failed to parse model metadata from JSON: " + - std::string(GetParseError_En(model_metadata.GetParseError())) + - " at " + std::to_string(model_metadata.GetErrorOffset())); - } - - FAIL_IF_ERR( - TRITONSERVER_MessageDelete(model_metadata_message), - "deleting status protobuf"); - - if (strcmp(model_metadata["name"].GetString(), model_name.c_str())) { - FAIL("unable to find metadata for model"); - } - - bool found_version = false; - if (model_metadata.HasMember("versions")) { - for (const auto& version : model_metadata["versions"].GetArray()) { - if (strcmp(version.GetString(), "1") == 0) { - found_version = true; - break; - } - } - } - if (!found_version) { - FAIL("unable to find version 1 status for model"); - } - - FAIL_IF_ERR( - ParseModelMetadata(model_metadata, &is_int, &is_torch_model), - "parsing model metadata"); - } - - // Create the allocator that will be used to allocate buffers for - // the result tensors. 
- TRITONSERVER_ResponseAllocator* allocator = nullptr; - FAIL_IF_ERR( - TRITONSERVER_ResponseAllocatorNew( - &allocator, ResponseAlloc, ResponseRelease, nullptr /* start_fn */), - "creating response allocator"); - - // Inference - TRITONSERVER_InferenceRequest* irequest = nullptr; - FAIL_IF_ERR( - TRITONSERVER_InferenceRequestNew( - &irequest, server.get(), model_name.c_str(), -1 /* model_version */), - "creating inference request"); - - FAIL_IF_ERR( - TRITONSERVER_InferenceRequestSetId(irequest, "my_request_id"), - "setting ID for the request"); - - FAIL_IF_ERR( - TRITONSERVER_InferenceRequestSetReleaseCallback( - irequest, InferRequestComplete, nullptr /* request_release_userp */), - "setting request release callback"); - - // Inputs - auto input0 = is_torch_model ? "INPUT__0" : "INPUT0"; - auto input1 = is_torch_model ? "INPUT__1" : "INPUT1"; - - std::vector input0_shape({1, 16}); - std::vector input1_shape({1, 16}); - - const TRITONSERVER_DataType datatype = - (is_int) ? TRITONSERVER_TYPE_INT32 : TRITONSERVER_TYPE_FP32; - - FAIL_IF_ERR( - TRITONSERVER_InferenceRequestAddInput( - irequest, input0, datatype, &input0_shape[0], input0_shape.size()), - "setting input 0 meta-data for the request"); - FAIL_IF_ERR( - TRITONSERVER_InferenceRequestAddInput( - irequest, input1, datatype, &input1_shape[0], input1_shape.size()), - "setting input 1 meta-data for the request"); - - auto output0 = is_torch_model ? "OUTPUT__0" : "OUTPUT0"; - auto output1 = is_torch_model ? "OUTPUT__1" : "OUTPUT1"; - - FAIL_IF_ERR( - TRITONSERVER_InferenceRequestAddRequestedOutput(irequest, output0), - "requesting output 0 for the request"); - FAIL_IF_ERR( - TRITONSERVER_InferenceRequestAddRequestedOutput(irequest, output1), - "requesting output 1 for the request"); - - // Create the data for the two input tensors. Initialize the first - // to unique values and the second to all ones. 
- std::vector input0_data; - std::vector input1_data; - if (is_int) { - GenerateInputData(&input0_data, &input1_data); - } else { - GenerateInputData(&input0_data, &input1_data); - } - - size_t input0_size = input0_data.size(); - size_t input1_size = input1_data.size(); - - const void* input0_base = &input0_data[0]; - const void* input1_base = &input1_data[0]; -#ifdef TRITON_ENABLE_GPU - std::unique_ptr input0_gpu( - nullptr, cuda_data_deleter); - std::unique_ptr input1_gpu( - nullptr, cuda_data_deleter); - bool use_cuda_memory = - (enforce_memory_type && - (requested_memory_type != TRITONSERVER_MEMORY_CPU)); - if (use_cuda_memory) { - FAIL_IF_CUDA_ERR(cudaSetDevice(0), "setting CUDA device to device 0"); - if (requested_memory_type != TRITONSERVER_MEMORY_CPU_PINNED) { - void* dst; - FAIL_IF_CUDA_ERR( - cudaMalloc(&dst, input0_size), - "allocating GPU memory for INPUT0 data"); - input0_gpu.reset(dst); - FAIL_IF_CUDA_ERR( - cudaMemcpy(dst, &input0_data[0], input0_size, cudaMemcpyHostToDevice), - "setting INPUT0 data in GPU memory"); - FAIL_IF_CUDA_ERR( - cudaMalloc(&dst, input1_size), - "allocating GPU memory for INPUT1 data"); - input1_gpu.reset(dst); - FAIL_IF_CUDA_ERR( - cudaMemcpy(dst, &input1_data[0], input1_size, cudaMemcpyHostToDevice), - "setting INPUT1 data in GPU memory"); - } else { - void* dst; - FAIL_IF_CUDA_ERR( - cudaHostAlloc(&dst, input0_size, cudaHostAllocPortable), - "allocating pinned memory for INPUT0 data"); - input0_gpu.reset(dst); - FAIL_IF_CUDA_ERR( - cudaMemcpy(dst, &input0_data[0], input0_size, cudaMemcpyHostToHost), - "setting INPUT0 data in pinned memory"); - FAIL_IF_CUDA_ERR( - cudaHostAlloc(&dst, input1_size, cudaHostAllocPortable), - "allocating pinned memory for INPUT1 data"); - input1_gpu.reset(dst); - FAIL_IF_CUDA_ERR( - cudaMemcpy(dst, &input1_data[0], input1_size, cudaMemcpyHostToHost), - "setting INPUT1 data in pinned memory"); - } - } - - input0_base = use_cuda_memory ? 
input0_gpu.get() : &input0_data[0]; - input1_base = use_cuda_memory ? input1_gpu.get() : &input1_data[0]; -#endif // TRITON_ENABLE_GPU - - FAIL_IF_ERR( - TRITONSERVER_InferenceRequestAppendInputData( - irequest, input0, input0_base, input0_size, requested_memory_type, - 0 /* memory_type_id */), - "assigning INPUT0 data"); - FAIL_IF_ERR( - TRITONSERVER_InferenceRequestAppendInputData( - irequest, input1, input1_base, input1_size, requested_memory_type, - 0 /* memory_type_id */), - "assigning INPUT1 data"); - - // Perform inference... - { - auto p = new std::promise(); - std::future completed = p->get_future(); - - FAIL_IF_ERR( - TRITONSERVER_InferenceRequestSetResponseCallback( - irequest, allocator, nullptr /* response_allocator_userp */, - InferResponseComplete, reinterpret_cast(p)), - "setting response callback"); - - FAIL_IF_ERR( - TRITONSERVER_ServerInferAsync( - server.get(), irequest, nullptr /* trace */), - "running inference"); - - // Wait for the inference to complete. - TRITONSERVER_InferenceResponse* completed_response = completed.get(); - - FAIL_IF_ERR( - TRITONSERVER_InferenceResponseError(completed_response), - "response status"); - - Check( - completed_response, input0_data, input1_data, output0, output1, - input0_size, datatype, is_int); - - FAIL_IF_ERR( - TRITONSERVER_InferenceResponseDelete(completed_response), - "deleting inference response"); - } - - // Modify some input data in place and then reuse the request - // object. For simplicity we only do this when the input tensors are - // in non-pinned system memory. 
- if (!enforce_memory_type || - (requested_memory_type == TRITONSERVER_MEMORY_CPU)) { - if (is_int) { - int32_t* input0_base = reinterpret_cast(&input0_data[0]); - input0_base[0] = 27; - } else { - float* input0_base = reinterpret_cast(&input0_data[0]); - input0_base[0] = 27.0; - } - - auto p = new std::promise(); - std::future completed = p->get_future(); - - // Using a new promise so have to re-register the callback to set - // the promise as the userp. - FAIL_IF_ERR( - TRITONSERVER_InferenceRequestSetResponseCallback( - irequest, allocator, nullptr /* response_allocator_userp */, - InferResponseComplete, reinterpret_cast(p)), - "setting response callback"); - - FAIL_IF_ERR( - TRITONSERVER_ServerInferAsync( - server.get(), irequest, nullptr /* trace */), - "running inference"); - - // Wait for the inference to complete. - TRITONSERVER_InferenceResponse* completed_response = completed.get(); - FAIL_IF_ERR( - TRITONSERVER_InferenceResponseError(completed_response), - "response status"); - - Check( - completed_response, input0_data, input1_data, output0, output1, - input0_size, datatype, is_int); - - FAIL_IF_ERR( - TRITONSERVER_InferenceResponseDelete(completed_response), - "deleting inference response"); - } - - // Remove input data and then add back different data. - { - FAIL_IF_ERR( - TRITONSERVER_InferenceRequestRemoveAllInputData(irequest, input0), - "removing INPUT0 data"); - FAIL_IF_ERR( - TRITONSERVER_InferenceRequestAppendInputData( - irequest, input0, input1_base, input1_size, requested_memory_type, - 0 /* memory_type_id */), - "assigning INPUT1 data to INPUT0"); - - auto p = new std::promise(); - std::future completed = p->get_future(); - - // Using a new promise so have to re-register the callback to set - // the promise as the userp. 
- FAIL_IF_ERR( - TRITONSERVER_InferenceRequestSetResponseCallback( - irequest, allocator, nullptr /* response_allocator_userp */, - InferResponseComplete, reinterpret_cast(p)), - "setting response callback"); - - FAIL_IF_ERR( - TRITONSERVER_ServerInferAsync( - server.get(), irequest, nullptr /* trace */), - "running inference"); - - // Wait for the inference to complete. - TRITONSERVER_InferenceResponse* completed_response = completed.get(); - FAIL_IF_ERR( - TRITONSERVER_InferenceResponseError(completed_response), - "response status"); - - // Both inputs are using input1_data... - Check( - completed_response, input1_data, input1_data, output0, output1, - input0_size, datatype, is_int); - - FAIL_IF_ERR( - TRITONSERVER_InferenceResponseDelete(completed_response), - "deleting inference response"); - } - - FAIL_IF_ERR( - TRITONSERVER_InferenceRequestDelete(irequest), - "deleting inference request"); - - FAIL_IF_ERR( - TRITONSERVER_ResponseAllocatorDelete(allocator), - "deleting response allocator"); - - return 0; -} diff --git a/tritonserver/samples/simple.java b/tritonserver/samples/simple.java deleted file mode 100644 index b2b543f017e..00000000000 --- a/tritonserver/samples/simple.java +++ /dev/null @@ -1,956 +0,0 @@ -import java.io.*; -import java.util.*; -import org.bytedeco.javacpp.*; -import org.bytedeco.javacpp.BytePointer; - -import org.bytedeco.cuda.cudart.*; -import org.bytedeco.tritonserver.tritonserver.*; -import static org.bytedeco.cuda.global.cudart.*; -import static org.bytedeco.tensorrt.global.tritonserver.*; - -public class Simple { - - static void FAIL(String msg) - { - System.err.println("Cuda failure: " + msg); - System.exit(1); - } - - static void FAIL_IF_ERR(TRITONSERVER_Error X, String MSG) - { - TRITONSERVER_Error err_ = X; - if (err__ != null) { - System.err.println("error: " + MSG - + ":" + TRITONSERVER_ErrorCodeString(err__) + " - " - + TRITONSERVER_ErrorMessage(err__)); - TRITONSERVER_ErrorDelete(err__); - System.exit(1); - } - } - - static 
void FAIL_IF_CUDA_ERR(cudaError_t X, String MSG) - { - cudaError_t err__ = X; - if (err__ != cudaSuccess) { - System.err.println("error: " + MSG - + ":" + TRITONSERVER_ErrorCodeString(err__) + " - " - + cudaGetErrorString(err__)); - System.exit(1); - } - } - - boolean enforce_memory_type = false; - TRITONSERVER_MemoryType requested_memory_type; - - final boolean triton_enable_gpu = false; - if (triton_enable_gpu) - { - public static class cuda_data_deleter extends FunctionPointer { - public void call(Pointer data) { - if (data != null) { - cudaPointerAttributes attr; - auto cuerr = cudaPointerGetAttributes(attr, data); - if (cuerr != cudaSuccess) { - //std::cerr << "error: failed to get CUDA pointer attribute of " << data - // << ": " << cudaGetErrorString(cuerr) << std::endl; - //jack: how to print "Pointer data" here, %what? - System.err.printf("error: failed to get CUDA pointer attribute of %?: %s\n", data, cudaGetErrorString(cuerr)); - } - if (attr.type == cudaMemoryTypeDevice) { - cuerr = cudaFree(data); - } else if (attr.type == cudaMemoryTypeHost) { - cuerr = cudaFreeHost(data); - } - if (cuerr != cudaSuccess) { - //std::cerr << "error: failed to release CUDA pointer " << data << ": " - // << cudaGetErrorString(cuerr) << std::endl; - //jack: how to print "Pointer data" here, %what? - System.err.printf("error: failed to release CUDA pointer %?: %s\n", data, cudaGetErrorString(cuerr)); ?? 
- } - } - - } - } - - } - - void Usage(String[] args, String msg = String) - { - if (!msg.isEmpty()) { - System.err.printf("%s\n", msg); - } - - System.err.printf("Usage: %s [options]\n", argv[0].get()); - System.err.printf("\t-m <\"system\"|\"pinned\"|gpu>\n"); - System.err.printf("Enforce the memory type for input and output tensors.\n"); - System.err.printf("If not specified, inputs will be in system memory and outputs\n"); - System.err.printf("will be based on the model's preferred type.\n"); - System.err.printf("\t-v Enable verbose logging\n"); - System.err.printf("\t-r [model repository absolute path]\n"); - System.err.printf("\t-c Enable web camera input.\n"); - - System.exit(1); - } - - TRITONSERVER_Error ResponseAlloc(TRITONSERVER_ResponseAllocator allocator, - char tensor_name, long byte_size, TRITONSERVER_MemoryType preferred_memory_type, - long preferred_memory_type_id, Pointer userp, PointerPointer buffer, - PointerPointer buffer_userp, TRITONSERVER_MemoryType actual_memory_type, - long actual_memory_type_id) - { - // Initially attempt to make the actual memory type and id that we - // allocate be the same as preferred memory type - actual_memory_type = preferred_memory_type; - actual_memory_type_id = preferred_memory_type_id; - - // If 'byte_size' is zero just return 'buffer' == nullptr, we don't - // need to do any other book-keeping. 
- if (byte_size == 0) { - buffer = null; - buffer_userp = null; - System.out.printf("allocated %d %s\n", byte_size, tensor_name); - } else { - Pointer allocated_ptr = null; - if (enforce_memory_type) { - actual_memory_type = requested_memory_type; - } - - switch (actual_memory_type) { - if (triton_enable_gpu) - { - case TRITONSERVER_MEMORY_CPU_PINNED: { - int err = cudaSetDevice(actual_memory_type_id); - if ((err != cudaSuccess) && (err != cudaErrorNoDevice) && - (err != cudaErrorInsufficientDriver)) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, - new BytePointer("unable to recover current CUDA device: cudaGetErrorString(err)")); - } - - err = cudaHostAlloc(allocated_ptr, byte_size, cudaHostAllocPortable); - if (err != cudaSuccess) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, - new BytePointer("cudaHostAlloc failed: cudaGetErrorString(err)")); - } - break; - } - - case TRITONSERVER_MEMORY_GPU: { - int err = cudaSetDevice(actual_memory_type_id); - if ((err != cudaSuccess) && (err != cudaErrorNoDevice) && - (err != cudaErrorInsufficientDriver)) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, - new BytePointer("unable to recover current CUDA device: cudaGetErrorString(err)")); - } - - err = cudaMalloc(allocated_ptr, byte_size); - if (err != cudaSuccess) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, - new BytePointer("cudaMalloc failed: cudaGetErrorString(err)")); - } - break; - } - } - - // Use CPU memory if the requested memory type is unknown - // (default case). - case TRITONSERVER_MEMORY_CPU: - default: { - actual_memory_type = TRITONSERVER_MEMORY_CPU; - allocated_ptr = new byte[byte_size]; - break; - } - } - - // Pass the tensor name with buffer_userp so we can show it when - // releasing the buffer. 
- if (allocated_ptr != null) { - buffer = allocated_ptr; - buffer_userp = new String(tensor_name); - System.out.printf("allocated %d bytes in %s for result tensor %s\n", byte_size, - TRITONSERVER_MemoryTypeString(actual_memory_type), tensor_name); - } - } - - return null; // Success - } - - TRITONSERVER_Error ResponseRelease(TRITONSERVER_ResponseAllocator allocator, - Pointer buffer, Pointer buffer_userp, long byte_size, TRITONSERVER_MemoryType memory_type, - long memory_type_id) - { - String name = null; - if (buffer_userp != null) { - name = (String)(buffer_userp); - } else { - name = new String(""); - } - - System.out.printf("Releasing buffer of size %d in %s for result %s\n", byte_size, - TRITONSERVER_MemoryTypeString(memory_type), name); - switch (memory_type) { - case TRITONSERVER_MEMORY_CPU: - //jack: for c++ free, I just use "= null", is this correct? - //free(buffer); - buffer = null; - break; - - if (triton_enable_gpu){ - case TRITONSERVER_MEMORY_CPU_PINNED: { - int err = cudaSetDevice(memory_type_id); - if (err == cudaSuccess) { - err = cudaFreeHost(buffer); - } - if (err != cudaSuccess) { - System.err.printf("error: failed to cudaFree: %s.\n", cudaGetErrorString(err)); - } - break; - } - case TRITONSERVER_MEMORY_GPU: { - int err = cudaSetDevice(memory_type_id); - if (err == cudaSuccess) { - err = cudaFree(buffer); - } - if (err != cudaSuccess) { - System.err.printf("error: failed to cudaFree: %s.\n", cudaGetErrorString(err)); - } - break; - } - } - - default: - System.err.printf("error: unexpected buffer allocated in CUDA managed memory.\n"); - break; - } - - name = null; - - return null; // Success - } - - void - InferRequestComplete( - TRITONSERVER_InferenceRequest request, int flags, Pointer userp) - { - // We reuse the request so we don't delete it here. - } - - void - InferResponseComplete( - TRITONSERVER_InferenceResponse response, long flags, Pointer userp) - { - if (response != null) { - // Send 'response' to the future. 
- //jack: how to do with std::promise? and which java object can do with .set_value? - std::promise* p = - reinterpret_cast*>(userp); - p->set_value(response); - p = null; - } - } - - void - InferResponseComplete( - TRITONSERVER_InferenceResponse response, int flags, Pointer userp) - { - if (response != null) { - // Send 'response' to the future. - //jack: how to do with std::promise, set_value can be replaced by which java func? for reinterpret_cast, should be replaced by which one? - std::promise* p = - reinterpret_cast*>(userp); - p->set_value(response); - p = null; - } - } - - TRITONSERVER_Error - ParseModelMetadata( - const rapidjson::Document& model_metadata, boolean is_int, - boolean is_torch_model) - { - String seen_data_type; - for (const auto& input : model_metadata["inputs"].GetArray()) { - if (strcmp(input["datatype"].GetString(), "INT32") && - strcmp(input["datatype"].GetString(), "FP32")) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_UNSUPPORTED, - new BytePointer("simple lib example only supports model with data type INT32 or FP32")); - } - if (seen_data_type.isEmpty()) { - seen_data_type = input["datatype"].GetString(); - } else if (strcmp(seen_data_type.c_str(), input["datatype"].GetString())) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - new BytePointer("the inputs and outputs of 'simple' model must have the data type")); - } - } - for (const auto& output : model_metadata["outputs"].GetArray()) { - if (strcmp(output["datatype"].GetString(), "INT32") && - strcmp(output["datatype"].GetString(), "FP32")) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_UNSUPPORTED, - new BytePointer("simple lib example only supports model with data type INT32 or FP32")); - } else if (strcmp(seen_data_type.c_str(), output["datatype"].GetString())) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INVALID_ARG, - new BytePointer("the inputs and outputs of 'simple' model must have the data type")); - } - } - //jack: check about 
c_str and strcmp - is_int = (strcmp(seen_data_type.c_str(), "INT32") == 0); - is_torch_model = - (strcmp(model_metadata["platform"].GetString(), "pytorch_libtorch") == 0); - return null; - } - - //jack: how to do with template? how to do with resize? - template - void - GenerateInputData( - std::vector* input0_data, std::vector* input1_data) - { - input0_data->resize(16 * sizeof(T)); - input1_data->resize(16 * sizeof(T)); - for (size_t i = 0; i < 16; ++i) { - ((T*)input0_data->data())[i] = i; - ((T*)input1_data->data())[i] = 1; - } - } - - template - void - CompareResult( - String output0_name, String output1_name, - Pointer input0, Pointer input1, Pointer output0, - Pointer output1) - { - for (size_t i = 0; i < 16; ++i) { - std::cout << ((T*)input0)[i] << " + " << ((T*)input1)[i] << " = " - << ((T*)output0)[i] << std::endl; - std::cout << ((T*)input0)[i] << " - " << ((T*)input1)[i] << " = " - << ((T*)output1)[i] << std::endl; - - if ((((T*)input0)[i] + ((T*)input1)[i]) != ((T*)output0)[i]) { - FAIL("incorrect sum in " + output0_name); - } - if ((((T*)input0)[i] - ((T*)input1)[i]) != ((T*)output1)[i]) { - FAIL("incorrect difference in " + output1_name); - } - } - } - - void - Check( - TRITONSERVER_InferenceResponse response, - char[] input0_data, char[] input1_data, - String output0, String output1, - long expected_byte_size, - TRITONSERVER_DataType expected_datatype, boolean is_int) - { - //jack: how to do with unordered_map? - std::unordered_map> output_data; - - long output_count; - FAIL_IF_ERR( - TRITONSERVER_InferenceResponseOutputCount(response, output_count), - "getting number of response outputs"); - if (output_count != 2) { - FAIL("expecting 2 response outputs, got " + String(output_count)); - } - - for (long idx = 0; idx < output_count; ++idx) { - BytePointer cname; - TRITONSERVER_DataType datatype; - //jack: is there PointerLong? int64 should be long, right? 
- const int64_t* shape; - long dim_count; - Pointer base; - long byte_size; - TRITONSERVER_MemoryType memory_type; - long memory_type_id; - Pointer userp; - - FAIL_IF_ERR( - TRITONSERVER_InferenceResponseOutput( - response, idx, cname, datatype, shape, dim_count, base, - byte_size, memory_type, memory_type_id, userp), - "getting output info"); - - if (cname == null) { - FAIL("unable to get output name"); - } - - String name(cname); - if ((name != output0) && (name != output1)) { - FAIL("unexpected output '" + name + "'"); - } - //jack: when the above shape issue fixed, will change this to some position stuff - if ((dim_count != 2) || (shape[0] != 1) || (shape[1] != 16)) { - FAIL("unexpected shape for '" + name + "'"); - } - - if (datatype != expected_datatype) { - FAIL( - "unexpected datatype '" + - String(TRITONSERVER_DataTypeString(datatype)) + "' for '" + - name + "'"); - } - - if (byte_size != expected_byte_size) { - FAIL( - "unexpected byte-size, expected " + - String(expected_byte_size) + ", got " + - String(byte_size) + " for " + name); - } - - if (enforce_memory_type && (memory_type != requested_memory_type)) { - FAIL( - "unexpected memory type, expected to be allocated in " + - String(TRITONSERVER_MemoryTypeString(requested_memory_type)) + - ", got " + String(TRITONSERVER_MemoryTypeString(memory_type)) + - ", id " + String(memory_type_id) + " for " + name); - } - - // We make a copy of the data here... which we could avoid for - // performance reasons but ok for this simple example. - //jack: change this when unordered_map is fixed - char[] odata = output_data[name]; - //jack: how to do with std::vector func of assign? 
- switch (memory_type) { - case TRITONSERVER_MEMORY_CPU: { - std::cout << name << " is stored in system memory" << std::endl; - const char* cbase = reinterpret_cast(base); - odata.assign(cbase, cbase + byte_size); - break; - } - - case TRITONSERVER_MEMORY_CPU_PINNED: { - std::cout << name << " is stored in pinned memory" << std::endl; - const char* cbase = reinterpret_cast(base); - odata.assign(cbase, cbase + byte_size); - break; - } - - if (triton_enable_gpu) - { - case TRITONSERVER_MEMORY_GPU: { - std::cout << name << " is stored in GPU memory" << std::endl; - odata.reserve(byte_size); - FAIL_IF_CUDA_ERR( - cudaMemcpy(&odata[0], base, byte_size, cudaMemcpyDeviceToHost), - "getting " + name + " data from GPU memory"); - break; - } - } - - default: - FAIL("unexpected memory type"); - } - } - - if (is_int) { - CompareResult( - output0, output1, &input0_data[0], &input1_data[0], - output_data[output0].data(), output_data[output1].data()); - } else { - CompareResult( - output0, output1, &input0_data[0], &input1_data[0], - output_data[output0].data(), output_data[output1].data()); - } - } - - - public static void main(String[] args) - { - String model_repository_path; - int verbose_level = 0; - - // Parse commandline... - //jack: how to do arg check in java, any reference? 
- int opt; - while ((opt = getopt(argc, argv, "vm:r:")) != -1) { - switch (opt) { - case 'm': { - enforce_memory_type = true; - if (!strcmp(optarg, "system")) { - requested_memory_type = TRITONSERVER_MEMORY_CPU; - } else if (!strcmp(optarg, "pinned")) { - requested_memory_type = TRITONSERVER_MEMORY_CPU_PINNED; - } else if (!strcmp(optarg, "gpu")) { - requested_memory_type = TRITONSERVER_MEMORY_GPU; - } else { - Usage( - argv, - "-m must be used to specify one of the following types:" - " <\"system\"|\"pinned\"|gpu>"); - } - break; - } - case 'r': - model_repository_path = optarg; - break; - case 'v': - verbose_level = 1; - break; - case '?': - Usage(argv); - break; - } - } - - if (model_repository_path.isEmpty()) { - Usage(argv, "-r must be used to specify model repository path"); - } - if (triton_enable_gpu) - { - if (enforce_memory_type && requested_memory_type != TRITONSERVER_MEMORY_CPU) { - Usage(argv, "-m can only be set to \"system\" without enabling GPU"); - } - } - - // Check API version. - long api_version_major, api_version_minor; - FAIL_IF_ERR( - TRITONSERVER_ApiVersion(api_version_major, api_version_minor), - "getting Triton API version"); - if ((TRITONSERVER_API_VERSION_MAJOR != api_version_major) || - (TRITONSERVER_API_VERSION_MINOR > api_version_minor)) { - FAIL("triton server API version mismatch"); - } - - // Create the server... 
- TRITONSERVER_ServerOptions server_options = null; - FAIL_IF_ERR( - TRITONSERVER_ServerOptionsNew(server_options), - "creating server options"); - FAIL_IF_ERR( - TRITONSERVER_ServerOptionsSetModelRepositoryPath( - server_options, model_repository_path.c_str()), - "setting model repository path"); - FAIL_IF_ERR( - TRITONSERVER_ServerOptionsSetLogVerbose(server_options, verbose_level), - "setting verbose logging level"); - FAIL_IF_ERR( - TRITONSERVER_ServerOptionsSetBackendDirectory( - server_options, "/opt/tritonserver/backends"), - "setting backend directory"); - FAIL_IF_ERR( - TRITONSERVER_ServerOptionsSetRepoAgentDirectory( - server_options, "/opt/tritonserver/repoagents"), - "setting repository agent directory"); - FAIL_IF_ERR( - TRITONSERVER_ServerOptionsSetStrictModelConfig(server_options, true), - "setting strict model configuration"); - if (triton_enable_gpu) - { - double min_compute_capability = TRITON_MIN_COMPUTE_CAPABILITY; - } - else - { - double min_compute_capability = 0; - } - FAIL_IF_ERR( - TRITONSERVER_ServerOptionsSetMinSupportedComputeCapability( - server_options, min_compute_capability), - "setting minimum supported CUDA compute capability"); - - TRITONSERVER_Server server_ptr = null; - FAIL_IF_ERR( - TRITONSERVER_ServerNew(server_ptr, server_options), "creating server"); - FAIL_IF_ERR( - TRITONSERVER_ServerOptionsDelete(server_options), - "deleting server options"); - //jack: how to do with shared_ptr here? - std::shared_ptr server( - server_ptr, TRITONSERVER_ServerDelete); - - // Wait until the server is both live and ready. - long health_iters = 0; - while (true) { - boolean live, ready; - FAIL_IF_ERR( - //jack: how to do with get func of shared_ptr? 
- TRITONSERVER_ServerIsLive(server.get(), &live), - "unable to get server liveness"); - FAIL_IF_ERR( - TRITONSERVER_ServerIsReady(server.get(), &ready), - "unable to get server readiness"); - System.out.println("Server Health: live" + ", ready"); - - if (live && ready) { - break; - } - - if (++health_iters >= 10) { - FAIL("failed to find healthy inference server"); - } - - Thread.sleep(500); - } - - // Print status of the server. - { - TRITONSERVER_Message server_metadata_message; - FAIL_IF_ERR( - TRITONSERVER_ServerMetadata(server.get(), server_metadata_message), - "unable to get server metadata message"); - Pointer buffer; - long byte_size; - FAIL_IF_ERR( - TRITONSERVER_MessageSerializeToJson( - server_metadata_message, buffer, byte_size), - "unable to serialize server metadata message"); - - System.out.println("Server Status: "); - System.out.println(String(buffer, byte_size)); - - FAIL_IF_ERR( - TRITONSERVER_MessageDelete(server_metadata_message), - "deleting status metadata"); - } - //jack: is this right?? - String model_name = "simple"; - - // Wait for the model to become available. 
- boolean is_torch_model = false; - boolean is_int = true; - boolean is_ready = false; - health_iters = 0; - while (!is_ready) { - FAIL_IF_ERR( - TRITONSERVER_ServerModelIsReady( - server.get(), model_name.c_str(), 1, &is_ready), - "unable to get model readiness"); - if (!is_ready) { - if (++health_iters >= 10) { - FAIL("model failed to be ready in 10 iterations"); - } - Thread.sleep(500); - continue; - } - - TRITONSERVER_Message model_metadata_message; - FAIL_IF_ERR( - TRITONSERVER_ServerModelMetadata( - server.get(), model_name.c_str(), 1, model_metadata_message), - "unable to get model metadata message"); - Pointer buffer; - long byte_size; - FAIL_IF_ERR( - TRITONSERVER_MessageSerializeToJson( - model_metadata_message, buffer, byte_size), - "unable to serialize model status protobuf"); - - rapidjson::Document model_metadata; - model_metadata.Parse(buffer, byte_size); - if (model_metadata.HasParseError()) { - FAIL( - "error: failed to parse model metadata from JSON: " + - String(GetParseError_En(model_metadata.GetParseError())) + - " at " + String(model_metadata.GetErrorOffset())); - } - - FAIL_IF_ERR( - TRITONSERVER_MessageDelete(model_metadata_message), - "deleting status protobuf"); - //jack: how to do with strcmp? - if (strcmp(model_metadata["name"].GetString(), model_name.c_str())) { - FAIL("unable to find metadata for model"); - } - - boolean found_version = false; - if (model_metadata.HasMember("versions")) { - //jack: how to set type for auto here? - for (const auto& version : model_metadata["versions"].GetArray()) { - if (strcmp(version.GetString(), "1") == 0) { - found_version = true; - break; - } - } - } - if (!found_version) { - FAIL("unable to find version 1 status for model"); - } - - FAIL_IF_ERR( - ParseModelMetadata(model_metadata, is_int, is_torch_model), - "parsing model metadata"); - } - - // Create the allocator that will be used to allocate buffers for - // the result tensors. 
- TRITONSERVER_ResponseAllocator allocator = null; - FAIL_IF_ERR( - TRITONSERVER_ResponseAllocatorNew( - allocator, ResponseAlloc, ResponseRelease, null /* start_fn */), - "creating response allocator"); - - // Inference - TRITONSERVER_InferenceRequest irequest = null; - FAIL_IF_ERR( - TRITONSERVER_InferenceRequestNew( - irequest, server.get(), model_name.c_str(), -1 /* model_version */), - "creating inference request"); - - FAIL_IF_ERR( - TRITONSERVER_InferenceRequestSetId(irequest, "my_request_id"), - "setting ID for the request"); - - FAIL_IF_ERR( - TRITONSERVER_InferenceRequestSetReleaseCallback( - irequest, InferRequestComplete, null /* request_release_userp */), - "setting request release callback"); - - // Inputs - //jack: dont know how to do with this - auto input0 = is_torch_model ? "INPUT__0" : "INPUT0"; - auto input1 = is_torch_model ? "INPUT__1" : "INPUT1"; - //jack: how to do this with long []? - std::vector input0_shape({1, 16}); - std::vector input1_shape({1, 16}); - - TRITONSERVER_DataType datatype = - (is_int) ? TRITONSERVER_TYPE_INT32 : TRITONSERVER_TYPE_FP32; - - FAIL_IF_ERR( - TRITONSERVER_InferenceRequestAddInput( - irequest, input0, datatype, &input0_shape[0], input0_shape.size()), - "setting input 0 meta-data for the request"); - FAIL_IF_ERR( - TRITONSERVER_InferenceRequestAddInput( - irequest, input1, datatype, &input1_shape[0], input1_shape.size()), - "setting input 1 meta-data for the request"); - //jack: how to set this auto? - auto output0 = is_torch_model ? "OUTPUT__0" : "OUTPUT0"; - auto output1 = is_torch_model ? "OUTPUT__1" : "OUTPUT1"; - - FAIL_IF_ERR( - TRITONSERVER_InferenceRequestAddRequestedOutput(irequest, output0), - "requesting output 0 for the request"); - FAIL_IF_ERR( - TRITONSERVER_InferenceRequestAddRequestedOutput(irequest, output1), - "requesting output 1 for the request"); - - // Create the data for the two input tensors. Initialize the first - // to unique values and the second to all ones. 
- std::vector input0_data; - std::vector input1_data; - char[] input0_data; - char[] input1_data; - //jack: will do this if template is fixed - if (is_int) { - GenerateInputData(&input0_data, &input1_data); - } else { - GenerateInputData(&input0_data, &input1_data); - } - //jack: how to do size of char[]? - size_t input0_size = input0_data.size(); - size_t input1_size = input1_data.size(); - - const void* input0_base = &input0_data[0]; - const void* input1_base = &input1_data[0]; - - if (triton_enable_gpu) - { - //jack: how to do with this? - std::unique_ptr input0_gpu( - nullptr, cuda_data_deleter); - std::unique_ptr input1_gpu( - nullptr, cuda_data_deleter); - boolean use_cuda_memory = - (enforce_memory_type && - (requested_memory_type != TRITONSERVER_MEMORY_CPU)); - if (use_cuda_memory) { - FAIL_IF_CUDA_ERR(cudaSetDevice(0), "setting CUDA device to device 0"); - if (requested_memory_type != TRITONSERVER_MEMORY_CPU_PINNED) { - Pointer dst; - FAIL_IF_CUDA_ERR( - cudaMalloc(dst, input0_size), - "allocating GPU memory for INPUT0 data"); - input0_gpu.reset(dst); - FAIL_IF_CUDA_ERR( - cudaMemcpy(dst, &input0_data[0], input0_size, cudaMemcpyHostToDevice), - "setting INPUT0 data in GPU memory"); - FAIL_IF_CUDA_ERR( - cudaMalloc(&dst, input1_size), - "allocating GPU memory for INPUT1 data"); - input1_gpu.reset(dst); - FAIL_IF_CUDA_ERR( - cudaMemcpy(dst, &input1_data[0], input1_size, cudaMemcpyHostToDevice), - "setting INPUT1 data in GPU memory"); - } else { - Pointer dst; - FAIL_IF_CUDA_ERR( - cudaHostAlloc(dst, input0_size, cudaHostAllocPortable), - "allocating pinned memory for INPUT0 data"); - input0_gpu.reset(dst); - FAIL_IF_CUDA_ERR( - cudaMemcpy(dst, &input0_data[0], input0_size, cudaMemcpyHostToHost), - "setting INPUT0 data in pinned memory"); - FAIL_IF_CUDA_ERR( - cudaHostAlloc(dst, input1_size, cudaHostAllocPortable), - "allocating pinned memory for INPUT1 data"); - input1_gpu.reset(dst); - FAIL_IF_CUDA_ERR( - cudaMemcpy(dst, &input1_data[0], input1_size, 
cudaMemcpyHostToHost), - "setting INPUT1 data in pinned memory"); - } - } - - input0_base = use_cuda_memory ? input0_gpu.get() : &input0_data[0]; - input1_base = use_cuda_memory ? input1_gpu.get() : &input1_data[0]; - } - - FAIL_IF_ERR( - TRITONSERVER_InferenceRequestAppendInputData( - irequest, input0, input0_base, input0_size, requested_memory_type, - 0 /* memory_type_id */), - "assigning INPUT0 data"); - FAIL_IF_ERR( - TRITONSERVER_InferenceRequestAppendInputData( - irequest, input1, input1_base, input1_size, requested_memory_type, - 0 /* memory_type_id */), - "assigning INPUT1 data"); - - // Perform inference... - { - //jack: how to do with std::promise - auto p = new std::promise(); - //jack: how to do with std::future - std::future completed = p->get_future(); - - FAIL_IF_ERR( - TRITONSERVER_InferenceRequestSetResponseCallback( - irequest, allocator, null /* response_allocator_userp */, - InferResponseComplete, reinterpret_cast(p)), - "setting response callback"); - - FAIL_IF_ERR( - TRITONSERVER_ServerInferAsync( - server.get(), irequest, null /* trace */), - "running inference"); - - // Wait for the inference to complete. - TRITONSERVER_InferenceResponse completed_response = completed.get(); - - FAIL_IF_ERR( - TRITONSERVER_InferenceResponseError(completed_response), - "response status"); - - Check( - completed_response, input0_data, input1_data, output0, output1, - input0_size, datatype, is_int); - - FAIL_IF_ERR( - TRITONSERVER_InferenceResponseDelete(completed_response), - "deleting inference response"); - } - - // Modify some input data in place and then reuse the request - // object. For simplicity we only do this when the input tensors are - // in non-pinned system memory. - if (!enforce_memory_type || - (requested_memory_type == TRITONSERVER_MEMORY_CPU)) { - if (is_int) { - //jack: how to do with reinterpret_cast? 
- int32_t* input0_base = reinterpret_cast(&input0_data[0]); - input0_base[0] = 27; - } else { - float* input0_base = reinterpret_cast(&input0_data[0]); - input0_base[0] = 27.0; - } - //jack: promise and future - auto p = new std::promise(); - std::future completed = p->get_future(); - - // Using a new promise so have to re-register the callback to set - // the promise as the userp. - FAIL_IF_ERR( - TRITONSERVER_InferenceRequestSetResponseCallback( - irequest, allocator, null /* response_allocator_userp */, - InferResponseComplete, reinterpret_cast(p)), - "setting response callback"); - - FAIL_IF_ERR( - TRITONSERVER_ServerInferAsync( - server.get(), irequest, null /* trace */), - "running inference"); - - // Wait for the inference to complete. - TRITONSERVER_InferenceResponse completed_response = completed.get(); - FAIL_IF_ERR( - TRITONSERVER_InferenceResponseError(completed_response), - "response status"); - - Check( - completed_response, input0_data, input1_data, output0, output1, - input0_size, datatype, is_int); - - FAIL_IF_ERR( - TRITONSERVER_InferenceResponseDelete(completed_response), - "deleting inference response"); - } - - // Remove input data and then add back different data. - { - FAIL_IF_ERR( - TRITONSERVER_InferenceRequestRemoveAllInputData(irequest, input0), - "removing INPUT0 data"); - FAIL_IF_ERR( - TRITONSERVER_InferenceRequestAppendInputData( - irequest, input0, input1_base, input1_size, requested_memory_type, - 0 /* memory_type_id */), - "assigning INPUT1 data to INPUT0"); - - auto p = new std::promise(); - std::future completed = p->get_future(); - - // Using a new promise so have to re-register the callback to set - // the promise as the userp. 
- FAIL_IF_ERR( - TRITONSERVER_InferenceRequestSetResponseCallback( - irequest, allocator, null /* response_allocator_userp */, - InferResponseComplete, reinterpret_cast(p)), - "setting response callback"); - - FAIL_IF_ERR( - TRITONSERVER_ServerInferAsync( - server.get(), irequest, null /* trace */), - "running inference"); - - // Wait for the inference to complete. - TRITONSERVER_InferenceResponse completed_response = completed.get(); - FAIL_IF_ERR( - TRITONSERVER_InferenceResponseError(completed_response), - "response status"); - - // Both inputs are using input1_data... - Check( - completed_response, input1_data, input1_data, output0, output1, - input0_size, datatype, is_int); - - FAIL_IF_ERR( - TRITONSERVER_InferenceResponseDelete(completed_response), - "deleting inference response"); - } - - FAIL_IF_ERR( - TRITONSERVER_InferenceRequestDelete(irequest), - "deleting inference request"); - - FAIL_IF_ERR( - TRITONSERVER_ResponseAllocatorDelete(allocator), - "deleting response allocator"); - - System.exit(0); - } - - - - -} - - diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/global/tritonserver.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/global/tritonserver.java index 97859a1fe6d..c700d34048d 100644 --- a/tritonserver/src/gen/java/org/bytedeco/tritonserver/global/tritonserver.java +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/global/tritonserver.java @@ -31,7 +31,7 @@ public class tritonserver extends org.bytedeco.tritonserver.presets.tritonserver // Parsed from tritonserver.h -// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +// Copyright 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions @@ -136,7 +136,7 @@ public class tritonserver extends org.bytedeco.tritonserver.presets.tritonserver public static final int TRITONSERVER_API_VERSION_MAJOR = 1; /// -public static final int TRITONSERVER_API_VERSION_MINOR = 3; +public static final int TRITONSERVER_API_VERSION_MINOR = 4; /** Get the TRITONBACKEND API version supported by the Triton shared * library. This value can be compared against the @@ -163,28 +163,22 @@ public static native TRITONSERVER_Error TRITONSERVER_ApiVersion( * * Tensor data types recognized by TRITONSERVER. * */ -public enum TRITONSERVER_DataType { - TRITONSERVER_TYPE_INVALID(0), - TRITONSERVER_TYPE_BOOL(1), - TRITONSERVER_TYPE_UINT8(2), - TRITONSERVER_TYPE_UINT16(3), - TRITONSERVER_TYPE_UINT32(4), - TRITONSERVER_TYPE_UINT64(5), - TRITONSERVER_TYPE_INT8(6), - TRITONSERVER_TYPE_INT16(7), - TRITONSERVER_TYPE_INT32(8), - TRITONSERVER_TYPE_INT64(9), - TRITONSERVER_TYPE_FP16(10), - TRITONSERVER_TYPE_FP32(11), - TRITONSERVER_TYPE_FP64(12), - TRITONSERVER_TYPE_BYTES(13); - - public final int value; - private TRITONSERVER_DataType(int v) { this.value = v; } - private TRITONSERVER_DataType(TRITONSERVER_DataType e) { this.value = e.value; } - public TRITONSERVER_DataType intern() { for (TRITONSERVER_DataType e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} +/** enum TRITONSERVER_DataType */ +public static final int + TRITONSERVER_TYPE_INVALID = 0, + TRITONSERVER_TYPE_BOOL = 1, + TRITONSERVER_TYPE_UINT8 = 2, + TRITONSERVER_TYPE_UINT16 = 3, + TRITONSERVER_TYPE_UINT32 = 4, + TRITONSERVER_TYPE_UINT64 = 5, + TRITONSERVER_TYPE_INT8 = 6, + TRITONSERVER_TYPE_INT16 = 7, + TRITONSERVER_TYPE_INT32 = 8, + TRITONSERVER_TYPE_INT64 = 9, + TRITONSERVER_TYPE_FP16 = 10, + TRITONSERVER_TYPE_FP32 = 11, + TRITONSERVER_TYPE_FP64 = 12, + 
TRITONSERVER_TYPE_BYTES = 13; /** Get the string representation of a data type. The returned string * is not owned by the caller and so should not be modified or freed. @@ -194,8 +188,6 @@ public enum TRITONSERVER_DataType { /// public static native String TRITONSERVER_DataTypeString( - TRITONSERVER_DataType datatype); -public static native @Cast("const char*") BytePointer TRITONSERVER_DataTypeString( @Cast("TRITONSERVER_DataType") int datatype); /** Get the Triton datatype corresponding to a string representation @@ -206,7 +198,7 @@ public static native String TRITONSERVER_DataTypeString( * string does not represent a data type. */ /// -public static native TRITONSERVER_DataType TRITONSERVER_StringToDataType(String dtype); +public static native @Cast("TRITONSERVER_DataType") int TRITONSERVER_StringToDataType(String dtype); public static native @Cast("TRITONSERVER_DataType") int TRITONSERVER_StringToDataType(@Cast("const char*") BytePointer dtype); /** Get the size of a Triton datatype in bytes. Zero is returned for @@ -218,24 +210,17 @@ public static native String TRITONSERVER_DataTypeString( /// /// -public static native @Cast("uint32_t") int TRITONSERVER_DataTypeByteSize(TRITONSERVER_DataType datatype); public static native @Cast("uint32_t") int TRITONSERVER_DataTypeByteSize(@Cast("TRITONSERVER_DataType") int datatype); /** TRITONSERVER_MemoryType * * Types of memory recognized by TRITONSERVER. 
* */ -public enum TRITONSERVER_MemoryType { - TRITONSERVER_MEMORY_CPU(0), - TRITONSERVER_MEMORY_CPU_PINNED(1), - TRITONSERVER_MEMORY_GPU(2); - - public final int value; - private TRITONSERVER_MemoryType(int v) { this.value = v; } - private TRITONSERVER_MemoryType(TRITONSERVER_MemoryType e) { this.value = e.value; } - public TRITONSERVER_MemoryType intern() { for (TRITONSERVER_MemoryType e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} +/** enum TRITONSERVER_MemoryType */ +public static final int + TRITONSERVER_MEMORY_CPU = 0, + TRITONSERVER_MEMORY_CPU_PINNED = 1, + TRITONSERVER_MEMORY_GPU = 2; /** Get the string representation of a memory type. The returned * string is not owned by the caller and so should not be modified or @@ -247,25 +232,17 @@ public enum TRITONSERVER_MemoryType { /// /// public static native String TRITONSERVER_MemoryTypeString( - TRITONSERVER_MemoryType memtype); -public static native @Cast("const char*") BytePointer TRITONSERVER_MemoryTypeString( @Cast("TRITONSERVER_MemoryType") int memtype); /** TRITONSERVER_ParameterType * * Types of parameters recognized by TRITONSERVER. * */ -public enum TRITONSERVER_ParameterType { - TRITONSERVER_PARAMETER_STRING(0), - TRITONSERVER_PARAMETER_INT(1), - TRITONSERVER_PARAMETER_BOOL(2); - - public final int value; - private TRITONSERVER_ParameterType(int v) { this.value = v; } - private TRITONSERVER_ParameterType(TRITONSERVER_ParameterType e) { this.value = e.value; } - public TRITONSERVER_ParameterType intern() { for (TRITONSERVER_ParameterType e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} +/** enum TRITONSERVER_ParameterType */ +public static final int + TRITONSERVER_PARAMETER_STRING = 0, + TRITONSERVER_PARAMETER_INT = 1, + TRITONSERVER_PARAMETER_BOOL = 2; /** Get the string representation of a parmeter type. 
The returned * string is not owned by the caller and so should not be modified or @@ -277,26 +254,18 @@ public enum TRITONSERVER_ParameterType { /// /// public static native String TRITONSERVER_ParameterTypeString( - TRITONSERVER_ParameterType paramtype); -public static native @Cast("const char*") BytePointer TRITONSERVER_ParameterTypeString( @Cast("TRITONSERVER_ParameterType") int paramtype); /** TRITONSERVER_InstanceGroupKind * * Kinds of instance groups recognized by TRITONSERVER. * */ -public enum TRITONSERVER_InstanceGroupKind { - TRITONSERVER_INSTANCEGROUPKIND_AUTO(0), - TRITONSERVER_INSTANCEGROUPKIND_CPU(1), - TRITONSERVER_INSTANCEGROUPKIND_GPU(2), - TRITONSERVER_INSTANCEGROUPKIND_MODEL(3); - - public final int value; - private TRITONSERVER_InstanceGroupKind(int v) { this.value = v; } - private TRITONSERVER_InstanceGroupKind(TRITONSERVER_InstanceGroupKind e) { this.value = e.value; } - public TRITONSERVER_InstanceGroupKind intern() { for (TRITONSERVER_InstanceGroupKind e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} +/** enum TRITONSERVER_InstanceGroupKind */ +public static final int + TRITONSERVER_INSTANCEGROUPKIND_AUTO = 0, + TRITONSERVER_INSTANCEGROUPKIND_CPU = 1, + TRITONSERVER_INSTANCEGROUPKIND_GPU = 2, + TRITONSERVER_INSTANCEGROUPKIND_MODEL = 3; /** Get the string representation of an instance-group kind. The * returned string is not owned by the caller and so should not be @@ -308,26 +277,18 @@ public enum TRITONSERVER_InstanceGroupKind { /// /// public static native String TRITONSERVER_InstanceGroupKindString( - TRITONSERVER_InstanceGroupKind kind); -public static native @Cast("const char*") BytePointer TRITONSERVER_InstanceGroupKindString( @Cast("TRITONSERVER_InstanceGroupKind") int kind); /** TRITONSERVER_Logging * * Types/levels of logging. 
* */ -public enum TRITONSERVER_LogLevel { - TRITONSERVER_LOG_INFO(0), - TRITONSERVER_LOG_WARN(1), - TRITONSERVER_LOG_ERROR(2), - TRITONSERVER_LOG_VERBOSE(3); - - public final int value; - private TRITONSERVER_LogLevel(int v) { this.value = v; } - private TRITONSERVER_LogLevel(TRITONSERVER_LogLevel e) { this.value = e.value; } - public TRITONSERVER_LogLevel intern() { for (TRITONSERVER_LogLevel e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} +/** enum TRITONSERVER_LogLevel */ +public static final int + TRITONSERVER_LOG_INFO = 0, + TRITONSERVER_LOG_WARN = 1, + TRITONSERVER_LOG_ERROR = 2, + TRITONSERVER_LOG_VERBOSE = 3; /** Is a log level enabled? * @@ -335,8 +296,6 @@ public enum TRITONSERVER_LogLevel { * @return True if the log level is enabled, false if not enabled. */ /// -public static native @Cast("bool") boolean TRITONSERVER_LogIsEnabled( - TRITONSERVER_LogLevel level); public static native @Cast("bool") boolean TRITONSERVER_LogIsEnabled( @Cast("TRITONSERVER_LogLevel") int level); @@ -352,7 +311,7 @@ public enum TRITONSERVER_LogLevel { /// /// public static native TRITONSERVER_Error TRITONSERVER_LogMessage( - TRITONSERVER_LogLevel level, String filename, int line, + @Cast("TRITONSERVER_LogLevel") int level, String filename, int line, String msg); public static native TRITONSERVER_Error TRITONSERVER_LogMessage( @Cast("TRITONSERVER_LogLevel") int level, @Cast("const char*") BytePointer filename, int line, @@ -370,21 +329,15 @@ public static native TRITONSERVER_Error TRITONSERVER_LogMessage( *

* The TRITONSERVER_Error error codes */ -public enum TRITONSERVER_Error_Code { - TRITONSERVER_ERROR_UNKNOWN(0), - TRITONSERVER_ERROR_INTERNAL(1), - TRITONSERVER_ERROR_NOT_FOUND(2), - TRITONSERVER_ERROR_INVALID_ARG(3), - TRITONSERVER_ERROR_UNAVAILABLE(4), - TRITONSERVER_ERROR_UNSUPPORTED(5), - TRITONSERVER_ERROR_ALREADY_EXISTS(6); - - public final int value; - private TRITONSERVER_Error_Code(int v) { this.value = v; } - private TRITONSERVER_Error_Code(TRITONSERVER_Error_Code e) { this.value = e.value; } - public TRITONSERVER_Error_Code intern() { for (TRITONSERVER_Error_Code e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} +/** enum TRITONSERVER_Error_Code */ +public static final int + TRITONSERVER_ERROR_UNKNOWN = 0, + TRITONSERVER_ERROR_INTERNAL = 1, + TRITONSERVER_ERROR_NOT_FOUND = 2, + TRITONSERVER_ERROR_INVALID_ARG = 3, + TRITONSERVER_ERROR_UNAVAILABLE = 4, + TRITONSERVER_ERROR_UNSUPPORTED = 5, + TRITONSERVER_ERROR_ALREADY_EXISTS = 6; /** Create a new error object. The caller takes ownership of the * TRITONSERVER_Error object and must call TRITONSERVER_ErrorDelete to @@ -396,7 +349,7 @@ public enum TRITONSERVER_Error_Code { /// public static native TRITONSERVER_Error TRITONSERVER_ErrorNew( - TRITONSERVER_Error_Code code, String msg); + @Cast("TRITONSERVER_Error_Code") int code, String msg); public static native TRITONSERVER_Error TRITONSERVER_ErrorNew( @Cast("TRITONSERVER_Error_Code") int code, @Cast("const char*") BytePointer msg); @@ -413,7 +366,7 @@ public static native TRITONSERVER_Error TRITONSERVER_ErrorNew( * @return The error code. */ /// -public static native TRITONSERVER_Error_Code TRITONSERVER_ErrorCode(TRITONSERVER_Error error); +public static native @Cast("TRITONSERVER_Error_Code") int TRITONSERVER_ErrorCode(TRITONSERVER_Error error); /** Get the string representation of an error code. 
The returned * string is not owned by the caller and so should not be modified or @@ -582,15 +535,9 @@ public static native TRITONSERVER_Error TRITONSERVER_MessageSerializeToJson( *

* Metric format types */ -public enum TRITONSERVER_MetricFormat { - TRITONSERVER_METRIC_PROMETHEUS(0); - - public final int value; - private TRITONSERVER_MetricFormat(int v) { this.value = v; } - private TRITONSERVER_MetricFormat(TRITONSERVER_MetricFormat e) { this.value = e.value; } - public TRITONSERVER_MetricFormat intern() { for (TRITONSERVER_MetricFormat e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} +/** enum TRITONSERVER_MetricFormat */ +public static final int + TRITONSERVER_METRIC_PROMETHEUS = 0; /** Delete a metrics object. * @@ -626,23 +573,14 @@ public static native TRITONSERVER_Error TRITONSERVER_MetricsDelete( /// /// -public static native TRITONSERVER_Error TRITONSERVER_MetricsFormatted( - TRITONSERVER_Metrics metrics, TRITONSERVER_MetricFormat format, - @Cast("const char**") PointerPointer base, @Cast("size_t*") SizeTPointer byte_size); -public static native TRITONSERVER_Error TRITONSERVER_MetricsFormatted( - TRITONSERVER_Metrics metrics, TRITONSERVER_MetricFormat format, - @Cast("const char**") @ByPtrPtr BytePointer base, @Cast("size_t*") SizeTPointer byte_size); public static native TRITONSERVER_Error TRITONSERVER_MetricsFormatted( TRITONSERVER_Metrics metrics, @Cast("TRITONSERVER_MetricFormat") int format, - @Cast("const char**") @ByPtrPtr ByteBuffer base, @Cast("size_t*") SizeTPointer byte_size); -public static native TRITONSERVER_Error TRITONSERVER_MetricsFormatted( - TRITONSERVER_Metrics metrics, TRITONSERVER_MetricFormat format, - @Cast("const char**") @ByPtrPtr byte[] base, @Cast("size_t*") SizeTPointer byte_size); + @Cast("const char**") PointerPointer base, @Cast("size_t*") SizeTPointer byte_size); public static native TRITONSERVER_Error TRITONSERVER_MetricsFormatted( TRITONSERVER_Metrics metrics, @Cast("TRITONSERVER_MetricFormat") int format, @Cast("const char**") @ByPtrPtr BytePointer base, @Cast("size_t*") SizeTPointer byte_size); public static native 
TRITONSERVER_Error TRITONSERVER_MetricsFormatted( - TRITONSERVER_Metrics metrics, TRITONSERVER_MetricFormat format, + TRITONSERVER_Metrics metrics, @Cast("TRITONSERVER_MetricFormat") int format, @Cast("const char**") @ByPtrPtr ByteBuffer base, @Cast("size_t*") SizeTPointer byte_size); public static native TRITONSERVER_Error TRITONSERVER_MetricsFormatted( TRITONSERVER_Metrics metrics, @Cast("TRITONSERVER_MetricFormat") int format, @@ -654,17 +592,11 @@ public static native TRITONSERVER_Error TRITONSERVER_MetricsFormatted( *

* Trace levels */ -public enum TRITONSERVER_InferenceTraceLevel { - TRITONSERVER_TRACE_LEVEL_DISABLED(0), - TRITONSERVER_TRACE_LEVEL_MIN(1), - TRITONSERVER_TRACE_LEVEL_MAX(2); - - public final int value; - private TRITONSERVER_InferenceTraceLevel(int v) { this.value = v; } - private TRITONSERVER_InferenceTraceLevel(TRITONSERVER_InferenceTraceLevel e) { this.value = e.value; } - public TRITONSERVER_InferenceTraceLevel intern() { for (TRITONSERVER_InferenceTraceLevel e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} +/** enum TRITONSERVER_InferenceTraceLevel */ +public static final int + TRITONSERVER_TRACE_LEVEL_DISABLED = 0, + TRITONSERVER_TRACE_LEVEL_MIN = 1, + TRITONSERVER_TRACE_LEVEL_MAX = 2; /** Get the string representation of a trace level. The returned * string is not owned by the caller and so should not be modified or @@ -673,26 +605,18 @@ public enum TRITONSERVER_InferenceTraceLevel { * @param level The trace level. * @return The string representation of the trace level. 
*/ public static native String TRITONSERVER_InferenceTraceLevelString( - TRITONSERVER_InferenceTraceLevel level); -public static native @Cast("const char*") BytePointer TRITONSERVER_InferenceTraceLevelString( @Cast("TRITONSERVER_InferenceTraceLevel") int level); // Trace activities -public enum TRITONSERVER_InferenceTraceActivity { - TRITONSERVER_TRACE_REQUEST_START(0), - TRITONSERVER_TRACE_QUEUE_START(1), - TRITONSERVER_TRACE_COMPUTE_START(2), - TRITONSERVER_TRACE_COMPUTE_INPUT_END(3), - TRITONSERVER_TRACE_COMPUTE_OUTPUT_START(4), - TRITONSERVER_TRACE_COMPUTE_END(5), - TRITONSERVER_TRACE_REQUEST_END(6); - - public final int value; - private TRITONSERVER_InferenceTraceActivity(int v) { this.value = v; } - private TRITONSERVER_InferenceTraceActivity(TRITONSERVER_InferenceTraceActivity e) { this.value = e.value; } - public TRITONSERVER_InferenceTraceActivity intern() { for (TRITONSERVER_InferenceTraceActivity e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} +/** enum TRITONSERVER_InferenceTraceActivity */ +public static final int + TRITONSERVER_TRACE_REQUEST_START = 0, + TRITONSERVER_TRACE_QUEUE_START = 1, + TRITONSERVER_TRACE_COMPUTE_START = 2, + TRITONSERVER_TRACE_COMPUTE_INPUT_END = 3, + TRITONSERVER_TRACE_COMPUTE_OUTPUT_START = 4, + TRITONSERVER_TRACE_COMPUTE_END = 5, + TRITONSERVER_TRACE_REQUEST_END = 6; /** Get the string representation of a trace activity. The returned * string is not owned by the caller and so should not be modified or @@ -701,8 +625,6 @@ public enum TRITONSERVER_InferenceTraceActivity { * @param activity The trace activity. * @return The string representation of the trace activity. 
*/ public static native String TRITONSERVER_InferenceTraceActivityString( - TRITONSERVER_InferenceTraceActivity activity); -public static native @Cast("const char*") BytePointer TRITONSERVER_InferenceTraceActivityString( @Cast("TRITONSERVER_InferenceTraceActivity") int activity); // Targeting ../tritonserver/TRITONSERVER_InferenceTraceActivityFn_t.java @@ -737,11 +659,7 @@ public static native String TRITONSERVER_InferenceTraceActivityString( /// public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceNew( - @Cast("TRITONSERVER_InferenceTrace**") PointerPointer trace, TRITONSERVER_InferenceTraceLevel level, - @Cast("uint64_t") long parent_id, TRITONSERVER_InferenceTraceActivityFn_t activity_fn, - TRITONSERVER_InferenceTraceReleaseFn_t release_fn, Pointer trace_userp); -public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceNew( - @ByPtrPtr TRITONSERVER_InferenceTrace trace, TRITONSERVER_InferenceTraceLevel level, + @Cast("TRITONSERVER_InferenceTrace**") PointerPointer trace, @Cast("TRITONSERVER_InferenceTraceLevel") int level, @Cast("uint64_t") long parent_id, TRITONSERVER_InferenceTraceActivityFn_t activity_fn, TRITONSERVER_InferenceTraceReleaseFn_t release_fn, Pointer trace_userp); public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceNew( @@ -834,40 +752,22 @@ public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceModelVersion( *

* Inference request flags. The enum values must be power-of-2 values. */ -public enum TRITONSERVER_RequestFlag { - TRITONSERVER_REQUEST_FLAG_SEQUENCE_START(1), - TRITONSERVER_REQUEST_FLAG_SEQUENCE_END(2); - - public final int value; - private TRITONSERVER_RequestFlag(int v) { this.value = v; } - private TRITONSERVER_RequestFlag(TRITONSERVER_RequestFlag e) { this.value = e.value; } - public TRITONSERVER_RequestFlag intern() { for (TRITONSERVER_RequestFlag e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} +/** enum TRITONSERVER_RequestFlag */ +public static final int + TRITONSERVER_REQUEST_FLAG_SEQUENCE_START = 1, + TRITONSERVER_REQUEST_FLAG_SEQUENCE_END = 2; /** Inference request release flags. The enum values must be * power-of-2 values. */ -public enum TRITONSERVER_RequestReleaseFlag { - TRITONSERVER_REQUEST_RELEASE_ALL(1); - - public final int value; - private TRITONSERVER_RequestReleaseFlag(int v) { this.value = v; } - private TRITONSERVER_RequestReleaseFlag(TRITONSERVER_RequestReleaseFlag e) { this.value = e.value; } - public TRITONSERVER_RequestReleaseFlag intern() { for (TRITONSERVER_RequestReleaseFlag e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} +/** enum TRITONSERVER_RequestReleaseFlag */ +public static final int + TRITONSERVER_REQUEST_RELEASE_ALL = 1; /** Inference response complete flags. The enum values must be * power-of-2 values. 
*/ -public enum TRITONSERVER_ResponseCompleteFlag { - TRITONSERVER_RESPONSE_COMPLETE_FINAL(1); - - public final int value; - private TRITONSERVER_ResponseCompleteFlag(int v) { this.value = v; } - private TRITONSERVER_ResponseCompleteFlag(TRITONSERVER_ResponseCompleteFlag e) { this.value = e.value; } - public TRITONSERVER_ResponseCompleteFlag intern() { for (TRITONSERVER_ResponseCompleteFlag e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} +/** enum TRITONSERVER_ResponseCompleteFlag */ +public static final int + TRITONSERVER_RESPONSE_COMPLETE_FINAL = 1; // Targeting ../tritonserver/TRITONSERVER_InferenceRequestReleaseFn_t.java @@ -1067,27 +967,27 @@ public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestSetTimeoutM /// public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAddInput( TRITONSERVER_InferenceRequest inference_request, String name, - TRITONSERVER_DataType datatype, @Cast("const int64_t*") LongPointer shape, + @Cast("const TRITONSERVER_DataType") int datatype, @Cast("const int64_t*") LongPointer shape, @Cast("uint64_t") long dim_count); public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAddInput( TRITONSERVER_InferenceRequest inference_request, @Cast("const char*") BytePointer name, - @Cast("TRITONSERVER_DataType") int datatype, @Cast("const int64_t*") LongBuffer shape, + @Cast("const TRITONSERVER_DataType") int datatype, @Cast("const int64_t*") LongBuffer shape, @Cast("uint64_t") long dim_count); public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAddInput( TRITONSERVER_InferenceRequest inference_request, String name, - TRITONSERVER_DataType datatype, @Cast("const int64_t*") long[] shape, + @Cast("const TRITONSERVER_DataType") int datatype, @Cast("const int64_t*") long[] shape, @Cast("uint64_t") long dim_count); public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAddInput( 
TRITONSERVER_InferenceRequest inference_request, @Cast("const char*") BytePointer name, - @Cast("TRITONSERVER_DataType") int datatype, @Cast("const int64_t*") LongPointer shape, + @Cast("const TRITONSERVER_DataType") int datatype, @Cast("const int64_t*") LongPointer shape, @Cast("uint64_t") long dim_count); public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAddInput( TRITONSERVER_InferenceRequest inference_request, String name, - TRITONSERVER_DataType datatype, @Cast("const int64_t*") LongBuffer shape, + @Cast("const TRITONSERVER_DataType") int datatype, @Cast("const int64_t*") LongBuffer shape, @Cast("uint64_t") long dim_count); public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAddInput( TRITONSERVER_InferenceRequest inference_request, @Cast("const char*") BytePointer name, - @Cast("TRITONSERVER_DataType") int datatype, @Cast("const int64_t*") long[] shape, + @Cast("const TRITONSERVER_DataType") int datatype, @Cast("const int64_t*") long[] shape, @Cast("uint64_t") long dim_count); /** Remove an input from a request. @@ -1127,7 +1027,7 @@ public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestRemoveAllIn * @return a TRITONSERVER_Error indicating success or failure. */ public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAppendInputData( TRITONSERVER_InferenceRequest inference_request, String name, - @Const Pointer base, @Cast("size_t") long byte_size, TRITONSERVER_MemoryType memory_type, + @Const Pointer base, @Cast("size_t") long byte_size, @Cast("TRITONSERVER_MemoryType") int memory_type, @Cast("int64_t") long memory_type_id); public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAppendInputData( TRITONSERVER_InferenceRequest inference_request, @Cast("const char*") BytePointer name, @@ -1155,7 +1055,7 @@ public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAppendInput * @return a TRITONSERVER_Error indicating success or failure. 
*/ public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy( TRITONSERVER_InferenceRequest inference_request, String name, - @Const Pointer base, @Cast("size_t") long byte_size, TRITONSERVER_MemoryType memory_type, + @Const Pointer base, @Cast("size_t") long byte_size, @Cast("TRITONSERVER_MemoryType") int memory_type, @Cast("int64_t") long memory_type_id, String host_policy_name); public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy( TRITONSERVER_InferenceRequest inference_request, @Cast("const char*") BytePointer name, @@ -1491,17 +1391,17 @@ public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseOutputClas *

* Model control modes */ -public enum TRITONSERVER_ModelControlMode { - TRITONSERVER_MODEL_CONTROL_NONE(0), - TRITONSERVER_MODEL_CONTROL_POLL(1), - TRITONSERVER_MODEL_CONTROL_EXPLICIT(2); - - public final int value; - private TRITONSERVER_ModelControlMode(int v) { this.value = v; } - private TRITONSERVER_ModelControlMode(TRITONSERVER_ModelControlMode e) { this.value = e.value; } - public TRITONSERVER_ModelControlMode intern() { for (TRITONSERVER_ModelControlMode e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} +/** enum TRITONSERVER_ModelControlMode */ +public static final int + TRITONSERVER_MODEL_CONTROL_NONE = 0, + TRITONSERVER_MODEL_CONTROL_POLL = 1, + TRITONSERVER_MODEL_CONTROL_EXPLICIT = 2; + +/** Rate limit modes */ +/** enum TRITONSERVER_RateLimitMode */ +public static final int + TRITONSERVER_RATE_LIMIT_OFF = 0, + TRITONSERVER_RATE_LIMIT_EXEC_COUNT = 1; /** Create a new server options object. The caller takes ownership of * the TRITONSERVER_ServerOptions object and must call @@ -1579,8 +1479,6 @@ public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetModelReposi * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetModelControlMode( - TRITONSERVER_ServerOptions options, TRITONSERVER_ModelControlMode mode); public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetModelControlMode( TRITONSERVER_ServerOptions options, @Cast("TRITONSERVER_ModelControlMode") int mode); @@ -1608,10 +1506,53 @@ public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetStartupMode * false to disable. * @return a TRITONSERVER_Error indicating success or failure. */ +/// +/// /// public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetStrictModelConfig( TRITONSERVER_ServerOptions options, @Cast("bool") boolean strict); +/** Set the rate limit mode in a server options. 
+ * + * TRITONSERVER_RATE_LIMIT_EXEC_COUNT: The rate limiting prioritizes the + * inference execution using the number of times each instance has got a + * chance to run. The execution gets to run only when its resource + * constraints are satisfied. + * + * TRITONSERVER_RATE_LIMIT_OFF: The rate limiting is turned off and the + * inference gets executed whenever an instance is available. + * + * @param options The server options object. + * @param mode The mode to use for the rate limiting. By default, execution + * count is used to determine the priorities. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetRateLimiterMode( + TRITONSERVER_ServerOptions options, @Cast("TRITONSERVER_RateLimitMode") int mode); + +/** Add resource count for rate limiting. + * + * @param options The server options object. + * @param name The name of the resource. + * @param count The count of the resource. + * @param device The device identifier for the resource. A value of -1 + * indicates that the specified number of resources are available on every + * device. The device value is ignored for a global resource. The server + * will use the rate limiter configuration specified for instance groups + * in model config to determine whether resource is global. In case of + * conflicting resource type in different model configurations, server + * will raise an appropriate error while loading model. + * @return a TRITONSERVER_Error indicating success or failure. 
*/ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsAddRateLimiterResource( + TRITONSERVER_ServerOptions options, String resource_name, + @Cast("const size_t") long resource_count, int device); +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsAddRateLimiterResource( + TRITONSERVER_ServerOptions options, @Cast("const char*") BytePointer resource_name, + @Cast("const size_t") long resource_count, int device); + /** Set the total pinned memory byte size that the server can allocate * in a server options. The pinned memory pool will be shared across * Triton itself and the backends that use @@ -1825,40 +1766,22 @@ public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetHostPolicy( *

* Model batch flags. The enum values must be power-of-2 values. */ -public enum TRITONSERVER_ModelBatchFlag { - TRITONSERVER_BATCH_UNKNOWN(1), - TRITONSERVER_BATCH_FIRST_DIM(2); - - public final int value; - private TRITONSERVER_ModelBatchFlag(int v) { this.value = v; } - private TRITONSERVER_ModelBatchFlag(TRITONSERVER_ModelBatchFlag e) { this.value = e.value; } - public TRITONSERVER_ModelBatchFlag intern() { for (TRITONSERVER_ModelBatchFlag e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} +/** enum TRITONSERVER_ModelBatchFlag */ +public static final int + TRITONSERVER_BATCH_UNKNOWN = 1, + TRITONSERVER_BATCH_FIRST_DIM = 2; /** Model index flags. The enum values must be power-of-2 values. */ -public enum TRITONSERVER_ModelIndexFlag { - TRITONSERVER_INDEX_FLAG_READY(1); - - public final int value; - private TRITONSERVER_ModelIndexFlag(int v) { this.value = v; } - private TRITONSERVER_ModelIndexFlag(TRITONSERVER_ModelIndexFlag e) { this.value = e.value; } - public TRITONSERVER_ModelIndexFlag intern() { for (TRITONSERVER_ModelIndexFlag e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} +/** enum TRITONSERVER_ModelIndexFlag */ +public static final int + TRITONSERVER_INDEX_FLAG_READY = 1; /** Model transaction policy flags. The enum values must be * power-of-2 values. 
*/ -public enum TRITONSERVER_ModelTxnPropertyFlag { - TRITONSERVER_TXN_ONE_TO_ONE(1), - TRITONSERVER_TXN_DECOUPLED(2); - - public final int value; - private TRITONSERVER_ModelTxnPropertyFlag(int v) { this.value = v; } - private TRITONSERVER_ModelTxnPropertyFlag(TRITONSERVER_ModelTxnPropertyFlag e) { this.value = e.value; } - public TRITONSERVER_ModelTxnPropertyFlag intern() { for (TRITONSERVER_ModelTxnPropertyFlag e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} +/** enum TRITONSERVER_ModelTxnPropertyFlag */ +public static final int + TRITONSERVER_TXN_ONE_TO_ONE = 1, + TRITONSERVER_TXN_DECOUPLED = 2; /** Create a new server object. The caller takes ownership of the * TRITONSERVER_Server object and must call TRITONSERVER_ServerDelete @@ -1910,10 +1833,10 @@ public static native TRITONSERVER_Error TRITONSERVER_ServerStop( * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONSERVER_ServerIsLive( - TRITONSERVER_Server server, @Cast("bool*") BoolPointer live); public static native TRITONSERVER_Error TRITONSERVER_ServerIsLive( TRITONSERVER_Server server, @Cast("bool*") boolean[] live); +public static native TRITONSERVER_Error TRITONSERVER_ServerIsLive( + TRITONSERVER_Server server, @Cast("bool*") BoolPointer live); /** Is the server ready? * @@ -1922,10 +1845,10 @@ public static native TRITONSERVER_Error TRITONSERVER_ServerIsLive( * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONSERVER_ServerIsReady( - TRITONSERVER_Server server, @Cast("bool*") BoolPointer ready); public static native TRITONSERVER_Error TRITONSERVER_ServerIsReady( TRITONSERVER_Server server, @Cast("bool*") boolean[] ready); +public static native TRITONSERVER_Error TRITONSERVER_ServerIsReady( + TRITONSERVER_Server server, @Cast("bool*") BoolPointer ready); /** Is the model ready? 
* @@ -1942,10 +1865,10 @@ public static native TRITONSERVER_Error TRITONSERVER_ServerIsReady( /// public static native TRITONSERVER_Error TRITONSERVER_ServerModelIsReady( TRITONSERVER_Server server, String model_name, - @Cast("const int64_t") long model_version, @Cast("bool*") BoolPointer ready); + @Cast("const int64_t") long model_version, @Cast("bool*") boolean[] ready); public static native TRITONSERVER_Error TRITONSERVER_ServerModelIsReady( TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name, - @Cast("const int64_t") long model_version, @Cast("bool*") boolean[] ready); + @Cast("const int64_t") long model_version, @Cast("bool*") BoolPointer ready); /** Get the batch properties of the model. The properties are * communicated by a flags value and an (optional) object returned by @@ -2251,7 +2174,7 @@ public static native TRITONSERVER_Error TRITONSERVER_ServerInferAsync( // Parsed from tritonbackend.h -// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +// Copyright 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions @@ -2359,7 +2282,7 @@ public static native TRITONSERVER_Error TRITONSERVER_ServerInferAsync( public static final int TRITONBACKEND_API_VERSION_MAJOR = 1; /// -public static final int TRITONBACKEND_API_VERSION_MINOR = 4; +public static final int TRITONBACKEND_API_VERSION_MINOR = 5; /** Get the TRITONBACKEND API version supported by Triton. This value * can be compared against the TRITONBACKEND_API_VERSION_MAJOR and @@ -2392,15 +2315,9 @@ public static native TRITONSERVER_Error TRITONBACKEND_ApiVersion( * accessible filesystem. The backend can access these files * using an appropriate system API. 
* */ -public enum TRITONBACKEND_ArtifactType { - TRITONBACKEND_ARTIFACT_FILESYSTEM(0); - - public final int value; - private TRITONBACKEND_ArtifactType(int v) { this.value = v; } - private TRITONBACKEND_ArtifactType(TRITONBACKEND_ArtifactType e) { this.value = e.value; } - public TRITONBACKEND_ArtifactType intern() { for (TRITONBACKEND_ArtifactType e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} +/** enum TRITONBACKEND_ArtifactType */ +public static final int + TRITONBACKEND_ARTIFACT_FILESYSTEM = 0; /** @@ -2439,15 +2356,11 @@ public enum TRITONBACKEND_ArtifactType { /// public static native TRITONSERVER_Error TRITONBACKEND_MemoryManagerAllocate( TRITONBACKEND_MemoryManager manager, @Cast("void**") PointerPointer buffer, - TRITONSERVER_MemoryType memory_type, @Cast("const int64_t") long memory_type_id, - @Cast("const uint64_t") long byte_size); -public static native TRITONSERVER_Error TRITONBACKEND_MemoryManagerAllocate( - TRITONBACKEND_MemoryManager manager, @Cast("void**") @ByPtrPtr Pointer buffer, - TRITONSERVER_MemoryType memory_type, @Cast("const int64_t") long memory_type_id, + @Cast("const TRITONSERVER_MemoryType") int memory_type, @Cast("const int64_t") long memory_type_id, @Cast("const uint64_t") long byte_size); public static native TRITONSERVER_Error TRITONBACKEND_MemoryManagerAllocate( TRITONBACKEND_MemoryManager manager, @Cast("void**") @ByPtrPtr Pointer buffer, - @Cast("TRITONSERVER_MemoryType") int memory_type, @Cast("const int64_t") long memory_type_id, + @Cast("const TRITONSERVER_MemoryType") int memory_type, @Cast("const int64_t") long memory_type_id, @Cast("const uint64_t") long byte_size); /** Free a buffer that was previously allocated with @@ -2469,10 +2382,7 @@ public static native TRITONSERVER_Error TRITONBACKEND_MemoryManagerAllocate( /// public static native TRITONSERVER_Error TRITONBACKEND_MemoryManagerFree( TRITONBACKEND_MemoryManager manager, Pointer 
buffer, - TRITONSERVER_MemoryType memory_type, @Cast("const int64_t") long memory_type_id); -public static native TRITONSERVER_Error TRITONBACKEND_MemoryManagerFree( - TRITONBACKEND_MemoryManager manager, Pointer buffer, - @Cast("TRITONSERVER_MemoryType") int memory_type, @Cast("const int64_t") long memory_type_id); + @Cast("const TRITONSERVER_MemoryType") int memory_type, @Cast("const int64_t") long memory_type_id); /** * TRITONBACKEND_Input @@ -3079,31 +2989,31 @@ public static native TRITONSERVER_Error TRITONBACKEND_ResponseSetBoolParameter( /// public static native TRITONSERVER_Error TRITONBACKEND_ResponseOutput( TRITONBACKEND_Response response, @Cast("TRITONBACKEND_Output**") PointerPointer output, - String name, TRITONSERVER_DataType datatype, + String name, @Cast("const TRITONSERVER_DataType") int datatype, @Cast("const int64_t*") LongPointer shape, @Cast("const uint32_t") int dims_count); public static native TRITONSERVER_Error TRITONBACKEND_ResponseOutput( TRITONBACKEND_Response response, @ByPtrPtr TRITONBACKEND_Output output, - String name, TRITONSERVER_DataType datatype, + String name, @Cast("const TRITONSERVER_DataType") int datatype, @Cast("const int64_t*") LongPointer shape, @Cast("const uint32_t") int dims_count); public static native TRITONSERVER_Error TRITONBACKEND_ResponseOutput( TRITONBACKEND_Response response, @ByPtrPtr TRITONBACKEND_Output output, - @Cast("const char*") BytePointer name, @Cast("TRITONSERVER_DataType") int datatype, + @Cast("const char*") BytePointer name, @Cast("const TRITONSERVER_DataType") int datatype, @Cast("const int64_t*") LongBuffer shape, @Cast("const uint32_t") int dims_count); public static native TRITONSERVER_Error TRITONBACKEND_ResponseOutput( TRITONBACKEND_Response response, @ByPtrPtr TRITONBACKEND_Output output, - String name, TRITONSERVER_DataType datatype, + String name, @Cast("const TRITONSERVER_DataType") int datatype, @Cast("const int64_t*") long[] shape, @Cast("const uint32_t") int dims_count); public static 
native TRITONSERVER_Error TRITONBACKEND_ResponseOutput( TRITONBACKEND_Response response, @ByPtrPtr TRITONBACKEND_Output output, - @Cast("const char*") BytePointer name, @Cast("TRITONSERVER_DataType") int datatype, + @Cast("const char*") BytePointer name, @Cast("const TRITONSERVER_DataType") int datatype, @Cast("const int64_t*") LongPointer shape, @Cast("const uint32_t") int dims_count); public static native TRITONSERVER_Error TRITONBACKEND_ResponseOutput( TRITONBACKEND_Response response, @ByPtrPtr TRITONBACKEND_Output output, - String name, TRITONSERVER_DataType datatype, + String name, @Cast("const TRITONSERVER_DataType") int datatype, @Cast("const int64_t*") LongBuffer shape, @Cast("const uint32_t") int dims_count); public static native TRITONSERVER_Error TRITONBACKEND_ResponseOutput( TRITONBACKEND_Response response, @ByPtrPtr TRITONBACKEND_Output output, - @Cast("const char*") BytePointer name, @Cast("TRITONSERVER_DataType") int datatype, + @Cast("const char*") BytePointer name, @Cast("const TRITONSERVER_DataType") int datatype, @Cast("const int64_t*") long[] shape, @Cast("const uint32_t") int dims_count); /** Send a response. Calling this function transfers ownership of the @@ -3125,6 +3035,7 @@ public static native TRITONSERVER_Error TRITONBACKEND_ResponseOutput( /// /// /// +/// public static native TRITONSERVER_Error TRITONBACKEND_ResponseSend( TRITONBACKEND_Response response, @Cast("const uint32_t") int send_flags, TRITONSERVER_Error error); @@ -3149,16 +3060,26 @@ public static native TRITONSERVER_Error TRITONBACKEND_ResponseSend( * TRITONBACKEND_ModelInstanceExecute until all responses have * been sent and all requests have been released. This is the * default execution policy. + * + * TRITONBACKEND_EXECUTION_DEVICE_BLOCKING: An instance, A, of the + * model blocks in TRITONBACKEND_ModelInstanceExecute if the + * device associated with the instance is unable to handle + * another inference. 
Even if another instance, B, associated + * with the device, is available and ready to perform an + * inference, Triton will not invoke + * TRITONBACKEND_ModeInstanceExecute for B until A returns from + * TRITONBACKEND_ModelInstanceExecute. Triton will not be blocked + * from calling TRITONBACKEND_ModelInstanceExecute for instance + * C, which is associated with a different device than A and B, + * even if A or B has not returned from + * TRITONBACKEND_ModelInstanceExecute. This execution policy is + * typically used by a backend that can cooperatively execute + * multiple model instances on the same device. * */ -public enum TRITONBACKEND_ExecutionPolicy { - TRITONBACKEND_EXECUTION_BLOCKING(0); - - public final int value; - private TRITONBACKEND_ExecutionPolicy(int v) { this.value = v; } - private TRITONBACKEND_ExecutionPolicy(TRITONBACKEND_ExecutionPolicy e) { this.value = e.value; } - public TRITONBACKEND_ExecutionPolicy intern() { for (TRITONBACKEND_ExecutionPolicy e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} +/** enum TRITONBACKEND_ExecutionPolicy */ +public static final int + TRITONBACKEND_EXECUTION_BLOCKING = 0, + TRITONBACKEND_EXECUTION_DEVICE_BLOCKING = 1; /** Get the name of the backend. The caller does not own the returned * string and must not modify or delete it. The lifetime of the @@ -3229,8 +3150,6 @@ public static native TRITONSERVER_Error TRITONBACKEND_BackendExecutionPolicy( /// /// -public static native TRITONSERVER_Error TRITONBACKEND_BackendSetExecutionPolicy( - TRITONBACKEND_Backend backend, TRITONBACKEND_ExecutionPolicy policy); public static native TRITONSERVER_Error TRITONBACKEND_BackendSetExecutionPolicy( TRITONBACKEND_Backend backend, @Cast("TRITONBACKEND_ExecutionPolicy") int policy); @@ -3410,10 +3329,10 @@ public static native TRITONSERVER_Error TRITONBACKEND_ModelConfig( * @return a TRITONSERVER_Error indicating success or failure. 
*/ /// -public static native TRITONSERVER_Error TRITONBACKEND_ModelAutoCompleteConfig( - TRITONBACKEND_Model model, @Cast("bool*") BoolPointer auto_complete_config); public static native TRITONSERVER_Error TRITONBACKEND_ModelAutoCompleteConfig( TRITONBACKEND_Model model, @Cast("bool*") boolean[] auto_complete_config); +public static native TRITONSERVER_Error TRITONBACKEND_ModelAutoCompleteConfig( + TRITONBACKEND_Model model, @Cast("bool*") BoolPointer auto_complete_config); /** Set the model configuration in Triton server. Only the inputs, outputs, * and max batch size can be changed. Any other changes to the model @@ -3575,10 +3494,10 @@ public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceHostPolicy( * @return a TRITONSERVER_Error indicating success or failure. */ /// -public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceIsPassive( - TRITONBACKEND_ModelInstance instance, @Cast("bool*") BoolPointer is_passive); public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceIsPassive( TRITONBACKEND_ModelInstance instance, @Cast("bool*") boolean[] is_passive); +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceIsPassive( + TRITONBACKEND_ModelInstance instance, @Cast("bool*") BoolPointer is_passive); /** Get the number of optimization profiles to be loaded for the instance. * @@ -3620,6 +3539,47 @@ public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceProfileName( TRITONBACKEND_ModelInstance instance, @Cast("const uint32_t") int index, @Cast("const char**") @ByPtrPtr byte[] profile_name); +/** Get the number of secondary devices configured for the instance. + * + * @param instance The model instance. + * @param count Returns the number of secondary devices. + * @return a TRITONSERVER_Error indicating success or failure. 
*/ + +/// +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceSecondaryDeviceCount( + TRITONBACKEND_ModelInstance instance, @Cast("uint32_t*") IntPointer count); +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceSecondaryDeviceCount( + TRITONBACKEND_ModelInstance instance, @Cast("uint32_t*") IntBuffer count); +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceSecondaryDeviceCount( + TRITONBACKEND_ModelInstance instance, @Cast("uint32_t*") int[] count); + +/** Get the properties of indexed secondary device. The returned + * strings and other properties are owned by the instance, not the + * caller, and so should not be modified or freed. + * + * @param instance The model instance. + * @param index The index of the secondary device. Must be 0 + * <= index < count, where count is the value returned by + * TRITONBACKEND_ModelInstanceSecondaryDeviceCount. + * @param kind Returns the kind of secondary device corresponding + * to the index. + * @param id Returns the id of secondary device corresponding to the index. + * @return a TRITONSERVER_Error indicating success or failure. 
*/ + +/// +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceSecondaryDeviceProperties( + TRITONBACKEND_ModelInstance instance, @Cast("uint32_t") int index, @Cast("const char**") PointerPointer kind, + @Cast("int64_t*") LongPointer id); +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceSecondaryDeviceProperties( + TRITONBACKEND_ModelInstance instance, @Cast("uint32_t") int index, @Cast("const char**") @ByPtrPtr BytePointer kind, + @Cast("int64_t*") LongPointer id); +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceSecondaryDeviceProperties( + TRITONBACKEND_ModelInstance instance, @Cast("uint32_t") int index, @Cast("const char**") @ByPtrPtr ByteBuffer kind, + @Cast("int64_t*") LongBuffer id); +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceSecondaryDeviceProperties( + TRITONBACKEND_ModelInstance instance, @Cast("uint32_t") int index, @Cast("const char**") @ByPtrPtr byte[] kind, + @Cast("int64_t*") long[] id); + /** Get the model associated with a model instance. * * @param instance The model instance. @@ -3856,7 +3816,13 @@ public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceFinalize( * * If success is returned, ownership of the request objects is * transferred to the backend and it is then responsible for creating - * responses and releasing the request objects. + * responses and releasing the request objects. Note that even though + * ownership of the request objects is transferred to the backend, the + * ownership of the buffer holding request pointers is returned back + * to Triton upon return from TRITONBACKEND_ModelInstanceExecute. If + * any request objects need to be maintained beyond + * TRITONBACKEND_ModelInstanceExecute, then the pointers must be copied + * out of the array within TRITONBACKEND_ModelInstanceExecute. * * @param instance The model instance. * @param requests The requests. 
@@ -4003,16 +3969,10 @@ public static native TRITONSERVER_Error TRITONREPOAGENT_ApiVersion( * The remote filesystem path follows the same convention as is used for * repository paths, for example, "s3://" prefix indicates an S3 path. * */ -public enum TRITONREPOAGENT_ArtifactType { - TRITONREPOAGENT_ARTIFACT_FILESYSTEM(0), - TRITONREPOAGENT_ARTIFACT_REMOTE_FILESYSTEM(1); - - public final int value; - private TRITONREPOAGENT_ArtifactType(int v) { this.value = v; } - private TRITONREPOAGENT_ArtifactType(TRITONREPOAGENT_ArtifactType e) { this.value = e.value; } - public TRITONREPOAGENT_ArtifactType intern() { for (TRITONREPOAGENT_ArtifactType e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} +/** enum TRITONREPOAGENT_ArtifactType */ +public static final int + TRITONREPOAGENT_ARTIFACT_FILESYSTEM = 0, + TRITONREPOAGENT_ARTIFACT_REMOTE_FILESYSTEM = 1; /** TRITONREPOAGENT_ActionType * @@ -4038,19 +3998,13 @@ public enum TRITONREPOAGENT_ArtifactType { * * TRITONREPOAGENT_ACTION_UNLOAD_COMPLETE: The model unload is complete. 
* */ -public enum TRITONREPOAGENT_ActionType { - TRITONREPOAGENT_ACTION_LOAD(0), - TRITONREPOAGENT_ACTION_LOAD_COMPLETE(1), - TRITONREPOAGENT_ACTION_LOAD_FAIL(2), - TRITONREPOAGENT_ACTION_UNLOAD(3), - TRITONREPOAGENT_ACTION_UNLOAD_COMPLETE(4); - - public final int value; - private TRITONREPOAGENT_ActionType(int v) { this.value = v; } - private TRITONREPOAGENT_ActionType(TRITONREPOAGENT_ActionType e) { this.value = e.value; } - public TRITONREPOAGENT_ActionType intern() { for (TRITONREPOAGENT_ActionType e : values()) if (e.value == value) return e; return this; } - @Override public String toString() { return intern().name(); } -} +/** enum TRITONREPOAGENT_ActionType */ +public static final int + TRITONREPOAGENT_ACTION_LOAD = 0, + TRITONREPOAGENT_ACTION_LOAD_COMPLETE = 1, + TRITONREPOAGENT_ACTION_LOAD_FAIL = 2, + TRITONREPOAGENT_ACTION_UNLOAD = 3, + TRITONREPOAGENT_ACTION_UNLOAD_COMPLETE = 4; /** Get the location of the files that make up the model. The * 'location' communicated depends on how the model is being @@ -4121,25 +4075,16 @@ public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocation( /// public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocationAcquire( TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, - TRITONREPOAGENT_ArtifactType artifact_type, @Cast("const char**") PointerPointer location); -public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocationAcquire( - TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, - TRITONREPOAGENT_ArtifactType artifact_type, @Cast("const char**") @ByPtrPtr BytePointer location); + @Cast("const TRITONREPOAGENT_ArtifactType") int artifact_type, @Cast("const char**") PointerPointer location); public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocationAcquire( TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, - @Cast("TRITONREPOAGENT_ArtifactType") int artifact_type, @Cast("const char**") @ByPtrPtr 
ByteBuffer location); + @Cast("const TRITONREPOAGENT_ArtifactType") int artifact_type, @Cast("const char**") @ByPtrPtr BytePointer location); public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocationAcquire( TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, - TRITONREPOAGENT_ArtifactType artifact_type, @Cast("const char**") @ByPtrPtr byte[] location); + @Cast("const TRITONREPOAGENT_ArtifactType") int artifact_type, @Cast("const char**") @ByPtrPtr ByteBuffer location); public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocationAcquire( TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, - @Cast("TRITONREPOAGENT_ArtifactType") int artifact_type, @Cast("const char**") @ByPtrPtr BytePointer location); -public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocationAcquire( - TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, - TRITONREPOAGENT_ArtifactType artifact_type, @Cast("const char**") @ByPtrPtr ByteBuffer location); -public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocationAcquire( - TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, - @Cast("TRITONREPOAGENT_ArtifactType") int artifact_type, @Cast("const char**") @ByPtrPtr byte[] location); + @Cast("const TRITONREPOAGENT_ArtifactType") int artifact_type, @Cast("const char**") @ByPtrPtr byte[] location); /** Discard and release ownership of a previously acquired location * and its contents. 
The agent must not access or modify the location @@ -4192,10 +4137,10 @@ public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocationR /// public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryUpdate( TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, - TRITONREPOAGENT_ArtifactType artifact_type, String location); + @Cast("const TRITONREPOAGENT_ArtifactType") int artifact_type, String location); public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryUpdate( TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, - @Cast("TRITONREPOAGENT_ArtifactType") int artifact_type, @Cast("const char*") BytePointer location); + @Cast("const TRITONREPOAGENT_ArtifactType") int artifact_type, @Cast("const char*") BytePointer location); /** Get the number of agent parameters defined for a model. * @@ -4402,10 +4347,7 @@ public static native TRITONSERVER_Error TRITONREPOAGENT_ModelFinalize( * @return a TRITONSERVER_Error indicating success or failure. 
*/ public static native TRITONSERVER_Error TRITONREPOAGENT_ModelAction( TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, - TRITONREPOAGENT_ActionType action_type); -public static native TRITONSERVER_Error TRITONREPOAGENT_ModelAction( - TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, - @Cast("TRITONREPOAGENT_ActionType") int action_type); + @Cast("const TRITONREPOAGENT_ActionType") int action_type); // #ifdef __cplusplus // #endif diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceTraceActivityFn_t.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceTraceActivityFn_t.java index 4b0fad3bd72..7003a3d63c1 100644 --- a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceTraceActivityFn_t.java +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceTraceActivityFn_t.java @@ -42,6 +42,6 @@ public class TRITONSERVER_InferenceTraceActivityFn_t extends FunctionPointer { private native void allocate(); public native void call( TRITONSERVER_InferenceTrace trace, - TRITONSERVER_InferenceTraceActivity activity, @Cast("uint64_t") long timestamp_ns, + @Cast("TRITONSERVER_InferenceTraceActivity") int activity, @Cast("uint64_t") long timestamp_ns, Pointer userp); } diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocatorAllocFn_t.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocatorAllocFn_t.java index 3a645526d6c..18ce20fc215 100644 --- a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocatorAllocFn_t.java +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocatorAllocFn_t.java @@ -74,7 +74,7 @@ public class TRITONSERVER_ResponseAllocatorAllocFn_t extends FunctionPointer { private native void allocate(); public 
native TRITONSERVER_Error call( TRITONSERVER_ResponseAllocator allocator, String tensor_name, - @Cast("size_t") long byte_size, TRITONSERVER_MemoryType memory_type, + @Cast("size_t") long byte_size, @Cast("TRITONSERVER_MemoryType") int memory_type, @Cast("int64_t") long memory_type_id, Pointer userp, @Cast("void**") PointerPointer buffer, @Cast("void**") PointerPointer buffer_userp, @Cast("TRITONSERVER_MemoryType*") IntPointer actual_memory_type, @Cast("int64_t*") LongPointer actual_memory_type_id); diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocatorReleaseFn_t.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocatorReleaseFn_t.java index b4fd4977476..f9d2214dbb6 100644 --- a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocatorReleaseFn_t.java +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocatorReleaseFn_t.java @@ -55,6 +55,6 @@ public class TRITONSERVER_ResponseAllocatorReleaseFn_t extends FunctionPointer { private native void allocate(); public native TRITONSERVER_Error call( TRITONSERVER_ResponseAllocator allocator, Pointer buffer, Pointer buffer_userp, - @Cast("size_t") long byte_size, TRITONSERVER_MemoryType memory_type, + @Cast("size_t") long byte_size, @Cast("TRITONSERVER_MemoryType") int memory_type, @Cast("int64_t") long memory_type_id); } diff --git a/tritonserver/src/main/java/org/bytedeco/tritonserver/presets/tritonserver.java b/tritonserver/src/main/java/org/bytedeco/tritonserver/presets/tritonserver.java index 87049e6ed6c..f2f8929ac47 100644 --- a/tritonserver/src/main/java/org/bytedeco/tritonserver/presets/tritonserver.java +++ b/tritonserver/src/main/java/org/bytedeco/tritonserver/presets/tritonserver.java @@ -52,7 +52,8 @@ value = {"linux-arm64", "linux-ppc64le", "linux-x86_64", "windows-x86_64"}, compiler = "cpp11", include = {"tritonserver.h", 
"tritonbackend.h", "tritonrepoagent.h"}, - link = "tritonserver" + exclude = {"", ""}, + link = "tritonserver" ), @Platform( value = "linux-arm64", @@ -66,7 +67,7 @@ ), @Platform( value = "linux-x86_64", - includepath = {"/opt/tritonserver/include/triton/core/", "/opt/tritonserver/include/"}, + includepath = {"/opt/tritonserver/include/triton/core/", "/opt/tritonserver/include/", "/usr/include"}, linkpath = {"/opt/tritonserver/lib/"} ), @Platform( @@ -98,29 +99,30 @@ public class tritonserver implements LoadEnabled, InfoMapper { for (String lib : libs) { if (platform.startsWith("linux")) { lib += lib.startsWith("cudnn") ? "@.8" : lib.equals("cudart") ? "@.11.0" : lib.equals("nvrtc") ? "@.11.2" : "@.11"; - lib += lib.startsWith("nvinfer") ? "@.8" : lib.equals("nvonnxparser") ? "@.8" : lib.equals("nvparsers") ? "@.8" :"@.8"; + lib += lib.startsWith("nvinfer") ? "@.8" : lib.equals("nvonnxparser") ? "@.8" : lib.equals("nvparsers") ? "@.8" :"@.8"; } else if (platform.startsWith("windows")) { lib += lib.startsWith("cudnn") ? "64_8" : lib.equals("cudart") ? "64_110" : lib.equals("nvrtc") ? "64_112_0" : "64_11"; - lib += lib.startsWith("nvinfer") ? "64_8" : lib.equals("nvonnxparser") ? "64_8" : lib.equals("nvparsers") ? "64_8" :"64_8"; + lib += lib.startsWith("nvinfer") ? "64_8" : lib.equals("nvonnxparser") ? "64_8" : lib.equals("nvparsers") ? 
"64_8" :"64_8"; } else { continue; // no CUDA } - if (!preloads.contains(lib)) { + if (!preloads.contains(lib)) { preloads.add(i++, lib); } } if (i > 0) { resources.add("/org/bytedeco/cuda/"); - resources.add("/org/bytedeco/tensorrt/"); + resources.add("/org/bytedeco/tensorrt/"); } } public void map(InfoMap infoMap) { - infoMap.put(new Info().enumerate()) - .put(new Info("TRITONSERVER_EXPORT").cppTypes().annotations()) - .put(new Info("TRITONSERVER_DECLSPEC").cppTypes().annotations()) - .put(new Info("TRITONBACKEND_DECLSPEC", "TRITONBACKEND_ISPEC").cppTypes().annotations()) - .put(new Info("TRITONREPOAGENT_DECLSPEC", "TRITONREPOAGENT_ISPEC").cppTypes().annotations()) + infoMap.putFirst(new Info().enumerate(false)) + .put(new Info("bool").cast().valueTypes("boolean").pointerTypes("boolean[]", "BoolPointer")) + .put(new Info("TRITONSERVER_EXPORT").cppTypes().annotations()) + .put(new Info("TRITONSERVER_DECLSPEC").cppTypes().annotations()) + .put(new Info("TRITONBACKEND_DECLSPEC", "TRITONBACKEND_ISPEC").cppTypes().annotations()) + .put(new Info("TRITONREPOAGENT_DECLSPEC", "TRITONREPOAGENT_ISPEC").cppTypes().annotations()) ; } } From 130b7e2e68ed53a98a6a61c48e944a06583d4888 Mon Sep 17 00:00:00 2001 From: jackhe Date: Sat, 16 Oct 2021 16:22:13 +0800 Subject: [PATCH 08/21] add README --- tritonserver/README.md | 70 ++++++++++++++++++++++++++++++++ tritonserver/cppbuild.sh | 6 +-- tritonserver/samples/Simple.java | 2 +- 3 files changed, 73 insertions(+), 5 deletions(-) diff --git a/tritonserver/README.md b/tritonserver/README.md index 8b137891791..12a0bff5f4b 100644 --- a/tritonserver/README.md +++ b/tritonserver/README.md @@ -1 +1,71 @@ +JavaCPP Presets for Tritonserver +============================ + +[![Gitter](https://badges.gitter.im/bytedeco/javacpp.svg)](https://gitter.im/bytedeco/javacpp) [![Maven 
Central](https://maven-badges.herokuapp.com/maven-central/org.bytedeco/tritonserver/badge.svg)](https://maven-badges.herokuapp.com/maven-central/org.bytedeco/tritonserver) [![Sonatype Nexus (Snapshots)](https://img.shields.io/nexus/s/https/oss.sonatype.org/org.bytedeco/tritonserver.svg)](http://bytedeco.org/builds/) +Build status for all platforms: [![tritonserver](https://github.com/bytedeco/javacpp-presets/workflows/tritonserver/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Atritonserver) Commercial support: [![xscode](https://img.shields.io/badge/Available%20on-xs%3Acode-blue?style=?style=plastic&logo=appveyor&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAEAAAABACAMAAACdt4HsAAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJbWFnZVJlYWR5ccllPAAAAAZQTFRF////////VXz1bAAAAAJ0Uk5T/wDltzBKAAAAlUlEQVR42uzXSwqAMAwE0Mn9L+3Ggtgkk35QwcnSJo9S+yGwM9DCooCbgn4YrJ4CIPUcQF7/XSBbx2TEz4sAZ2q1RAECBAiYBlCtvwN+KiYAlG7UDGj59MViT9hOwEqAhYCtAsUZvL6I6W8c2wcbd+LIWSCHSTeSAAECngN4xxIDSK9f4B9t377Wd7H5Nt7/Xz8eAgwAvesLRjYYPuUAAAAASUVORK5CYII=)](https://xscode.com/bytedeco/javacpp-presets) + + +License Agreements +------------------ +By downloading these archives, you agree to the terms of the license agreements for NVIDIA software included in the archives. + +### Tritonserver +To view the license for Tritonserver included in these archives, click [here](https://github.com/triton-inference-server/server) + + * Tritonserver is a widely used software package for inference service + * Triton supports almost all kinds of models generated by different DL frameworks or tools, such as TensorFlow, PyTorch, ONNX, TensorRT, OpenVINO... + * Triton supports both CPU and GPU + * Triton can be used both as an application and as a shared library. In case you already have your own inference service framework but want to add more features, just try Triton as a shared library. 
+ * Triton supports Java as a shared library through JavaCPP Presets + +Introduction +------------ +This directory contains the JavaCPP Presets module for: + + * Tritonserver 2.12 https://github.com/triton-inference-server/server + +Please refer to the parent README.md file for more detailed information about the JavaCPP Presets. + + +Documentation +------------- +Java API documentation is available here: + + * http://bytedeco.org/javacpp-presets/tritonserver/apidocs/ + + +Sample Usage +------------ +Here is an example of Tritonserver ported to Java from the `simple.cc` sample file available at: + + * https://github.com/triton-inference-server/server/tree/main/src/servers + +This sample intends to show how to call the Java-based Triton API to execute inference requests. +We can use [Maven 3](http://maven.apache.org/) to download and install automatically all the class files as well as the native binaries. To run this sample code, simply execute on the command line: +```bash + $ mvn exec:java -Djavacpp.platform=linux-x86_64 -Dexec.args="-r /workspace/tritonserver_21.07_source/server-2.12.0/docs/examples/model_repository/models" +``` + +### Steps to run this sample + +**1, Get the source code of Tritonserver to set the model repository up: +```bash + $ wget https://github.com/triton-inference-server/server/archive/refs/tags/v2.12.0.tar.gz + $ tar czvf v2.12.0.tar.gz + $ cd server-2.12.0/docs/examples/model_repository + $ mkdir models + $ cd models; cp -a ../simple . +``` +Now, this models directory will be our model repository. 
+ +**2, Start the Docker container to run the sample (Note: 1, now we are under the "model_repository" dir as above): +```bash + $ docker run -it --gpus=all -v $(pwd):/workspace nvcr.io/nvidia/tritonserver:21.07-py3 bash + $ git clone https://github.com/bytedeco/javacpp-presets.git + $ cd javacpp-presets/tritonserver/samples + $ mvn exec:java -Djavacpp.platform=linux-x86_64 -Dexec.args="-r /workspace/tritonserver_21.07_source/server-2.12.0/docs/examples/model_repository/models" +``` + +This sample is the JAVA implementation of [C API](https://github.com/triton-inference-server/server/blob/main/docs/inference_protocols.md#c-api) + diff --git a/tritonserver/cppbuild.sh b/tritonserver/cppbuild.sh index 01261c2e45f..e09c39c4094 100644 --- a/tritonserver/cppbuild.sh +++ b/tritonserver/cppbuild.sh @@ -21,10 +21,8 @@ case $PLATFORM in fi ;; windows-x86_64) - if [[ ! -f "C:/Program Files/NVIDIA GPU Computing Toolkit/TensorRT/include/NvInfer.h" ]]; then - echo "Please install TensorRT in C:/Program Files/NVIDIA GPU Computing Toolkit/TensorRT/" - exit 1 - fi + echo "Windows are not supported yet" + exit 1 ;; *) echo "Error: Platform \"$PLATFORM\" is not supported" diff --git a/tritonserver/samples/Simple.java b/tritonserver/samples/Simple.java index f3e854228a7..1295c331a5b 100644 --- a/tritonserver/samples/Simple.java +++ b/tritonserver/samples/Simple.java @@ -1,4 +1,4 @@ -// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 
// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions From 831176698e004bb8de3a3ea89ed8b06474afe675 Mon Sep 17 00:00:00 2001 From: jackhe Date: Sat, 16 Oct 2021 16:27:20 +0800 Subject: [PATCH 09/21] fix typo --- tritonserver/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tritonserver/README.md b/tritonserver/README.md index 12a0bff5f4b..71a233639af 100644 --- a/tritonserver/README.md +++ b/tritonserver/README.md @@ -48,7 +48,7 @@ We can use [Maven 3](http://maven.apache.org/) to download and install automatic ### Steps to run this sample -**1, Get the source code of Tritonserver to set the model repository up: +**1, Get the source code of Tritonserver to set the model repository up:** ```bash $ wget https://github.com/triton-inference-server/server/archive/refs/tags/v2.12.0.tar.gz $ tar czvf v2.12.0.tar.gz @@ -58,7 +58,7 @@ We can use [Maven 3](http://maven.apache.org/) to download and install automatic ``` Now, this models directory will be our model repository. 
-**2, Start the Docker container to run the sample (Note: 1, now we are under the "model_repository" dir as above): +**2, Start the Docker container to run the sample (Note: 1, now we are under the "model_repository" dir as above):** ```bash $ docker run -it --gpus=all -v $(pwd):/workspace nvcr.io/nvidia/tritonserver:21.07-py3 bash $ git clone https://github.com/bytedeco/javacpp-presets.git From 0a60f186494191a74456bc1078cd2af50b5e1dd5 Mon Sep 17 00:00:00 2001 From: jackhe Date: Sat, 16 Oct 2021 21:05:17 +0800 Subject: [PATCH 10/21] some modifications for overall platform --- README.md | 2 ++ platform/pom.xml | 6 ++++++ tritonserver/src/main/java9/module-info.java | 6 +++--- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index b169f79263f..52cceea8f55 100644 --- a/README.md +++ b/README.md @@ -46,6 +46,7 @@ JavaCPP Presets [![tensorflow](https://github.com/bytedeco/javacpp-presets/workflows/tensorflow/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Atensorflow) [![tensorflow-lite](https://github.com/bytedeco/javacpp-presets/workflows/tensorflow-lite/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Atensorflow-lite) [![tensorrt](https://github.com/bytedeco/javacpp-presets/workflows/tensorrt/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Atensorrt) +[![tritonserver](https://github.com/bytedeco/javacpp-presets/workflows/tritonserver/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Atritonserver) [![ale](https://github.com/bytedeco/javacpp-presets/workflows/ale/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Aale) [![depthai](https://github.com/bytedeco/javacpp-presets/workflows/depthai/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Adepthai) 
[![onnx](https://github.com/bytedeco/javacpp-presets/workflows/onnx/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Aonnx) @@ -214,6 +215,7 @@ Each child module in turn relies by default on the included [`cppbuild.sh` scrip * TensorFlow 1.15.x https://github.com/tensorflow/tensorflow * TensorFlow Lite 2.6.x https://github.com/tensorflow/tensorflow * TensorRT 8.x https://developer.nvidia.com/tensorrt + * tritonserver 2.12 https://github.com/triton-inference-server/server * The Arcade Learning Environment 0.6.x https://github.com/mgbellemare/Arcade-Learning-Environment * DepthAI 2.10.x https://github.com/luxonis/depthai-core * ONNX 1.10.x https://github.com/onnx/onnx diff --git a/platform/pom.xml b/platform/pom.xml index 9c5584480ff..6dd704bf033 100644 --- a/platform/pom.xml +++ b/platform/pom.xml @@ -58,6 +58,7 @@ ../tensorflow/platform ../tensorflow-lite/platform ../tensorrt/platform + ../tritonserver/platform ../ale/platform ../depthai/platform ../onnx/platform @@ -294,6 +295,11 @@ tensorrt-platform 8.0-${project.version} + + org.bytedeco + tritonserver-platform + 2.12-${project.version} + org.bytedeco ale-platform diff --git a/tritonserver/src/main/java9/module-info.java b/tritonserver/src/main/java9/module-info.java index cd29e14ccd8..3fea113a11a 100644 --- a/tritonserver/src/main/java9/module-info.java +++ b/tritonserver/src/main/java9/module-info.java @@ -2,7 +2,7 @@ requires transitive org.bytedeco.javacpp; requires transitive org.bytedeco.cuda; requires transitive org.bytedeco.tensorrt; - exports org.bytedeco.tensorrt.global; - exports org.bytedeco.tensorrt.presets; - exports org.bytedeco.tensorrt.tritonserver; + exports org.bytedeco.tritonserver.global; + exports org.bytedeco.tritonserver.presets; + exports org.bytedeco.tritonserver; } From 51a191023aeebba3a570bbca5c0c22646eed0500 Mon Sep 17 00:00:00 2001 From: jackhe Date: Sat, 16 Oct 2021 21:08:33 +0800 Subject: [PATCH 11/21] update version --- README.md | 4 ++-- 1 file 
changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 52cceea8f55..e713055337a 100644 --- a/README.md +++ b/README.md @@ -216,8 +216,8 @@ Each child module in turn relies by default on the included [`cppbuild.sh` scrip * TensorFlow Lite 2.6.x https://github.com/tensorflow/tensorflow * TensorRT 8.x https://developer.nvidia.com/tensorrt * tritonserver 2.12 https://github.com/triton-inference-server/server - * The Arcade Learning Environment 0.6.x https://github.com/mgbellemare/Arcade-Learning-Environment - * DepthAI 2.10.x https://github.com/luxonis/depthai-core + * The Arcade Learning Environment 0.7.x https://github.com/mgbellemare/Arcade-Learning-Environment + * DepthAI 2.11.x https://github.com/luxonis/depthai-core * ONNX 1.10.x https://github.com/onnx/onnx * nGraph 0.26.0 https://github.com/NervanaSystems/ngraph * ONNX Runtime 1.8.x https://github.com/microsoft/onnxruntime From 9e39b6b2deb1510becee46e22b874e2680176481 Mon Sep 17 00:00:00 2001 From: Samuel Audet Date: Sun, 17 Oct 2021 01:27:45 +0000 Subject: [PATCH 12/21] Add workflow for GitHub Actions --- .github/workflows/tritonserver.yml | 22 ++++++++++++++++++++++ cppbuild.sh | 2 +- 2 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/tritonserver.yml diff --git a/.github/workflows/tritonserver.yml b/.github/workflows/tritonserver.yml new file mode 100644 index 00000000000..987b1a62471 --- /dev/null +++ b/.github/workflows/tritonserver.yml @@ -0,0 +1,22 @@ +name: tritonserver +on: + push: + paths: + - tritonserver/** + pull_request: + paths: + - tritonserver/** + workflow_dispatch: +env: + CI_DEPLOY_MODULE: ${{ github.workflow }} + CI_DEPLOY_PLATFORM: ${{ github.job }} + CI_DEPLOY_SETTINGS: ${{ secrets.CI_DEPLOY_SETTINGS }} + CI_DEPLOY_USERNAME: ${{ secrets.CI_DEPLOY_USERNAME }} + CI_DEPLOY_PASSWORD: ${{ secrets.CI_DEPLOY_PASSWORD }} + STAGING_REPOSITORY: ${{ secrets.STAGING_REPOSITORY }} +jobs: + linux-x86_64: + runs-on: ubuntu-18.04 + container: 
nvcr.io/nvidia/tritonserver:21.09-py3 + steps: + - uses: bytedeco/javacpp-presets/.github/actions/deploy-ubuntu@actions diff --git a/cppbuild.sh b/cppbuild.sh index 895065044d8..ec7a0fb6c66 100755 --- a/cppbuild.sh +++ b/cppbuild.sh @@ -164,7 +164,7 @@ function sedinplace { } if [[ -z ${PROJECTS:-} ]]; then - PROJECTS=(opencv ffmpeg flycapture spinnaker libdc1394 libfreenect libfreenect2 librealsense librealsense2 videoinput artoolkitplus chilitags flandmark arrow hdf5 hyperscan mkl mkl-dnn dnnl openblas arpack-ng cminpack fftw gsl cpython numpy scipy gym llvm libpostal leptonica tesseract caffe openpose cuda nvcodec opencl mxnet pytorch tensorflow tensorflow-lite tensorrt depthai ale onnx ngraph onnxruntime tvm liquidfun qt skia cpu_features modsecurity systems) + PROJECTS=(opencv ffmpeg flycapture spinnaker libdc1394 libfreenect libfreenect2 librealsense librealsense2 videoinput artoolkitplus chilitags flandmark arrow hdf5 hyperscan mkl mkl-dnn dnnl openblas arpack-ng cminpack fftw gsl cpython numpy scipy gym llvm libpostal leptonica tesseract caffe openpose cuda nvcodec opencl mxnet pytorch tensorflow tensorflow-lite tensorrt tritonserver depthai ale onnx ngraph onnxruntime tvm liquidfun qt skia cpu_features modsecurity systems) fi for PROJECT in ${PROJECTS[@]}; do From c13d3961ee4dafb8a48c90213c6163ad41c59db6 Mon Sep 17 00:00:00 2001 From: Samuel Audet Date: Sun, 17 Oct 2021 01:29:34 +0000 Subject: [PATCH 13/21] Update version to 1.5.7-SNAPSHOT --- tritonserver/platform/pom.xml | 2 +- tritonserver/platform/redist/pom.xml | 2 +- tritonserver/pom.xml | 2 +- tritonserver/samples/pom.xml | 4 ++-- .../java/org/bytedeco/tritonserver/global/tritonserver.java | 2 +- .../tritonserver/tritonserver/TRITONBACKEND_Backend.java | 2 +- .../tritonserver/tritonserver/TRITONBACKEND_Input.java | 2 +- .../tritonserver/TRITONBACKEND_MemoryManager.java | 2 +- .../tritonserver/tritonserver/TRITONBACKEND_Model.java | 2 +- .../tritonserver/TRITONBACKEND_ModelInstance.java | 2 +- 
.../tritonserver/tritonserver/TRITONBACKEND_Output.java | 2 +- .../tritonserver/tritonserver/TRITONBACKEND_Request.java | 2 +- .../tritonserver/tritonserver/TRITONBACKEND_Response.java | 2 +- .../tritonserver/TRITONBACKEND_ResponseFactory.java | 2 +- .../tritonserver/tritonserver/TRITONREPOAGENT_Agent.java | 2 +- .../tritonserver/tritonserver/TRITONREPOAGENT_AgentModel.java | 2 +- .../tritonserver/tritonserver/TRITONSERVER_Error.java | 2 +- .../tritonserver/TRITONSERVER_InferenceRequest.java | 2 +- .../TRITONSERVER_InferenceRequestReleaseFn_t.java | 2 +- .../tritonserver/TRITONSERVER_InferenceResponse.java | 2 +- .../TRITONSERVER_InferenceResponseCompleteFn_t.java | 2 +- .../tritonserver/TRITONSERVER_InferenceTrace.java | 2 +- .../tritonserver/TRITONSERVER_InferenceTraceActivityFn_t.java | 2 +- .../tritonserver/TRITONSERVER_InferenceTraceReleaseFn_t.java | 2 +- .../tritonserver/tritonserver/TRITONSERVER_Message.java | 2 +- .../tritonserver/tritonserver/TRITONSERVER_Metrics.java | 2 +- .../tritonserver/TRITONSERVER_ResponseAllocator.java | 2 +- .../tritonserver/TRITONSERVER_ResponseAllocatorAllocFn_t.java | 2 +- .../TRITONSERVER_ResponseAllocatorReleaseFn_t.java | 2 +- .../tritonserver/TRITONSERVER_ResponseAllocatorStartFn_t.java | 2 +- .../tritonserver/tritonserver/TRITONSERVER_Server.java | 2 +- .../tritonserver/tritonserver/TRITONSERVER_ServerOptions.java | 2 +- 32 files changed, 33 insertions(+), 33 deletions(-) diff --git a/tritonserver/platform/pom.xml b/tritonserver/platform/pom.xml index 329ed1da3b5..26fe33aca54 100644 --- a/tritonserver/platform/pom.xml +++ b/tritonserver/platform/pom.xml @@ -6,7 +6,7 @@ org.bytedeco javacpp-presets - 1.5.6 + 1.5.7-SNAPSHOT ../../ diff --git a/tritonserver/platform/redist/pom.xml b/tritonserver/platform/redist/pom.xml index 81064b97268..b63b0b15a9b 100644 --- a/tritonserver/platform/redist/pom.xml +++ b/tritonserver/platform/redist/pom.xml @@ -6,7 +6,7 @@ org.bytedeco javacpp-presets - 1.5.6 + 1.5.7-SNAPSHOT ../../../ diff 
--git a/tritonserver/pom.xml b/tritonserver/pom.xml index 92fa782a462..9560d94e426 100644 --- a/tritonserver/pom.xml +++ b/tritonserver/pom.xml @@ -6,7 +6,7 @@ org.bytedeco javacpp-presets - 1.5.6 + 1.5.7-SNAPSHOT org.bytedeco diff --git a/tritonserver/samples/pom.xml b/tritonserver/samples/pom.xml index 73741806ea4..ea1d08df38f 100644 --- a/tritonserver/samples/pom.xml +++ b/tritonserver/samples/pom.xml @@ -2,7 +2,7 @@ 4.0.0 org.bytedeco.tritonserver simple - 1.5.6 + 1.5.7-SNAPSHOT Simple 1.8 @@ -12,7 +12,7 @@ org.bytedeco tritonserver-platform - 2.12-1.5.6 + 2.12-1.5.7-SNAPSHOT com.google.code.gson diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/global/tritonserver.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/global/tritonserver.java index c700d34048d..af6a220e876 100644 --- a/tritonserver/src/gen/java/org/bytedeco/tritonserver/global/tritonserver.java +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/global/tritonserver.java @@ -1,4 +1,4 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE package org.bytedeco.tritonserver.global; diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Backend.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Backend.java index fd26f3af2e6..f67238a4484 100644 --- a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Backend.java +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Backend.java @@ -1,4 +1,4 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE package org.bytedeco.tritonserver.tritonserver; diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Input.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Input.java index 
38af26dcaf8..d5733453fef 100644 --- a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Input.java +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Input.java @@ -1,4 +1,4 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE package org.bytedeco.tritonserver.tritonserver; diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_MemoryManager.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_MemoryManager.java index 30889ef0c39..365b5f33ab6 100644 --- a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_MemoryManager.java +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_MemoryManager.java @@ -1,4 +1,4 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE package org.bytedeco.tritonserver.tritonserver; diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Model.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Model.java index d8ce8278d6c..900f251464f 100644 --- a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Model.java +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Model.java @@ -1,4 +1,4 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE package org.bytedeco.tritonserver.tritonserver; diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_ModelInstance.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_ModelInstance.java index aba2d9db0dc..66fbfbf7901 100644 --- 
a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_ModelInstance.java +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_ModelInstance.java @@ -1,4 +1,4 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE package org.bytedeco.tritonserver.tritonserver; diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Output.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Output.java index 0aa168c357e..83fa1b7d53a 100644 --- a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Output.java +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Output.java @@ -1,4 +1,4 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE package org.bytedeco.tritonserver.tritonserver; diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Request.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Request.java index 7977233caec..d8a5e96510c 100644 --- a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Request.java +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Request.java @@ -1,4 +1,4 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE package org.bytedeco.tritonserver.tritonserver; diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Response.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Response.java index 04a8e715fa3..9acbf56eb49 100644 --- a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Response.java +++ 
b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Response.java @@ -1,4 +1,4 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE package org.bytedeco.tritonserver.tritonserver; diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_ResponseFactory.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_ResponseFactory.java index a6985c55627..bdecf0e62bb 100644 --- a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_ResponseFactory.java +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_ResponseFactory.java @@ -1,4 +1,4 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE package org.bytedeco.tritonserver.tritonserver; diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONREPOAGENT_Agent.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONREPOAGENT_Agent.java index b644bb6fcf4..dec4c76ade6 100644 --- a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONREPOAGENT_Agent.java +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONREPOAGENT_Agent.java @@ -1,4 +1,4 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE package org.bytedeco.tritonserver.tritonserver; diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONREPOAGENT_AgentModel.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONREPOAGENT_AgentModel.java index 07bf751648f..e1a494c1036 100644 --- a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONREPOAGENT_AgentModel.java +++ 
b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONREPOAGENT_AgentModel.java @@ -1,4 +1,4 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE package org.bytedeco.tritonserver.tritonserver; diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_Error.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_Error.java index 07037256c2b..f8774fc5b6a 100644 --- a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_Error.java +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_Error.java @@ -1,4 +1,4 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE package org.bytedeco.tritonserver.tritonserver; diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceRequest.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceRequest.java index afc9981227a..30c463566b9 100644 --- a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceRequest.java +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceRequest.java @@ -1,4 +1,4 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE package org.bytedeco.tritonserver.tritonserver; diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceRequestReleaseFn_t.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceRequestReleaseFn_t.java index c45b14de79d..a35db367e31 100644 --- a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceRequestReleaseFn_t.java +++ 
b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceRequestReleaseFn_t.java @@ -1,4 +1,4 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE package org.bytedeco.tritonserver.tritonserver; diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceResponse.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceResponse.java index 9fdf58ec38f..6f28a7329b3 100644 --- a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceResponse.java +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceResponse.java @@ -1,4 +1,4 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE package org.bytedeco.tritonserver.tritonserver; diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceResponseCompleteFn_t.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceResponseCompleteFn_t.java index 76d544843c2..0e964d3b075 100644 --- a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceResponseCompleteFn_t.java +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceResponseCompleteFn_t.java @@ -1,4 +1,4 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE package org.bytedeco.tritonserver.tritonserver; diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceTrace.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceTrace.java index ba190840c1d..9cd501b42ba 100644 --- 
a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceTrace.java +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceTrace.java @@ -1,4 +1,4 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE package org.bytedeco.tritonserver.tritonserver; diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceTraceActivityFn_t.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceTraceActivityFn_t.java index 7003a3d63c1..38cae917a28 100644 --- a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceTraceActivityFn_t.java +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceTraceActivityFn_t.java @@ -1,4 +1,4 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE package org.bytedeco.tritonserver.tritonserver; diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceTraceReleaseFn_t.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceTraceReleaseFn_t.java index c035b0838d1..ef311def896 100644 --- a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceTraceReleaseFn_t.java +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceTraceReleaseFn_t.java @@ -1,4 +1,4 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE package org.bytedeco.tritonserver.tritonserver; diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_Message.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_Message.java index 
54cc3e19e4c..fd1a25c8413 100644 --- a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_Message.java +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_Message.java @@ -1,4 +1,4 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE package org.bytedeco.tritonserver.tritonserver; diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_Metrics.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_Metrics.java index 7eb8af9bbe6..e1a050cf843 100644 --- a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_Metrics.java +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_Metrics.java @@ -1,4 +1,4 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE package org.bytedeco.tritonserver.tritonserver; diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocator.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocator.java index a4d00fdc4d4..01449657612 100644 --- a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocator.java +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocator.java @@ -1,4 +1,4 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE package org.bytedeco.tritonserver.tritonserver; diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocatorAllocFn_t.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocatorAllocFn_t.java index 18ce20fc215..ede55743410 100644 --- 
a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocatorAllocFn_t.java +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocatorAllocFn_t.java @@ -1,4 +1,4 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE package org.bytedeco.tritonserver.tritonserver; diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocatorReleaseFn_t.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocatorReleaseFn_t.java index f9d2214dbb6..eaa984d8f70 100644 --- a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocatorReleaseFn_t.java +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocatorReleaseFn_t.java @@ -1,4 +1,4 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE package org.bytedeco.tritonserver.tritonserver; diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocatorStartFn_t.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocatorStartFn_t.java index fc0b4fb948c..62253b4bed1 100644 --- a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocatorStartFn_t.java +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocatorStartFn_t.java @@ -1,4 +1,4 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE package org.bytedeco.tritonserver.tritonserver; diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_Server.java 
b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_Server.java index df7d02cc3d2..ab656568bd4 100644 --- a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_Server.java +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_Server.java @@ -1,4 +1,4 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE package org.bytedeco.tritonserver.tritonserver; diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ServerOptions.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ServerOptions.java index a40c7cb81f6..4bb5d2c8892 100644 --- a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ServerOptions.java +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ServerOptions.java @@ -1,4 +1,4 @@ -// Targeted by JavaCPP version 1.5.6: DO NOT EDIT THIS FILE +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE package org.bytedeco.tritonserver.tritonserver; From bd0c77f6b7315c33221dfaf6c6d7247ec92ffb96 Mon Sep 17 00:00:00 2001 From: Samuel Audet Date: Sun, 17 Oct 2021 01:39:34 +0000 Subject: [PATCH 14/21] Upgrade for Triton Inference Server 2.14 --- README.md | 2 +- platform/pom.xml | 2 +- tritonserver/README.md | 14 +++++++------- tritonserver/platform/pom.xml | 2 +- tritonserver/platform/redist/pom.xml | 2 +- tritonserver/pom.xml | 2 +- tritonserver/samples/pom.xml | 2 +- 7 files changed, 13 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index a7d17025968..4a6e0b1d72f 100644 --- a/README.md +++ b/README.md @@ -215,7 +215,7 @@ Each child module in turn relies by default on the included [`cppbuild.sh` scrip * TensorFlow 1.15.x https://github.com/tensorflow/tensorflow * TensorFlow Lite 2.6.x https://github.com/tensorflow/tensorflow * TensorRT 8.x 
https://developer.nvidia.com/tensorrt - * tritonserver 2.12 https://github.com/triton-inference-server/server + * Triton Inference Server 2.14 https://developer.nvidia.com/nvidia-triton-inference-server * The Arcade Learning Environment 0.7.x https://github.com/mgbellemare/Arcade-Learning-Environment * DepthAI 2.11.x https://github.com/luxonis/depthai-core * ONNX 1.10.x https://github.com/onnx/onnx diff --git a/platform/pom.xml b/platform/pom.xml index 5a41af5ba26..21ce92b8e80 100644 --- a/platform/pom.xml +++ b/platform/pom.xml @@ -298,7 +298,7 @@ org.bytedeco tritonserver-platform - 2.12-${project.version} + 2.14-${project.version} org.bytedeco diff --git a/tritonserver/README.md b/tritonserver/README.md index 71a233639af..cb4f4628729 100644 --- a/tritonserver/README.md +++ b/tritonserver/README.md @@ -22,7 +22,7 @@ Introduction ------------ This directory contains the JavaCPP Presets module for: - * Tritonserver 2.12 https://github.com/triton-inference-server/server + * Tritonserver 2.14 https://github.com/triton-inference-server/server Please refer to the parent README.md file for more detailed information about the JavaCPP Presets. @@ -43,16 +43,16 @@ Here is a example of Tritonserver ported to Java from the `simple.cc` sample fil This sample intends to show how to call JAVA based Triton API to execute inference requests. We can use [Maven 3](http://maven.apache.org/) to download and install automatically all the class files as well as the native binaries. 
To run this sample code, simply execute on the command line: ```bash - $ mvn exec:java -Djavacpp.platform=linux-x86_64 -Dexec.args="-r /workspace/tritonserver_21.07_source/server-2.12.0/docs/examples/model_repository/models" + $ mvn exec:java -Djavacpp.platform=linux-x86_64 -Dexec.args="-r /workspace/tritonserver_21.09_source/server-2.14.0/docs/examples/model_repository/models" ``` ### Steps to run this sample **1, Get the source code of Tritonserver to set the model repository up:** ```bash - $ wget https://github.com/triton-inference-server/server/archive/refs/tags/v2.12.0.tar.gz - $ tar czvf v2.12.0.tar.gz - $ cd server-2.12.0/docs/examples/model_repository + $ wget https://github.com/triton-inference-server/server/archive/refs/tags/v2.14.0.tar.gz + $ tar czvf v2.14.0.tar.gz + $ cd server-2.14.0/docs/examples/model_repository $ mkdir models $ cd models; cp -a ../simple . ``` @@ -60,10 +60,10 @@ Now, this models directory will be our model repository. **2, Start the Docker container to run the sample (Note: 1, now we are under the "model_repository" dir as above):** ```bash - $ docker run -it --gpus=all -v $(pwd):/workspace nvcr.io/nvidia/tritonserver:21.07-py3 bash + $ docker run -it --gpus=all -v $(pwd):/workspace nvcr.io/nvidia/tritonserver:21.09-py3 bash $ git clone https://github.com/bytedeco/javacpp-presets.git $ cd javacpp-presets/tritonserver/samples - $ mvn exec:java -Djavacpp.platform=linux-x86_64 -Dexec.args="-r /workspace/tritonserver_21.07_source/server-2.12.0/docs/examples/model_repository/models" + $ mvn exec:java -Djavacpp.platform=linux-x86_64 -Dexec.args="-r /workspace/tritonserver_21.09_source/server-2.14.0/docs/examples/model_repository/models" ``` This sample is the JAVA implementation of [C API](https://github.com/triton-inference-server/server/blob/main/docs/inference_protocols.md#c-api) diff --git a/tritonserver/platform/pom.xml b/tritonserver/platform/pom.xml index 26fe33aca54..6ae11f29b5a 100644 --- a/tritonserver/platform/pom.xml +++ 
b/tritonserver/platform/pom.xml @@ -12,7 +12,7 @@ org.bytedeco tritonserver-platform - 2.12-${project.parent.version} + 2.14-${project.parent.version} JavaCPP Presets Platform for Tritonserver diff --git a/tritonserver/platform/redist/pom.xml b/tritonserver/platform/redist/pom.xml index b63b0b15a9b..097c0132c3d 100644 --- a/tritonserver/platform/redist/pom.xml +++ b/tritonserver/platform/redist/pom.xml @@ -12,7 +12,7 @@ org.bytedeco tritonserver-platform-redist - 2.12-${project.parent.version} + 2.14-${project.parent.version} JavaCPP Presets Platform Redist for Tritonserver diff --git a/tritonserver/pom.xml b/tritonserver/pom.xml index 9560d94e426..7134aad61e2 100644 --- a/tritonserver/pom.xml +++ b/tritonserver/pom.xml @@ -11,7 +11,7 @@ org.bytedeco tritonserver - 2.12-${project.parent.version} + 2.14-${project.parent.version} JavaCPP Presets for Tritonserver diff --git a/tritonserver/samples/pom.xml b/tritonserver/samples/pom.xml index ea1d08df38f..42d28fe6658 100644 --- a/tritonserver/samples/pom.xml +++ b/tritonserver/samples/pom.xml @@ -12,7 +12,7 @@ org.bytedeco tritonserver-platform - 2.12-1.5.7-SNAPSHOT + 2.14-1.5.7-SNAPSHOT com.google.code.gson From 228989e62c77672aa3e92326abcc25bda5d32dd3 Mon Sep 17 00:00:00 2001 From: jackyh Date: Sun, 17 Oct 2021 18:13:38 +0800 Subject: [PATCH 15/21] Update README.md fix typo --- tritonserver/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tritonserver/README.md b/tritonserver/README.md index cb4f4628729..0cf4360e393 100644 --- a/tritonserver/README.md +++ b/tritonserver/README.md @@ -51,7 +51,7 @@ We can use [Maven 3](http://maven.apache.org/) to download and install automatic **1, Get the source code of Tritonserver to set the model repository up:** ```bash $ wget https://github.com/triton-inference-server/server/archive/refs/tags/v2.14.0.tar.gz - $ tar czvf v2.14.0.tar.gz + $ tar zxvf v2.14.0.tar.gz $ cd server-2.14.0/docs/examples/model_repository $ mkdir models $ cd models; cp -a 
../simple . From 1314e5108af3d0cc6188bedd5c9e64624ca9b15a Mon Sep 17 00:00:00 2001 From: jackyh Date: Sun, 17 Oct 2021 18:50:47 +0800 Subject: [PATCH 16/21] Update README.md some cmd line update --- tritonserver/README.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tritonserver/README.md b/tritonserver/README.md index 0cf4360e393..b22b04f2849 100644 --- a/tritonserver/README.md +++ b/tritonserver/README.md @@ -63,7 +63,12 @@ Now, this models directory will be our model repository. $ docker run -it --gpus=all -v $(pwd):/workspace nvcr.io/nvidia/tritonserver:21.09-py3 bash $ git clone https://github.com/bytedeco/javacpp-presets.git $ cd javacpp-presets/tritonserver/samples - $ mvn exec:java -Djavacpp.platform=linux-x86_64 -Dexec.args="-r /workspace/tritonserver_21.09_source/server-2.14.0/docs/examples/model_repository/models" + $ wget https://dlcdn.apache.org/maven/maven-3/3.8.3/binaries/apache-maven-3.8.3-bin.tar.gz + $ tar zxvf apache-maven-3.8.3-bin.tar.gz + $ export PATH=/opt/tritonserver/apache-maven-3.8.3/bin:$PATH + $ apt update + $ apt install -y openjdk-11-jdk + $ mvn compile exec:java -Djavacpp.platform=linux-x86_64 -Dexec.args="-r /workspace/models" ``` This sample is the JAVA implementation of [C API](https://github.com/triton-inference-server/server/blob/main/docs/inference_protocols.md#c-api) From a2ba3f95b97324cccc609ba0e7e69274dace4273 Mon Sep 17 00:00:00 2001 From: Samuel Audet Date: Sun, 17 Oct 2021 13:57:48 +0000 Subject: [PATCH 17/21] Refine README.md file --- tritonserver/README.md | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/tritonserver/README.md b/tritonserver/README.md index b22b04f2849..52151fcd8e4 100644 --- a/tritonserver/README.md +++ b/tritonserver/README.md @@ -1,5 +1,5 @@ -JavaCPP Presets for Tritonserver -============================ +JavaCPP Presets for Triton Inference Server +===========================================
[![Gitter](https://badges.gitter.im/bytedeco/javacpp.svg)](https://gitter.im/bytedeco/javacpp) [![Maven Central](https://maven-badges.herokuapp.com/maven-central/org.bytedeco/tritonserver/badge.svg)](https://maven-badges.herokuapp.com/maven-central/org.bytedeco/tritonserver) [![Sonatype Nexus (Snapshots)](https://img.shields.io/nexus/s/https/oss.sonatype.org/org.bytedeco/tritonserver.svg)](http://bytedeco.org/builds/) Build status for all platforms: [![tritonserver](https://github.com/bytedeco/javacpp-presets/workflows/tritonserver/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Atritonserver) Commercial support: [![xscode](https://img.shields.io/badge/Available%20on-xs%3Acode-blue?style=?style=plastic&logo=appveyor&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAEAAAABACAMAAACdt4HsAAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJbWFnZVJlYWR5ccllPAAAAAZQTFRF////////VXz1bAAAAAJ0Uk5T/wDltzBKAAAAlUlEQVR42uzXSwqAMAwE0Mn9L+3Ggtgkk35QwcnSJo9S+yGwM9DCooCbgn4YrJ4CIPUcQF7/XSBbx2TEz4sAZ2q1RAECBAiYBlCtvwN+KiYAlG7UDGj59MViT9hOwEqAhYCtAsUZvL6I6W8c2wcbd+LIWSCHSTeSAAECngN4xxIDSK9f4B9t377Wd7H5Nt7/Xz8eAgwAvesLRjYYPuUAAAAASUVORK5CYII=)](https://xscode.com/bytedeco/javacpp-presets) @@ -9,20 +9,21 @@ License Agreements ------------------ By downloading these archives, you agree to the terms of the license agreements for NVIDIA software included in the archives. -### Tritonserver -To view the license for Tritonserver included in these archives, click [here](https://github.com/triton-inference-server/server) +### Triton Inference Server +To view the license for Triton Inference Server included in these archives, click [here](https://developer.nvidia.com/ngc/nvidia-deep-learning-container-license) - * Tritonserver is widely used software package for inference service - * Triton supports almost all kinds of model generated by different DL frameworks or tools, such as TensorFLow, PyTorch, ONNX, TensorRT, OpenVINO... 
+ * Triton Inference Server is a widely used software package for inference service + * Triton supports almost all kinds of models generated by different DL frameworks or tools, such as TensorFlow, PyTorch, ONNX Runtime, TensorRT, OpenVINO... * Triton supports both CPU and GPU * Triton can be used both as an application and as a shared library. In case you already have your own inference service framework but want to add more features, just try Triton as a shared library. - * Triton supports Java as a shared library through JavaAPP Presets + * Triton supports Java as a shared library through JavaCPP Presets + Introduction ------------ This directory contains the JavaCPP Presets module for: - * Tritonserver 2.14 https://github.com/triton-inference-server/server + * Triton Inference Server 2.14 https://github.com/triton-inference-server/server Please refer to the parent README.md file for more detailed information about the JavaCPP Presets. @@ -36,19 +37,19 @@ Java API documentation is available here: Sample Usage ------------ -Here is a example of Tritonserver ported to Java from the `simple.cc` sample file available at: +Here is a simple example of Triton Inference Server ported to Java from the `simple.cc` sample file available at: * https://github.com/triton-inference-server/server/tree/main/src/servers -This sample intends to show how to call JAVA based Triton API to execute inference requests. -We can use [Maven 3](http://maven.apache.org/) to download and install automatically all the class files as well as the native binaries. To run this sample code, simply execute on the command line: +We can use [Maven 3](http://maven.apache.org/) to download and install automatically all the class files as well as the native binaries.
To run this sample code, after creating the `pom.xml` and `Simple.java` source files from the [`samples/`](samples/) subdirectory, simply execute on the command line: ```bash - $ mvn exec:java -Djavacpp.platform=linux-x86_64 -Dexec.args="-r /workspace/tritonserver_21.09_source/server-2.14.0/docs/examples/model_repository/models" + $ mvn compile exec:java -Dexec.args="-r /path/to/models" ``` +This sample intends to show how to call the Java-mapped C API of Triton to execute inference requests. -### Steps to run this sample +### Steps to run this sample inside an NGC container -**1, Get the source code of Tritonserver to set the model repository up:** + 1. Get the source code of Triton Inference Server to prepare the model repository: ```bash $ wget https://github.com/triton-inference-server/server/archive/refs/tags/v2.14.0.tar.gz $ tar zxvf v2.14.0.tar.gz @@ -56,9 +57,9 @@ We can use [Maven 3](http://maven.apache.org/) to download and install automatic $ mkdir models $ cd models; cp -a ../simple . ``` -Now, this models directory will be our model repository. +Now, this `models` directory will be our model repository. -**2, Start the Docker container to run the sample (Note: 1, now we are under the "model_repository" dir as above):** + 2. Start the Docker container to run the sample (assuming we are under the `models` directory created above): ```bash $ docker run -it --gpus=all -v $(pwd):/workspace nvcr.io/nvidia/tritonserver:21.09-py3 bash $ git clone https://github.com/bytedeco/javacpp-presets.git @@ -71,6 +72,6 @@ Now, this models directory will be our model repository. 
$ mvn compile exec:java -Djavacpp.platform=linux-x86_64 -Dexec.args="-r /workspace/models" ``` -This sample is the JAVA implementation of [C API](https://github.com/triton-inference-server/server/blob/main/docs/inference_protocols.md#c-api) +This sample is the Java implementation of the simple example written for the [C API](https://github.com/triton-inference-server/server/blob/main/docs/inference_protocols.md#c-api). From 76b1eb4a3f58cb04b0705b55fa34fac6403229ac Mon Sep 17 00:00:00 2001 From: Samuel Audet Date: Mon, 18 Oct 2021 00:56:15 +0000 Subject: [PATCH 18/21] Refine the presets some more --- tritonserver/cppbuild.sh | 2 +- .../tritonserver/presets/tritonserver.java | 30 +++++-------------- 2 files changed, 9 insertions(+), 23 deletions(-) diff --git a/tritonserver/cppbuild.sh b/tritonserver/cppbuild.sh index e09c39c4094..64f033033ff 100644 --- a/tritonserver/cppbuild.sh +++ b/tritonserver/cppbuild.sh @@ -21,7 +21,7 @@ case $PLATFORM in fi ;; windows-x86_64) - echo "Windows are not supported yet" + echo "Windows is not supported yet" exit 1 ;; *) diff --git a/tritonserver/src/main/java/org/bytedeco/tritonserver/presets/tritonserver.java b/tritonserver/src/main/java/org/bytedeco/tritonserver/presets/tritonserver.java index f2f8929ac47..d8ab9a806b1 100644 --- a/tritonserver/src/main/java/org/bytedeco/tritonserver/presets/tritonserver.java +++ b/tritonserver/src/main/java/org/bytedeco/tritonserver/presets/tritonserver.java @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Samuel Audet + * Copyright (C) 2021 Jack He, Samuel Audet * * Licensed either under the Apache License, Version 2.0, or (at your option) * under the terms of the GNU General Public License as published by @@ -50,30 +50,17 @@ value = { @Platform( value = {"linux-arm64", "linux-ppc64le", "linux-x86_64", "windows-x86_64"}, - compiler = "cpp11", include = {"tritonserver.h", "tritonbackend.h", "tritonrepoagent.h"}, exclude = {"", ""}, - link = "tritonserver" - ), - @Platform( - value = "linux-arm64", 
- includepath = {"/opt/tritonserver/include/triton/core/"}, - linkpath = {"/opt/tritonserver/lib/"} - ), - @Platform( - value = "linux-ppc64le", - includepath = {"/opt/tritonserver/include/triton/core/"}, - linkpath = {"/opt/tritonserver/lib/"} - ), - @Platform( - value = "linux-x86_64", + link = "tritonserver", includepath = {"/opt/tritonserver/include/triton/core/", "/opt/tritonserver/include/", "/usr/include"}, linkpath = {"/opt/tritonserver/lib/"} ), @Platform( value = "windows-x86_64", - includepath = "C:/Program Files/NVIDIA GPU Computing Toolkit/TensorRT/include", - linkpath = "C:/Program Files/NVIDIA GPU Computing Toolkit/TensorRT/lib/" + includepath = "C:/Program Files/NVIDIA GPU Computing Toolkit/TritonServer/include/triton/core/", + linkpath = "C:/Program Files/NVIDIA GPU Computing Toolkit/TritonServer/lib/", + preloadpath = "C:/Program Files/NVIDIA GPU Computing Toolkit/TritonServer/bin/" ) }, target = "org.bytedeco.tritonserver.tritonserver", @@ -119,10 +106,9 @@ public class tritonserver implements LoadEnabled, InfoMapper { public void map(InfoMap infoMap) { infoMap.putFirst(new Info().enumerate(false)) .put(new Info("bool").cast().valueTypes("boolean").pointerTypes("boolean[]", "BoolPointer")) - .put(new Info("TRITONSERVER_EXPORT").cppTypes().annotations()) - .put(new Info("TRITONSERVER_DECLSPEC").cppTypes().annotations()) - .put(new Info("TRITONBACKEND_DECLSPEC", "TRITONBACKEND_ISPEC").cppTypes().annotations()) - .put(new Info("TRITONREPOAGENT_DECLSPEC", "TRITONREPOAGENT_ISPEC").cppTypes().annotations()) + .put(new Info("TRITONSERVER_EXPORT", "TRITONSERVER_DECLSPEC", + "TRITONBACKEND_DECLSPEC", "TRITONBACKEND_ISPEC", + "TRITONREPOAGENT_DECLSPEC", "TRITONREPOAGENT_ISPEC").cppTypes().annotations()) ; } } From 7d52a8ff720e7c965fece3fe6c7ad8cf0c74122c Mon Sep 17 00:00:00 2001 From: Samuel Audet Date: Mon, 18 Oct 2021 01:07:20 +0000 Subject: [PATCH 19/21] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md 
b/CHANGELOG.md index c9aac0cbb0f..7ad15e16e28 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,5 @@ + * Add presets for Triton Inference Server 2.14 ([pull #1085](https://github.com/bytedeco/javacpp-presets/pull/1085)) * Add presets for the NvToolsExt (NVTX) module of CUDA ([issue #1068](https://github.com/bytedeco/javacpp-presets/issues/1068)) * Increase the amount of function pointers available for callbacks in presets for Qt ([pull #1080](https://github.com/bytedeco/javacpp-presets/pull/1080)) * Map C++ JIT classes and functions of TorchScript in presets for PyTorch ([issue #1068](https://github.com/bytedeco/javacpp-presets/issues/1068)) From 21d8f878c727fbfc04b76c23eadf54922bf2f5f2 Mon Sep 17 00:00:00 2001 From: Samuel Audet Date: Mon, 18 Oct 2021 12:32:51 +0000 Subject: [PATCH 20/21] Revert README.md and pom.xml formatting changes --- README.md | 584 +++++++++++++++++++++---------------------- pom.xml | 10 +- tritonserver/pom.xml | 2 +- 3 files changed, 298 insertions(+), 298 deletions(-) diff --git a/README.md b/README.md index 4a6e0b1d72f..964edb3b630 100644 --- a/README.md +++ b/README.md @@ -1,292 +1,292 @@ -JavaCPP Presets -=============== - -[![Gitter](https://badges.gitter.im/bytedeco/javacpp.svg)](https://gitter.im/bytedeco/javacpp) [![Maven Central](https://maven-badges.herokuapp.com/maven-central/org.bytedeco/javacpp-presets/badge.svg)](https://maven-badges.herokuapp.com/maven-central/org.bytedeco/javacpp-presets) [![Sonatype Nexus (Snapshots)](https://img.shields.io/nexus/s/https/oss.sonatype.org/org.bytedeco/javacpp-presets.svg)](http://bytedeco.org/builds/) -Build status for all platforms (Android, iOS, Linux, Mac OS X, Windows): -[![opencv](https://github.com/bytedeco/javacpp-presets/workflows/opencv/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Aopencv) 
-[![ffmpeg](https://github.com/bytedeco/javacpp-presets/workflows/ffmpeg/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Affmpeg) -[![flycapture](https://github.com/bytedeco/javacpp-presets/workflows/flycapture/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Aflycapture) -[![spinnaker](https://github.com/bytedeco/javacpp-presets/workflows/spinnaker/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Aspinnaker) -[![libdc1394](https://github.com/bytedeco/javacpp-presets/workflows/libdc1394/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Alibdc1394) -[![libfreenect](https://github.com/bytedeco/javacpp-presets/workflows/libfreenect/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Alibfreenect) -[![libfreenect2](https://github.com/bytedeco/javacpp-presets/workflows/libfreenect2/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Alibfreenect2) -[![librealsense](https://github.com/bytedeco/javacpp-presets/workflows/librealsense/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Alibrealsense) -[![librealsense2](https://github.com/bytedeco/javacpp-presets/workflows/librealsense2/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Alibrealsense2) -[![videoinput](https://github.com/bytedeco/javacpp-presets/workflows/videoinput/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Avideoinput) -[![artoolkitplus](https://github.com/bytedeco/javacpp-presets/workflows/artoolkitplus/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Aartoolkitplus) -[![chilitags](https://github.com/bytedeco/javacpp-presets/workflows/chilitags/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Achilitags) 
-[![flandmark](https://github.com/bytedeco/javacpp-presets/workflows/flandmark/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Aflandmark) -[![arrow](https://github.com/bytedeco/javacpp-presets/workflows/arrow/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Aarrow) -[![hdf5](https://github.com/bytedeco/javacpp-presets/workflows/hdf5/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Ahdf5) -[![hyperscan](https://github.com/bytedeco/javacpp-presets/workflows/hyperscan/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Ahyperscan) -[![mkl](https://github.com/bytedeco/javacpp-presets/workflows/mkl/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Amkl) -[![mkl-dnn](https://github.com/bytedeco/javacpp-presets/workflows/mkl-dnn/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Amkl-dnn) -[![dnnl](https://github.com/bytedeco/javacpp-presets/workflows/dnnl/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Adnnl) -[![openblas](https://github.com/bytedeco/javacpp-presets/workflows/openblas/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Aopenblas) -[![arpack-ng](https://github.com/bytedeco/javacpp-presets/workflows/arpack-ng/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Aarpack-ng) -[![cminpack](https://github.com/bytedeco/javacpp-presets/workflows/cminpack/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Acminpack) -[![fftw](https://github.com/bytedeco/javacpp-presets/workflows/fftw/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Afftw) -[![gsl](https://github.com/bytedeco/javacpp-presets/workflows/gsl/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Agsl) 
-[![cpython](https://github.com/bytedeco/javacpp-presets/workflows/cpython/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Acpython) -[![numpy](https://github.com/bytedeco/javacpp-presets/workflows/numpy/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Anumpy) -[![scipy](https://github.com/bytedeco/javacpp-presets/workflows/scipy/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Ascipy) -[![gym](https://github.com/bytedeco/javacpp-presets/workflows/gym/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Agym) -[![llvm](https://github.com/bytedeco/javacpp-presets/workflows/llvm/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Allvm) -[![libffi](https://github.com/bytedeco/javacpp-presets/workflows/libffi/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Alibffi) -[![libpostal](https://github.com/bytedeco/javacpp-presets/workflows/libpostal/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Alibpostal) -[![leptonica](https://github.com/bytedeco/javacpp-presets/workflows/leptonica/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Aleptonica) -[![tesseract](https://github.com/bytedeco/javacpp-presets/workflows/tesseract/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Atesseract) -[![caffe](https://github.com/bytedeco/javacpp-presets/workflows/caffe/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Acaffe) -[![openpose](https://github.com/bytedeco/javacpp-presets/workflows/openpose/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Aopenpose) -[![cuda](https://github.com/bytedeco/javacpp-presets/workflows/cuda/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Acuda) 
-[![nvcodec](https://github.com/bytedeco/javacpp-presets/workflows/nvcodec/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Anvcodec) -[![opencl](https://github.com/bytedeco/javacpp-presets/workflows/opencl/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Aopencl) -[![mxnet](https://github.com/bytedeco/javacpp-presets/workflows/mxnet/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Amxnet) -[![pytorch](https://github.com/bytedeco/javacpp-presets/workflows/pytorch/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Apytorch) -[![tensorflow](https://github.com/bytedeco/javacpp-presets/workflows/tensorflow/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Atensorflow) -[![tensorflow-lite](https://github.com/bytedeco/javacpp-presets/workflows/tensorflow-lite/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Atensorflow-lite) -[![tensorrt](https://github.com/bytedeco/javacpp-presets/workflows/tensorrt/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Atensorrt) -[![tritonserver](https://github.com/bytedeco/javacpp-presets/workflows/tritonserver/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Atritonserver) -[![ale](https://github.com/bytedeco/javacpp-presets/workflows/ale/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Aale) -[![depthai](https://github.com/bytedeco/javacpp-presets/workflows/depthai/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Adepthai) -[![onnx](https://github.com/bytedeco/javacpp-presets/workflows/onnx/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Aonnx) -[![ngraph](https://github.com/bytedeco/javacpp-presets/workflows/ngraph/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Angraph) 
-[![onnxruntime](https://github.com/bytedeco/javacpp-presets/workflows/onnxruntime/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Aonnxruntime) -[![tvm](https://github.com/bytedeco/javacpp-presets/workflows/tvm/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Atvm) -[![liquidfun](https://github.com/bytedeco/javacpp-presets/workflows/liquidfun/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Aliquidfun) -[![qt](https://github.com/bytedeco/javacpp-presets/workflows/qt/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Aqt) -[![skia](https://github.com/bytedeco/javacpp-presets/workflows/skia/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Askia) -[![cpu_features](https://github.com/bytedeco/javacpp-presets/workflows/cpu_features/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Acpu_features) -[![modsecurity](https://github.com/bytedeco/javacpp-presets/workflows/modsecurity/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Amodsecurity) -[![systems](https://github.com/bytedeco/javacpp-presets/workflows/systems/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Asystems) -Commercial support and paid services for custom presets: [![xscode](https://img.shields.io/badge/Available%20on-xs%3Acode-blue?style=?style=plastic&logo=appveyor&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAEAAAABACAMAAACdt4HsAAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJbWFnZVJlYWR5ccllPAAAAAZQTFRF////////VXz1bAAAAAJ0Uk5T/wDltzBKAAAAlUlEQVR42uzXSwqAMAwE0Mn9L+3Ggtgkk35QwcnSJo9S+yGwM9DCooCbgn4YrJ4CIPUcQF7/XSBbx2TEz4sAZ2q1RAECBAiYBlCtvwN+KiYAlG7UDGj59MViT9hOwEqAhYCtAsUZvL6I6W8c2wcbd+LIWSCHSTeSAAECngN4xxIDSK9f4B9t377Wd7H5Nt7/Xz8eAgwAvesLRjYYPuUAAAAASUVORK5CYII=)](https://xscode.com/bytedeco/javacpp-presets) - -Introduction ------------- -The JavaCPP Presets modules 
contain Java configuration and interface classes for widely used C/C++ libraries. The configuration files in the `org.bytedeco..presets` packages are used by the `Parser` to create from C/C++ header files the Java interface files targeting the `org.bytedeco.` packages, which is turn are used by the `Generator` and the native C++ compiler to produce the required JNI libraries. Moreover, helper classes make their functionality easier to use on the Java platform, including Android. - -For orientation purposes, the documentation of this repository can be thought of as being split into 2 levels: - - 1. The directory where this parent README.md document is located belongs to the upper level, the `javacpp-presets` module itself, and - 2. The subdirectories in it are the actual child modules, with their README.md files, presets, and packages, as the lower level. - -This parent README.md file contains general information applicable to all modules to solve issues concerning downloads, common requirements, installation procedures, and build instructions. The respective README.md files in each subdirectory contain extra information, links, and notes concerning API documentation, specific requirements, dependencies for `pom.xml` files, and sample usage code, for the presets and packages of each module. Please be aware that not all of the presets have the same level of maturity, and the style and layout may also differ from one to another since they are based on the APIs of third-party libraries. - -Please refer to the wiki page for more information about how to [create new presets](https://github.com/bytedeco/javacpp-presets/wiki/Create-New-Presets). Since additional documentation is currently lacking, please also feel free to ask questions on [the mailing list](http://groups.google.com/group/javacpp-project) or [the discussion forum](https://github.com/bytedeco/javacpp-presets/discussions). 
- - -Downloads ---------- -JAR files containing binaries for all child modules and builds for all supported platforms (Android, iOS, Linux, Mac OS X, and Windows) can be obtained from the [Maven Central Repository](http://search.maven.org/#search|ga|1|bytedeco). Archives containing these JAR files are also available as [releases](https://github.com/bytedeco/javacpp-presets/releases). - -To install manually the JAR files, follow the instructions in the [Manual Installation](#manual-installation) section below. - -We can also have everything downloaded and installed automatically with: - - * Maven (inside the `pom.xml` file) -```xml - - org.bytedeco - ${moduleName}-platform - ${moduleVersion}-1.5.6 - -``` - - * Gradle (inside the `build.gradle` file) -```groovy - dependencies { - implementation group: 'org.bytedeco', name: moduleName + '-platform', version: moduleVersion + '-1.5.6' - } -``` - - * Leiningen (inside the `project.clj` file) -```clojure - :dependencies [ - [~(symbol (str "org.bytedeco/" moduleName "-platform")) ~(str moduleVersion "-1.5.6")] - ] -``` - - * sbt (inside the `build.sbt` file) -```scala - libraryDependencies += "org.bytedeco" % moduleName + "-platform" % moduleVersion + "-1.5.6" -``` - -where the `moduleName` and `moduleVersion` variables correspond to the desired module. This downloads binaries for all platforms, but to get binaries for only one platform we can set the `javacpp.platform` system property (via the `-D` command line option) to something like `android-arm`, `linux-x86_64`, `macosx-x86_64`, `windows-x86_64`, etc. We can also specify more than one platform, see the examples at [Reducing the Number of Dependencies](https://github.com/bytedeco/javacpp-presets/wiki/Reducing-the-Number-of-Dependencies). Another option available to Gradle users is [Gradle JavaCPP](https://github.com/bytedeco/gradle-javacpp), and similarly for Scala users there is [SBT-JavaCPP](https://github.com/bytedeco/sbt-javacpp). 
- - -Required Software ------------------ -To use the JavaCPP Presets, you will need to download and install the following software: - - * An implementation of Java SE 7 or newer: - * OpenJDK http://openjdk.java.net/install/ or - * Oracle JDK http://www.oracle.com/technetwork/java/javase/downloads/ or - * IBM JDK http://www.ibm.com/developerworks/java/jdk/ - -Further, in the case of Android, the JavaCPP Presets also rely on: - - * Android SDK API 21 or newer http://developer.android.com/sdk/ - - -Manual Installation -------------------- -Simply put all the desired JAR files (`opencv*.jar`, `ffmpeg*.jar`, etc.), in addition to `javacpp.jar`, somewhere in your class path. The JAR files available as pre-built artifacts are meant to be used with [JavaCPP](https://github.com/bytedeco/javacpp). The binaries for Linux were built for CentOS 6 and 7, so they should work on most distributions currently in use. The ones for Android were compiled for ARMv7 processors featuring an FPU, so they will not work on ancient devices such as the HTC Magic or some others with an ARMv6 CPU. Here are some more specific instructions for common cases: - -NetBeans (Java SE 7 or newer): - - 1. In the Projects window, right-click the Libraries node of your project, and select "Add JAR/Folder...". - 2. Locate the JAR files, select them, and click OK. - -Eclipse (Java SE 7 or newer): - - 1. Navigate to Project > Properties > Java Build Path > Libraries and click "Add External JARs...". - 2. Locate the JAR files, select them, and click OK. - -IntelliJ IDEA (Android 5.0 or newer): - - 1. Follow the instructions on this page: http://developer.android.com/training/basics/firstapp/ - 2. Copy all the JAR files into the `app/libs` subdirectory. - 3. Navigate to File > Project Structure > app > Dependencies, click `+`, and select "2 File dependency". - 4. Select all the JAR files from the `libs` subdirectory. 
- -After that, we can access almost transparently the corresponding C/C++ APIs through the interface classes found in the `org.bytedeco.` packages. Indeed, the `Parser` translates the code comments from the C/C++ header files into the Java interface files, (almost) ready to be consumed by Javadoc. However, since their translation still leaves to be desired, one may wish to refer to the original documentation pages. For instance, the ones for OpenCV and FFmpeg can be found online at: - - * [OpenCV documentation](http://docs.opencv.org/master/) - * [FFmpeg documentation](http://ffmpeg.org/doxygen/trunk/) - - -Build Instructions ------------------- -If the binary files available above are not enough for your needs, you might need to rebuild them from the source code. To this end, project files on the Java side were created as [Maven modules](#the-maven-modules). By default, the Maven build also installs the native libraries on the native C/C++ side with the [`cppbuild.sh` scripts](#the-cppbuildsh-scripts), but they can also be installed by other means. - -Additionally, one can find on the wiki page additional information about the recommended [build environments](https://github.com/bytedeco/javacpp-presets/wiki/Build-Environments) for the major platforms. - - -### The Maven modules -The JavaCPP Presets depend on Maven, a powerful build system for Java, so before attempting a build, be sure to install and read up on: - - * Maven 3.x http://maven.apache.org/download.html - * JavaCPP 1.5.6 https://github.com/bytedeco/javacpp - -Each child module in turn relies by default on the included [`cppbuild.sh` scripts](#the-cppbuildsh-scripts), explained below, to install its corresponding native libraries in the `cppbuild` subdirectory. To use native libraries already installed somewhere else on the system, other installation directories than `cppbuild` can also be specified either in the `pom.xml` files or in the `.java` configuration files. 
The following versions are supported: - - * OpenCV 4.5.4 https://opencv.org/releases.html - * FFmpeg 4.4.x http://ffmpeg.org/download.html - * FlyCapture 2.13.x https://www.flir.com/products/flycapture-sdk - * Spinnaker 2.4.x https://www.flir.com/products/spinnaker-sdk - * libdc1394 2.2.6 http://sourceforge.net/projects/libdc1394/files/ - * libfreenect 0.5.7 https://github.com/OpenKinect/libfreenect - * libfreenect2 0.2.0 https://github.com/OpenKinect/libfreenect2 - * librealsense 1.12.x https://github.com/IntelRealSense/librealsense - * librealsense2 2.44.x https://github.com/IntelRealSense/librealsense - * videoInput 0.200 https://github.com/ofTheo/videoInput/ - * ARToolKitPlus 2.3.1 https://launchpad.net/artoolkitplus - * Chilitags https://github.com/chili-epfl/chilitags - * flandmark 1.07 https://github.com/uricamic/flandmark - * Arrow 5.0.x https://arrow.apache.org/install/ - * HDF5 1.12.x https://www.hdfgroup.org/downloads/ - * Hyperscan 5.4.x https://github.com/intel/hyperscan - * MKL 2021.x https://software.intel.com/mkl - * MKL-DNN 0.21.x https://github.com/oneapi-src/oneDNN - * DNNL 2.4.x https://github.com/oneapi-src/oneDNN - * OpenBLAS 0.3.18 http://www.openblas.net/ - * ARPACK-NG 3.8.0 https://github.com/opencollab/arpack-ng - * CMINPACK 1.3.8 https://github.com/devernay/cminpack - * FFTW 3.3.10 http://www.fftw.org/download.html - * GSL 2.7 http://www.gnu.org/software/gsl/#downloading - * CPython 3.10.x https://www.python.org/downloads/ - * NumPy 1.21.x https://github.com/numpy/numpy - * SciPy 1.7.x https://github.com/scipy/scipy - * Gym 0.21.x https://github.com/openai/gym - * LLVM 13.0.x http://llvm.org/releases/download.html - * libffi 3.4.x https://github.com/libffi/libffi - * libpostal 1.1-alpha https://github.com/openvenues/libpostal - * Leptonica 1.82.x http://www.leptonica.org/download.html - * Tesseract 4.1.1 https://github.com/tesseract-ocr/tesseract - * Caffe 1.0 https://github.com/BVLC/caffe - * OpenPose 1.7.0 
https://github.com/CMU-Perceptual-Computing-Lab/openpose - * CUDA 11.4.x https://developer.nvidia.com/cuda-downloads - * cuDNN 8.2.x https://developer.nvidia.com/cudnn - * NCCL 2.11.x https://developer.nvidia.com/nccl - * NVIDIA Video Codec SDK 11.1.x https://developer.nvidia.com/nvidia-video-codec-sdk - * OpenCL 3.0 https://github.com/KhronosGroup/OpenCL-ICD-Loader - * MXNet 1.8.0 https://github.com/apache/incubator-mxnet - * PyTorch 1.9.x https://github.com/pytorch/pytorch - * TensorFlow 1.15.x https://github.com/tensorflow/tensorflow - * TensorFlow Lite 2.6.x https://github.com/tensorflow/tensorflow - * TensorRT 8.x https://developer.nvidia.com/tensorrt - * Triton Inference Server 2.14 https://developer.nvidia.com/nvidia-triton-inference-server - * The Arcade Learning Environment 0.7.x https://github.com/mgbellemare/Arcade-Learning-Environment - * DepthAI 2.11.x https://github.com/luxonis/depthai-core - * ONNX 1.10.x https://github.com/onnx/onnx - * nGraph 0.26.0 https://github.com/NervanaSystems/ngraph - * ONNX Runtime 1.9.x https://github.com/microsoft/onnxruntime - * TVM 0.7.0 https://github.com/apache/tvm - * LiquidFun http://google.github.io/liquidfun/ - * Qt 5.15.x https://download.qt.io/archive/qt/ - * Mono/Skia 2.80.x https://github.com/mono/skia - * cpu_features 0.6.0 https://github.com/google/cpu_features - * ModSecurity 3.0.5 https://github.com/SpiderLabs/ModSecurity - * System APIs of the build environments: - * Linux (glibc) https://www.gnu.org/software/libc/ - * Mac OS X (XNU libc) https://opensource.apple.com/ - * Windows (Win32) https://developer.microsoft.com/en-us/windows/ - -Once everything installed and configured, simply execute -```bash -$ mvn install --projects .,opencv,ffmpeg,etc. -Djavacpp.platform.root=/path/to/android-ndk/ -``` -inside the directory containing the parent `pom.xml` file, by specifying only the desired child modules in the command, but **without the leading period "." 
in the comma-separated list of projects, the parent `pom.xml` file itself might not get installed.** (The `-Djavacpp.platform.root=...` option is required only for Android builds.) Also specify `-Djavacpp.cppbuild.skip` as option to skip the execution of the `cppbuild.sh` scripts. In addition to `-Djavacpp.platform=...`, some of the presets can also be built against CUDA with `-Djavacpp.platform.extension=-gpu` or CPython with `-Djavacpp.platform.extension=-python`. Please refer to the comments inside the `pom.xml` file for further details. From the "platform" subdirectory, we can also install the "platform" artifacts with a similar command: - -```bash -$ cd platform -$ mvn install --projects ../opencv/platform,../ffmpeg/platform,etc. -Djavacpp.platform.host -``` - - -### The `cppbuild.sh` scripts -Running the scripts allows us to install easily the native libraries on multiple platforms, but additional software is required: - - * A recent version of Linux, Mac OS X, or Windows with MSYS and Visual Studio - * Android NDK r18 or newer http://developer.android.com/ndk/downloads/ (required only for Android builds) - -With the above in working order, the scripts get launched automatically as part of the Maven build lifecycle, but we can also manually execute -```bash -$ ANDROID_NDK=/path/to/android-ndk/ bash cppbuild.sh [-platform ] [-extension ] [projects] -``` -where possible platform names are: `android-arm`, `android-x86`, `linux-x86`, `linux-x86_64`, `linux-armhf`, `linux-ppc64le`, `linux-mips64el`, `macosx-x86_64`, `windows-x86`, `windows-x86_64`, etc. The `-gpu` extension as supported by some builds also require CUDA to be installed. (The `ANDROID_NDK` variable is required only for Android builds.) Please note that the scripts download source archives from appropriate sites as necessary. - -To compile binaries for an Android device with no FPU, first make sure this is what you want. 
Without FPU, the performance of either OpenCV or FFmpeg is bound to be unacceptable. If you still wish to continue down that road, then replace "armeabi-v7a" by "armeabi" and "-march=armv7-a -mfloat-abi=softfp -mfpu=vfpv3-d16" with "-march=armv5te -mtune=xscale -msoft-float", inside various files. - -Although JavaCPP can pick up native libraries installed on the system, the scripts exist to facilitate the build process across multiple platforms. They also allow JavaCPP to copy the native libraries and load them at runtime from the JAR files created above by Maven, a useful feature for standalone applications or Java applets. Moreover, tricks such as the following work with JNLP: -```xml - - - - - - - - -``` - -Thanks to Jose Gómez for testing this out! - - -How Can I Help? ---------------- -Contributions of any kind are highly welcome! At the moment, the `Parser` has limited capabilities, so I plan to improve it gradually to the point where it can successfully parse large C++ header files that are even more convoluted than the ones from OpenCV, Caffe, or TensorFlow, but the build system could also be improved. Consequently, I am looking for help especially with the five following tasks, in no particular order: - - * Setting up continuous integration, preferably free on the cloud ([Travis CI](https://travis-ci.org/)?) - * Improving the `Parser` (by using the [presets for LLVM and Clang](llvm)?) - * Providing builds for more platforms, as with `linux-armhf` for [Raspberry Pi](https://www.raspberrypi.org/), etc. - * Replacing the Bash/Maven build combo by something easier to use ([Gradle](http://gradle.org/)?) - * Adding new presets as child modules for other C/C++ libraries (Caffe2, OpenNI, OpenMesh, PCL, etc.) - -To contribute, please fork and create pull requests, or post your suggestions [as a new "issue"](https://github.com/bytedeco/javacpp-presets/issues). Thank you very much in advance for your contribution! 
- - ----- -Project lead: Samuel Audet [samuel.audet `at` gmail.com](mailto:samuel.audet at gmail.com) -Developer site: https://github.com/bytedeco/javacpp-presets -Discussion group: http://groups.google.com/group/javacpp-project +JavaCPP Presets +=============== + +[![Gitter](https://badges.gitter.im/bytedeco/javacpp.svg)](https://gitter.im/bytedeco/javacpp) [![Maven Central](https://maven-badges.herokuapp.com/maven-central/org.bytedeco/javacpp-presets/badge.svg)](https://maven-badges.herokuapp.com/maven-central/org.bytedeco/javacpp-presets) [![Sonatype Nexus (Snapshots)](https://img.shields.io/nexus/s/https/oss.sonatype.org/org.bytedeco/javacpp-presets.svg)](http://bytedeco.org/builds/) +Build status for all platforms (Android, iOS, Linux, Mac OS X, Windows): +[![opencv](https://github.com/bytedeco/javacpp-presets/workflows/opencv/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Aopencv) +[![ffmpeg](https://github.com/bytedeco/javacpp-presets/workflows/ffmpeg/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Affmpeg) +[![flycapture](https://github.com/bytedeco/javacpp-presets/workflows/flycapture/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Aflycapture) +[![spinnaker](https://github.com/bytedeco/javacpp-presets/workflows/spinnaker/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Aspinnaker) +[![libdc1394](https://github.com/bytedeco/javacpp-presets/workflows/libdc1394/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Alibdc1394) +[![libfreenect](https://github.com/bytedeco/javacpp-presets/workflows/libfreenect/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Alibfreenect) +[![libfreenect2](https://github.com/bytedeco/javacpp-presets/workflows/libfreenect2/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Alibfreenect2) 
+[![librealsense](https://github.com/bytedeco/javacpp-presets/workflows/librealsense/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Alibrealsense) +[![librealsense2](https://github.com/bytedeco/javacpp-presets/workflows/librealsense2/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Alibrealsense2) +[![videoinput](https://github.com/bytedeco/javacpp-presets/workflows/videoinput/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Avideoinput) +[![artoolkitplus](https://github.com/bytedeco/javacpp-presets/workflows/artoolkitplus/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Aartoolkitplus) +[![chilitags](https://github.com/bytedeco/javacpp-presets/workflows/chilitags/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Achilitags) +[![flandmark](https://github.com/bytedeco/javacpp-presets/workflows/flandmark/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Aflandmark) +[![arrow](https://github.com/bytedeco/javacpp-presets/workflows/arrow/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Aarrow) +[![hdf5](https://github.com/bytedeco/javacpp-presets/workflows/hdf5/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Ahdf5) +[![hyperscan](https://github.com/bytedeco/javacpp-presets/workflows/hyperscan/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Ahyperscan) +[![mkl](https://github.com/bytedeco/javacpp-presets/workflows/mkl/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Amkl) +[![mkl-dnn](https://github.com/bytedeco/javacpp-presets/workflows/mkl-dnn/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Amkl-dnn) 
+[![dnnl](https://github.com/bytedeco/javacpp-presets/workflows/dnnl/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Adnnl) +[![openblas](https://github.com/bytedeco/javacpp-presets/workflows/openblas/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Aopenblas) +[![arpack-ng](https://github.com/bytedeco/javacpp-presets/workflows/arpack-ng/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Aarpack-ng) +[![cminpack](https://github.com/bytedeco/javacpp-presets/workflows/cminpack/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Acminpack) +[![fftw](https://github.com/bytedeco/javacpp-presets/workflows/fftw/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Afftw) +[![gsl](https://github.com/bytedeco/javacpp-presets/workflows/gsl/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Agsl) +[![cpython](https://github.com/bytedeco/javacpp-presets/workflows/cpython/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Acpython) +[![numpy](https://github.com/bytedeco/javacpp-presets/workflows/numpy/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Anumpy) +[![scipy](https://github.com/bytedeco/javacpp-presets/workflows/scipy/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Ascipy) +[![gym](https://github.com/bytedeco/javacpp-presets/workflows/gym/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Agym) +[![llvm](https://github.com/bytedeco/javacpp-presets/workflows/llvm/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Allvm) +[![libffi](https://github.com/bytedeco/javacpp-presets/workflows/libffi/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Alibffi) 
+[![libpostal](https://github.com/bytedeco/javacpp-presets/workflows/libpostal/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Alibpostal) +[![leptonica](https://github.com/bytedeco/javacpp-presets/workflows/leptonica/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Aleptonica) +[![tesseract](https://github.com/bytedeco/javacpp-presets/workflows/tesseract/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Atesseract) +[![caffe](https://github.com/bytedeco/javacpp-presets/workflows/caffe/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Acaffe) +[![openpose](https://github.com/bytedeco/javacpp-presets/workflows/openpose/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Aopenpose) +[![cuda](https://github.com/bytedeco/javacpp-presets/workflows/cuda/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Acuda) +[![nvcodec](https://github.com/bytedeco/javacpp-presets/workflows/nvcodec/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Anvcodec) +[![opencl](https://github.com/bytedeco/javacpp-presets/workflows/opencl/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Aopencl) +[![mxnet](https://github.com/bytedeco/javacpp-presets/workflows/mxnet/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Amxnet) +[![pytorch](https://github.com/bytedeco/javacpp-presets/workflows/pytorch/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Apytorch) +[![tensorflow](https://github.com/bytedeco/javacpp-presets/workflows/tensorflow/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Atensorflow) 
+[![tensorflow-lite](https://github.com/bytedeco/javacpp-presets/workflows/tensorflow-lite/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Atensorflow-lite) +[![tensorrt](https://github.com/bytedeco/javacpp-presets/workflows/tensorrt/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Atensorrt) +[![tritonserver](https://github.com/bytedeco/javacpp-presets/workflows/tritonserver/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Atritonserver) +[![ale](https://github.com/bytedeco/javacpp-presets/workflows/ale/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Aale) +[![depthai](https://github.com/bytedeco/javacpp-presets/workflows/depthai/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Adepthai) +[![onnx](https://github.com/bytedeco/javacpp-presets/workflows/onnx/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Aonnx) +[![ngraph](https://github.com/bytedeco/javacpp-presets/workflows/ngraph/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Angraph) +[![onnxruntime](https://github.com/bytedeco/javacpp-presets/workflows/onnxruntime/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Aonnxruntime) +[![tvm](https://github.com/bytedeco/javacpp-presets/workflows/tvm/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Atvm) +[![liquidfun](https://github.com/bytedeco/javacpp-presets/workflows/liquidfun/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Aliquidfun) +[![qt](https://github.com/bytedeco/javacpp-presets/workflows/qt/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Aqt) +[![skia](https://github.com/bytedeco/javacpp-presets/workflows/skia/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Askia) 
+[![cpu_features](https://github.com/bytedeco/javacpp-presets/workflows/cpu_features/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Acpu_features)
+[![modsecurity](https://github.com/bytedeco/javacpp-presets/workflows/modsecurity/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Amodsecurity)
+[![systems](https://github.com/bytedeco/javacpp-presets/workflows/systems/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Asystems)
+Commercial support and paid services for custom presets: [![xscode](https://img.shields.io/badge/Available%20on-xs%3Acode-blue?style=?style=plastic&logo=appveyor&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAEAAAABACAMAAACdt4HsAAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJbWFnZVJlYWR5ccllPAAAAAZQTFRF////////VXz1bAAAAAJ0Uk5T/wDltzBKAAAAlUlEQVR42uzXSwqAMAwE0Mn9L+3Ggtgkk35QwcnSJo9S+yGwM9DCooCbgn4YrJ4CIPUcQF7/XSBbx2TEz4sAZ2q1RAECBAiYBlCtvwN+KiYAlG7UDGj59MViT9hOwEqAhYCtAsUZvL6I6W8c2wcbd+LIWSCHSTeSAAECngN4xxIDSK9f4B9t377Wd7H5Nt7/Xz8eAgwAvesLRjYYPuUAAAAASUVORK5CYII=)](https://xscode.com/bytedeco/javacpp-presets)
+
+Introduction
+------------
+The JavaCPP Presets modules contain Java configuration and interface classes for widely used C/C++ libraries. The configuration files in the `org.bytedeco..presets` packages are used by the `Parser` to create from C/C++ header files the Java interface files targeting the `org.bytedeco.` packages, which in turn are used by the `Generator` and the native C++ compiler to produce the required JNI libraries. Moreover, helper classes make their functionality easier to use on the Java platform, including Android.
+
+For orientation purposes, the documentation of this repository can be thought of as being split into 2 levels:
+
+ 1. The directory where this parent README.md document is located belongs to the upper level, the `javacpp-presets` module itself, and
+ 2. 
The subdirectories in it are the actual child modules, with their README.md files, presets, and packages, as the lower level. + +This parent README.md file contains general information applicable to all modules to solve issues concerning downloads, common requirements, installation procedures, and build instructions. The respective README.md files in each subdirectory contain extra information, links, and notes concerning API documentation, specific requirements, dependencies for `pom.xml` files, and sample usage code, for the presets and packages of each module. Please be aware that not all of the presets have the same level of maturity, and the style and layout may also differ from one to another since they are based on the APIs of third-party libraries. + +Please refer to the wiki page for more information about how to [create new presets](https://github.com/bytedeco/javacpp-presets/wiki/Create-New-Presets). Since additional documentation is currently lacking, please also feel free to ask questions on [the mailing list](http://groups.google.com/group/javacpp-project) or [the discussion forum](https://github.com/bytedeco/javacpp-presets/discussions). + + +Downloads +--------- +JAR files containing binaries for all child modules and builds for all supported platforms (Android, iOS, Linux, Mac OS X, and Windows) can be obtained from the [Maven Central Repository](http://search.maven.org/#search|ga|1|bytedeco). Archives containing these JAR files are also available as [releases](https://github.com/bytedeco/javacpp-presets/releases). + +To install manually the JAR files, follow the instructions in the [Manual Installation](#manual-installation) section below. 
+ +We can also have everything downloaded and installed automatically with: + + * Maven (inside the `pom.xml` file) +```xml + + org.bytedeco + ${moduleName}-platform + ${moduleVersion}-1.5.6 + +``` + + * Gradle (inside the `build.gradle` file) +```groovy + dependencies { + implementation group: 'org.bytedeco', name: moduleName + '-platform', version: moduleVersion + '-1.5.6' + } +``` + + * Leiningen (inside the `project.clj` file) +```clojure + :dependencies [ + [~(symbol (str "org.bytedeco/" moduleName "-platform")) ~(str moduleVersion "-1.5.6")] + ] +``` + + * sbt (inside the `build.sbt` file) +```scala + libraryDependencies += "org.bytedeco" % moduleName + "-platform" % moduleVersion + "-1.5.6" +``` + +where the `moduleName` and `moduleVersion` variables correspond to the desired module. This downloads binaries for all platforms, but to get binaries for only one platform we can set the `javacpp.platform` system property (via the `-D` command line option) to something like `android-arm`, `linux-x86_64`, `macosx-x86_64`, `windows-x86_64`, etc. We can also specify more than one platform, see the examples at [Reducing the Number of Dependencies](https://github.com/bytedeco/javacpp-presets/wiki/Reducing-the-Number-of-Dependencies). Another option available to Gradle users is [Gradle JavaCPP](https://github.com/bytedeco/gradle-javacpp), and similarly for Scala users there is [SBT-JavaCPP](https://github.com/bytedeco/sbt-javacpp). 
+ + +Required Software +----------------- +To use the JavaCPP Presets, you will need to download and install the following software: + + * An implementation of Java SE 7 or newer: + * OpenJDK http://openjdk.java.net/install/ or + * Oracle JDK http://www.oracle.com/technetwork/java/javase/downloads/ or + * IBM JDK http://www.ibm.com/developerworks/java/jdk/ + +Further, in the case of Android, the JavaCPP Presets also rely on: + + * Android SDK API 21 or newer http://developer.android.com/sdk/ + + +Manual Installation +------------------- +Simply put all the desired JAR files (`opencv*.jar`, `ffmpeg*.jar`, etc.), in addition to `javacpp.jar`, somewhere in your class path. The JAR files available as pre-built artifacts are meant to be used with [JavaCPP](https://github.com/bytedeco/javacpp). The binaries for Linux were built for CentOS 6 and 7, so they should work on most distributions currently in use. The ones for Android were compiled for ARMv7 processors featuring an FPU, so they will not work on ancient devices such as the HTC Magic or some others with an ARMv6 CPU. Here are some more specific instructions for common cases: + +NetBeans (Java SE 7 or newer): + + 1. In the Projects window, right-click the Libraries node of your project, and select "Add JAR/Folder...". + 2. Locate the JAR files, select them, and click OK. + +Eclipse (Java SE 7 or newer): + + 1. Navigate to Project > Properties > Java Build Path > Libraries and click "Add External JARs...". + 2. Locate the JAR files, select them, and click OK. + +IntelliJ IDEA (Android 5.0 or newer): + + 1. Follow the instructions on this page: http://developer.android.com/training/basics/firstapp/ + 2. Copy all the JAR files into the `app/libs` subdirectory. + 3. Navigate to File > Project Structure > app > Dependencies, click `+`, and select "2 File dependency". + 4. Select all the JAR files from the `libs` subdirectory. 
+
+After that, we can access almost transparently the corresponding C/C++ APIs through the interface classes found in the `org.bytedeco.` packages. Indeed, the `Parser` translates the code comments from the C/C++ header files into the Java interface files, (almost) ready to be consumed by Javadoc. However, since their translation still leaves something to be desired, one may wish to refer to the original documentation pages. For instance, the ones for OpenCV and FFmpeg can be found online at:
+
+ * [OpenCV documentation](http://docs.opencv.org/master/)
+ * [FFmpeg documentation](http://ffmpeg.org/doxygen/trunk/)
+
+
+Build Instructions
+------------------
+If the binary files available above are not enough for your needs, you might need to rebuild them from the source code. To this end, project files on the Java side were created as [Maven modules](#the-maven-modules). By default, the Maven build also installs the native libraries on the native C/C++ side with the [`cppbuild.sh` scripts](#the-cppbuildsh-scripts), but they can also be installed by other means.
+
+Additionally, one can find on the wiki page additional information about the recommended [build environments](https://github.com/bytedeco/javacpp-presets/wiki/Build-Environments) for the major platforms.
+
+
+### The Maven modules
+The JavaCPP Presets depend on Maven, a powerful build system for Java, so before attempting a build, be sure to install and read up on:
+
+ * Maven 3.x http://maven.apache.org/download.html
+ * JavaCPP 1.5.6 https://github.com/bytedeco/javacpp
+
+Each child module in turn relies by default on the included [`cppbuild.sh` scripts](#the-cppbuildsh-scripts), explained below, to install its corresponding native libraries in the `cppbuild` subdirectory. To use native libraries already installed somewhere else on the system, other installation directories than `cppbuild` can also be specified either in the `pom.xml` files or in the `.java` configuration files. 
The following versions are supported: + + * OpenCV 4.5.4 https://opencv.org/releases.html + * FFmpeg 4.4.x http://ffmpeg.org/download.html + * FlyCapture 2.13.x https://www.flir.com/products/flycapture-sdk + * Spinnaker 2.4.x https://www.flir.com/products/spinnaker-sdk + * libdc1394 2.2.6 http://sourceforge.net/projects/libdc1394/files/ + * libfreenect 0.5.7 https://github.com/OpenKinect/libfreenect + * libfreenect2 0.2.0 https://github.com/OpenKinect/libfreenect2 + * librealsense 1.12.x https://github.com/IntelRealSense/librealsense + * librealsense2 2.44.x https://github.com/IntelRealSense/librealsense + * videoInput 0.200 https://github.com/ofTheo/videoInput/ + * ARToolKitPlus 2.3.1 https://launchpad.net/artoolkitplus + * Chilitags https://github.com/chili-epfl/chilitags + * flandmark 1.07 https://github.com/uricamic/flandmark + * Arrow 5.0.x https://arrow.apache.org/install/ + * HDF5 1.12.x https://www.hdfgroup.org/downloads/ + * Hyperscan 5.4.x https://github.com/intel/hyperscan + * MKL 2021.x https://software.intel.com/mkl + * MKL-DNN 0.21.x https://github.com/oneapi-src/oneDNN + * DNNL 2.4.x https://github.com/oneapi-src/oneDNN + * OpenBLAS 0.3.18 http://www.openblas.net/ + * ARPACK-NG 3.8.0 https://github.com/opencollab/arpack-ng + * CMINPACK 1.3.8 https://github.com/devernay/cminpack + * FFTW 3.3.10 http://www.fftw.org/download.html + * GSL 2.7 http://www.gnu.org/software/gsl/#downloading + * CPython 3.10.x https://www.python.org/downloads/ + * NumPy 1.21.x https://github.com/numpy/numpy + * SciPy 1.7.x https://github.com/scipy/scipy + * Gym 0.21.x https://github.com/openai/gym + * LLVM 13.0.x http://llvm.org/releases/download.html + * libffi 3.4.x https://github.com/libffi/libffi + * libpostal 1.1-alpha https://github.com/openvenues/libpostal + * Leptonica 1.82.x http://www.leptonica.org/download.html + * Tesseract 4.1.1 https://github.com/tesseract-ocr/tesseract + * Caffe 1.0 https://github.com/BVLC/caffe + * OpenPose 1.7.0 
https://github.com/CMU-Perceptual-Computing-Lab/openpose + * CUDA 11.4.x https://developer.nvidia.com/cuda-downloads + * cuDNN 8.2.x https://developer.nvidia.com/cudnn + * NCCL 2.11.x https://developer.nvidia.com/nccl + * NVIDIA Video Codec SDK 11.1.x https://developer.nvidia.com/nvidia-video-codec-sdk + * OpenCL 3.0 https://github.com/KhronosGroup/OpenCL-ICD-Loader + * MXNet 1.8.0 https://github.com/apache/incubator-mxnet + * PyTorch 1.9.x https://github.com/pytorch/pytorch + * TensorFlow 1.15.x https://github.com/tensorflow/tensorflow + * TensorFlow Lite 2.6.x https://github.com/tensorflow/tensorflow + * TensorRT 8.x https://developer.nvidia.com/tensorrt + * Triton Inference Server 2.14 https://developer.nvidia.com/nvidia-triton-inference-server + * The Arcade Learning Environment 0.7.x https://github.com/mgbellemare/Arcade-Learning-Environment + * DepthAI 2.11.x https://github.com/luxonis/depthai-core + * ONNX 1.10.x https://github.com/onnx/onnx + * nGraph 0.26.0 https://github.com/NervanaSystems/ngraph + * ONNX Runtime 1.9.x https://github.com/microsoft/onnxruntime + * TVM 0.7.0 https://github.com/apache/tvm + * LiquidFun http://google.github.io/liquidfun/ + * Qt 5.15.x https://download.qt.io/archive/qt/ + * Mono/Skia 2.80.x https://github.com/mono/skia + * cpu_features 0.6.0 https://github.com/google/cpu_features + * ModSecurity 3.0.5 https://github.com/SpiderLabs/ModSecurity + * System APIs of the build environments: + * Linux (glibc) https://www.gnu.org/software/libc/ + * Mac OS X (XNU libc) https://opensource.apple.com/ + * Windows (Win32) https://developer.microsoft.com/en-us/windows/ + +Once everything installed and configured, simply execute +```bash +$ mvn install --projects .,opencv,ffmpeg,etc. -Djavacpp.platform.root=/path/to/android-ndk/ +``` +inside the directory containing the parent `pom.xml` file, by specifying only the desired child modules in the command, but **without the leading period "." 
in the comma-separated list of projects, the parent `pom.xml` file itself might not get installed.** (The `-Djavacpp.platform.root=...` option is required only for Android builds.) Also specify `-Djavacpp.cppbuild.skip` as an option to skip the execution of the `cppbuild.sh` scripts. In addition to `-Djavacpp.platform=...`, some of the presets can also be built against CUDA with `-Djavacpp.platform.extension=-gpu` or CPython with `-Djavacpp.platform.extension=-python`. Please refer to the comments inside the `pom.xml` file for further details. From the "platform" subdirectory, we can also install the "platform" artifacts with a similar command: + +```bash +$ cd platform +$ mvn install --projects ../opencv/platform,../ffmpeg/platform,etc. -Djavacpp.platform.host +``` + + +### The `cppbuild.sh` scripts +Running the scripts allows us to easily install the native libraries on multiple platforms, but additional software is required: + + * A recent version of Linux, Mac OS X, or Windows with MSYS and Visual Studio + * Android NDK r18 or newer http://developer.android.com/ndk/downloads/ (required only for Android builds) + +With the above in working order, the scripts get launched automatically as part of the Maven build lifecycle, but we can also manually execute +```bash +$ ANDROID_NDK=/path/to/android-ndk/ bash cppbuild.sh [-platform <name>] [-extension <name>] [projects] +``` +where possible platform names are: `android-arm`, `android-x86`, `linux-x86`, `linux-x86_64`, `linux-armhf`, `linux-ppc64le`, `linux-mips64el`, `macosx-x86_64`, `windows-x86`, `windows-x86_64`, etc. The `-gpu` extension as supported by some builds also requires CUDA to be installed. (The `ANDROID_NDK` variable is required only for Android builds.) Please note that the scripts download source archives from appropriate sites as necessary. + +To compile binaries for an Android device with no FPU, first make sure this is what you want. 
Without FPU, the performance of either OpenCV or FFmpeg is bound to be unacceptable. If you still wish to continue down that road, then replace "armeabi-v7a" by "armeabi" and "-march=armv7-a -mfloat-abi=softfp -mfpu=vfpv3-d16" with "-march=armv5te -mtune=xscale -msoft-float", inside various files. + +Although JavaCPP can pick up native libraries installed on the system, the scripts exist to facilitate the build process across multiple platforms. They also allow JavaCPP to copy the native libraries and load them at runtime from the JAR files created above by Maven, a useful feature for standalone applications or Java applets. Moreover, tricks such as the following work with JNLP: +```xml + <resources os="Linux" arch="x86 i386 i486 i586 i686"> + <jar href="lib/opencv-linux-x86.jar"/> + <jar href="lib/ffmpeg-linux-x86.jar"/> + </resources> + <resources os="Linux" arch="x86_64 amd64"> + <jar href="lib/opencv-linux-x86_64.jar"/> + <jar href="lib/ffmpeg-linux-x86_64.jar"/> + </resources> +``` + +Thanks to Jose Gómez for testing this out! + + +How Can I Help? +--------------- +Contributions of any kind are highly welcome! At the moment, the `Parser` has limited capabilities, so I plan to improve it gradually to the point where it can successfully parse large C++ header files that are even more convoluted than the ones from OpenCV, Caffe, or TensorFlow, but the build system could also be improved. Consequently, I am looking for help especially with the five following tasks, in no particular order: + + * Setting up continuous integration, preferably free on the cloud ([Travis CI](https://travis-ci.org/)?) + * Improving the `Parser` (by using the [presets for LLVM and Clang](llvm)?) + * Providing builds for more platforms, as with `linux-armhf` for [Raspberry Pi](https://www.raspberrypi.org/), etc. + * Replacing the Bash/Maven build combo by something easier to use ([Gradle](http://gradle.org/)?) + * Adding new presets as child modules for other C/C++ libraries (Caffe2, OpenNI, OpenMesh, PCL, etc.) + +To contribute, please fork and create pull requests, or post your suggestions [as a new "issue"](https://github.com/bytedeco/javacpp-presets/issues). Thank you very much in advance for your contribution! 
+ + +---- +Project lead: Samuel Audet [samuel.audet `at` gmail.com](mailto:samuel.audet at gmail.com) +Developer site: https://github.com/bytedeco/javacpp-presets +Discussion group: http://groups.google.com/group/javacpp-project diff --git a/pom.xml b/pom.xml index 04afed7e31c..ab0040fb66d 100644 --- a/pom.xml +++ b/pom.xml @@ -617,8 +617,8 @@ pytorch tensorflow tensorflow-lite - tensorrt - tritonserver + tensorrt + tritonserver ale depthai onnx @@ -1368,8 +1368,8 @@ pytorch tensorflow tensorflow-lite - tensorrt - tritonserver + tensorrt + tritonserver ale depthai onnx @@ -1594,7 +1594,7 @@ pytorch tensorflow tensorflow-lite - tensorrt + tensorrt ale onnx onnxruntime diff --git a/tritonserver/pom.xml b/tritonserver/pom.xml index 7134aad61e2..0c8c6ebe12e 100644 --- a/tritonserver/pom.xml +++ b/tritonserver/pom.xml @@ -51,7 +51,7 @@ cuda 11.4-8.2-${project.parent.version} - + org.bytedeco tensorrt 8.0-${project.parent.version} From fbdfc5f6fefc94c1f9421e0537cbb1b79945eeec Mon Sep 17 00:00:00 2001 From: Samuel Audet Date: Mon, 18 Oct 2021 12:38:08 +0000 Subject: [PATCH 21/21] Fix order of commands in README.md --- tritonserver/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tritonserver/README.md b/tritonserver/README.md index 52151fcd8e4..c340b191242 100644 --- a/tritonserver/README.md +++ b/tritonserver/README.md @@ -62,13 +62,13 @@ Now, this `models` directory will be our model repository. 2. 
Start the Docker container to run the sample (assuming we are under the `models` directory created above): ```bash $ docker run -it --gpus=all -v $(pwd):/workspace nvcr.io/nvidia/tritonserver:21.09-py3 bash - $ git clone https://github.com/bytedeco/javacpp-presets.git - $ cd javacpp-presets/tritonserver/samples + $ apt update + $ apt install -y openjdk-11-jdk $ wget https://dlcdn.apache.org/maven/maven-3/3.8.3/binaries/apache-maven-3.8.3-bin.tar.gz $ tar zxvf apache-maven-3.8.3-bin.tar.gz $ export PATH=/opt/tritonserver/apache-maven-3.8.3/bin:$PATH - $ apt update - $ apt install -y openjdk-11-jdk + $ git clone https://github.com/bytedeco/javacpp-presets.git + $ cd javacpp-presets/tritonserver/samples $ mvn compile exec:java -Djavacpp.platform=linux-x86_64 -Dexec.args="-r /workspace/models" ```