diff --git a/.github/workflows/tritonserver.yml b/.github/workflows/tritonserver.yml new file mode 100644 index 00000000000..987b1a62471 --- /dev/null +++ b/.github/workflows/tritonserver.yml @@ -0,0 +1,22 @@ +name: tritonserver +on: + push: + paths: + - tritonserver/** + pull_request: + paths: + - tritonserver/** + workflow_dispatch: +env: + CI_DEPLOY_MODULE: ${{ github.workflow }} + CI_DEPLOY_PLATFORM: ${{ github.job }} + CI_DEPLOY_SETTINGS: ${{ secrets.CI_DEPLOY_SETTINGS }} + CI_DEPLOY_USERNAME: ${{ secrets.CI_DEPLOY_USERNAME }} + CI_DEPLOY_PASSWORD: ${{ secrets.CI_DEPLOY_PASSWORD }} + STAGING_REPOSITORY: ${{ secrets.STAGING_REPOSITORY }} +jobs: + linux-x86_64: + runs-on: ubuntu-18.04 + container: nvcr.io/nvidia/tritonserver:21.09-py3 + steps: + - uses: bytedeco/javacpp-presets/.github/actions/deploy-ubuntu@actions diff --git a/CHANGELOG.md b/CHANGELOG.md index c9aac0cbb0f..7ad15e16e28 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,5 @@ + * Add presets for Triton Inference Server 2.14 ([pull #1085](https://github.com/bytedeco/javacpp-presets/pull/1085)) * Add presets for the NvToolsExt (NVTX) module of CUDA ([issue #1068](https://github.com/bytedeco/javacpp-presets/issues/1068)) * Increase the amount of function pointers available for callbacks in presets for Qt ([pull #1080](https://github.com/bytedeco/javacpp-presets/pull/1080)) * Map C++ JIT classes and functions of TorchScript in presets for PyTorch ([issue #1068](https://github.com/bytedeco/javacpp-presets/issues/1068)) diff --git a/README.md b/README.md index c5a05b6e83a..964edb3b630 100644 --- a/README.md +++ b/README.md @@ -46,6 +46,7 @@ JavaCPP Presets [![tensorflow](https://github.com/bytedeco/javacpp-presets/workflows/tensorflow/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Atensorflow) [![tensorflow-lite](https://github.com/bytedeco/javacpp-presets/workflows/tensorflow-lite/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Atensorflow-lite) [![tensorrt](https://github.com/bytedeco/javacpp-presets/workflows/tensorrt/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Atensorrt) +[![tritonserver](https://github.com/bytedeco/javacpp-presets/workflows/tritonserver/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Atritonserver) [![ale](https://github.com/bytedeco/javacpp-presets/workflows/ale/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Aale) [![depthai](https://github.com/bytedeco/javacpp-presets/workflows/depthai/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Adepthai) [![onnx](https://github.com/bytedeco/javacpp-presets/workflows/onnx/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Aonnx) @@ -214,6 +215,7 @@ Each child module in turn relies by default on the included [`cppbuild.sh` scrip * TensorFlow 1.15.x https://github.com/tensorflow/tensorflow * TensorFlow Lite 2.6.x https://github.com/tensorflow/tensorflow * TensorRT 8.x https://developer.nvidia.com/tensorrt + * Triton Inference Server 2.14 https://developer.nvidia.com/nvidia-triton-inference-server * The Arcade Learning Environment 0.7.x https://github.com/mgbellemare/Arcade-Learning-Environment * DepthAI 2.11.x https://github.com/luxonis/depthai-core * ONNX 1.10.x https://github.com/onnx/onnx diff --git a/cppbuild.sh b/cppbuild.sh index 895065044d8..ec7a0fb6c66 100755 --- a/cppbuild.sh +++ b/cppbuild.sh @@ -164,7 +164,7 @@ function sedinplace 
{ } if [[ -z ${PROJECTS:-} ]]; then - PROJECTS=(opencv ffmpeg flycapture spinnaker libdc1394 libfreenect libfreenect2 librealsense librealsense2 videoinput artoolkitplus chilitags flandmark arrow hdf5 hyperscan mkl mkl-dnn dnnl openblas arpack-ng cminpack fftw gsl cpython numpy scipy gym llvm libpostal leptonica tesseract caffe openpose cuda nvcodec opencl mxnet pytorch tensorflow tensorflow-lite tensorrt depthai ale onnx ngraph onnxruntime tvm liquidfun qt skia cpu_features modsecurity systems) + PROJECTS=(opencv ffmpeg flycapture spinnaker libdc1394 libfreenect libfreenect2 librealsense librealsense2 videoinput artoolkitplus chilitags flandmark arrow hdf5 hyperscan mkl mkl-dnn dnnl openblas arpack-ng cminpack fftw gsl cpython numpy scipy gym llvm libpostal leptonica tesseract caffe openpose cuda nvcodec opencl mxnet pytorch tensorflow tensorflow-lite tensorrt tritonserver depthai ale onnx ngraph onnxruntime tvm liquidfun qt skia cpu_features modsecurity systems) fi for PROJECT in ${PROJECTS[@]}; do diff --git a/platform/pom.xml b/platform/pom.xml index f5b018eec88..21ce92b8e80 100644 --- a/platform/pom.xml +++ b/platform/pom.xml @@ -58,6 +58,7 @@ ../tensorflow/platform ../tensorflow-lite/platform ../tensorrt/platform + ../tritonserver/platform ../ale/platform ../depthai/platform ../onnx/platform @@ -294,6 +295,11 @@ tensorrt-platform 8.0-${project.version} + + org.bytedeco + tritonserver-platform + 2.14-${project.version} + org.bytedeco ale-platform diff --git a/pom.xml b/pom.xml index 8577272a06f..ab0040fb66d 100644 --- a/pom.xml +++ b/pom.xml @@ -618,6 +618,7 @@ tensorflow tensorflow-lite tensorrt + tritonserver ale depthai onnx @@ -1368,6 +1369,7 @@ tensorflow tensorflow-lite tensorrt + tritonserver ale depthai onnx diff --git a/tritonserver/README.md b/tritonserver/README.md new file mode 100644 index 00000000000..c340b191242 --- /dev/null +++ b/tritonserver/README.md @@ -0,0 +1,77 @@ +JavaCPP Presets for Triton Inference Server +=========================================== + +[![Gitter](https://badges.gitter.im/bytedeco/javacpp.svg)](https://gitter.im/bytedeco/javacpp) [![Maven Central](https://maven-badges.herokuapp.com/maven-central/org.bytedeco/tritonserver/badge.svg)](https://maven-badges.herokuapp.com/maven-central/org.bytedeco/tritonserver) [![Sonatype Nexus (Snapshots)](https://img.shields.io/nexus/s/https/oss.sonatype.org/org.bytedeco/tritonserver.svg)](http://bytedeco.org/builds/) +Build status for all platforms: [![tritonserver](https://github.com/bytedeco/javacpp-presets/workflows/tritonserver/badge.svg)](https://github.com/bytedeco/javacpp-presets/actions?query=workflow%3Atritonserver) Commercial support: [![xscode](https://img.shields.io/badge/Available%20on-xs%3Acode-blue?style=?style=plastic&logo=appveyor&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAEAAAABACAMAAACdt4HsAAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJbWFnZVJlYWR5ccllPAAAAAZQTFRF////////VXz1bAAAAAJ0Uk5T/wDltzBKAAAAlUlEQVR42uzXSwqAMAwE0Mn9L+3Ggtgkk35QwcnSJo9S+yGwM9DCooCbgn4YrJ4CIPUcQF7/XSBbx2TEz4sAZ2q1RAECBAiYBlCtvwN+KiYAlG7UDGj59MViT9hOwEqAhYCtAsUZvL6I6W8c2wcbd+LIWSCHSTeSAAECngN4xxIDSK9f4B9t377Wd7H5Nt7/Xz8eAgwAvesLRjYYPuUAAAAASUVORK5CYII=)](https://xscode.com/bytedeco/javacpp-presets) + + +License Agreements +------------------ +By downloading these archives, you agree to the terms of the license agreements for NVIDIA software included in the archives. 
+ +### Triton Inference Server +To view the license for Triton Inference Server included in these archives, click [here](https://developer.nvidia.com/ngc/nvidia-deep-learning-container-license) + + * Triton Inference Server is a widely used software package for inference serving + * Triton supports models generated by most DL frameworks and tools, such as TensorFlow, PyTorch, ONNX Runtime, TensorRT, and OpenVINO + * Triton supports inference on both CPU and GPU + * Triton can be used both as an application and as a shared library. If you already have your own inference service framework and want to add more features, you can embed Triton as a shared library. + * Through the JavaCPP Presets, the Triton shared library can also be used from Java + + +Introduction +------------ +This directory contains the JavaCPP Presets module for: + + * Triton Inference Server 2.14 https://github.com/triton-inference-server/server + +Please refer to the parent README.md file for more detailed information about the JavaCPP Presets. + + +Documentation +------------- +Java API documentation is available here: + + * http://bytedeco.org/javacpp-presets/tritonserver/apidocs/ + + +Sample Usage +------------ +Here is a simple example of Triton Inference Server ported to Java from the `simple.cc` sample file available at: + + * https://github.com/triton-inference-server/server/tree/main/src/servers + +We can use [Maven 3](http://maven.apache.org/) to automatically download and install all the class files as well as the native binaries. To run this sample code, after creating the `pom.xml` and `Simple.java` source files from the [`samples/`](samples/) subdirectory, simply execute on the command line: +```bash + $ mvn compile exec:java -Dexec.args="-r /path/to/models" +``` +This sample shows how to call the Java mapping of Triton's C API to execute inference requests. + +### Steps to run this sample inside an NGC container + + 1. Get the source code of Triton Inference Server to prepare the model repository: +```bash + $ wget https://github.com/triton-inference-server/server/archive/refs/tags/v2.14.0.tar.gz + $ tar zxvf v2.14.0.tar.gz + $ cd server-2.14.0/docs/examples/model_repository + $ mkdir models + $ cd models; cp -a ../simple . +``` +Now, this `models` directory will be our model repository. + + 2. Start the Docker container to run the sample (assuming we are under the `models` directory created above): +```bash + $ docker run -it --gpus=all -v $(pwd):/workspace nvcr.io/nvidia/tritonserver:21.09-py3 bash + $ apt update + $ apt install -y openjdk-11-jdk + $ wget https://dlcdn.apache.org/maven/maven-3/3.8.3/binaries/apache-maven-3.8.3-bin.tar.gz + $ tar zxvf apache-maven-3.8.3-bin.tar.gz + $ export PATH=/opt/tritonserver/apache-maven-3.8.3/bin:$PATH + $ git clone https://github.com/bytedeco/javacpp-presets.git + $ cd javacpp-presets/tritonserver/samples + $ mvn compile exec:java -Djavacpp.platform=linux-x86_64 -Dexec.args="-r /workspace/models" +``` + +This sample is the Java implementation of the simple example written for the [C API](https://github.com/triton-inference-server/server/blob/main/docs/inference_protocols.md#c-api). + +
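+Under the hood, `Simple.java` uses Triton in-process through the Java mapping of its C API. Here is a condensed sketch of just the server startup portion (the class name is arbitrary, the repository path matches the Docker instructions above, and the `TRITONSERVER_Error` values returned by each call should be checked as in the full sample):
+```java
+import org.bytedeco.tritonserver.tritonserver.*;
+import static org.bytedeco.tritonserver.global.tritonserver.*;
+
+public class StartServer {
+    public static void main(String[] args) {
+        // Server options: the model repository path is the one required setting.
+        TRITONSERVER_ServerOptions options = new TRITONSERVER_ServerOptions(null);
+        TRITONSERVER_ServerOptionsNew(options);
+        TRITONSERVER_ServerOptionsSetModelRepositoryPath(options, "/workspace/models");
+
+        // Create the in-process server, then release the options object.
+        TRITONSERVER_Server server = new TRITONSERVER_Server(null);
+        TRITONSERVER_ServerNew(server, options);
+        TRITONSERVER_ServerOptionsDelete(options);
+
+        // ... build requests and submit them with TRITONSERVER_ServerInferAsync(),
+        // as shown in Simple.java ...
+
+        TRITONSERVER_ServerDelete(server);
+    }
+}
+```
+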
diff --git a/tritonserver/cppbuild.sh b/tritonserver/cppbuild.sh new file mode 100644 index 00000000000..64f033033ff --- /dev/null +++ b/tritonserver/cppbuild.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# This file is meant to be included by the parent cppbuild.sh script +if [[ -z "$PLATFORM" ]]; then + pushd .. + bash cppbuild.sh "$@" tritonserver + popd + exit +fi + +case $PLATFORM in + linux-arm64) + if [[ ! -f "/opt/tritonserver/include/triton/core/tritonserver.h" ]] || [[ ! -d "/opt/tritonserver/lib/" ]]; then + echo "Please make sure library and include files exist" + exit 1 + fi + ;; + linux-x86_64) + if [[ ! -f "/opt/tritonserver/include/triton/core/tritonserver.h" ]] || [[ ! -d "/opt/tritonserver/lib/" ]]; then + echo "Please make sure library and include files exist" + exit 1 + fi + ;; + windows-x86_64) + echo "Windows is not supported yet" + exit 1 + ;; + *) + echo "Error: Platform \"$PLATFORM\" is not supported" + ;; +esac diff --git a/tritonserver/platform/pom.xml b/tritonserver/platform/pom.xml new file mode 100644 index 00000000000..6ae11f29b5a --- /dev/null +++ b/tritonserver/platform/pom.xml @@ -0,0 +1,127 @@ + + + 4.0.0 + + + org.bytedeco + javacpp-presets + 1.5.7-SNAPSHOT + ../../ + + + org.bytedeco + tritonserver-platform + 2.14-${project.parent.version} + JavaCPP Presets Platform for Tritonserver + + + tritonserver + + + + + org.bytedeco + cuda-platform + 11.4-8.2-${project.parent.version} + + + org.bytedeco + tensorrt-platform + 8.0-${project.parent.version} + + + ${project.groupId} + ${javacpp.moduleId} + ${project.version} + + + ${project.groupId} + ${javacpp.moduleId} + ${project.version} + ${javacpp.platform.linux-arm64} + + + ${project.groupId} + ${javacpp.moduleId} + ${project.version} + ${javacpp.platform.linux-x86_64} + + + ${project.groupId} + ${javacpp.moduleId} + ${project.version} + ${javacpp.platform.windows-x86_64} + + + + + + + maven-jar-plugin + + + default-jar + + + + ${javacpp.moduleId}.jar ${javacpp.moduleId}-linux-arm64.jar ${javacpp.moduleId}-linux-x86_64.jar ${javacpp.moduleId}-windows-x86_64.jar + + + + + + empty-javadoc-jar + + jar + + + javadoc + + + + empty-sources-jar + + jar + + + sources + + + + + + org.moditect + moditect-maven-plugin + + + add-module-infos + none + + + add-platform-module-info + package + + add-module-info + + + + + ${project.build.directory}/${project.artifactId}.jar + + module org.bytedeco.${javacpp.moduleId}.platform { + requires static org.bytedeco.${javacpp.moduleId}.linux.arm64; + requires static org.bytedeco.${javacpp.moduleId}.linux.x86_64; + requires static org.bytedeco.${javacpp.moduleId}.windows.x86_64; + } + + + + + + + + + + + diff --git a/tritonserver/platform/redist/pom.xml b/tritonserver/platform/redist/pom.xml new file mode 100644 index 00000000000..097c0132c3d --- /dev/null +++ b/tritonserver/platform/redist/pom.xml @@ -0,0 +1,118 @@ + + + 4.0.0 + + + org.bytedeco + javacpp-presets + 1.5.7-SNAPSHOT + ../../../ + + + org.bytedeco + tritonserver-platform-redist + 2.14-${project.parent.version} + JavaCPP Presets Platform Redist for Tritonserver + + + tritonserver + -redist + + + + + ${project.groupId} + ${javacpp.moduleId}-platform + ${project.version} + + + ${project.groupId} + ${javacpp.moduleId} + ${project.version} + ${javacpp.platform.linux-arm64} + + + ${project.groupId} + ${javacpp.moduleId} + ${project.version} + ${javacpp.platform.linux-x86_64} + + + ${project.groupId} + ${javacpp.moduleId} + ${project.version} + ${javacpp.platform.windows-x86_64} + + + + + + + maven-jar-plugin + + + default-jar + + + + ${javacpp.moduleId}.jar ${javacpp.moduleId}-linux-arm64-redist.jar ${javacpp.moduleId}-linux-x86_64-redist.jar ${javacpp.moduleId}-windows-x86_64-redist.jar + + + + + + empty-javadoc-jar + + jar + + + javadoc + + + + empty-sources-jar + + jar + + + sources + + + + + + org.moditect + moditect-maven-plugin + 
+ + add-module-infos + none + + + add-platform-module-info + package + + add-module-info + + + + + ${project.build.directory}/${project.artifactId}.jar + + module org.bytedeco.${javacpp.moduleId}.platform.redist { + requires static org.bytedeco.${javacpp.moduleId}.linux.arm64.redist; + requires static org.bytedeco.${javacpp.moduleId}.linux.x86_64.redist; + requires static org.bytedeco.${javacpp.moduleId}.windows.x86_64.redist; + } + + + + + + + + + + + diff --git a/tritonserver/pom.xml b/tritonserver/pom.xml new file mode 100644 index 00000000000..0c8c6ebe12e --- /dev/null +++ b/tritonserver/pom.xml @@ -0,0 +1,138 @@ + + + 4.0.0 + + + org.bytedeco + javacpp-presets + 1.5.7-SNAPSHOT + + + org.bytedeco + tritonserver + 2.14-${project.parent.version} + JavaCPP Presets for Tritonserver + + + + org.bytedeco + cuda + 11.4-8.2-${project.parent.version} + + + org.bytedeco + tensorrt + 8.0-${project.parent.version} + + + org.bytedeco + javacpp + + + + + + + maven-resources-plugin + + + maven-compiler-plugin + + + org.bytedeco + javacpp + + ISO-8859-1 + + + + org.bytedeco + cuda + 11.4-8.2-${project.parent.version} + + + org.bytedeco + tensorrt + 8.0-${project.parent.version} + + + + + maven-jar-plugin + + + javacpp-${javacpp.platform} + package + + jar + + + ${javacpp.platform} + + org/bytedeco/tritonserver/${javacpp.platform}/*jni* + META-INF/native-image/${javacpp.platform}/ + + + + + javacpp-${javacpp.platform}-redist + package + + jar + + + ${javacpp.platform}-redist + ${project.build.directory}/native + + org/bytedeco/tritonserver/${javacpp.platform}/ + META-INF/native-image/${javacpp.platform}/ + + + org/bytedeco/tritonserver/${javacpp.platform}/*jni* + + + + + + + org.moditect + moditect-maven-plugin + + + add-module-info-redist + package + + add-module-info + + + + + ${project.build.directory}/${project.artifactId}-${javacpp.platform}-redist.jar + + open module org.bytedeco.${javacpp.packageName}.${javacpp.platform.module}.redist { + requires transitive org.bytedeco.${javacpp.packageName}; + } + + + + + + + + + maven-dependency-plugin + + + maven-source-plugin + + + maven-javadoc-plugin + + ISO-8859-1 + + + + + + diff --git a/tritonserver/samples/Simple.java b/tritonserver/samples/Simple.java new file mode 100644 index 00000000000..1295c331a5b --- /dev/null +++ b/tritonserver/samples/Simple.java @@ -0,0 +1,970 @@ +// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of NVIDIA CORPORATION nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import java.io.*; +import java.util.*; +import java.util.concurrent.*; +import com.google.gson.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import org.bytedeco.tritonserver.tritonserver.*; +import static org.bytedeco.cuda.global.cudart.*; +import static org.bytedeco.tritonserver.global.tritonserver.*; + +public class Simple { + static final double TRITON_MIN_COMPUTE_CAPABILITY = 6.0; + + static void FAIL(String MSG) { + System.err.println("failure: " + MSG); + System.exit(1); + } + + static void FAIL_IF_ERR(TRITONSERVER_Error err__, String MSG) { + if (err__ != null) { + System.err.println("error: " + MSG + ": " + + TRITONSERVER_ErrorCodeString(err__) + " - " + + TRITONSERVER_ErrorMessage(err__)); + TRITONSERVER_ErrorDelete(err__); + System.exit(1); + } + } + + static void FAIL_IF_CUDA_ERR(int err__, String MSG) { + if (err__ != cudaSuccess) { + System.err.println("error: " + MSG + ": " + cudaGetErrorString(err__).getString()); + System.exit(1); + } + } + + static boolean enforce_memory_type = false; + static int requested_memory_type; + + static class CudaDataDeleter extends Pointer { + public CudaDataDeleter() { super((Pointer)null); } + public void reset(Pointer p) { + this.address = p.address(); + this.deallocator(new FreeDeallocator(this)); + } + protected static class FreeDeallocator extends Pointer implements Deallocator { + FreeDeallocator(Pointer p) { super(p); } + @Override public void deallocate() { + if (!isNull()) { + cudaPointerAttributes attr = new cudaPointerAttributes(); + int cuerr = cudaPointerGetAttributes(attr, this); + if (cuerr != cudaSuccess) { + System.err.println("error: failed to get CUDA pointer attribute of " + this + + ": " + cudaGetErrorString(cuerr).getString()); + } + if (attr.type() == cudaMemoryTypeDevice) { + cuerr = cudaFree(this); + } else if (attr.type() == cudaMemoryTypeHost) { + cuerr = cudaFreeHost(this); + } + if (cuerr != cudaSuccess) { + System.err.println("error: failed to release CUDA pointer " + this + + ": " + cudaGetErrorString(cuerr).getString()); + } + } + } + } + } + + static class TRITONSERVER_ServerDeleter extends TRITONSERVER_Server { + public TRITONSERVER_ServerDeleter(TRITONSERVER_Server p) { super(p); deallocator(new DeleteDeallocator(this)); } + protected static class DeleteDeallocator extends TRITONSERVER_Server implements Deallocator { + DeleteDeallocator(Pointer p) { super(p); } + @Override public void deallocate() { TRITONSERVER_ServerDelete(this); } + } + } + + static void + Usage(String msg) + { + if (msg != null) { + System.err.println(msg); + } + + System.err.println("Usage: java " + Simple.class.getSimpleName() + " [options]"); + System.err.println("\t-m <\"system\"|\"pinned\"|gpu>" + + " Enforce the memory type for input and output tensors."
+ + " If not specified, inputs will be in system memory and outputs" + + " will be based on the model's preferred type."); + System.err.println("\t-v Enable verbose logging"); + System.err.println("\t-r [model repository absolute path]"); + + System.exit(1); + } + + static class ResponseAlloc extends TRITONSERVER_ResponseAllocatorAllocFn_t { + @Override public TRITONSERVER_Error call ( + TRITONSERVER_ResponseAllocator allocator, String tensor_name, + long byte_size, int preferred_memory_type, + long preferred_memory_type_id, Pointer userp, PointerPointer buffer, + PointerPointer buffer_userp, IntPointer actual_memory_type, + LongPointer actual_memory_type_id) + { + // Initially attempt to make the actual memory type and id that we + // allocate be the same as preferred memory type + actual_memory_type.put(0, preferred_memory_type); + actual_memory_type_id.put(0, preferred_memory_type_id); + + // If 'byte_size' is zero just return 'buffer' == nullptr, we don't + // need to do any other book-keeping. + if (byte_size == 0) { + buffer.put(0, null); + buffer_userp.put(0, null); + System.out.println("allocated " + byte_size + " bytes for result tensor " + tensor_name); + } else { + Pointer allocated_ptr = new Pointer(); + if (enforce_memory_type) { + actual_memory_type.put(0, requested_memory_type); + } + + switch (actual_memory_type.get()) { + case TRITONSERVER_MEMORY_CPU_PINNED: { + int err = cudaSetDevice((int)actual_memory_type_id.get()); + if ((err != cudaSuccess) && (err != cudaErrorNoDevice) && + (err != cudaErrorInsufficientDriver)) { + return TRITONSERVER_ErrorNew( + TRITONSERVER_ERROR_INTERNAL, + "unable to recover current CUDA device: " + + cudaGetErrorString(err).getString()); + } + + err = cudaHostAlloc(allocated_ptr, byte_size, cudaHostAllocPortable); + if (err != cudaSuccess) { + return TRITONSERVER_ErrorNew( + TRITONSERVER_ERROR_INTERNAL, + "cudaHostAlloc failed: " + + cudaGetErrorString(err).getString()); + } + break; + } + + case TRITONSERVER_MEMORY_GPU: { + int err = cudaSetDevice((int)actual_memory_type_id.get()); + if ((err != cudaSuccess) && (err != cudaErrorNoDevice) && + (err != cudaErrorInsufficientDriver)) { + return TRITONSERVER_ErrorNew( + TRITONSERVER_ERROR_INTERNAL, + "unable to recover current CUDA device: " + + cudaGetErrorString(err).getString()); + } + + err = cudaMalloc(allocated_ptr, byte_size); + if (err != cudaSuccess) { + return TRITONSERVER_ErrorNew( + TRITONSERVER_ERROR_INTERNAL, + "cudaMalloc failed: " + cudaGetErrorString(err).getString()); + } + break; + } + + // Use CPU memory if the requested memory type is unknown + // (default case). + case TRITONSERVER_MEMORY_CPU: + default: { + actual_memory_type.put(0, TRITONSERVER_MEMORY_CPU); + allocated_ptr = Pointer.malloc(byte_size); + break; + } + } + + // Pass the tensor name with buffer_userp so we can show it when + // releasing the buffer. 
+ if (!allocated_ptr.isNull()) { + buffer.put(0, allocated_ptr); + buffer_userp.put(0, new BytePointer(tensor_name)); + System.out.println("allocated " + byte_size + " bytes in " + + TRITONSERVER_MemoryTypeString(actual_memory_type.get()) + + " for result tensor " + tensor_name); + } + } + + return null; // Success + } + } + + static class ResponseRelease extends TRITONSERVER_ResponseAllocatorReleaseFn_t { + @Override public TRITONSERVER_Error call ( + TRITONSERVER_ResponseAllocator allocator, Pointer buffer, Pointer buffer_userp, + long byte_size, int memory_type, long memory_type_id) + { + BytePointer name = null; + if (buffer_userp != null) { + name = new BytePointer(buffer_userp); + } else { + name = new BytePointer(""); + } + + System.out.println("Releasing buffer " + buffer + " of size " + byte_size + + " in " + TRITONSERVER_MemoryTypeString(memory_type) + + " for result '" + name.getString() + "'"); + switch (memory_type) { + case TRITONSERVER_MEMORY_CPU: + Pointer.free(buffer); + break; + case TRITONSERVER_MEMORY_CPU_PINNED: { + int err = cudaSetDevice((int)memory_type_id); + if (err == cudaSuccess) { + err = cudaFreeHost(buffer); + } + if (err != cudaSuccess) { + System.err.println("error: failed to cudaFree " + buffer + ": " + + cudaGetErrorString(err)); + } + break; + } + case TRITONSERVER_MEMORY_GPU: { + int err = cudaSetDevice((int)memory_type_id); + if (err == cudaSuccess) { + err = cudaFree(buffer); + } + if (err != cudaSuccess) { + System.err.println("error: failed to cudaFree " + buffer + ": " + + cudaGetErrorString(err)); + } + break; + } + default: + System.err.println("error: unexpected buffer allocated in CUDA managed memory"); + break; + } + + name.deallocate(); + + return null; // Success + } + } + + static class InferRequestComplete extends TRITONSERVER_InferenceRequestReleaseFn_t { + @Override public void call ( + TRITONSERVER_InferenceRequest request, int flags, Pointer userp) + { + // We reuse the request so we don't delete it here. + } + } + + static class InferResponseComplete extends TRITONSERVER_InferenceResponseCompleteFn_t { + @Override public void call ( + TRITONSERVER_InferenceResponse response, int flags, Pointer userp) + { + if (response != null) { + // Send 'response' to the future. 
+ futures.get(userp).complete(response); + } + } + } + + static ConcurrentHashMap<Pointer,CompletableFuture<TRITONSERVER_InferenceResponse>> futures = new ConcurrentHashMap<>(); + static ResponseAlloc responseAlloc = new ResponseAlloc(); + static ResponseRelease responseRelease = new ResponseRelease(); + static InferRequestComplete inferRequestComplete = new InferRequestComplete(); + static InferResponseComplete inferResponseComplete = new InferResponseComplete(); + + static TRITONSERVER_Error + ParseModelMetadata( + JsonObject model_metadata, boolean[] is_int, + boolean[] is_torch_model) + { + String seen_data_type = null; + for (JsonElement input_element : model_metadata.get("inputs").getAsJsonArray()) { + JsonObject input = input_element.getAsJsonObject(); + if (!input.get("datatype").getAsString().equals("INT32") && + !input.get("datatype").getAsString().equals("FP32")) { + return TRITONSERVER_ErrorNew( + TRITONSERVER_ERROR_UNSUPPORTED, + "simple lib example only supports model with data type INT32 or " + + "FP32"); + } + if (seen_data_type == null) { + seen_data_type = input.get("datatype").getAsString(); + } else if (!seen_data_type.equals(input.get("datatype").getAsString())) { + return TRITONSERVER_ErrorNew( + TRITONSERVER_ERROR_INVALID_ARG, + "the inputs and outputs of 'simple' model must have the same data type"); + } + } + for (JsonElement output_element : model_metadata.get("outputs").getAsJsonArray()) { + JsonObject output = output_element.getAsJsonObject(); + if (!output.get("datatype").getAsString().equals("INT32") && + !output.get("datatype").getAsString().equals("FP32")) { + return TRITONSERVER_ErrorNew( + TRITONSERVER_ERROR_UNSUPPORTED, + "simple lib example only supports model with data type INT32 or " + + "FP32"); + } else if (!seen_data_type.equals(output.get("datatype").getAsString())) { + return TRITONSERVER_ErrorNew( + TRITONSERVER_ERROR_INVALID_ARG, + "the inputs and outputs of 'simple' model must have the same data type"); + } + } + + is_int[0] = seen_data_type.equals("INT32"); + is_torch_model[0] = + model_metadata.get("platform").getAsString().equals("pytorch_libtorch"); + return null; + } + + static void + GenerateInputData( + IntPointer[] input0_data, IntPointer[] input1_data) + { + input0_data[0] = new IntPointer(16); + input1_data[0] = new IntPointer(16); + for (int i = 0; i < 16; ++i) { + input0_data[0].put(i, i); + input1_data[0].put(i, 1); + } + } + + static void + GenerateInputData( + FloatPointer[] input0_data, FloatPointer[] input1_data) + { + input0_data[0] = new FloatPointer(16); + input1_data[0] = new FloatPointer(16); + for (int i = 0; i < 16; ++i) { + input0_data[0].put(i, i); + input1_data[0].put(i, 1); + } + } + + static void + CompareResult( + String output0_name, String output1_name, + IntPointer input0, IntPointer input1, IntPointer output0, + IntPointer output1) + { + for (int i = 0; i < 16; ++i) { + System.out.println(input0.get(i) + " + " + input1.get(i) + " = " + + output0.get(i)); + System.out.println(input0.get(i) + " - " + input1.get(i) + " = " + + output1.get(i)); + + if ((input0.get(i) + input1.get(i)) != output0.get(i)) { + FAIL("incorrect sum in " + output0_name); + } + if ((input0.get(i) - input1.get(i)) != output1.get(i)) { + FAIL("incorrect difference in " + output1_name); + } + } + } + + static void + CompareResult( + String output0_name, String output1_name, + FloatPointer input0, FloatPointer input1, FloatPointer output0, + FloatPointer output1) + { + for (int i = 0; i < 16; ++i) { + System.out.println(input0.get(i) + " + " + input1.get(i) + " = " + + output0.get(i)); + 
System.out.println(input0.get(i) + " - " + input1.get(i) + " = " + + output1.get(i)); + + if ((input0.get(i) + input1.get(i)) != output0.get(i)) { + FAIL("incorrect sum in " + output0_name); + } + if ((input0.get(i) - input1.get(i)) != output1.get(i)) { + FAIL("incorrect difference in " + output1_name); + } + } + } + + static void + Check( + TRITONSERVER_InferenceResponse response, + Pointer input0_data, Pointer input1_data, + String output0, String output1, + long expected_byte_size, + int expected_datatype, boolean is_int) + { + HashMap<String,BytePointer> output_data = new HashMap<>(); + + int[] output_count = {0}; + FAIL_IF_ERR( + TRITONSERVER_InferenceResponseOutputCount(response, output_count), + "getting number of response outputs"); + if (output_count[0] != 2) { + FAIL("expecting 2 response outputs, got " + output_count[0]); + } + + for (int idx = 0; idx < output_count[0]; ++idx) { + BytePointer cname = new BytePointer((Pointer)null); + IntPointer datatype = new IntPointer(1); + LongPointer shape = new LongPointer((Pointer)null); + LongPointer dim_count = new LongPointer(1); + Pointer base = new Pointer(); + SizeTPointer byte_size = new SizeTPointer(1); + IntPointer memory_type = new IntPointer(1); + LongPointer memory_type_id = new LongPointer(1); + Pointer userp = new Pointer(); + + FAIL_IF_ERR( + TRITONSERVER_InferenceResponseOutput( + response, idx, cname, datatype, shape, dim_count, base, + byte_size, memory_type, memory_type_id, userp), + "getting output info"); + + if (cname.isNull()) { + FAIL("unable to get output name"); + } + + String name = cname.getString(); + if ((!name.equals(output0)) && (!name.equals(output1))) { + FAIL("unexpected output '" + name + "'"); + } + + if ((dim_count.get() != 2) || (shape.get(0) != 1) || (shape.get(1) != 16)) { + FAIL("unexpected shape for '" + name + "'"); + } + + if (datatype.get() != expected_datatype) { + FAIL( + "unexpected datatype '" + + TRITONSERVER_DataTypeString(datatype.get()) + "' for '" + + name + "'"); + } + + if (byte_size.get() != expected_byte_size) { + FAIL( + "unexpected byte-size, expected " + + expected_byte_size + ", got " + + byte_size.get() + " for " + name); + } + + if (enforce_memory_type && (memory_type.get() != requested_memory_type)) { + FAIL( + "unexpected memory type, expected to be allocated in " + + TRITONSERVER_MemoryTypeString(requested_memory_type) + + ", got " + TRITONSERVER_MemoryTypeString(memory_type.get()) + + ", id " + memory_type_id.get() + " for " + name); + } + + // We make a copy of the data here... which we could avoid for + // performance reasons but ok for this simple example. 
+ BytePointer odata = new BytePointer(byte_size.get()); + output_data.put(name, odata); + switch (memory_type.get()) { + case TRITONSERVER_MEMORY_CPU: { + System.out.println(name + " is stored in system memory"); + odata.put(base.limit(byte_size.get())); + break; + } + + case TRITONSERVER_MEMORY_CPU_PINNED: { + System.out.println(name + " is stored in pinned memory"); + odata.put(base.limit(byte_size.get())); + break; + } + + case TRITONSERVER_MEMORY_GPU: { + System.out.println(name + " is stored in GPU memory"); + FAIL_IF_CUDA_ERR( + cudaMemcpy(odata, base, byte_size.get(), cudaMemcpyDeviceToHost), + "getting " + name + " data from GPU memory"); + break; + } + + default: + FAIL("unexpected memory type"); + } + } + + if (is_int) { + CompareResult( + output0, output1, new IntPointer(input0_data), new IntPointer(input1_data), + new IntPointer(output_data.get(output0)), new IntPointer(output_data.get(output1))); + } else { + CompareResult( + output0, output1, new FloatPointer(input0_data), new FloatPointer(input1_data), + new FloatPointer(output_data.get(output0)), new FloatPointer(output_data.get(output1))); + } + } + + public static void + main(String[] args) throws Exception + { + String model_repository_path = null; + int verbose_level = 0; + + // Parse commandline... + for (int i = 0; i < args.length; i++) { + switch (args[i]) { + case "-m": { + enforce_memory_type = true; + i++; + if (args[i].equals("system")) { + requested_memory_type = TRITONSERVER_MEMORY_CPU; + } else if (args[i].equals("pinned")) { + requested_memory_type = TRITONSERVER_MEMORY_CPU_PINNED; + } else if (args[i].equals("gpu")) { + requested_memory_type = TRITONSERVER_MEMORY_GPU; + } else { + Usage( + "-m must be used to specify one of the following types:" + + " <\"system\"|\"pinned\"|gpu>"); + } + break; + } + case "-r": + model_repository_path = args[++i]; + break; + case "-v": + verbose_level = 1; + break; + case "-?": + Usage(null); + break; + } + } + + if (model_repository_path == null) { + Usage("-r must be used to specify model repository path"); + } + if (enforce_memory_type && requested_memory_type != TRITONSERVER_MEMORY_CPU) { + Usage("-m can only be set to \"system\" without enabling GPU"); + } + + // Check API version. + int[] api_version_major = {0}, api_version_minor = {0}; + FAIL_IF_ERR( + TRITONSERVER_ApiVersion(api_version_major, api_version_minor), + "getting Triton API version"); + if ((TRITONSERVER_API_VERSION_MAJOR != api_version_major[0]) || + (TRITONSERVER_API_VERSION_MINOR > api_version_minor[0])) { + FAIL("triton server API version mismatch"); + } + + // Create the server... 
+ TRITONSERVER_ServerOptions server_options = new TRITONSERVER_ServerOptions(null); + FAIL_IF_ERR( + TRITONSERVER_ServerOptionsNew(server_options), + "creating server options"); + FAIL_IF_ERR( + TRITONSERVER_ServerOptionsSetModelRepositoryPath( + server_options, model_repository_path), + "setting model repository path"); + FAIL_IF_ERR( + TRITONSERVER_ServerOptionsSetLogVerbose(server_options, verbose_level), + "setting verbose logging level"); + FAIL_IF_ERR( + TRITONSERVER_ServerOptionsSetBackendDirectory( + server_options, "/opt/tritonserver/backends"), + "setting backend directory"); + FAIL_IF_ERR( + TRITONSERVER_ServerOptionsSetRepoAgentDirectory( + server_options, "/opt/tritonserver/repoagents"), + "setting repository agent directory"); + FAIL_IF_ERR( + TRITONSERVER_ServerOptionsSetStrictModelConfig(server_options, true), + "setting strict model configuration"); + double min_compute_capability = TRITON_MIN_COMPUTE_CAPABILITY; + FAIL_IF_ERR( + TRITONSERVER_ServerOptionsSetMinSupportedComputeCapability( + server_options, min_compute_capability), + "setting minimum supported CUDA compute capability"); + + TRITONSERVER_Server server_ptr = new TRITONSERVER_Server(null); + FAIL_IF_ERR( + TRITONSERVER_ServerNew(server_ptr, server_options), "creating server"); + FAIL_IF_ERR( + TRITONSERVER_ServerOptionsDelete(server_options), + "deleting server options"); + + TRITONSERVER_ServerDeleter server = new TRITONSERVER_ServerDeleter(server_ptr); + + // Wait until the server is both live and ready. + int health_iters = 0; + while (true) { + boolean[] live = {false}, ready = {false}; + FAIL_IF_ERR( + TRITONSERVER_ServerIsLive(server, live), + "unable to get server liveness"); + FAIL_IF_ERR( + TRITONSERVER_ServerIsReady(server, ready), + "unable to get server readiness"); + System.out.println("Server Health: live " + live[0] + ", ready " + ready[0]); + if (live[0] && ready[0]) { + break; + } + + if (++health_iters >= 10) { + FAIL("failed to find healthy inference server"); + } + + Thread.sleep(500); + } + + // Print status of the server. + { + TRITONSERVER_Message server_metadata_message = new TRITONSERVER_Message(null); + FAIL_IF_ERR( + TRITONSERVER_ServerMetadata(server, server_metadata_message), + "unable to get server metadata message"); + BytePointer buffer = new BytePointer((Pointer)null); + SizeTPointer byte_size = new SizeTPointer(1); + FAIL_IF_ERR( + TRITONSERVER_MessageSerializeToJson( + server_metadata_message, buffer, byte_size), + "unable to serialize server metadata message"); + + System.out.println("Server Status:"); + System.out.println(buffer.limit(byte_size.get()).getString()); + + FAIL_IF_ERR( + TRITONSERVER_MessageDelete(server_metadata_message), + "deleting status metadata"); + } + + String model_name = "simple"; + + // Wait for the model to become available. 
+ boolean[] is_torch_model = {false}; + boolean[] is_int = {true}; + boolean[] is_ready = {false}; + health_iters = 0; + while (!is_ready[0]) { + FAIL_IF_ERR( + TRITONSERVER_ServerModelIsReady( + server, model_name, 1, is_ready), + "unable to get model readiness"); + if (!is_ready[0]) { + if (++health_iters >= 10) { + FAIL("model failed to be ready in 10 iterations"); + } + Thread.sleep(500); + continue; + } + + TRITONSERVER_Message model_metadata_message = new TRITONSERVER_Message(null); + FAIL_IF_ERR( + TRITONSERVER_ServerModelMetadata( + server, model_name, 1, model_metadata_message), + "unable to get model metadata message"); + BytePointer buffer = new BytePointer((Pointer)null); + SizeTPointer byte_size = new SizeTPointer(1); + FAIL_IF_ERR( + TRITONSERVER_MessageSerializeToJson( + model_metadata_message, buffer, byte_size), + "unable to serialize model status protobuf"); + + JsonParser parser = new JsonParser(); + JsonObject model_metadata = null; + try { + model_metadata = parser.parse(buffer.limit(byte_size.get()).getString()).getAsJsonObject(); + } catch (Exception e) { + FAIL("error: failed to parse model metadata from JSON: " + e); + } + + FAIL_IF_ERR( + TRITONSERVER_MessageDelete(model_metadata_message), + "deleting status protobuf"); + + if (!model_metadata.get("name").getAsString().equals(model_name)) { + FAIL("unable to find metadata for model"); + } + + boolean found_version = false; + if (model_metadata.has("versions")) { + for (JsonElement version : model_metadata.get("versions").getAsJsonArray()) { + if (version.getAsString().equals("1")) { + found_version = true; + break; + } + } + } + if (!found_version) { + FAIL("unable to find version 1 status for model"); + } + + FAIL_IF_ERR( + ParseModelMetadata(model_metadata, is_int, is_torch_model), + "parsing model metadata"); + } + + // Create the allocator that will be used to allocate buffers for + // the result tensors. + TRITONSERVER_ResponseAllocator allocator = new TRITONSERVER_ResponseAllocator(null); + FAIL_IF_ERR( + TRITONSERVER_ResponseAllocatorNew( + allocator, responseAlloc, responseRelease, null /* start_fn */), + "creating response allocator"); + + // Inference + TRITONSERVER_InferenceRequest irequest = new TRITONSERVER_InferenceRequest(null); + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestNew( + irequest, server, model_name, -1 /* model_version */), + "creating inference request"); + + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestSetId(irequest, "my_request_id"), + "setting ID for the request"); + + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestSetReleaseCallback( + irequest, inferRequestComplete, null /* request_release_userp */), + "setting request release callback"); + + // Inputs + String input0 = is_torch_model[0] ? "INPUT__0" : "INPUT0"; + String input1 = is_torch_model[0] ? "INPUT__1" : "INPUT1"; + + long[] input0_shape = {1, 16}; + long[] input1_shape = {1, 16}; + + int datatype = + (is_int[0]) ? TRITONSERVER_TYPE_INT32 : TRITONSERVER_TYPE_FP32; + + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestAddInput( + irequest, input0, datatype, input0_shape, input0_shape.length), + "setting input 0 meta-data for the request"); + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestAddInput( + irequest, input1, datatype, input1_shape, input1_shape.length), + "setting input 1 meta-data for the request"); + + String output0 = is_torch_model[0] ? "OUTPUT__0" : "OUTPUT0"; + String output1 = is_torch_model[0] ? 
"OUTPUT__1" : "OUTPUT1"; + + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestAddRequestedOutput(irequest, output0), + "requesting output 0 for the request"); + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestAddRequestedOutput(irequest, output1), + "requesting output 1 for the request"); + + // Create the data for the two input tensors. Initialize the first + // to unique values and the second to all ones. + BytePointer input0_data; + BytePointer input1_data; + if (is_int[0]) { + IntPointer[] p0 = {null}, p1 = {null}; + GenerateInputData(p0, p1); + input0_data = p0[0].getPointer(BytePointer.class); + input1_data = p1[0].getPointer(BytePointer.class); + } else { + FloatPointer[] p0 = {null}, p1 = {null}; + GenerateInputData(p0, p1); + input0_data = p0[0].getPointer(BytePointer.class); + input1_data = p1[0].getPointer(BytePointer.class); + } + + long input0_size = input0_data.limit(); + long input1_size = input1_data.limit(); + + Pointer input0_base = input0_data; + Pointer input1_base = input1_data; + CudaDataDeleter input0_gpu = new CudaDataDeleter(); + CudaDataDeleter input1_gpu = new CudaDataDeleter(); + boolean use_cuda_memory = + (enforce_memory_type && + (requested_memory_type != TRITONSERVER_MEMORY_CPU)); + if (use_cuda_memory) { + FAIL_IF_CUDA_ERR(cudaSetDevice(0), "setting CUDA device to device 0"); + if (requested_memory_type != TRITONSERVER_MEMORY_CPU_PINNED) { + Pointer dst = new Pointer(); + FAIL_IF_CUDA_ERR( + cudaMalloc(dst, input0_size), + "allocating GPU memory for INPUT0 data"); + input0_gpu.reset(dst); + FAIL_IF_CUDA_ERR( + cudaMemcpy(dst, input0_data, input0_size, cudaMemcpyHostToDevice), + "setting INPUT0 data in GPU memory"); + FAIL_IF_CUDA_ERR( + cudaMalloc(dst, input1_size), + "allocating GPU memory for INPUT1 data"); + input1_gpu.reset(dst); + FAIL_IF_CUDA_ERR( + cudaMemcpy(dst, input1_data, input1_size, cudaMemcpyHostToDevice), + "setting INPUT1 data in GPU memory"); + } else { + Pointer dst = new Pointer(); + FAIL_IF_CUDA_ERR( + cudaHostAlloc(dst, input0_size, cudaHostAllocPortable), + "allocating pinned memory for INPUT0 data"); + input0_gpu.reset(dst); + FAIL_IF_CUDA_ERR( + cudaMemcpy(dst, input0_data, input0_size, cudaMemcpyHostToHost), + "setting INPUT0 data in pinned memory"); + FAIL_IF_CUDA_ERR( + cudaHostAlloc(dst, input1_size, cudaHostAllocPortable), + "allocating pinned memory for INPUT1 data"); + input1_gpu.reset(dst); + FAIL_IF_CUDA_ERR( + cudaMemcpy(dst, input1_data, input1_size, cudaMemcpyHostToHost), + "setting INPUT1 data in pinned memory"); + } + } + + input0_base = use_cuda_memory ? input0_gpu : input0_data; + input1_base = use_cuda_memory ? input1_gpu : input1_data; + + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestAppendInputData( + irequest, input0, input0_base, input0_size, requested_memory_type, + 0 /* memory_type_id */), + "assigning INPUT0 data"); + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestAppendInputData( + irequest, input1, input1_base, input1_size, requested_memory_type, + 0 /* memory_type_id */), + "assigning INPUT1 data"); + + // Perform inference... + { + CompletableFuture completed = new CompletableFuture<>(); + futures.put(irequest, completed); + + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestSetResponseCallback( + irequest, allocator, null /* response_allocator_userp */, + inferResponseComplete, irequest), + "setting response callback"); + + FAIL_IF_ERR( + TRITONSERVER_ServerInferAsync( + server, irequest, null /* trace */), + "running inference"); + + // Wait for the inference to complete. 
+ TRITONSERVER_InferenceResponse completed_response = completed.get(); + futures.remove(irequest); + + FAIL_IF_ERR( + TRITONSERVER_InferenceResponseError(completed_response), + "response status"); + + Check( + completed_response, input0_data, input1_data, output0, output1, + input0_size, datatype, is_int[0]); + + FAIL_IF_ERR( + TRITONSERVER_InferenceResponseDelete(completed_response), + "deleting inference response"); + } + + // Modify some input data in place and then reuse the request + // object. For simplicity we only do this when the input tensors are + // in non-pinned system memory. + if (!enforce_memory_type || + (requested_memory_type == TRITONSERVER_MEMORY_CPU)) { + if (is_int[0]) { + new IntPointer(input0_data).put(0, 27); + } else { + new FloatPointer(input0_data).put(0, 27.0f); + } + + CompletableFuture<TRITONSERVER_InferenceResponse> completed = new CompletableFuture<>(); + futures.put(irequest, completed); + + // Using a new promise so have to re-register the callback to set + // the promise as the userp. + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestSetResponseCallback( + irequest, allocator, null /* response_allocator_userp */, + inferResponseComplete, irequest), + "setting response callback"); + + FAIL_IF_ERR( + TRITONSERVER_ServerInferAsync( + server, irequest, null /* trace */), + "running inference"); + + // Wait for the inference to complete. + TRITONSERVER_InferenceResponse completed_response = completed.get(); + futures.remove(irequest); + FAIL_IF_ERR( + TRITONSERVER_InferenceResponseError(completed_response), + "response status"); + + Check( + completed_response, input0_data, input1_data, output0, output1, + input0_size, datatype, is_int[0]); + + FAIL_IF_ERR( + TRITONSERVER_InferenceResponseDelete(completed_response), + "deleting inference response"); + } + + // Remove input data and then add back different data. + { + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestRemoveAllInputData(irequest, input0), + "removing INPUT0 data"); + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestAppendInputData( + irequest, input0, input1_base, input1_size, requested_memory_type, + 0 /* memory_type_id */), + "assigning INPUT1 data to INPUT0"); + + CompletableFuture<TRITONSERVER_InferenceResponse> completed = new CompletableFuture<>(); + futures.put(irequest, completed); + + // Using a new promise so have to re-register the callback to set + // the promise as the userp. + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestSetResponseCallback( + irequest, allocator, null /* response_allocator_userp */, + inferResponseComplete, irequest), + "setting response callback"); + + FAIL_IF_ERR( + TRITONSERVER_ServerInferAsync( + server, irequest, null /* trace */), + "running inference"); + + // Wait for the inference to complete. + TRITONSERVER_InferenceResponse completed_response = completed.get(); + futures.remove(irequest); + FAIL_IF_ERR( + TRITONSERVER_InferenceResponseError(completed_response), + "response status"); + + // Both inputs are using input1_data... 
+ Check( + completed_response, input1_data, input1_data, output0, output1, + input0_size, datatype, is_int[0]); + + FAIL_IF_ERR( + TRITONSERVER_InferenceResponseDelete(completed_response), + "deleting inference response"); + } + + FAIL_IF_ERR( + TRITONSERVER_InferenceRequestDelete(irequest), + "deleting inference request"); + + FAIL_IF_ERR( + TRITONSERVER_ResponseAllocatorDelete(allocator), + "deleting response allocator"); + + System.exit(0); + } +} diff --git a/tritonserver/samples/pom.xml b/tritonserver/samples/pom.xml new file mode 100644 index 00000000000..42d28fe6658 --- /dev/null +++ b/tritonserver/samples/pom.xml @@ -0,0 +1,26 @@ +<project> +    <modelVersion>4.0.0</modelVersion> +    <groupId>org.bytedeco.tritonserver</groupId> +    <artifactId>simple</artifactId> +    <version>1.5.7-SNAPSHOT</version> +    <properties> +        <exec.mainClass>Simple</exec.mainClass> +        <maven.compiler.source>1.8</maven.compiler.source> +        <maven.compiler.target>1.8</maven.compiler.target> +    </properties> +    <dependencies> +        <dependency> +            <groupId>org.bytedeco</groupId> +            <artifactId>tritonserver-platform</artifactId> +            <version>2.14-1.5.7-SNAPSHOT</version> +        </dependency> +        <dependency> +            <groupId>com.google.code.gson</groupId> +            <artifactId>gson</artifactId> +            <version>2.8.8</version> +        </dependency> +    </dependencies> +    <build> +        <sourceDirectory>.</sourceDirectory> +    </build> +</project> diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/global/tritonserver.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/global/tritonserver.java new file mode 100644 index 00000000000..af6a220e876 --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/global/tritonserver.java @@ -0,0 +1,4356 @@ +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.global; + +import org.bytedeco.tritonserver.tritonserver.*; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +public class tritonserver extends org.bytedeco.tritonserver.presets.tritonserver { + static { Loader.load(); } + +// Parsed from tritonserver.h + +// Copyright 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of NVIDIA CORPORATION nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// #pragma once + +/** \file */ + +// #include <stdbool.h> +// #include <stddef.h> +// #include <stdint.h> + +// #ifdef __cplusplus +// #endif + +// #ifdef _COMPILING_TRITONSERVER +// #if defined(_MSC_VER) +// #define TRITONSERVER_DECLSPEC __declspec(dllexport) +// #elif defined(__GNUC__) +// #define TRITONSERVER_DECLSPEC __attribute__((__visibility__("default"))) +// #else +// #define TRITONSERVER_DECLSPEC +// #endif +// #else +// #if defined(_MSC_VER) +// #define TRITONSERVER_DECLSPEC __declspec(dllimport) +// #else +// #define TRITONSERVER_DECLSPEC +// Targeting ../tritonserver/TRITONSERVER_Error.java + + +// Targeting ../tritonserver/TRITONSERVER_InferenceRequest.java + + +// Targeting ../tritonserver/TRITONSERVER_InferenceResponse.java + + +// Targeting ../tritonserver/TRITONSERVER_InferenceTrace.java + + +// Targeting ../tritonserver/TRITONSERVER_Message.java + + +// Targeting ../tritonserver/TRITONSERVER_Metrics.java + + +// Targeting ../tritonserver/TRITONSERVER_ResponseAllocator.java + + +// Targeting ../tritonserver/TRITONSERVER_Server.java + + +// Targeting ../tritonserver/TRITONSERVER_ServerOptions.java + + + +/** + * TRITONSERVER API Version + * + * The TRITONSERVER API is versioned with major and minor version + * numbers. Any change to the API that does not impact backwards + * compatibility (for example, adding a non-required function) + * increases the minor version number. Any change that breaks + * backwards compatibility (for example, deleting or changing the + * behavior of a function) increases the major version number. A + * client should check that the API version used to compile the + * client is compatible with the API version of the Triton shared + * library that it is linking against. This is typically done by code + * similar to the following which makes sure that the major versions + * are equal and that the minor version of the Triton shared library + * is >= the minor version used to build the client. + * + * uint32_t api_version_major, api_version_minor; + * TRITONSERVER_ApiVersion(&api_version_major, &api_version_minor); + * if ((api_version_major != TRITONSERVER_API_VERSION_MAJOR) || + * (api_version_minor < TRITONSERVER_API_VERSION_MINOR)) { + * return TRITONSERVER_ErrorNew( + * TRITONSERVER_ERROR_UNSUPPORTED, + * "triton server API version does not support this client"); + * } + * */ +public static final int TRITONSERVER_API_VERSION_MAJOR = 1; + +/// +public static final int TRITONSERVER_API_VERSION_MINOR = 4; + +/** Get the TRITONSERVER API version supported by the Triton shared + * library. This value can be compared against the + * TRITONSERVER_API_VERSION_MAJOR and TRITONSERVER_API_VERSION_MINOR + * used to build the client to ensure that the Triton shared library is + * compatible with the client. + * + * @param major Returns the TRITONSERVER API major version supported + * by Triton. + * @param minor Returns the TRITONSERVER API minor version supported + * by Triton. + * @return a TRITONSERVER_Error indicating success or failure. 
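 * For example, the equivalent check from Java, as done in samples/Simple.java
 * (a condensed sketch; FAIL is that sample's helper that prints and exits):
 *
 *   int[] major = {0}, minor = {0};
 *   TRITONSERVER_ApiVersion(major, minor);
 *   if ((TRITONSERVER_API_VERSION_MAJOR != major[0]) ||
 *       (TRITONSERVER_API_VERSION_MINOR > minor[0])) {
 *     FAIL("triton server API version mismatch");
 *   }
 *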
*/ + +/// +/// +public static native TRITONSERVER_Error TRITONSERVER_ApiVersion( + @Cast("uint32_t*") IntPointer major, @Cast("uint32_t*") IntPointer minor); +public static native TRITONSERVER_Error TRITONSERVER_ApiVersion( + @Cast("uint32_t*") IntBuffer major, @Cast("uint32_t*") IntBuffer minor); +public static native TRITONSERVER_Error TRITONSERVER_ApiVersion( + @Cast("uint32_t*") int[] major, @Cast("uint32_t*") int[] minor); + +/** TRITONSERVER_DataType + * + * Tensor data types recognized by TRITONSERVER. + * */ +/** enum TRITONSERVER_DataType */ +public static final int + TRITONSERVER_TYPE_INVALID = 0, + TRITONSERVER_TYPE_BOOL = 1, + TRITONSERVER_TYPE_UINT8 = 2, + TRITONSERVER_TYPE_UINT16 = 3, + TRITONSERVER_TYPE_UINT32 = 4, + TRITONSERVER_TYPE_UINT64 = 5, + TRITONSERVER_TYPE_INT8 = 6, + TRITONSERVER_TYPE_INT16 = 7, + TRITONSERVER_TYPE_INT32 = 8, + TRITONSERVER_TYPE_INT64 = 9, + TRITONSERVER_TYPE_FP16 = 10, + TRITONSERVER_TYPE_FP32 = 11, + TRITONSERVER_TYPE_FP64 = 12, + TRITONSERVER_TYPE_BYTES = 13; + +/** Get the string representation of a data type. The returned string + * is not owned by the caller and so should not be modified or freed. + * + * @param datatype The data type. + * @return The string representation of the data type. */ + +/// +public static native String TRITONSERVER_DataTypeString( + @Cast("TRITONSERVER_DataType") int datatype); + +/** Get the Triton datatype corresponding to a string representation + * of a datatype. + * + * @param dtype The datatype string representation. + * @return The Triton data type or TRITONSERVER_TYPE_INVALID if the + * string does not represent a data type. */ + +/// +public static native @Cast("TRITONSERVER_DataType") int TRITONSERVER_StringToDataType(String dtype); +public static native @Cast("TRITONSERVER_DataType") int TRITONSERVER_StringToDataType(@Cast("const char*") BytePointer dtype); + +/** Get the size of a Triton datatype in bytes. Zero is returned for + * TRITONSERVER_TYPE_BYTES because it has variable size. Zero is + * returned for TRITONSERVER_TYPE_INVALID. + * + * @param dtype The datatype. + * @return The size of the datatype. */ + +/// +/// +public static native @Cast("uint32_t") int TRITONSERVER_DataTypeByteSize(@Cast("TRITONSERVER_DataType") int datatype); + +/** TRITONSERVER_MemoryType + * + * Types of memory recognized by TRITONSERVER. + * */ +/** enum TRITONSERVER_MemoryType */ +public static final int + TRITONSERVER_MEMORY_CPU = 0, + TRITONSERVER_MEMORY_CPU_PINNED = 1, + TRITONSERVER_MEMORY_GPU = 2; + +/** Get the string representation of a memory type. The returned + * string is not owned by the caller and so should not be modified or + * freed. + * + * @param memtype The memory type. + * @return The string representation of the memory type. */ + +/// +/// +public static native String TRITONSERVER_MemoryTypeString( + @Cast("TRITONSERVER_MemoryType") int memtype); + +/** TRITONSERVER_ParameterType + * + * Types of parameters recognized by TRITONSERVER. + * */ +/** enum TRITONSERVER_ParameterType */ +public static final int + TRITONSERVER_PARAMETER_STRING = 0, + TRITONSERVER_PARAMETER_INT = 1, + TRITONSERVER_PARAMETER_BOOL = 2; + +/** Get the string representation of a parameter type. The returned + * string is not owned by the caller and so should not be modified or + * freed. + * + * @param paramtype The parameter type. + * @return The string representation of the parameter type. 
*/ + +/// +/// +public static native String TRITONSERVER_ParameterTypeString( + @Cast("TRITONSERVER_ParameterType") int paramtype); + +/** TRITONSERVER_InstanceGroupKind + * + * Kinds of instance groups recognized by TRITONSERVER. + * */ +/** enum TRITONSERVER_InstanceGroupKind */ +public static final int + TRITONSERVER_INSTANCEGROUPKIND_AUTO = 0, + TRITONSERVER_INSTANCEGROUPKIND_CPU = 1, + TRITONSERVER_INSTANCEGROUPKIND_GPU = 2, + TRITONSERVER_INSTANCEGROUPKIND_MODEL = 3; + +/** Get the string representation of an instance-group kind. The + * returned string is not owned by the caller and so should not be + * modified or freed. + * + * @param kind The instance-group kind. + * @return The string representation of the kind. */ + +/// +/// +public static native String TRITONSERVER_InstanceGroupKindString( + @Cast("TRITONSERVER_InstanceGroupKind") int kind); + +/** TRITONSERVER_Logging + * + * Types/levels of logging. + * */ +/** enum TRITONSERVER_LogLevel */ +public static final int + TRITONSERVER_LOG_INFO = 0, + TRITONSERVER_LOG_WARN = 1, + TRITONSERVER_LOG_ERROR = 2, + TRITONSERVER_LOG_VERBOSE = 3; + +/** Is a log level enabled? + * + * @param level The log level. + * @return True if the log level is enabled, false if not enabled. */ + +/// +public static native @Cast("bool") boolean TRITONSERVER_LogIsEnabled( + @Cast("TRITONSERVER_LogLevel") int level); + +/** Log a message at a given log level if that level is enabled. + * + * @param level The log level. + * @param filename The file name of the location of the log message. + * @param line The line number of the log message. + * @param msg The log message. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +/// +/// +public static native TRITONSERVER_Error TRITONSERVER_LogMessage( + @Cast("TRITONSERVER_LogLevel") int level, String filename, int line, + String msg); +public static native TRITONSERVER_Error TRITONSERVER_LogMessage( + @Cast("TRITONSERVER_LogLevel") int level, @Cast("const char*") BytePointer filename, int line, + @Cast("const char*") BytePointer msg); + +/** TRITONSERVER_Error + * + * Errors are reported by a TRITONSERVER_Error object. A NULL + * TRITONSERVER_Error indicates no error, a non-NULL TRITONSERVER_Error + * indicates error and the code and message for the error can be + * retrieved from the object. + * + * The caller takes ownership of a TRITONSERVER_Error object returned by + * the API and must call TRITONSERVER_ErrorDelete to release the object. + * +
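+ * For example, a small Java helper that converts an error into an
+ * exception might look like this (a sketch only, not part of the API):
+ *
+ *   static void check(TRITONSERVER_Error err) {
+ *     if (err != null) {
+ *       String msg = TRITONSERVER_ErrorCodeString(err) + " - "
+ *                  + TRITONSERVER_ErrorMessage(err);
+ *       TRITONSERVER_ErrorDelete(err);
+ *       throw new RuntimeException(msg);
+ *     }
+ *   }
+ *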

+ * The TRITONSERVER_Error error codes */ +/** enum TRITONSERVER_Error_Code */ +public static final int + TRITONSERVER_ERROR_UNKNOWN = 0, + TRITONSERVER_ERROR_INTERNAL = 1, + TRITONSERVER_ERROR_NOT_FOUND = 2, + TRITONSERVER_ERROR_INVALID_ARG = 3, + TRITONSERVER_ERROR_UNAVAILABLE = 4, + TRITONSERVER_ERROR_UNSUPPORTED = 5, + TRITONSERVER_ERROR_ALREADY_EXISTS = 6; + +/** Create a new error object. The caller takes ownership of the + * TRITONSERVER_Error object and must call TRITONSERVER_ErrorDelete to + * release the object. + * + * @param code The error code. + * @param msg The error message. + * @return A new TRITONSERVER_Error object. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ErrorNew( + @Cast("TRITONSERVER_Error_Code") int code, String msg); +public static native TRITONSERVER_Error TRITONSERVER_ErrorNew( + @Cast("TRITONSERVER_Error_Code") int code, @Cast("const char*") BytePointer msg); + +/** Delete an error object. + * + * @param error The error object. */ + +/// +public static native void TRITONSERVER_ErrorDelete(TRITONSERVER_Error error); + +/** Get the error code. + * + * @param error The error object. + * @return The error code. */ + +/// +public static native @Cast("TRITONSERVER_Error_Code") int TRITONSERVER_ErrorCode(TRITONSERVER_Error error); + +/** Get the string representation of an error code. The returned + * string is not owned by the caller and so should not be modified or + * freed. The lifetime of the returned string extends only as long as + * 'error' and must not be accessed once 'error' is deleted. + * + * @param error The error object. + * @return The string representation of the error code. */ + +/// +public static native String TRITONSERVER_ErrorCodeString( + TRITONSERVER_Error error); + +/** Get the error message. The returned string is not owned by the + * caller and so should not be modified or freed. The lifetime of the + * returned string extends only as long as 'error' and must not be + * accessed once 'error' is deleted. + * + * @param error The error object. + * @return The error message. */ + +/// +/// +/// +public static native String TRITONSERVER_ErrorMessage( + TRITONSERVER_Error error); +// Targeting ../tritonserver/TRITONSERVER_ResponseAllocatorAllocFn_t.java + + +// Targeting ../tritonserver/TRITONSERVER_ResponseAllocatorReleaseFn_t.java + + +// Targeting ../tritonserver/TRITONSERVER_ResponseAllocatorStartFn_t.java + + + +/** Create a new response allocator object. + * + * The response allocator object is used by Triton to allocate + * buffers to hold the output tensors in inference responses. Most + * models generate a single response for each inference request + * (TRITONSERVER_TXN_ONE_TO_ONE). For these models the order of + * callbacks will be: + * + * TRITONSERVER_ServerInferAsync called + * - start_fn : optional (and typically not required) + * - alloc_fn : called once for each output tensor in response + * TRITONSERVER_InferenceResponseDelete called + * - release_fn: called once for each output tensor in response + * + * For models that generate multiple responses for each inference + * request (TRITONSERVER_TXN_DECOUPLED), the start_fn callback can be + * used to determine sets of alloc_fn callbacks that belong to the + * same response: + * + * TRITONSERVER_ServerInferAsync called + * - start_fn + * - alloc_fn : called once for each output tensor in response + * - start_fn + * - alloc_fn : called once for each output tensor in response + * ... 
+ * For each response, TRITONSERVER_InferenceResponseDelete called + * - release_fn: called once for each output tensor in the response + * + * In all cases the start_fn, alloc_fn and release_fn callback + * functions must be thread-safe. Typically making these functions + * thread-safe does not require explicit locking. The recommended way + * to implement these functions is to have each inference request + * provide a 'response_allocator_userp' object that is unique to that + * request with TRITONSERVER_InferenceRequestSetResponseCallback. The + * callback functions then operate only on this unique state. Locking + * is required only when the callback function needs to access state + * that is shared across inference requests (for example, a common + * allocation pool). + * + * @param allocator Returns the new response allocator object. + * @param alloc_fn The function to call to allocate buffers for result + * tensors. + * @param release_fn The function to call when the server no longer + * holds a reference to an allocated buffer. + * @param start_fn The function to call to indicate that the + * subsequent 'alloc_fn' calls are for a new response. This callback + * is optional (use nullptr to indicate that it should not be + * invoked). +
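+ * In these Java bindings the callbacks are instances of the
+ * corresponding *Fn_t classes, as shown in the sketch below
+ * ('alloc_fn' and 'release_fn' are hypothetical callback objects
+ * defined elsewhere that must be kept alive while the allocator is
+ * in use; a null 'start_fn' means the start callback is not used):
+ *
+ *   TRITONSERVER_ResponseAllocator allocator =
+ *       new TRITONSERVER_ResponseAllocator(null);
+ *   TRITONSERVER_ResponseAllocatorNew(
+ *       allocator, alloc_fn, release_fn, null);
+ *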

+ * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ResponseAllocatorNew( + @Cast("TRITONSERVER_ResponseAllocator**") PointerPointer allocator, + TRITONSERVER_ResponseAllocatorAllocFn_t alloc_fn, + TRITONSERVER_ResponseAllocatorReleaseFn_t release_fn, + TRITONSERVER_ResponseAllocatorStartFn_t start_fn); +public static native TRITONSERVER_Error TRITONSERVER_ResponseAllocatorNew( + @ByPtrPtr TRITONSERVER_ResponseAllocator allocator, + TRITONSERVER_ResponseAllocatorAllocFn_t alloc_fn, + TRITONSERVER_ResponseAllocatorReleaseFn_t release_fn, + TRITONSERVER_ResponseAllocatorStartFn_t start_fn); + +/** Delete a response allocator. + * + * @param allocator The response allocator object. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +/// +/// +public static native TRITONSERVER_Error TRITONSERVER_ResponseAllocatorDelete( + TRITONSERVER_ResponseAllocator allocator); + +/** TRITONSERVER_Message + * + * Object representing a Triton Server message. + * +
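+ * A JSON round trip might look like this in Java (a sketch; error
+ * checking omitted):
+ *
+ *   String json = "{\"key\" : \"value\"}";
+ *   TRITONSERVER_Message message = new TRITONSERVER_Message(null);
+ *   TRITONSERVER_MessageNewFromSerializedJson(
+ *       message, json, json.length());
+ *   BytePointer base = new BytePointer((Pointer)null);
+ *   SizeTPointer byte_size = new SizeTPointer(1);
+ *   TRITONSERVER_MessageSerializeToJson(message, base, byte_size);
+ *   byte[] bytes = new byte[(int)byte_size.get()];
+ *   base.get(bytes);
+ *   System.out.println(new String(bytes));
+ *   TRITONSERVER_MessageDelete(message);
+ *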

+ * Create a new message object from serialized JSON string. + * + * @param message The message object. + * @param base The base of the serialized JSON. + * @param byte_size The size, in bytes, of the serialized message. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_MessageNewFromSerializedJson( + @Cast("TRITONSERVER_Message**") PointerPointer message, String base, @Cast("size_t") long byte_size); +public static native TRITONSERVER_Error TRITONSERVER_MessageNewFromSerializedJson( + @ByPtrPtr TRITONSERVER_Message message, String base, @Cast("size_t") long byte_size); +public static native TRITONSERVER_Error TRITONSERVER_MessageNewFromSerializedJson( + @ByPtrPtr TRITONSERVER_Message message, @Cast("const char*") BytePointer base, @Cast("size_t") long byte_size); + +/** Delete a message object. + * + * @param message The message object. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_MessageDelete( + TRITONSERVER_Message message); + +/** Get the base and size of the buffer containing the serialized + * message in JSON format. The buffer is owned by the + * TRITONSERVER_Message object and should not be modified or freed by + * the caller. The lifetime of the buffer extends only as long as + * 'message' and must not be accessed once 'message' is deleted. + * + * @param message The message object. + * @param base Returns the base of the serialized message. + * @param byte_size Returns the size, in bytes, of the serialized + * message. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +/// +public static native TRITONSERVER_Error TRITONSERVER_MessageSerializeToJson( + TRITONSERVER_Message message, @Cast("const char**") PointerPointer base, @Cast("size_t*") SizeTPointer byte_size); +public static native TRITONSERVER_Error TRITONSERVER_MessageSerializeToJson( + TRITONSERVER_Message message, @Cast("const char**") @ByPtrPtr BytePointer base, @Cast("size_t*") SizeTPointer byte_size); +public static native TRITONSERVER_Error TRITONSERVER_MessageSerializeToJson( + TRITONSERVER_Message message, @Cast("const char**") @ByPtrPtr ByteBuffer base, @Cast("size_t*") SizeTPointer byte_size); +public static native TRITONSERVER_Error TRITONSERVER_MessageSerializeToJson( + TRITONSERVER_Message message, @Cast("const char**") @ByPtrPtr byte[] base, @Cast("size_t*") SizeTPointer byte_size); + +/** TRITONSERVER_Metrics + * + * Object representing metrics. + * +
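+ * Printing the metrics in Prometheus text format might look like
+ * this (a sketch; 'metrics' is a TRITONSERVER_Metrics object
+ * obtained elsewhere, error checking omitted):
+ *
+ *   BytePointer base = new BytePointer((Pointer)null);
+ *   SizeTPointer byte_size = new SizeTPointer(1);
+ *   TRITONSERVER_MetricsFormatted(
+ *       metrics, TRITONSERVER_METRIC_PROMETHEUS, base, byte_size);
+ *   byte[] text = new byte[(int)byte_size.get()];
+ *   base.get(text);
+ *   System.out.println(new String(text));
+ *   TRITONSERVER_MetricsDelete(metrics);
+ *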

+ * Metric format types */ +/** enum TRITONSERVER_MetricFormat */ +public static final int + TRITONSERVER_METRIC_PROMETHEUS = 0; + +/** Delete a metrics object. + * + * @param metrics The metrics object. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +/// +/// +public static native TRITONSERVER_Error TRITONSERVER_MetricsDelete( + TRITONSERVER_Metrics metrics); + +/** Get a buffer containing the metrics in the specified format. For + * each format the buffer contains the following: + * + * TRITONSERVER_METRIC_PROMETHEUS: 'base' points to a single multiline + * string (char*) that gives a text representation of the metrics in + * prometheus format. 'byte_size' returns the length of the string + * in bytes. + * + * The buffer is owned by the 'metrics' object and should not be + * modified or freed by the caller. The lifetime of the buffer + * extends only as long as 'metrics' and must not be accessed once + * 'metrics' is deleted. + * + * @param metrics The metrics object. + * @param format The format to use for the returned metrics. + * @param base Returns a pointer to the base of the formatted + * metrics, as described above. + * @param byte_size Returns the size, in bytes, of the formatted + * metrics. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +/// +public static native TRITONSERVER_Error TRITONSERVER_MetricsFormatted( + TRITONSERVER_Metrics metrics, @Cast("TRITONSERVER_MetricFormat") int format, + @Cast("const char**") PointerPointer base, @Cast("size_t*") SizeTPointer byte_size); +public static native TRITONSERVER_Error TRITONSERVER_MetricsFormatted( + TRITONSERVER_Metrics metrics, @Cast("TRITONSERVER_MetricFormat") int format, + @Cast("const char**") @ByPtrPtr BytePointer base, @Cast("size_t*") SizeTPointer byte_size); +public static native TRITONSERVER_Error TRITONSERVER_MetricsFormatted( + TRITONSERVER_Metrics metrics, @Cast("TRITONSERVER_MetricFormat") int format, + @Cast("const char**") @ByPtrPtr ByteBuffer base, @Cast("size_t*") SizeTPointer byte_size); +public static native TRITONSERVER_Error TRITONSERVER_MetricsFormatted( + TRITONSERVER_Metrics metrics, @Cast("TRITONSERVER_MetricFormat") int format, + @Cast("const char**") @ByPtrPtr byte[] base, @Cast("size_t*") SizeTPointer byte_size); + +/** TRITONSERVER_InferenceTrace + * + * Object that represents tracing for an inference request. + * +
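+ * For example, inside an activity callback a trace can be inspected
+ * like this (a sketch; error checking omitted):
+ *
+ *   long[] id = new long[1], parent_id = new long[1];
+ *   TRITONSERVER_InferenceTraceId(trace, id);
+ *   TRITONSERVER_InferenceTraceParentId(trace, parent_id);
+ *   System.out.println("trace " + id[0] + " (parent " + parent_id[0] + ")");
+ *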

+ * Trace levels */
+/** enum TRITONSERVER_InferenceTraceLevel */
+public static final int
+  TRITONSERVER_TRACE_LEVEL_DISABLED = 0,
+  TRITONSERVER_TRACE_LEVEL_MIN = 1,
+  TRITONSERVER_TRACE_LEVEL_MAX = 2;
+
+/** Get the string representation of a trace level. The returned
+ * string is not owned by the caller and so should not be modified or
+ * freed.
+ *
+ * @param level The trace level.
+ * @return The string representation of the trace level. */
+public static native String TRITONSERVER_InferenceTraceLevelString(
+    @Cast("TRITONSERVER_InferenceTraceLevel") int level);
+
+// Trace activities
+/** enum TRITONSERVER_InferenceTraceActivity */
+public static final int
+  TRITONSERVER_TRACE_REQUEST_START = 0,
+  TRITONSERVER_TRACE_QUEUE_START = 1,
+  TRITONSERVER_TRACE_COMPUTE_START = 2,
+  TRITONSERVER_TRACE_COMPUTE_INPUT_END = 3,
+  TRITONSERVER_TRACE_COMPUTE_OUTPUT_START = 4,
+  TRITONSERVER_TRACE_COMPUTE_END = 5,
+  TRITONSERVER_TRACE_REQUEST_END = 6;
+
+/** Get the string representation of a trace activity. The returned
+ * string is not owned by the caller and so should not be modified or
+ * freed.
+ *
+ * @param activity The trace activity.
+ * @return The string representation of the trace activity. */
+public static native String TRITONSERVER_InferenceTraceActivityString(
+    @Cast("TRITONSERVER_InferenceTraceActivity") int activity);
+// Targeting ../tritonserver/TRITONSERVER_InferenceTraceActivityFn_t.java
+
+
+// Targeting ../tritonserver/TRITONSERVER_InferenceTraceReleaseFn_t.java
+
+
+
+/** Create a new inference trace object. The caller takes ownership of
+ * the TRITONSERVER_InferenceTrace object and must call
+ * TRITONSERVER_InferenceTraceDelete to release the object.
+ *
+ * The activity callback function will be called to report activity
+ * for 'trace' as well as for any child traces that are spawned by
+ * 'trace', and so the activity callback must check the trace object
+ * to determine specifically what activity is being reported.
+ *
+ * The release callback is called for both 'trace' and for any child
+ * traces spawned by 'trace'.
+ *
+ * @param trace Returns the new inference trace object.
+ * @param level The tracing level.
+ * @param parent_id The parent trace id for this trace. A value of 0
+ * indicates that there is no parent trace.
+ * @param activity_fn The callback function where activity for the
+ * trace is reported.
+ * @param release_fn The callback function called when all activity
+ * is complete for the trace.
+ * @param trace_userp User-provided pointer that is delivered to
+ * the activity and release callback functions.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceNew(
+    @Cast("TRITONSERVER_InferenceTrace**") PointerPointer trace, @Cast("TRITONSERVER_InferenceTraceLevel") int level,
+    @Cast("uint64_t") long parent_id, TRITONSERVER_InferenceTraceActivityFn_t activity_fn,
+    TRITONSERVER_InferenceTraceReleaseFn_t release_fn, Pointer trace_userp);
+public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceNew(
+    @ByPtrPtr TRITONSERVER_InferenceTrace trace, @Cast("TRITONSERVER_InferenceTraceLevel") int level,
+    @Cast("uint64_t") long parent_id, TRITONSERVER_InferenceTraceActivityFn_t activity_fn,
+    TRITONSERVER_InferenceTraceReleaseFn_t release_fn, Pointer trace_userp);
+
+/** Delete a trace object.
+ *
+ * @param trace The trace object.
+ * @return a TRITONSERVER_Error indicating success or failure.
*/ + +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceDelete( + TRITONSERVER_InferenceTrace trace); + +/** Get the id associated with a trace. Every trace is assigned an id + * that is unique across all traces created for a Triton server. + * + * @param trace The trace. + * @param id Returns the id associated with the trace. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceId( + TRITONSERVER_InferenceTrace trace, @Cast("uint64_t*") LongPointer id); +public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceId( + TRITONSERVER_InferenceTrace trace, @Cast("uint64_t*") LongBuffer id); +public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceId( + TRITONSERVER_InferenceTrace trace, @Cast("uint64_t*") long[] id); + +/** Get the parent id associated with a trace. The parent id indicates + * a parent-child relationship between two traces. A parent id value + * of 0 indicates that there is no parent trace. + * + * @param trace The trace. + * @param id Returns the parent id associated with the trace. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceParentId( + TRITONSERVER_InferenceTrace trace, @Cast("uint64_t*") LongPointer parent_id); +public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceParentId( + TRITONSERVER_InferenceTrace trace, @Cast("uint64_t*") LongBuffer parent_id); +public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceParentId( + TRITONSERVER_InferenceTrace trace, @Cast("uint64_t*") long[] parent_id); + +/** Get the name of the model associated with a trace. The caller does + * not own the returned string and must not modify or delete it. The + * lifetime of the returned string extends only as long as 'trace'. + * + * @param trace The trace. + * @param model_name Returns the name of the model associated with + * the trace. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceModelName( + TRITONSERVER_InferenceTrace trace, @Cast("const char**") PointerPointer model_name); +public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceModelName( + TRITONSERVER_InferenceTrace trace, @Cast("const char**") @ByPtrPtr BytePointer model_name); +public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceModelName( + TRITONSERVER_InferenceTrace trace, @Cast("const char**") @ByPtrPtr ByteBuffer model_name); +public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceModelName( + TRITONSERVER_InferenceTrace trace, @Cast("const char**") @ByPtrPtr byte[] model_name); + +/** Get the version of the model associated with a trace. + * + * @param trace The trace. + * @param model_version Returns the version of the model associated + * with the trace. + * @return a TRITONSERVER_Error indicating success or failure. 
*/ + +/// +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceModelVersion( + TRITONSERVER_InferenceTrace trace, @Cast("int64_t*") LongPointer model_version); +public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceModelVersion( + TRITONSERVER_InferenceTrace trace, @Cast("int64_t*") LongBuffer model_version); +public static native TRITONSERVER_Error TRITONSERVER_InferenceTraceModelVersion( + TRITONSERVER_InferenceTrace trace, @Cast("int64_t*") long[] model_version); + +/** TRITONSERVER_InferenceRequest + * + * Object representing an inference request. The inference request + * provides the meta-data and input tensor values needed for an + * inference and returns the inference result meta-data and output + * tensors. An inference request object can be modified and reused + * multiple times. + * +
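+ * Creating a request might look like this in Java (a sketch; the
+ * model name "simple" is hypothetical, -1 selects the version
+ * according to the model's policy, error checking omitted):
+ *
+ *   TRITONSERVER_InferenceRequest request =
+ *       new TRITONSERVER_InferenceRequest(null);
+ *   TRITONSERVER_InferenceRequestNew(request, server, "simple", -1);
+ *   TRITONSERVER_InferenceRequestSetId(request, "request_0");
+ *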

+ * Inference request flags. The enum values must be power-of-2 values. */ +/** enum TRITONSERVER_RequestFlag */ +public static final int + TRITONSERVER_REQUEST_FLAG_SEQUENCE_START = 1, + TRITONSERVER_REQUEST_FLAG_SEQUENCE_END = 2; + +/** Inference request release flags. The enum values must be + * power-of-2 values. */ +/** enum TRITONSERVER_RequestReleaseFlag */ +public static final int + TRITONSERVER_REQUEST_RELEASE_ALL = 1; + +/** Inference response complete flags. The enum values must be + * power-of-2 values. */ +/** enum TRITONSERVER_ResponseCompleteFlag */ +public static final int + TRITONSERVER_RESPONSE_COMPLETE_FINAL = 1; +// Targeting ../tritonserver/TRITONSERVER_InferenceRequestReleaseFn_t.java + + +// Targeting ../tritonserver/TRITONSERVER_InferenceResponseCompleteFn_t.java + + + +/** Create a new inference request object. + * + * @param inference_request Returns the new request object. + * @param server the inference server object. + * @param model_name The name of the model to use for the request. + * @param model_version The version of the model to use for the + * request. If -1 then the server will choose a version based on the + * model's policy. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestNew( + @Cast("TRITONSERVER_InferenceRequest**") PointerPointer inference_request, + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestNew( + @ByPtrPtr TRITONSERVER_InferenceRequest inference_request, + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestNew( + @ByPtrPtr TRITONSERVER_InferenceRequest inference_request, + TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name, + @Cast("const int64_t") long model_version); + +/** Delete an inference request object. + * + * @param inference_request The request object. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestDelete( + TRITONSERVER_InferenceRequest inference_request); + +/** Get the ID for a request. The returned ID is owned by + * 'inference_request' and must not be modified or freed by the + * caller. + * + * @param inference_request The request object. + * @param id Returns the ID. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestId( + TRITONSERVER_InferenceRequest inference_request, @Cast("const char**") PointerPointer id); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestId( + TRITONSERVER_InferenceRequest inference_request, @Cast("const char**") @ByPtrPtr BytePointer id); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestId( + TRITONSERVER_InferenceRequest inference_request, @Cast("const char**") @ByPtrPtr ByteBuffer id); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestId( + TRITONSERVER_InferenceRequest inference_request, @Cast("const char**") @ByPtrPtr byte[] id); + +/** Set the ID for a request. + * + * @param inference_request The request object. + * @param id The ID. + * @return a TRITONSERVER_Error indicating success or failure. 
*/
+
+///
+public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestSetId(
+    TRITONSERVER_InferenceRequest inference_request, String id);
+public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestSetId(
+    TRITONSERVER_InferenceRequest inference_request, @Cast("const char*") BytePointer id);
+
+/** Get the flag(s) associated with a request. On return 'flags' holds
+ * a bitwise-or of all flag values; see TRITONSERVER_RequestFlag for
+ * available flags.
+ *
+ * @param inference_request The request object.
+ * @param flags Returns the flags.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestFlags(
+    TRITONSERVER_InferenceRequest inference_request, @Cast("uint32_t*") IntPointer flags);
+public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestFlags(
+    TRITONSERVER_InferenceRequest inference_request, @Cast("uint32_t*") IntBuffer flags);
+public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestFlags(
+    TRITONSERVER_InferenceRequest inference_request, @Cast("uint32_t*") int[] flags);
+
+/** Set the flag(s) associated with a request. 'flags' should hold a
+ * bitwise-or of all flag values; see TRITONSERVER_RequestFlag for
+ * available flags.
+ *
+ * @param inference_request The request object.
+ * @param flags The flags.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestSetFlags(
+    TRITONSERVER_InferenceRequest inference_request, @Cast("uint32_t") int flags);
+
+/** Get the correlation ID of the inference request. Default is 0,
+ * which indicates that the request has no correlation ID. The
+ * correlation ID is used to indicate that two or more inference
+ * requests are related to each other. How this relationship is
+ * handled by the inference server is determined by the model's
+ * scheduling policy.
+ *
+ * @param inference_request The request object.
+ * @param correlation_id Returns the correlation ID.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestCorrelationId(
+    TRITONSERVER_InferenceRequest inference_request, @Cast("uint64_t*") LongPointer correlation_id);
+public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestCorrelationId(
+    TRITONSERVER_InferenceRequest inference_request, @Cast("uint64_t*") LongBuffer correlation_id);
+public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestCorrelationId(
+    TRITONSERVER_InferenceRequest inference_request, @Cast("uint64_t*") long[] correlation_id);
+
+/** Set the correlation ID of the inference request. Default is 0, which
+ * indicates that the request has no correlation ID. The correlation ID
+ * is used to indicate that two or more inference requests are related
+ * to each other. How this relationship is handled by the inference
+ * server is determined by the model's scheduling policy.
+ *
+ * @param inference_request The request object.
+ * @param correlation_id The correlation ID.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestSetCorrelationId(
+    TRITONSERVER_InferenceRequest inference_request, @Cast("uint64_t") long correlation_id);
+
+/** Get the priority for a request. The default is 0 indicating that
+ * the request does not specify a priority and so will use the
+ * model's default priority.
+ * + * @param inference_request The request object. + * @param priority Returns the priority level. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestPriority( + TRITONSERVER_InferenceRequest inference_request, @Cast("uint32_t*") IntPointer priority); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestPriority( + TRITONSERVER_InferenceRequest inference_request, @Cast("uint32_t*") IntBuffer priority); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestPriority( + TRITONSERVER_InferenceRequest inference_request, @Cast("uint32_t*") int[] priority); + +/** Set the priority for a request. The default is 0 indicating that + * the request does not specify a priority and so will use the + * model's default priority. + * + * @param inference_request The request object. + * @param priority The priority level. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestSetPriority( + TRITONSERVER_InferenceRequest inference_request, @Cast("uint32_t") int priority); + +/** Get the timeout for a request, in microseconds. The default is 0 + * which indicates that the request has no timeout. + * + * @param inference_request The request object. + * @param timeout_us Returns the timeout, in microseconds. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestTimeoutMicroseconds( + TRITONSERVER_InferenceRequest inference_request, @Cast("uint64_t*") LongPointer timeout_us); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestTimeoutMicroseconds( + TRITONSERVER_InferenceRequest inference_request, @Cast("uint64_t*") LongBuffer timeout_us); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestTimeoutMicroseconds( + TRITONSERVER_InferenceRequest inference_request, @Cast("uint64_t*") long[] timeout_us); + +/** Set the timeout for a request, in microseconds. The default is 0 + * which indicates that the request has no timeout. + * + * @param inference_request The request object. + * @param timeout_us The timeout, in microseconds. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestSetTimeoutMicroseconds( + TRITONSERVER_InferenceRequest inference_request, @Cast("uint64_t") long timeout_us); + +/** Add an input to a request. + * + * @param inference_request The request object. + * @param name The name of the input. + * @param datatype The type of the input. Valid type names are BOOL, + * UINT8, UINT16, UINT32, UINT64, INT8, INT16, INT32, INT64, FP16, + * FP32, FP64, and BYTES. + * @param shape The shape of the input. + * @param dim_count The number of dimensions of 'shape'. + * @return a TRITONSERVER_Error indicating success or failure. 
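+ *
+ * For example, declaring a 2-dimensional INT32 input (a sketch; the
+ * input name "INPUT0" is hypothetical):
+ *
+ *   long[] shape = {1, 16};
+ *   TRITONSERVER_InferenceRequestAddInput(
+ *       request, "INPUT0", TRITONSERVER_TYPE_INT32, shape, 2);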
*/ + +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAddInput( + TRITONSERVER_InferenceRequest inference_request, String name, + @Cast("const TRITONSERVER_DataType") int datatype, @Cast("const int64_t*") LongPointer shape, + @Cast("uint64_t") long dim_count); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAddInput( + TRITONSERVER_InferenceRequest inference_request, @Cast("const char*") BytePointer name, + @Cast("const TRITONSERVER_DataType") int datatype, @Cast("const int64_t*") LongBuffer shape, + @Cast("uint64_t") long dim_count); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAddInput( + TRITONSERVER_InferenceRequest inference_request, String name, + @Cast("const TRITONSERVER_DataType") int datatype, @Cast("const int64_t*") long[] shape, + @Cast("uint64_t") long dim_count); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAddInput( + TRITONSERVER_InferenceRequest inference_request, @Cast("const char*") BytePointer name, + @Cast("const TRITONSERVER_DataType") int datatype, @Cast("const int64_t*") LongPointer shape, + @Cast("uint64_t") long dim_count); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAddInput( + TRITONSERVER_InferenceRequest inference_request, String name, + @Cast("const TRITONSERVER_DataType") int datatype, @Cast("const int64_t*") LongBuffer shape, + @Cast("uint64_t") long dim_count); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAddInput( + TRITONSERVER_InferenceRequest inference_request, @Cast("const char*") BytePointer name, + @Cast("const TRITONSERVER_DataType") int datatype, @Cast("const int64_t*") long[] shape, + @Cast("uint64_t") long dim_count); + +/** Remove an input from a request. + * + * @param inference_request The request object. + * @param name The name of the input. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestRemoveInput( + TRITONSERVER_InferenceRequest inference_request, String name); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestRemoveInput( + TRITONSERVER_InferenceRequest inference_request, @Cast("const char*") BytePointer name); + +/** Remove all inputs from a request. + * + * @param inference_request The request object. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestRemoveAllInputs( + TRITONSERVER_InferenceRequest inference_request); + +/** Assign a buffer of data to an input. The buffer will be appended + * to any existing buffers for that input. The 'inference_request' + * object takes ownership of the buffer and so the caller should not + * modify or free the buffer until that ownership is released by + * 'inference_request' being deleted or by the input being removed + * from 'inference_request'. + * + * @param inference_request The request object. + * @param name The name of the input. + * @param base The base address of the input data. + * @param byte_size The size, in bytes, of the input data. + * @param memory_type The memory type of the input data. + * @param memory_type_id The memory type id of the input data. + * @return a TRITONSERVER_Error indicating success or failure. 
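+ *
+ * For example, appending 16 INT32 values held in native memory (a
+ * sketch continuing the hypothetical "INPUT0" example above):
+ *
+ *   IntPointer input_data = new IntPointer(16); // filled elsewhere
+ *   TRITONSERVER_InferenceRequestAppendInputData(
+ *       request, "INPUT0", input_data, 16 * 4,
+ *       TRITONSERVER_MEMORY_CPU, 0);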
*/
+public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAppendInputData(
+    TRITONSERVER_InferenceRequest inference_request, String name,
+    @Const Pointer base, @Cast("size_t") long byte_size, @Cast("TRITONSERVER_MemoryType") int memory_type,
+    @Cast("int64_t") long memory_type_id);
+public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAppendInputData(
+    TRITONSERVER_InferenceRequest inference_request, @Cast("const char*") BytePointer name,
+    @Const Pointer base, @Cast("size_t") long byte_size, @Cast("TRITONSERVER_MemoryType") int memory_type,
+    @Cast("int64_t") long memory_type_id);
+
+/** Assign a buffer of data to an input for execution on all model instances
+ * with the specified host policy. The buffer will be appended to any existing
+ * buffers for that input on all devices with this host policy. The
+ * 'inference_request' object takes ownership of the buffer and so the caller
+ * should not modify or free the buffer until that ownership is released by
+ * 'inference_request' being deleted or by the input being removed from
+ * 'inference_request'. If the execution is scheduled on a device that does not
+ * have an input buffer specified using this function, then the input buffer
+ * specified with TRITONSERVER_InferenceRequestAppendInputData will be used, so
+ * a non-host-policy-specific version of the data must be added using that API.
+ * @param inference_request The request object.
+ * @param name The name of the input.
+ * @param base The base address of the input data.
+ * @param byte_size The size, in bytes, of the input data.
+ * @param memory_type The memory type of the input data.
+ * @param memory_type_id The memory type id of the input data.
+ * @param host_policy_name All model instances executing with this host_policy
+ * will use this input buffer for execution.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy(
+    TRITONSERVER_InferenceRequest inference_request, String name,
+    @Const Pointer base, @Cast("size_t") long byte_size, @Cast("TRITONSERVER_MemoryType") int memory_type,
+    @Cast("int64_t") long memory_type_id, String host_policy_name);
+public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy(
+    TRITONSERVER_InferenceRequest inference_request, @Cast("const char*") BytePointer name,
+    @Const Pointer base, @Cast("size_t") long byte_size, @Cast("TRITONSERVER_MemoryType") int memory_type,
+    @Cast("int64_t") long memory_type_id, @Cast("const char*") BytePointer host_policy_name);
+
+/** Clear all input data from an input, releasing ownership of the
+ * buffer(s) that were appended to the input with
+ * TRITONSERVER_InferenceRequestAppendInputData or
+ * TRITONSERVER_InferenceRequestAppendInputDataWithHostPolicy.
+ * @param inference_request The request object.
+ * @param name The name of the input.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestRemoveAllInputData(
+    TRITONSERVER_InferenceRequest inference_request, String name);
+public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestRemoveAllInputData(
+    TRITONSERVER_InferenceRequest inference_request, @Cast("const char*") BytePointer name);
+
+/** Add an output request to an inference request.
+ *
+ * @param inference_request The request object.
+ * @param name The name of the output.
+ * @return a TRITONSERVER_Error indicating success or failure.
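+ *
+ * For example (a sketch; the output name "OUTPUT0" is hypothetical):
+ *
+ *   TRITONSERVER_InferenceRequestAddRequestedOutput(request, "OUTPUT0");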
*/ + +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAddRequestedOutput( + TRITONSERVER_InferenceRequest inference_request, String name); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestAddRequestedOutput( + TRITONSERVER_InferenceRequest inference_request, @Cast("const char*") BytePointer name); + +/** Remove an output request from an inference request. + * + * @param inference_request The request object. + * @param name The name of the output. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestRemoveRequestedOutput( + TRITONSERVER_InferenceRequest inference_request, String name); +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestRemoveRequestedOutput( + TRITONSERVER_InferenceRequest inference_request, @Cast("const char*") BytePointer name); + +/** Remove all output requests from an inference request. + * + * @param inference_request The request object. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestRemoveAllRequestedOutputs( + TRITONSERVER_InferenceRequest inference_request); + +/** Set the release callback for an inference request. The release + * callback is called by Triton to return ownership of the request + * object. + * + * @param inference_request The request object. + * @param request_release_fn The function called to return ownership + * of the 'inference_request' object. + * @param request_release_userp User-provided pointer that is + * delivered to the 'request_release_fn' callback. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestSetReleaseCallback( + TRITONSERVER_InferenceRequest inference_request, + TRITONSERVER_InferenceRequestReleaseFn_t request_release_fn, + Pointer request_release_userp); + +/** Set the allocator and response callback for an inference + * request. The allocator is used to allocate buffers for any output + * tensors included in responses that are produced for this + * request. The response callback is called to return response + * objects representing responses produced for this request. + * + * @param inference_request The request object. + * @param response_allocator The TRITONSERVER_ResponseAllocator to use + * to allocate buffers to hold inference results. + * @param response_allocator_userp User-provided pointer that is + * delivered to the response allocator's start and allocation functions. + * @param response_fn The function called to deliver an inference + * response for this request. + * @param response_userp User-provided pointer that is delivered to + * the 'response_fn' callback. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +/// +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceRequestSetResponseCallback( + TRITONSERVER_InferenceRequest inference_request, + TRITONSERVER_ResponseAllocator response_allocator, + Pointer response_allocator_userp, + TRITONSERVER_InferenceResponseCompleteFn_t response_fn, + Pointer response_userp); + +/** TRITONSERVER_InferenceResponse + * + * Object representing an inference response. The inference response + * provides the meta-data and output tensor values calculated by the + * inference. + * +
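+ * Iterating over the outputs of a completed response might look like
+ * this (a sketch; error checking omitted):
+ *
+ *   int[] count = new int[1];
+ *   TRITONSERVER_InferenceResponseOutputCount(response, count);
+ *   for (int i = 0; i < count[0]; i++) {
+ *     BytePointer name = new BytePointer((Pointer)null);
+ *     IntPointer datatype = new IntPointer(1);
+ *     LongPointer shape = new LongPointer((Pointer)null);
+ *     LongPointer dim_count = new LongPointer(1);
+ *     Pointer base = new Pointer();
+ *     SizeTPointer byte_size = new SizeTPointer(1);
+ *     IntPointer memory_type = new IntPointer(1);
+ *     LongPointer memory_type_id = new LongPointer(1);
+ *     Pointer userp = new Pointer();
+ *     TRITONSERVER_InferenceResponseOutput(
+ *         response, i, name, datatype, shape, dim_count, base,
+ *         byte_size, memory_type, memory_type_id, userp);
+ *   }
+ *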

+ * Delete an inference response object.
+ *
+ * @param inference_response The response object.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseDelete(
+    TRITONSERVER_InferenceResponse inference_response);
+
+/** Return the error status of an inference response. Return a
+ * TRITONSERVER_Error object on failure, return nullptr on success.
+ * The returned error object is owned by 'inference_response' and so
+ * should not be deleted by the caller.
+ *
+ * @param inference_response The response object.
+ * @return a TRITONSERVER_Error indicating the success or failure
+ * status of the response. */
+
+///
+public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseError(
+    TRITONSERVER_InferenceResponse inference_response);
+
+/** Get the model used to produce a response. The caller does not own
+ * the returned model name value and must not modify or delete it. The
+ * lifetime of all returned values extends until 'inference_response'
+ * is deleted.
+ *
+ * @param inference_response The response object.
+ * @param model_name Returns the name of the model.
+ * @param model_version Returns the version of the model that produced
+ * this response.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseModel(
+    TRITONSERVER_InferenceResponse inference_response, @Cast("const char**") PointerPointer model_name,
+    @Cast("int64_t*") LongPointer model_version);
+public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseModel(
+    TRITONSERVER_InferenceResponse inference_response, @Cast("const char**") @ByPtrPtr BytePointer model_name,
+    @Cast("int64_t*") LongPointer model_version);
+public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseModel(
+    TRITONSERVER_InferenceResponse inference_response, @Cast("const char**") @ByPtrPtr ByteBuffer model_name,
+    @Cast("int64_t*") LongBuffer model_version);
+public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseModel(
+    TRITONSERVER_InferenceResponse inference_response, @Cast("const char**") @ByPtrPtr byte[] model_name,
+    @Cast("int64_t*") long[] model_version);
+
+/** Get the ID of the request corresponding to a response. The caller
+ * does not own the returned ID and must not modify or delete it. The
+ * lifetime of all returned values extends until 'inference_response'
+ * is deleted.
+ *
+ * @param inference_response The response object.
+ * @param request_id Returns the ID of the request corresponding to
+ * this response.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseId(
+    TRITONSERVER_InferenceResponse inference_response,
+    @Cast("const char**") PointerPointer request_id);
+public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseId(
+    TRITONSERVER_InferenceResponse inference_response,
+    @Cast("const char**") @ByPtrPtr BytePointer request_id);
+public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseId(
+    TRITONSERVER_InferenceResponse inference_response,
+    @Cast("const char**") @ByPtrPtr ByteBuffer request_id);
+public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseId(
+    TRITONSERVER_InferenceResponse inference_response,
+    @Cast("const char**") @ByPtrPtr byte[] request_id);
+
+/** Get the number of parameters available in the response.
+ *
+ * @param inference_response The response object.
+ * @param count Returns the number of parameters.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+///
+///
+public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseParameterCount(
+    TRITONSERVER_InferenceResponse inference_response, @Cast("uint32_t*") IntPointer count);
+public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseParameterCount(
+    TRITONSERVER_InferenceResponse inference_response, @Cast("uint32_t*") IntBuffer count);
+public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseParameterCount(
+    TRITONSERVER_InferenceResponse inference_response, @Cast("uint32_t*") int[] count);
+
+/** Get all information about a parameter. The caller does not own any
+ * of the returned values and must not modify or delete them. The
+ * lifetime of all returned values extends until 'inference_response'
+ * is deleted.
+ *
+ * The 'vvalue' returns a void* pointer that must be cast
+ * appropriately based on 'type'. For example:
+ *
+ *   void* vvalue;
+ *   TRITONSERVER_ParameterType type;
+ *   TRITONSERVER_InferenceResponseParameter(
+ *       response, index, &name, &type, &vvalue);
+ *   switch (type) {
+ *     case TRITONSERVER_PARAMETER_BOOL:
+ *       bool value = *(reinterpret_cast<bool*>(vvalue));
+ *       ...
+ *     case TRITONSERVER_PARAMETER_INT:
+ *       int64_t value = *(reinterpret_cast<int64_t*>(vvalue));
+ *       ...
+ *     case TRITONSERVER_PARAMETER_STRING:
+ *       const char* value = reinterpret_cast<const char*>(vvalue);
+ *       ...
+ *
+ * @param inference_response The response object.
+ * @param index The index of the parameter, must be 0 <= index <
+ * count, where 'count' is the value returned by
+ * TRITONSERVER_InferenceResponseParameterCount.
+ * @param name Returns the name of the parameter.
+ * @param type Returns the type of the parameter.
+ * @param vvalue Returns a pointer to the parameter value.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseParameter(
+    TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index,
+    @Cast("const char**") PointerPointer name, @Cast("TRITONSERVER_ParameterType*") IntPointer type, @Cast("const void**") PointerPointer vvalue);
+public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseParameter(
+    TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index,
+    @Cast("const char**") @ByPtrPtr BytePointer name, @Cast("TRITONSERVER_ParameterType*") IntPointer type, @Cast("const void**") @ByPtrPtr Pointer vvalue);
+public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseParameter(
+    TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index,
+    @Cast("const char**") @ByPtrPtr ByteBuffer name, @Cast("TRITONSERVER_ParameterType*") IntBuffer type, @Cast("const void**") @ByPtrPtr Pointer vvalue);
+public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseParameter(
+    TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index,
+    @Cast("const char**") @ByPtrPtr byte[] name, @Cast("TRITONSERVER_ParameterType*") int[] type, @Cast("const void**") @ByPtrPtr Pointer vvalue);
+
+/** Get the number of outputs available in the response.
+ *
+ * @param inference_response The response object.
+ * @param count Returns the number of output tensors.
+ * @return a TRITONSERVER_Error indicating success or failure.
*/ + +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseOutputCount( + TRITONSERVER_InferenceResponse inference_response, @Cast("uint32_t*") IntPointer count); +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseOutputCount( + TRITONSERVER_InferenceResponse inference_response, @Cast("uint32_t*") IntBuffer count); +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseOutputCount( + TRITONSERVER_InferenceResponse inference_response, @Cast("uint32_t*") int[] count); + +/** Get all information about an output tensor. The tensor data is + * returned as the base pointer to the data and the size, in bytes, + * of the data. The caller does not own any of the returned values + * and must not modify or delete them. The lifetime of all returned + * values extends until 'inference_response' is deleted. + * + * @param inference_response The response object. + * @param index The index of the output tensor, must be 0 <= index < + * count, where 'count' is the value returned by + * TRITONSERVER_InferenceResponseOutputCount. + * @param name Returns the name of the output. + * @param datatype Returns the type of the output. + * @param shape Returns the shape of the output. + * @param dim_count Returns the number of dimensions of the returned + * shape. + * @param base Returns the tensor data for the output. + * @param byte_size Returns the size, in bytes, of the data. + * @param memory_type Returns the memory type of the data. + * @param memory_type_id Returns the memory type id of the data. + * @param userp The user-specified value associated with the buffer + * in TRITONSERVER_ResponseAllocatorAllocFn_t. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseOutput( + TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index, + @Cast("const char**") PointerPointer name, @Cast("TRITONSERVER_DataType*") IntPointer datatype, @Cast("const int64_t**") PointerPointer shape, + @Cast("uint64_t*") LongPointer dim_count, @Cast("const void**") PointerPointer base, @Cast("size_t*") SizeTPointer byte_size, + @Cast("TRITONSERVER_MemoryType*") IntPointer memory_type, @Cast("int64_t*") LongPointer memory_type_id, + @Cast("void**") PointerPointer userp); +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseOutput( + TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index, + @Cast("const char**") @ByPtrPtr BytePointer name, @Cast("TRITONSERVER_DataType*") IntPointer datatype, @Cast("const int64_t**") @ByPtrPtr LongPointer shape, + @Cast("uint64_t*") LongPointer dim_count, @Cast("const void**") @ByPtrPtr Pointer base, @Cast("size_t*") SizeTPointer byte_size, + @Cast("TRITONSERVER_MemoryType*") IntPointer memory_type, @Cast("int64_t*") LongPointer memory_type_id, + @Cast("void**") @ByPtrPtr Pointer userp); +public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseOutput( + TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index, + @Cast("const char**") @ByPtrPtr ByteBuffer name, @Cast("TRITONSERVER_DataType*") IntBuffer datatype, @Cast("const int64_t**") @ByPtrPtr LongBuffer shape, + @Cast("uint64_t*") LongBuffer dim_count, @Cast("const void**") @ByPtrPtr Pointer base, @Cast("size_t*") SizeTPointer byte_size, + @Cast("TRITONSERVER_MemoryType*") IntBuffer memory_type, @Cast("int64_t*") LongBuffer memory_type_id, + @Cast("void**") @ByPtrPtr Pointer userp); +public 
static native TRITONSERVER_Error TRITONSERVER_InferenceResponseOutput(
+    TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index,
+    @Cast("const char**") @ByPtrPtr byte[] name, @Cast("TRITONSERVER_DataType*") int[] datatype, @Cast("const int64_t**") @ByPtrPtr long[] shape,
+    @Cast("uint64_t*") long[] dim_count, @Cast("const void**") @ByPtrPtr Pointer base, @Cast("size_t*") SizeTPointer byte_size,
+    @Cast("TRITONSERVER_MemoryType*") int[] memory_type, @Cast("int64_t*") long[] memory_type_id,
+    @Cast("void**") @ByPtrPtr Pointer userp);
+
+/** Get a classification label associated with an output for a given
+ * index. The caller does not own the returned label and must not
+ * modify or delete it. The lifetime of the returned label extends
+ * until 'inference_response' is deleted.
+ *
+ * @param inference_response The response object.
+ * @param index The index of the output tensor, must be 0 <= index <
+ * count, where 'count' is the value returned by
+ * TRITONSERVER_InferenceResponseOutputCount.
+ * @param class_index The index of the class.
+ * @param name Returns the label corresponding to 'class_index' or
+ * nullptr if no label.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+///
+public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseOutputClassificationLabel(
+    TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index,
+    @Cast("const size_t") long class_index, @Cast("const char**") PointerPointer label);
+public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseOutputClassificationLabel(
+    TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index,
+    @Cast("const size_t") long class_index, @Cast("const char**") @ByPtrPtr BytePointer label);
+public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseOutputClassificationLabel(
+    TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index,
+    @Cast("const size_t") long class_index, @Cast("const char**") @ByPtrPtr ByteBuffer label);
+public static native TRITONSERVER_Error TRITONSERVER_InferenceResponseOutputClassificationLabel(
+    TRITONSERVER_InferenceResponse inference_response, @Cast("const uint32_t") int index,
+    @Cast("const size_t") long class_index, @Cast("const char**") @ByPtrPtr byte[] label);
+
+
+/** TRITONSERVER_ServerOptions
+ *
+ * Options to use when creating an inference server.
+ *
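+ * Building options might look like this in Java (a sketch; the
+ * repository path is hypothetical, error checking omitted):
+ *
+ *   TRITONSERVER_ServerOptions options =
+ *       new TRITONSERVER_ServerOptions(null);
+ *   TRITONSERVER_ServerOptionsNew(options);
+ *   TRITONSERVER_ServerOptionsSetModelRepositoryPath(
+ *       options, "/path/to/model_repository");
+ *   TRITONSERVER_ServerOptionsSetStrictModelConfig(options, true);
+ *   // ... use the options to create the server, then:
+ *   TRITONSERVER_ServerOptionsDelete(options);
+ *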

+ * Model control modes */
+/** enum TRITONSERVER_ModelControlMode */
+public static final int
+  TRITONSERVER_MODEL_CONTROL_NONE = 0,
+  TRITONSERVER_MODEL_CONTROL_POLL = 1,
+  TRITONSERVER_MODEL_CONTROL_EXPLICIT = 2;
+
+/** Rate limit modes */
+/** enum TRITONSERVER_RateLimitMode */
+public static final int
+  TRITONSERVER_RATE_LIMIT_OFF = 0,
+  TRITONSERVER_RATE_LIMIT_EXEC_COUNT = 1;
+
+/** Create a new server options object. The caller takes ownership of
+ * the TRITONSERVER_ServerOptions object and must call
+ * TRITONSERVER_ServerOptionsDelete to release the object.
+ *
+ * @param options Returns the new server options object.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsNew(
+    @Cast("TRITONSERVER_ServerOptions**") PointerPointer options);
+public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsNew(
+    @ByPtrPtr TRITONSERVER_ServerOptions options);
+
+/** Delete a server options object.
+ *
+ * @param options The server options object.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsDelete(
+    TRITONSERVER_ServerOptions options);
+
+/** Set the textual ID for the server in a server options. The ID is a
+ * name that identifies the server.
+ *
+ * @param options The server options object.
+ * @param server_id The server identifier.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetServerId(
+    TRITONSERVER_ServerOptions options, String server_id);
+public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetServerId(
+    TRITONSERVER_ServerOptions options, @Cast("const char*") BytePointer server_id);
+
+/** Set the model repository path in a server options. The path must be
+ * the full absolute path to the model repository. This function can be called
+ * multiple times with different paths to set multiple model repositories.
+ * Note that if a model is not unique across all model repositories
+ * at any time, the model will not be available.
+ *
+ * @param options The server options object.
+ * @param model_repository_path The full path to the model repository.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+///
+///
+///
+public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetModelRepositoryPath(
+    TRITONSERVER_ServerOptions options, String model_repository_path);
+public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetModelRepositoryPath(
+    TRITONSERVER_ServerOptions options, @Cast("const char*") BytePointer model_repository_path);
+
+/** Set the model control mode in a server options. For each mode the models
+ * will be managed as follows:
+ *
+ * TRITONSERVER_MODEL_CONTROL_NONE: the models in the model repository will be
+ * loaded on startup. After startup any changes to the model repository will
+ * be ignored. Calling TRITONSERVER_ServerPollModelRepository will result in
+ * an error.
+ *
+ * TRITONSERVER_MODEL_CONTROL_POLL: the models in the model repository will be
+ * loaded on startup. The model repository can be polled periodically using
+ * TRITONSERVER_ServerPollModelRepository and the server will load, unload,
+ * and update models according to changes in the model repository.
+ *
+ * TRITONSERVER_MODEL_CONTROL_EXPLICIT: the models in the model repository will
+ * not be loaded on startup.
The corresponding model control APIs must be + * called to load/unload a model in the model repository. + * + * @param options The server options object. + * @param mode The mode to use for the model control. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetModelControlMode( + TRITONSERVER_ServerOptions options, @Cast("TRITONSERVER_ModelControlMode") int mode); + +/** Set the model to be loaded at startup in a server options. The model must be + * present in one, and only one, of the specified model repositories. + * This function can be called multiple times with different model names + * to set multiple startup models. + * Note that it only takes effect in TRITONSERVER_MODEL_CONTROL_EXPLICIT mode. + * + * @param options The server options object. + * @param model_name The name of the model to load on startup. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetStartupModel( + TRITONSERVER_ServerOptions options, String model_name); +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetStartupModel( + TRITONSERVER_ServerOptions options, @Cast("const char*") BytePointer model_name); + +/** Enable or disable strict model configuration handling in a server + * options. + * + * @param options The server options object. + * @param strict True to enable strict model configuration handling, + * false to disable. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +/// +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetStrictModelConfig( + TRITONSERVER_ServerOptions options, @Cast("bool") boolean strict); + +/** Set the rate limit mode in a server options. + * + * TRITONSERVER_RATE_LIMIT_EXEC_COUNT: The rate limiting prioritizes the + * inference execution using the number of times each instance has had a + * chance to run. The execution gets to run only when its resource + * constraints are satisfied. + * + * TRITONSERVER_RATE_LIMIT_OFF: The rate limiting is turned off and the + * inference gets executed whenever an instance is available. + * + * @param options The server options object. + * @param mode The mode to use for the rate limiting. By default, execution + * count is used to determine the priorities. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetRateLimiterMode( + TRITONSERVER_ServerOptions options, @Cast("TRITONSERVER_RateLimitMode") int mode); + +/** Add resource count for rate limiting. + * + * @param options The server options object. + * @param name The name of the resource. + * @param count The count of the resource. + * @param device The device identifier for the resource. A value of -1 + * indicates that the specified number of resources are available on every + * device. The device value is ignored for a global resource. The server + * will use the rate limiter configuration specified for instance groups + * in model config to determine whether a resource is global. In case of + * conflicting resource types in different model configurations, the server + * will raise an appropriate error while loading the model. + * @return a TRITONSERVER_Error indicating success or failure.
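Taken together, the option setters above are typically chained right after TRITONSERVER_ServerOptionsNew. A hedged usage sketch (the repository path and model name are hypothetical; error checks are elided; same imports as the earlier sketch):

    // Configure an explicitly managed model repository.
    TRITONSERVER_ServerOptions options = new TRITONSERVER_ServerOptions(null);
    TRITONSERVER_ServerOptionsNew(options);           // fills in the native address
    TRITONSERVER_ServerOptionsSetServerId(options, "example-server");
    TRITONSERVER_ServerOptionsSetModelRepositoryPath(options, "/models");
    TRITONSERVER_ServerOptionsSetModelControlMode(options, TRITONSERVER_MODEL_CONTROL_EXPLICIT);
    TRITONSERVER_ServerOptionsSetStartupModel(options, "my_model"); // honored only in EXPLICIT mode
    TRITONSERVER_ServerOptionsSetStrictModelConfig(options, true);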
 */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsAddRateLimiterResource( + TRITONSERVER_ServerOptions options, String resource_name, + @Cast("const size_t") long resource_count, int device); +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsAddRateLimiterResource( + TRITONSERVER_ServerOptions options, @Cast("const char*") BytePointer resource_name, + @Cast("const size_t") long resource_count, int device); + +/** Set the total pinned memory byte size that the server can allocate + * in a server options. The pinned memory pool will be shared across + * Triton itself and the backends that use + * TRITONBACKEND_MemoryManager to allocate memory. + * + * @param options The server options object. + * @param size The pinned memory pool byte size. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetPinnedMemoryPoolByteSize( + TRITONSERVER_ServerOptions options, @Cast("uint64_t") long size); + +/** Set the total CUDA memory byte size that the server can allocate + * on a given GPU device in a server options. The CUDA memory pool + * will be shared across Triton itself and the backends that use + * TRITONBACKEND_MemoryManager to allocate memory. + * + * @param options The server options object. + * @param gpu_device The GPU device to allocate the memory pool. + * @param size The CUDA memory pool byte size. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetCudaMemoryPoolByteSize( + TRITONSERVER_ServerOptions options, int gpu_device, @Cast("uint64_t") long size); + +/** Set the minimum supported CUDA compute capability in a server + * options. + * + * @param options The server options object. + * @param cc The minimum CUDA compute capability. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetMinSupportedComputeCapability( + TRITONSERVER_ServerOptions options, double cc); + +/** Enable or disable exit-on-error in a server options. + * + * @param options The server options object. + * @param exit True to enable exiting on initialization error, false + * to continue. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetExitOnError( + TRITONSERVER_ServerOptions options, @Cast("bool") boolean exit); + +/** Enable or disable strict readiness handling in a server options. + * + * @param options The server options object. + * @param strict True to enable strict readiness handling, false to + * disable. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetStrictReadiness( + TRITONSERVER_ServerOptions options, @Cast("bool") boolean strict); + +/** Set the exit timeout, in seconds, for the server in a server + * options. + * + * @param options The server options object. + * @param timeout The exit timeout, in seconds. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetExitTimeout( + TRITONSERVER_ServerOptions options, @Cast("unsigned int") int timeout); + +/** Set the number of threads used in the buffer manager in a server options. + * + * @param options The server options object. + * @param thread_count The number of threads.
+ * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetBufferManagerThreadCount( + TRITONSERVER_ServerOptions options, @Cast("unsigned int") int thread_count); + +/** Enable or disable info level logging. + * + * @param options The server options object. + * @param log True to enable info logging, false to disable. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetLogInfo( + TRITONSERVER_ServerOptions options, @Cast("bool") boolean log); + +/** Enable or disable warning level logging. + * + * @param options The server options object. + * @param log True to enable warning logging, false to disable. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetLogWarn( + TRITONSERVER_ServerOptions options, @Cast("bool") boolean log); + +/** Enable or disable error level logging. + * + * @param options The server options object. + * @param log True to enable error logging, false to disable. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetLogError( + TRITONSERVER_ServerOptions options, @Cast("bool") boolean log); + +/** Set verbose logging level. Level zero disables verbose logging. + * + * @param options The server options object. + * @param level The verbose logging level. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetLogVerbose( + TRITONSERVER_ServerOptions options, int level); + +/** Enable or disable metrics collection in a server options. + * + * @param options The server options object. + * @param metrics True to enable metrics, false to disable. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetMetrics( + TRITONSERVER_ServerOptions options, @Cast("bool") boolean metrics); + +/** Enable or disable GPU metrics collection in a server options. GPU + * metrics are collected if both this option and + * TRITONSERVER_ServerOptionsSetMetrics are true. + * + * @param options The server options object. + * @param gpu_metrics True to enable GPU metrics, false to disable. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetGpuMetrics( + TRITONSERVER_ServerOptions options, @Cast("bool") boolean gpu_metrics); + +/** Set the directory containing backend shared libraries. This + * directory is searched last after the version and model directory + * in the model repository when looking for the backend shared + * library for a model. If the backend is named 'be' the directory + * searched is 'backend_dir'/be/libtriton_be.so. + * + * @param options The server options object. + * @param backend_dir The full path of the backend directory. + * @return a TRITONSERVER_Error indicating success or failure. 
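The logging, metrics, and directory setters follow the same pattern; a brief hedged continuation of the sketch above (error checks elided):

    // Standard log levels on, verbose logging off, metrics enabled.
    TRITONSERVER_ServerOptionsSetLogInfo(options, true);
    TRITONSERVER_ServerOptionsSetLogWarn(options, true);
    TRITONSERVER_ServerOptionsSetLogError(options, true);
    TRITONSERVER_ServerOptionsSetLogVerbose(options, 0);    // level 0 disables verbose logging
    TRITONSERVER_ServerOptionsSetMetrics(options, true);
    TRITONSERVER_ServerOptionsSetGpuMetrics(options, true); // needs SetMetrics(true) as well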
 */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetBackendDirectory( + TRITONSERVER_ServerOptions options, String backend_dir); +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetBackendDirectory( + TRITONSERVER_ServerOptions options, @Cast("const char*") BytePointer backend_dir); + +/** Set the directory containing repository agent shared libraries. This + * directory is searched when looking for the repository agent shared + * library for a model. If the repository agent is named 'ra' the directory + * searched is 'repoagent_dir'/ra/libtritonrepoagent_ra.so. + * + * @param options The server options object. + * @param repoagent_dir The full path of the repository agent directory. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetRepoAgentDirectory( + TRITONSERVER_ServerOptions options, String repoagent_dir); +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetRepoAgentDirectory( + TRITONSERVER_ServerOptions options, @Cast("const char*") BytePointer repoagent_dir); + +/** Set a configuration setting for a named backend in a server + * options. + * + * @param options The server options object. + * @param backend_name The name of the backend. + * @param setting The name of the setting. + * @param value The setting value. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetBackendConfig( + TRITONSERVER_ServerOptions options, String backend_name, + String setting, String value); +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetBackendConfig( + TRITONSERVER_ServerOptions options, @Cast("const char*") BytePointer backend_name, + @Cast("const char*") BytePointer setting, @Cast("const char*") BytePointer value); + +/** Set a host policy setting for a given policy name in a server options. + * + * @param options The server options object. + * @param policy_name The name of the policy. + * @param setting The name of the setting. + * @param value The setting value. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetHostPolicy( + TRITONSERVER_ServerOptions options, String policy_name, + String setting, String value); +public static native TRITONSERVER_Error TRITONSERVER_ServerOptionsSetHostPolicy( + TRITONSERVER_ServerOptions options, @Cast("const char*") BytePointer policy_name, + @Cast("const char*") BytePointer setting, @Cast("const char*") BytePointer value); + +/** TRITONSERVER_Server + * + * An inference server. + *

+ * Model batch flags. The enum values must be power-of-2 values. */ +/** enum TRITONSERVER_ModelBatchFlag */ +public static final int + TRITONSERVER_BATCH_UNKNOWN = 1, + TRITONSERVER_BATCH_FIRST_DIM = 2; + +/** Model index flags. The enum values must be power-of-2 values. */ +/** enum TRITONSERVER_ModelIndexFlag */ +public static final int + TRITONSERVER_INDEX_FLAG_READY = 1; + +/** Model transaction policy flags. The enum values must be + * power-of-2 values. */ +/** enum TRITONSERVER_ModelTxnPropertyFlag */ +public static final int + TRITONSERVER_TXN_ONE_TO_ONE = 1, + TRITONSERVER_TXN_DECOUPLED = 2; + +/** Create a new server object. The caller takes ownership of the + * TRITONSERVER_Server object and must call TRITONSERVER_ServerDelete + * to release the object. + * + * @param server Returns the new inference server object. + * @param options The inference server options object. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerNew( + @Cast("TRITONSERVER_Server**") PointerPointer server, TRITONSERVER_ServerOptions options); +public static native TRITONSERVER_Error TRITONSERVER_ServerNew( + @ByPtrPtr TRITONSERVER_Server server, TRITONSERVER_ServerOptions options); + +/** Delete a server object. If server is not already stopped it is + * stopped before being deleted. + * + * @param server The inference server object. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerDelete( + TRITONSERVER_Server server); + +/** Stop a server object. A server can't be restarted once it is + * stopped. + * + * @param server The inference server object. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerStop( + TRITONSERVER_Server server); + +/** Check the model repository for changes and update server state + * based on those changes. + * + * @param server The inference server object. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerPollModelRepository(TRITONSERVER_Server server); + +/** Is the server live? + * + * @param server The inference server object. + * @param live Returns true if server is live, false otherwise. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerIsLive( + TRITONSERVER_Server server, @Cast("bool*") boolean[] live); +public static native TRITONSERVER_Error TRITONSERVER_ServerIsLive( + TRITONSERVER_Server server, @Cast("bool*") BoolPointer live); + +/** Is the server ready? + * + * @param server The inference server object. + * @param ready Returns true if server is ready, false otherwise. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerIsReady( + TRITONSERVER_Server server, @Cast("bool*") boolean[] ready); +public static native TRITONSERVER_Error TRITONSERVER_ServerIsReady( + TRITONSERVER_Server server, @Cast("bool*") BoolPointer ready); + +/** Is the model ready? + * + * @param server The inference server object. + * @param model_name The name of the model to get readiness for. + * @param model_version The version of the model to get readiness + * for. If -1 then the server will choose a version based on the + * model's policy. 
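A hedged lifecycle sketch tying these calls together (continuing from the options sketches above; error checks elided):

    // Create the server, check liveness/readiness, then shut down.
    TRITONSERVER_Server server = new TRITONSERVER_Server(null);
    TRITONSERVER_ServerNew(server, options);
    boolean[] live = {false}, ready = {false};
    TRITONSERVER_ServerIsLive(server, live);
    TRITONSERVER_ServerIsReady(server, ready);
    System.out.println("live=" + live[0] + ", ready=" + ready[0]);
    TRITONSERVER_ServerStop(server);     // cannot be restarted afterwards
    TRITONSERVER_ServerDelete(server);   // stops first if still running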
+ * @param ready Returns true if the model is ready, false otherwise. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +/// +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerModelIsReady( + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version, @Cast("bool*") boolean[] ready); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelIsReady( + TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name, + @Cast("const int64_t") long model_version, @Cast("bool*") BoolPointer ready); + +/** Get the batch properties of the model. The properties are + * communicated by a flags value and an (optional) object returned by + * 'voidp'. + * + * - TRITONSERVER_BATCH_UNKNOWN: Triton cannot determine the + * batching properties of the model. This means that the model + * does not support batching in any way that is usable by + * Triton. The returned 'voidp' value is nullptr. + * + * - TRITONSERVER_BATCH_FIRST_DIM: The model supports batching + * along the first dimension of every input and output + * tensor. Triton schedulers that perform batching can + * automatically batch inference requests along this dimension. + * The returned 'voidp' value is nullptr. + * + * @param server The inference server object. + * @param model_name The name of the model. + * @param model_version The version of the model. If -1 then the + * server will choose a version based on the model's policy. + * @param flags Returns flags indicating the batch properties of the + * model. + * @param voidp If non-nullptr, returns a pointer specific to the + * 'flags' value. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +/// +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerModelBatchProperties( + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version, @Cast("uint32_t*") IntPointer flags, @Cast("void**") PointerPointer voidp); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelBatchProperties( + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version, @Cast("uint32_t*") IntPointer flags, @Cast("void**") @ByPtrPtr Pointer voidp); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelBatchProperties( + TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name, + @Cast("const int64_t") long model_version, @Cast("uint32_t*") IntBuffer flags, @Cast("void**") @ByPtrPtr Pointer voidp); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelBatchProperties( + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version, @Cast("uint32_t*") int[] flags, @Cast("void**") @ByPtrPtr Pointer voidp); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelBatchProperties( + TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name, + @Cast("const int64_t") long model_version, @Cast("uint32_t*") IntPointer flags, @Cast("void**") @ByPtrPtr Pointer voidp); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelBatchProperties( + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version, @Cast("uint32_t*") IntBuffer flags, @Cast("void**") @ByPtrPtr Pointer voidp); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelBatchProperties( + TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name, + @Cast("const int64_t") long model_version, @Cast("uint32_t*") int[] flags, @Cast("void**")
@ByPtrPtr Pointer voidp); + +/** Get the transaction policy of the model. The policy is + * communicated by a flags value. + * + * - TRITONSERVER_TXN_ONE_TO_ONE: The model generates exactly + * one response per request. + * + * - TRITONSERVER_TXN_DECOUPLED: The model may generate zero + * to many responses per request. + * + * @param server The inference server object. + * @param model_name The name of the model. + * @param model_version The version of the model. If -1 then the + * server will choose a version based on the model's policy. + * @param txn_flags Returns flags indicating the transaction policy of the + * model. + * @param voidp If non-nullptr, returns a pointer specific to the 'flags' value. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerModelTransactionProperties( + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version, @Cast("uint32_t*") IntPointer txn_flags, @Cast("void**") PointerPointer voidp); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelTransactionProperties( + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version, @Cast("uint32_t*") IntPointer txn_flags, @Cast("void**") @ByPtrPtr Pointer voidp); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelTransactionProperties( + TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name, + @Cast("const int64_t") long model_version, @Cast("uint32_t*") IntBuffer txn_flags, @Cast("void**") @ByPtrPtr Pointer voidp); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelTransactionProperties( + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version, @Cast("uint32_t*") int[] txn_flags, @Cast("void**") @ByPtrPtr Pointer voidp); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelTransactionProperties( + TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name, + @Cast("const int64_t") long model_version, @Cast("uint32_t*") IntPointer txn_flags, @Cast("void**") @ByPtrPtr Pointer voidp); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelTransactionProperties( + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version, @Cast("uint32_t*") IntBuffer txn_flags, @Cast("void**") @ByPtrPtr Pointer voidp); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelTransactionProperties( + TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name, + @Cast("const int64_t") long model_version, @Cast("uint32_t*") int[] txn_flags, @Cast("void**") @ByPtrPtr Pointer voidp); + +/** Get the metadata of the server as a TRITONSERVER_Message object. + * The caller takes ownership of the message object and must call + * TRITONSERVER_MessageDelete to release the object. + * + * @param server The inference server object. + * @param server_metadata Returns the server metadata message. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerMetadata( + TRITONSERVER_Server server, @Cast("TRITONSERVER_Message**") PointerPointer server_metadata); +public static native TRITONSERVER_Error TRITONSERVER_ServerMetadata( + TRITONSERVER_Server server, @ByPtrPtr TRITONSERVER_Message server_metadata); + +/** Get the metadata of a model as a TRITONSERVER_Message + * object.
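The TRITONSERVER_Message results are easiest to consume as JSON. A hedged sketch, assuming the TRITONSERVER_MessageSerializeToJson and TRITONSERVER_MessageDelete bindings declared earlier in this header (error checks elided):

    // Print server metadata as a JSON document.
    TRITONSERVER_Message metadata = new TRITONSERVER_Message(null);
    TRITONSERVER_ServerMetadata(server, metadata);
    BytePointer json = new BytePointer((Pointer) null);
    SizeTPointer json_size = new SizeTPointer(1);
    TRITONSERVER_MessageSerializeToJson(metadata, json, json_size);
    System.out.println(json.limit(json_size.get()).getString());
    TRITONSERVER_MessageDelete(metadata);   // caller owns the message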
The caller takes ownership of the message object and must + * call TRITONSERVER_MessageDelete to release the object. + * + * @param server The inference server object. + * @param model_name The name of the model. + * @param model_version The version of the model. + * If -1 then the server will choose a version based on the model's + * policy. + * @param model_metadata Returns the model metadata message. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerModelMetadata( + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version, @Cast("TRITONSERVER_Message**") PointerPointer model_metadata); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelMetadata( + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version, @ByPtrPtr TRITONSERVER_Message model_metadata); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelMetadata( + TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name, + @Cast("const int64_t") long model_version, @ByPtrPtr TRITONSERVER_Message model_metadata); + +/** Get the statistics of a model as a TRITONSERVER_Message + * object. The caller takes ownership of the object and must call + * TRITONSERVER_MessageDelete to release the object. + * + * @param server The inference server object. + * @param model_name The name of the model. + * If empty, then statistics for all available models will be returned, + * and the server will choose a version based on those models' policies. + * @param model_version The version of the model. If -1 then the + * server will choose a version based on the model's policy. + * @param model_stats Returns the model statistics message. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerModelStatistics( + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version, @Cast("TRITONSERVER_Message**") PointerPointer model_stats); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelStatistics( + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version, @ByPtrPtr TRITONSERVER_Message model_stats); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelStatistics( + TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name, + @Cast("const int64_t") long model_version, @ByPtrPtr TRITONSERVER_Message model_stats); + +/** Get the configuration of a model as a TRITONSERVER_Message object. + * The caller takes ownership of the message object and must call + * TRITONSERVER_MessageDelete to release the object. + * + * @param server The inference server object. + * @param model_name The name of the model. + * @param model_version The version of the model. If -1 then the + * server will choose a version based on the model's policy. + * @param config_version The model configuration will be returned in + * a format matching this version. If the configuration cannot be + * represented in the requested version's format then an error will + * be returned. Currently only version 1 is supported. + * @param model_config Returns the model config message. + * @return a TRITONSERVER_Error indicating success or failure. 
 */ + +/// +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerModelConfig( + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version, @Cast("const uint32_t") int config_version, + @Cast("TRITONSERVER_Message**") PointerPointer model_config); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelConfig( + TRITONSERVER_Server server, String model_name, + @Cast("const int64_t") long model_version, @Cast("const uint32_t") int config_version, + @ByPtrPtr TRITONSERVER_Message model_config); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelConfig( + TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name, + @Cast("const int64_t") long model_version, @Cast("const uint32_t") int config_version, + @ByPtrPtr TRITONSERVER_Message model_config); + +/** Get the index of all unique models in the model repositories as a + * TRITONSERVER_Message object. The caller takes ownership of the + * message object and must call TRITONSERVER_MessageDelete to release + * the object. + * + * If TRITONSERVER_INDEX_FLAG_READY is set in 'flags' only the models + * that are loaded into the server and ready for inferencing are + * returned. + * + * @param server The inference server object. + * @param flags TRITONSERVER_ModelIndexFlag flags that control how to + * collect the index. + * @param model_index Return the model index message that holds the + * index of all models contained in the server's model repository(s). + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerModelIndex( + TRITONSERVER_Server server, @Cast("uint32_t") int flags, + @Cast("TRITONSERVER_Message**") PointerPointer model_index); +public static native TRITONSERVER_Error TRITONSERVER_ServerModelIndex( + TRITONSERVER_Server server, @Cast("uint32_t") int flags, + @ByPtrPtr TRITONSERVER_Message model_index); + +/** Load the requested model or reload the model if it is already + * loaded. The function does not return until the model is loaded or + * fails to load. The returned error indicates whether the model loaded + * successfully. + * + * @param server The inference server object. + * @param model_name The name of the model. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerLoadModel( + TRITONSERVER_Server server, String model_name); +public static native TRITONSERVER_Error TRITONSERVER_ServerLoadModel( + TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name); + +/** Unload the requested model. Unloading a model that is not loaded + * on the server has no effect and a success code will be returned. + * The function does not wait for the requested model to be fully unloaded + * and a success code will be returned. + * The returned error indicates whether the model unloaded successfully. + * + * @param server The inference server object. + * @param model_name The name of the model. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerUnloadModel( + TRITONSERVER_Server server, String model_name); +public static native TRITONSERVER_Error TRITONSERVER_ServerUnloadModel( + TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name); + +/** Unload the requested model, and also unload any dependent model that + * was loaded along with the requested model (for example, the models composing + * an ensemble).
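In EXPLICIT control mode the load/unload calls above drive the model lifecycle. A hedged sketch (the model name is hypothetical; error checks elided):

    // Load a model, confirm readiness, then request its unload.
    TRITONSERVER_ServerLoadModel(server, "my_model");   // blocks until loaded or failed
    boolean[] model_ready = {false};
    TRITONSERVER_ServerModelIsReady(server, "my_model", -1, model_ready);
    TRITONSERVER_ServerUnloadModel(server, "my_model"); // returns before the unload completes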
Unloading a model that is not loaded + * on the server has no effect and a success code will be returned. + * The function does not wait for the requested model and all dependent + * models to be fully unloaded and a success code will be returned. + * The returned error indicates whether the model unloaded successfully. + * + * @param server The inference server object. + * @param model_name The name of the model. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerUnloadModelAndDependents( + TRITONSERVER_Server server, String model_name); +public static native TRITONSERVER_Error TRITONSERVER_ServerUnloadModelAndDependents( + TRITONSERVER_Server server, @Cast("const char*") BytePointer model_name); + +/** Get the current metrics for the server. The caller takes ownership + * of the metrics object and must call TRITONSERVER_MetricsDelete to + * release the object. + * + * @param server The inference server object. + * @param metrics Returns the metrics. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +/// +/// +public static native TRITONSERVER_Error TRITONSERVER_ServerMetrics( + TRITONSERVER_Server server, @Cast("TRITONSERVER_Metrics**") PointerPointer metrics); +public static native TRITONSERVER_Error TRITONSERVER_ServerMetrics( + TRITONSERVER_Server server, @ByPtrPtr TRITONSERVER_Metrics metrics); + +/** Perform inference using the meta-data and inputs supplied by the + * 'inference_request'. If the function returns success, then the + * caller releases ownership of 'inference_request' and must not + * access it in any way after this call, until ownership is returned + * via the 'request_release_fn' callback registered in the request + * object with TRITONSERVER_InferenceRequestSetReleaseCallback. + * + * The function unconditionally takes ownership of 'trace' and so the + * caller must not access it in any way after this call (except in + * the trace id callback) until ownership is returned via the trace's + * release_fn callback. + * + * Responses produced for this request are returned using the + * allocator and callback registered with the request by + * TRITONSERVER_InferenceRequestSetResponseCallback. + * + * @param server The inference server object. + * @param inference_request The request object. + * @param trace The trace object for this request, or nullptr if no + * tracing. + * @return a TRITONSERVER_Error indicating success or failure. */ +public static native TRITONSERVER_Error TRITONSERVER_ServerInferAsync( + TRITONSERVER_Server server, + TRITONSERVER_InferenceRequest inference_request, + TRITONSERVER_InferenceTrace trace); + + +// #ifdef __cplusplus +// #endif + + +// Parsed from tritonbackend.h + +// Copyright 2020-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of NVIDIA CORPORATION nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission.
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// #pragma once + +// #include +// #include +// #include "triton/core/tritonserver.h" + +// #ifdef __cplusplus +// #endif + +// #ifdef _COMPILING_TRITONBACKEND +// #if defined(_MSC_VER) +// #define TRITONBACKEND_DECLSPEC __declspec(dllexport) +// #define TRITONBACKEND_ISPEC __declspec(dllimport) +// #elif defined(__GNUC__) +// #define TRITONBACKEND_DECLSPEC __attribute__((__visibility__("default"))) +// #define TRITONBACKEND_ISPEC +// #else +// #define TRITONBACKEND_DECLSPEC +// #define TRITONBACKEND_ISPEC +// #endif +// #else +// #if defined(_MSC_VER) +// #define TRITONBACKEND_DECLSPEC __declspec(dllimport) +// #define TRITONBACKEND_ISPEC __declspec(dllexport) +// #else +// #define TRITONBACKEND_DECLSPEC +// #define TRITONBACKEND_ISPEC +// Targeting ../tritonserver/TRITONBACKEND_MemoryManager.java + + +// Targeting ../tritonserver/TRITONBACKEND_Input.java + + +// Targeting ../tritonserver/TRITONBACKEND_Output.java + + +// Targeting ../tritonserver/TRITONBACKEND_Request.java + + +// Targeting ../tritonserver/TRITONBACKEND_ResponseFactory.java + + +// Targeting ../tritonserver/TRITONBACKEND_Response.java + + +// Targeting ../tritonserver/TRITONBACKEND_Backend.java + + +// Targeting ../tritonserver/TRITONBACKEND_Model.java + + +// Targeting ../tritonserver/TRITONBACKEND_ModelInstance.java + + + +/** + * TRITONBACKEND API Version + * + * The TRITONBACKEND API is versioned with major and minor version + * numbers. Any change to the API that does not impact backwards + * compatibility (for example, adding a non-required function) + * increases the minor version number. Any change that breaks + * backwards compatibility (for example, deleting or changing the + * behavior of a function) increases the major version number. A + * backend should check that the API version used to compile the + * backend is compatible with the API version of the Triton server + * that it is running in. This is typically done by code similar to + * the following which makes sure that the major versions are equal + * and that the minor version of Triton is >= the minor version used + * to build the backend. + * + * uint32_t api_version_major, api_version_minor; + * TRITONBACKEND_ApiVersion(&api_version_major, &api_version_minor); + * if ((api_version_major != TRITONBACKEND_API_VERSION_MAJOR) || + * (api_version_minor < TRITONBACKEND_API_VERSION_MINOR)) { + * return TRITONSERVER_ErrorNew( + * TRITONSERVER_ERROR_UNSUPPORTED, + * "triton backend API version does not support this backend"); + * } + * */ +public static final int TRITONBACKEND_API_VERSION_MAJOR = 1; + +/// +public static final int TRITONBACKEND_API_VERSION_MINOR = 5; + +/** Get the TRITONBACKEND API version supported by Triton. 
This value + * can be compared against the TRITONBACKEND_API_VERSION_MAJOR and + * TRITONBACKEND_API_VERSION_MINOR used to build the backend to + * ensure that Triton is compatible with the backend. + * + * @param major Returns the TRITONBACKEND API major version supported + * by Triton. + * @param minor Returns the TRITONBACKEND API minor version supported + * by Triton. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +/// +/// +public static native TRITONSERVER_Error TRITONBACKEND_ApiVersion( + @Cast("uint32_t*") IntPointer major, @Cast("uint32_t*") IntPointer minor); +public static native TRITONSERVER_Error TRITONBACKEND_ApiVersion( + @Cast("uint32_t*") IntBuffer major, @Cast("uint32_t*") IntBuffer minor); +public static native TRITONSERVER_Error TRITONBACKEND_ApiVersion( + @Cast("uint32_t*") int[] major, @Cast("uint32_t*") int[] minor); + +/** TRITONBACKEND_ArtifactType + * + * The ways that the files that make up a backend or model are + * communicated to the backend. + * + * TRITONBACKEND_ARTIFACT_FILESYSTEM: The model or backend + * artifacts are made available to Triton via a locally + * accessible filesystem. The backend can access these files + * using an appropriate system API. + * */ +/** enum TRITONBACKEND_ArtifactType */ +public static final int + TRITONBACKEND_ARTIFACT_FILESYSTEM = 0; + + +/** + * TRITONBACKEND_MemoryManager + * + * Object representing a memory manager that is capable of + * allocating and otherwise managing different memory types. For + * improved performance Triton maintains pools for GPU and CPU-pinned + * memory and the memory manager allows backends to access those + * pools. + *

+ * Allocate a contiguous block of memory of a specific type using a + * memory manager. Two error codes have specific interpretations for + * this function: + * + * TRITONSERVER_ERROR_UNSUPPORTED: Indicates that Triton is + * incapable of allocating the requested memory type and memory + * type ID. Requests for the memory type and ID will always fail + * no matter the 'byte_size' of the request. + * + * TRITONSERVER_ERROR_UNAVAILABLE: Indicates that Triton can + * allocate the memory type and ID but that currently it cannot + * allocate a contiguous block of memory of the requested + * 'byte_size'. + * + * @param manager The memory manager. + * @param buffer Returns the allocated memory. + * @param memory_type The type of memory to allocate. + * @param memory_type_id The ID associated with the memory type to + * allocate. For GPU memory this indicates the device ID of the GPU + * to allocate from. + * @param byte_size The size of memory to allocate, in bytes. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONBACKEND_MemoryManagerAllocate( + TRITONBACKEND_MemoryManager manager, @Cast("void**") PointerPointer buffer, + @Cast("const TRITONSERVER_MemoryType") int memory_type, @Cast("const int64_t") long memory_type_id, + @Cast("const uint64_t") long byte_size); +public static native TRITONSERVER_Error TRITONBACKEND_MemoryManagerAllocate( + TRITONBACKEND_MemoryManager manager, @Cast("void**") @ByPtrPtr Pointer buffer, + @Cast("const TRITONSERVER_MemoryType") int memory_type, @Cast("const int64_t") long memory_type_id, + @Cast("const uint64_t") long byte_size); + +/** Free a buffer that was previously allocated with + * TRITONBACKEND_MemoryManagerAllocate. The call must provide the + * same values for 'memory_type' and 'memory_type_id' as were used + * when the buffer was allocated or else the behavior is undefined. + * + * @param manager The memory manager. + * @param buffer The allocated memory buffer to free. + * @param memory_type The type of memory of the buffer. + * @param memory_type_id The ID associated with the memory type of + * the buffer. + * @return a TRITONSERVER_Error indicating success or failure. */ + + +/// +/// +/// +/// +public static native TRITONSERVER_Error TRITONBACKEND_MemoryManagerFree( + TRITONBACKEND_MemoryManager manager, Pointer buffer, + @Cast("const TRITONSERVER_MemoryType") int memory_type, @Cast("const int64_t") long memory_type_id); + +/** + * TRITONBACKEND_Input + * + * Object representing an input tensor. + *
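For backend authors, a hedged sketch of the allocate/free pairing (assuming 'manager' was obtained from the backend object, e.g. via the TRITONBACKEND_BackendMemoryManager binding declared elsewhere in this header):

    // Allocate a 4 KiB pinned-CPU scratch buffer, then free it with matching type/id.
    Pointer scratch = new Pointer();
    TRITONSERVER_Error err = TRITONBACKEND_MemoryManagerAllocate(
        manager, scratch, TRITONSERVER_MEMORY_CPU_PINNED, 0, 4096);
    if (err == null) {
        // ... use the buffer ...
        TRITONBACKEND_MemoryManagerFree(manager, scratch, TRITONSERVER_MEMORY_CPU_PINNED, 0);
    } else {
        TRITONSERVER_ErrorDelete(err);
    }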

+ * Get the name and properties of an input tensor. The returned + * strings and other properties are owned by the input, not the + * caller, and so should not be modified or freed. + * + * @param input The input tensor. + * @param name If non-nullptr, returns the tensor name. + * @param datatype If non-nullptr, returns the tensor datatype. + * @param shape If non-nullptr, returns the tensor shape. + * @param dim_count If non-nullptr, returns the number of dimensions + * in the tensor shape. + * @param byte_size If non-nullptr, returns the size of the available + * data for the tensor, in bytes. This size reflects the actual data + * available, and does not necessarily match what is + * expected/required for the tensor given its shape and datatype. It + * is the responsibility of the backend to handle mismatches in these + * sizes appropriately. + * @param buffer_count If non-nullptr, returns the number of buffers + * holding the contents of the tensor. These buffers are accessed + * using TRITONBACKEND_InputBuffer. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONBACKEND_InputProperties( + TRITONBACKEND_Input input, @Cast("const char**") PointerPointer name, + @Cast("TRITONSERVER_DataType*") IntPointer datatype, @Cast("const int64_t**") PointerPointer shape, + @Cast("uint32_t*") IntPointer dims_count, @Cast("uint64_t*") LongPointer byte_size, @Cast("uint32_t*") IntPointer buffer_count); +public static native TRITONSERVER_Error TRITONBACKEND_InputProperties( + TRITONBACKEND_Input input, @Cast("const char**") @ByPtrPtr BytePointer name, + @Cast("TRITONSERVER_DataType*") IntPointer datatype, @Cast("const int64_t**") @ByPtrPtr LongPointer shape, + @Cast("uint32_t*") IntPointer dims_count, @Cast("uint64_t*") LongPointer byte_size, @Cast("uint32_t*") IntPointer buffer_count); +public static native TRITONSERVER_Error TRITONBACKEND_InputProperties( + TRITONBACKEND_Input input, @Cast("const char**") @ByPtrPtr ByteBuffer name, + @Cast("TRITONSERVER_DataType*") IntBuffer datatype, @Cast("const int64_t**") @ByPtrPtr LongBuffer shape, + @Cast("uint32_t*") IntBuffer dims_count, @Cast("uint64_t*") LongBuffer byte_size, @Cast("uint32_t*") IntBuffer buffer_count); +public static native TRITONSERVER_Error TRITONBACKEND_InputProperties( + TRITONBACKEND_Input input, @Cast("const char**") @ByPtrPtr byte[] name, + @Cast("TRITONSERVER_DataType*") int[] datatype, @Cast("const int64_t**") @ByPtrPtr long[] shape, + @Cast("uint32_t*") int[] dims_count, @Cast("uint64_t*") long[] byte_size, @Cast("uint32_t*") int[] buffer_count); + +/** Get the name and properties of an input tensor associated with a given + * host policy. If there are no input buffers for the specified host policy, + * the properties of the fallback input buffers are returned. The returned + * strings and other properties are owned by the input, not the caller, and so + * should not be modified or freed. + * + * @param input The input tensor. + * @param host_policy_name The host policy name. Fallback input properties + * will be returned if nullptr is provided. + * @param name If non-nullptr, returns the tensor name. + * @param datatype If non-nullptr, returns the tensor datatype. + * @param shape If non-nullptr, returns the tensor shape. + * @param dim_count If non-nullptr, returns the number of dimensions + * in the tensor shape. + * @param byte_size If non-nullptr, returns the size of the available + * data for the tensor, in bytes.
This size reflects the actual data + * available, and does not necessarily match what is + * expected/required for the tensor given its shape and datatype. It + * is the responsibility of the backend to handle mismatches in these + * sizes appropriately. + * @param buffer_count If non-nullptr, returns the number of buffers + * holding the contents of the tensor. These buffers are accessed + * using TRITONBACKEND_InputBufferForHostPolicy. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONBACKEND_InputPropertiesForHostPolicy( + TRITONBACKEND_Input input, String host_policy_name, @Cast("const char**") PointerPointer name, + @Cast("TRITONSERVER_DataType*") IntPointer datatype, @Cast("const int64_t**") PointerPointer shape, + @Cast("uint32_t*") IntPointer dims_count, @Cast("uint64_t*") LongPointer byte_size, @Cast("uint32_t*") IntPointer buffer_count); +public static native TRITONSERVER_Error TRITONBACKEND_InputPropertiesForHostPolicy( + TRITONBACKEND_Input input, String host_policy_name, @Cast("const char**") @ByPtrPtr BytePointer name, + @Cast("TRITONSERVER_DataType*") IntPointer datatype, @Cast("const int64_t**") @ByPtrPtr LongPointer shape, + @Cast("uint32_t*") IntPointer dims_count, @Cast("uint64_t*") LongPointer byte_size, @Cast("uint32_t*") IntPointer buffer_count); +public static native TRITONSERVER_Error TRITONBACKEND_InputPropertiesForHostPolicy( + TRITONBACKEND_Input input, @Cast("const char*") BytePointer host_policy_name, @Cast("const char**") @ByPtrPtr ByteBuffer name, + @Cast("TRITONSERVER_DataType*") IntBuffer datatype, @Cast("const int64_t**") @ByPtrPtr LongBuffer shape, + @Cast("uint32_t*") IntBuffer dims_count, @Cast("uint64_t*") LongBuffer byte_size, @Cast("uint32_t*") IntBuffer buffer_count); +public static native TRITONSERVER_Error TRITONBACKEND_InputPropertiesForHostPolicy( + TRITONBACKEND_Input input, String host_policy_name, @Cast("const char**") @ByPtrPtr byte[] name, + @Cast("TRITONSERVER_DataType*") int[] datatype, @Cast("const int64_t**") @ByPtrPtr long[] shape, + @Cast("uint32_t*") int[] dims_count, @Cast("uint64_t*") long[] byte_size, @Cast("uint32_t*") int[] buffer_count); +public static native TRITONSERVER_Error TRITONBACKEND_InputPropertiesForHostPolicy( + TRITONBACKEND_Input input, @Cast("const char*") BytePointer host_policy_name, @Cast("const char**") @ByPtrPtr BytePointer name, + @Cast("TRITONSERVER_DataType*") IntPointer datatype, @Cast("const int64_t**") @ByPtrPtr LongPointer shape, + @Cast("uint32_t*") IntPointer dims_count, @Cast("uint64_t*") LongPointer byte_size, @Cast("uint32_t*") IntPointer buffer_count); +public static native TRITONSERVER_Error TRITONBACKEND_InputPropertiesForHostPolicy( + TRITONBACKEND_Input input, String host_policy_name, @Cast("const char**") @ByPtrPtr ByteBuffer name, + @Cast("TRITONSERVER_DataType*") IntBuffer datatype, @Cast("const int64_t**") @ByPtrPtr LongBuffer shape, + @Cast("uint32_t*") IntBuffer dims_count, @Cast("uint64_t*") LongBuffer byte_size, @Cast("uint32_t*") IntBuffer buffer_count); +public static native TRITONSERVER_Error TRITONBACKEND_InputPropertiesForHostPolicy( + TRITONBACKEND_Input input, @Cast("const char*") BytePointer host_policy_name, @Cast("const char**") @ByPtrPtr byte[] name, + @Cast("TRITONSERVER_DataType*") int[] datatype, @Cast("const int64_t**") @ByPtrPtr long[] shape, + @Cast("uint32_t*") int[] dims_count, @Cast("uint64_t*") long[] byte_size, @Cast("uint32_t*") int[] buffer_count); + +/** Get a buffer holding (part of) the 
tensor data for an input. For a + * given input the number of buffers composing the input is found + * from 'buffer_count' returned by TRITONBACKEND_InputProperties. The + * returned buffer is owned by the input and so should not be + * modified or freed by the caller. The lifetime of the buffer + * matches that of the input and so the buffer should not be accessed + * after the input tensor object is released. + * + * @param input The input tensor. + * @param index The index of the buffer. Must be 0 <= index < + * buffer_count, where buffer_count is the value returned by + * TRITONBACKEND_InputProperties. + * @param buffer Returns a pointer to a contiguous block of data for + * the named input. + * @param buffer_byte_size Returns the size, in bytes, of 'buffer'. + * @param memory_type Acts as both input and output. On input gives + * the buffer memory type preferred by the function caller. Returns + * the actual memory type of 'buffer'. + * @param memory_type_id Acts as both input and output. On input + * gives the buffer memory type id preferred by the function caller. + * Returns the actual memory type id of 'buffer'. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONBACKEND_InputBuffer( + TRITONBACKEND_Input input, @Cast("const uint32_t") int index, @Cast("const void**") PointerPointer buffer, + @Cast("uint64_t*") LongPointer buffer_byte_size, @Cast("TRITONSERVER_MemoryType*") IntPointer memory_type, + @Cast("int64_t*") LongPointer memory_type_id); +public static native TRITONSERVER_Error TRITONBACKEND_InputBuffer( + TRITONBACKEND_Input input, @Cast("const uint32_t") int index, @Cast("const void**") @ByPtrPtr Pointer buffer, + @Cast("uint64_t*") LongPointer buffer_byte_size, @Cast("TRITONSERVER_MemoryType*") IntPointer memory_type, + @Cast("int64_t*") LongPointer memory_type_id); +public static native TRITONSERVER_Error TRITONBACKEND_InputBuffer( + TRITONBACKEND_Input input, @Cast("const uint32_t") int index, @Cast("const void**") @ByPtrPtr Pointer buffer, + @Cast("uint64_t*") LongBuffer buffer_byte_size, @Cast("TRITONSERVER_MemoryType*") IntBuffer memory_type, + @Cast("int64_t*") LongBuffer memory_type_id); +public static native TRITONSERVER_Error TRITONBACKEND_InputBuffer( + TRITONBACKEND_Input input, @Cast("const uint32_t") int index, @Cast("const void**") @ByPtrPtr Pointer buffer, + @Cast("uint64_t*") long[] buffer_byte_size, @Cast("TRITONSERVER_MemoryType*") int[] memory_type, + @Cast("int64_t*") long[] memory_type_id); + +/** Get a buffer holding (part of) the tensor data for an input for a specific + * host policy. If there are no input buffers specified for this host policy, + * the fallback input buffer is returned. + * For a given input the number of buffers composing the input is found + * from 'buffer_count' returned by TRITONBACKEND_InputPropertiesForHostPolicy. + * The returned buffer is owned by the input and so should not be modified or + * freed by the caller. The lifetime of the buffer matches that of the input + * and so the buffer should not be accessed after the input tensor object is + * released. + * + * @param input The input tensor. + * @param host_policy_name The host policy name. Fallback input buffer + * will be returned if nullptr is provided. + * @param index The index of the buffer. Must be 0 <= index < + * buffer_count, where buffer_count is the value returned by + * TRITONBACKEND_InputPropertiesForHostPolicy.
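Putting the property and buffer calls together, a hedged backend-side sketch that walks every buffer of an input ('input' is assumed to come from TRITONBACKEND_RequestInput; error checks elided):

    // Query the input's shape/size, then visit each of its buffers.
    BytePointer name = new BytePointer((Pointer) null);
    IntPointer datatype = new IntPointer(1);
    LongPointer shape = new LongPointer((Pointer) null);
    IntPointer dims_count = new IntPointer(1);
    LongPointer byte_size = new LongPointer(1);
    IntPointer buffer_count = new IntPointer(1);
    TRITONBACKEND_InputProperties(input, name, datatype, shape, dims_count, byte_size, buffer_count);
    for (int b = 0; b < buffer_count.get(); b++) {
        Pointer buffer = new Pointer();
        LongPointer buffer_size = new LongPointer(1);
        IntPointer memory_type = new IntPointer(1).put(TRITONSERVER_MEMORY_CPU); // preferred (in/out)
        LongPointer memory_type_id = new LongPointer(1).put(0);
        TRITONBACKEND_InputBuffer(input, b, buffer, buffer_size, memory_type, memory_type_id);
        // 'buffer' is owned by 'input'; read buffer_size.get() bytes, honoring memory_type.get().
    }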
+ * @param buffer Returns a pointer to a contiguous block of data for + * the named input. + * @param buffer_byte_size Returns the size, in bytes, of 'buffer'. + * @param memory_type Acts as both input and output. On input gives + * the buffer memory type preferred by the function caller. Returns + * the actual memory type of 'buffer'. + * @param memory_type_id Acts as both input and output. On input + * gives the buffer memory type id preferred by the function caller. + * Returns the actual memory type id of 'buffer'. + * @return a TRITONSERVER_Error indicating success or failure. */ + + +/// +/// +/// +/// +public static native TRITONSERVER_Error TRITONBACKEND_InputBufferForHostPolicy( + TRITONBACKEND_Input input, String host_policy_name, + @Cast("const uint32_t") int index, @Cast("const void**") PointerPointer buffer, @Cast("uint64_t*") LongPointer buffer_byte_size, + @Cast("TRITONSERVER_MemoryType*") IntPointer memory_type, @Cast("int64_t*") LongPointer memory_type_id); +public static native TRITONSERVER_Error TRITONBACKEND_InputBufferForHostPolicy( + TRITONBACKEND_Input input, String host_policy_name, + @Cast("const uint32_t") int index, @Cast("const void**") @ByPtrPtr Pointer buffer, @Cast("uint64_t*") LongPointer buffer_byte_size, + @Cast("TRITONSERVER_MemoryType*") IntPointer memory_type, @Cast("int64_t*") LongPointer memory_type_id); +public static native TRITONSERVER_Error TRITONBACKEND_InputBufferForHostPolicy( + TRITONBACKEND_Input input, @Cast("const char*") BytePointer host_policy_name, + @Cast("const uint32_t") int index, @Cast("const void**") @ByPtrPtr Pointer buffer, @Cast("uint64_t*") LongBuffer buffer_byte_size, + @Cast("TRITONSERVER_MemoryType*") IntBuffer memory_type, @Cast("int64_t*") LongBuffer memory_type_id); +public static native TRITONSERVER_Error TRITONBACKEND_InputBufferForHostPolicy( + TRITONBACKEND_Input input, String host_policy_name, + @Cast("const uint32_t") int index, @Cast("const void**") @ByPtrPtr Pointer buffer, @Cast("uint64_t*") long[] buffer_byte_size, + @Cast("TRITONSERVER_MemoryType*") int[] memory_type, @Cast("int64_t*") long[] memory_type_id); +public static native TRITONSERVER_Error TRITONBACKEND_InputBufferForHostPolicy( + TRITONBACKEND_Input input, @Cast("const char*") BytePointer host_policy_name, + @Cast("const uint32_t") int index, @Cast("const void**") @ByPtrPtr Pointer buffer, @Cast("uint64_t*") LongPointer buffer_byte_size, + @Cast("TRITONSERVER_MemoryType*") IntPointer memory_type, @Cast("int64_t*") LongPointer memory_type_id); +public static native TRITONSERVER_Error TRITONBACKEND_InputBufferForHostPolicy( + TRITONBACKEND_Input input, String host_policy_name, + @Cast("const uint32_t") int index, @Cast("const void**") @ByPtrPtr Pointer buffer, @Cast("uint64_t*") LongBuffer buffer_byte_size, + @Cast("TRITONSERVER_MemoryType*") IntBuffer memory_type, @Cast("int64_t*") LongBuffer memory_type_id); +public static native TRITONSERVER_Error TRITONBACKEND_InputBufferForHostPolicy( + TRITONBACKEND_Input input, @Cast("const char*") BytePointer host_policy_name, + @Cast("const uint32_t") int index, @Cast("const void**") @ByPtrPtr Pointer buffer, @Cast("uint64_t*") long[] buffer_byte_size, + @Cast("TRITONSERVER_MemoryType*") int[] memory_type, @Cast("int64_t*") long[] memory_type_id); + +/** + * TRITONBACKEND_Output + * + * Object representing a response output tensor. + * +

+ * Get a buffer to use to hold the tensor data for the output. The + * returned buffer is owned by the output and so should not be freed + * by the caller. The caller can and should fill the buffer with the + * output data for the tensor. The lifetime of the buffer matches + * that of the output and so the buffer should not be accessed after + * the output tensor object is released. + * + * @param output The output tensor. + * @param buffer Returns a pointer to a buffer where the contents of + * the output tensor should be placed. + * @param buffer_byte_size The size, in bytes, of the buffer required + * by the caller. + * @param memory_type Acts as both input and output. On input gives + * the buffer memory type preferred by the caller. Returns the + * actual memory type of 'buffer'. + * @param memory_type_id Acts as both input and output. On input + * gives the buffer memory type id preferred by the caller. Returns + * the actual memory type id of 'buffer'. + * @return a TRITONSERVER_Error indicating success or failure. */ + + +/// +/// +/// +/// +public static native TRITONSERVER_Error TRITONBACKEND_OutputBuffer( + TRITONBACKEND_Output output, @Cast("void**") PointerPointer buffer, + @Cast("const uint64_t") long buffer_byte_size, @Cast("TRITONSERVER_MemoryType*") IntPointer memory_type, + @Cast("int64_t*") LongPointer memory_type_id); +public static native TRITONSERVER_Error TRITONBACKEND_OutputBuffer( + TRITONBACKEND_Output output, @Cast("void**") @ByPtrPtr Pointer buffer, + @Cast("const uint64_t") long buffer_byte_size, @Cast("TRITONSERVER_MemoryType*") IntPointer memory_type, + @Cast("int64_t*") LongPointer memory_type_id); +public static native TRITONSERVER_Error TRITONBACKEND_OutputBuffer( + TRITONBACKEND_Output output, @Cast("void**") @ByPtrPtr Pointer buffer, + @Cast("const uint64_t") long buffer_byte_size, @Cast("TRITONSERVER_MemoryType*") IntBuffer memory_type, + @Cast("int64_t*") LongBuffer memory_type_id); +public static native TRITONSERVER_Error TRITONBACKEND_OutputBuffer( + TRITONBACKEND_Output output, @Cast("void**") @ByPtrPtr Pointer buffer, + @Cast("const uint64_t") long buffer_byte_size, @Cast("TRITONSERVER_MemoryType*") int[] memory_type, + @Cast("int64_t*") long[] memory_type_id); + +/** + * TRITONBACKEND_Request + * + * Object representing an inference request. + *
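A hedged sketch of requesting an output buffer ('output' is assumed to come from the TRITONBACKEND_ResponseOutput binding declared elsewhere in this header, and the payload size is hypothetical):

    // Ask for a CPU buffer to hold out_byte_size bytes of results.
    long out_byte_size = 4096;                      // hypothetical payload size
    Pointer out = new Pointer();
    IntPointer memory_type = new IntPointer(1).put(TRITONSERVER_MEMORY_CPU);  // preferred (in/out)
    LongPointer memory_type_id = new LongPointer(1).put(0);
    TRITONBACKEND_OutputBuffer(output, out, out_byte_size, memory_type, memory_type_id);
    // If memory_type.get() reports CPU, copy the result bytes into 'out';
    // the buffer is owned by 'output' and must not be freed here.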

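+ * A minimal usage sketch (illustrative only; 'request' is assumed
+ * and error checking is omitted): enumerate the inputs of a request
+ * with the functions below.
+ *
+ *   int[] inputCount = { 0 };
+ *   TRITONBACKEND_RequestInputCount(request, inputCount);
+ *   for (int i = 0; i < inputCount[0]; i++) {
+ *     TRITONBACKEND_Input input = new TRITONBACKEND_Input(null);
+ *     TRITONBACKEND_RequestInputByIndex(request, i, input);
+ *     // ... query 'input' and read its buffers ...
+ *   }
+ *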
+ * Get the ID of the request. Can be nullptr if request doesn't have + * an ID. The returned string is owned by the request, not the + * caller, and so should not be modified or freed. + * + * @param request The inference request. + * @param id Returns the ID. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONBACKEND_RequestId( + TRITONBACKEND_Request request, @Cast("const char**") PointerPointer id); +public static native TRITONSERVER_Error TRITONBACKEND_RequestId( + TRITONBACKEND_Request request, @Cast("const char**") @ByPtrPtr BytePointer id); +public static native TRITONSERVER_Error TRITONBACKEND_RequestId( + TRITONBACKEND_Request request, @Cast("const char**") @ByPtrPtr ByteBuffer id); +public static native TRITONSERVER_Error TRITONBACKEND_RequestId( + TRITONBACKEND_Request request, @Cast("const char**") @ByPtrPtr byte[] id); + +/** Get the correlation ID of the request. Zero indicates that the + * request does not have a correlation ID. + * + * @param request The inference request. + * @param id Returns the correlation ID. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONBACKEND_RequestCorrelationId( + TRITONBACKEND_Request request, @Cast("uint64_t*") LongPointer id); +public static native TRITONSERVER_Error TRITONBACKEND_RequestCorrelationId( + TRITONBACKEND_Request request, @Cast("uint64_t*") LongBuffer id); +public static native TRITONSERVER_Error TRITONBACKEND_RequestCorrelationId( + TRITONBACKEND_Request request, @Cast("uint64_t*") long[] id); + +/** Get the number of input tensors specified in the request. + * + * @param request The inference request. + * @param count Returns the number of input tensors. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONBACKEND_RequestInputCount( + TRITONBACKEND_Request request, @Cast("uint32_t*") IntPointer count); +public static native TRITONSERVER_Error TRITONBACKEND_RequestInputCount( + TRITONBACKEND_Request request, @Cast("uint32_t*") IntBuffer count); +public static native TRITONSERVER_Error TRITONBACKEND_RequestInputCount( + TRITONBACKEND_Request request, @Cast("uint32_t*") int[] count); + +/** Get the name of an input tensor. The caller does not own + * the returned string and must not modify or delete it. The lifetime + * of the returned string extends only as long as 'request'. + * + * @param request The inference request. + * @param index The index of the input tensor. Must be 0 <= index < + * count, where count is the value returned by + * TRITONBACKEND_RequestInputCount. + * @param input_name Returns the name of the input tensor + * corresponding to the index. + * @return a TRITONSERVER_Error indicating success or failure. 
*/ + +/// +public static native TRITONSERVER_Error TRITONBACKEND_RequestInputName( + TRITONBACKEND_Request request, @Cast("const uint32_t") int index, + @Cast("const char**") PointerPointer input_name); +public static native TRITONSERVER_Error TRITONBACKEND_RequestInputName( + TRITONBACKEND_Request request, @Cast("const uint32_t") int index, + @Cast("const char**") @ByPtrPtr BytePointer input_name); +public static native TRITONSERVER_Error TRITONBACKEND_RequestInputName( + TRITONBACKEND_Request request, @Cast("const uint32_t") int index, + @Cast("const char**") @ByPtrPtr ByteBuffer input_name); +public static native TRITONSERVER_Error TRITONBACKEND_RequestInputName( + TRITONBACKEND_Request request, @Cast("const uint32_t") int index, + @Cast("const char**") @ByPtrPtr byte[] input_name); + +/** Get a named request input. The lifetime of the returned input + * object matches that of the request and so the input object should + * not be accessed after the request object is released. + * + * @param request The inference request. + * @param name The name of the input. + * @param input Returns the input corresponding to the name. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +/// +public static native TRITONSERVER_Error TRITONBACKEND_RequestInput( + TRITONBACKEND_Request request, String name, + @Cast("TRITONBACKEND_Input**") PointerPointer input); +public static native TRITONSERVER_Error TRITONBACKEND_RequestInput( + TRITONBACKEND_Request request, String name, + @ByPtrPtr TRITONBACKEND_Input input); +public static native TRITONSERVER_Error TRITONBACKEND_RequestInput( + TRITONBACKEND_Request request, @Cast("const char*") BytePointer name, + @ByPtrPtr TRITONBACKEND_Input input); + +/** Get a request input by index. The order of inputs in a given + * request is not necessarily consistent with other requests, even if + * the requests are in the same batch. As a result, you can not + * assume that an index obtained from one request will point to the + * same input in a different request. + * + * The lifetime of the returned input object matches that of the + * request and so the input object should not be accessed after the + * request object is released. + * + * @param request The inference request. + * @param index The index of the input tensor. Must be 0 <= index < + * count, where count is the value returned by + * TRITONBACKEND_RequestInputCount. + * @param input Returns the input corresponding to the index. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONBACKEND_RequestInputByIndex( + TRITONBACKEND_Request request, @Cast("const uint32_t") int index, + @Cast("TRITONBACKEND_Input**") PointerPointer input); +public static native TRITONSERVER_Error TRITONBACKEND_RequestInputByIndex( + TRITONBACKEND_Request request, @Cast("const uint32_t") int index, + @ByPtrPtr TRITONBACKEND_Input input); + +/** Get the number of output tensors requested to be returned in the + * request. + * + * @param request The inference request. + * @param count Returns the number of output tensors. + * @return a TRITONSERVER_Error indicating success or failure. 
*/ + +/// +public static native TRITONSERVER_Error TRITONBACKEND_RequestOutputCount( + TRITONBACKEND_Request request, @Cast("uint32_t*") IntPointer count); +public static native TRITONSERVER_Error TRITONBACKEND_RequestOutputCount( + TRITONBACKEND_Request request, @Cast("uint32_t*") IntBuffer count); +public static native TRITONSERVER_Error TRITONBACKEND_RequestOutputCount( + TRITONBACKEND_Request request, @Cast("uint32_t*") int[] count); + +/** Get the name of a requested output tensor. The caller does not own + * the returned string and must not modify or delete it. The lifetime + * of the returned string extends only as long as 'request'. + * + * @param request The inference request. + * @param index The index of the requested output tensor. Must be 0 + * <= index < count, where count is the value returned by + * TRITONBACKEND_RequestOutputCount. + * @param output_name Returns the name of the requested output tensor + * corresponding to the index. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONBACKEND_RequestOutputName( + TRITONBACKEND_Request request, @Cast("const uint32_t") int index, + @Cast("const char**") PointerPointer output_name); +public static native TRITONSERVER_Error TRITONBACKEND_RequestOutputName( + TRITONBACKEND_Request request, @Cast("const uint32_t") int index, + @Cast("const char**") @ByPtrPtr BytePointer output_name); +public static native TRITONSERVER_Error TRITONBACKEND_RequestOutputName( + TRITONBACKEND_Request request, @Cast("const uint32_t") int index, + @Cast("const char**") @ByPtrPtr ByteBuffer output_name); +public static native TRITONSERVER_Error TRITONBACKEND_RequestOutputName( + TRITONBACKEND_Request request, @Cast("const uint32_t") int index, + @Cast("const char**") @ByPtrPtr byte[] output_name); + +/** Release the request. The request should be released when it is no + * longer needed by the backend. If this call returns with an error + * (i.e. non-nullptr) then the request was not released and ownership + * remains with the backend. If this call returns with success, the + * 'request' object is no longer owned by the backend and must not be + * used. Any tensor names, data types, shapes, input tensors, + * etc. returned by TRITONBACKEND_Request* functions for this request + * are no longer valid. If a persistent copy of that data is required + * it must be created before calling this function. + * + * @param request The inference request. + * @param release_flags Flags indicating what type of request release + * should be performed. @see TRITONSERVER_RequestReleaseFlag. @see + * TRITONSERVER_InferenceRequestReleaseFn_t. + * @return a TRITONSERVER_Error indicating success or failure. */ + + +/// +/// +/// +public static native TRITONSERVER_Error TRITONBACKEND_RequestRelease( + TRITONBACKEND_Request request, @Cast("uint32_t") int release_flags); + +/** + * TRITONBACKEND_ResponseFactory + * + * Object representing an inference response factory. Using a + * response factory is not required; instead a response can be + * generated directly from a TRITONBACKEND_Request object using + * TRITONBACKEND_ResponseNew(). A response factory allows a request + * to be released before all responses have been sent. Releasing a + * request as early as possible releases all input tensor data and + * therefore may be desirable in some cases. +

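+ * A minimal sketch of the early-release pattern (illustrative only;
+ * 'request' is assumed and error checking is omitted):
+ *
+ *   TRITONBACKEND_ResponseFactory factory =
+ *       new TRITONBACKEND_ResponseFactory(null);
+ *   TRITONBACKEND_ResponseFactoryNew(factory, request);
+ *   // the request, and with it all input tensor data, can now be
+ *   // released before any response is sent:
+ *   TRITONBACKEND_RequestRelease(request, TRITONSERVER_REQUEST_RELEASE_ALL);
+ *   TRITONBACKEND_Response response = new TRITONBACKEND_Response(null);
+ *   TRITONBACKEND_ResponseNewFromFactory(response, factory);
+ *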
+ * Create the response factory associated with a request.
+ *
+ * @param factory Returns the new response factory.
+ * @param request The inference request.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONBACKEND_ResponseFactoryNew(
+    @Cast("TRITONBACKEND_ResponseFactory**") PointerPointer factory, TRITONBACKEND_Request request);
+public static native TRITONSERVER_Error TRITONBACKEND_ResponseFactoryNew(
+    @ByPtrPtr TRITONBACKEND_ResponseFactory factory, TRITONBACKEND_Request request);
+
+/** Destroy a response factory.
+ *
+ * @param factory The response factory.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONBACKEND_ResponseFactoryDelete(
+    TRITONBACKEND_ResponseFactory factory);
+
+/** Send response flags without a corresponding response.
+ *
+ * @param factory The response factory.
+ * @param send_flags Flags to send. @see
+ * TRITONSERVER_ResponseCompleteFlag. @see
+ * TRITONSERVER_InferenceResponseCompleteFn_t.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+
+///
+///
+///
+///
+///
+public static native TRITONSERVER_Error TRITONBACKEND_ResponseFactorySendFlags(
+    TRITONBACKEND_ResponseFactory factory, @Cast("const uint32_t") int send_flags);
+
+/**
+ * TRITONBACKEND_Response
+ *
+ * Object representing an inference response. For a given request,
+ * the backend must carefully manage the lifecycle of responses
+ * generated for that request to ensure that the output tensor
+ * buffers are allocated correctly. When a response is created with
+ * TRITONBACKEND_ResponseNew or TRITONBACKEND_ResponseNewFromFactory,
+ * all the outputs and corresponding buffers must be created for that
+ * response using TRITONBACKEND_ResponseOutput and
+ * TRITONBACKEND_OutputBuffer *before* another response is created
+ * for the request. For a given response, outputs can be created in
+ * any order but they must be created sequentially/synchronously (for
+ * example, the backend cannot use multiple threads to simultaneously
+ * add multiple outputs to a response).
+ *
+ * The above requirement applies only to responses being generated
+ * for a given request. The backend may generate responses in
+ * parallel on multiple threads as long as those responses are for
+ * different requests.
+ *
+ * This order of response creation must be strictly followed. But,
+ * once response(s) are created they do not need to be sent
+ * immediately, nor do they need to be sent in the order they were
+ * created. The backend may even delete a created response instead of
+ * sending it by using TRITONBACKEND_ResponseDelete.
+

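+ * A minimal sketch of the creation order described above
+ * (illustrative only; 'request', 'shape' and 'dimsCount' are
+ * assumed and error checking is omitted):
+ *
+ *   TRITONBACKEND_Response response = new TRITONBACKEND_Response(null);
+ *   TRITONBACKEND_ResponseNew(response, request);
+ *   TRITONBACKEND_Output output = new TRITONBACKEND_Output(null);
+ *   TRITONBACKEND_ResponseOutput(response, output, "output0",
+ *       TRITONSERVER_TYPE_FP32, shape, dimsCount);
+ *   // fill the buffer obtained from TRITONBACKEND_OutputBuffer, then:
+ *   TRITONBACKEND_ResponseSend(
+ *       response, TRITONSERVER_RESPONSE_COMPLETE_FINAL, null);
+ *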
+ * Create a response for a request.
+ *
+ * @param response Returns the new response.
+ * @param request The request.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONBACKEND_ResponseNew(
+    @Cast("TRITONBACKEND_Response**") PointerPointer response, TRITONBACKEND_Request request);
+public static native TRITONSERVER_Error TRITONBACKEND_ResponseNew(
+    @ByPtrPtr TRITONBACKEND_Response response, TRITONBACKEND_Request request);
+
+/** Create a response using a factory.
+ *
+ * @param response Returns the new response.
+ * @param factory The response factory.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONBACKEND_ResponseNewFromFactory(
+    @Cast("TRITONBACKEND_Response**") PointerPointer response, TRITONBACKEND_ResponseFactory factory);
+public static native TRITONSERVER_Error TRITONBACKEND_ResponseNewFromFactory(
+    @ByPtrPtr TRITONBACKEND_Response response, TRITONBACKEND_ResponseFactory factory);
+
+/** Destroy a response. It is not necessary to delete a response if
+ * TRITONBACKEND_ResponseSend is called as that function transfers
+ * ownership of the response object to Triton.
+ *
+ * @param response The response.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONBACKEND_ResponseDelete(
+    TRITONBACKEND_Response response);
+
+/** Set a string parameter in the response.
+ *
+ * @param response The response.
+ * @param name The name of the parameter.
+ * @param value The value of the parameter.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONBACKEND_ResponseSetStringParameter(
+    TRITONBACKEND_Response response, String name, String value);
+public static native TRITONSERVER_Error TRITONBACKEND_ResponseSetStringParameter(
+    TRITONBACKEND_Response response, @Cast("const char*") BytePointer name, @Cast("const char*") BytePointer value);
+
+/** Set an integer parameter in the response.
+ *
+ * @param response The response.
+ * @param name The name of the parameter.
+ * @param value The value of the parameter.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONBACKEND_ResponseSetIntParameter(
+    TRITONBACKEND_Response response, String name, @Cast("const int64_t") long value);
+public static native TRITONSERVER_Error TRITONBACKEND_ResponseSetIntParameter(
+    TRITONBACKEND_Response response, @Cast("const char*") BytePointer name, @Cast("const int64_t") long value);
+
+/** Set a boolean parameter in the response.
+ *
+ * @param response The response.
+ * @param name The name of the parameter.
+ * @param value The value of the parameter.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONBACKEND_ResponseSetBoolParameter(
+    TRITONBACKEND_Response response, String name, @Cast("const bool") boolean value);
+public static native TRITONSERVER_Error TRITONBACKEND_ResponseSetBoolParameter(
+    TRITONBACKEND_Response response, @Cast("const char*") BytePointer name, @Cast("const bool") boolean value);
+
+/** Create an output tensor in the response. The lifetime of the
+ * returned output tensor object matches that of the response and so
+ * the output tensor object should not be accessed after the response
+ * object is deleted.
+ *
+ * @param response The response.
+ * @param output Returns the new response output. + * @param name The name of the output tensor. + * @param datatype The datatype of the output tensor. + * @param shape The shape of the output tensor. + * @param dims_count The number of dimensions in the output tensor + * shape. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONBACKEND_ResponseOutput( + TRITONBACKEND_Response response, @Cast("TRITONBACKEND_Output**") PointerPointer output, + String name, @Cast("const TRITONSERVER_DataType") int datatype, + @Cast("const int64_t*") LongPointer shape, @Cast("const uint32_t") int dims_count); +public static native TRITONSERVER_Error TRITONBACKEND_ResponseOutput( + TRITONBACKEND_Response response, @ByPtrPtr TRITONBACKEND_Output output, + String name, @Cast("const TRITONSERVER_DataType") int datatype, + @Cast("const int64_t*") LongPointer shape, @Cast("const uint32_t") int dims_count); +public static native TRITONSERVER_Error TRITONBACKEND_ResponseOutput( + TRITONBACKEND_Response response, @ByPtrPtr TRITONBACKEND_Output output, + @Cast("const char*") BytePointer name, @Cast("const TRITONSERVER_DataType") int datatype, + @Cast("const int64_t*") LongBuffer shape, @Cast("const uint32_t") int dims_count); +public static native TRITONSERVER_Error TRITONBACKEND_ResponseOutput( + TRITONBACKEND_Response response, @ByPtrPtr TRITONBACKEND_Output output, + String name, @Cast("const TRITONSERVER_DataType") int datatype, + @Cast("const int64_t*") long[] shape, @Cast("const uint32_t") int dims_count); +public static native TRITONSERVER_Error TRITONBACKEND_ResponseOutput( + TRITONBACKEND_Response response, @ByPtrPtr TRITONBACKEND_Output output, + @Cast("const char*") BytePointer name, @Cast("const TRITONSERVER_DataType") int datatype, + @Cast("const int64_t*") LongPointer shape, @Cast("const uint32_t") int dims_count); +public static native TRITONSERVER_Error TRITONBACKEND_ResponseOutput( + TRITONBACKEND_Response response, @ByPtrPtr TRITONBACKEND_Output output, + String name, @Cast("const TRITONSERVER_DataType") int datatype, + @Cast("const int64_t*") LongBuffer shape, @Cast("const uint32_t") int dims_count); +public static native TRITONSERVER_Error TRITONBACKEND_ResponseOutput( + TRITONBACKEND_Response response, @ByPtrPtr TRITONBACKEND_Output output, + @Cast("const char*") BytePointer name, @Cast("const TRITONSERVER_DataType") int datatype, + @Cast("const int64_t*") long[] shape, @Cast("const uint32_t") int dims_count); + +/** Send a response. Calling this function transfers ownership of the + * response object to Triton. The caller must not access or delete + * the response object after calling this function. + * + * @param response The response. + * @param send_flags Flags associated with the response. @see + * TRITONSERVER_ResponseCompleteFlag. @see + * TRITONSERVER_InferenceResponseCompleteFn_t. + * @param error The TRITONSERVER_Error to send if the response is an + * error, or nullptr if the response is successful. + * @return a TRITONSERVER_Error indicating success or failure. */ + + +/// +/// +/// +/// +/// +/// +/// +public static native TRITONSERVER_Error TRITONBACKEND_ResponseSend( + TRITONBACKEND_Response response, @Cast("const uint32_t") int send_flags, + TRITONSERVER_Error error); + +/** + * TRITONBACKEND_Backend + * + * Object representing a backend. + * +

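+ * A minimal sketch (illustrative only, error checking omitted) of
+ * querying a backend, for example from TRITONBACKEND_Initialize:
+ *
+ *   BytePointer name = new BytePointer((Pointer)null);
+ *   TRITONBACKEND_BackendName(backend, name);
+ *   TRITONSERVER_Message config = new TRITONSERVER_Message(null);
+ *   TRITONBACKEND_BackendConfig(backend, config);
+ *   // 'config' is owned by Triton; it can be inspected, e.g. via
+ *   // TRITONSERVER_MessageSerializeToJson, but must not be freed
+ *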
+ * TRITONBACKEND_ExecutionPolicy
+ *
+ * Types of execution policy that can be implemented by a backend.
+ *
+ *   TRITONBACKEND_EXECUTION_BLOCKING: An instance of the model
+ *     blocks in TRITONBACKEND_ModelInstanceExecute until it is ready
+ *     to handle another inference. Upon returning from
+ *     TRITONBACKEND_ModelInstanceExecute, Triton may immediately
+ *     call TRITONBACKEND_ModelInstanceExecute for the same instance
+ *     to execute a new batch of requests. Thus, most backends using
+ *     this policy will not return from
+ *     TRITONBACKEND_ModelInstanceExecute until all responses have
+ *     been sent and all requests have been released. This is the
+ *     default execution policy.
+ *
+ *   TRITONBACKEND_EXECUTION_DEVICE_BLOCKING: An instance, A, of the
+ *     model blocks in TRITONBACKEND_ModelInstanceExecute if the
+ *     device associated with the instance is unable to handle
+ *     another inference. Even if another instance, B, associated
+ *     with the device, is available and ready to perform an
+ *     inference, Triton will not invoke
+ *     TRITONBACKEND_ModelInstanceExecute for B until A returns from
+ *     TRITONBACKEND_ModelInstanceExecute. Triton will not be blocked
+ *     from calling TRITONBACKEND_ModelInstanceExecute for instance
+ *     C, which is associated with a different device than A and B,
+ *     even if A or B has not returned from
+ *     TRITONBACKEND_ModelInstanceExecute. This execution policy is
+ *     typically used by a backend that can cooperatively execute
+ *     multiple model instances on the same device.
+ * */
+/** enum TRITONBACKEND_ExecutionPolicy */
+public static final int
+    TRITONBACKEND_EXECUTION_BLOCKING = 0,
+    TRITONBACKEND_EXECUTION_DEVICE_BLOCKING = 1;
+
+/** Get the name of the backend. The caller does not own the returned
+ * string and must not modify or delete it. The lifetime of the
+ * returned string extends only as long as 'backend'.
+ *
+ * @param backend The backend.
+ * @param name Returns the name of the backend.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+///
+///
+public static native TRITONSERVER_Error TRITONBACKEND_BackendName(
+    TRITONBACKEND_Backend backend, @Cast("const char**") PointerPointer name);
+public static native TRITONSERVER_Error TRITONBACKEND_BackendName(
+    TRITONBACKEND_Backend backend, @Cast("const char**") @ByPtrPtr BytePointer name);
+public static native TRITONSERVER_Error TRITONBACKEND_BackendName(
+    TRITONBACKEND_Backend backend, @Cast("const char**") @ByPtrPtr ByteBuffer name);
+public static native TRITONSERVER_Error TRITONBACKEND_BackendName(
+    TRITONBACKEND_Backend backend, @Cast("const char**") @ByPtrPtr byte[] name);
+
+/** Get the backend configuration. The 'backend_config' message is
+ * owned by Triton and should not be modified or freed by the caller.
+ *
+ * The backend configuration, as JSON, is:
+ *
+ *   {
+ *     "cmdline" : {
+ *       "<setting>" : "<value>",
+ *       ...
+ *     }
+ *   }
+ *
+ * @param backend The backend.
+ * @param backend_config Returns the backend configuration as a message.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONBACKEND_BackendConfig(
+    TRITONBACKEND_Backend backend, @Cast("TRITONSERVER_Message**") PointerPointer backend_config);
+public static native TRITONSERVER_Error TRITONBACKEND_BackendConfig(
+    TRITONBACKEND_Backend backend, @ByPtrPtr TRITONSERVER_Message backend_config);
+
+/** Get the execution policy for this backend. By default the
+ * execution policy is TRITONBACKEND_EXECUTION_BLOCKING.
+ *
+ * @param backend The backend.
+ * @param policy Returns the execution policy.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONBACKEND_BackendExecutionPolicy(
+    TRITONBACKEND_Backend backend, @Cast("TRITONBACKEND_ExecutionPolicy*") IntPointer policy);
+public static native TRITONSERVER_Error TRITONBACKEND_BackendExecutionPolicy(
+    TRITONBACKEND_Backend backend, @Cast("TRITONBACKEND_ExecutionPolicy*") IntBuffer policy);
+public static native TRITONSERVER_Error TRITONBACKEND_BackendExecutionPolicy(
+    TRITONBACKEND_Backend backend, @Cast("TRITONBACKEND_ExecutionPolicy*") int[] policy);
+
+/** Set the execution policy for this backend. By default the
+ * execution policy is TRITONBACKEND_EXECUTION_BLOCKING. Triton reads
+ * the backend's execution policy after calling
+ * TRITONBACKEND_Initialize, so, to be recognized, changes to the
+ * execution policy must be made in TRITONBACKEND_Initialize.
+ *
+ * @param backend The backend.
+ * @param policy The execution policy.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+///
+public static native TRITONSERVER_Error TRITONBACKEND_BackendSetExecutionPolicy(
+    TRITONBACKEND_Backend backend, @Cast("TRITONBACKEND_ExecutionPolicy") int policy);
+
+/** Get the location of the files that make up the backend
+ * implementation. This location contains the backend shared library
+ * and any other files located with the shared library. The
+ * 'location' communicated depends on how the backend is being
+ * communicated to Triton as indicated by 'artifact_type'.
+ *
+ *   TRITONBACKEND_ARTIFACT_FILESYSTEM: The backend artifacts are
+ *     made available to Triton via the local filesystem. 'location'
+ *     returns the full path to the directory containing this
+ *     backend's artifacts. The returned string is owned by Triton,
+ *     not the caller, and so should not be modified or freed.
+ *
+ * @param backend The backend.
+ * @param artifact_type Returns the artifact type for the backend.
+ * @param path Returns the location.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONBACKEND_BackendArtifacts(
+    TRITONBACKEND_Backend backend, @Cast("TRITONBACKEND_ArtifactType*") IntPointer artifact_type,
+    @Cast("const char**") PointerPointer location);
+public static native TRITONSERVER_Error TRITONBACKEND_BackendArtifacts(
+    TRITONBACKEND_Backend backend, @Cast("TRITONBACKEND_ArtifactType*") IntPointer artifact_type,
+    @Cast("const char**") @ByPtrPtr BytePointer location);
+public static native TRITONSERVER_Error TRITONBACKEND_BackendArtifacts(
+    TRITONBACKEND_Backend backend, @Cast("TRITONBACKEND_ArtifactType*") IntBuffer artifact_type,
+    @Cast("const char**") @ByPtrPtr ByteBuffer location);
+public static native TRITONSERVER_Error TRITONBACKEND_BackendArtifacts(
+    TRITONBACKEND_Backend backend, @Cast("TRITONBACKEND_ArtifactType*") int[] artifact_type,
+    @Cast("const char**") @ByPtrPtr byte[] location);
+
+/** Get the memory manager associated with a backend.
+ *
+ * @param backend The backend.
+ * @param manager Returns the memory manager.
+ * @return a TRITONSERVER_Error indicating success or failure.
*/ + +/// +public static native TRITONSERVER_Error TRITONBACKEND_BackendMemoryManager( + TRITONBACKEND_Backend backend, @Cast("TRITONBACKEND_MemoryManager**") PointerPointer manager); +public static native TRITONSERVER_Error TRITONBACKEND_BackendMemoryManager( + TRITONBACKEND_Backend backend, @ByPtrPtr TRITONBACKEND_MemoryManager manager); + +/** Get the user-specified state associated with the backend. The + * state is completely owned and managed by the backend. + * + * @param backend The backend. + * @param state Returns the user state, or nullptr if no user state. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONBACKEND_BackendState( + TRITONBACKEND_Backend backend, @Cast("void**") PointerPointer state); +public static native TRITONSERVER_Error TRITONBACKEND_BackendState( + TRITONBACKEND_Backend backend, @Cast("void**") @ByPtrPtr Pointer state); + +/** Set the user-specified state associated with the backend. The + * state is completely owned and managed by the backend. + * + * @param backend The backend. + * @param state The user state, or nullptr if no user state. + * @return a TRITONSERVER_Error indicating success or failure. */ + + +/// +/// +/// +/// +public static native TRITONSERVER_Error TRITONBACKEND_BackendSetState( + TRITONBACKEND_Backend backend, Pointer state); + +/** + * TRITONBACKEND_Model + * + * Object representing a model implemented using the backend. + * +

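+ * A minimal sketch (illustrative only, error checking omitted) of
+ * inspecting a model, for example from TRITONBACKEND_ModelInitialize:
+ *
+ *   BytePointer name = new BytePointer((Pointer)null);
+ *   TRITONBACKEND_ModelName(model, name);
+ *   long[] version = { 0 };
+ *   TRITONBACKEND_ModelVersion(model, version);
+ *   TRITONSERVER_Message config = new TRITONSERVER_Message(null);
+ *   TRITONBACKEND_ModelConfig(model, 1, config);  // version 1 format
+ *   // ... use the configuration, then release the message:
+ *   TRITONSERVER_MessageDelete(config);
+ *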
+ * Get the name of the model. The returned string is owned by the
+ * model object, not the caller, and so should not be modified or
+ * freed.
+ *
+ * @param model The model.
+ * @param name Returns the model name.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONBACKEND_ModelName(
+    TRITONBACKEND_Model model, @Cast("const char**") PointerPointer name);
+public static native TRITONSERVER_Error TRITONBACKEND_ModelName(
+    TRITONBACKEND_Model model, @Cast("const char**") @ByPtrPtr BytePointer name);
+public static native TRITONSERVER_Error TRITONBACKEND_ModelName(
+    TRITONBACKEND_Model model, @Cast("const char**") @ByPtrPtr ByteBuffer name);
+public static native TRITONSERVER_Error TRITONBACKEND_ModelName(
+    TRITONBACKEND_Model model, @Cast("const char**") @ByPtrPtr byte[] name);
+
+/** Get the version of the model.
+ *
+ * @param model The model.
+ * @param version Returns the model version.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+///
+public static native TRITONSERVER_Error TRITONBACKEND_ModelVersion(
+    TRITONBACKEND_Model model, @Cast("uint64_t*") LongPointer version);
+public static native TRITONSERVER_Error TRITONBACKEND_ModelVersion(
+    TRITONBACKEND_Model model, @Cast("uint64_t*") LongBuffer version);
+public static native TRITONSERVER_Error TRITONBACKEND_ModelVersion(
+    TRITONBACKEND_Model model, @Cast("uint64_t*") long[] version);
+
+/** Get the location of the files that make up the model. The
+ * 'location' communicated depends on how the model is being
+ * communicated to Triton as indicated by 'artifact_type'.
+ *
+ *   TRITONBACKEND_ARTIFACT_FILESYSTEM: The model artifacts are made
+ *     available to Triton via the local filesystem. 'location'
+ *     returns the full path to the directory in the model repository
+ *     that contains this model's artifacts. The returned string is
+ *     owned by Triton, not the caller, and so should not be modified
+ *     or freed.
+ *
+ * @param model The model.
+ * @param artifact_type Returns the artifact type for the model.
+ * @param path Returns the location.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONBACKEND_ModelRepository(
+    TRITONBACKEND_Model model, @Cast("TRITONBACKEND_ArtifactType*") IntPointer artifact_type,
+    @Cast("const char**") PointerPointer location);
+public static native TRITONSERVER_Error TRITONBACKEND_ModelRepository(
+    TRITONBACKEND_Model model, @Cast("TRITONBACKEND_ArtifactType*") IntPointer artifact_type,
+    @Cast("const char**") @ByPtrPtr BytePointer location);
+public static native TRITONSERVER_Error TRITONBACKEND_ModelRepository(
+    TRITONBACKEND_Model model, @Cast("TRITONBACKEND_ArtifactType*") IntBuffer artifact_type,
+    @Cast("const char**") @ByPtrPtr ByteBuffer location);
+public static native TRITONSERVER_Error TRITONBACKEND_ModelRepository(
+    TRITONBACKEND_Model model, @Cast("TRITONBACKEND_ArtifactType*") int[] artifact_type,
+    @Cast("const char**") @ByPtrPtr byte[] location);
+
+/** Get the model configuration. The caller takes ownership of the
+ * message object and must call TRITONSERVER_MessageDelete to release
+ * the object. The configuration is available via this call even
+ * before the model is loaded and so can be used in
+ * TRITONBACKEND_ModelInitialize. TRITONSERVER_ServerModelConfig
+ * returns equivalent information but is not usable until after the
+ * model loads.
+ *
+ * @param model The model.
+ * @param config_version The model configuration will be returned in
+ * a format matching this version. If the configuration cannot be
+ * represented in the requested version's format then an error will
+ * be returned. Currently only version 1 is supported.
+ * @param model_config Returns the model configuration as a message.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONBACKEND_ModelConfig(
+    TRITONBACKEND_Model model, @Cast("const uint32_t") int config_version,
+    @Cast("TRITONSERVER_Message**") PointerPointer model_config);
+public static native TRITONSERVER_Error TRITONBACKEND_ModelConfig(
+    TRITONBACKEND_Model model, @Cast("const uint32_t") int config_version,
+    @ByPtrPtr TRITONSERVER_Message model_config);
+
+/** Whether the backend should attempt to auto-complete the model configuration.
+ * If true, the backend should fill the inputs, outputs, and max batch size in
+ * the model configuration if incomplete. If the model configuration is
+ * changed, the new configuration must be reported to Triton using
+ * TRITONBACKEND_ModelSetConfig.
+ *
+ * @param model The model.
+ * @param auto_complete_config Returns whether the backend should auto-complete
+ * the model configuration.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONBACKEND_ModelAutoCompleteConfig(
+    TRITONBACKEND_Model model, @Cast("bool*") boolean[] auto_complete_config);
+public static native TRITONSERVER_Error TRITONBACKEND_ModelAutoCompleteConfig(
+    TRITONBACKEND_Model model, @Cast("bool*") BoolPointer auto_complete_config);
+
+/** Set the model configuration in Triton server. Only the inputs, outputs,
+ * and max batch size can be changed. Any other changes to the model
+ * configuration will be ignored by Triton. This function can only be called
+ * from TRITONBACKEND_ModelInitialize; calling it in any other context will
+ * result in an error being returned. The function does not take ownership of
+ * the message object and so the caller should call TRITONSERVER_MessageDelete
+ * to release the object once the function returns.
+ *
+ * @param model The model.
+ * @param config_version The format version of the model configuration.
+ * If the configuration is not represented in the version's format
+ * then an error will be returned. Currently only version 1 is supported.
+ * @param model_config The updated model configuration as a message.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONBACKEND_ModelSetConfig(
+    TRITONBACKEND_Model model, @Cast("const uint32_t") int config_version,
+    TRITONSERVER_Message model_config);
+
+/** Get the TRITONSERVER_Server object that this model is being served
+ * by.
+ *
+ * @param model The model.
+ * @param server Returns the server.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONBACKEND_ModelServer(
+    TRITONBACKEND_Model model, @Cast("TRITONSERVER_Server**") PointerPointer server);
+public static native TRITONSERVER_Error TRITONBACKEND_ModelServer(
+    TRITONBACKEND_Model model, @ByPtrPtr TRITONSERVER_Server server);
+
+/** Get the backend used by the model.
+ *
+ * @param model The model.
+ * @param backend Returns the backend object.
+ * @return a TRITONSERVER_Error indicating success or failure.
*/ + +/// +public static native TRITONSERVER_Error TRITONBACKEND_ModelBackend( + TRITONBACKEND_Model model, @Cast("TRITONBACKEND_Backend**") PointerPointer backend); +public static native TRITONSERVER_Error TRITONBACKEND_ModelBackend( + TRITONBACKEND_Model model, @ByPtrPtr TRITONBACKEND_Backend backend); + +/** Get the user-specified state associated with the model. The + * state is completely owned and managed by the backend. + * + * @param model The model. + * @param state Returns the user state, or nullptr if no user state. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONBACKEND_ModelState( + TRITONBACKEND_Model model, @Cast("void**") PointerPointer state); +public static native TRITONSERVER_Error TRITONBACKEND_ModelState( + TRITONBACKEND_Model model, @Cast("void**") @ByPtrPtr Pointer state); + +/** Set the user-specified state associated with the model. The + * state is completely owned and managed by the backend. + * + * @param model The model. + * @param state The user state, or nullptr if no user state. + * @return a TRITONSERVER_Error indicating success or failure. */ + + +/// +/// +/// +/// +public static native TRITONSERVER_Error TRITONBACKEND_ModelSetState( + TRITONBACKEND_Model model, Pointer state); + +/** + * TRITONBACKEND_ModelInstance + * + * Object representing a model instance implemented using the + * backend. + * +

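+ * A minimal sketch (illustrative only, error checking omitted) of
+ * inspecting an instance, for example from
+ * TRITONBACKEND_ModelInstanceInitialize:
+ *
+ *   BytePointer name = new BytePointer((Pointer)null);
+ *   TRITONBACKEND_ModelInstanceName(instance, name);
+ *   int[] kind = { 0 };
+ *   TRITONBACKEND_ModelInstanceKind(instance, kind);
+ *   int[] deviceId = { 0 };
+ *   TRITONBACKEND_ModelInstanceDeviceId(instance, deviceId);
+ *   if (kind[0] == TRITONSERVER_INSTANCEGROUPKIND_GPU) {
+ *     // place the instance's resources on GPU deviceId[0]
+ *   }
+ *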
+ * Get the name of the model instance. The returned string is owned by the
+ * model instance object, not the caller, and so should not be modified or
+ * freed.
+ *
+ * @param instance The model instance.
+ * @param name Returns the instance name.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceName(
+    TRITONBACKEND_ModelInstance instance, @Cast("const char**") PointerPointer name);
+public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceName(
+    TRITONBACKEND_ModelInstance instance, @Cast("const char**") @ByPtrPtr BytePointer name);
+public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceName(
+    TRITONBACKEND_ModelInstance instance, @Cast("const char**") @ByPtrPtr ByteBuffer name);
+public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceName(
+    TRITONBACKEND_ModelInstance instance, @Cast("const char**") @ByPtrPtr byte[] name);
+
+/** Get the kind of the model instance.
+ *
+ * @param instance The model instance.
+ * @param kind Returns the instance kind.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceKind(
+    TRITONBACKEND_ModelInstance instance,
+    @Cast("TRITONSERVER_InstanceGroupKind*") IntPointer kind);
+public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceKind(
+    TRITONBACKEND_ModelInstance instance,
+    @Cast("TRITONSERVER_InstanceGroupKind*") IntBuffer kind);
+public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceKind(
+    TRITONBACKEND_ModelInstance instance,
+    @Cast("TRITONSERVER_InstanceGroupKind*") int[] kind);
+
+/** Get the device ID of the model instance.
+ *
+ * @param instance The model instance.
+ * @param device_id Returns the instance device ID.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+///
+///
+public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceDeviceId(
+    TRITONBACKEND_ModelInstance instance, IntPointer device_id);
+public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceDeviceId(
+    TRITONBACKEND_ModelInstance instance, IntBuffer device_id);
+public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceDeviceId(
+    TRITONBACKEND_ModelInstance instance, int[] device_id);
+
+/** Get the host policy setting. The 'host_policy' message is
+ * owned by Triton and should not be modified or freed by the caller.
+ *
+ * The host policy setting, as JSON, is:
+ *
+ *   {
+ *     "<policy_name>" : {
+ *       "<setting>" : "<value>",
+ *       ...
+ *     }
+ *   }
+ *
+ * @param instance The model instance.
+ * @param host_policy Returns the host policy setting as a message.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceHostPolicy(
+    TRITONBACKEND_ModelInstance instance, @Cast("TRITONSERVER_Message**") PointerPointer host_policy);
+public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceHostPolicy(
+    TRITONBACKEND_ModelInstance instance, @ByPtrPtr TRITONSERVER_Message host_policy);
+
+/** Whether the model instance is passive.
+ *
+ * @param instance The model instance.
+ * @param is_passive Returns true if the instance is passive, false otherwise.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceIsPassive(
+    TRITONBACKEND_ModelInstance instance, @Cast("bool*") boolean[] is_passive);
+public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceIsPassive(
+    TRITONBACKEND_ModelInstance instance, @Cast("bool*") BoolPointer is_passive);
+
+/** Get the number of optimization profiles to be loaded for the instance.
+ *
+ * @param instance The model instance.
+ * @param count Returns the number of optimization profiles.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceProfileCount(
+    TRITONBACKEND_ModelInstance instance, @Cast("uint32_t*") IntPointer count);
+public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceProfileCount(
+    TRITONBACKEND_ModelInstance instance, @Cast("uint32_t*") IntBuffer count);
+public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceProfileCount(
+    TRITONBACKEND_ModelInstance instance, @Cast("uint32_t*") int[] count);
+
+/** Get the name of the optimization profile. The caller does not own
+ * the returned string and must not modify or delete it. The lifetime
+ * of the returned string extends only as long as 'instance'.
+ *
+ * @param instance The model instance.
+ * @param index The index of the optimization profile. Must be 0
+ * <= index < count, where count is the value returned by
+ * TRITONBACKEND_ModelInstanceProfileCount.
+ * @param profile_name Returns the name of the optimization profile
+ * corresponding to the index.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceProfileName(
+    TRITONBACKEND_ModelInstance instance, @Cast("const uint32_t") int index,
+    @Cast("const char**") PointerPointer profile_name);
+public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceProfileName(
+    TRITONBACKEND_ModelInstance instance, @Cast("const uint32_t") int index,
+    @Cast("const char**") @ByPtrPtr BytePointer profile_name);
+public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceProfileName(
+    TRITONBACKEND_ModelInstance instance, @Cast("const uint32_t") int index,
+    @Cast("const char**") @ByPtrPtr ByteBuffer profile_name);
+public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceProfileName(
+    TRITONBACKEND_ModelInstance instance, @Cast("const uint32_t") int index,
+    @Cast("const char**") @ByPtrPtr byte[] profile_name);
+
+/** Get the number of secondary devices configured for the instance.
+ *
+ * @param instance The model instance.
+ * @param count Returns the number of secondary devices.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceSecondaryDeviceCount(
+    TRITONBACKEND_ModelInstance instance, @Cast("uint32_t*") IntPointer count);
+public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceSecondaryDeviceCount(
+    TRITONBACKEND_ModelInstance instance, @Cast("uint32_t*") IntBuffer count);
+public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceSecondaryDeviceCount(
+    TRITONBACKEND_ModelInstance instance, @Cast("uint32_t*") int[] count);
+
+/** Get the properties of the indexed secondary device. The returned
+ * strings and other properties are owned by the instance, not the
+ * caller, and so should not be modified or freed.
+ *
+ * @param instance The model instance.
+ * @param index The index of the secondary device. Must be 0
+ * <= index < count, where count is the value returned by
+ * TRITONBACKEND_ModelInstanceSecondaryDeviceCount.
+ * @param kind Returns the kind of secondary device corresponding
+ * to the index.
+ * @param id Returns the id of secondary device corresponding to the index.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceSecondaryDeviceProperties(
+    TRITONBACKEND_ModelInstance instance, @Cast("uint32_t") int index, @Cast("const char**") PointerPointer kind,
+    @Cast("int64_t*") LongPointer id);
+public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceSecondaryDeviceProperties(
+    TRITONBACKEND_ModelInstance instance, @Cast("uint32_t") int index, @Cast("const char**") @ByPtrPtr BytePointer kind,
+    @Cast("int64_t*") LongPointer id);
+public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceSecondaryDeviceProperties(
+    TRITONBACKEND_ModelInstance instance, @Cast("uint32_t") int index, @Cast("const char**") @ByPtrPtr ByteBuffer kind,
+    @Cast("int64_t*") LongBuffer id);
+public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceSecondaryDeviceProperties(
+    TRITONBACKEND_ModelInstance instance, @Cast("uint32_t") int index, @Cast("const char**") @ByPtrPtr byte[] kind,
+    @Cast("int64_t*") long[] id);
+
+/** Get the model associated with a model instance.
+ *
+ * @param instance The model instance.
+ * @param model Returns the model object.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceModel(
+    TRITONBACKEND_ModelInstance instance, @Cast("TRITONBACKEND_Model**") PointerPointer model);
+public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceModel(
+    TRITONBACKEND_ModelInstance instance, @ByPtrPtr TRITONBACKEND_Model model);
+
+/** Get the user-specified state associated with the model
+ * instance. The state is completely owned and managed by the
+ * backend.
+ *
+ * @param instance The model instance.
+ * @param state Returns the user state, or nullptr if no user state.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceState(
+    TRITONBACKEND_ModelInstance instance, @Cast("void**") PointerPointer state);
+public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceState(
+    TRITONBACKEND_ModelInstance instance, @Cast("void**") @ByPtrPtr Pointer state);
+
+/** Set the user-specified state associated with the model
+ * instance. The state is completely owned and managed by the
+ * backend.
+ *
+ * @param instance The model instance.
+ * @param state The user state, or nullptr if no user state.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+///
+///
+///
+///
+public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceSetState(
+    TRITONBACKEND_ModelInstance instance, Pointer state);
+
+/** Record statistics for an inference request.
+ *
+ * Set 'success' to true to indicate that the inference request
+ * completed successfully. In this case all timestamps should be
+ * non-zero values reported in nanoseconds and should be collected
+ * using std::chrono::steady_clock::now().time_since_epoch() or the equivalent.
+ * Set 'success' to false to indicate that the inference request failed
+ * to complete successfully. In this case all timestamp values are
+ * ignored.
+ *
+ * For consistency of measurement across different backends, the
+ * timestamps should be collected at the following points during
+ * TRITONBACKEND_ModelInstanceExecute.
+ *
+ *   TRITONBACKEND_ModelInstanceExecute()
+ *     CAPTURE TIMESTAMP (exec_start_ns)
+ *     < process input tensors to prepare them for inference
+ *       execution, including copying the tensors to/from GPU if
+ *       necessary>
+ *     CAPTURE TIMESTAMP (compute_start_ns)
+ *     < perform inference computations to produce outputs >
+ *     CAPTURE TIMESTAMP (compute_end_ns)
+ *     < allocate output buffers and extract output tensors, including
+ *       copying the tensors to/from GPU if necessary>
+ *     CAPTURE TIMESTAMP (exec_end_ns)
+ *     return
+ *
+ * Note that these statistics are associated with a valid
+ * TRITONBACKEND_Request object and so must be reported before the
+ * request is released. For backends that release the request before
+ * all response(s) are sent, these statistics cannot capture
+ * information about the time required to produce the response.
+ *
+ * @param instance The model instance.
+ * @param request The inference request that statistics are being
+ * reported for.
+ * @param success True if the inference request completed
+ * successfully, false if it failed to complete.
+ * @param exec_start_ns Timestamp for the start of execution.
+ * @param compute_start_ns Timestamp for the start of execution
+ * computations.
+ * @param compute_end_ns Timestamp for the end of execution
+ * computations.
+ * @param exec_end_ns Timestamp for the end of execution.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+///
+///
+public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceReportStatistics(
+    TRITONBACKEND_ModelInstance instance, TRITONBACKEND_Request request,
+    @Cast("const bool") boolean success, @Cast("const uint64_t") long exec_start_ns,
+    @Cast("const uint64_t") long compute_start_ns, @Cast("const uint64_t") long compute_end_ns,
+    @Cast("const uint64_t") long exec_end_ns);
+
+/** Record statistics for the execution of an entire batch of
+ * inference requests.
+ *
+ * All timestamps should be non-zero values reported in nanoseconds
+ * and should be collected using
+ * std::chrono::steady_clock::now().time_since_epoch() or the equivalent.
+ * See TRITONBACKEND_ModelInstanceReportStatistics for more information about
+ * the timestamps.
+ *
+ * 'batch_size' is the sum of the batch sizes for the individual
+ * requests that were delivered together in the call to
+ * TRITONBACKEND_ModelInstanceExecute. For example, if three requests
+ * are passed to TRITONBACKEND_ModelInstanceExecute and those
+ * requests have batch size 1, 2, and 3, then 'batch_size' should be
+ * set to 6.
+ *
+ * @param instance The model instance.
+ * @param batch_size Combined batch size of all the individual
+ * requests executed in the batch.
+ * @param exec_start_ns Timestamp for the start of execution.
+ * @param compute_start_ns Timestamp for the start of execution
+ * computations.
+ * @param compute_end_ns Timestamp for the end of execution
+ * computations.
+ * @param exec_end_ns Timestamp for the end of execution.
+ * @return a TRITONSERVER_Error indicating success or failure.
*/ + + + +/// +/// +/// +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceReportBatchStatistics( + TRITONBACKEND_ModelInstance instance, @Cast("const uint64_t") long batch_size, + @Cast("const uint64_t") long exec_start_ns, @Cast("const uint64_t") long compute_start_ns, + @Cast("const uint64_t") long compute_end_ns, @Cast("const uint64_t") long exec_end_ns); + + +/** + * The following functions can be implemented by a backend. Functions + * indicated as required must be implemented or the backend will fail + * to load. + * +

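+ * A minimal per-request sketch (illustrative only; 'instance' and
+ * 'request' are assumed and error checking is omitted) of the flow
+ * inside an execute implementation, using System.nanoTime() as a
+ * monotonic clock for the statistics timestamps:
+ *
+ *   long execStartNs = System.nanoTime();
+ *   // ... prepare input tensors ...
+ *   long computeStartNs = System.nanoTime();
+ *   // ... run inference ...
+ *   long computeEndNs = System.nanoTime();
+ *   // ... create and send the response(s) ...
+ *   long execEndNs = System.nanoTime();
+ *   TRITONBACKEND_ModelInstanceReportStatistics(instance, request,
+ *       true, execStartNs, computeStartNs, computeEndNs, execEndNs);
+ *   TRITONBACKEND_RequestRelease(request, TRITONSERVER_REQUEST_RELEASE_ALL);
+ *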
+ * Initialize a backend. This function is optional, a backend is not + * required to implement it. This function is called once when a + * backend is loaded to allow the backend to initialize any state + * associated with the backend. A backend has a single state that is + * shared across all models that use the backend. + * + * @param backend The backend. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONBACKEND_Initialize( + TRITONBACKEND_Backend backend); + +/** Finalize for a backend. This function is optional, a backend is + * not required to implement it. This function is called once, just + * before the backend is unloaded. All state associated with the + * backend should be freed and any threads created for the backend + * should be exited/joined before returning from this function. + * + * @param backend The backend. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONBACKEND_Finalize( + TRITONBACKEND_Backend backend); + +/** Initialize for a model. This function is optional, a backend is + * not required to implement it. This function is called once when a + * model that uses the backend is loaded to allow the backend to + * initialize any state associated with the model. The backend should + * also examine the model configuration to determine if the + * configuration is suitable for the backend. Any errors reported by + * this function will prevent the model from loading. + * + * @param model The model. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONBACKEND_ModelInitialize( + TRITONBACKEND_Model model); + +/** Finalize for a model. This function is optional, a backend is not + * required to implement it. This function is called once for a + * model, just before the model is unloaded from Triton. All state + * associated with the model should be freed and any threads created + * for the model should be exited/joined before returning from this + * function. + * + * @param model The model. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONBACKEND_ModelFinalize( + TRITONBACKEND_Model model); + +/** Initialize for a model instance. This function is optional, a + * backend is not required to implement it. This function is called + * once when a model instance is created to allow the backend to + * initialize any state associated with the instance. + * + * @param instance The model instance. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceInitialize( + TRITONBACKEND_ModelInstance instance); + +/** Finalize for a model instance. This function is optional, a + * backend is not required to implement it. This function is called + * once for an instance, just before the corresponding model is + * unloaded from Triton. All state associated with the instance + * should be freed and any threads created for the instance should be + * exited/joined before returning from this function. + * + * @param instance The model instance. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +/// +/// +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceFinalize( + TRITONBACKEND_ModelInstance instance); + +/** Execute a batch of one or more requests on a model instance. 
This + * function is required. Triton will not perform multiple + * simultaneous calls to this function for a given model 'instance'; + * however, there may be simultaneous calls for different model + * instances (for the same or different models). + * + * If an error is returned the ownership of the request objects + * remains with Triton and the backend must not retain references to + * the request objects or access them in any way. + * + * If success is returned, ownership of the request objects is + * transferred to the backend and it is then responsible for creating + * responses and releasing the request objects. Note that even though + * ownership of the request objects is transferred to the backend, the + * ownership of the buffer holding request pointers is returned back + * to Triton upon return from TRITONBACKEND_ModelInstanceExecute. If + * any request objects need to be maintained beyond + * TRITONBACKEND_ModelInstanceExecute, then the pointers must be copied + * out of the array within TRITONBACKEND_ModelInstanceExecute. + * + * @param instance The model instance. + * @param requests The requests. + * @param request_count The number of requests in the batch. + * @return a TRITONSERVER_Error indicating success or failure. */ +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceExecute( + TRITONBACKEND_ModelInstance instance, @Cast("TRITONBACKEND_Request**") PointerPointer requests, + @Cast("const uint32_t") int request_count); +public static native TRITONSERVER_Error TRITONBACKEND_ModelInstanceExecute( + TRITONBACKEND_ModelInstance instance, @ByPtrPtr TRITONBACKEND_Request requests, + @Cast("const uint32_t") int request_count); + + +// #ifdef __cplusplus +// #endif + + +// Parsed from tritonrepoagent.h + +// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of NVIDIA CORPORATION nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// #pragma once
+
+// #include <stddef.h>
+// #include <stdint.h>
+// #include "triton/core/tritonserver.h"
+
+// #ifdef __cplusplus
+// #endif
+
+// #ifdef _COMPILING_TRITONREPOAGENT
+// #if defined(_MSC_VER)
+// #define TRITONREPOAGENT_DECLSPEC __declspec(dllexport)
+// #define TRITONREPOAGENT_ISPEC __declspec(dllimport)
+// #elif defined(__GNUC__)
+// #define TRITONREPOAGENT_DECLSPEC __attribute__((__visibility__("default")))
+// #define TRITONREPOAGENT_ISPEC
+// #else
+// #define TRITONREPOAGENT_DECLSPEC
+// #define TRITONREPOAGENT_ISPEC
+// #endif
+// #else
+// #if defined(_MSC_VER)
+// #define TRITONREPOAGENT_DECLSPEC __declspec(dllimport)
+// #define TRITONREPOAGENT_ISPEC __declspec(dllexport)
+// #else
+// #define TRITONREPOAGENT_DECLSPEC
+// #define TRITONREPOAGENT_ISPEC
+// Targeting ../tritonserver/TRITONREPOAGENT_Agent.java
+
+
+// Targeting ../tritonserver/TRITONREPOAGENT_AgentModel.java
+
+
+
+/**
+ * TRITONREPOAGENT API Version
+ *
+ * The TRITONREPOAGENT API is versioned with major and minor version
+ * numbers. Any change to the API that does not impact backwards
+ * compatibility (for example, adding a non-required function)
+ * increases the minor version number. Any change that breaks
+ * backwards compatibility (for example, deleting or changing the
+ * behavior of a function) increases the major version number. A
+ * repository agent should check that the API version used to compile
+ * the agent is compatible with the API version of the Triton server
+ * that it is running in. This is typically done by code similar to
+ * the following which makes sure that the major versions are equal
+ * and that the minor version of Triton is >= the minor version used
+ * to build the agent.
+ *
+ *   uint32_t api_version_major, api_version_minor;
+ *   TRITONREPOAGENT_ApiVersion(&api_version_major, &api_version_minor);
+ *   if ((api_version_major != TRITONREPOAGENT_API_VERSION_MAJOR) ||
+ *       (api_version_minor < TRITONREPOAGENT_API_VERSION_MINOR)) {
+ *     return TRITONSERVER_ErrorNew(
+ *         TRITONSERVER_ERROR_UNSUPPORTED,
+ *         "triton repository agent API version does not support this agent");
+ *   }
+ * */
+public static final int TRITONREPOAGENT_API_VERSION_MAJOR = 0;
+
+///
+public static final int TRITONREPOAGENT_API_VERSION_MINOR = 1;
+
+/** Get the TRITONREPOAGENT API version supported by Triton. This
+ * value can be compared against the
+ * TRITONREPOAGENT_API_VERSION_MAJOR and
+ * TRITONREPOAGENT_API_VERSION_MINOR used to build the agent to
+ * ensure that Triton is compatible with the agent.
+ *
+ * @param major Returns the TRITONREPOAGENT API major version supported
+ * by Triton.
+ * @param minor Returns the TRITONREPOAGENT API minor version supported
+ * by Triton.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+///
+///
+///
+public static native TRITONSERVER_Error TRITONREPOAGENT_ApiVersion(
+    @Cast("uint32_t*") IntPointer major, @Cast("uint32_t*") IntPointer minor);
+public static native TRITONSERVER_Error TRITONREPOAGENT_ApiVersion(
+    @Cast("uint32_t*") IntBuffer major, @Cast("uint32_t*") IntBuffer minor);
+public static native TRITONSERVER_Error TRITONREPOAGENT_ApiVersion(
+    @Cast("uint32_t*") int[] major, @Cast("uint32_t*") int[] minor);
+
+/** TRITONREPOAGENT_ArtifactType
+ *
+ * The ways that the files that make up a model's repository content
+ * are communicated between Triton and the agent.
+ *
+ *   TRITONREPOAGENT_ARTIFACT_FILESYSTEM: The model artifacts are
+ *     communicated to and from the repository agent via a locally
+ *     accessible filesystem.
+/** TRITONREPOAGENT_ArtifactType
+ *
+ * The ways that the files that make up a model's repository content
+ * are communicated between Triton and the agent.
+ *
+ *   TRITONREPOAGENT_ARTIFACT_FILESYSTEM: The model artifacts are
+ *     communicated to and from the repository agent via a locally
+ *     accessible filesystem. The agent can access these files using
+ *     an appropriate filesystem API.
+ *
+ *   TRITONREPOAGENT_ARTIFACT_REMOTE_FILESYSTEM: The model artifacts are
+ *     communicated to and from the repository agent via a remote filesystem.
+ *     The remote filesystem path follows the same convention as is used for
+ *     repository paths, for example, "s3://" prefix indicates an S3 path.
+ * */
+/** enum TRITONREPOAGENT_ArtifactType */
+public static final int
+  TRITONREPOAGENT_ARTIFACT_FILESYSTEM = 0,
+  TRITONREPOAGENT_ARTIFACT_REMOTE_FILESYSTEM = 1;
+
+/** TRITONREPOAGENT_ActionType
+ *
+ * Types of repository actions that can be handled by an agent.
+ * The lifecycle of a TRITONREPOAGENT_AgentModel begins with a call to
+ * TRITONREPOAGENT_ModelInitialize and ends with a call to
+ * TRITONREPOAGENT_ModelFinalize. Between those calls the current lifecycle
+ * state of the model is communicated by calls to TRITONREPOAGENT_ModelAction.
+ * Possible lifecycles are:
+ *
+ *   LOAD -> LOAD_COMPLETE -> UNLOAD -> UNLOAD_COMPLETE
+ *   LOAD -> LOAD_FAIL
+ *
+ * TRITONREPOAGENT_ACTION_LOAD: A model is being loaded.
+ *
+ * TRITONREPOAGENT_ACTION_LOAD_COMPLETE: The model load completed
+ * successfully and the model is now loaded.
+ *
+ * TRITONREPOAGENT_ACTION_LOAD_FAIL: The model load did not complete
+ * successfully. The model is not loaded.
+ *
+ * TRITONREPOAGENT_ACTION_UNLOAD: The model is being unloaded.
+ *
+ * TRITONREPOAGENT_ACTION_UNLOAD_COMPLETE: The model unload is complete.
+ * */
+/** enum TRITONREPOAGENT_ActionType */
+public static final int
+  TRITONREPOAGENT_ACTION_LOAD = 0,
+  TRITONREPOAGENT_ACTION_LOAD_COMPLETE = 1,
+  TRITONREPOAGENT_ACTION_LOAD_FAIL = 2,
+  TRITONREPOAGENT_ACTION_UNLOAD = 3,
+  TRITONREPOAGENT_ACTION_UNLOAD_COMPLETE = 4;
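An agent's TRITONREPOAGENT_ModelAction handler (declared further below) typically dispatches on this action type. Agents are normally implemented in C/C++, so the Java sketch below only illustrates the lifecycle contract; everything except the generated constants and types is an assumption:

```java
import org.bytedeco.tritonserver.tritonserver.*;
import static org.bytedeco.tritonserver.global.tritonserver.*;

public class ActionDispatchSketch {
    // Returning null plays the role of returning nullptr (success) in C.
    static TRITONSERVER_Error handleAction(
            TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model, int actionType) {
        switch (actionType) {
            case TRITONREPOAGENT_ACTION_LOAD:
                // The only point in the lifecycle where the repository may
                // be modified, via TRITONREPOAGENT_ModelRepositoryUpdate.
                return null;
            case TRITONREPOAGENT_ACTION_LOAD_COMPLETE:
            case TRITONREPOAGENT_ACTION_LOAD_FAIL:
            case TRITONREPOAGENT_ACTION_UNLOAD:
            case TRITONREPOAGENT_ACTION_UNLOAD_COMPLETE:
            default:
                // Actions this agent does not handle: succeed immediately.
                return null;
        }
    }
}
```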
+/** Get the location of the files that make up the model. The
+ * 'location' communicated depends on how the model is being
+ * communicated to the agent as indicated by 'artifact_type'.
+ *
+ *   TRITONREPOAGENT_ARTIFACT_FILESYSTEM: The model artifacts are
+ *     made available to the agent via the local
+ *     filesystem. 'location' returns the full path to the directory
+ *     in the model repository that contains the model's
+ *     artifacts. The returned location string is owned by Triton,
+ *     not the caller, and so should not be modified or freed. The
+ *     contents of the directory are owned by Triton, not the agent,
+ *     and so the agent should not delete or modify the contents. Use
+ *     TRITONREPOAGENT_ModelRepositoryLocationAcquire to get a location that can be
+ *     used to modify the model repository contents.
+ *
+ *   TRITONREPOAGENT_ARTIFACT_REMOTE_FILESYSTEM: The model artifacts are
+ *     made available to the agent via a remote filesystem.
+ *     'location' returns the full path to the remote directory that contains
+ *     the model's artifacts. The returned location string is owned by Triton,
+ *     not the caller, and so should not be modified or freed. The contents of
+ *     the remote directory are owned by Triton, not the agent,
+ *     and so the agent should not delete or modify the contents.
+ *     Use TRITONREPOAGENT_ModelRepositoryLocationAcquire to get a location
+ *     that can be used to write updated model repository contents.
+ *
+ * @param agent The agent.
+ * @param model The model.
+ * @param artifact_type Returns the artifact type for the location.
+ * @param location Returns the location.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+///
+public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocation(
+    TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model,
+    @Cast("TRITONREPOAGENT_ArtifactType*") IntPointer artifact_type, @Cast("const char**") PointerPointer location);
+public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocation(
+    TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model,
+    @Cast("TRITONREPOAGENT_ArtifactType*") IntPointer artifact_type, @Cast("const char**") @ByPtrPtr BytePointer location);
+public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocation(
+    TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model,
+    @Cast("TRITONREPOAGENT_ArtifactType*") IntBuffer artifact_type, @Cast("const char**") @ByPtrPtr ByteBuffer location);
+public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocation(
+    TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model,
+    @Cast("TRITONREPOAGENT_ArtifactType*") int[] artifact_type, @Cast("const char**") @ByPtrPtr byte[] location);
+
+/** Acquire a location where the agent can produce a new version of
+ * the model repository files. This is a convenience method to create
+ * a temporary directory for the agent. The agent is responsible for
+ * calling TRITONREPOAGENT_ModelRepositoryLocationRelease in
+ * TRITONREPOAGENT_ModelFinalize to delete the location. Initially the
+ * acquired location is empty. The 'location' communicated depends on
+ * the requested 'artifact_type'.
+ *
+ *   TRITONREPOAGENT_ARTIFACT_FILESYSTEM: The location is a directory
+ *     on the local filesystem. 'location' returns the full path to
+ *     an empty directory that the agent should populate with the
+ *     model's artifacts. The returned location string is owned by
+ *     Triton, not the agent, and so should not be modified or freed.
+ *
+ * @param agent The agent.
+ * @param model The model.
+ * @param artifact_type The artifact type for the location.
+ * @param location Returns the location.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocationAcquire(
+    TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model,
+    @Cast("const TRITONREPOAGENT_ArtifactType") int artifact_type, @Cast("const char**") PointerPointer location);
+public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocationAcquire(
+    TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model,
+    @Cast("const TRITONREPOAGENT_ArtifactType") int artifact_type, @Cast("const char**") @ByPtrPtr BytePointer location);
+public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocationAcquire(
+    TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model,
+    @Cast("const TRITONREPOAGENT_ArtifactType") int artifact_type, @Cast("const char**") @ByPtrPtr ByteBuffer location);
+public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocationAcquire(
+    TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model,
+    @Cast("const TRITONREPOAGENT_ArtifactType") int artifact_type, @Cast("const char**") @ByPtrPtr byte[] location);
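Put together, a filesystem-based agent might acquire a scratch directory during LOAD, populate it, and hand it back to Triton via TRITONREPOAGENT_ModelRepositoryUpdate (documented below). A minimal sketch using the generated bindings; error handling is elided and the class name plus the populate step are illustrative:

```java
import org.bytedeco.javacpp.BytePointer;
import org.bytedeco.javacpp.PointerPointer;
import org.bytedeco.tritonserver.tritonserver.*;
import static org.bytedeco.tritonserver.global.tritonserver.*;

public class RepositoryUpdateSketch {
    static void updateRepository(TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model) {
        // Acquire an empty, Triton-managed scratch directory.
        PointerPointer location = new PointerPointer(1);
        TRITONSERVER_Error err = TRITONREPOAGENT_ModelRepositoryLocationAcquire(
                agent, model, TRITONREPOAGENT_ARTIFACT_FILESYSTEM, location);
        if (err != null) return; // real code would propagate the error
        String dir = new BytePointer(location.get(0)).getString();

        // ... populate 'dir' with the modified model artifacts here ...

        // Tell Triton to load the model from the new location instead of
        // the original repository; the location is released later, in
        // TRITONREPOAGENT_ModelFinalize.
        TRITONREPOAGENT_ModelRepositoryUpdate(
                agent, model, TRITONREPOAGENT_ARTIFACT_FILESYSTEM, dir);
    }
}
```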
+/** Discard and release ownership of a previously acquired location
+ * and its contents. The agent must not access or modify the location
+ * or its contents after this call.
+ *
+ * @param agent The agent.
+ * @param model The model.
+ * @param location The location to release.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+///
+///
+public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocationRelease(
+    TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model,
+    String location);
+public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryLocationRelease(
+    TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model,
+    @Cast("const char*") BytePointer location);
+
+/** Inform Triton that the specified repository location should be used for
+ * the model in place of the original model repository. This method can only be
+ * called when TRITONREPOAGENT_ModelAction is invoked with
+ * TRITONREPOAGENT_ACTION_LOAD. The 'location'
+ * communicated depends on how the repository is being
+ * communicated to Triton as indicated by 'artifact_type'.
+ *
+ *   TRITONREPOAGENT_ARTIFACT_FILESYSTEM: The model artifacts are
+ *     made available to Triton via the local filesystem. 'location' is
+ *     the full path to the directory. Ownership of the contents of the
+ *     directory is transferred to Triton and the agent must not
+ *     modify or free the contents until TRITONREPOAGENT_ModelFinalize.
+ *     The local filesystem directory can be created using
+ *     TRITONREPOAGENT_ModelRepositoryLocationAcquire or the agent can use
+ *     its own local filesystem API.
+ *
+ *   TRITONREPOAGENT_ARTIFACT_REMOTE_FILESYSTEM: The model artifacts are
+ *     made available to Triton via a remote filesystem. 'location' is
+ *     the full path to the remote filesystem directory. Ownership of the
+ *     contents of the directory is transferred to Triton and
+ *     the agent must not modify or free the contents until
+ *     TRITONREPOAGENT_ModelFinalize.
+ *
+ * @param agent The agent.
+ * @param model The model.
+ * @param artifact_type The artifact type for the location.
+ * @param location The location to use for the model repository.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryUpdate(
+    TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model,
+    @Cast("const TRITONREPOAGENT_ArtifactType") int artifact_type, String location);
+public static native TRITONSERVER_Error TRITONREPOAGENT_ModelRepositoryUpdate(
+    TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model,
+    @Cast("const TRITONREPOAGENT_ArtifactType") int artifact_type, @Cast("const char*") BytePointer location);
+
+/** Get the number of agent parameters defined for a model.
+ *
+ * @param agent The agent.
+ * @param model The model.
+ * @param count Returns the number of agent parameters.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONREPOAGENT_ModelParameterCount(
+    TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model,
+    @Cast("uint32_t*") IntPointer count);
+public static native TRITONSERVER_Error TRITONREPOAGENT_ModelParameterCount(
+    TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model,
+    @Cast("uint32_t*") IntBuffer count);
+public static native TRITONSERVER_Error TRITONREPOAGENT_ModelParameterCount(
+    TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model,
+    @Cast("uint32_t*") int[] count);
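Enumerating the agent parameters with this function and TRITONREPOAGENT_ModelParameter (documented just below) then looks roughly like the following sketch; the class name is an assumption:

```java
import org.bytedeco.javacpp.BytePointer;
import org.bytedeco.javacpp.PointerPointer;
import org.bytedeco.tritonserver.tritonserver.*;
import static org.bytedeco.tritonserver.global.tritonserver.*;

public class ParameterSketch {
    static void printParameters(TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model) {
        int[] count = new int[1];
        if (TRITONREPOAGENT_ModelParameterCount(agent, model, count) != null) return;
        PointerPointer name = new PointerPointer(1), value = new PointerPointer(1);
        for (int i = 0; i < count[0]; i++) {
            if (TRITONREPOAGENT_ModelParameter(agent, model, i, name, value) != null) continue;
            // The strings are owned by Triton: read them, do not free them.
            System.out.println(new BytePointer(name.get(0)).getString() + " = "
                    + new BytePointer(value.get(0)).getString());
        }
    }
}
```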
+/** Get a parameter name and value. The caller does not own the
+ * returned strings and must not modify or delete them.
+ *
+ * @param agent The agent.
+ * @param model The model.
+ * @param index The index of the parameter. Must be 0 <= index <
+ * count, where count is the value returned by
+ * TRITONREPOAGENT_ModelParameterCount.
+ * @param parameter_name Returns the name of the parameter.
+ * @param parameter_value Returns the value of the parameter.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONREPOAGENT_ModelParameter(
+    TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model,
+    @Cast("const uint32_t") int index, @Cast("const char**") PointerPointer parameter_name,
+    @Cast("const char**") PointerPointer parameter_value);
+public static native TRITONSERVER_Error TRITONREPOAGENT_ModelParameter(
+    TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model,
+    @Cast("const uint32_t") int index, @Cast("const char**") @ByPtrPtr BytePointer parameter_name,
+    @Cast("const char**") @ByPtrPtr BytePointer parameter_value);
+public static native TRITONSERVER_Error TRITONREPOAGENT_ModelParameter(
+    TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model,
+    @Cast("const uint32_t") int index, @Cast("const char**") @ByPtrPtr ByteBuffer parameter_name,
+    @Cast("const char**") @ByPtrPtr ByteBuffer parameter_value);
+public static native TRITONSERVER_Error TRITONREPOAGENT_ModelParameter(
+    TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model,
+    @Cast("const uint32_t") int index, @Cast("const char**") @ByPtrPtr byte[] parameter_name,
+    @Cast("const char**") @ByPtrPtr byte[] parameter_value);
+
+/** Get the model configuration. The caller takes ownership of the
+ * message object and must call TRITONSERVER_MessageDelete to release
+ * the object. If the model repository does not contain a
+ * config.pbtxt file then 'model_config' is returned as nullptr.
+ *
+ * @param agent The agent.
+ * @param model The model.
+ * @param config_version The model configuration will be returned in
+ * a format matching this version. If the configuration cannot be
+ * represented in the requested version's format then an error will
+ * be returned. Currently only version 1 is supported.
+ * @param model_config Returns the model configuration as a message.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONREPOAGENT_ModelConfig(
+    TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model,
+    @Cast("const uint32_t") int config_version, @Cast("TRITONSERVER_Message**") PointerPointer model_config);
+public static native TRITONSERVER_Error TRITONREPOAGENT_ModelConfig(
+    TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model,
+    @Cast("const uint32_t") int config_version, @ByPtrPtr TRITONSERVER_Message model_config);
+
+/** Get the user-specified state associated with the model.
+ *
+ * @param model The agent model.
+ * @param state Returns the user state, or nullptr if no user state.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONREPOAGENT_ModelState(
+    TRITONREPOAGENT_AgentModel model, @Cast("void**") PointerPointer state);
+public static native TRITONSERVER_Error TRITONREPOAGENT_ModelState(
+    TRITONREPOAGENT_AgentModel model, @Cast("void**") @ByPtrPtr Pointer state);
+
+/** Set the user-specified state associated with the model.
+ *
+ * @param model The agent model.
+ * @param state The user state, or nullptr if no user state.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONREPOAGENT_ModelSetState(
+    TRITONREPOAGENT_AgentModel model, Pointer state);
+
+/** Get the user-specified state associated with the agent.
+ * + * @param agent The agent. + * @param state Returns the user state, or nullptr if no user state. + * @return a TRITONSERVER_Error indicating success or failure. */ + +/// +public static native TRITONSERVER_Error TRITONREPOAGENT_State( + TRITONREPOAGENT_Agent agent, @Cast("void**") PointerPointer state); +public static native TRITONSERVER_Error TRITONREPOAGENT_State( + TRITONREPOAGENT_Agent agent, @Cast("void**") @ByPtrPtr Pointer state); + +/** Set the user-specified state associated with the agent. + * + * @param agent The agent. + * @param state The user state, or nullptr if no user state. + * @return a TRITONSERVER_Error indicating success or failure. */ + + +/// +/// +/// +public static native TRITONSERVER_Error TRITONREPOAGENT_SetState( + TRITONREPOAGENT_Agent agent, Pointer state); + +/** + * The following functions can be implemented by an agent. Functions + * indicated as required must be implemented or the agent will fail + * to load. + * +
+ * Initialize an agent. This function is optional. This function is
+ * called once when an agent is loaded to allow the agent to
+ * initialize any state associated with the agent. An agent has a
+ * single state that is shared across all invocations of the agent.
+ *
+ * @param agent The agent.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONREPOAGENT_Initialize(
+    TRITONREPOAGENT_Agent agent);
+
+/** Finalize for an agent. This function is optional. This function is
+ * called once, just before the agent is unloaded. All state
+ * associated with the agent should be freed and any threads created
+ * for the agent should be exited/joined before returning from this
+ * function.
+ *
+ * @param agent The agent.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONREPOAGENT_Finalize(
+    TRITONREPOAGENT_Agent agent);
+
+/** Initialize a model associated with an agent. This function is optional.
+ * This function is called once when an agent model's lifecycle begins to allow
+ * the agent model to initialize any state associated with it. An agent model
+ * has a single state that is shared across the entire lifecycle of the agent
+ * model.
+ *
+ * @param agent The agent to be associated with the model.
+ * @param model The model.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+public static native TRITONSERVER_Error TRITONREPOAGENT_ModelInitialize(
+    TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model);
+
+/** Finalize for a model. This function is optional. This function is
+ * called once, just before the end of the agent model's lifecycle. All state
+ * associated with the agent model should be freed and any threads created
+ * for the agent model should be exited/joined before returning from this
+ * function. If the model acquired a model location using
+ * TRITONREPOAGENT_ModelRepositoryLocationAcquire, it must call
+ * TRITONREPOAGENT_ModelRepositoryLocationRelease to release that location.
+ *
+ * @param agent The agent associated with the model.
+ * @param model The model.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+
+///
+///
+///
+public static native TRITONSERVER_Error TRITONREPOAGENT_ModelFinalize(
+    TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model);
+
+/** Handle an action for a specified model. This function is
+ * required. Triton will not perform multiple simultaneous calls to
+ * this function for a given agent and model; however, there may be
+ * simultaneous calls for the agent for different models.
+ *
+ * If the agent does not handle the action the agent should
+ * immediately return success (nullptr).
+ *
+ * Any modification to the model's repository must be made when 'action_type'
+ * is TRITONREPOAGENT_ACTION_LOAD.
+ * To modify the model's repository the agent must either acquire a mutable
+ * location via TRITONREPOAGENT_ModelRepositoryLocationAcquire
+ * or use its own managed location, report the location to Triton via
+ * TRITONREPOAGENT_ModelRepositoryUpdate, and then return
+ * success (nullptr). If the agent does not need to make any changes
+ * to the model repository it should return success without calling
+ * TRITONREPOAGENT_ModelRepositoryUpdate.
+ * To indicate that a model load should fail return a non-success status.
+ *
+ * @param agent The agent.
+ * @param model The model that is the target of the action.
+ * @param action_type The type of action the agent should handle for the model.
+ * @return a TRITONSERVER_Error indicating success or failure. */
+public static native TRITONSERVER_Error TRITONREPOAGENT_ModelAction(
+    TRITONREPOAGENT_Agent agent, TRITONREPOAGENT_AgentModel model,
+    @Cast("const TRITONREPOAGENT_ActionType") int action_type);
+
+// #ifdef __cplusplus
+// #endif
+
+
+}
diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Backend.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Backend.java
new file mode 100644
index 00000000000..f67238a4484
--- /dev/null
+++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Backend.java
@@ -0,0 +1,35 @@
+// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE
+
+package org.bytedeco.tritonserver.tritonserver;
+
+import java.nio.*;
+import org.bytedeco.javacpp.*;
+import org.bytedeco.javacpp.annotation.*;
+
+import static org.bytedeco.javacpp.presets.javacpp.*;
+import org.bytedeco.cuda.cudart.*;
+import static org.bytedeco.cuda.global.cudart.*;
+import org.bytedeco.cuda.cublas.*;
+import static org.bytedeco.cuda.global.cublas.*;
+import org.bytedeco.cuda.cudnn.*;
+import static org.bytedeco.cuda.global.cudnn.*;
+import org.bytedeco.cuda.nvrtc.*;
+import static org.bytedeco.cuda.global.nvrtc.*;
+import org.bytedeco.tensorrt.nvinfer.*;
+import static org.bytedeco.tensorrt.global.nvinfer.*;
+import org.bytedeco.tensorrt.nvinfer_plugin.*;
+import static org.bytedeco.tensorrt.global.nvinfer_plugin.*;
+import org.bytedeco.tensorrt.nvonnxparser.*;
+import static org.bytedeco.tensorrt.global.nvonnxparser.*;
+import org.bytedeco.tensorrt.nvparsers.*;
+import static org.bytedeco.tensorrt.global.nvparsers.*;
+
+import static org.bytedeco.tritonserver.global.tritonserver.*;
+
+@Opaque @Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class)
+public class TRITONBACKEND_Backend extends Pointer {
+    /** Empty constructor. Calls {@code super((Pointer)null)}. */
+    public TRITONBACKEND_Backend() { super((Pointer)null); }
+    /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public TRITONBACKEND_Backend(Pointer p) { super(p); } +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Input.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Input.java new file mode 100644 index 00000000000..d5733453fef --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Input.java @@ -0,0 +1,35 @@ +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + +@Opaque @Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONBACKEND_Input extends Pointer { + /** Empty constructor. Calls {@code super((Pointer)null)}. */ + public TRITONBACKEND_Input() { super((Pointer)null); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public TRITONBACKEND_Input(Pointer p) { super(p); } +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_MemoryManager.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_MemoryManager.java new file mode 100644 index 00000000000..365b5f33ab6 --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_MemoryManager.java @@ -0,0 +1,38 @@ +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + +// #endif +// #endif + +@Opaque @Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONBACKEND_MemoryManager extends Pointer { + /** Empty constructor. Calls {@code super((Pointer)null)}. */ + public TRITONBACKEND_MemoryManager() { super((Pointer)null); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public TRITONBACKEND_MemoryManager(Pointer p) { super(p); } +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Model.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Model.java new file mode 100644 index 00000000000..900f251464f --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Model.java @@ -0,0 +1,35 @@ +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + +@Opaque @Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONBACKEND_Model extends Pointer { + /** Empty constructor. Calls {@code super((Pointer)null)}. */ + public TRITONBACKEND_Model() { super((Pointer)null); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public TRITONBACKEND_Model(Pointer p) { super(p); } +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_ModelInstance.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_ModelInstance.java new file mode 100644 index 00000000000..66fbfbf7901 --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_ModelInstance.java @@ -0,0 +1,41 @@ +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + + + +/// +/// +/// +/// +@Opaque @Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONBACKEND_ModelInstance extends Pointer { + /** Empty constructor. Calls {@code super((Pointer)null)}. */ + public TRITONBACKEND_ModelInstance() { super((Pointer)null); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public TRITONBACKEND_ModelInstance(Pointer p) { super(p); } +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Output.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Output.java new file mode 100644 index 00000000000..83fa1b7d53a --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Output.java @@ -0,0 +1,35 @@ +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + +@Opaque @Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONBACKEND_Output extends Pointer { + /** Empty constructor. Calls {@code super((Pointer)null)}. */ + public TRITONBACKEND_Output() { super((Pointer)null); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public TRITONBACKEND_Output(Pointer p) { super(p); } +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Request.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Request.java new file mode 100644 index 00000000000..d8a5e96510c --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Request.java @@ -0,0 +1,35 @@ +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + +@Opaque @Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONBACKEND_Request extends Pointer { + /** Empty constructor. Calls {@code super((Pointer)null)}. */ + public TRITONBACKEND_Request() { super((Pointer)null); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public TRITONBACKEND_Request(Pointer p) { super(p); } +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Response.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Response.java new file mode 100644 index 00000000000..9acbf56eb49 --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_Response.java @@ -0,0 +1,35 @@ +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + +@Opaque @Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONBACKEND_Response extends Pointer { + /** Empty constructor. Calls {@code super((Pointer)null)}. */ + public TRITONBACKEND_Response() { super((Pointer)null); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public TRITONBACKEND_Response(Pointer p) { super(p); } +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_ResponseFactory.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_ResponseFactory.java new file mode 100644 index 00000000000..bdecf0e62bb --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONBACKEND_ResponseFactory.java @@ -0,0 +1,35 @@ +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + +@Opaque @Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONBACKEND_ResponseFactory extends Pointer { + /** Empty constructor. Calls {@code super((Pointer)null)}. */ + public TRITONBACKEND_ResponseFactory() { super((Pointer)null); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public TRITONBACKEND_ResponseFactory(Pointer p) { super(p); } +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONREPOAGENT_Agent.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONREPOAGENT_Agent.java new file mode 100644 index 00000000000..dec4c76ade6 --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONREPOAGENT_Agent.java @@ -0,0 +1,38 @@ +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + +// #endif +// #endif + +@Opaque @Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONREPOAGENT_Agent extends Pointer { + /** Empty constructor. Calls {@code super((Pointer)null)}. */ + public TRITONREPOAGENT_Agent() { super((Pointer)null); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public TRITONREPOAGENT_Agent(Pointer p) { super(p); } +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONREPOAGENT_AgentModel.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONREPOAGENT_AgentModel.java new file mode 100644 index 00000000000..e1a494c1036 --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONREPOAGENT_AgentModel.java @@ -0,0 +1,41 @@ +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + + + +/// +/// +/// +/// +@Opaque @Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONREPOAGENT_AgentModel extends Pointer { + /** Empty constructor. Calls {@code super((Pointer)null)}. */ + public TRITONREPOAGENT_AgentModel() { super((Pointer)null); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public TRITONREPOAGENT_AgentModel(Pointer p) { super(p); } +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_Error.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_Error.java new file mode 100644 index 00000000000..f8774fc5b6a --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_Error.java @@ -0,0 +1,38 @@ +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + +// #endif +// #endif + +@Opaque @Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONSERVER_Error extends Pointer { + /** Empty constructor. Calls {@code super((Pointer)null)}. */ + public TRITONSERVER_Error() { super((Pointer)null); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public TRITONSERVER_Error(Pointer p) { super(p); } +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceRequest.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceRequest.java new file mode 100644 index 00000000000..30c463566b9 --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceRequest.java @@ -0,0 +1,35 @@ +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + +@Opaque @Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONSERVER_InferenceRequest extends Pointer { + /** Empty constructor. Calls {@code super((Pointer)null)}. */ + public TRITONSERVER_InferenceRequest() { super((Pointer)null); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/
+    public TRITONSERVER_InferenceRequest(Pointer p) { super(p); }
+}
diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceRequestReleaseFn_t.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceRequestReleaseFn_t.java
new file mode 100644
index 00000000000..a35db367e31
--- /dev/null
+++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceRequestReleaseFn_t.java
@@ -0,0 +1,64 @@
+// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE
+
+package org.bytedeco.tritonserver.tritonserver;
+
+import java.nio.*;
+import org.bytedeco.javacpp.*;
+import org.bytedeco.javacpp.annotation.*;
+
+import static org.bytedeco.javacpp.presets.javacpp.*;
+import org.bytedeco.cuda.cudart.*;
+import static org.bytedeco.cuda.global.cudart.*;
+import org.bytedeco.cuda.cublas.*;
+import static org.bytedeco.cuda.global.cublas.*;
+import org.bytedeco.cuda.cudnn.*;
+import static org.bytedeco.cuda.global.cudnn.*;
+import org.bytedeco.cuda.nvrtc.*;
+import static org.bytedeco.cuda.global.nvrtc.*;
+import org.bytedeco.tensorrt.nvinfer.*;
+import static org.bytedeco.tensorrt.global.nvinfer.*;
+import org.bytedeco.tensorrt.nvinfer_plugin.*;
+import static org.bytedeco.tensorrt.global.nvinfer_plugin.*;
+import org.bytedeco.tensorrt.nvonnxparser.*;
+import static org.bytedeco.tensorrt.global.nvonnxparser.*;
+import org.bytedeco.tensorrt.nvparsers.*;
+import static org.bytedeco.tensorrt.global.nvparsers.*;
+
+import static org.bytedeco.tritonserver.global.tritonserver.*;
+
+
+/** Type for inference request release callback function. The callback
+ * indicates what type of release is being performed on the request
+ * and for some of these the callback function takes ownership of the
+ * TRITONSERVER_InferenceRequest object. The 'userp' data is the data
+ * provided as 'request_release_userp' in the call to
+ * TRITONSERVER_InferenceRequestSetReleaseCallback.
+ *
+ * One or more flags will be specified when the callback is invoked,
+ * and the callback must take the following actions:
+ *
+ *   - TRITONSERVER_REQUEST_RELEASE_ALL: The entire inference request
+ *     is being released and ownership is passed to the callback
+ *     function. Triton will no longer access the 'request' object
+ *     itself nor any input tensor data associated with the
+ *     request. The callback should free or otherwise manage the
+ *     'request' object and all associated tensor data.
+ *
+ * Note that currently TRITONSERVER_REQUEST_RELEASE_ALL should always
+ * be set when the callback is invoked but in the future that may
+ * change, so the callback should explicitly check for the flag
+ * before taking ownership of the request object.
+ * */
+
+///
+///
+@Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class)
+public class TRITONSERVER_InferenceRequestReleaseFn_t extends FunctionPointer {
+    static { Loader.load(); }
+    /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/
+    public TRITONSERVER_InferenceRequestReleaseFn_t(Pointer p) { super(p); }
+    protected TRITONSERVER_InferenceRequestReleaseFn_t() { allocate(); }
+    private native void allocate();
+    public native void call(
+        TRITONSERVER_InferenceRequest request, @Cast("const uint32_t") int flags, Pointer userp);
+}
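In practice this callback is subclassed on the Java side and registered with TRITONSERVER_InferenceRequestSetReleaseCallback. A minimal sketch, assuming the request was created by the caller and may simply be deleted once released; the class names are illustrative:

```java
import org.bytedeco.javacpp.Pointer;
import org.bytedeco.tritonserver.tritonserver.*;
import static org.bytedeco.tritonserver.global.tritonserver.*;

public class RequestReleaseSketch {
    static class RequestRelease extends TRITONSERVER_InferenceRequestReleaseFn_t {
        @Override public void call(TRITONSERVER_InferenceRequest request, int flags, Pointer userp) {
            // Only take ownership when Triton says the whole request is released.
            if ((flags & TRITONSERVER_REQUEST_RELEASE_ALL) != 0) {
                TRITONSERVER_InferenceRequestDelete(request);
            }
        }
    }
}
```

Note that the callback object must stay reachable for as long as the request may still be released, since the native side only holds a function pointer into it.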
diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceResponse.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceResponse.java
new file mode 100644
index 00000000000..6f28a7329b3
--- /dev/null
+++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceResponse.java
@@ -0,0 +1,35 @@
+// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE
+
+package org.bytedeco.tritonserver.tritonserver;
+
+import java.nio.*;
+import org.bytedeco.javacpp.*;
+import org.bytedeco.javacpp.annotation.*;
+
+import static org.bytedeco.javacpp.presets.javacpp.*;
+import org.bytedeco.cuda.cudart.*;
+import static org.bytedeco.cuda.global.cudart.*;
+import org.bytedeco.cuda.cublas.*;
+import static org.bytedeco.cuda.global.cublas.*;
+import org.bytedeco.cuda.cudnn.*;
+import static org.bytedeco.cuda.global.cudnn.*;
+import org.bytedeco.cuda.nvrtc.*;
+import static org.bytedeco.cuda.global.nvrtc.*;
+import org.bytedeco.tensorrt.nvinfer.*;
+import static org.bytedeco.tensorrt.global.nvinfer.*;
+import org.bytedeco.tensorrt.nvinfer_plugin.*;
+import static org.bytedeco.tensorrt.global.nvinfer_plugin.*;
+import org.bytedeco.tensorrt.nvonnxparser.*;
+import static org.bytedeco.tensorrt.global.nvonnxparser.*;
+import org.bytedeco.tensorrt.nvparsers.*;
+import static org.bytedeco.tensorrt.global.nvparsers.*;
+
+import static org.bytedeco.tritonserver.global.tritonserver.*;
+
+@Opaque @Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class)
+public class TRITONSERVER_InferenceResponse extends Pointer {
+    /** Empty constructor. Calls {@code super((Pointer)null)}. */
+    public TRITONSERVER_InferenceResponse() { super((Pointer)null); }
+    /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */
+    public TRITONSERVER_InferenceResponse(Pointer p) { super(p); }
+}
diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceResponseCompleteFn_t.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceResponseCompleteFn_t.java
new file mode 100644
index 00000000000..0e964d3b075
--- /dev/null
+++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceResponseCompleteFn_t.java
@@ -0,0 +1,59 @@
+// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE
+
+package org.bytedeco.tritonserver.tritonserver;
+
+import java.nio.*;
+import org.bytedeco.javacpp.*;
+import org.bytedeco.javacpp.annotation.*;
+
+import static org.bytedeco.javacpp.presets.javacpp.*;
+import org.bytedeco.cuda.cudart.*;
+import static org.bytedeco.cuda.global.cudart.*;
+import org.bytedeco.cuda.cublas.*;
+import static org.bytedeco.cuda.global.cublas.*;
+import org.bytedeco.cuda.cudnn.*;
+import static org.bytedeco.cuda.global.cudnn.*;
+import org.bytedeco.cuda.nvrtc.*;
+import static org.bytedeco.cuda.global.nvrtc.*;
+import org.bytedeco.tensorrt.nvinfer.*;
+import static org.bytedeco.tensorrt.global.nvinfer.*;
+import org.bytedeco.tensorrt.nvinfer_plugin.*;
+import static org.bytedeco.tensorrt.global.nvinfer_plugin.*;
+import org.bytedeco.tensorrt.nvonnxparser.*;
+import static org.bytedeco.tensorrt.global.nvonnxparser.*;
+import org.bytedeco.tensorrt.nvparsers.*;
+import static org.bytedeco.tensorrt.global.nvparsers.*;
+
+import static org.bytedeco.tritonserver.global.tritonserver.*;
+
+
+/** Type for callback function indicating that an inference response
+ * has completed. The callback function takes ownership of the
+ * TRITONSERVER_InferenceResponse object. The 'userp' data is the
+ * data provided as 'response_userp' in the call to
+ * TRITONSERVER_InferenceRequestSetResponseCallback.
+ *
+ * One or more flags may be specified when the callback is invoked:
+ *
+ *   - TRITONSERVER_RESPONSE_COMPLETE_FINAL: Indicates that no more
+ *     responses will be generated for a given request (more
+ *     specifically, that no more responses will be generated for the
+ *     inference request that set this callback and 'userp'). When
+ *     this flag is set 'response' may be a response object or may be
+ *     nullptr. If 'response' is not nullptr, then 'response' is the
+ *     last response that Triton will produce for the request. If
+ *     'response' is nullptr then Triton is indicating that no more
+ *     responses will be produced for the request. */
+
+///
+@Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class)
+public class TRITONSERVER_InferenceResponseCompleteFn_t extends FunctionPointer {
+    static { Loader.load(); }
+    /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public TRITONSERVER_InferenceResponseCompleteFn_t(Pointer p) { super(p); } + protected TRITONSERVER_InferenceResponseCompleteFn_t() { allocate(); } + private native void allocate(); + public native void call( + TRITONSERVER_InferenceResponse response, @Cast("const uint32_t") int flags, + Pointer userp); +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceTrace.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceTrace.java new file mode 100644 index 00000000000..9cd501b42ba --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceTrace.java @@ -0,0 +1,35 @@ +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + +@Opaque @Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONSERVER_InferenceTrace extends Pointer { + /** Empty constructor. Calls {@code super((Pointer)null)}. */ + public TRITONSERVER_InferenceTrace() { super((Pointer)null); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public TRITONSERVER_InferenceTrace(Pointer p) { super(p); } +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceTraceActivityFn_t.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceTraceActivityFn_t.java new file mode 100644 index 00000000000..38cae917a28 --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceTraceActivityFn_t.java @@ -0,0 +1,47 @@ +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + + +/** Type for trace activity callback function. This callback function + * is used to report activity occurring for a trace. This function + * does not take ownership of 'trace' and so any information needed + * from that object must be copied before returning. The 'userp' data + * is the same as what is supplied in the call to + * TRITONSERVER_InferenceTraceNew. */ +@Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONSERVER_InferenceTraceActivityFn_t extends FunctionPointer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public TRITONSERVER_InferenceTraceActivityFn_t(Pointer p) { super(p); } + protected TRITONSERVER_InferenceTraceActivityFn_t() { allocate(); } + private native void allocate(); + public native void call( + TRITONSERVER_InferenceTrace trace, + @Cast("TRITONSERVER_InferenceTraceActivity") int activity, @Cast("uint64_t") long timestamp_ns, + Pointer userp); +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceTraceReleaseFn_t.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceTraceReleaseFn_t.java new file mode 100644 index 00000000000..ef311def896 --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_InferenceTraceReleaseFn_t.java @@ -0,0 +1,48 @@ +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + + +/** Type for trace release callback function. This callback function + * is called when all activity for the trace has completed. The + * callback function takes ownership of the + * TRITONSERVER_InferenceTrace object. The 'userp' data is the same + * as what is supplied in the call to TRITONSERVER_InferenceTraceNew. */ + +/// +/// +/// +@Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONSERVER_InferenceTraceReleaseFn_t extends FunctionPointer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public TRITONSERVER_InferenceTraceReleaseFn_t(Pointer p) { super(p); } + protected TRITONSERVER_InferenceTraceReleaseFn_t() { allocate(); } + private native void allocate(); + public native void call( + TRITONSERVER_InferenceTrace trace, Pointer userp); +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_Message.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_Message.java new file mode 100644 index 00000000000..fd1a25c8413 --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_Message.java @@ -0,0 +1,35 @@ +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + +@Opaque @Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONSERVER_Message extends Pointer { + /** Empty constructor. Calls {@code super((Pointer)null)}. */ + public TRITONSERVER_Message() { super((Pointer)null); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public TRITONSERVER_Message(Pointer p) { super(p); } +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_Metrics.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_Metrics.java new file mode 100644 index 00000000000..e1a050cf843 --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_Metrics.java @@ -0,0 +1,35 @@ +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + +@Opaque @Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONSERVER_Metrics extends Pointer { + /** Empty constructor. Calls {@code super((Pointer)null)}. */ + public TRITONSERVER_Metrics() { super((Pointer)null); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public TRITONSERVER_Metrics(Pointer p) { super(p); } +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocator.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocator.java new file mode 100644 index 00000000000..01449657612 --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocator.java @@ -0,0 +1,35 @@ +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + +@Opaque @Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONSERVER_ResponseAllocator extends Pointer { + /** Empty constructor. Calls {@code super((Pointer)null)}. */ + public TRITONSERVER_ResponseAllocator() { super((Pointer)null); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public TRITONSERVER_ResponseAllocator(Pointer p) { super(p); } +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocatorAllocFn_t.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocatorAllocFn_t.java new file mode 100644 index 00000000000..ede55743410 --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocatorAllocFn_t.java @@ -0,0 +1,81 @@ +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + + +/** TRITONSERVER_ResponseAllocator + * + * Object representing a memory allocator for output tensors in an + * inference response. + * +

+ * Type for allocation function that allocates a buffer to hold an + * output tensor. + * + * @param allocator The allocator that is provided in the call to + * TRITONSERVER_InferenceRequestSetResponseCallback. + * @param tensor_name The name of the output tensor to allocate for. + * @param byte_size The size of the buffer to allocate. + * @param memory_type The type of memory that the caller prefers for + * the buffer allocation. + * @param memory_type_id The ID of the memory that the caller prefers + * for the buffer allocation. + * @param userp The user data pointer that is provided as + * 'response_allocator_userp' in the call to + * TRITONSERVER_InferenceRequestSetResponseCallback. + * @param buffer Returns a pointer to the allocated memory. + * @param buffer_userp Returns a user-specified value to associate + * with the buffer, or nullptr if no user-specified value should be + * associated with the buffer. This value will be provided in the + * call to TRITONSERVER_ResponseAllocatorReleaseFn_t when the buffer + * is released and will also be returned by + * TRITONSERVER_InferenceResponseOutput. + * @param actual_memory_type Returns the type of memory where the + * allocation resides. May be different than the type of memory + * requested by 'memory_type'. + * @param actual_memory_type_id Returns the ID of the memory where + * the allocation resides. May be different than the ID of the memory + * requested by 'memory_type_id'. + * @return a TRITONSERVER_Error object if a failure occurs while + * attempting an allocation. If an error is returned all other return + * values will be ignored. */ + +/// +@Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONSERVER_ResponseAllocatorAllocFn_t extends FunctionPointer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public TRITONSERVER_ResponseAllocatorAllocFn_t(Pointer p) { super(p); } + protected TRITONSERVER_ResponseAllocatorAllocFn_t() { allocate(); } + private native void allocate(); + public native TRITONSERVER_Error call( + TRITONSERVER_ResponseAllocator allocator, String tensor_name, + @Cast("size_t") long byte_size, @Cast("TRITONSERVER_MemoryType") int memory_type, + @Cast("int64_t") long memory_type_id, Pointer userp, @Cast("void**") PointerPointer buffer, @Cast("void**") PointerPointer buffer_userp, + @Cast("TRITONSERVER_MemoryType*") IntPointer actual_memory_type, + @Cast("int64_t*") LongPointer actual_memory_type_id); +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocatorReleaseFn_t.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocatorReleaseFn_t.java new file mode 100644 index 00000000000..eaa984d8f70 --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocatorReleaseFn_t.java @@ -0,0 +1,60 @@ +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + + +/** Type for function that is called when the server no longer holds + * any reference to a buffer allocated by + * TRITONSERVER_ResponseAllocatorAllocFn_t. In practice this function + * is typically called when the response object associated with the + * buffer is deleted by TRITONSERVER_InferenceResponseDelete. + * + * @param allocator The allocator that is provided in the call to + * TRITONSERVER_InferenceRequestSetResponseCallback. + * @param buffer Pointer to the buffer to be freed. + * @param buffer_userp The user-specified value associated + * with the buffer in TRITONSERVER_ResponseAllocatorAllocFn_t. + * @param byte_size The size of the buffer. + * @param memory_type The type of memory holding the buffer. + * @param memory_type_id The ID of the memory holding the buffer. + * @return a TRITONSERVER_Error object if a failure occurs while + * attempting the release. If an error is returned Triton will not + * attempt to release the buffer again. */ + +/// +@Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONSERVER_ResponseAllocatorReleaseFn_t extends FunctionPointer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public TRITONSERVER_ResponseAllocatorReleaseFn_t(Pointer p) { super(p); } + protected TRITONSERVER_ResponseAllocatorReleaseFn_t() { allocate(); } + private native void allocate(); + public native TRITONSERVER_Error call( + TRITONSERVER_ResponseAllocator allocator, Pointer buffer, Pointer buffer_userp, + @Cast("size_t") long byte_size, @Cast("TRITONSERVER_MemoryType") int memory_type, + @Cast("int64_t") long memory_type_id); +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocatorStartFn_t.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocatorStartFn_t.java new file mode 100644 index 00000000000..62253b4bed1 --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ResponseAllocatorStartFn_t.java @@ -0,0 +1,55 @@ +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + + +/** Type for function that is called to indicate that subsequent + * allocation requests will refer to a new response. + * + * @param allocator The allocator that is provided in the call to + * TRITONSERVER_InferenceRequestSetResponseCallback. + * @param userp The user data pointer that is provided as + * 'response_allocator_userp' in the call to + * TRITONSERVER_InferenceRequestSetResponseCallback. + * @return a TRITONSERVER_Error object if a failure occurs. */ + +/// +/// +/// +/// +/// +/// +@Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONSERVER_ResponseAllocatorStartFn_t extends FunctionPointer { + static { Loader.load(); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public TRITONSERVER_ResponseAllocatorStartFn_t(Pointer p) { super(p); } + protected TRITONSERVER_ResponseAllocatorStartFn_t() { allocate(); } + private native void allocate(); + public native TRITONSERVER_Error call( + TRITONSERVER_ResponseAllocator allocator, Pointer userp); +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_Server.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_Server.java new file mode 100644 index 00000000000..ab656568bd4 --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_Server.java @@ -0,0 +1,35 @@ +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + +@Opaque @Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONSERVER_Server extends Pointer { + /** Empty constructor. Calls {@code super((Pointer)null)}. */ + public TRITONSERVER_Server() { super((Pointer)null); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. 
*/ + public TRITONSERVER_Server(Pointer p) { super(p); } +} diff --git a/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ServerOptions.java b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ServerOptions.java new file mode 100644 index 00000000000..4bb5d2c8892 --- /dev/null +++ b/tritonserver/src/gen/java/org/bytedeco/tritonserver/tritonserver/TRITONSERVER_ServerOptions.java @@ -0,0 +1,41 @@ +// Targeted by JavaCPP version 1.5.7-SNAPSHOT: DO NOT EDIT THIS FILE + +package org.bytedeco.tritonserver.tritonserver; + +import java.nio.*; +import org.bytedeco.javacpp.*; +import org.bytedeco.javacpp.annotation.*; + +import static org.bytedeco.javacpp.presets.javacpp.*; +import org.bytedeco.cuda.cudart.*; +import static org.bytedeco.cuda.global.cudart.*; +import org.bytedeco.cuda.cublas.*; +import static org.bytedeco.cuda.global.cublas.*; +import org.bytedeco.cuda.cudnn.*; +import static org.bytedeco.cuda.global.cudnn.*; +import org.bytedeco.cuda.nvrtc.*; +import static org.bytedeco.cuda.global.nvrtc.*; +import org.bytedeco.tensorrt.nvinfer.*; +import static org.bytedeco.tensorrt.global.nvinfer.*; +import org.bytedeco.tensorrt.nvinfer_plugin.*; +import static org.bytedeco.tensorrt.global.nvinfer_plugin.*; +import org.bytedeco.tensorrt.nvonnxparser.*; +import static org.bytedeco.tensorrt.global.nvonnxparser.*; +import org.bytedeco.tensorrt.nvparsers.*; +import static org.bytedeco.tensorrt.global.nvparsers.*; + +import static org.bytedeco.tritonserver.global.tritonserver.*; + + + +/// +/// +/// +/// +@Opaque @Properties(inherit = org.bytedeco.tritonserver.presets.tritonserver.class) +public class TRITONSERVER_ServerOptions extends Pointer { + /** Empty constructor. Calls {@code super((Pointer)null)}. */ + public TRITONSERVER_ServerOptions() { super((Pointer)null); } + /** Pointer cast constructor. Invokes {@link Pointer#Pointer(Pointer)}. */ + public TRITONSERVER_ServerOptions(Pointer p) { super(p); } +} diff --git a/tritonserver/src/main/java/org/bytedeco/tritonserver/presets/tritonserver.java b/tritonserver/src/main/java/org/bytedeco/tritonserver/presets/tritonserver.java new file mode 100644 index 00000000000..d8ab9a806b1 --- /dev/null +++ b/tritonserver/src/main/java/org/bytedeco/tritonserver/presets/tritonserver.java @@ -0,0 +1,114 @@ +/* + * Copyright (C) 2021 Jack He, Samuel Audet + * + * Licensed either under the Apache License, Version 2.0, or (at your option) + * under the terms of the GNU General Public License as published by + * the Free Software Foundation (subject to the "Classpath" exception), + * either version 2, or any later version (collectively, the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.gnu.org/licenses/ + * http://www.gnu.org/software/classpath/license.html + * + * or as provided in the LICENSE.txt file that accompanied this code. + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.bytedeco.tritonserver.presets; + +import java.util.List; +import org.bytedeco.javacpp.ClassProperties; +import org.bytedeco.javacpp.LoadEnabled; +import org.bytedeco.javacpp.Loader; +import org.bytedeco.javacpp.annotation.Platform; +import org.bytedeco.javacpp.annotation.Properties; +import org.bytedeco.javacpp.tools.Info; +import org.bytedeco.javacpp.tools.InfoMap; +import org.bytedeco.javacpp.tools.InfoMapper; + +import org.bytedeco.cuda.presets.cudart; +import org.bytedeco.cuda.presets.cublas; +import org.bytedeco.cuda.presets.cudnn; +import org.bytedeco.cuda.presets.nvrtc; +import org.bytedeco.tensorrt.presets.nvinfer; +import org.bytedeco.tensorrt.presets.nvinfer_plugin; +import org.bytedeco.tensorrt.presets.nvonnxparser; +import org.bytedeco.tensorrt.presets.nvparsers; + +/** + * + * @author Jack He + */ +@Properties( + inherit = {cublas.class, cudnn.class, nvrtc.class, nvinfer.class, nvinfer_plugin.class, nvonnxparser.class, nvparsers.class}, + value = { + @Platform( + value = {"linux-arm64", "linux-ppc64le", "linux-x86_64", "windows-x86_64"}, + include = {"tritonserver.h", "tritonbackend.h", "tritonrepoagent.h"}, + link = "tritonserver", + includepath = {"/opt/tritonserver/include/triton/core/", "/opt/tritonserver/include/", "/usr/include"}, + linkpath = {"/opt/tritonserver/lib/"} + ), + @Platform( + value = "windows-x86_64", + includepath = "C:/Program Files/NVIDIA GPU Computing Toolkit/TritonServer/include/triton/core/", + linkpath = "C:/Program Files/NVIDIA GPU Computing Toolkit/TritonServer/lib/", + preloadpath = "C:/Program Files/NVIDIA GPU Computing Toolkit/TritonServer/bin/" + ) + }, + target = "org.bytedeco.tritonserver.tritonserver", + global = "org.bytedeco.tritonserver.global.tritonserver" +) +public class tritonserver implements LoadEnabled, InfoMapper { + static { Loader.checkVersion("org.bytedeco", "tritonserver"); } + + @Override public void init(ClassProperties properties) { + String platform = properties.getProperty("platform"); + List<String> preloads = properties.get("platform.preload"); + List<String> resources = properties.get("platform.preloadresource"); + + // Only apply this at load time since we don't want to copy the CUDA libraries here + if (!Loader.isLoadLibraries()) { + return; + } + int i = 0; + String[] libs = {"cudart", "cublasLt", "cublas", "cudnn", "nvrtc", + "cudnn_ops_infer", "cudnn_ops_train", "cudnn_adv_infer", + "cudnn_adv_train", "cudnn_cnn_infer", "cudnn_cnn_train", + "nvinfer", "nvinfer_plugin", "nvonnxparser", "nvparsers"}; + for (String lib : libs) { + if (platform.startsWith("linux")) { + lib += lib.startsWith("cudnn") ? "@.8" : lib.startsWith("nvinfer") ? "@.8" : lib.equals("cudart") ? "@.11.0" : lib.equals("nvrtc") ? "@.11.2" : lib.equals("nvonnxparser") ? "@.8" : lib.equals("nvparsers") ? "@.8" : "@.11"; + } else if (platform.startsWith("windows")) { + lib += lib.startsWith("cudnn") ? "64_8" : lib.startsWith("nvinfer") ? "64_8" : lib.equals("cudart") ? "64_110" : lib.equals("nvrtc") ? "64_112_0" : lib.equals("nvonnxparser") ?
"64_8" : lib.equals("nvparsers") ? "64_8" : "64_11"; + } else { + continue; // no CUDA + } + if (!preloads.contains(lib)) { + preloads.add(i++, lib); + } + } + if (i > 0) { + resources.add("/org/bytedeco/cuda/"); + resources.add("/org/bytedeco/tensorrt/"); + } + } + + public void map(InfoMap infoMap) { + infoMap.putFirst(new Info().enumerate(false)) + .put(new Info("bool").cast().valueTypes("boolean").pointerTypes("boolean[]", "BoolPointer")) + .put(new Info("TRITONSERVER_EXPORT", "TRITONSERVER_DECLSPEC", + "TRITONBACKEND_DECLSPEC", "TRITONBACKEND_ISPEC", + "TRITONREPOAGENT_DECLSPEC", "TRITONREPOAGENT_ISPEC").cppTypes().annotations()) + ; + } +} diff --git a/tritonserver/src/main/java9/module-info.java b/tritonserver/src/main/java9/module-info.java new file mode 100644 index 00000000000..3fea113a11a --- /dev/null +++ b/tritonserver/src/main/java9/module-info.java @@ -0,0 +1,8 @@ +module org.bytedeco.tritonserver { + requires transitive org.bytedeco.javacpp; + requires transitive org.bytedeco.cuda; + requires transitive org.bytedeco.tensorrt; + exports org.bytedeco.tritonserver.global; + exports org.bytedeco.tritonserver.presets; + exports org.bytedeco.tritonserver; +}