From e2029403fee89fc6e239c6e90df05f119013588c Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek
Date: Wed, 15 Sep 2021 23:28:26 -0500
Subject: [PATCH] [Hexagon] Implement model launcher (#8986)

* [Hexagon] Implement model launcher

This implements a launcher that allows execution of ML models compiled
into a shared library on Hexagon DSP. It consists of two parts: the
Hexagon-side skel library and `launcher_android` to be used from
`adb shell`.

The launcher does not implement any performance-related optimizations;
it's built on top of the `graph_executor` from TVM runtime, and so it
executes a single layer at a time. This launcher should not be used to
measure performance (because it will be highly suboptimal); its main
purpose is to help in validating correctness.

* Address review comments: explanations and elaborations in README.md

* Rename cmake variables to be the same as for TVM
  - `HEXAGON_SDK_ROOT` -> `USE_HEXAGON_SDK`
  - `HEXAGON_ARCH` -> `USE_HEXAGON_ARCH`

* Address more review comments

* Error out in cmake when USE_HEXAGON_SDK/USE_HEXAGON_ARCH are undefined

* Change FATAL_ERROR to SEND_ERROR in cmake file
---
 cmake/modules/HexagonSDK.cmake                |   5 +
 src/runtime/hexagon/launcher/CMakeLists.txt   | 156 ++++++++++++
 src/runtime/hexagon/launcher/README.md        | 175 +++++++++++++
 .../hexagon/launcher/launcher_android.cc      | 164 +++++++++++++
 src/runtime/hexagon/launcher/launcher_core.cc | 176 ++++++++++++++
 src/runtime/hexagon/launcher/launcher_core.h  | 132 ++++++++++
 .../hexagon/launcher/launcher_hexagon.cc      | 229 ++++++++++++++++++
 src/runtime/hexagon/launcher/launcher_main.cc | 148 +++++++++++
 src/runtime/hexagon/launcher/launcher_rpc.idl |  33 +++
 src/runtime/hexagon/launcher/launcher_util.cc |  68 ++++++
 src/runtime/hexagon/launcher/launcher_util.h  |  34 +++
 11 files changed, 1320 insertions(+)
 create mode 100644 src/runtime/hexagon/launcher/CMakeLists.txt
 create mode 100644 src/runtime/hexagon/launcher/README.md
 create mode 100644 src/runtime/hexagon/launcher/launcher_android.cc
 create mode 100644 src/runtime/hexagon/launcher/launcher_core.cc
 create mode 100644 src/runtime/hexagon/launcher/launcher_core.h
 create mode 100644 src/runtime/hexagon/launcher/launcher_hexagon.cc
 create mode 100644 src/runtime/hexagon/launcher/launcher_main.cc
 create mode 100644 src/runtime/hexagon/launcher/launcher_rpc.idl
 create mode 100644 src/runtime/hexagon/launcher/launcher_util.cc
 create mode 100644 src/runtime/hexagon/launcher/launcher_util.h

diff --git a/cmake/modules/HexagonSDK.cmake b/cmake/modules/HexagonSDK.cmake
index 9541f5be821c..42785116214e 100644
--- a/cmake/modules/HexagonSDK.cmake
+++ b/cmake/modules/HexagonSDK.cmake
@@ -76,6 +76,7 @@ function(find_hexagon_sdk_root HEXAGON_SDK_PATH HEXAGON_ARCH)
   # - HEXAGON_SDK_VERSION
   # - HEXAGON_SDK_INCLUDES
   # - HEXAGON_QURT_INCLUDES
+  # - HEXAGON_QURT_LIBS
   # - HEXAGON_RPCMEM_ROOT
   # - HEXAGON_REMOTE_ROOT
   # - HEXAGON_QAIC_EXE
@@ -95,6 +96,8 @@ function(find_hexagon_sdk_root HEXAGON_SDK_PATH HEXAGON_ARCH)
     set_parent(HEXAGON_QURT_INCLUDES
       "${HEXAGON_SDK_ROOT}/libs/common/qurt/${HEXARCH_DIR}/include/posix"
       "${HEXAGON_SDK_ROOT}/libs/common/qurt/${HEXARCH_DIR}/include/qurt")
+    set_parent(HEXAGON_QURT_LIBS
+      "${HEXAGON_SDK_ROOT}/libs/common/qurt/${HEXARCH_DIR}/lib")
     set_parent(HEXAGON_RPCMEM_ROOT "${HEXAGON_SDK_ROOT}/libs/common/rpcmem")
     set_parent(HEXAGON_REMOTE_ROOT
       "${HEXAGON_SDK_ROOT}/libs/common/remote/ship/android_Release_aarch64")
@@ -111,6 +114,8 @@ function(find_hexagon_sdk_root HEXAGON_SDK_PATH HEXAGON_ARCH)
     set_parent(HEXAGON_QURT_INCLUDES
"${HEXAGON_SDK_ROOT}/rtos/qurt/${HEXARCH_DIR}/include/posix" "${HEXAGON_SDK_ROOT}/rtos/qurt/${HEXARCH_DIR}/include/qurt") + set_parent(HEXAGON_QURT_LIBS + "${HEXAGON_SDK_ROOT}/rtos/qurt/${HEXARCH_DIR}/lib/pic") set_parent(HEXAGON_RPCMEM_ROOT "${HEXAGON_SDK_ROOT}/ipc/fastrpc/rpcmem") set_parent(HEXAGON_REMOTE_ROOT # libadsprpc.so "${HEXAGON_SDK_ROOT}/ipc/fastrpc/remote/ship/android_aarch64") diff --git a/src/runtime/hexagon/launcher/CMakeLists.txt b/src/runtime/hexagon/launcher/CMakeLists.txt new file mode 100644 index 000000000000..d3a2f4f8161d --- /dev/null +++ b/src/runtime/hexagon/launcher/CMakeLists.txt @@ -0,0 +1,156 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +cmake_minimum_required(VERSION 3.2) +project(HexagonLauncher C CXX) + +if(NOT "${FASTRPC_LIBS}" STREQUAL "SKEL" AND + NOT "${FASTRPC_LIBS}" STREQUAL "STUB") + message(SEND_ERROR "Please set FASTRPC_LIBS to either SKEL or STUB") +endif() + +if(NOT DEFINED USE_HEXAGON_SDK) + message(SEND_ERROR "Please set USE_HEXAGON_SDK to the location of Hexagon SDK") +endif() +if (NOT DEFINED USE_HEXAGON_ARCH) + message(SEND_ERROR "Please set USE_HEXAGON_ARCH to the Hexagon architecture version") +endif() + +include(../../../../cmake/modules/HexagonSDK.cmake) + +find_hexagon_sdk_root("${USE_HEXAGON_SDK}" "${USE_HEXAGON_ARCH}") + +include_directories(SYSTEM ${HEXAGON_SDK_INCLUDES} ${HEXAGON_REMOTE_ROOT}) + +set(QAIC_EXE "${HEXAGON_QAIC_EXE}") +foreach(INCDIR IN LISTS HEXAGON_SDK_INCLUDES HEXAGON_REMOTE_ROOT) + list(APPEND QAIC_FLAGS "-I${INCDIR}") +endforeach() + +set(LAUNCHER_SRC "${CMAKE_CURRENT_SOURCE_DIR}") +set(CMAKE_SKIP_RPATH TRUE) + +# Qaic for the domain header. +# +# Don't add paths to these filenames, or otherwise cmake may spontaneously +# add -o option to the qaic invocation (with an undesirable path). +set(LAUNCHER_RPC_IDL "launcher_rpc.idl") +set(LAUNCHER_RPC_H "launcher_rpc.h") +set(LAUNCHER_RPC_SKEL_C "launcher_rpc_skel.c") +set(LAUNCHER_RPC_STUB_C "launcher_rpc_stub.c") + +add_custom_command( + OUTPUT ${LAUNCHER_RPC_SKEL_C} ${LAUNCHER_RPC_STUB_C} + "${LAUNCHER_SRC}/${LAUNCHER_RPC_H}" + COMMAND ${QAIC_EXE} ${QAIC_FLAGS} + "${LAUNCHER_SRC}/${LAUNCHER_RPC_IDL}" + COMMAND ${CMAKE_COMMAND} -E rename "${LAUNCHER_RPC_H}" + "${LAUNCHER_SRC}/${LAUNCHER_RPC_H}" + MAIN_DEPENDENCY "${LAUNCHER_SRC}/${LAUNCHER_RPC_IDL}" +) + + +if("${FASTRPC_LIBS}" STREQUAL "SKEL") + # Skel libraries. 
+ # + if (NOT DEFINED TVM_RUNTIME_HEXAGON) + message(SEND_ERROR "Please set TVM_RUNTIME_HEXAGON=/path/to/libtvm_runtime.a") + endif() + + include_directories(SYSTEM ${HEXAGON_QURT_INCLUDES}) + include_directories( + "${LAUNCHER_SRC}" + "${LAUNCHER_SRC}/../../../../include" + "${LAUNCHER_SRC}/../../../../3rdparty/dlpack/include" + "${LAUNCHER_SRC}/../../../../3rdparty/dmlc-core/include" + ) + link_directories(${HEXAGON_QURT_LIBS}) + + add_definitions(-D_MACH_I32=int) + add_definitions(-DDMLC_CXX11_THREAD_LOCAL=0) + add_definitions(-DDMLC_USE_LOGGING_LIBRARY=) + + # Extra compile flags (both C and C++). + set(EXTRA_COMP_FLAGS + "-O3" + "-m${USE_HEXAGON_ARCH}" + ) + string(REGEX REPLACE ";" " " EXTRA_COMP_FLAGS_STR "${EXTRA_COMP_FLAGS}") + set(CMAKE_C_FLAGS "${EXTRA_COMP_FLAGS_STR} ${CMAKE_C_FLAGS}") + set(CMAKE_CXX_FLAGS "${EXTRA_COMP_FLAGS_STR} ${CMAKE_CXX_FLAGS}") + + set(EXTRA_LINK_FLAGS + "-lposix" + "-lqurt" + "-Wl,--export-dynamic" + "-Wl,--whole-archive ${TVM_RUNTIME_HEXAGON} -Wl,--no-whole-archive" + "-Wl,--defsym=HEAP_SIZE=0x40000000" + ) + string(REGEX REPLACE ";" " " EXTRA_LINK_FLAGS_STR "${EXTRA_LINK_FLAGS}") + + set(SKEL_SRCS + "launcher_core.cc" + "launcher_hexagon.cc" + ) + add_library(launcher_rpc_skel SHARED + "${LAUNCHER_SRC}/${LAUNCHER_RPC_H}" + "${LAUNCHER_RPC_SKEL_C}" + "${SKEL_SRCS}" + ) + + # Extra linker flags for linking shared libraries. + set_target_properties(launcher_rpc_skel PROPERTIES + LINK_FLAGS ${EXTRA_LINK_FLAGS_STR} + ) +else() + # Stub libraries. + # + if (NOT DEFINED TVM_RUNTIME_ANDROID) + message(SEND_ERROR "Please set TVM_RUNTIME_ANDROID=/path/to/libtvm_runtime.so") + endif() + + include_directories(SYSTEM + "${HEXAGON_SDK_INCLUDES}" + "${HEXAGON_RPCMEM_ROOT}/inc" + ) + include_directories( + "${LAUNCHER_SRC}" + "${LAUNCHER_SRC}/../../../../include" + "${LAUNCHER_SRC}/../../../../3rdparty/dlpack/include" + "${LAUNCHER_SRC}/../../../../3rdparty/dmlc-core/include" + ) + link_directories(${HEXAGON_REMOTE_ROOT}) + + add_definitions(-DDMLC_USE_LOGGING_LIBRARY=) + + set(STUB_SRCS + "launcher_android.cc" + "launcher_core.cc" + "launcher_main.cc" + "launcher_util.cc" + ) + + add_executable(launcher_android + "${STUB_SRCS}" + "${LAUNCHER_RPC_STUB_C}" + ) + target_link_libraries(launcher_android cdsprpc log) + + set_target_properties(launcher_android PROPERTIES + LINK_FLAGS "${TVM_RUNTIME_ANDROID}" + ) +endif() diff --git a/src/runtime/hexagon/launcher/README.md b/src/runtime/hexagon/launcher/README.md new file mode 100644 index 000000000000..a8a570918514 --- /dev/null +++ b/src/runtime/hexagon/launcher/README.md @@ -0,0 +1,175 @@ + + + + + + + + + + + + + + + + +# Hexagon Graph Launcher + +## Compilation + +The launcher consists of two parts: part running on Hexagon, and part running +on Android. They need to be compiled separately. Since some source files are +shared between these two parts, make sure to delete all object files between +compilations. Compile the Hexagon code first. + +The supported Snapdragon architectures are 855, 865, and 888. + +### Prerequisites + +1. Android NDK version r19c or later. +2. Hexagon SDK version 4.0.0 or later. + +Android NDK can be downloaded from https://developer.android.com/ndk. +Hexagon SDK is available at //developer.qualcomm.com/software/hexagon-dsp-sdk. + +### Compilation of the Hexagon part + +1. Build the static version of TVM runtime for Hexagon. Use Hexagon clang + from the Hexagon SDK. This step is the same as building the shared version, + except at the cmake step, add `-DBUILD_STATIC_RUNTIME=ON`. 
The compilation + step should create `libtvm_runtime.a`. + +2. Create a subdirectory for the build files, and run `cmake` with the + following variables set: + - `FASTRPC_LIBS=SKEL` + - `USE_HEXAGON_SDK` to the path to the Hexagon SDK + - `CMAKE_C_COMPILER=hexagon-clang` + - `CMAKE_CXX_COMPILER=hexagon-clang++` + - `USE_HEXAGON_ARCH` to one of v65, v66, v68 + - `TVM_RUNTIME_HEXAGON=/path/to/libtvm_runtime.a` _statically_ linked + TVM runtime + + Make sure to provide the path to launcher's `CMakeLists.txt` directory + in `cmake` invocation. + +3. Run `make`. This will create `liblauncher_rpc_skel.so`. + +### Compilation of the Android part + +1. Build TVM runtime for Android, using clang for AArch64 from the Android + NDK. Unlike in the Hexagon case, this should be the dynamic library (which + is the default), i.e. `libtvm_runtime.so`. + +2. Create a subdirectory for the build files (different from the one used for + Hexagon files), and run `cmake` with the following variables set: + - `FASTRPC_LIBS=STUB` + - `USE_HEXAGON_SDK` to the path to the Hexagon SDK + - `CMAKE_C_COMPILER=aarch64-linux-android28-clang` (or later) + - `CMAKE_CXX_COMPILER=aarch64-linux-android28-clang++` (or later) + - `USE_HEXAGON_ARCH` to one of v65, v66, v68 (same as for the Hexagon part) + - `TVM_RUNTIME_ANDROID=/path/to/libtvm_runtime.so` dynamically or + statically linked TVM runtime + +3. Run `make`. This will create `launcher_android`. + +## Execution + +From the Android shell, do +``` +./launcher_android --in_config input.json --out_config output.json +``` + +You may need to add the location of `libtvm_runtime.so` to `LD_LIBRARY_PATH`. +See below for more information about the setup and launcher's inputs. + +### Preparation steps + +Copy the following binaries to the device: +- `liblauncher_rpc_skel.so`: created by the compilation step for Hexagon, +- `libgcc.so`: take this one from the Hexagon toolchain, +- `launcher_android`: created by the compilation step for Android, +- `libtvm_runtime.so`: built for Android. + +These are only the binaries related to the launcher itself. To run a model +copy the shared object with the model and the model JSON file over to the +device (both are obtained from relay). Also, copy all input files for the +model as well. + +The following snippet illustrates how to obtain the shared object and the +JSON file from a TFLite model (using Inception V3 as an example): + +``` +# Skipped imports, etc. + +with open("inception_v3.tflite", "rb") as f: + tflite_model_buf = f.read() +tflite_model = tflite.Model.Model.GetRootAsModel(tflite_model_buf, 0) + +shape_dict = { "input": [1,299,299,3] } +dtype_dict = { "input": "float32" } + +mod, params = relay.frontend.from_tflite( + tflite_model, shape_dict=shape_dict, dtype_dict=dtype_dict +) + +target = tvm.target.hexagon('v68', link_params=True) +with tvm.transform.PassContext(opt_level=3): + lib = relay.build(mod, target, target_host=target, params=params, mod_name="default") + +# Save model.so and model.json: +with open('model.json', 'w') as f: + f.write(lib.get_graph_json()) +lib.get_lib().save('model.so') +``` + +The final thing is to prepare a JSON configuration file for the launcher. +The JSON has two attributes describing the model: `model-library` and +`model-json`, and an attribute `inputs`, which is a list of records, one +for each input file. +An input file record has three attributes: `file`, `shape`, and `dtype`. 
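+
+The input file is read by the launcher as raw tensor data matching the
+declared `shape` and `dtype`, so it can be produced, for example, with
+NumPy's `tofile`. A minimal sketch (the random data below is only a
+stand-in for real, model-specific preprocessing, and the file name is
+just an example):
+```
+import numpy as np
+
+# Stand-in for real preprocessing: the launcher only expects the file to
+# contain the raw bytes of a tensor with the declared shape and dtype.
+data = np.random.rand(1, 299, 299, 3).astype("float32")
+data.tofile("panda_299x299_fp.dat")
+```
+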
+ +Below is an example of the input config file for Inception V3: +``` +{ + "model-library": "inceptionv3-float32.so", + "model-json": "inceptionv3-float32.json", + "inputs" : [ + { + "file": "panda_299x299_fp.dat", + "shape": [1,299,299,3], + "dtype": "float32" + } + ] +} +``` + +The launcher will then create the output JSON file (with the name given via +`--out_config`) containing information about the execution time and the model +outputs. The output JSON file has three attributes: "pcycles", "usecs" that +contain the execution duration in terms of processor cycles and microseconds +respectivaly, and an attribute `outputs`, which is a list of output file records +whose syntax is identical to the input file records in the input file. +A sample output JSON from running the Inception V3 model may look like +``` +{ + "pcycles": 112965680178, + "usecs": 79532302, + "outputs": [ + { + "file": "output0.dat", + "shape": [1, 1001], + "dtype": "float32" + } + ] +} +``` + +# Disclaimer + +The launcher does not perform any correctness verification. In order to verify +correctness, the user needs to copy the output files from the device and +verify their contents. + +This launcher is intended for use with prototyping and does not utilize any +performance acceleration, as such the measured performance may be very poor. diff --git a/src/runtime/hexagon/launcher/launcher_android.cc b/src/runtime/hexagon/launcher/launcher_android.cc new file mode 100644 index 000000000000..c0e428cb63ca --- /dev/null +++ b/src/runtime/hexagon/launcher/launcher_android.cc @@ -0,0 +1,164 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "launcher_core.h" +#include "launcher_rpc.h" + +AEEResult enable_unsigned_pd(bool enable) { + remote_rpc_control_unsigned_module data{static_cast(enable), CDSP_DOMAIN_ID}; + AEEResult rc = remote_session_control(DSPRPC_CONTROL_UNSIGNED_MODULE, &data, sizeof(data)); + if (rc != AEE_SUCCESS) { + std::cout << "error " << (enable ? 
"enabling" : "disabling") << " unsigned PD\n"; + } + return rc; +} + +AEEResult set_remote_stack_size(int size) { + remote_rpc_thread_params th_data{CDSP_DOMAIN_ID, -1, size}; + AEEResult rc = remote_session_control(FASTRPC_THREAD_PARAMS, &th_data, sizeof(th_data)); + if (rc != AEE_SUCCESS) { + std::cout << "error setting remote stack size: " << std::hex << rc << '\n'; + } + return rc; +} + +struct RPCChannel : public ExecutionSession { + explicit RPCChannel(const std::string& uri) { + enable_unsigned_pd(true); + set_remote_stack_size(128 * 1024); + + int rc = launcher_rpc_open(uri.c_str(), &handle); + if (rc != AEE_SUCCESS) { + handle = -1; + } + } + + ~RPCChannel() { + if (handle == -1) { + return; + } + + for (void* ptr : allocations) { + rpcmem_free(ptr); + } + if (model_loaded) { + unload_model(); + } + launcher_rpc_close(handle); + handle = -1; + } + + void* alloc_mem(size_t nbytes, size_t align) override { + void* host_ptr = rpcmem_alloc(RPCMEM_HEAP_ID_SYSTEM, RPCMEM_DEFAULT_FLAGS, nbytes); + if (host_ptr != nullptr) { + allocations.push_back(host_ptr); + } + return host_ptr; + } + + void free_mem(void* addr) override { + auto f = std::find(allocations.begin(), allocations.end(), addr); + if (f != allocations.end()) { + allocations.erase(f); + rpcmem_free(addr); + } + } + + bool load_model(const std::string& model_path, const std::string& model_json) override { + AEEResult rc = launcher_rpc_load(handle, model_path.c_str(), model_json.c_str()); + if (rc != AEE_SUCCESS) { + std::cout << "error loading graph module: " << std::hex << rc << '\n'; + } else { + model_loaded = true; + } + return rc == AEE_SUCCESS; + } + + bool unload_model() override { + AEEResult rc = launcher_rpc_unload(handle); + if (rc != AEE_SUCCESS) { + std::cout << "error unloading model: " << std::hex << rc << '\n'; + } + model_loaded = false; + return rc == AEE_SUCCESS; + } + + bool set_input(int input_idx, const tensor_meta* input_meta, const void* input_data) override { + AEEResult rc = launcher_rpc_set_input( + handle, input_idx, reinterpret_cast(input_meta), + input_meta->meta_size(), reinterpret_cast(input_data), + input_meta->data_size()); + if (rc != AEE_SUCCESS) { + std::cout << "error setting model input no." << input_idx << ": " << std::hex << rc << '\n'; + } + return rc == AEE_SUCCESS; + } + + bool run(uint64_t* pcycles, uint64_t* usecs) override { + AEEResult rc = launcher_rpc_run(handle, pcycles, usecs); + if (rc != AEE_SUCCESS) { + std::cout << "error running model: " << std::hex << rc << '\n'; + } + return rc == AEE_SUCCESS; + } + + bool get_num_outputs(int* num_outputs) override { + AEEResult rc = launcher_rpc_get_num_outputs(handle, num_outputs); + if (rc != AEE_SUCCESS) { + std::cout << "error getting number of outputs: " << std::hex << rc << '\n'; + } + return rc == AEE_SUCCESS; + } + + bool get_output(int output_idx, tensor_meta* output_meta, int meta_size, void* output_data, + int data_size) override { + AEEResult rc = launcher_rpc_get_output( + handle, output_idx, reinterpret_cast(output_meta), meta_size, + reinterpret_cast(output_data), data_size); + if (rc != AEE_SUCCESS) { + std::cout << "error getting output no." 
<< output_idx << ": " << std::hex << rc << '\n'; + } + return rc == AEE_SUCCESS; + } + + bool model_loaded = false; + remote_handle64 handle = -1; + std::vector allocations; +}; + +ExecutionSession* create_execution_session() { + auto* session = new RPCChannel(launcher_rpc_URI CDSP_DOMAIN); + if (session->handle == -1) { + delete session; + session = nullptr; + std::cout << "Error opening FastRPC channel\n"; + } + return session; +} diff --git a/src/runtime/hexagon/launcher/launcher_core.cc b/src/runtime/hexagon/launcher/launcher_core.cc new file mode 100644 index 000000000000..364e7abfd171 --- /dev/null +++ b/src/runtime/hexagon/launcher/launcher_core.cc @@ -0,0 +1,176 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "launcher_core.h" + +#include +#include +#include + +#include +#include +#include +#include +#include + +const std::string TensorConfig::file_key = "file"; // NOLINT(runtime/string) +const std::string TensorConfig::shape_key = "shape"; // NOLINT(runtime/string) +const std::string TensorConfig::dtype_key = "dtype"; // NOLINT(runtime/string) + +std::string tensor_meta::to_string() const { + std::stringstream out; + out << "ndim=" << ndim << ", dtype=" << tvm::runtime::DLDataType2String(dtype) << ", shape="; + for (int i = 0; i != ndim; ++i) { + out << shape[i]; + if (i + 1 < ndim) { + out << 'x'; + } + } + return out.str(); +} + +void TensorConfig::Load(dmlc::JSONReader* reader) { + reader->BeginObject(); + std::string key; + while (!bad && reader->NextObjectItem(&key)) { + if (key == file_key) { + reader->Read(&file_name); + } else if (key == shape_key) { + reader->Read(&shape); + if (shape.empty()) { + std::cout << "error: empty shape\n"; + bad = true; + } + } else if (key == dtype_key) { + reader->Read(&dtype); + } else { + std::cout << "unknown tensor config key: " << key << '\n'; + bad = true; + } + } +} + +void TensorConfig::Save(dmlc::JSONWriter* writer) const { + writer->BeginObject(true); + writer->WriteObjectKeyValue(file_key, file_name); + writer->WriteObjectKeyValue(shape_key, shape); + writer->WriteObjectKeyValue(dtype_key, dtype); + writer->EndObject(); +} + +void ModelConfig::Load(dmlc::JSONReader* reader) { + reader->BeginObject(); + std::string key; + while (!bad && reader->NextObjectItem(&key)) { + if (key == "model-library") { + reader->Read(&model_library); + } else if (key == "model-json") { + reader->Read(&model_json); + } else if (key == "inputs") { + reader->Read(&inputs); + bad = std::any_of(inputs.begin(), inputs.end(), [](auto t) { return t.bad; }); + } else { + std::cout << "unknown model config key: " << key << '\n'; + bad = true; + } + } +} + +void OutputConfig::Save(dmlc::JSONWriter* writer) const { + writer->BeginObject(true); + writer->WriteObjectKeyValue("pcycles", 
pcycles); + writer->WriteObjectKeyValue("usecs", usecs); + writer->WriteObjectKeyValue("outputs", outputs); + writer->EndObject(); +} + +bool read_model_config(const std::string& file_name, ModelConfig* model_config) { + if (model_config == nullptr) { + return false; + } + std::ifstream mfc(file_name); + if (!mfc.is_open()) { + return false; + } + dmlc::JSONReader reader(&mfc); + model_config->Load(&reader); + if (model_config->bad || !mfc) { + return false; + } + return true; +} + +bool write_output_config(const std::string& file_name, OutputConfig* output_config) { + std::ofstream ofc(file_name); + if (!ofc.is_open()) { + return false; + } + dmlc::JSONWriter writer(&ofc); + output_config->Save(&writer); + if (!ofc) { + return false; + } + return true; +} + +Model::Model(tvm::runtime::Module executor, tvm::runtime::Module module, std::string json) + : graph_executor(executor), graph_module(module), graph_json(json) { + // Lookup "run" ahead of time to reduce overhead in the model execution. + run = get_module_func(graph_executor, "run"); +} + +const tvm::runtime::PackedFunc get_runtime_func(const std::string& name) { + if (const tvm::runtime::PackedFunc* pf = tvm::runtime::Registry::Get(name)) { + return *pf; + } + return tvm::runtime::PackedFunc(); +} + +const tvm::runtime::PackedFunc get_module_func(tvm::runtime::Module module, + const std::string& name) { + return module.GetFunction(name, false); +} + +void reset_device_api() { + const tvm::runtime::PackedFunc api = get_runtime_func("device_api.cpu"); + tvm::runtime::Registry::Register("device_api.hexagon", true).set_body(api); +} + +tvm::runtime::Module load_module(const std::string& file_name) { + static const tvm::runtime::PackedFunc loader = get_runtime_func("runtime.module.loadfile_so"); + tvm::runtime::TVMRetValue rv = loader(file_name); + if (rv.type_code() == kTVMModuleHandle) { + return rv.operator tvm::runtime::Module(); + } + return tvm::runtime::Module(); +} + +tvm::runtime::Module create_graph_executor(const std::string& graph_json, + tvm::runtime::Module graph_module, + tvm::runtime::Device device) { + std::string launcher_name = "tvm.graph_executor.create"; + + const tvm::runtime::PackedFunc create_executor = get_runtime_func(launcher_name); + uint64_t device_type = device.device_type; + uint64_t device_id = device.device_id; + + // Use default param lookup function (linked into the module). + tvm::runtime::TVMRetValue rv = create_executor(graph_json, graph_module, device_type, device_id); + return rv.operator tvm::runtime::Module(); +} diff --git a/src/runtime/hexagon/launcher/launcher_core.h b/src/runtime/hexagon/launcher/launcher_core.h new file mode 100644 index 000000000000..e799e1c798cb --- /dev/null +++ b/src/runtime/hexagon/launcher/launcher_core.h @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#ifndef TVM_RUNTIME_HEXAGON_LAUNCHER_LAUNCHER_CORE_H_ +#define TVM_RUNTIME_HEXAGON_LAUNCHER_LAUNCHER_CORE_H_ + +#include +#include +#include +#include +#include +#include + +#include +#include + +struct tensor_meta { + int ndim; + DLDataType dtype; + int64_t shape[]; + + int meta_size() const { return meta_size(ndim); } + int data_size() const { + int size = tvm::runtime::DataType(dtype).bytes(); + for (int d = 0; d != ndim; ++d) { + size *= shape[d]; + } + return size; + } + + static int meta_size(int ndim) { return sizeof(tensor_meta) + ndim * sizeof(int64_t); } + + std::string to_string() const; +}; + +struct TensorConfig { + static const std::string file_key; + static const std::string shape_key; + static const std::string dtype_key; + + std::string file_name; + std::vector shape; + std::string dtype; + bool bad = false; + + void Load(dmlc::JSONReader* reader); + void Save(dmlc::JSONWriter* writer) const; +}; + +struct ModelConfig { + std::string model_library; + std::string model_json; + std::vector inputs; + bool bad = false; + + void Load(dmlc::JSONReader* reader); +}; + +struct OutputConfig { + uint64_t pcycles; + uint64_t usecs; + std::vector outputs; + + void Save(dmlc::JSONWriter* writer) const; +}; + +struct Model { + Model(tvm::runtime::Module executor, tvm::runtime::Module module, std::string json); + + tvm::runtime::Module graph_executor; + tvm::runtime::Module graph_module; + std::string graph_json; + + static tvm::runtime::Device device() { + return tvm::runtime::Device{static_cast(kDLHexagon), 0}; + } + + tvm::runtime::PackedFunc run; +}; + +struct ExecutionSession { + template + T* alloc(size_t bytes, size_t align = 1) { + return reinterpret_cast(alloc_mem(bytes, align)); + } + void free(void* ptr) { free_mem(ptr); } + + virtual void* alloc_mem(size_t bytes, size_t align) = 0; + virtual void free_mem(void* ptr) = 0; + + virtual bool load_model(const std::string& model_path, const std::string& model_json) = 0; + virtual bool unload_model() = 0; + + virtual bool set_input(int input_idx, const tensor_meta* input_meta, const void* input_data) = 0; + virtual bool run(uint64_t* pcycles, uint64_t* usecs) = 0; + virtual bool get_num_outputs(int* num_outputs) = 0; + virtual bool get_output(int output_idx, tensor_meta* output_meta, int meta_size, + void* output_data, int data_size) = 0; +}; + +bool read_model_config(const std::string& file_name, ModelConfig* model_config); +bool write_output_config(const std::string& file_name, OutputConfig* output_config); + +void reset_device_api(); + +tvm::runtime::Module load_module(const std::string& file_name); + +const tvm::runtime::PackedFunc get_runtime_func(const std::string& name); +const tvm::runtime::PackedFunc get_module_func(tvm::runtime::Module module, + const std::string& name); + +tvm::runtime::Module create_graph_executor(const std::string& graph_json, + tvm::runtime::Module graph_module, + tvm::runtime::Device device); + +#endif // TVM_RUNTIME_HEXAGON_LAUNCHER_LAUNCHER_CORE_H_ diff --git a/src/runtime/hexagon/launcher/launcher_hexagon.cc b/src/runtime/hexagon/launcher/launcher_hexagon.cc new file mode 100644 index 000000000000..0a5d1f55e0c2 --- /dev/null +++ b/src/runtime/hexagon/launcher/launcher_hexagon.cc @@ -0,0 +1,229 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +extern "C" { +#include +#include +#include +#include +#include +#include +} + +#include +#include +#include + +#include "launcher_core.h" +#include "launcher_rpc.h" + +static std::unique_ptr TheModel; + +static AEEResult error_too_small(const std::string& func_name, const std::string& value_name, + int given, int needed) { + FARF(ERROR, "%s: %s value too small (%d), need at least %d", func_name.c_str(), + value_name.c_str(), given, needed); + return AEE_EBADPARM; +} + +int __QAIC_HEADER(launcher_rpc_open)(const char* uri, remote_handle64* handle) { + *handle = 0; // Just use any value. + reset_device_api(); + return AEE_SUCCESS; +} + +int __QAIC_HEADER(launcher_rpc_close)(remote_handle64 handle) { + // Comment to stop clang-format from single-lining this function. + return AEE_SUCCESS; +} + +AEEResult __QAIC_HEADER(launcher_rpc_load)(remote_handle64 handle, const char* module_path, + const char* graph_json) { + if (TheModel) { + // Need to unload first. + FARF(ERROR, "%s: model already loaded, unload first", __func__); + return AEE_EUNABLETOLOAD; + } + + tvm::runtime::Module module = load_module(module_path); + tvm::runtime::Module executor = create_graph_executor(graph_json, module, Model::device()); + + TheModel = std::make_unique(executor, module, graph_json); + return AEE_SUCCESS; +} + +AEEResult __QAIC_HEADER(launcher_rpc_unload)(remote_handle64 handle) { + if (TheModel) { + TheModel.reset(); + } + return AEE_SUCCESS; +} + +AEEResult __QAIC_HEADER(launcher_rpc_get_num_inputs)(remote_handle64 handle, int* num_inputs) { + if (!TheModel) { + // No model created. + return AEE_EBADSTATE; + } + + tvm::runtime::PackedFunc get_num_inputs = + get_module_func(TheModel->graph_executor, "get_num_inputs"); + *num_inputs = get_num_inputs(); + return AEE_SUCCESS; +} + +AEEResult __QAIC_HEADER(launcher_rpc_set_input)(remote_handle64 handle, int input_idx, + const unsigned char* input_meta, int meta_size, + const unsigned char* input_value, int value_size) { + if (!TheModel) { + // No model created. 
+ FARF(ERROR, "%s: no model created", __func__); + return AEE_EBADSTATE; + } + + const auto* meta = reinterpret_cast(input_meta); + if (meta_size < meta->meta_size()) { + return error_too_small(__func__, "meta_size", meta_size, meta->meta_size()); + } + if (value_size < meta->data_size()) { + return error_too_small(__func__, "value_size", value_size, meta->data_size()); + } + + DLTensor tensor{ + const_cast(input_value), + Model::device(), + meta->ndim, + meta->dtype, + const_cast(meta->shape), + /*strides*/ nullptr, + /*byte_offset*/ 0, + }; + DLManagedTensor managed{tensor, /*manager_ctx*/ nullptr, /*deleter*/ nullptr}; + + auto input = tvm::runtime::NDArray::FromDLPack(&managed); + + tvm::runtime::PackedFunc set_input = get_module_func(TheModel->graph_executor, "set_input"); + set_input(input_idx, input); + + return AEE_SUCCESS; +} + +AEEResult __QAIC_HEADER(launcher_rpc_get_num_outputs)(remote_handle64 handle, int* num_outputs) { + if (!TheModel) { + // No model created. + return AEE_EBADSTATE; + } + + tvm::runtime::PackedFunc get_num_outputs = + get_module_func(TheModel->graph_executor, "get_num_outputs"); + *num_outputs = get_num_outputs(); + return AEE_SUCCESS; +} + +AEEResult __QAIC_HEADER(launcher_rpc_get_output)(remote_handle64 handle, int output_idx, + unsigned char* output_meta, int meta_size, + unsigned char* output_value, int value_size) { + if (!TheModel) { + // No model created. + return AEE_EBADSTATE; + } + if (meta_size < 0 || value_size < 0) { + return AEE_EBADPARM; + } + if ((output_meta == nullptr && meta_size != 0) || (output_value == nullptr && value_size != 0)) { + // If the pointer is null, the size must be 0. + return AEE_EBADPARM; + } + + tvm::runtime::PackedFunc get_output = get_module_func(TheModel->graph_executor, "get_output"); + tvm::runtime::NDArray output = get_output(output_idx); + + if (meta_size != 0) { + auto* meta = reinterpret_cast(output_meta); + if (meta_size < meta->meta_size(output->ndim)) { + return error_too_small(__func__, "meta_size", meta_size, meta->meta_size(output->ndim)); + } + + meta->ndim = output->ndim; + meta->dtype = output->dtype; + std::copy(&output->shape[0], &output->shape[output->ndim], meta->shape); + } + + if (value_size != 0) { + size_t data_size = tvm::runtime::GetDataSize(*output.operator->()); + if (value_size < data_size) { + return error_too_small(__func__, "value_size", value_size, data_size); + } + + auto data = reinterpret_cast(output->data); + std::copy(data, data + data_size, output_value); + } + + return AEE_SUCCESS; +} + +AEEResult __QAIC_HEADER(launcher_rpc_run)(remote_handle64 handle, uint64_t* pcycles, + uint64_t* usecs) { + if (!TheModel) { + // No model created. + FARF(ERROR, "%s: no model created", __func__); + return AEE_EBADSTATE; + } + + // Reserve HVX. + int res = qurt_hvx_reserve(QURT_HVX_RESERVE_ALL_AVAILABLE); + switch (res) { + case QURT_HVX_RESERVE_NOT_SUPPORTED: + case QURT_HVX_RESERVE_NOT_SUCCESSFUL: + FARF(ERROR, "error reserving HVX: %u", res); + return AEE_EFAILED; + default: + break; + } + // Lock HVX. + int lck = qurt_hvx_lock(QURT_HVX_MODE_128B); + if (lck != 0) { + FARF(ERROR, "error locking HVX: %u", lck); + return AEE_EFAILED; + } + + uint64_t us_begin = HAP_perf_get_time_us(); + uint64_t pc_begin = HAP_perf_get_pcycles(); + + TheModel->run(); + + uint64_t pc_end = HAP_perf_get_pcycles(); + uint64_t us_end = HAP_perf_get_time_us(); + *pcycles = pc_end - pc_begin; + *usecs = us_end - us_begin; + + // Unlock HVX. 
+ int unl = qurt_hvx_unlock(); + if (unl != 0) { + FARF(ERROR, "error unlocking HVX: %u", unl); + return AEE_EFAILED; + } + // Release HVX. + int rel = qurt_hvx_cancel_reserve(); + if (rel != 0) { + FARF(ERROR, "error canceling HVX reservation: %u", rel); + return AEE_EFAILED; + } + + return AEE_SUCCESS; +} diff --git a/src/runtime/hexagon/launcher/launcher_main.cc b/src/runtime/hexagon/launcher/launcher_main.cc new file mode 100644 index 000000000000..ac21a7be1636 --- /dev/null +++ b/src/runtime/hexagon/launcher/launcher_main.cc @@ -0,0 +1,148 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include + +#include +#include +#include +#include + +#include "launcher_core.h" +#include "launcher_util.h" + +ExecutionSession* create_execution_session(); + +int parse_command_line(int argc, char* argv[], std::string* in_path, std::string* out_path) { + static option long_options[] = { + {"in_config", required_argument, nullptr, 0}, + {"out_config", required_argument, nullptr, 0}, + }; + + bool show_usage = false; + int opt, long_index = 0; + while ((opt = getopt_long(argc, argv, "i:o:u:", long_options, &long_index)) != -1) { + if (opt != 0) { + show_usage = true; + continue; + } + switch (long_index) { + case 0: + *in_path = std::string(optarg); + break; + case 1: + *out_path = std::string(optarg); + break; + } + } + if (in_path->empty() || out_path->empty() || show_usage) { + std::cout << "Usage: " << argv[0] << " --" << long_options[0].name << " input.json --" + << long_options[1].name << " output.json\n"; + return 1; + } + return 0; +} + +int main(int argc, char* argv[]) { + std::string in_path, out_path; + if (parse_command_line(argc, argv, &in_path, &out_path) != 0) { + return 1; + } + + ModelConfig config; + if (!read_model_config(in_path, &config)) { + return 1; + } + + ExecutionSession* session_ptr = create_execution_session(); + if (session_ptr == nullptr) { + return 1; + } + ExecutionSession& session = *session_ptr; + + std::cout << "loading model files: " << config.model_json << ", " << config.model_library << '\n'; + std::string json = load_text_file(config.model_json); + if (!session.load_model(config.model_library, json.c_str())) { + return 1; + } + + int max_ndim = 0; + for (const TensorConfig& tc : config.inputs) { + max_ndim = std::max(max_ndim, tc.shape.size()); + } + auto* input_meta = session.alloc(tensor_meta::meta_size(max_ndim)); + + for (int i = 0, e = config.inputs.size(); i != e; ++i) { + const TensorConfig& tc = config.inputs[i]; + input_meta->ndim = tc.shape.size(); + input_meta->dtype = tvm::runtime::String2DLDataType(tc.dtype); + std::copy(tc.shape.begin(), tc.shape.end(), input_meta->shape); + + auto* input_data = session.alloc(input_meta->data_size()); + std::cout << "loading input file #" << i << ": " 
<< tc.file_name << '\n'; + load_binary_file(tc.file_name, input_data, input_meta->data_size()); + if (!session.set_input(i, input_meta, input_data)) { + return 1; + } + } + + OutputConfig output_config; + + std::cout << "running..." << std::flush; + if (!session.run(&output_config.pcycles, &output_config.usecs)) { + std::cout << '\n'; + return 1; + } + std::cout << '\n'; + std::cout << "Finished in " << output_config.pcycles << " pcycles, (" << output_config.usecs + << "us)\n"; + + auto* output_meta = session.alloc(128); + int num_outputs = 0; + if (!session.get_num_outputs(&num_outputs)) { + return 1; + } + + for (int i = 0; i != num_outputs; ++i) { + if (!session.get_output(i, output_meta, 128, nullptr, 0)) { + return 1; + } + int data_size = output_meta->data_size(); + auto* output_data = session.alloc(data_size); + if (!session.get_output(i, output_meta, 128, output_data, data_size)) { + return 1; + } + + TensorConfig oc; + oc.file_name = "output" + std::to_string(i) + ".dat"; + for (int i = 0, e = output_meta->ndim; i != e; ++i) { + oc.shape.push_back(output_meta->shape[i]); + } + oc.dtype = tvm::runtime::DLDataType2String(output_meta->dtype); + write_binary_file(oc.file_name, output_data, data_size); + output_config.outputs.push_back(std::move(oc)); + + session.free(output_data); + } + + if (!write_output_config(out_path, &output_config)) { + return 1; + } + return 0; +} diff --git a/src/runtime/hexagon/launcher/launcher_rpc.idl b/src/runtime/hexagon/launcher/launcher_rpc.idl new file mode 100644 index 000000000000..6677108a76f0 --- /dev/null +++ b/src/runtime/hexagon/launcher/launcher_rpc.idl @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "remote.idl" +#include "AEEStdDef.idl" + +typedef sequence buffer; + +interface launcher_rpc : remote_handle64 { + AEEResult load(in string module_path, in string model_json); + AEEResult unload(); + AEEResult get_num_inputs(rout long num_inputs); + AEEResult set_input(in long input_idx, in buffer input_meta, in buffer input_value); + AEEResult get_num_outputs(rout long num_outputs); + AEEResult get_output(in long output_idx, rout buffer output_meta, rout buffer output_value); + AEEResult run(rout uint64_t pcycles, rout uint64_t usecs); +}; diff --git a/src/runtime/hexagon/launcher/launcher_util.cc b/src/runtime/hexagon/launcher/launcher_util.cc new file mode 100644 index 000000000000..9c565167142b --- /dev/null +++ b/src/runtime/hexagon/launcher/launcher_util.cc @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "launcher_util.h" + +#include + +#include +#include +#include +#include +#include +#include + +size_t get_file_size(std::ifstream& in_file) { + std::ifstream::pos_type pos = in_file.tellg(); + size_t size = in_file.seekg(0, std::ios::end).tellg(); + in_file.seekg(pos, std::ios::beg); + return size; +} + +size_t get_file_size(std::ifstream&& in_file) { + return get_file_size(in_file); // calls the & version +} + +std::string load_text_file(const std::string& file_name) { + constexpr size_t block_size = 1024 * 1024; // 1MB + std::ifstream in_file(file_name); + ICHECK(in_file.is_open()) << "cannot open file " << file_name; + size_t file_size = get_file_size(in_file); + std::string buffer(file_size + 1, 0); + + in_file.read(&buffer[0], file_size); + return std::move(buffer); +} + +void* load_binary_file(const std::string& file_name, void* buffer, size_t buffer_size) { + std::ifstream in_file(file_name); + ICHECK(in_file.is_open()) << "cannot open file " << file_name; + size_t file_size = get_file_size(in_file); + + in_file.read(reinterpret_cast(buffer), + std::min(buffer_size, file_size)); + return buffer; +} + +void write_binary_file(const std::string& file_name, void* buffer, size_t buffer_size) { + std::ofstream out_file(file_name); + ICHECK(out_file.is_open()) << "cannot open file " << file_name; + + out_file.write(reinterpret_cast(buffer), buffer_size); +} diff --git a/src/runtime/hexagon/launcher/launcher_util.h b/src/runtime/hexagon/launcher/launcher_util.h new file mode 100644 index 000000000000..13db89d052fb --- /dev/null +++ b/src/runtime/hexagon/launcher/launcher_util.h @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#ifndef TVM_RUNTIME_HEXAGON_LAUNCHER_LAUNCHER_UTIL_H_ +#define TVM_RUNTIME_HEXAGON_LAUNCHER_LAUNCHER_UTIL_H_ + +#include +#include +#include + +size_t get_file_size(std::ifstream& in_file); +size_t get_file_size(std::ifstream&& in_file); + +std::string load_text_file(const std::string& file_name); +void* load_binary_file(const std::string& file_name, void* buffer, size_t buffer_size); +void write_binary_file(const std::string& file_name, void* buffer, size_t buffer_size); + +#endif // TVM_RUNTIME_HEXAGON_LAUNCHER_LAUNCHER_UTIL_H_