Android demo app tutorial fix for XNNPACK and QNN #2962

Closed · wants to merge 3 commits
87 changes: 45 additions & 42 deletions examples/demo-apps/android/ExecuTorchDemo/README.md
@@ -17,7 +17,7 @@ This guide explains how to setup ExecuTorch for Android using a demo app.
 * Refer to [Setting up ExecuTorch](https://pytorch.org/executorch/stable/getting-started-setup) to set up the repo and dev environment.
 * Download and install [Android Studio and SDK](https://developer.android.com/studio).
 * Supported Host OS: CentOS, macOS Ventura (M1/x86_64). See below for Qualcomm HTP specific requirements.
-* *Qualcomm HTP Only[^1]:* To build and run on Qualcomm's AI Engine Direct, please follow [Building and Running ExecuTorch with Qualcomm AI Engine Direct Backend](build-run-qualcomm-ai-engine-direct-backend.md) for hardware and software pre-requisites.
+* *Qualcomm HTP Only[^1]:* To build and run on Qualcomm's AI Engine Direct, please follow [Building and Running ExecuTorch with Qualcomm AI Engine Direct Backend](build-run-qualcomm-ai-engine-direct-backend.md) for hardware and software pre-requisites. The version we use for this tutorial is 2.19. The chip we use for this tutorial is SM8450.
 :::
 ::::

@@ -39,7 +39,7 @@ We generate the model file for the ExecuTorch runtime in Android Demo App.
 For delegating DeepLab v3 to XNNPACK backend, please do the following to export the model:
 
 ```bash
-export FLATC_EXECUTABLE=$(realpath third-party/flatbuffers/cmake-out/flatc)
+export FLATC_EXECUTABLE=$(realpath third-party/flatbuffers/cmake-android-out/flatc)
 python3 -m examples.xnnpack.aot_compiler --model_name="dl3" --delegate
 mkdir -p examples/demo-apps/android/ExecuTorchDemo/app/src/main/assets/
 cp dl3_xnnpack_fp32.pte examples/demo-apps/android/ExecuTorchDemo/app/src/main/assets/
@@ -54,7 +54,7 @@ For delegating to Qualcomm Hexagon NPU, please follow the tutorial [here](build-run-qualcomm-ai-engine-direct-backend.md).
 After generating the model, copy the model to `assets` directory.
 
 ```bash
-python -m examples.qualcomm.scripts.deeplab_v3 -b build_android -m SM8550 -s <adb_connected_device_serial>
+python -m examples.qualcomm.scripts.deeplab_v3 -b build_android -m SM8450 -s <adb_connected_device_serial>
 cp deeplab_v3/dlv3_qnn.pte examples/demo-apps/android/ExecuTorchDemo/app/src/main/assets/
 ```
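The serial passed via `-s` can be read directly from `adb`:

```bash
# List attached devices; the first column of each entry is the serial
# expected by the -s flag above.
adb devices -l
```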

@@ -68,22 +68,20 @@ We build the required ExecuTorch runtime library to run the model.
 
 ```bash
 export ANDROID_NDK=<path-to-android-ndk>
-export BUCK2=/tmp/buck2 # Or your buck path
 export ANDROID_ABI=arm64-v8a
 
-rm -rf cmake-out && mkdir cmake-out && cd cmake-out
+rm -rf cmake-android-out && mkdir cmake-android-out
 
 # Build the core executorch library
-cmake .. -DCMAKE_INSTALL_PREFIX=cmake-out \
+cmake . -DCMAKE_INSTALL_PREFIX=cmake-android-out \
     -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" \
     -DANDROID_ABI="${ANDROID_ABI}" \
-    -DBUCK2="${BUCK2}" \
     -DEXECUTORCH_BUILD_XNNPACK=ON \
     -DEXECUTORCH_BUILD_FLATC=OFF \
     -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
-    -DFLATC_EXECUTABLE="${FLATC}" \
-    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON
+    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+    -Bcmake-android-out
 
-cmake --build . -j16 --target install
+cmake --build cmake-android-out -j16 --target install
 ```
 
 When we set `EXECUTORCH_BUILD_XNNPACK=ON`, we will build the target [`xnnpack_backend`](https://github.com/pytorch/executorch/blob/main/backends/xnnpack/CMakeLists.txt) which in turn is linked into libexecutorch_jni via [CMake](https://github.com/pytorch/executorch/blob/main/examples/demo-apps/android/jni/CMakeLists.txt).
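As a quick sanity check before moving on (a sketch; the artifact name is an assumption based on the target name above, not taken from this diff), you can confirm the backend library landed in the build tree:

```bash
# Hypothetical check: the xnnpack_backend target should leave a static
# library under cmake-android-out after the install step.
find cmake-android-out -name "libxnnpack_backend.a"
```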
@@ -93,60 +91,63 @@ When we set `EXECUTORCH_BUILD_XNNPACK=ON`, we will build the target `xnnpack_backend`
 ```bash
 
 # Build the android extension
-cmake ../extension/android -DBUCK2="${BUCK2}" \
-    -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake \
+cmake extension/android \
+    -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}"/build/cmake/android.toolchain.cmake \
     -DANDROID_ABI="${ANDROID_ABI}" \
-    -DCMAKE_INSTALL_PREFIX=cmake-out \
-    -Bextension/android
+    -DCMAKE_INSTALL_PREFIX=cmake-android-out \
+    -Bcmake-android-out/extension/android
 
-cmake --build ./extension/android -j16
+cmake --build cmake-android-out/extension/android -j16
 ```
 
 `libexecutorch_jni.so` wraps up the required XNNPACK Backend runtime library from `xnnpack_backend`, and adds an additional JNI layer using fbjni. This is later exposed to Java app.
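fbjni registers its native methods from `JNI_OnLoad` rather than via exported `Java_*` symbols, so a minimal sketch for verifying that the JNI layer is present is to look for that entry point (the `llvm-nm` location below is an assumption about your NDK layout and host OS):

```bash
# Assumed NDK toolchain location; adjust linux-x86_64 for your host.
NM="${ANDROID_NDK}/toolchains/llvm/prebuilt/linux-x86_64/bin/llvm-nm"
"${NM}" --dynamic --defined-only \
    cmake-android-out/extension/android/libexecutorch_jni.so | grep JNI_OnLoad
```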

 #### Qualcomm Hexagon NPU
 
-1. Configure the CMake target for the library with Qualcomm Hexagon NPU (HTP) backend (XNNPACK also included):
+1. Build the CMake target for the library with Qualcomm Hexagon NPU (HTP) backend (XNNPACK also included):
 
 ```bash
 export ANDROID_NDK=<path-to-android-ndk>
-export QNN_SDK=<path-to-qnn-sdk>
-
-rm -rf cmake-out && mkdir cmake-out && cd cmake-out
-cmake .. \
-    -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
-    -DANDROID_ABI=arm64-v8a \
-    -DBUCK2=/tmp/buck2 \
-    -DEXECUTORCH_BUILD_ANDROID_JNI=ON \
+export ANDROID_ABI=arm64-v8a
+export QNN_SDK_ROOT=<path-to-qnn-sdk>
+
+rm -rf cmake-android-out && mkdir cmake-android-out
+cmake . -DCMAKE_INSTALL_PREFIX=cmake-android-out \
+    -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" \
+    -DANDROID_ABI="${ANDROID_ABI}" \
     -DEXECUTORCH_BUILD_XNNPACK=ON \
     -DEXECUTORCH_BUILD_FLATC=OFF \
     -DEXECUTORCH_BUILD_QNN=ON \
-    -DQNN_SDK_ROOT=$QNN_SDK \
-    -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON
+    -DQNN_SDK_ROOT="${QNN_SDK_ROOT}" \
+    -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+    -Bcmake-android-out
+
+cmake --build cmake-android-out -j16 --target install
 ```
 Similar to the XNNPACK library, with this setup, we compile `libexecutorch_jni.so` but it adds an additional static library `qnn_executorch_backend` which wraps up Qualcomm HTP runtime library and registers the Qualcomm HTP backend. This is later exposed to Java app.
 
 `qnn_executorch_backend` is built when we turn on CMake option `EXECUTORCH_BUILD_QNN`. It will include the [CMakeLists.txt](https://github.com/pytorch/executorch/blob/main/backends/qualcomm/CMakeLists.txt) from backends/qualcomm where we `add_library(qnn_executorch_backend STATIC)`.
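Before deploying, it is worth confirming the QNN backend was actually produced; the path below is the same one the deployment section later copies from:

```bash
# Built when EXECUTORCH_BUILD_QNN=ON; this is the library copied into
# jniLibs in the deployment steps below.
ls -l cmake-android-out/lib/libqnn_executorch_backend.so
```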

-2. Build the libraries:
+2. Build the Android extension:
 
 ```bash
-cmake --build . -j16
+cmake extension/android \
+    -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}"/build/cmake/android.toolchain.cmake \
+    -DANDROID_ABI="${ANDROID_ABI}" \
+    -DCMAKE_INSTALL_PREFIX=cmake-android-out \
+    -Bcmake-android-out/extension/android
+
+cmake --build cmake-android-out/extension/android -j16
 ```

 ## Deploying on Device via Demo App
 
 ### Steps for Deploying Model via XNNPACK
 
 ```bash
-mkdir -p ../examples/demo-apps/android/ExecuTorchDemo/app/src/main/jniLibs/arm64-v8a
-```
-
-Copy the core libraries:
-
-```bash
-cp ./examples/demo-apps/android/jni/libexecutorch_jni.so \
-   ../examples/demo-apps/android/ExecuTorchDemo/app/src/main/jniLibs/arm64-v8a/libexecutorch.so
+mkdir -p examples/demo-apps/android/ExecuTorchDemo/app/src/main/jniLibs/arm64-v8a
+cp cmake-android-out/extension/android/libexecutorch_jni.so \
+   examples/demo-apps/android/ExecuTorchDemo/app/src/main/jniLibs/arm64-v8a/libexecutorch.so
 ```
 
 This allows the Android app to load ExecuTorch runtime with XNNPACK backend as a JNI library. Later, this shared library will be loaded by `NativePeer.java` in Java code.
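A small sanity check, using only paths from the steps above: confirm the copied library really is an aarch64 ELF before building the app.

```bash
# Expect output along the lines of: ELF 64-bit LSB shared object, ARM aarch64
file examples/demo-apps/android/ExecuTorchDemo/app/src/main/jniLibs/arm64-v8a/libexecutorch.so
```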
@@ -160,15 +161,17 @@
 
 We need to push some additional Qualcomm HTP backend libraries to the app. Please refer to [Qualcomm docs](build-run-qualcomm-ai-engine-direct-backend.md) here.
 
 ```bash
-cp ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnHtp.so ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnHtpV69Skel.so ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnHtpStub.so ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnSystem.so \
-   ../examples/demo-apps/android/ExecuTorchDemo/app/src/main/jniLibs/arm64-v8a
+cp ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnHtp.so ${QNN_SDK_ROOT}/lib/hexagon-v69/unsigned/libQnnHtpV69Skel.so ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnHtpV69Stub.so ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnSystem.so \
+   examples/demo-apps/android/ExecuTorchDemo/app/src/main/jniLibs/arm64-v8a
 ```
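The skel library is tied to the Hexagon architecture generation: the SM8450 used in this tutorial pairs with Hexagon V69, hence `libQnnHtpV69Skel.so` and `libQnnHtpV69Stub.so`. For a different SoC, a quick way to see which generations your QNN SDK ships (assuming the 2.19 layout referenced above) is:

```bash
# Each hexagon-v* directory carries the skel library for one HTP
# architecture generation; pick the one matching your device's SoC.
ls -d "${QNN_SDK_ROOT}"/lib/hexagon-v*/
```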

 Copy the core libraries:
 
 ```bash
-cp ./examples/demo-apps/android/jni/libexecutorch_jni.so \
-   ../examples/demo-apps/android/ExecuTorchDemo/app/src/main/jniLibs/arm64-v8a/libexecutorch.so
+cp cmake-android-out/extension/android/libexecutorch_jni.so \
+   examples/demo-apps/android/ExecuTorchDemo/app/src/main/jniLibs/arm64-v8a/libexecutorch.so
+cp cmake-android-out/lib/libqnn_executorch_backend.so \
+   examples/demo-apps/android/ExecuTorchDemo/app/src/main/jniLibs/arm64-v8a/libqnn_executorch_backend.so
 ```
 
 ## Running the App
@@ -68,3 +68,12 @@ dependencies {
     debugImplementation("androidx.compose.ui:ui-tooling")
     debugImplementation("androidx.compose.ui:ui-test-manifest")
 }
+
+tasks.register("setup") {
+    doFirst {
+        exec {
+            commandLine("sh", "examples/demo-apps/android/LlamaDemo/setup.sh")
+            workingDir("../../../../../")
+        }
+    }
+}
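With this task registered, the native build can be triggered through Gradle rather than by running the script manually, e.g. (assuming the project's standard Gradle wrapper):

```bash
# Run from the Android project directory containing this build.gradle.kts;
# the task shells out to setup.sh with the repository root as working dir.
./gradlew setup
```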
28 changes: 14 additions & 14 deletions examples/demo-apps/android/ExecuTorchDemo/setup.sh
@@ -1,40 +1,40 @@
 #!/usr/bin/env bash
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # Copyright 2023-2024 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
 set -eu
 
-# Note: Set up ANDROID_NDK, ANDROID_ABI, BUCK2, and FLATC
-cmake . -DCMAKE_INSTALL_PREFIX=cmake-out \
+CMAKE_OUT="${CMAKE_OUT:-cmake-out-android}"
+# Note: Set up ANDROID_NDK and ANDROID_ABI
+cmake . -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \
     -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" \
     -DANDROID_ABI="${ANDROID_ABI}" \
-    -DBUCK2="${BUCK2}" \
     -DEXECUTORCH_BUILD_XNNPACK=ON \
     -DEXECUTORCH_BUILD_FLATC=OFF \
     -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
-    -DFLATC_EXECUTABLE="${FLATC}" \
     -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
-    -Bcmake-out
+    -DEXECUTORCH_BUILD_OPTIMIZED=ON \
+    -DCMAKE_BUILD_TYPE=Release \
+    -B"${CMAKE_OUT}"
 
 if [ "$(uname)" == "Darwin" ]; then
   CMAKE_JOBS=$(( $(sysctl -n hw.ncpu) - 1 ))
 else
   CMAKE_JOBS=$(( $(nproc) - 1 ))
 fi
-cmake --build cmake-out -j "${CMAKE_JOBS}" --target install
+cmake --build "${CMAKE_OUT}" -j "${CMAKE_JOBS}" --target install --config Release
 
-cmake extension/android -DBUCK2="${BUCK2}" \
+cmake extension/android \
     -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake \
     -DANDROID_ABI="${ANDROID_ABI}" \
-    -DCMAKE_INSTALL_PREFIX=cmake-out \
-    -Bcmake-out/extension/android
+    -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \
+    -DCMAKE_BUILD_TYPE=Release \
+    -B"${CMAKE_OUT}"/extension/android
 
-cmake --build cmake-out/extension/android -j "${CMAKE_JOBS}"
+cmake --build "${CMAKE_OUT}"/extension/android -j "${CMAKE_JOBS}" --config Release
 
 JNI_LIBS_PATH="examples/demo-apps/android/ExecuTorchDemo/app/src/main/jniLibs"
 mkdir -p "${JNI_LIBS_PATH}/${ANDROID_ABI}"
-cp cmake-out/extension/android/libexecutorch_jni.so "${JNI_LIBS_PATH}/${ANDROID_ABI}/libexecutorch.so"
+cp "${CMAKE_OUT}"/extension/android/libexecutorch_jni.so "${JNI_LIBS_PATH}/${ANDROID_ABI}/"
7 changes: 1 addition & 6 deletions extension/android/CMakeLists.txt
@@ -58,14 +58,9 @@ if(EXECUTORCH_BUILD_LLAMA_JNI)
   add_library(llama_runner STATIC IMPORTED)
   set_property(TARGET llama_runner PROPERTY IMPORTED_LOCATION ${LLAMA_RUNNER_PATH})
 
-  set(CUSTOM_OPS_LIB_PATH ${CMAKE_CURRENT_BINARY_DIR}/../../examples/models/llama2/custom_ops/libcustom_ops_lib.a)
-  add_library(custom_ops_lib STATIC IMPORTED)
-  set_property(TARGET custom_ops_lib PROPERTY IMPORTED_LOCATION ${CUSTOM_OPS_LIB_PATH})
-
   set(CUSTOM_OPS_PATH ${CMAKE_CURRENT_BINARY_DIR}/../../examples/models/llama2/custom_ops/libcustom_ops.a)
   add_library(custom_ops STATIC IMPORTED)
   set_property(TARGET custom_ops PROPERTY IMPORTED_LOCATION ${CUSTOM_OPS_PATH})
-  target_link_options_shared_lib(custom_ops_lib)
 
   if(TARGET pthreadpool)
     set(LLAMA_JNI_SRCS jni/jni_layer_llama.cpp ../../backends/xnnpack/threadpool/cpuinfo_utils.cpp)
@@ -82,6 +77,6 @@ if(EXECUTORCH_BUILD_LLAMA_JNI)
   endif()
   target_include_directories(executorch_llama_jni PRIVATE ${_common_include_directories})
   target_link_libraries(executorch_llama_jni ${link_libraries} llama_runner
-                        custom_ops custom_ops_lib cpublas eigen_blas)
+                        custom_ops cpublas eigen_blas)
   target_compile_options(executorch_llama_jni PUBLIC ${_common_compile_options})
 endif()