Skip to content

Commit

Permalink
Android demo app tutorial fix for XNNPACK and QNN (#2962) (#3027)
Browse files Browse the repository at this point in the history
Summary:
* Update tutorial due to recent changes.
* Clean up setup.sh for app helper lib build.

Pull Request resolved: #2962

Reviewed By: cccclai

Differential Revision: D55951189

Pulled By: kirklandsign

fbshipit-source-id: 2c95e8580145b039f503e7cd99a4003867f8dbb0
(cherry picked from commit 26365f1)
  • Loading branch information
kirklandsign authored Apr 17, 2024
1 parent 212e91f commit 925f674
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 62 deletions.
87 changes: 45 additions & 42 deletions examples/demo-apps/android/ExecuTorchDemo/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ This guide explains how to setup ExecuTorch for Android using a demo app. The ap
* Refer to [Setting up ExecuTorch](https://pytorch.org/executorch/stable/getting-started-setup) to set up the repo and dev environment.
* Download and install [Android Studio and SDK](https://developer.android.com/studio).
* Supported Host OS: CentOS, macOS Ventura (M1/x86_64). See below for Qualcomm HTP specific requirements.
* *Qualcomm HTP Only[^1]:* To build and run on Qualcomm's AI Engine Direct, please follow [Building and Running ExecuTorch with Qualcomm AI Engine Direct Backend](build-run-qualcomm-ai-engine-direct-backend.md) for hardware and software pre-requisites.
* *Qualcomm HTP Only[^1]:* To build and run on Qualcomm's AI Engine Direct, please follow [Building and Running ExecuTorch with Qualcomm AI Engine Direct Backend](build-run-qualcomm-ai-engine-direct-backend.md) for hardware and software pre-requisites. This tutorial uses version 2.19 of the Qualcomm AI Engine Direct SDK and the SM8450 chip.
:::
::::

Expand All @@ -39,7 +39,7 @@ We generate the model file for the ExecuTorch runtime in Android Demo App.
For delegating DeepLab v3 to XNNPACK backend, please do the following to export the model:

```bash
export FLATC_EXECUTABLE=$(realpath third-party/flatbuffers/cmake-out/flatc)
export FLATC_EXECUTABLE=$(realpath third-party/flatbuffers/cmake-android-out/flatc)
python3 -m examples.xnnpack.aot_compiler --model_name="dl3" --delegate
mkdir -p examples/demo-apps/android/ExecuTorchDemo/app/src/main/assets/
cp dl3_xnnpack_fp32.pte examples/demo-apps/android/ExecuTorchDemo/app/src/main/assets/
Expand All @@ -54,7 +54,7 @@ For delegating to Qualcomm Hexagon NPU, please follow the tutorial [here](build-
After generating the model, copy the model to `assets` directory.

```bash
python -m examples.qualcomm.scripts.deeplab_v3 -b build_android -m SM8550 -s <adb_connected_device_serial>
python -m examples.qualcomm.scripts.deeplab_v3 -b build_android -m SM8450 -s <adb_connected_device_serial>
cp deeplab_v3/dlv3_qnn.pte examples/demo-apps/android/ExecuTorchDemo/app/src/main/assets/
```

Expand All @@ -68,22 +68,20 @@ We build the required ExecuTorch runtime library to run the model.

```bash
export ANDROID_NDK=<path-to-android-ndk>
export BUCK2=/tmp/buck2 # Or your buck path
export ANDROID_ABI=arm64-v8a

rm -rf cmake-out && mkdir cmake-out && cd cmake-out
rm -rf cmake-android-out && mkdir cmake-android-out

# Build the core executorch library
cmake .. -DCMAKE_INSTALL_PREFIX=cmake-out \
cmake . -DCMAKE_INSTALL_PREFIX=cmake-android-out \
-DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" \
-DANDROID_ABI="${ANDROID_ABI}" \
-DBUCK2="${BUCK2}" \
-DEXECUTORCH_BUILD_XNNPACK=ON \
-DEXECUTORCH_BUILD_FLATC=OFF \
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
-DFLATC_EXECUTABLE="${FLATC}" \
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
-Bcmake-android-out

cmake --build . -j16 --target install
cmake --build cmake-android-out -j16 --target install
```

When we set `EXECUTORCH_BUILD_XNNPACK=ON`, we will build the target [`xnnpack_backend`](https://github.com/pytorch/executorch/blob/main/backends/xnnpack/CMakeLists.txt) which in turn is linked into libexecutorch_jni via [CMake](https://github.com/pytorch/executorch/blob/main/examples/demo-apps/android/jni/CMakeLists.txt).
Expand All @@ -93,60 +91,63 @@ When we set `EXECUTORCH_BUILD_XNNPACK=ON`, we will build the target [`xnnpack_ba
```bash

# Build the android extension
cmake ../extension/android -DBUCK2="${BUCK2}" \
-DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake \
cmake extension/android \
-DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}"/build/cmake/android.toolchain.cmake \
-DANDROID_ABI="${ANDROID_ABI}" \
-DCMAKE_INSTALL_PREFIX=cmake-out \
-Bextension/android
-DCMAKE_INSTALL_PREFIX=cmake-android-out \
-Bcmake-android-out/extension/android

cmake --build ./extension/android -j16
cmake --build cmake-android-out/extension/android -j16
```

`libexecutorch_jni.so` wraps up the required XNNPACK Backend runtime library from `xnnpack_backend`, and adds an additional JNI layer using fbjni. This is later exposed to the Java app.

#### Qualcomm Hexagon NPU

1. Configure the CMake target for the library with Qualcomm Hexagon NPU (HTP) backend (XNNPACK also included):
1. Build the CMake target for the library with Qualcomm Hexagon NPU (HTP) backend (XNNPACK also included):

```bash
export ANDROID_NDK=<path-to-android-ndk>
export QNN_SDK=<path-to-qnn-sdk>

rm -rf cmake-out && mkdir cmake-out && cd cmake-out
cmake .. \
-DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
-DANDROID_ABI=arm64-v8a \
-DBUCK2=/tmp/buck2 \
-DEXECUTORCH_BUILD_ANDROID_JNI=ON \
export ANDROID_ABI=arm64-v8a
export QNN_SDK_ROOT=<path-to-qnn-sdk>

# Recreate the build directory but stay in the current (source) directory:
# the configure step below passes "." as the source directory and
# -Bcmake-android-out as the build directory, so it must not be run from
# inside cmake-android-out.
rm -rf cmake-android-out && mkdir cmake-android-out
cmake . -DCMAKE_INSTALL_PREFIX=cmake-android-out \
-DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" \
-DANDROID_ABI="${ANDROID_ABI}" \
-DEXECUTORCH_BUILD_XNNPACK=ON \
-DEXECUTORCH_BUILD_FLATC=OFF \
-DEXECUTORCH_BUILD_QNN=ON \
-DQNN_SDK_ROOT=$QNN_SDK \
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON
-DQNN_SDK_ROOT="${QNN_SDK_ROOT}" \
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
-Bcmake-android-out

cmake --build cmake-android-out -j16 --target install
```
Similar to the XNNPACK setup, this compiles `libexecutorch_jni.so`, and additionally builds a static library, `qnn_executorch_backend`, which wraps the Qualcomm HTP runtime library and registers the Qualcomm HTP backend. This is later exposed to the Java app.

`qnn_executorch_backend` is built when we turn on CMake option `EXECUTORCH_BUILD_QNN`. It will include the [CMakeLists.txt](https://github.com/pytorch/executorch/blob/main/backends/qualcomm/CMakeLists.txt) from backends/qualcomm where we `add_library(qnn_executorch_backend STATIC)`.

2. Build the libraries:
2. Build the Android extension:

```bash
cmake --build . -j16
cmake extension/android \
-DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}"/build/cmake/android.toolchain.cmake \
-DANDROID_ABI="${ANDROID_ABI}" \
-DCMAKE_INSTALL_PREFIX=cmake-android-out \
-Bcmake-android-out/extension/android

cmake --build cmake-android-out/extension/android -j16
```

## Deploying on Device via Demo App

### Steps for Deploying Model via XNNPACK

```bash
mkdir -p ../examples/demo-apps/android/ExecuTorchDemo/app/src/main/jniLibs/arm64-v8a
```

Copy the core libraries:

```bash
cp ./examples/demo-apps/android/jni/libexecutorch_jni.so \
../examples/demo-apps/android/ExecuTorchDemo/app/src/main/jniLibs/arm64-v8a/libexecutorch.so
mkdir -p examples/demo-apps/android/ExecuTorchDemo/app/src/main/jniLibs/arm64-v8a
cp cmake-android-out/extension/android/libexecutorch_jni.so \
examples/demo-apps/android/ExecuTorchDemo/app/src/main/jniLibs/arm64-v8a/libexecutorch.so
```

This allows the Android app to load ExecuTorch runtime with XNNPACK backend as a JNI library. Later, this shared library will be loaded by `NativePeer.java` in Java code.
Expand All @@ -160,15 +161,17 @@ mkdir -p ../examples/demo-apps/android/ExecuTorchDemo/app/src/main/jniLibs/arm64
We need to push some additional Qualcomm HTP backend libraries to the app. Please refer to the [Qualcomm docs](build-run-qualcomm-ai-engine-direct-backend.md) for details.

```bash
cp ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnHtp.so ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnHtpV69Skel.so ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnHtpStub.so ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnSystem.so \
../examples/demo-apps/android/ExecuTorchDemo/app/src/main/jniLibs/arm64-v8a
cp ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnHtp.so ${QNN_SDK_ROOT}/lib/hexagon-v69/unsigned/libQnnHtpV69Skel.so ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnHtpV69Stub.so ${QNN_SDK_ROOT}/lib/aarch64-android/libQnnSystem.so \
examples/demo-apps/android/ExecuTorchDemo/app/src/main/jniLibs/arm64-v8a
```

Copy the core libraries:

```bash
cp ./examples/demo-apps/android/jni/libexecutorch_jni.so \
../examples/demo-apps/android/ExecuTorchDemo/app/src/main/jniLibs/arm64-v8a/libexecutorch.so
cp cmake-android-out/extension/android/libexecutorch_jni.so \
examples/demo-apps/android/ExecuTorchDemo/app/src/main/jniLibs/arm64-v8a/libexecutorch.so
cp cmake-android-out/lib/libqnn_executorch_backend.so \
examples/demo-apps/android/ExecuTorchDemo/app/src/main/jniLibs/arm64-v8a/libqnn_executorch_backend.so
```

## Running the App
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,12 @@ dependencies {
debugImplementation("androidx.compose.ui:ui-tooling")
debugImplementation("androidx.compose.ui:ui-test-manifest")
}

// Registers a Gradle task named "setup" that runs a setup.sh script through `sh`
// at the start of the task's execution.
tasks.register("setup") {
  doFirst {
    exec {
      // The script path below is relative, so the working directory is moved
      // five levels up first; presumably that is the repository root — TODO confirm.
      workingDir("../../../../../")
      // NOTE(review): the file header for this hunk is not visible here; confirm
      // that the LlamaDemo script is the intended one for this module.
      commandLine("sh", "examples/demo-apps/android/LlamaDemo/setup.sh")
    }
  }
}
28 changes: 14 additions & 14 deletions examples/demo-apps/android/ExecuTorchDemo/setup.sh
Original file line number Diff line number Diff line change
@@ -1,40 +1,40 @@
#!/usr/bin/env bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# Copyright 2023-2024 Arm Limited and/or its affiliates.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

set -eu

# Note: Set up ANDROID_NDK, ANDROID_ABI, BUCK2, and FLATC
cmake . -DCMAKE_INSTALL_PREFIX=cmake-out \
CMAKE_OUT="${CMAKE_OUT:-cmake-out-android}"
# Note: Set up ANDROID_NDK and ANDROID_ABI
cmake . -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \
-DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" \
-DANDROID_ABI="${ANDROID_ABI}" \
-DBUCK2="${BUCK2}" \
-DEXECUTORCH_BUILD_XNNPACK=ON \
-DEXECUTORCH_BUILD_FLATC=OFF \
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
-DFLATC_EXECUTABLE="${FLATC}" \
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
-Bcmake-out
-DEXECUTORCH_BUILD_OPTIMIZED=ON \
-DCMAKE_BUILD_TYPE=Release \
-B"${CMAKE_OUT}"

if [ "$(uname)" == "Darwin" ]; then
CMAKE_JOBS=$(( $(sysctl -n hw.ncpu) - 1 ))
else
CMAKE_JOBS=$(( $(nproc) - 1 ))
fi
cmake --build cmake-out -j "${CMAKE_JOBS}" --target install
cmake --build "${CMAKE_OUT}" -j "${CMAKE_JOBS}" --target install --config Release

cmake extension/android -DBUCK2="${BUCK2}" \
cmake extension/android \
-DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" \
-DANDROID_ABI="${ANDROID_ABI}" \
-DCMAKE_INSTALL_PREFIX=cmake-out \
-Bcmake-out/extension/android
-DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \
-DCMAKE_BUILD_TYPE=Release \
-B"${CMAKE_OUT}"/extension/android

cmake --build cmake-out/extension/android -j "${CMAKE_JOBS}"
cmake --build "${CMAKE_OUT}"/extension/android -j "${CMAKE_JOBS}" --config Release

JNI_LIBS_PATH="examples/demo-apps/android/ExecuTorchDemo/app/src/main/jniLibs"
mkdir -p "${JNI_LIBS_PATH}/${ANDROID_ABI}"
cp cmake-out/extension/android/libexecutorch_jni.so "${JNI_LIBS_PATH}/${ANDROID_ABI}/libexecutorch.so"
cp "${CMAKE_OUT}"/extension/android/libexecutorch_jni.so "${JNI_LIBS_PATH}/${ANDROID_ABI}/"
7 changes: 1 addition & 6 deletions extension/android/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,9 @@ if(EXECUTORCH_BUILD_LLAMA_JNI)
add_library(llama_runner STATIC IMPORTED)
set_property(TARGET llama_runner PROPERTY IMPORTED_LOCATION ${LLAMA_RUNNER_PATH})

set(CUSTOM_OPS_LIB_PATH ${CMAKE_CURRENT_BINARY_DIR}/../../examples/models/llama2/custom_ops/libcustom_ops_lib.a)
add_library(custom_ops_lib STATIC IMPORTED)
set_property(TARGET custom_ops_lib PROPERTY IMPORTED_LOCATION ${CUSTOM_OPS_LIB_PATH})

set(CUSTOM_OPS_PATH ${CMAKE_CURRENT_BINARY_DIR}/../../examples/models/llama2/custom_ops/libcustom_ops.a)
add_library(custom_ops STATIC IMPORTED)
set_property(TARGET custom_ops PROPERTY IMPORTED_LOCATION ${CUSTOM_OPS_PATH})
target_link_options_shared_lib(custom_ops_lib)

if(TARGET pthreadpool)
set(LLAMA_JNI_SRCS jni/jni_layer_llama.cpp ../../backends/xnnpack/threadpool/cpuinfo_utils.cpp)
Expand All @@ -82,6 +77,6 @@ if(EXECUTORCH_BUILD_LLAMA_JNI)
endif()
target_include_directories(executorch_llama_jni PRIVATE ${_common_include_directories})
target_link_libraries(executorch_llama_jni ${link_libraries} llama_runner
custom_ops custom_ops_lib cpublas eigen_blas)
custom_ops cpublas eigen_blas)
target_compile_options(executorch_llama_jni PUBLIC ${_common_compile_options})
endif()

0 comments on commit 925f674

Please sign in to comment.