From fea3043345b5492a406297ed93ea3b9d9ca4aea5 Mon Sep 17 00:00:00 2001 From: Hansong Zhang Date: Thu, 28 Mar 2024 17:04:48 -0700 Subject: [PATCH] [Android] Use threadpool --- extension/android/CMakeLists.txt | 14 +++++++++++++- extension/android/jni/jni_layer_llama.cpp | 16 ++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/extension/android/CMakeLists.txt b/extension/android/CMakeLists.txt index 40e5a7c002..6bfd3d803e 100644 --- a/extension/android/CMakeLists.txt +++ b/extension/android/CMakeLists.txt @@ -66,7 +66,19 @@ if(EXECUTORCH_BUILD_LLAMA_JNI) set_property(TARGET custom_ops PROPERTY IMPORTED_LOCATION ${CUSTOM_OPS_PATH}) target_link_options_shared_lib(custom_ops_lib) - add_library(executorch_llama_jni SHARED jni/jni_layer_llama.cpp) + if(TARGET pthreadpool) + set(LLAMA_JNI_SRCS jni/jni_layer_llama.cpp ../../backends/xnnpack/threadpool/cpuinfo_utils.cpp) + else() + set(LLAMA_JNI_SRCS jni/jni_layer_llama.cpp) + endif() + add_library(executorch_llama_jni SHARED ${LLAMA_JNI_SRCS}) + if(TARGET pthreadpool) + target_compile_definitions(executorch_llama_jni PRIVATE ET_USE_THREADPOOL=1) + target_include_directories(executorch_llama_jni PUBLIC + ${CMAKE_CURRENT_SOURCE_DIR}/../../backends/xnnpack/third-party/cpuinfo/include) + target_include_directories(executorch_llama_jni PUBLIC + ${CMAKE_CURRENT_SOURCE_DIR}/../../backends/xnnpack/third-party/pthreadpool/include) + endif() target_include_directories(executorch_llama_jni PRIVATE ${_common_include_directories}) target_link_libraries(executorch_llama_jni ${link_libraries} llama_runner custom_ops custom_ops_lib cpublas eigen_blas) diff --git a/extension/android/jni/jni_layer_llama.cpp b/extension/android/jni/jni_layer_llama.cpp index b570db8d51..9790c7c950 100644 --- a/extension/android/jni/jni_layer_llama.cpp +++ b/extension/android/jni/jni_layer_llama.cpp @@ -20,6 +20,11 @@ #include #include +#if defined(ET_USE_THREADPOOL) +#include +#include +#endif + #include #include @@ -91,6 +96,17 @@ 
class ExecuTorchLlamaJni
       facebook::jni::alias_ref model_path,
       facebook::jni::alias_ref tokenizer_path,
       jfloat temperature) {
+#if defined(ET_USE_THREADPOOL)
+    // Reserve 1 thread for the main thread; 0 or 1 cores leaves the pool as is.
+    const uint32_t num_cores =
+        torch::executorch::cpuinfo::get_num_performant_cores();
+    if (num_cores > 1) {
+      ET_LOG(Info, "Resetting threadpool to %u threads", num_cores - 1);
+      torch::executorch::threadpool::get_threadpool()->_unsafe_reset_threadpool(
+          num_cores - 1);
+    }
+#endif
+
     runner_ = std::make_unique(
         model_path->toStdString().c_str(),
         tokenizer_path->toStdString().c_str(),