Revert "Use new API to register custom ops for llama model (#2840)" (#…
Browse files Browse the repository at this point in the history
…2912)

Summary:
This reverts commit 020d8be.

Pull Request resolved: #2912

Reviewed By: shoumikhin

Differential Revision: D55852547

Pulled By: larryliu0820

fbshipit-source-id: c8528041c03196239d6daef7e2843ee5cf8a8f3d
larryliu0820 authored and facebook-github-bot committed Apr 7, 2024
1 parent 61ad48d commit 643c628
Showing 20 changed files with 286 additions and 384 deletions.
41 changes: 11 additions & 30 deletions .ci/scripts/test_llama.sh
@@ -37,18 +37,6 @@ if [[ -z "${MODE:-}" ]]; then
exit 1
fi

if [[ "${MODE}" =~ xnnpack.* ]]; then
XNNPACK=ON
else
XNNPACK=OFF
fi

if [[ "${MODE}" =~ .*custom.* ]]; then
CUSTOM=ON
else
CUSTOM=OFF
fi

if [[ -z "${BUCK:-}" ]]; then
BUCK=buck2
fi
@@ -59,39 +47,38 @@ fi

which "${PYTHON_EXECUTABLE}"

CMAKE_PREFIX_PATH=$($PYTHON_EXECUTABLE -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())")

cmake_install_executorch_libraries() {
echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
rm -rf cmake-out
if [[ "${MODE}" == "xnnpack" ]]; then
XNNPACK=ON
else
XNNPACK=OFF
fi
retry cmake -DBUCK2="$BUCK" \
-DCMAKE_INSTALL_PREFIX=cmake-out \
-DCMAKE_PREFIX_PATH="$CMAKE_PREFIX_PATH" \
-DCMAKE_BUILD_TYPE=Debug \
-DCMAKE_BUILD_TYPE=Release \
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
-DEXECUTORCH_BUILD_CUSTOM="$CUSTOM" \
-DEXECUTORCH_BUILD_OPTIMIZED=ON \
-DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
-Bcmake-out .
cmake --build cmake-out -j9 --target install --config Debug
cmake --build cmake-out -j9 --target install --config Release
}

cmake_build_llama_runner() {
echo "Building llama runner"
dir="examples/models/llama2"
retry cmake -DBUCK2="$BUCK" \
-DCMAKE_INSTALL_PREFIX=cmake-out \
-DCMAKE_PREFIX_PATH="$CMAKE_PREFIX_PATH" \
-DCMAKE_BUILD_TYPE=Debug \
-DEXECUTORCH_BUILD_CUSTOM="$CUSTOM" \
-DCMAKE_BUILD_TYPE=Release \
-DEXECUTORCH_BUILD_OPTIMIZED=ON \
-DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
-Bcmake-out/${dir} \
${dir}
cmake --build cmake-out/${dir} -j9 --config Debug
cmake --build cmake-out/${dir} -j9 --config Release

}

@@ -126,20 +113,13 @@ else
exit 1
fi

# Install custom ops before exporting
echo "Installing executorch libraries"
cmake_install_executorch_libraries

# Export model.
EXPORTED_MODEL_NAME="${EXPORTED_MODEL_NAME}.pte"
echo "Exporting ${EXPORTED_MODEL_NAME}"
EXPORT_ARGS="-c stories110M.pt -p ${PARAMS} -d ${DTYPE} -n ${EXPORTED_MODEL_NAME}"
if [[ "${MODE}" == "xnnpack+kv+custom" ]]; then
if [[ "${MODE}" == "xnnpack" ]]; then
EXPORT_ARGS="${EXPORT_ARGS} -kv --use_sdpa_with_kv_cache -X -qmode 8da4w -G 128"
fi
# Add dynamically linked library location
export LD_LIBRARY_PATH=${PWD}/cmake-out/lib
export DYLD_LIBRARY_PATH=${PWD}/cmake-out/lib
$PYTHON_EXECUTABLE -m examples.models.llama2.export_llama ${EXPORT_ARGS}

# Create tokenizer.bin.
@@ -155,6 +135,7 @@ if [[ "${BUILD_TOOL}" == "buck2" ]]; then
# shellcheck source=/dev/null
$BUCK run examples/models/llama2:main -- ${RUNTIME_ARGS} > result.txt
elif [[ "${BUILD_TOOL}" == "cmake" ]]; then
cmake_install_executorch_libraries
cmake_build_llama_runner
# Run llama runner
NOW=$(date +"%H:%M:%S")
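
Note: the MODE checks removed here fed straight into the CMake configure step via -DEXECUTORCH_BUILD_CUSTOM="$CUSTOM" and -DEXECUTORCH_BUILD_XNNPACK="$XNNPACK". As a rough sketch of how such a cache flag gates an optional target (a toy CMakeLists with placeholder names and files, not the actual ExecuTorch build):

    # Toy sketch of how a cache flag such as -DEXECUTORCH_BUILD_CUSTOM=ON,
    # passed by a CI script, gates an optional target. All names here are
    # placeholders, not the real ExecuTorch build.
    cmake_minimum_required(VERSION 3.19)
    project(flag_gating_demo CXX)

    # Defaults to OFF; the script overrode it per MODE.
    option(EXECUTORCH_BUILD_CUSTOM "Build the custom ops library" OFF)

    add_library(runner_core STATIC runner_core.cpp)

    if(EXECUTORCH_BUILD_CUSTOM)
      # Entered only when the flag is ON; custom_ops/ must define `custom_ops`.
      add_subdirectory(custom_ops)
      target_link_libraries(runner_core PUBLIC custom_ops)
    endif()

Configuring with `cmake -DEXECUTORCH_BUILD_CUSTOM=ON -Bcmake-out .` then mirrors what the script's retry cmake invocation did for each MODE.
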
2 changes: 1 addition & 1 deletion .github/workflows/pull.yml
@@ -90,7 +90,7 @@ jobs:
matrix:
dtype: [fp32]
build-tool: [buck2, cmake]
mode: [portable, xnnpack+kv+custom]
mode: [portable, xnnpack]
fail-fast: false
with:
runner: linux.2xlarge
2 changes: 1 addition & 1 deletion .github/workflows/trunk.yml
@@ -254,7 +254,7 @@ jobs:
matrix:
dtype: [fp32]
build-tool: [buck2, cmake]
mode: [portable, xnnpack+kv+custom]
mode: [portable, xnnpack]
fail-fast: false
with:
runner: macos-m1-stable
62 changes: 25 additions & 37 deletions CMakeLists.txt
@@ -175,9 +175,8 @@ option(EXECUTORCH_BUILD_VULKAN "Build the Vulkan backend" OFF)
#
# pthreadpool: build pthreadpool library. Disable on unsupported platforms
#
cmake_dependent_option(
EXECUTORCH_BUILD_PTHREADPOOL "Build pthreadpool library." ON
"NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF)
cmake_dependent_option(EXECUTORCH_BUILD_PTHREADPOOL "Build pthreadpool library."
ON "NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF)

#
# cpuinfo: build cpuinfo library. Disable on unsupported platforms
@@ -187,9 +186,6 @@ cmake_dependent_option(EXECUTORCH_BUILD_CPUINFO "Build cpuinfo library." ON

if(EXECUTORCH_BUILD_CPUINFO)
# --- cpuinfo
set(ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG
${CMAKE_POSITION_INDEPENDENT_CODE})
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CPUINFO_SOURCE_DIR "backends/xnnpack/third-party/cpuinfo")
set(CPUINFO_BUILD_TOOLS
OFF
@@ -211,15 +207,10 @@ if(EXECUTORCH_BUILD_CPUINFO)
CACHE STRING "")
set(CLOG_SOURCE_DIR "${CPUINFO_SOURCE_DIR}/deps/clog")
add_subdirectory("${CPUINFO_SOURCE_DIR}")
set(CMAKE_POSITION_INDEPENDENT_CODE
${ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG})
endif()

if(EXECUTORCH_BUILD_PTHREADPOOL)
# --- pthreadpool
set(ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG
${CMAKE_POSITION_INDEPENDENT_CODE})
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(PTHREADPOOL_SOURCE_DIR "backends/xnnpack/third-party/pthreadpool")
set(PTHREADPOOL_BUILD_TESTS
OFF
@@ -239,8 +230,6 @@ if(EXECUTORCH_BUILD_PTHREADPOOL)
CACHE STRING "")
endif()
add_subdirectory("${PTHREADPOOL_SOURCE_DIR}")
set(CMAKE_POSITION_INDEPENDENT_CODE
${ORIGINAL_CMAKE_POSITION_INDEPENDENT_CODE_FLAG})
endif()

if(NOT PYTHON_EXECUTABLE)
@@ -515,38 +504,25 @@ if(EXECUTORCH_BUILD_PYBIND)
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/sdk)
endif()

# find pytorch lib, to allow pybind to take at::Tensor as input/output
find_package(Torch CONFIG REQUIRED)
find_library(TORCH_PYTHON_LIBRARY torch_python
PATHS "${TORCH_INSTALL_PREFIX}/lib")

set(_dep_libs
${TORCH_PYTHON_LIBRARY}
bundled_program
etdump
executorch
extension_data_loader
portable_ops_lib
util
torch)

if(EXECUTORCH_BUILD_COREML)
list(APPEND _dep_libs coremldelegate)
set(PYBIND_LINK_COREML "coremldelegate")
endif()

if(EXECUTORCH_BUILD_MPS)
list(APPEND _dep_libs mpsdelegate)
set(PYBIND_LINK_MPS "mpsdelegate")
endif()

if(EXECUTORCH_BUILD_XNNPACK)
# need to explicitly specify XNNPACK here otherwise uses XNNPACK symbols
# from libtorch_cpu
list(APPEND _dep_libs xnnpack_backend XNNPACK)
# need to explicitly specify XNNPACK here
# otherwise uses XNNPACK symbols from libtorch_cpu
set(PYBIND_LINK_XNNPACK xnnpack_backend XNNPACK)
endif()

if(EXECUTORCH_BUILD_CUSTOM)
list(APPEND _dep_libs custom_ops custom_ops_aot_lib)
endif()
# find pytorch lib, to allow pybind to take at::Tensor as input/output
find_package(Torch CONFIG REQUIRED)
find_library(TORCH_PYTHON_LIBRARY torch_python
PATHS "${TORCH_INSTALL_PREFIX}/lib")

# compile options for pybind

set(_pybind_compile_options -Wno-deprecated-declarations -fPIC -frtti
@@ -568,7 +544,19 @@ if(EXECUTORCH_BUILD_PYBIND)
PUBLIC EXECUTORCH_PYTHON_MODULE_NAME=portable_lib)
target_include_directories(portable_lib PRIVATE ${TORCH_INCLUDE_DIRS})
target_compile_options(portable_lib PUBLIC ${_pybind_compile_options})
target_link_libraries(portable_lib PUBLIC ${_dep_libs})
target_link_libraries(
portable_lib
PUBLIC ${TORCH_PYTHON_LIBRARY}
bundled_program
etdump
executorch
extension_data_loader
portable_ops_lib
util
torch
${PYBIND_LINK_COREML}
${PYBIND_LINK_MPS}
${PYBIND_LINK_XNNPACK})

install(TARGETS portable_lib
LIBRARY DESTINATION executorch/extension/pybindings)
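
The pybind hunk above swaps the single _dep_libs list for per-backend PYBIND_LINK_* variables. The pattern works because CMake expands an undefined variable to an empty string, so target_link_libraries simply skips delegates that were never built. A minimal standalone sketch (placeholder target and file names, not the real build):

    # Standalone sketch of the restored PYBIND_LINK_* pattern.
    cmake_minimum_required(VERSION 3.19)
    project(pybind_link_demo CXX)

    option(DEMO_BUILD_MPS "Build the demo MPS delegate" OFF)
    set(CMAKE_POSITION_INDEPENDENT_CODE ON)  # static deps of a SHARED lib need PIC

    add_library(core_lib STATIC core.cpp)

    if(DEMO_BUILD_MPS)
      add_library(mpsdelegate STATIC mps.cpp)
      set(PYBIND_LINK_MPS "mpsdelegate")
    endif()

    add_library(portable_lib SHARED portable_lib.cpp)
    # With DEMO_BUILD_MPS=OFF, ${PYBIND_LINK_MPS} is undefined and expands to
    # nothing, so the call degrades to linking core_lib alone.
    target_link_libraries(portable_lib PUBLIC core_lib ${PYBIND_LINK_MPS})

The reverted list(APPEND _dep_libs ...) style accumulates the same optional libraries; the two forms are functionally equivalent, which is why the revert can swap between them without changing behavior.
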
66 changes: 23 additions & 43 deletions examples/models/llama2/CMakeLists.txt
@@ -49,72 +49,56 @@ set(_common_compile_options -Wno-deprecated-declarations -fPIC)
# Let files say "include <executorch/path/to/header.h>".
set(_common_include_directories ${EXECUTORCH_ROOT}/..)

# For some reason android build is not able to find where gflags is and hence
# cannot find corresponding .cmake file
# For some reason android build is not able to find where gflags is
# and hence cannot find corresponding .cmake file
set(gflags_DIR ${CMAKE_CURRENT_BINARY_DIR}/../../../third-party/gflags)
find_package(gflags REQUIRED)

#
# llama_main: test binary to run llama, with tokenizer and sampler integrated
#
add_executable(llama_main main.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../../backends/xnnpack/threadpool/cpuinfo_utils.cpp)
if(CMAKE_BUILD_TYPE EQUAL "RELEASE")
target_link_options(llama_main PRIVATE "LINKER:--gc-sections")
endif()

# find `executorch` libraries Same as for gflags
# find `executorch` libraries
# Same as for gflags
set(executorch_DIR ${CMAKE_CURRENT_BINARY_DIR}/../../../lib/cmake/ExecuTorch)
find_package(executorch CONFIG REQUIRED)
if(CMAKE_TOOLCHAIN_IOS OR ANDROID)
target_link_options_shared_lib(executorch)
endif()

# custom ops library
if(EXECUTORCH_BUILD_CUSTOM)
add_subdirectory(custom_ops)
endif()
add_subdirectory(custom_ops)

# llama_runner library
add_subdirectory(runner)

target_include_directories(llama_main PUBLIC
${CMAKE_CURRENT_SOURCE_DIR}/../../../backends/xnnpack/third-party/cpuinfo/include)
target_include_directories(llama_main PUBLIC
${CMAKE_CURRENT_SOURCE_DIR}/../../../backends/xnnpack/third-party/pthreadpool/include)

set(link_libraries)
set(_srcs main.cpp)

if(EXECUTORCH_BUILD_OPTIMIZED)
list(
APPEND
link_libraries
optimized_native_cpu_ops_lib
optimized_kernels
portable_kernels
cpublas
eigen_blas)
list(APPEND link_libraries optimized_native_cpu_ops_lib optimized_kernels
portable_kernels cpublas eigen_blas)
target_link_options_shared_lib(optimized_native_cpu_ops_lib)
else()
list(APPEND link_libraries portable_ops_lib portable_kernels)
target_link_options_shared_lib(portable_ops_lib)
endif()

if(EXECUTORCH_BUILD_CUSTOM)
target_link_options_shared_lib(custom_ops)
list(APPEND link_libraries custom_ops)
endif()
target_link_libraries(llama_main PUBLIC gflags llama_runner custom_ops_lib)

# XNNPACK pthreadpool cpuinfo
if(TARGET xnnpack_backend)
set(xnnpack_backend_libs xnnpack_backend XNNPACK pthreadpool cpuinfo)
list(APPEND link_libraries ${xnnpack_backend_libs})
# HACK: main only includes these when the xnnpack backend is available, so
# that we have all the threadpool sources under xnnpack.
list(APPEND _common_compile_options -DET_USE_THREADPOOL)
list(
APPEND
_srcs
${CMAKE_CURRENT_SOURCE_DIR}/../../../backends/xnnpack/threadpool/cpuinfo_utils.cpp
)
list(
APPEND
_common_include_directories
${CMAKE_CURRENT_SOURCE_DIR}/../../../backends/xnnpack/third-party/cpuinfo/include
${CMAKE_CURRENT_SOURCE_DIR}/../../../backends/xnnpack/third-party/pthreadpool/include
)
# end of hack
target_link_options_shared_lib(xnnpack_backend)
endif()

@@ -130,19 +114,15 @@ if(TARGET qnn_executorch_backend)
target_link_options_shared_lib(qnn_executorch_backend)
endif()

# This one is needed for cpuinfo where it uses android specific log lib
# This one is needed for cpuinfo where it uses android
# specific log lib
if(ANDROID)
list(APPEND link_libraries log)
endif()

add_executable(llama_main ${_srcs})
if(CMAKE_BUILD_TYPE EQUAL "RELEASE")
target_link_options(llama_main PRIVATE "LINKER:--gc-sections")
endif()

target_include_directories(llama_main PUBLIC ${_common_include_directories})
target_link_libraries(llama_main PUBLIC gflags llama_runner ${link_libraries})
target_compile_options(llama_main PUBLIC ${_common_compile_options})
target_compile_options(llama_main PUBLIC ${_common_compile_options}
-DET_USE_THREADPOOL)
target_link_libraries(llama_main PUBLIC ${link_libraries})

if(APPLE)
target_link_options_shared_lib(executorch)
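
Both the old and new versions of this file lean on target_link_options_shared_lib(...) so that kernel registrations living in static initializers survive linking. That helper is defined elsewhere in the repo; the assumption here is that it amounts to whole-archive linking, which a plain CMake sketch (placeholder names) would express as:

    # Ops archives register kernels in static initializers, which the linker
    # drops as unreferenced unless the whole archive is force-loaded.
    cmake_minimum_required(VERSION 3.24)
    project(whole_archive_demo CXX)

    add_library(demo_ops STATIC demo_ops.cpp)  # registers ops on load
    add_executable(demo_main main.cpp)

    # Portable spelling in CMake >= 3.24; older CMake falls back to
    # -Wl,--whole-archive on GNU/LLD linkers or -force_load on Apple ld.
    target_link_libraries(demo_main PRIVATE
        "$<LINK_LIBRARY:WHOLE_ARCHIVE,demo_ops>")

Without the force-load, a runner like llama_main would build but fail at run time with "operator not found" style errors, since nothing references the registration objects directly.
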
3 changes: 1 addition & 2 deletions examples/models/llama2/TARGETS
@@ -18,7 +18,7 @@ runtime.python_library(
],
deps = [
"//caffe2:torch",
"//executorch/examples/models/llama2/custom_ops:custom_ops_aot_py",
"//executorch/examples/models/llama2/custom_ops:llama_custom_ops_aot_lib",
],
)

@@ -52,7 +52,6 @@ runtime.python_binary(
main_module = "executorch.examples.models.llama2.export_llama",
# visibility = ["//executorch/examples/..."],
preload_deps = [
"//executorch/examples/models/llama2/custom_ops:custom_ops_aot_lib",
"//executorch/kernels/quantized:aot_lib",
],
deps = [
