Add out-variants to support ET export
Differential Revision: D62385428

Pull Request resolved: #859
metascroy committed Sep 16, 2024
1 parent a584e24 commit b2e1d49
Showing 25 changed files with 743 additions and 387 deletions.
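
Context for the title: "ET" is ExecuTorch, whose export flow plans all tensor memory ahead of time, so every operator in an exported graph must expose an out-variant, an overload that writes its result into a caller-provided tensor rather than allocating a fresh one. A minimal sketch of the distinction, with made-up names (the commit's real low-bit linear ops are not shown on this page):

// Minimal sketch of functional vs. out-variant; names are illustrative,
// not the operators this commit actually adds.
#include <torch/torch.h>
#include <iostream>

// Functional variant: allocates and returns a fresh result tensor.
torch::Tensor add_op(const torch::Tensor& a, const torch::Tensor& b) {
  return a + b;
}

// Out-variant (the `Tensor(a!) out` overload in schema terms): identical
// math, but the result is written into a buffer the caller owns, which is
// what ExecuTorch's ahead-of-time memory planner requires.
torch::Tensor& add_op_out(const torch::Tensor& a, const torch::Tensor& b,
                          torch::Tensor& out) {
  return torch::add_out(out, a, b);
}

int main() {
  auto a = torch::ones({4});
  auto b = torch::full({4}, 2.0);
  auto out = torch::empty({4});  // pre-planned, caller-owned buffer
  add_op_out(a, b, out);         // no allocation inside the op
  std::cout << out << std::endl;
}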
==========
@@ -5,15 +5,17 @@
 // LICENSE file in the root directory of this source tree.
 
 #pragma once
+#include <stdint.h>
 #include <torchao/experimental/kernels/cpu/macro.h>
 #include <torchao/experimental/kernels/cpu/parallel.h>
 #include <algorithm>
 #include <cassert>
+#include <cstdlib>
 
 namespace torchao::operators::cpu::linear::
     channelwise_8bit_activation_groupwise_lowbit_weight {
 
-PackWeightDataTilingParams get_default_pack_weight_data_tiling_params(
+inline PackWeightDataTilingParams get_default_pack_weight_data_tiling_params(
     const UKernelConfig& ukernel_config,
     int n,
     int target_panels_per_thread) {
@@ -38,7 +40,7 @@ PackWeightDataTilingParams get_default_pack_weight_data_tiling_params(
   return tiling_params;
 }
 
-void pack_weight_data_operator(
+inline void pack_weight_data_operator(
     const UKernelConfig& ukernel_config,
     const PackWeightDataTilingParams& tiling_params,
     // Outputs
@@ -79,7 +81,7 @@ void pack_weight_data_operator(
 }
 
 // This default mimics XNNPACK behavior if target_tiles_per_thread = 5
-LinearTilingParams get_default_linear_tiling_params(
+inline LinearTilingParams get_default_linear_tiling_params(
     const UKernelConfig& ukernel_config,
     int m,
     int n,
@@ -137,12 +139,12 @@ get_activation_data_buffer_size_with_tile_schedule_policy_parallel_mc_parallel_nc(
   return ukernel_config.activation_data_size_fn(m, k, group_size);
 }
 
-void linear_operator_with_tile_schedule_policy_single_mc_parallel_nc(
+inline void linear_operator_with_tile_schedule_policy_single_mc_parallel_nc(
     const UKernelConfig& ukernel_config,
     const LinearTilingParams& tiling_params,
     char* activation_data_buffer,
     // Outputs
-    float32_t* output,
+    float* output,
     // Inputs
     int m,
     int n,
@@ -199,12 +201,12 @@ void linear_operator_with_tile_schedule_policy_single_mc_parallel_nc(
   }
 }
 
-void linear_operator_with_tile_schedule_policy_parallel_mc_parallel_nc(
+inline void linear_operator_with_tile_schedule_policy_parallel_mc_parallel_nc(
     const UKernelConfig& ukernel_config,
     const LinearTilingParams& tiling_params,
     char* activation_data_buffer,
     // Outputs
-    float32_t* output,
+    float* output,
     // Inputs
     int m,
     int n,
@@ -271,7 +273,7 @@ void linear_operator_with_tile_schedule_policy_parallel_mc_parallel_nc(
   }
 } // namespace internal
 
-void linear_operator(
+inline void linear_operator(
     const UKernelConfig& ukernel_config,
     const LinearTilingParams& tiling_params,
     LinearTileSchedulingPolicy scheduling_policy,
@@ -363,7 +365,7 @@ namespace torchao::operators::cpu::linear::
     channelwise_8bit_activation_groupwise_lowbit_weight {
 template <int weight_nbit, bool has_weight_zeros, bool has_bias, bool has_clamp>
 
-UKernelConfig get_ukernel_config() {
+inline UKernelConfig get_ukernel_config() {
   UKernelConfig config;
 
   namespace ukernel = torchao::kernels::cpu::aarch64::linear::
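
Two changes repeat throughout this header. First, every function defined here gains `inline`: with the new ATen and ExecuTorCH wrappers, the header is included from several translation units, and a non-inline definition in a header is emitted once per .cpp file and fails to link with "multiple definition" errors (an ODR violation). Second, `float32_t* output` becomes `float* output`, since `float32_t` is not a type standard C++ defines, whereas the fixed-width integer types come from the newly included <stdint.h>. A minimal illustration of the `inline` rule (not code from this commit):

// tiling_util.h -- illustrative only. Defining a function in a header is
// safe across many translation units only if it is marked `inline`.
#pragma once

// Without `inline`: a.cpp and b.cpp both include this header, both emit a
// definition of panels_per_thread, and linking them together fails.
// With `inline`: the repeated identical definitions are folded into one.
inline int panels_per_thread(int n, int nr) {
  return (n + nr - 1) / nr;  // ceil-divide, as the tiling helpers do
}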
==========
@@ -5,6 +5,7 @@
 // LICENSE file in the root directory of this source tree.
 
 #pragma once
+#include <stdint.h>
 
 // TODO: maybe move to operator directory
 namespace torchao::operators::cpu::linear::
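
This second header (its path is not captured on this page) gains the same <stdint.h> include, presumably because its declarations use fixed-width integer types that are only guaranteed after that include, e.g. (illustrative only):

#include <stdint.h>

// <stdint.h> guarantees fixed-width types like these; the surrounding
// operators work with 8-bit quantized activations and low-bit weights
// packed into bytes.
int8_t quantized_activation = -3;
uint8_t packed_weight_byte = 0x5A;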
==========
@@ -18,13 +18,41 @@ include_directories(${TORCHAO_LIBRARIES})

 add_subdirectory(${TORCHAO_LIBRARIES}/torchao/experimental/kernels/cpu/aarch64 ${CMAKE_CURRENT_BINARY_DIR}/kernel_aarch64)
 
-find_package(Torch REQUIRED)
-include_directories("${TORCH_INCLUDE_DIRS}")
-
-add_library(torch_custom_op SHARED torch_custom_op.cpp)
-target_link_libraries(torch_custom_op PRIVATE "${TORCH_LIBRARIES}")
-target_link_libraries(torch_custom_op PRIVATE kernel_aarch64)
-
-include(${TORCHAO_LIBRARIES}/torchao/experimental/kernels/cpu/Utils.cmake)
-set(TORCHAO_PARALLEL_BACKEND "ATEN_OPENMP" CACHE STRING "Choose parallel backend to use for torchao parallelism (aten_openmp, openmp, pthreadpool, single_threaded)")
-target_link_torchao_parallel_backend(torch_custom_op "${TORCHAO_PARALLEL_BACKEND}")
+include(${TORCHAO_LIBRARIES}/torchao/experimental/kernels/cpu/Utils.cmake)
+
+set(PLATFORM "ATEN" CACHE STRING "Choose platform surface: ATEN, EXECUTORCH")
+string(TOUPPER ${PLATFORM} PLATFORM_TO_UPPER)
+
+if(PLATFORM_TO_UPPER STREQUAL "ATEN")
+    message(STATUS "Building with PLATFORM=ATEN")
+
+    find_package(Torch REQUIRED)
+    add_library(lowbit_op_aten SHARED lowbit_op_aten.cpp)
+    target_link_libraries(lowbit_op_aten PRIVATE kernel_aarch64)
+    target_include_directories(lowbit_op_aten PRIVATE "${TORCH_INCLUDE_DIRS}")
+    target_link_libraries(lowbit_op_aten PRIVATE "${TORCH_LIBRARIES}")
+    target_compile_definitions(lowbit_op_aten PRIVATE USE_ATEN=1)
+    target_link_torchao_parallel_backend(lowbit_op_aten "ATEN_OPENMP")
+
+elseif(PLATFORM_TO_UPPER STREQUAL "EXECUTORCH")
+    message(STATUS "Building with PLATFORM=EXECUTORCH")
+
+    add_library(lowbit_op_executorch SHARED
+        lowbit_op_executorch/w2s.cpp
+        lowbit_op_executorch/w2sz.cpp
+        lowbit_op_executorch/w3s.cpp
+        lowbit_op_executorch/w3sz.cpp
+        lowbit_op_executorch/w4s.cpp
+        lowbit_op_executorch/w4sz.cpp
+        lowbit_op_executorch/w5s.cpp
+        lowbit_op_executorch/w5sz.cpp
+    )
+    target_include_directories(lowbit_op_executorch PRIVATE ${EXECUTORCH_INCLUDE_DIRS})
+    target_compile_definitions(lowbit_op_executorch PRIVATE USE_EXECUTORCH=1)
+    target_link_torchao_parallel_backend(lowbit_op_executorch "SINGLE_THREADED")
+    target_link_libraries(lowbit_op_executorch PRIVATE ${EXECUTORCH_LIBRARIES})
+    target_link_libraries(lowbit_op_executorch PRIVATE kernel_aarch64)
+
+else()
+    message(FATAL_ERROR "Unknown PLATFORM: ${PLATFORM}. Please choose one of: ATEN, EXECUTORCH.")
+endif()
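
The two branches build the same kernels against different surfaces: the ATen target compiles with USE_ATEN=1 and OpenMP parallelism, while the ExecuTorch target compiles one .cpp per weight variant (w2s through w5sz, plausibly n-bit weights with s = scale-only and sz = scale-plus-zeros, mirroring the weight_nbit/has_weight_zeros template parameters seen earlier) with USE_EXECUTORCH=1 and a single-threaded backend. A hypothetical sketch of how such compile definitions are typically consumed; names and include paths are illustrative, not the commit's actual glue code:

#if defined(USE_ATEN)
#include <ATen/ATen.h>
using TensorArg = at::Tensor;  // full ATen tensor for the PyTorch build
#elif defined(USE_EXECUTORCH)
#include <executorch/runtime/core/exec_aten/exec_aten.h>
using TensorArg = exec_aten::Tensor;  // ExecuTorch's lightweight tensor
#else
#error "Build with USE_ATEN=1 or USE_EXECUTORCH=1 (see the CMake above)."
#endif

// A single operator body written against TensorArg can then be compiled
// once per platform, which is what the two add_library() targets above do.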
==========
@@ -13,7 +13,7 @@ echo "CMAKE_PREFIX_PATH: ${CMAKE_PREFIX_PATH}"
 export CMAKE_OUT=/tmp/cmake-out/torch_ao/examples/torch_custom_op
 cmake -DTORCHAO_LIBRARIES=${TORCHAO_LIBRARIES} \
     -DCMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH} \
-    -DTORCHAO_PARALLEL_BACKEND="aten_openmp" \
+    -DPLATFORM="ATEN" \
     -S ${TORCHAO_LIBRARIES}/torchao/experimental/kernels/cpu/linear/examples/torch_custom_op \
     -B ${CMAKE_OUT}
 cmake --build ${CMAKE_OUT}