diff --git a/backends/qualcomm/CMakeLists.txt b/backends/qualcomm/CMakeLists.txt
index e5f365d3ea..cefc330d3d 100644
--- a/backends/qualcomm/CMakeLists.txt
+++ b/backends/qualcomm/CMakeLists.txt
@@ -63,9 +63,6 @@ endif()
 if(CMAKE_BUILD_TYPE STREQUAL "Release")
   # strip symbols
   add_link_options("-s")
-  # hide dynamic symbols
-  set(CMAKE_C_VISIBILITY_PRESET hidden)
-  set(CMAKE_CXX_VISIBILITY_PRESET hidden)

   # --gc-sections is added by torch.
   add_compile_options(
diff --git a/backends/qualcomm/builders/op_prelu.py b/backends/qualcomm/builders/op_prelu.py
index 17e352fd9c..8305b0c965 100644
--- a/backends/qualcomm/builders/op_prelu.py
+++ b/backends/qualcomm/builders/op_prelu.py
@@ -43,6 +43,10 @@ def define_node(
         coeff_node = node.args[1]
         coeff_tensor = torch.zeros(input_node.meta["val"].shape)
         coeff = get_parameter(coeff_node, self.edge_program)
+        # param nodes will be FakeTensor when doing partition
+        # fill in random numeric for validation
+        if isinstance(coeff, torch._subclasses.fake_tensor.FakeTensor):
+            coeff = torch.ones(coeff.shape)
         # per-channel activation
         if coeff_node.meta["val"].shape[0] > 1:
             for i in range(input_node.meta["val"].shape[1]):
diff --git a/backends/qualcomm/builders/utils.py b/backends/qualcomm/builders/utils.py
index 38e3b676d3..ede32a5e65 100755
--- a/backends/qualcomm/builders/utils.py
+++ b/backends/qualcomm/builders/utils.py
@@ -7,7 +7,14 @@
 from typing import Dict, Optional

 import torch
-from torch._export.utils import get_buffer, get_param, is_buffer, is_param
+from torch._export.utils import (
+    get_buffer,
+    get_lifted_tensor_constant,
+    get_param,
+    is_buffer,
+    is_lifted_tensor_constant,
+    is_param,
+)


 def is_parameter(
@@ -16,7 +23,7 @@ def is_parameter(
     return (
         is_param(edge_program, node)
         or is_buffer(edge_program, node)
-        or node.name in edge_program.graph_signature.inputs_to_lifted_tensor_constants
+        or is_lifted_tensor_constant(edge_program, node)
     )


@@ -28,9 +35,8 @@ def get_parameter(
     param = get_param(edge_program, node)
     if is_buffer(edge_program, node):
         param = get_buffer(edge_program, node)
-    if node.name in edge_program.graph_signature.inputs_to_lifted_tensor_constants:
-        name = edge_program.graph_signature.inputs_to_lifted_tensor_constants[node.name]
-        param = edge_program.constants[name]
+    if is_lifted_tensor_constant(edge_program, node):
+        param = get_lifted_tensor_constant(edge_program, node)
     if param is not None:
         # update node.meta["val"] to qualified QNN datatype (e.g. i64 to i32)
         assert isinstance(param, torch.Tensor), "Expect parameter to be tensor"
diff --git a/backends/qualcomm/passes/recompose_pixel_unshuffle.py b/backends/qualcomm/passes/recompose_pixel_unshuffle.py
index 50ce238ce7..57ef9bd077 100644
--- a/backends/qualcomm/passes/recompose_pixel_unshuffle.py
+++ b/backends/qualcomm/passes/recompose_pixel_unshuffle.py
@@ -24,7 +24,7 @@ def __init__(self, quantization_capture=False):
         self.quantization_capture = quantization_capture

         if quantization_capture:
-            self.reshape_target = torch.ops.aten.reshape.default
+            self.reshape_target = torch.ops.aten._unsafe_view.default
             self.permute_target = torch.ops.aten.permute.default
             self.view_target = torch.ops.aten.view.default
             self.op = torch.ops.aten.pixel_unshuffle.default
diff --git a/backends/qualcomm/scripts/build.sh b/backends/qualcomm/scripts/build.sh
index 00c4f07a14..3712a83fde 100755
--- a/backends/qualcomm/scripts/build.sh
+++ b/backends/qualcomm/scripts/build.sh
@@ -29,6 +29,7 @@ CMAKE_X86_64="build_x86_64"
 BUILD_AARCH64="true"
 CMAKE_AARCH64="build_android"
 CLEAN="true"
+BUILD_TYPE="Debug"

 if [ -z PYTHON_EXECUTABLE ]; then
   PYTHON_EXECUTABLE="python3"
@@ -38,7 +39,7 @@ if [ -z BUCK2 ]; then
   BUCK2="buck2"
 fi

-long_options=skip_x86_64,skip_aarch64,no_clean
+long_options=skip_x86_64,skip_aarch64,no_clean,release

 parsed_args=$(getopt -a --options '' --longoptions $long_options --name "$0" -- "$@")
 eval set -- "$parsed_args"
@@ -49,6 +50,7 @@ while true ; do
     --skip_x86_64) BUILD_X86_64="false"; shift;;
     --skip_aarch64) BUILD_AARCH64="false"; shift;;
     --no_clean) CLEAN="false"; shift;;
+    --release) BUILD_TYPE="Release"; shift;;
     --) shift; break;;
   esac
 done
@@ -66,9 +68,9 @@ if [ "$BUILD_AARCH64" = true ]; then
   fi

   cd $BUILD_ROOT
-  # If we build debug type, we need to change flatcc to flatcc_d
   cmake .. \
     -DCMAKE_INSTALL_PREFIX=$BUILD_ROOT \
+    -DCMAKE_BUILD_TYPE=$BUILD_TYPE \
     -DEXECUTORCH_BUILD_QNN=ON \
     -DEXECUTORCH_BUILD_SDK=ON \
     -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
@@ -87,6 +89,7 @@ if [ "$BUILD_AARCH64" = true ]; then

   cmake $PRJ_ROOT/$EXAMPLE_ROOT \
     -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK_ROOT/build/cmake/android.toolchain.cmake \
+    -DCMAKE_BUILD_TYPE=$BUILD_TYPE \
     -DANDROID_ABI='arm64-v8a' \
     -DANDROID_NATIVE_API_LEVEL=23 \
     -DCMAKE_PREFIX_PATH=$CMAKE_PREFIX_PATH \
diff --git a/backends/qualcomm/tests/utils.py b/backends/qualcomm/tests/utils.py
index 476532f48d..f31f07562b 100644
--- a/backends/qualcomm/tests/utils.py
+++ b/backends/qualcomm/tests/utils.py
@@ -302,7 +302,7 @@ def get_qdq_module(
         custom_quant_annotations: Tuple[Callable] = (),
         quant_dtype: QuantDtype = QuantDtype.use_8a8w,
     ) -> torch.fx.GraphModule:
-        m = torch._export.capture_pre_autograd_graph(module, inputs)
+        m = torch.export.export(module, inputs).module()

         quantizer = QnnQuantizer()
         quantizer.add_custom_quant_annotations(custom_quant_annotations)
diff --git a/backends/qualcomm/utils/utils.py b/backends/qualcomm/utils/utils.py
index b17c181ffd..cf656b106f 100644
--- a/backends/qualcomm/utils/utils.py
+++ b/backends/qualcomm/utils/utils.py
@@ -58,6 +58,7 @@
 from executorch.exir import ExirExportedProgram
 from executorch.exir.backend.compile_spec_schema import CompileSpec
 from executorch.exir.lowered_backend_module import LoweredBackendModule
+from executorch.exir.program._program import _get_updated_graph_signature
 from torch._decomp import core_aten_decompositions as torch_core_aten_decompositions
 from torch.export.exported_program import ExportedProgram
 from torch.fx import passes
@@ -223,7 +224,12 @@ def capture_program(
     core_ep.transform(ConvertBinaryOpsWithScalar())
     edge_ep = core_ep.to_edge(qnn_edge_config())
     _transform(edge_ep.exported_program)
-
+    # Since QDQ nodes are stripped, update graph signature again to validate program
+    edge_ep.exported_program._graph_signature = _get_updated_graph_signature(
+        edge_ep.exported_program.graph_signature,
+        edge_ep.exported_program.graph_module,
+    )
+    edge_ep.exported_program._validate()
     return edge_ep
diff --git a/examples/qualcomm/CMakeLists.txt b/examples/qualcomm/CMakeLists.txt
index 4849b55afe..94aae08de8 100644
--- a/examples/qualcomm/CMakeLists.txt
+++ b/examples/qualcomm/CMakeLists.txt
@@ -106,7 +106,9 @@ target_link_libraries(
   qnn_executor_runner qnn_executorch_backend full_portable_ops_lib etdump
   ${FLATCCRT_LIB} gflags
 )
-target_link_options(qnn_executor_runner PUBLIC -fsanitize=undefined)
+if(CMAKE_BUILD_TYPE STREQUAL "Debug")
+  target_link_options(qnn_executor_runner PUBLIC -fsanitize=undefined)
+endif()

 # build llama runner
 add_executable(qnn_llama_runner ${_qnn_llama_runner__srcs})
diff --git a/examples/qualcomm/llama2/llama.py b/examples/qualcomm/llama2/llama.py
index 0ed1c01306..c5214ea272 100644
--- a/examples/qualcomm/llama2/llama.py
+++ b/examples/qualcomm/llama2/llama.py
@@ -8,8 +8,6 @@ import getpass
 import json
 import os
-import shutil
-import stat
 import time

 from multiprocessing.connection import Client
@@ -62,7 +60,6 @@ def annotate_matmul_16a8w(gm: torch.fx.GraphModule) -> None:
     """
     This function is specific for matmul op 16a8w.
     """
-    from typing import Sequence

     from executorch.backends.qualcomm.quantizer.quantizer import (
         get_16a8w_qnn_ptq_config,
@@ -294,9 +291,9 @@ def quantize(self, quant_dtype, custom_annotations=()):
         fx_graph_module = None

         with torch.no_grad():
-            fx_graph_module = torch._export.capture_pre_autograd_graph(
+            fx_graph_module = torch.export.export(
                 self.llama_model, self.inputs
-            )
+            ).module()
             fx_graph_module = prepare_pt2e(fx_graph_module, quantizer)
         print("Quantizing the model...")
         calibrate(
@@ -343,16 +340,6 @@ def lowering_modules(
             constant_methods=self.llama_meta,
             compile_config=EdgeCompileConfig(_check_ir_validity=False),
         )
-
-        setattr(
-            edge_prog_mgr.exported_program(),
-            "_graph_signature",
-            _get_updated_graph_signature(
-                edge_prog_mgr.exported_program().graph_signature,
-                edge_prog_mgr.exported_program().graph_module,
-            ),
-        )
-
         edge_prog_mgr = edge_prog_mgr.to_backend(partitioner)
         exec_prog_mgr = edge_prog_mgr.to_executorch(config=executorch_config)
         with open(f"{work_space}/{pte_filename}.pte", "wb") as file:
@@ -520,7 +507,6 @@ def post_process():
         "-P",
         "--ptq",
         help="If specified, will do PTQ quantization. default is 16bits activation and 4bits weight. Support 8a8w and 16a4w.",
-        required=True,
         default="16a4w",
     )
diff --git a/examples/qualcomm/llama2/qaihub_runner/runner.cpp b/examples/qualcomm/llama2/qaihub_runner/runner.cpp
index a905ecd3e7..32a89c9700 100644
--- a/examples/qualcomm/llama2/qaihub_runner/runner.cpp
+++ b/examples/qualcomm/llama2/qaihub_runner/runner.cpp
@@ -9,9 +9,9 @@
 // A simple llama2 runner that includes preprocessing and post processing logic.
 // The module takes in a string as input and emits a string as output.

-#include
 #include
+#include

 #include
 #include
diff --git a/examples/qualcomm/llama2/qaihub_runner/runner.h b/examples/qualcomm/llama2/qaihub_runner/runner.h
index 4012c39629..3968388695 100644
--- a/examples/qualcomm/llama2/qaihub_runner/runner.h
+++ b/examples/qualcomm/llama2/qaihub_runner/runner.h
@@ -18,8 +18,8 @@
 #include
 #include

-#include
 #include
+#include

 #include
 #include
diff --git a/examples/qualcomm/llama2/runner/runner.h b/examples/qualcomm/llama2/runner/runner.h
index 5128c365cb..8bfe27ebab 100644
--- a/examples/qualcomm/llama2/runner/runner.h
+++ b/examples/qualcomm/llama2/runner/runner.h
@@ -19,7 +19,7 @@
 #include
 #include

-#include
+#include
 #include
 #include
diff --git a/examples/qualcomm/scripts/export_example.py b/examples/qualcomm/scripts/export_example.py
index 63bba2a594..b12a44993d 100644
--- a/examples/qualcomm/scripts/export_example.py
+++ b/examples/qualcomm/scripts/export_example.py
@@ -66,7 +66,7 @@
     quantizer.set_bit8_op_quant_config(quant_config)

     # Typical pytorch 2.0 quantization flow
-    m = torch._export.capture_pre_autograd_graph(model.eval(), example_inputs)
+    m = torch.export.export(model.eval(), example_inputs).module()
     m = prepare_pt2e(m, quantizer)
     # Calibration
     m(*example_inputs)
diff --git a/examples/qualcomm/scripts/utils.py b/examples/qualcomm/scripts/utils.py
index 6093f1dc1c..d803932179 100755
--- a/examples/qualcomm/scripts/utils.py
+++ b/examples/qualcomm/scripts/utils.py
@@ -204,7 +204,7 @@ def build_executorch_binary(
     else:
         raise AssertionError(f"No support for QuantDtype {quant_dtype}.")

-    captured_model = torch._export.capture_pre_autograd_graph(model, inputs)
+    captured_model = torch.export.export(model, inputs).module()
     annotated_model = prepare_pt2e(captured_model, quantizer)
     print("Quantizing the model...")
     # calibration