diff --git a/backends/qualcomm/_passes/decompose_einsum.py b/backends/qualcomm/_passes/decompose_einsum.py
new file mode 100644
index 0000000000..c1924838b3
--- /dev/null
+++ b/backends/qualcomm/_passes/decompose_einsum.py
@@ -0,0 +1,65 @@
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+from executorch.exir.pass_base import ExportPass, PassResult
+from torch.fx.experimental.proxy_tensor import make_fx
+
+
+class DecomposeEinsum(ExportPass):
+    """
+    Decompose einsum so that quantization annotation works properly.
+    """
+
+    def __init__(self) -> None:
+        super().__init__()
+
+    def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
+        graph = graph_module.graph
+        for node in graph.nodes:
+            if node.target == torch.ops.aten.einsum.default:
+                decomposed_module = make_fx(
+                    node.target,
+                    tracing_mode="fake",
+                )(node.args[0], [arg.meta["val"] for arg in node.args[1]])
+
+                with graph.inserting_before(node):
+                    # remap maps original node values to new node values,
+                    # which ensures that references to nodes are correctly updated in the new graph
+                    remap = {}
+                    # Unlike other nodes, einsum's args[0] is the einsum equation,
+                    # while its input nodes are stored in args[1]
+                    for i, arg in enumerate(node.args[1]):
+                        remap[f"arg1_{i+1}"] = arg
+
+                    for decomposed_node in decomposed_module.graph.nodes:
+                        # This is the args[0] equation string, which is no longer required after decomposition
+                        if "arg0" in decomposed_node.name:
+                            continue
+
+                        # no need to copy the existing 'output'
+                        if decomposed_node.op == "output":
+                            for user in node.users.copy():
+                                # remap
+                                user.replace_input_with(
+                                    node,
+                                    remap[decomposed_node.args[0][0]],
+                                )
+                        # no need to copy existing placeholders
+                        elif decomposed_node.op == "placeholder":
+                            # replace node map key from string to graph node
+                            remap[decomposed_node] = remap.pop(decomposed_node.name)
+                        else:
+                            remap[decomposed_node] = graph.node_copy(
+                                decomposed_node,
+                                arg_transform=lambda x, remap=remap: remap[x],
+                            )
+
+                graph.erase_node(node)
+
+        graph.eliminate_dead_code()
+        graph_module.recompile()
+        return PassResult(graph_module, True)
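For reference, the decomposition this pass replays can be reproduced standalone. A minimal sketch with illustrative shapes; it also shows the arg0_1/arg1_N placeholder naming that the remap keys above depend on:

    import torch
    from torch.fx.experimental.proxy_tensor import make_fx

    # Trace einsum exactly as the pass does; make_fx lowers the composite
    # op into primitive ops (permute/mul/sum or bmm, depending on the equation).
    decomposed = make_fx(torch.ops.aten.einsum.default, tracing_mode="fake")(
        "i,j->ij", [torch.randn(5), torch.randn(4)]
    )
    # Placeholders come out as arg0_1 (the equation string) and arg1_1, arg1_2
    # (the operands), which is why the pass keys its remap on f"arg1_{i+1}".
    print([n.name for n in decomposed.graph.nodes if n.op == "placeholder"])
    decomposed.graph.print_tabular()
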
diff --git a/backends/qualcomm/_passes/insert_requantize.py b/backends/qualcomm/_passes/insert_requantize.py
index 417d3b85b0..5291edeb9f 100644
--- a/backends/qualcomm/_passes/insert_requantize.py
+++ b/backends/qualcomm/_passes/insert_requantize.py
@@ -28,6 +28,7 @@ class InsertRequantize(ExportPass):
     # we don't use the 2nd output, 2nd output is an integer, etc.
     multi_output_op_ignore_set = {
         exir_ops.edge.aten._native_batch_norm_legit_no_training.default,
+        exir_ops.edge.aten.topk.default,
     }
 
     def __init__(
diff --git a/backends/qualcomm/_passes/layout_transform.py b/backends/qualcomm/_passes/layout_transform.py
index bdee2c8196..829c11fda4 100644
--- a/backends/qualcomm/_passes/layout_transform.py
+++ b/backends/qualcomm/_passes/layout_transform.py
@@ -65,6 +65,7 @@ class LayoutTransform(ExportPass):
         exir_ops.edge.aten.sqrt.default,
         exir_ops.edge.aten.sub.Tensor,
         exir_ops.edge.aten.sum.dim_IntList,
+        exir_ops.edge.aten.topk.default,
         exir_ops.edge.aten._to_copy.default,
         exir_ops.edge.aten.split_with_sizes.default,
         *q_ops,
diff --git a/backends/qualcomm/builders/__init__.py b/backends/qualcomm/builders/__init__.py
index 79c02e2207..74fd58a3ec 100644
--- a/backends/qualcomm/builders/__init__.py
+++ b/backends/qualcomm/builders/__init__.py
@@ -53,6 +53,7 @@
     op_sum_int_list,
     op_tanh,
     op_to,
+    op_topk,
    op_transpose,
     op_unsqueeze,
     op_upsample_bilinear2d,
@@ -107,6 +108,7 @@
     op_sub,
     op_sum_int_list,
     op_tanh,
+    op_topk,
     op_to,
     op_transpose,
     op_unsqueeze,
diff --git a/backends/qualcomm/builders/op_avg_pool2d.py b/backends/qualcomm/builders/op_avg_pool2d.py
index 2f7e773b4f..5ad3fc36c9 100644
--- a/backends/qualcomm/builders/op_avg_pool2d.py
+++ b/backends/qualcomm/builders/op_avg_pool2d.py
@@ -3,6 +3,7 @@
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
+import warnings
 from typing import cast, Dict, List
 
 import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper
@@ -85,7 +86,10 @@ def define_node(
         if len(node.args) > 6:
             divisor_override = cast(int, node.args[6])
             if divisor_override != pooling_region:
-                print("Not support divisor_override which is not equal to pooling region.")
+                warnings.warn(
+                    "[QNN Delegate Op Builder]: divisor_override that is not equal to the pooling region is not supported.",
+                    stacklevel=1,
+                )
                 return
 
         avg_pool2d_op = PyQnnWrapper.PyQnnOpWrapper(
diff --git a/backends/qualcomm/builders/op_cat.py b/backends/qualcomm/builders/op_cat.py
index bb68b24289..cf18690498 100644
--- a/backends/qualcomm/builders/op_cat.py
+++ b/backends/qualcomm/builders/op_cat.py
@@ -3,6 +3,7 @@
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
+import warnings
 from typing import cast, Dict, List
 
 import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper
@@ -43,8 +44,9 @@ def define_node(
         )
 
         if len(list_of_tensors) != len(list_of_tensor_wrappers):
-            print(
-                "The number or input tensors is not equal to the number of input tensor wrappers."
+            warnings.warn(
+                "[QNN Delegate Op Builder]: The number of input tensors is not equal to the number of input tensor wrappers.",
+                stacklevel=1,
             )
             return
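These builders (and the ones that follow) share one fallback convention: emit a warning tagged "[QNN Delegate Op Builder]" and return None, so define_node builds no op wrapper and the node falls back to CPU instead of aborting the lowering. A condensed, runnable sketch of the pattern, using a hypothetical function name and a stand-in return value:

    import warnings

    def build_or_fallback(divisor_override, pooling_region):
        # Mirrors the builders above: warn with the delegate tag and return
        # None when a backend constraint is not met.
        if divisor_override != pooling_region:
            warnings.warn(
                "[QNN Delegate Op Builder]: divisor_override that is not equal "
                "to the pooling region is not supported.",
                stacklevel=1,
            )
            return None
        return "op_wrapper"  # stand-in for the PyQnnOpWrapper built on success

    assert build_or_fallback(9, 9) == "op_wrapper"
    assert build_or_fallback(4, 9) is None  # warns, builds nothing
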
diff --git a/backends/qualcomm/builders/op_conv2d.py b/backends/qualcomm/builders/op_conv2d.py
index b6e70c374e..30207a0392 100644
--- a/backends/qualcomm/builders/op_conv2d.py
+++ b/backends/qualcomm/builders/op_conv2d.py
@@ -4,6 +4,7 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+import warnings
 from typing import cast, Dict, List
 
 import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper
@@ -189,12 +190,18 @@ def _define_conv1d(
 
         # args[6] = transposed
         if cast(bool, node.args[6]):
-            print("Currently, No support for transposed convolution")
+            warnings.warn(
+                "[QNN Delegate Op Builder]: Transposed convolution is not currently supported.",
+                stacklevel=1,
+            )
             return
 
         # args[7] = output padding
         if not all(out_pad == 0 for out_pad in cast(List[int], node.args[7])):
-            print("QNN does not support output padding")
+            warnings.warn(
+                "[QNN Delegate Op Builder]: QNN does not support output padding.",
+                stacklevel=1,
+            )
             return
 
         stride_shape = [len(stride)]
diff --git a/backends/qualcomm/builders/op_expand.py b/backends/qualcomm/builders/op_expand.py
index dec352fef7..3f5c266cdd 100644
--- a/backends/qualcomm/builders/op_expand.py
+++ b/backends/qualcomm/builders/op_expand.py
@@ -3,6 +3,7 @@
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
+import warnings
 from typing import cast, Dict, List
 
 import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper
@@ -52,8 +53,9 @@ def define_node(
         output_dims = len(output_tensor.size())
 
         if input_dims < output_dims:
-            print(
-                f"The rank of input tensor: {input_dims} is less than the rank of output tensor: {output_dims}."
+            warnings.warn(
+                f"[QNN Delegate Op Builder]: The rank of the input tensor ({input_dims}) is less than the rank of the output tensor ({output_dims}).",
+                stacklevel=1,
             )
             return
 
diff --git a/backends/qualcomm/builders/op_layer_norm.py b/backends/qualcomm/builders/op_layer_norm.py
index 18f5b76310..635e12d2ee 100644
--- a/backends/qualcomm/builders/op_layer_norm.py
+++ b/backends/qualcomm/builders/op_layer_norm.py
@@ -4,6 +4,7 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+import warnings
 from typing import Dict
 
 import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper
@@ -44,7 +45,10 @@ def define_node(
             len(normalized_shapes) != 1
             and normalized_shapes[0] != input_tensor.shape[-1]
         ):
-            print("Only supports normalization with last input dimension")
+            warnings.warn(
+                "[QNN Delegate Op Builder]: Normalization is only supported over the last input dimension.",
+                stacklevel=1,
+            )
             return
         axis = [len(input_tensor.shape) - 1]
         axis_shape = [len(axis)]
diff --git a/backends/qualcomm/builders/op_linear.py b/backends/qualcomm/builders/op_linear.py
index 17afb21c6d..e4f16d4473 100644
--- a/backends/qualcomm/builders/op_linear.py
+++ b/backends/qualcomm/builders/op_linear.py
@@ -4,6 +4,7 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+import warnings
 from typing import Dict
 
 import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper
@@ -70,8 +71,9 @@ def define_node(
 
             # TODO remove this when qnn sdk support
             if QCOM_SCALES in bias_node.meta.get(QCOM_QUANT_ATTRS, {}):
-                print(
-                    f"[WARNING] Fallback linear bias, {bias_node}. per channel bias quantization is not support yet."
+                warnings.warn(
+                    f"[QNN Delegate Op Builder]: Falling back on linear bias {bias_node}; per-channel bias quantization is not supported yet.",
+                    stacklevel=1,
                 )
             bias_tensor = get_parameter(bias_node, self.edge_program)
             bias_tensor_wrapper = self.define_tensor(
diff --git a/backends/qualcomm/builders/op_max_pool2d.py b/backends/qualcomm/builders/op_max_pool2d.py
index 586556621b..27f14889bf 100644
--- a/backends/qualcomm/builders/op_max_pool2d.py
+++ b/backends/qualcomm/builders/op_max_pool2d.py
@@ -3,6 +3,7 @@
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
+import warnings
 from typing import cast, Dict, List
 
 import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper
@@ -42,8 +43,9 @@ def define_node(
             if user.target.__name__ == "getitem":
                 getitem_index = user.args[1]
                 if getitem_index != 0:
-                    print(
-                        f"Expected second argument of getitem node for {node.target.__name__ } to be 0, got {getitem_index}"
+                    warnings.warn(
+                        f"[QNN Delegate Op Builder]: Expected the second argument of the getitem node for {node.target.__name__} to be 0, got {getitem_index}",
+                        stacklevel=1,
                     )
                     return
 
@@ -78,8 +80,9 @@ def define_node(
         if len(node.args) > 4:
             dilation = cast(List[int], node.args[4])
             if not (dilation == 1 or dilation == [1, 1]):
-                print(
-                    f"Not support dilation argument for max pool2d, but got {dilation}"
+                warnings.warn(
+                    f"[QNN Delegate Op Builder]: The dilation argument of max_pool2d is not supported; got {dilation}",
+                    stacklevel=1,
                 )
                 return
 
diff --git a/backends/qualcomm/builders/op_rms_norm.py b/backends/qualcomm/builders/op_rms_norm.py
index e99b1f47ba..3a5101b12d 100644
--- a/backends/qualcomm/builders/op_rms_norm.py
+++ b/backends/qualcomm/builders/op_rms_norm.py
@@ -4,6 +4,7 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+import warnings
 from typing import Dict
 
 import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper
@@ -47,7 +48,10 @@ def define_node(
             len(normalized_shapes) != 1
             and normalized_shapes[0] != input_tensor.shape[-1]
         ):
-            print("Only supports normalization with last input dimension")
+            warnings.warn(
+                "[QNN Delegate Op Builder]: Normalization is only supported over the last input dimension.",
+                stacklevel=1,
+            )
             return
         axes = [node.args[0].meta["val"].dim() - 1]
         axes_shape = [len(axes)]
diff --git a/backends/qualcomm/builders/op_topk.py b/backends/qualcomm/builders/op_topk.py
new file mode 100644
index 0000000000..84c29925f2
--- /dev/null
+++ b/backends/qualcomm/builders/op_topk.py
@@ -0,0 +1,107 @@
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+import warnings
+from typing import cast, Dict
+
+import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper
+
+import numpy as np
+import torch
+from executorch.backends.qualcomm.utils.constants import QCOM_AXIS_ORDER, QCOM_DATA, QCOM_QUANT_ATTRS
+
+from .node_visitor import NodeVisitor, register_node_visitor
+from .qnn_constants import OpTopK, QNN_OP_PACKAGE_NAME_QTI_AISW
+
+
+@register_node_visitor
+class TopK(NodeVisitor):
+    target = ["aten.topk.default"]
+
+    def __init__(self, *args) -> None:
+        super().__init__(*args)
+
+    def define_node(
+        self,
+        node: torch.fx.Node,
+        nodes_to_wrappers: Dict[torch.fx.Node, PyQnnWrapper.TensorWrapper],
+    ) -> PyQnnWrapper.PyQnnOpWrapper:
+
+        input_node = node.args[0]
+        input_tensor = self.get_tensor(input_node, node)
+        input_tensor_wrapper = self.define_tensor(
+            input_node,
+            input_tensor,
+            PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
+            nodes_to_wrappers,
+            is_input_tensor=True,
+        )
+
+        k = cast(int, node.args[1])
+
+        if len(node.args) > 2:
+            dim = cast(int, node.args[2])
+            if dim < 0:
+                dim = dim % len(input_tensor.shape)
+            if QCOM_AXIS_ORDER in node.meta:
+                dim = node.meta[QCOM_AXIS_ORDER].index(dim)
+            if dim != len(input_tensor.shape) - 1:
+                warnings.warn(
+                    "[QNN Delegate Op Builder]: QNN currently only supports TopK along the last (channel) dimension.",
+                    stacklevel=1,
+                )
+                return
+
+        topk_input_tensors = [input_tensor_wrapper]
+
+        output_val_tensor = self.get_tensor(node, node, 0)
+        output_idx_tensor = self.get_tensor(node, node, 1).to(torch.int32)
+
+        # QNN constraint: TopK's first output must have the same quant config as its input
+        node.meta[QCOM_QUANT_ATTRS] = input_node.meta.get(QCOM_QUANT_ATTRS)
+        output_val_tensor_wrapper = self.define_tensor(
+            node,
+            output_val_tensor,
+            PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
+            nodes_to_wrappers,
+            is_input_tensor=False,
+        )
+
+        # TopK's second output holds indices; do not quantize it
+        node.meta.pop(QCOM_QUANT_ATTRS, None)
+        output_index_tensor_wrapper = self.define_tensor(
+            node,
+            output_idx_tensor,
+            PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
+            nodes_to_wrappers,
+            is_input_tensor=False,
+            wrapper_idx=1,
+        )
+        topk_output_tensors = [output_val_tensor_wrapper, output_index_tensor_wrapper]
+
+        topk_op = PyQnnWrapper.PyQnnOpWrapper(
+            node.name,
+            QNN_OP_PACKAGE_NAME_QTI_AISW,
+            OpTopK.op_name,
+        )
+        topk_op.AddInputTensors(topk_input_tensors)
+        topk_op.AddOutputTensors(topk_output_tensors)
+
+        topk_op.AddScalarParam(
+            OpTopK.param_k,
+            PyQnnWrapper.Qnn_DataType_t.QNN_DATATYPE_UINT_32,
+            {QCOM_DATA: np.uint32(k)},
+        )
+
+        # As of QNN 2.26, the QNN HTP backend only allows this value to be 1; any other value fails op validation
+        if len(node.args) > 3:
+            largest = cast(bool, node.args[3])
+            topk_op.AddScalarParam(
+                OpTopK.param_largest,
+                PyQnnWrapper.Qnn_DataType_t.QNN_DATATYPE_BOOL_8,
+                {QCOM_DATA: largest},
+            )
+
+        return topk_op
diff --git a/backends/qualcomm/builders/qnn_constants.py b/backends/qualcomm/builders/qnn_constants.py
index 9c589c7678..307ad4a0c4 100644
--- a/backends/qualcomm/builders/qnn_constants.py
+++ b/backends/qualcomm/builders/qnn_constants.py
@@ -352,6 +352,13 @@ class OpTile:
     param_multiples: str = "multiples"
 
 
+@dataclass(init=False, frozen=True)
+class OpTopK:
+    op_name: str = "TopK"
+    param_k: str = "k"
+    param_largest: str = "largest"
+
+
 @dataclass(init=False, frozen=True)
 class OpTranspose:
     op_name: str = "Transpose"
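The dim handling in op_topk.py can be checked in isolation. A sketch with a hypothetical helper, assuming QCOM_AXIS_ORDER holds the NCHW-to-NHWC permutation produced by the layout transform, e.g. (0, 2, 3, 1):

    def remap_topk_dim(dim, rank, axis_order=None):
        # Normalize a negative dim, then translate it through the layout
        # permutation, mirroring the checks in op_topk.py.
        if dim < 0:
            dim = dim % rank
        if axis_order is not None:
            dim = axis_order.index(dim)
        return dim

    # For an NCHW tensor rewritten to NHWC, channel dim 1 becomes the last
    # axis, which is the only placement QNN accepts for TopK:
    assert remap_topk_dim(1, 4, axis_order=(0, 2, 3, 1)) == 3
    assert remap_topk_dim(-3, 4, axis_order=(0, 2, 3, 1)) == 3
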
diff --git a/backends/qualcomm/quantizer/quantizer.py b/backends/qualcomm/quantizer/quantizer.py
index ce8fab5e6f..3c8faee9c0 100644
--- a/backends/qualcomm/quantizer/quantizer.py
+++ b/backends/qualcomm/quantizer/quantizer.py
@@ -7,6 +7,7 @@
 from typing import Callable, Dict, Optional, Sequence, Set
 
 import torch
+from executorch.backends.qualcomm._passes.decompose_einsum import DecomposeEinsum
 from executorch.backends.qualcomm._passes.decompose_silu import DecomposeSilu
 from executorch.backends.qualcomm._passes.recompose_pixel_unshuffle import (
     RecomposePixelUnshuffle,
@@ -190,6 +191,7 @@ def transform_for_annotation(self, model: GraphModule) -> GraphModule:
         model = RecomposePixelUnshuffle(quantization_capture=True)(model).graph_module
         model = DecomposeScaledDotProductAttention()(model).graph_module
         model = DecomposeSilu()(model).graph_module
+        model = DecomposeEinsum()(model).graph_module
         model = ReplaceInfBuffer()(model).graph_module
         return model
 
diff --git a/backends/qualcomm/quantizer/utils.py b/backends/qualcomm/quantizer/utils.py
index 62407decce..d1ea35fa19 100644
--- a/backends/qualcomm/quantizer/utils.py
+++ b/backends/qualcomm/quantizer/utils.py
@@ -415,8 +415,8 @@ def _is_annotated(nodes: List[Node]):
     return annotated
 
 
-def _is_input_float_tensor(node: Node):
-    """Check if the input is not a float tensor, so that we can skip quantization for the node
+def _is_float_tensor(node: Node):
+    """Check if the node's tensor is a float tensor, so that we can skip quantization for the node
     since observers only works with float Tensors
     """
     if (
@@ -474,7 +474,7 @@ def annotate_single_in_single_out(
         assert isinstance(input_act, Node)
         input_qspec_map[input_act] = quantization_config.input_activation
 
-    if _is_input_float_tensor(node):
+    if _is_float_tensor(node):
         node.meta[QUANT_ANNOTATION_KEY] = QuantizationAnnotation(
             input_qspec_map=input_qspec_map,
             output_qspec=quantization_config.output_activation,
@@ -482,22 +482,30 @@
     )
 
 
+@register_annotator([torch.ops.aten.topk.default])
+def annotate_topk(node: Node, quantization_config: QuantizationConfig) -> None:
+    if _is_annotated([node]):
+        return
+    # We can use single_in_single_out since we don't want to quantize the indices output
+    annotate_single_in_single_out(node, quantization_config)
+
+
 def annotate_binary(node: Node, quantization_config: QuantizationConfig) -> None:
     if _is_annotated([node]):
         return
 
     input_act_qspec = quantization_config.input_activation
     output_act_qspec = (
-        quantization_config.output_activation if _is_input_float_tensor(node) else None
+        quantization_config.output_activation if _is_float_tensor(node) else None
     )
 
     input_qspec_map = {}
     input_act0 = node.args[0]
-    if _is_input_float_tensor(input_act0):
+    if _is_float_tensor(input_act0):
         input_qspec_map[input_act0] = input_act_qspec
 
     input_act1 = node.args[1]
-    if _is_input_float_tensor(input_act1):
+    if _is_float_tensor(input_act1):
         input_qspec_map[input_act1] = input_act_qspec
 
     node.meta[QUANT_ANNOTATION_KEY] = QuantizationAnnotation(
@@ -579,7 +587,7 @@ def _derive_div_qparams_fn(
     )
     input_qspec_map = {}
     input_act0 = node.args[0]
-    if _is_input_float_tensor(input_act0):
+    if _is_float_tensor(input_act0):
         input_qspec_map[input_act0] = input_act_qspec
 
     node.meta[QUANT_ANNOTATION_KEY] = QuantizationAnnotation(
@@ -864,7 +872,7 @@ def annotate_sigmoid(node: Node, quantization_config: QuantizationConfig) -> Non
         qscheme=torch.torch.per_tensor_affine,
     )
 
-    if _is_input_float_tensor(node):
+    if _is_float_tensor(node):
         node.meta[QUANT_ANNOTATION_KEY] = QuantizationAnnotation(
             input_qspec_map=input_qspec_map,
             output_qspec=out_act_quantization_spec,
@@ -1143,8 +1151,12 @@ def annotate_batch_norm(node: Node, quantization_config: QuantizationConfig) ->
 
 @register_annotator([operator.getitem])
 def annotate_getitem(node: Node, quantization_config: QuantizationConfig) -> None:
-    _annotate_output_qspec(node, quantization_config.output_activation)
-    _mark_nodes_as_annotated([node])
+    if _is_annotated([node]):
+        return
+
+    if _is_float_tensor(node):
+        _annotate_output_qspec(node, quantization_config.output_activation)
+        _mark_nodes_as_annotated([node])
 
 
 @register_annotator([torch.ops.aten.layer_norm.default])
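To see what DecomposeEinsum buys the quantizer: after transform_for_annotation runs it, no aten.einsum.default nodes remain, so the per-op annotators above can reach every compute op. A sketch, assuming torch.export leaves einsum un-decomposed at capture time:

    import torch
    from executorch.backends.qualcomm._passes.decompose_einsum import DecomposeEinsum

    class OuterProduct(torch.nn.Module):
        def forward(self, i, j):
            return torch.einsum("i,j->ij", i, j)

    gm = torch.export.export(OuterProduct(), (torch.randn(5), torch.randn(4))).module()
    gm = DecomposeEinsum()(gm).graph_module
    # The composite einsum has been replaced by its primitive decomposition.
    assert all(n.target != torch.ops.aten.einsum.default for n in gm.graph.nodes)
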
diff --git a/backends/qualcomm/tests/models.py b/backends/qualcomm/tests/models.py
index ee3d6cf93a..0b3553fd59 100644
--- a/backends/qualcomm/tests/models.py
+++ b/backends/qualcomm/tests/models.py
@@ -416,6 +416,17 @@ def forward(self, x):
         return torch.sum(self.first(x), dim=(2, 3), keepdim=False)
 
 
+class Conv2dTopK(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.conv = torch.nn.Conv2d(3, 16, 3)
+
+    def forward(self, x):
+        x = self.conv(x)
+        topk_values, topk_indices = torch.topk(x, 5, dim=1)
+        return topk_values
+
+
 class Div(torch.nn.Module):
     def __init__(self):
         super().__init__()
@@ -440,6 +451,30 @@ def forward(self, x):
         return x / 10
 
 
+class EinsumBilinear(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+
+    def forward(self, bn, anm, bm):
+        return torch.einsum("bn,anm,bm->ba", bn, anm, bm)
+
+
+class EinsumOuterProduct(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+
+    def forward(self, i, j):
+        return torch.einsum("i,j->ij", i, j)
+
+
+class EinsumOuterProductRelu(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+
+    def forward(self, i, j):
+        return torch.relu(torch.einsum("i,j->ij", i, j))
+
+
 class Embedding(torch.nn.Module):
     def __init__(self):
         super().__init__()
@@ -978,6 +1013,16 @@ def forward(self, x):
         return torch.tanh(x)
 
 
+class TopKandIndex(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.idx_source = torch.rand(10, 3)
+
+    def forward(self, x):
+        a, b = torch.topk(x, 3)
+        return a + self.idx_source[b]
+
+
 class Unbind(torch.nn.Module):
     def __init__(self):
         super().__init__()
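For orientation, the eager shapes these new test models exercise (sizes match the unit tests below):

    import torch

    bn, anm, bm = torch.randn(2, 5), torch.randn(3, 5, 4), torch.randn(2, 4)
    # EinsumBilinear: b=2, n=5, a=3, m=4 -> output (b, a)
    assert torch.einsum("bn,anm,bm->ba", bn, anm, bm).shape == (2, 3)

    # TopKandIndex: torch.topk defaults to the last dimension
    values, indices = torch.topk(torch.randn(3, 10), 3)
    assert values.shape == indices.shape == (3, 3)
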
diff --git a/backends/qualcomm/tests/test_qnn_delegate.py b/backends/qualcomm/tests/test_qnn_delegate.py
index bb90e0eb58..2e1bd0eff3 100644
--- a/backends/qualcomm/tests/test_qnn_delegate.py
+++ b/backends/qualcomm/tests/test_qnn_delegate.py
@@ -37,6 +37,11 @@
     skip_annotation,
 )
 
+from executorch.examples.models.llama2.llama_transformer import (
+    ModelArgs,
+    MOEFeedForward,
+)
+
 from executorch.examples.qualcomm.utils import setup_common_args_and_variables
 
 from executorch.backends.qualcomm.tests.models import *  # noqa: F403
@@ -140,6 +145,28 @@ def test_qnn_backend_conv_transpose2d(self):
             with self.subTest(i=i):
                 self.lower_module_and_test_output(module, sample_input)
 
+    def test_qnn_backend_einsum_outer_product(self):
+        module = EinsumOuterProduct()  # noqa: F405
+        x = torch.randn(5)
+        y = torch.randn(4)
+        sample_input = (
+            x,
+            y,
+        )
+        self.lower_module_and_test_output(module, sample_input)
+
+    def test_qnn_backend_einsum_bilinear(self):
+        module = EinsumBilinear()  # noqa: F405
+        bn = torch.randn(2, 5)
+        anm = torch.randn(3, 5, 4)
+        bm = torch.randn(2, 4)
+        sample_input = (
+            bn,
+            anm,
+            bm,
+        )
+        self.lower_module_and_test_output(module, sample_input)
+
     def test_qnn_backend_element_wise_add(self):
         test_comb = [
             {
@@ -546,6 +573,34 @@ def test_qnn_backend_conv2d_sum_reduce_dim(self):
         sample_input = (torch.randn([1, 1, 3, 3]),)
         self.lower_module_and_test_output(module, sample_input)
 
+    def test_qnn_backend_conv2d_topk(self):
+        module = Conv2dTopK()  # noqa: F405
+        sample_input = (torch.randn(1, 3, 32, 32),)
+        self.lower_module_and_test_output(module, sample_input)
+
+    def test_qnn_backend_einsum_outer_product_relu(self):
+        module = EinsumOuterProductRelu()  # noqa: F405
+        x = torch.randn(5)
+        y = torch.randn(4)
+        sample_input = (
+            x,
+            y,
+        )
+        self.lower_module_and_test_output(module, sample_input)
+
+    @unittest.skip("Fails because of bad accuracy")
+    def test_qnn_backend_moe_feed_forward(self):
+        args = ModelArgs()
+        args.dim = 32
+        args.n_heads = 8
+        args.n_layers = 2
+        self.head_dim = args.dim // args.n_heads
+        module = MOEFeedForward(args)  # noqa: F405
+        sample_input = (
+            torch.randint(low=0, high=100, size=(1, 32), dtype=torch.float32),
+        )
+        self.lower_module_and_test_output(module, sample_input)
+
     def test_qnn_backend_pixel_unshuffle_math_equivalent(self):
         module = PixelUnshuffleMathEquivalent(2)  # noqa: F405
         sample_input = (torch.rand(2, 2, 6, 6),)
@@ -561,6 +616,11 @@ def test_qnn_backend_simple_model(self):
         sample_input = (torch.ones(1, 32, 28, 28), torch.ones(1, 32, 28, 28))
         self.lower_module_and_test_output(module, sample_input)
 
+    def test_qnn_backend_topk_and_index(self):
+        module = TopKandIndex()  # noqa: F405
+        sample_input = (torch.randn(3, 10),)
+        self.lower_module_and_test_output(module, sample_input)
+
     def test_qnn_backend_view_permute_matmul(self):
         module = ViewPermuteMatMul()  # noqa: F405
         torch.manual_seed(8)
@@ -749,6 +809,30 @@ def test_qnn_backend_conv_transpose2d(self):
             module = self.get_qdq_module(module, sample_input)
             self.lower_module_and_test_output(module, sample_input)
 
+    def test_qnn_backend_einsum_outer_product(self):
+        module = EinsumOuterProduct()  # noqa: F405
+        x = torch.randn(5)
+        y = torch.randn(4)
+        sample_input = (
+            x,
+            y,
+        )
+        module = self.get_qdq_module(module, sample_input)
+        self.lower_module_and_test_output(module, sample_input)
+
+    def test_qnn_backend_einsum_bilinear(self):
+        module = EinsumBilinear()  # noqa: F405
+        bn = torch.randn(2, 5)
+        anm = torch.randn(3, 5, 4)
+        bm = torch.randn(2, 4)
+        sample_input = (
+            bn,
+            anm,
+            bm,
+        )
+        module = self.get_qdq_module(module, sample_input)
+        self.lower_module_and_test_output(module, sample_input)
+
     def test_qnn_backend_element_wise_add(self):
         test_comb = [
             {
@@ -1211,6 +1295,37 @@ def test_qnn_backend_conv2d_sum_reduce_dim(self):
         module = self.get_qdq_module(module, sample_input)
         self.lower_module_and_test_output(module, sample_input)
 
+    def test_qnn_backend_conv2d_topk(self):
+        module = Conv2dTopK()  # noqa: F405
+        sample_input = (torch.randn(1, 3, 32, 32),)
+        module = self.get_qdq_module(module, sample_input)
+        self.lower_module_and_test_output(module, sample_input)
+
+    def test_qnn_backend_einsum_outer_product_relu(self):
+        module = EinsumOuterProductRelu()  # noqa: F405
+        x = torch.randn(5)
+        y = torch.randn(4)
+        sample_input = (
+            x,
+            y,
+        )
+        module = self.get_qdq_module(module, sample_input)
+        self.lower_module_and_test_output(module, sample_input)
+
+    @unittest.skip("UT passed before QNN 2.26; segfaults during the partitioner")
+    def test_qnn_backend_moe_feed_forward(self):
+        args = ModelArgs()
+        args.dim = 32
+        args.n_heads = 8
+        args.n_layers = 2
+        self.head_dim = args.dim // args.n_heads
+        module = MOEFeedForward(args)  # noqa: F405
+        sample_input = (
+            torch.randint(low=0, high=100, size=(1, 32), dtype=torch.float32),
+        )
+        module = self.get_qdq_module(module, sample_input)
+        self.lower_module_and_test_output(module, sample_input)
+
     def test_qnn_backend_pixel_unshuffle_math_equivalent(self):
         module = PixelUnshuffleMathEquivalent(2)  # noqa: F405
         sample_input = (torch.rand(2, 2, 6, 6),)
@@ -1229,6 +1344,12 @@ def test_qnn_backend_simple_model(self):
         module = self.get_qdq_module(module, sample_input)
         self.lower_module_and_test_output(module, sample_input)
 
+    def test_qnn_backend_topk_and_index(self):
+        module = TopKandIndex()  # noqa: F405
+        sample_input = (torch.randn(3, 10),)
+        module = self.get_qdq_module(module, sample_input)
+        self.lower_module_and_test_output(module, sample_input)
+
     def test_qnn_backend_view_permute_matmul(self):
         module = ViewPermuteMatMul()  # noqa: F405
         torch.manual_seed(8)
diff --git a/backends/qualcomm/tests/utils.py b/backends/qualcomm/tests/utils.py
index b8230abdc2..c58da42e84 100644
--- a/backends/qualcomm/tests/utils.py
+++ b/backends/qualcomm/tests/utils.py
@@ -166,7 +166,7 @@ def _save_model_and_expected_output(
         ref_outputs = []
         if isinstance(ref_output, collections.OrderedDict):
             ref_outputs.append(ref_output["out"].detach())
-        elif isinstance(ref_output, tuple):
+        elif isinstance(ref_output, (list, tuple)):
             for output in ref_output:
                 ref_outputs.append(output.detach())
         else:
diff --git a/backends/qualcomm/utils/utils.py b/backends/qualcomm/utils/utils.py
index 7da1ccb4ed..d93f7fcb4b 100644
--- a/backends/qualcomm/utils/utils.py
+++ b/backends/qualcomm/utils/utils.py
@@ -52,6 +52,7 @@
 from executorch.backends.qualcomm.builders.qnn_constants import OpContextLoader
 from executorch.backends.qualcomm.serialization.qnn_compile_spec_schema import (
     _soc_info_table,
+    HtpArch,
     QcomChipset,
     QnnExecuTorchBackendOptions,
     QnnExecuTorchBackendType,
@@ -854,6 +855,16 @@ def generate_qnn_executorch_compiler_spec(
     ]
 
 
+def get_soc_to_arch_map():
+    return {
+        "SSG2115P": HtpArch.V73,
+        "SM8650": HtpArch.V75,
+        "SM8550": HtpArch.V73,
+        "SM8475": HtpArch.V69,
+        "SM8450": HtpArch.V69,
+    }
+
+
 def get_soc_to_chipset_map():
     return {
         "SSG2115P": QcomChipset.SSG2115P,
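A usage sketch for the new helper, mirroring how examples/qualcomm/utils.py consumes it below:

    from executorch.backends.qualcomm.serialization.qnn_compile_spec_schema import HtpArch
    from executorch.backends.qualcomm.utils.utils import get_soc_to_arch_map

    # Resolve a device's SoC model string to its HTP architecture version.
    assert get_soc_to_arch_map()["SM8650"] == HtpArch.V75
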
diff --git a/backends/vulkan/partitioner/supported_ops.py b/backends/vulkan/partitioner/supported_ops.py
index 7013a06880..4d0858953b 100644
--- a/backends/vulkan/partitioner/supported_ops.py
+++ b/backends/vulkan/partitioner/supported_ops.py
@@ -45,6 +45,12 @@ def __contains__(self, op):
 
 PRIM_OPS = [
     operator.getitem,
+    # Quantization-related ops will be fused via graph passes
+    exir_ops.edge.quantized_decomposed.quantize_per_channel.default,
+    exir_ops.edge.quantized_decomposed.quantize_per_tensor.default,
+    exir_ops.edge.quantized_decomposed.quantize_per_tensor.tensor,
+    exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default,
+    exir_ops.edge.quantized_decomposed.dequantize_per_tensor.tensor,
     exir_ops.edge.quantized_decomposed.dequantize_per_channel.default,
 ]
 
diff --git a/examples/demo-apps/apple_ios/LLaMA/LLaMA/Application/ContentView.swift b/examples/demo-apps/apple_ios/LLaMA/LLaMA/Application/ContentView.swift
index 00fc4f6f54..69ece27f67 100644
--- a/examples/demo-apps/apple_ios/LLaMA/LLaMA/Application/ContentView.swift
+++ b/examples/demo-apps/apple_ios/LLaMA/LLaMA/Application/ContentView.swift
@@ -176,6 +176,7 @@ struct ContentView: View {
         .padding([.leading, .trailing, .bottom], 10)
         .sheet(isPresented: $isImagePickerPresented, onDismiss: addSelectedImageMessage) {
           ImagePicker(selectedImage: $selectedImage, sourceType: imagePickerSourceType)
+            .id(imagePickerSourceType.rawValue)
         }
       }
       .navigationBarTitle(title, displayMode: .inline)
diff --git a/examples/qualcomm/utils.py b/examples/qualcomm/utils.py
index 2e6cb348b0..f10babc5bb 100755
--- a/examples/qualcomm/utils.py
+++ b/examples/qualcomm/utils.py
@@ -30,7 +30,7 @@
     capture_program,
     generate_htp_compiler_spec,
     generate_qnn_executorch_compiler_spec,
-    get_soc_to_chipset_map,
+    get_soc_to_arch_map,
 )
 from executorch.exir import EdgeCompileConfig, EdgeProgramManager, to_edge
 from executorch.exir.backend.backend_api import to_backend
@@ -83,7 +83,7 @@ def __init__(
         self.dump_intermediate_outputs = dump_intermediate_outputs
         self.debug_output_path = f"{self.workspace}/debug_output.bin"
         self.output_folder = f"{self.workspace}/outputs"
-        self.soc_model = get_soc_to_chipset_map()[soc_model]
+        self.soc_model = get_soc_to_arch_map()[soc_model]
         self.error_only = error_only
         self.shared_buffer = shared_buffer
         self.runner = runner