From 01aeeb196ae97eb18b25b56f40363473fa44318e Mon Sep 17 00:00:00 2001
From: Manupa Karunaratne
Date: Wed, 8 Sep 2021 18:15:17 +0100
Subject: [PATCH] [2/6] Arm(R) Ethos(TM)-U NPU Relay passes and Conv2D op (#8795)

* Arm(R) Ethos(TM)-U NPU Relay passes and Conv2D op

This commit mainly adds the relay passes and the ethosu_conv2d
operator to Relay. The relay passes include the legalizations
and the preprocessing of the relay graph during partitioning.

Co-authored-by: Matthew Barrett

* Arm(R) Ethos(TM)-U NPU Relay passes and Conv2D op

* skipping the test if vela is not in the container.

Change-Id: I68cc4259dc33e1473e460956978f364fbf6596d8

* Arm(R) Ethos(TM)-U NPU Relay passes and Conv2D op

* addressing Jared's comments

Change-Id: Ief669f788c6bd1a1be1004cbce5129ed06b63c3c

* Arm(R) Ethos(TM)-U NPU Relay passes and Conv2D op

* addressing Elen's comments

Change-Id: Iad6315bb63f12ba318deb9c5c9eff7459ff58c48

* Arm(R) Ethos(TM)-U NPU Relay passes and Conv2D op

* cleanup passes

Change-Id: I8e1cbedd2c4d3d0cdff481d775d9eb0577e44456

* Update TE comments

Change-Id: I7e65c2714d017c8a4b64986b111a6b51d128c963

* Address ekalda's comments in TE

Change-Id: I55cfbb3787c0aacdadf46c4859dff39287e65ddc

* Arm(R) Ethos(TM)-U NPU Relay passes and Conv2D op

*addressing Chris's comments
*addressing Nicola's comments

Change-Id: Id02788ddcdbc3679e0da37b2fa614cded0a4c1f5

* Arm(R) Ethos(TM)-U NPU Relay passes and Conv2D op

*addressing missed 'hidden' comments of Chris
*addressing one missed comment of Nicola
*adding type annotations

Change-Id: Iadf4907b311e195731dbbed571e95a266341db8f

* Arm(R) Ethos(TM)-U NPU Relay passes and Conv2D op

*further type fixes and one missed comment

Change-Id: I6da69fd95d17dfeaf5940da4f8d8c8ea142b39d2

* Arm(R) Ethos(TM)-U NPU Relay passes and Conv2D op

*missed comment split_o

Change-Id: I4a4b19ff2cd18e8f568a63ae827f44358ed85b8e

* Arm(R) Ethos(TM)-U NPU Relay passes and Conv2D op

* adding mypy check

Change-Id: Iaf58dbba2a9d8e1098a10c589d91b63c7efe646d

* Arm(R) Ethos(TM)-U NPU Relay passes and Conv2D op

*removing premature insertion of get_accel_type utility

Change-Id: I210512e00a5eb46adf23d1d72eb16432db526d25

* Arm(R) Ethos(TM)-U NPU Relay passes and Conv2D op

* rebase fixes

Change-Id: I06c9b536a7598646efce2b664fcc405aa6008203

Co-authored-by: Matthew Barrett
---
 CMakeLists.txt | 1 +
 cmake/modules/contrib/EthosU.cmake | 21 +
 python/tvm/relay/backend/contrib/__init__.py | 2 +-
 .../relay/backend/contrib/ethosu/__init__.py | 23 +
 .../relay/backend/contrib/ethosu/_ffi_api.py | 20 +
 .../relay/backend/contrib/ethosu/errors.py | 35 ++
 .../relay/backend/contrib/ethosu/legalize.py | 223 +++++++++
 .../backend/contrib/ethosu/op/__init__.py | 19 +
 .../backend/contrib/ethosu/op/convolution.py | 204 ++++++++
 .../backend/contrib/ethosu/preprocess.py | 36 ++
 .../backend/contrib/ethosu/te/__init__.py | 19 +
 .../backend/contrib/ethosu/te/convolution.py | 151 ++++++
 .../relay/backend/contrib/ethosu/te/dma.py | 309 ++++++++++++
 .../tvm/relay/backend/contrib/ethosu/util.py | 185 +++++++
 .../relay/backend/contrib/ethosu/vela_api.py | 310 ++++++++++++
 python/tvm/relay/op/contrib/ethosu.py | 249 ++++++++++
 .../backend/contrib/ethosu/preprocess.cc | 269 +++++++++++
 src/relay/op/contrib/ethosu/common.cc | 65 +++
 src/relay/op/contrib/ethosu/common.h | 58 +++
 src/relay/op/contrib/ethosu/convolution.cc | 212 ++++++++
 .../contrib/test_ethosu/relay_ir_builder.py | 295 ++++++++++++
 .../contrib/test_ethosu/test_legalize.py | 337 +++++++++++++
 .../contrib/test_ethosu/test_preprocess.py | 343 +++++++++++++
.../contrib/test_ethosu/test_vela_api.py | 455 ++++++++++++++++++ tests/scripts/task_config_build_cpu.sh | 1 + tests/scripts/task_mypy.sh | 3 + 26 files changed, 3844 insertions(+), 1 deletion(-) create mode 100644 cmake/modules/contrib/EthosU.cmake create mode 100644 python/tvm/relay/backend/contrib/ethosu/__init__.py create mode 100644 python/tvm/relay/backend/contrib/ethosu/_ffi_api.py create mode 100644 python/tvm/relay/backend/contrib/ethosu/errors.py create mode 100644 python/tvm/relay/backend/contrib/ethosu/legalize.py create mode 100644 python/tvm/relay/backend/contrib/ethosu/op/__init__.py create mode 100644 python/tvm/relay/backend/contrib/ethosu/op/convolution.py create mode 100644 python/tvm/relay/backend/contrib/ethosu/preprocess.py create mode 100644 python/tvm/relay/backend/contrib/ethosu/te/__init__.py create mode 100644 python/tvm/relay/backend/contrib/ethosu/te/convolution.py create mode 100644 python/tvm/relay/backend/contrib/ethosu/te/dma.py create mode 100644 python/tvm/relay/backend/contrib/ethosu/util.py create mode 100644 python/tvm/relay/backend/contrib/ethosu/vela_api.py create mode 100644 python/tvm/relay/op/contrib/ethosu.py create mode 100644 src/relay/backend/contrib/ethosu/preprocess.cc create mode 100644 src/relay/op/contrib/ethosu/common.cc create mode 100644 src/relay/op/contrib/ethosu/common.h create mode 100644 src/relay/op/contrib/ethosu/convolution.cc create mode 100644 tests/python/contrib/test_ethosu/relay_ir_builder.py create mode 100644 tests/python/contrib/test_ethosu/test_legalize.py create mode 100644 tests/python/contrib/test_ethosu/test_preprocess.py create mode 100644 tests/python/contrib/test_ethosu/test_vela_api.py diff --git a/CMakeLists.txt b/CMakeLists.txt index e02bb615aa41..62598cbdf4a7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -403,6 +403,7 @@ include(cmake/modules/LLVM.cmake) include(cmake/modules/Micro.cmake) include(cmake/modules/contrib/EthosN.cmake) include(cmake/modules/contrib/CMSISNN.cmake) +include(cmake/modules/contrib/EthosU.cmake) include(cmake/modules/contrib/BLAS.cmake) include(cmake/modules/contrib/CODEGENC.cmake) include(cmake/modules/contrib/DNNL.cmake) diff --git a/cmake/modules/contrib/EthosU.cmake b/cmake/modules/contrib/EthosU.cmake new file mode 100644 index 000000000000..8f3e09b8179b --- /dev/null +++ b/cmake/modules/contrib/EthosU.cmake @@ -0,0 +1,21 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
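+# Glob the Arm(R) Ethos(TM)-U relay backend sources into the compiler
+# build when USE_ETHOSU is enabled.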
+ +if(USE_ETHOSU) + file(GLOB ETHOSU_RELAY_CONTRIB_SRC src/relay/backend/contrib/ethosu/*) + list(APPEND COMPILER_SRCS ${ETHOSU_RELAY_CONTRIB_SRC}) +endif(USE_ETHOSU) \ No newline at end of file diff --git a/python/tvm/relay/backend/contrib/__init__.py b/python/tvm/relay/backend/contrib/__init__.py index 16b83612d797..bfc5b79bb2ee 100644 --- a/python/tvm/relay/backend/contrib/__init__.py +++ b/python/tvm/relay/backend/contrib/__init__.py @@ -14,5 +14,5 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -"""external backend codegen modules for relay.""" +"""External backend codegen modules for Relay.""" from . import cmsisnn diff --git a/python/tvm/relay/backend/contrib/ethosu/__init__.py b/python/tvm/relay/backend/contrib/ethosu/__init__.py new file mode 100644 index 000000000000..f5c595462e73 --- /dev/null +++ b/python/tvm/relay/backend/contrib/ethosu/__init__.py @@ -0,0 +1,23 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Arm(R) Ethos(TM)-U NPU codegen modules for Relay.""" +from . import util +from . import legalize +from . import preprocess +from . import errors +from . import vela_api +from .util import partition_for_ethosu diff --git a/python/tvm/relay/backend/contrib/ethosu/_ffi_api.py b/python/tvm/relay/backend/contrib/ethosu/_ffi_api.py new file mode 100644 index 000000000000..ccf1039a6994 --- /dev/null +++ b/python/tvm/relay/backend/contrib/ethosu/_ffi_api.py @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""FFI APIs for relay transformation passes.""" +import tvm._ffi # type: ignore + +tvm._ffi._init_api("relay.ext.ethosu", __name__) diff --git a/python/tvm/relay/backend/contrib/ethosu/errors.py b/python/tvm/relay/backend/contrib/ethosu/errors.py new file mode 100644 index 000000000000..65f3711838be --- /dev/null +++ b/python/tvm/relay/backend/contrib/ethosu/errors.py @@ -0,0 +1,35 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=super-init-not-called
+"""This module defines all error types associated with the Arm(R) Ethos(TM)-U NPU code generator."""
+
+
+class EthosUCodegenError(Exception):
+    """Base class for all exceptions related to code generation"""
+
+    def __init__(self, data):
+        self.message = "EthosUCodegenError: " + data
+
+    def __str__(self):
+        return self.message
+
+
+class UnsupportedLayout(EthosUCodegenError):
+    """Raised when an unsupported layout is encountered during code generation."""
+
+    def __init__(self, layout):
+        super().__init__(f"Unsupported Layout {layout}")
diff --git a/python/tvm/relay/backend/contrib/ethosu/legalize.py b/python/tvm/relay/backend/contrib/ethosu/legalize.py
new file mode 100644
index 000000000000..82b7f1e68cee
--- /dev/null
+++ b/python/tvm/relay/backend/contrib/ethosu/legalize.py
@@ -0,0 +1,223 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name, unused-argument, import-outside-toplevel, no-value-for-parameter
+"""A set of passes to legalize some of the operations for the NPU"""
+from typing import List
+import numpy as np  # type: ignore
+
+import tvm  # type: ignore
+from tvm import relay
+from tvm import ir
+from tvm.relay.dataflow_pattern import DFPatternCallback  # type: ignore
+from tvm.relay.dataflow_pattern import wildcard
+from tvm.relay.dataflow_pattern import is_op
+from tvm.relay.dataflow_pattern import rewrite
+from tvm.relay.backend.contrib.ethosu import op as ethosu_ops  # type: ignore
+from tvm.relay.backend.contrib.ethosu.errors import UnsupportedLayout  # type: ignore
+from tvm.relay.backend.contrib.ethosu import vela_api
+from tvm.relay.op.contrib import ethosu as ethosu_patterns  # type: ignore
+
+
+class SplitRewriter(DFPatternCallback):
+    """This rewriting converts split operations into a sequence of
+    strided_slice operations, because the codegen is going to be based
+    on strided_slices that define the slice of the tensor that
+    will be fed to the consumer.
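+
+    For example, splitting a (1, 16, 16, 4) tensor with indices_or_sections=[8]
+    along axis=1 becomes two strided_slice operations with (begin, end) pairs
+    ([0, 0, 0, 0], [1, 8, 16, 4]) and ([0, 8, 0, 0], [1, 16, 16, 4]),
+    wrapped in a relay.Tuple.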
+ """ + + def __init__(self): + super().__init__(require_type=True) + self.split_in = wildcard() + self.pattern = is_op("split")(self.split_in) + + @staticmethod + def get_section_begin_coords(split: tvm.relay.Expr) -> List[int]: + """Currently, the split operator takes an array of indices or an integer + indicating the number of splits. However, its an array of indices could + represent both cases, therefore this function just make it an array of + indices where each index represent the co-ordinate of beginning of each + section -- defines as section begins. + + Parameters + ---------- + split : tvm.relay.Expr + The Relay Call expression for a split operator + + Returns + ------- + section_begins : List[int] + A list containing integers corresponding to section + begins + """ + indices_or_sections = split.attrs.indices_or_sections + input_shape = split.args[0].checked_type.shape + split_axis = split.attrs.axis + + if isinstance(indices_or_sections, tvm.ir.container.Array): + # 0 is the beginning of the first section. + return [0] + list(indices_or_sections) + split_axis_len = input_shape[split_axis].value + section_length = split_axis_len // indices_or_sections.value + return list(range(0, split_axis_len, section_length)) + + def callback( + self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map + ) -> tvm.relay.Expr: + split_input = post.args[0] + split_begins = list() + split_ends = list() + section_begins_in_split_axis = self.get_section_begin_coords(post) + for split_cord in section_begins_in_split_axis: + # first begin is [0, 0, ... , 0] + begin_shape = [0 for i in range(len(split_input.checked_type.shape))] + begin_shape[post.attrs.axis] = split_cord + split_begins.append(begin_shape) + + end_shape = list(split_input.checked_type.shape) + # Only the split axis coordinate changes + end_shape[post.attrs.axis] = split_cord + split_ends.append(end_shape) + + # Coordinates needs to be shifted left because beginning + # of the next section is the end of the previous + split_ends = split_ends[1:] + # Last section end is the shape of the tensor itself. 
+ split_ends.append(list(split_input.checked_type.shape)) + + strided_slices = list() + for sb, se in zip(split_begins, split_ends): + strided_slices.append(relay.strided_slice(split_input, sb, se)) + + return relay.Tuple(strided_slices) + + +@ir.transform.module_pass(opt_level=1) +class LegalizeSplit: + """This is the pass that wraps SplitRewriter""" + + def transform_module( + self, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext + ) -> tvm.ir.IRModule: + for global_var, func in mod.functions.items(): + func = rewrite(SplitRewriter(), func) + mod.update_func(global_var, func) + return mod + + def __call__(self, *args, **kwargs): + pass + + +class EthosUConv2DRewriter(DFPatternCallback): + """Convert conv2d related composite functions into ethosu_conv2d operators""" + + def __init__(self): + super().__init__(require_type=True) + self.pattern = (wildcard().has_attr({"Composite": "ethosu.qnn_conv2d"}))(wildcard()) + + def callback( + self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map + ) -> tvm.relay.Expr: + params = ethosu_patterns.QnnConv2DParams(post.op.body) + params.ifm.tensor = post.args[0] + channels_map = { + "NHWC": 3, + } + if str(params.ofm.layout) not in channels_map.keys(): + raise UnsupportedLayout(str(params.ofm.layout)) + kernel_size_map = { + "HWIO": params.weights.shape[0:2], + "OHWI": params.weights.shape[1:3], + "HWOI": params.weights.shape[0:2], + } + if str(params.weights.layout) not in kernel_size_map.keys(): + raise UnsupportedLayout(str(params.weights.layout)) + activation_map = {"clip": "CLIP"} + weight_to_ohwi_transform_map = {"HWIO": [3, 0, 1, 2]} + weights_values = params.weights.values + weights_values_ohwi = np.transpose( + weights_values, weight_to_ohwi_transform_map[str(params.weights.layout)] + ) + if params.activation: + activation = activation_map[params.activation.op.name] + clip_min = int(params.activation.attrs.a_min) + clip_max = int(params.activation.attrs.a_max) + else: + activation = "NONE" + clip_min = 0 + clip_max = 0 + scale_bias = vela_api.pack_biases( + biases=params.biases.tensor.data.asnumpy(), + ifm_scale=params.ifm.q_params.scale_f32, + ifm_dtype=np.dtype(params.ifm.dtype), + weight_scales=params.weights.q_params.scale_f32, + ofm_scale=params.ofm.q_params.scale_f32, + is_activation_tanh_or_sigmoid=activation in ["TANH", "SIGMOID"], + ) + ethosu_conv2d = ethosu_ops.ethosu_conv2d( + ifm=post.args[0], + weight=relay.const(weights_values_ohwi, params.weights.values.dtype), + scale_bias=relay.const(scale_bias, "uint8"), + lut=relay.const([], dtype="int8"), + ifm_scale=float(params.ifm.q_params.scale_f32), + ifm_zero_point=int(params.ifm.q_params.zero_point), + weight_zero_point=int(params.weights.q_params.zero_point), + ofm_scale=float(params.ofm.q_params.scale_f32), + ofm_zero_point=int(params.ofm.q_params.zero_point), + kernel_shape=kernel_size_map[str(params.weights.layout)], + ofm_channels=params.ofm.shape[channels_map[str(params.ofm.layout)]], + strides=params.strides, + padding=params.padding, + dilation=params.dilation, + activation=activation, + clip_min=clip_min, + clip_max=clip_max, + upscale="NONE", + ifm_layout=str(params.ifm.layout), + ofm_layout=str(params.ofm.layout), + ) + return ethosu_conv2d + + +@ir.transform.module_pass(opt_level=1) +class LegalizeEthosUConv2D: + """This is the pass that wraps the EthosUConv2DRewriter""" + + def transform_module( + self, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext + ) -> tvm.ir.IRModule: + for global_var, func in mod.functions.items(): + func 
= rewrite(EthosUConv2DRewriter(), func)
+            mod.update_func(global_var, func)
+        return mod
+
+    def __call__(self, *args, **kwargs):
+        pass
+
+
+@ir.transform.module_pass(opt_level=1)
+class LegalizeEthosU:
+    """This is the pass that calls graph rewrites to perform graph transformation
+    in such a way that the operations are replaced with hardware/codegen supported
+    operations.
+    """
+
+    def transform_module(
+        self, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext
+    ) -> tvm.ir.IRModule:
+        mod = LegalizeSplit()(mod)
+        mod = LegalizeEthosUConv2D()(mod)
+        return mod
diff --git a/python/tvm/relay/backend/contrib/ethosu/op/__init__.py b/python/tvm/relay/backend/contrib/ethosu/op/__init__.py
new file mode 100644
index 000000000000..0406298f23f4
--- /dev/null
+++ b/python/tvm/relay/backend/contrib/ethosu/op/__init__.py
@@ -0,0 +1,19 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"Relay operators for the Arm(R) Ethos(TM)-U NPU"
+
+from .convolution import ethosu_conv2d
diff --git a/python/tvm/relay/backend/contrib/ethosu/op/convolution.py b/python/tvm/relay/backend/contrib/ethosu/op/convolution.py
new file mode 100644
index 000000000000..b159830ceaa9
--- /dev/null
+++ b/python/tvm/relay/backend/contrib/ethosu/op/convolution.py
@@ -0,0 +1,204 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=unused-argument
+"""Relay operators for convolutions for Arm(R) Ethos(TM)-U NPU"""
+from typing import Tuple
+
+import tvm  # type: ignore
+from tvm.relay.op import _make  # type: ignore
+from tvm.topi.generic import schedule_injective  # type: ignore
+from tvm.relay.op.op import OpStrategy  # type: ignore
+from tvm.relay.op import strategy as _strategy
+
+from ..te import conv2d_compute
+
+
+def _extract_ethosu_conv2d_params(attrs, args):
+    """Get the parameters necessary to construct a compute TE
+    from an ethosu_conv2d Relay call."""
+    ifm = args[0]
+    weight = args[1]
+    scale_bias = args[2]
+    lut = args[3]
+    ifm_scale = attrs.ifm_scale
+    ifm_zero_point = attrs.ifm_zero_point
+    weight_zero_point = attrs.weight_zero_point
+    ofm_scale = attrs.ofm_scale
+    ofm_zero_point = attrs.ofm_zero_point
+    strides = attrs.strides
+    padding = attrs.padding
+    dilation = attrs.dilation
+    activation = attrs.activation
+    clip_min = attrs.clip_min
+    clip_max = attrs.clip_max
+    upscale = attrs.upscale
+    ifm_layout = attrs.ifm_layout
+    ofm_layout = attrs.ofm_layout
+
+    return (
+        ifm,
+        weight,
+        scale_bias,
+        lut,
+        ifm_scale,
+        ifm_zero_point,
+        weight_zero_point,
+        ofm_scale,
+        ofm_zero_point,
+        strides,
+        padding,
+        dilation,
+        activation,
+        clip_min,
+        clip_max,
+        upscale,
+        ifm_layout,
+        ofm_layout,
+    )
+
+
+@tvm.ir.register_op_attr("contrib.ethosu.conv2d", "FTVMCompute")
+def create_ethosu_conv2d_compute(attrs, args, out_type):
+    """Create an ethosu_conv2d compute op."""
+    params = _extract_ethosu_conv2d_params(attrs, args)
+    op = conv2d_compute(*params)
+    return [op]
+
+
+@tvm.ir.register_op_attr("contrib.ethosu.conv2d", "FTVMStrategy")
+def conv2d_strategy_ethosu(attrs, inputs, out_type, target):
+    """Create the strategy for the ethosu_conv2d op."""
+    strategy = OpStrategy()
+    strategy.add_implementation(
+        create_ethosu_conv2d_compute,
+        _strategy.wrap_topi_schedule(schedule_injective),
+        name="ethosu_conv2d",
+    )
+    return strategy
+
+
+def ethosu_conv2d(
+    ifm: tvm.relay.Expr,
+    weight: tvm.relay.Expr,
+    scale_bias: tvm.relay.Expr,
+    lut: tvm.relay.Expr,
+    ifm_scale: float,
+    ifm_zero_point: int,
+    weight_zero_point: int,
+    ofm_scale: float,
+    ofm_zero_point: int,
+    kernel_shape: Tuple[int, int],
+    ofm_channels: int,
+    strides: Tuple[int, int] = (1, 1),
+    padding: Tuple[int, int, int, int] = (0, 0, 0, 0),
+    dilation: Tuple[int, int] = (1, 1),
+    activation: str = "NONE",
+    clip_min: int = 0,
+    clip_max: int = 0,
+    upscale: str = "NONE",
+    ifm_layout: str = "NHWC",
+    ofm_layout: str = "NHWC",
+) -> tvm.relay.Call:
+    """This is a quantized 2D convolution operation as supported by
+    the NPU. It accepts either NHWC or NHCWB16 format
+    for the input data and OHWI format for the kernel weights.
+
+    Reference: https://developer.arm.com/documentation/102420/0200/
+
+    Note that the per-channel weight scale and bias tensor must be
+    packed together into a combined tensor of 80-bit values. This is represented
+    in TVM by a (channels, 10) tensor of type uint8. For more detail,
+    refer to the Technical Reference Manual linked above.
+
+    Parameters
+    ----------
+    ifm : tvm.relay.Expr
+        The Input Feature Map tensor (IFM).
+    weight : tvm.relay.Expr
+        The weight tensor.
+    scale_bias : tvm.relay.Expr
+        The packed per-channel weight scale and bias tensor.
+    lut : tvm.relay.Expr
+        The look-up table values to use if activation = "LUT".
+    ifm_scale : float
+        The quantization scale for the Input Feature Map tensor.
+    ifm_zero_point : int
+        The quantization zero point for the Input Feature Map tensor.
+    weight_zero_point : int
+        The quantization zero point for the weight tensor.
+    ofm_scale : float
+        The quantization scale for the Output Feature Map tensor.
+    ofm_zero_point : int
+        The quantization zero point for the Output Feature Map tensor.
+    kernel_shape : tuple of int
+        The 2 dimensional kernel shape as (kernel_height, kernel_width).
+    ofm_channels : int
+        The number of OFM channels.
+    strides : tuple of int, optional
+        The 2 dimensional strides as (stride_height, stride_width).
+    padding : tuple of int, optional
+        The 4 dimensional padding as (pad_top, pad_left, pad_bottom, pad_right).
+    dilation : tuple of int, optional
+        The 2 dimensional dilation as (dilation_height, dilation_width).
+    activation : str, optional
+        The activation function to use.
+        "NONE" - no activation function.
+        "CLIP" - clip the output between clip_min and clip_max.
+        "TANH" - tanh activation function.
+        "SIGMOID" - sigmoid activation function.
+        "LUT" - use a look-up table to perform the activation function.
+    clip_min : int, optional
+        The minimum clipping value if activation = "CLIP".
+    clip_max : int, optional
+        The maximum clipping value if activation = "CLIP".
+    upscale : str, optional
+        The 2x2 upscaling mode to apply to the Input Feature Map tensor.
+        "NONE" - no upscaling.
+        "NEAREST" - upscale using nearest neighbour.
+        "ZEROS" - upscale using zeros.
+    ifm_layout : str, optional
+        The layout of the Input Feature Map tensor. Can be "NHWC" or "NHCWB16".
+    ofm_layout : str, optional
+        The layout of the Output Feature Map tensor. Can be "NHWC" or "NHCWB16".
+
+    Returns
+    -------
+    tvm.relay.Call
+        A call to the ethosu_conv2d op.
+
+    """
+    return _make.ethosu_conv2d(
+        ifm,
+        weight,
+        scale_bias,
+        lut,
+        ifm_scale,
+        ifm_zero_point,
+        weight_zero_point,
+        ofm_scale,
+        ofm_zero_point,
+        kernel_shape,
+        ofm_channels,
+        strides,
+        padding,
+        dilation,
+        activation,
+        clip_min,
+        clip_max,
+        upscale,
+        ifm_layout,
+        ofm_layout,
+    )
diff --git a/python/tvm/relay/backend/contrib/ethosu/preprocess.py b/python/tvm/relay/backend/contrib/ethosu/preprocess.py
new file mode 100644
index 000000000000..795adfc2fb1f
--- /dev/null
+++ b/python/tvm/relay/backend/contrib/ethosu/preprocess.py
@@ -0,0 +1,36 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name, unused-argument, import-outside-toplevel
+"""Set of passes to pre-process the IRModule to support Arm(R) Ethos(TM)-U
+NPU code generation. This set of passes will mutate both the main and the
+external functions.
+"""
+import tvm  # type: ignore
+from . import _ffi_api  # type: ignore
+
+
+def preprocess_ext_io() -> tvm.transform.Pass:
+    """This pass mutates the number of inputs going into / outputs coming out of
+    external functions to one. This is achieved via concatenation
+    of inputs and splitting of outputs around the call to the external function.
+
+    Returns
+    -------
+    ret : tvm.transform.Pass
+        The registered pass to mutate the IO of the external functions and their calls.
+    """
+    return _ffi_api.PreprocessExternalFuncIO()  # type: ignore # pylint: disable=no-member
diff --git a/python/tvm/relay/backend/contrib/ethosu/te/__init__.py b/python/tvm/relay/backend/contrib/ethosu/te/__init__.py
new file mode 100644
index 000000000000..7ca5de3c160c
--- /dev/null
+++ b/python/tvm/relay/backend/contrib/ethosu/te/__init__.py
@@ -0,0 +1,19 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Tensor Expressions for the NPU"""
+
+from .convolution import *
diff --git a/python/tvm/relay/backend/contrib/ethosu/te/convolution.py b/python/tvm/relay/backend/contrib/ethosu/te/convolution.py
new file mode 100644
index 000000000000..40015ac296a6
--- /dev/null
+++ b/python/tvm/relay/backend/contrib/ethosu/te/convolution.py
@@ -0,0 +1,151 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name,unused-argument
+"""Tensor Expressions for convolutions for the NPU"""
+from typing import Tuple, Union, List
+
+from tvm import te  # type: ignore
+from .dma import dma_ofm_compute, dma_ifm_compute
+
+
+def conv2d_compute(
+    ifm: te.Tensor,
+    weight: te.Tensor,
+    scale_bias: te.Tensor,
+    lut: te.Tensor,
+    ifm_scale: float,
+    ifm_zero_point: int,
+    weight_zero_point: int,
+    ofm_scale: float,
+    ofm_zero_point: int,
+    strides: Tuple[int, int],
+    padding: Tuple[int, int, int, int],
+    dilation: Union[Tuple[int, int], List[int]],
+    activation: str,
+    clip_min: int,
+    clip_max: int,
+    upscale: str,
+    ifm_layout: str,
+    ofm_layout: str,
+) -> te.Tensor:
+    """A compute operator representing the capabilities of a 2D convolution for the NPU.
+
+    Parameters
+    ----------
+    ifm : te.Tensor
+        The Input Feature Map tensor (IFM).
+    weight : te.Tensor
+        The weight tensor.
+    scale_bias : te.Tensor
+        The packed per-channel weight scale and bias tensor.
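+        Each output channel's scale and bias are packed into one 10-byte row
+        of this tensor (see vela_api.pack_biases), giving a (channels, 10)
+        uint8 tensor overall.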
+    lut : te.Tensor
+        The look-up table values to use if activation = "LUT".
+    ifm_scale : float
+        The quantization scale for the Input Feature Map tensor.
+    ifm_zero_point : int
+        The quantization zero point for the Input Feature Map tensor.
+    weight_zero_point : int
+        The quantization zero point for the weight tensor.
+    ofm_scale : float
+        The quantization scale for the Output Feature Map tensor.
+    ofm_zero_point : int
+        The quantization zero point for the Output Feature Map tensor.
+    strides : tuple
+        The 2 dimensional strides as (stride_height, stride_width).
+    padding : tuple
+        The 4 dimensional padding as (pad_top, pad_left, pad_bottom, pad_right).
+    dilation : Union[Tuple[int, int], List[int]]
+        The 2 dimensional dilation as (dilation_height, dilation_width).
+    activation : str
+        The activation function to use.
+        "NONE" - no activation function.
+        "CLIP" - clip the output between clip_min and clip_max.
+        "TANH" - tanh activation function.
+        "SIGMOID" - sigmoid activation function.
+        "LUT" - use a look-up table to perform the activation function.
+    clip_min : int
+        The minimum clipping value if activation = "CLIP".
+    clip_max : int
+        The maximum clipping value if activation = "CLIP".
+    upscale : str
+        The 2x2 upscaling mode to apply to the Input Feature Map tensor.
+        "NONE" - no upscaling.
+        "NEAREST" - upscale using nearest neighbour.
+        "ZEROS" - upscale using zeros.
+    ifm_layout : str
+        The layout of the Input Feature Map tensor. Can be "NHWC" or "NHCWB16".
+    ofm_layout : str
+        The layout of the Output Feature Map tensor. Can be "NHWC" or "NHCWB16".
+
+    Returns
+    -------
+    te.Tensor
+        The OFM tensor.
+
+    """
+    assert ifm.shape[0] == 1
+    assert ifm_layout in {"NHWC", "NHCWB16"}
+    assert ofm_layout in {"NHWC", "NHCWB16"}
+
+    stride_h, stride_w = strides
+    dilation_h, dilation_w = dilation
+    ofm_channels, kernel_h, kernel_w, ifm_channels = weight.shape
+
+    # Compute operation for the IFM DMA pipeline
+    dmaed_ifm = dma_ifm_compute(
+        ifm, ifm_layout, ifm_zero_point, ifm_scale, weight.shape[3], padding
+    )
+
+    # 2D Convolution compute operation
+    dilated_kernel_h = (kernel_h - 1) * dilation_h + 1
+    dilated_kernel_w = (kernel_w - 1) * dilation_w + 1
+    ofm_height = (dmaed_ifm.shape[1] - dilated_kernel_h) // stride_h + 1
+    ofm_width = (dmaed_ifm.shape[2] - dilated_kernel_w) // stride_w + 1
+    rc = te.reduce_axis((0, ifm_channels), name="rc")
+    rh = te.reduce_axis((0, kernel_h), name="ry")
+    rw = te.reduce_axis((0, kernel_w), name="rx")
+
+    conv2d_attrs = {
+        "op": "ethosu_conv2d",
+        "weight_zero_point": weight_zero_point,
+        "activation": activation,
+        "upscale": upscale,
+        "clip_min": clip_min,
+        "clip_max": clip_max,
+        "stride_h": stride_h,
+        "stride_w": stride_w,
+        "dilation_h": dilation_h,
+        "dilation_w": dilation_w,
+    }
+
+    conv = te.compute(
+        (1, ofm_height, ofm_width, ofm_channels),
+        lambda nn, hh, ww, cc: te.sum(
+            dmaed_ifm(
+                nn, hh * stride_h + rh * dilation_h, ww * stride_w + rw * dilation_w, rc
+            ).astype(ifm.dtype)
+            * weight[cc, rh, rw, rc].astype(ifm.dtype)
+            # This is a trick to load 10 elements of the scale_bias at once; it is
+            # not accurate maths
+            + (scale_bias[cc, 0] * scale_bias[cc, 9]),
            axis=[rh, rw, rc],
+        ),
+        name="ethosu_conv2d",
+        attrs=conv2d_attrs,
+    )
+
+    # Compute operation for the OFM DMA pipeline
+    return dma_ofm_compute(conv, ofm_layout, ofm_zero_point, ofm_scale, ofm_channels)
diff --git a/python/tvm/relay/backend/contrib/ethosu/te/dma.py b/python/tvm/relay/backend/contrib/ethosu/te/dma.py
new file mode 100644
index 000000000000..d19c8c56f7c2
--- /dev/null
+++ b/python/tvm/relay/backend/contrib/ethosu/te/dma.py
@@ -0,0 +1,309 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name,unnecessary-lambda
+"""Tensor Expressions for operations supported by the NPU DMA engine"""
+from typing import Callable, Tuple, Optional, List
+
+import tvm  # type: ignore
+from tvm import te
+from tvm.topi.utils import equal_const_int  # type: ignore
+
+
+def _pad_tensor(
+    tensor: te.Tensor, pad_before: List[int], pad_after: Optional[List[int]] = None
+) -> Callable:
+    """Generate a padded tensor.
+
+    Parameters
+    ----------
+    tensor : te.Tensor
+        The tensor to pad.
+    pad_before : list of int
+        The 'before' padding on each axis.
+    pad_after : list of int
+        The 'after' padding on each axis.
+
+    Returns
+    -------
+    _pad : callable
+        A function that produces the padded tensor.
+
+    """
+    pad_after = pad_after or pad_before
+    dims = len(tensor.shape)
+    assert len(pad_before) == dims
+    assert len(pad_after) == dims
+
+    def _pad(*indices):
+        not_zero = []  # A list of padding conditions that aren't trivial (zero padding)
+        index_tuple = []  # The indices with which to access the padded tensor
+        for i in range(dims):
+            if equal_const_int(pad_before[i], 0) and equal_const_int(pad_after[i], 0):
+                index_tuple.append(indices[i])
+            else:
+                index_tuple.append(indices[i] - pad_before[i])
+                not_zero.append(indices[i] >= pad_before[i])
+                not_zero.append(indices[i] < tensor.shape[i] + pad_before[i])
+        if not_zero:
+            not_zero = tvm.tir.all(*not_zero)
+            return tvm.tir.if_then_else(not_zero, tensor(*index_tuple), tvm.tir.const(0, "uint8"))
+        return tensor(*index_tuple)
+
+    return _pad
+
+
+def read_compute(tensor: te.Tensor, layout: str, zero_point: int, scale: float) -> te.Tensor:
+    """A tensor expression which represents a read.
+
+    Parameters
+    ----------
+    tensor : te.Tensor
+        The tensor to read.
+    layout : str
+        The layout of the tensor, either NHWC or NHCWB16.
+    zero_point : int
+        The zero point of the tensor.
+    scale : float
+        The scale of the tensor.
+
+    Returns
+    -------
+    te.Tensor
+        The tensor having been read.
+
+    """
+    assert layout in {"NHWC", "NHCWB16"}
+    read_attrs = {
+        "op": "ethosu_read",
+        "layout": layout,
+        "zero_point": zero_point,
+        "scale": scale,
+    }
+    return te.compute(tensor.shape, lambda *i: tensor(*i), name="ethosu_read", attrs=read_attrs)
+
+
+def write_compute(tensor: te.Tensor, layout: str, zero_point: int, scale: float) -> te.Tensor:
+    """A tensor expression which represents a write.
+
+    Parameters
+    ----------
+    tensor : te.Tensor
+        The tensor to write.
+    layout : str
+        The layout of the tensor, either NHWC or NHCWB16.
+    zero_point : int
+        The zero point of the tensor.
+    scale : float
+        The scale of the tensor.
+
+    Returns
+    -------
+    te.Tensor
+        The tensor having been written.
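+        The write is represented as an identity compute carrying the
+        'ethosu_write' attributes (layout, zero point and scale).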
+ + """ + assert layout in {"NHWC", "NHCWB16"} + write_attrs = { + "op": "ethosu_write", + "layout": layout, + "zero_point": zero_point, + "scale": scale, + } + return te.compute( + tensor.shape, + lambda *i: tensor(*i), + name="ethosu_write", + attrs=write_attrs, + ) + + +def convert_to_nhwc_compute(tensor: te.Tensor, layout: str, channels: int) -> te.Tensor: + """Converts a tensor into NHWC layout if it's in NHWCB16 layout. + + Parameters + ---------- + tensor : te.Tensor + The tensor to convert. + layout : str + The layout of the tensor, either NHWC or NHCWB16. + channels : int + The number of valid channels for the tensor. + + Returns + ------- + te.Tensor + The converted tensor in NHWC layout. + + """ + assert layout in {"NHWC", "NHCWB16"} + convert_to_nhwc_attrs = { + "op": "ethosu_convert_to_nhwc", + "layout": layout, + } + if layout == "NHCWB16": + return te.compute( + (tensor.shape[0], tensor.shape[1], tensor.shape[3], channels), + lambda nn, hh, ww, cc: tensor(nn, hh, te.indexdiv(cc, 16), ww, te.indexmod(cc, 16)), + name="ethosu_convert_to_nhwc", + attrs=convert_to_nhwc_attrs, + ) + + return te.compute( + tensor.shape, + lambda *i: tensor(*i), + name="ethosu_convert_to_nhwc", + attrs=convert_to_nhwc_attrs, + ) + + +def convert_to_nhcwb16_compute(tensor: te.Tensor, layout: str, channels: int) -> te.Tensor: + """Converts a tensor into NHCWB16 layout if it's in NHWC layout. + + Parameters + ---------- + tensor : te.Tensor + The tensor to convert. + layout : str + The layout of the tensor, either NHWC or NHCWB16. + channels : int + The number of valid channels for the tensor. + + Returns + ------- + te.Tensor + The converted tensor in NHCWB16 layout. + + """ + assert layout in {"NHWC", "NHCWB16"} + convert_to_nhcwb16_attrs = { + "op": "ethosu_convert_to_nhcwb16", + "layout": layout, + } + if layout == "NHCWB16": + out_channel_bricks = te.indexdiv(channels - 1, 16) + 1 + output_shape = (1, tensor.shape[1], out_channel_bricks, tensor.shape[2], 16) + return te.compute( + output_shape, + lambda nn, hh, cc, ww, cb: tvm.tir.if_then_else( + cc * 16 + cb < channels, + tensor(nn, hh, ww, cc * 16 + cb), + tvm.tir.IntImm(tensor.dtype, 0), + ), + name="ethosu_convert_to_nhcwb16", + attrs=convert_to_nhcwb16_attrs, + ) + + return te.compute( + tensor.shape, + lambda *i: tensor(*i), + name="ethosu_convert_to_nhcwb16", + attrs=convert_to_nhcwb16_attrs, + ) + + +def pad_compute(tensor: te.Tensor, padding: tuple) -> te.Tensor: + """Pad an NHWC tensor in the height and width axes. + + Parameters + ---------- + tensor : te.Tensor + The tensor to pad. + padding : tuple + The 4 dimensional padding as (pad_top, pad_left, pad_bottom, pad_right). + + Returns + ------- + te.Tensor + The padded tensor. + + """ + pad_top, pad_left, pad_down, pad_right = padding + pad_before = [0, int(pad_top), int(pad_left), 0] + pad_after = [0, int(pad_down), int(pad_right), 0] + pad_attrs = { + "op": "ethosu_pad", + } + shape = tensor.shape + return te.compute( + (shape[0], shape[1] + pad_top + pad_down, shape[2] + pad_left + pad_right, shape[3]), + lambda nn, hh, ww, cc: _pad_tensor(tensor, pad_before, pad_after)(nn, hh, ww, cc), + name="ethosu_pad", + attrs=pad_attrs, + ) + + +def dma_ifm_compute( + ifm: te.Tensor, + layout: str, + zero_point: int, + scale: float, + channels: int, + padding: Tuple[int, int, int, int], +) -> te.Tensor: + """A sequence of compute operators representing the DMA capabilities for an IFM. + + Parameters + ---------- + ifm : te.Tensor + The Input Feature Map (IFM) tensor. 
+ layout : str + The layout of the data, either NHWC or NHCWB16. + zero_point : int + The zero point of the data. + scale : float + The scale of the data. + channels : int + The number of valid channels for the data. + padding : tuple + The 4 dimensional padding as (pad_top, pad_left, pad_bottom, pad_right). + + Returns + ------- + te.Tensor + The dma-ed IFM tensor. + + """ + read_ifm = read_compute(ifm, layout, zero_point, scale) + convert_to_nhwc_ifm = convert_to_nhwc_compute(read_ifm, layout, channels) + return pad_compute(convert_to_nhwc_ifm, padding) + + +def dma_ofm_compute( + ofm: te.Tensor, layout: str, zero_point: int, scale: float, channels: int +) -> te.Tensor: + """A sequence of compute operators representing the DMA capabilities for an OFM. + + Parameters + ---------- + ofm : te.Tensor + The Output Feature Map (OFM) tensor. + layout : str + The layout of the data, either NHWC or NHCWB16. + zero_point : int + The zero point of the data. + scale : float + The scale of the data. + channels : int + The number of valid channels for the data. + + Returns + ------- + te.Tensor + The dma-ed OFM tensor. + + """ + convert_to_nhcwb16_ofm = convert_to_nhcwb16_compute(ofm, layout, channels) + return write_compute(convert_to_nhcwb16_ofm, layout, zero_point, scale) diff --git a/python/tvm/relay/backend/contrib/ethosu/util.py b/python/tvm/relay/backend/contrib/ethosu/util.py new file mode 100644 index 000000000000..e9d89d33e6f0 --- /dev/null +++ b/python/tvm/relay/backend/contrib/ethosu/util.py @@ -0,0 +1,185 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +""" +Helper utility Enums and Functions used through out code generation. + +The rest of the utility functions are misc. +Refer to the description inside such functions +""" + +from enum import Enum +from typing import Union, Tuple, Dict, Optional +import numpy as np # type: ignore + +import tvm # type: ignore +from tvm import relay +from tvm.relay.build_module import bind_params_by_name # type: ignore +from tvm.relay.backend.contrib.ethosu import preprocess # type: ignore + + +class QConv2DArgs(Enum): + """ + This is a helper enum to obtain the correct index + of qnn.conv2d arguments. + """ + + IFM = 0 + WEIGHTS = 1 + IFM_ZERO_POINT = 2 + WEIGHTS_ZERO_POINT = 3 + IFM_SCALE = 4 + WEIGHTS_SCALE = 5 + + +class RequantArgs(Enum): + """ + This is a helper enum to obtain the correct index + of qnn.requantize arguments. + """ + + IFM_SCALE = 1 + IFM_ZERO_POINT = 2 + OFM_SCALE = 3 + OFM_ZERO_POINT = 4 + + +class BiasAddArgs(Enum): + """ + This is a helper enums to obtain the correct index + of qnn.bias_add arguments. + """ + + BIASES = 1 + + +class ClipArgs(Enum): + """ + This is a helper enums to obtain the correct index + of clip arguments. 
+ """ + + A_MIN = 1 + A_MAX = 2 + + +def is_composite_func(func: relay.Function, name: str) -> bool: + """ + This method checks whether the call is to + a composite function of a given name. + + Parameters + ---------- + func : relay.Function + The header to be displayed along with the dump. + + name : str + The candidate name to be checked + + Returns + -------- + a boolean + """ + + if not hasattr(func, "attrs"): + return False + if "Composite" not in func.attrs.keys(): + return False + composite_name = func.attrs["Composite"] + + return composite_name == name + + +def get_range_for_dtype_str(dtype: str) -> Tuple[int, int]: + """ + Produce the min,max for a give data type. + + Parameters + ---------- + dtype : str + a type string (e.g., int8) + + Returns + ------- + type_info.min : int + the minimum of the range + type_info.max : int + the maximum of the range + """ + + try: + type_info = np.iinfo(dtype) + except ValueError: + type_info = np.finfo(dtype) + return type_info.min, type_info.max + + +def round_away_zero(f: Union[float, np.double, np.single, np.float32, np.float64]) -> np.float64: + """Round the number away from zero towards +inf / -inf""" + offset = -0.5 if (f < 0) else 0.5 + return np.trunc(f + offset) + + +def round_up(a: int, b: int) -> int: + """Round up to a multiple of b""" + return ((a + b - 1) // b) * b + + +# pylint: disable=unused-argument +def partition_for_ethosu( + mod: tvm.ir.IRModule, params: Optional[Dict[str, tvm.runtime.NDArray]] = None, **opts +): + """This helper function partition the relay graph as produced by the + relay frontend for a given model into external functions + to be presented to the codegen. + + Parameters + ---------- + mod : tvm.ir.IRModule + The IRModule that gets generated from a relay frontend + params : Optional[Dict[str, tvm.runtime.NDArray]] + Constant input parameters. + + Returns + ------- + mod : IRModule + The partitioned IRModule with external global functions + """ + if params: + mod["main"] = bind_params_by_name(mod["main"], params) + + pattern = relay.op.contrib.get_pattern_table("ethosu") + mod = relay.transform.InferType()(mod) + mod = relay.transform.MergeComposite(pattern)(mod) + mod = relay.transform.AnnotateTarget("ethosu")(mod) + mod = relay.transform.MergeCompilerRegions()(mod) + mod = relay.transform.InferType()(mod) + mod = relay.transform.PartitionGraph()(mod) + mod = relay.transform.InferType()(mod) + mod = preprocess.preprocess_ext_io()(mod) + return mod + + +def get_dim_value(layout: str, dim: int): + """This is a helper function to retrieve the value + of the dimension given the shape and the layout + """ + assert isinstance(layout, str) + assert dim in list(layout) + for idx, dim_char in enumerate(layout): + if dim_char == dim: + return idx + return None diff --git a/python/tvm/relay/backend/contrib/ethosu/vela_api.py b/python/tvm/relay/backend/contrib/ethosu/vela_api.py new file mode 100644 index 000000000000..72ae18123b3d --- /dev/null +++ b/python/tvm/relay/backend/contrib/ethosu/vela_api.py @@ -0,0 +1,310 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+This is an adapter module for conversions between TVM and Vela.
+The following conversion APIs are added:
+  * Obtaining the best block config
+  * Compressing weights
+  * Packing biases
+"""
+import logging
+import math
+from typing import Tuple, Optional, List
+import numpy as np  # type: ignore
+from ethosu.vela import api as vapi  # type: ignore
+
+from tvm.relay.backend.contrib.ethosu import util  # type: ignore
+
+# pylint: disable=invalid-name
+logger = logging.getLogger("Ethos-U")
+
+VELA_TO_NP_DTYPES = {
+    vapi.NpuDataType.UINT8: np.uint8,
+    vapi.NpuDataType.UINT16: np.uint16,
+    vapi.NpuDataType.INT8: np.int8,
+    vapi.NpuDataType.INT16: np.int16,
+    vapi.NpuDataType.INT32: np.int32,
+}

+SCALE_BIAS_LENGTH = 10
+
+
+def get_optimal_block_config(
+    npu_op: vapi.NpuOperation, accel_type: vapi.NpuAccelerator
+) -> vapi.NpuShape3D:
+    """
+    The NPU's unit of work is known as a block. It will fetch block(s) from the Input
+    Feature Map (IFM) and compute block(s) for the Output Feature Map (OFM).
+    Therefore, we need to pick an optimal block configuration considering the bandwidth
+    needed to bring in the IFM blocks and the number of OFM block computes that need to
+    happen to cover the OFM, as indicated by the NPU op.
+
+    Parameters
+    ----------
+    npu_op : ethosu.vela.api.NpuOperation
+        The NPU operation and its params
+    accel_type : ethosu.vela.api.NpuAccelerator
+        The NPU accelerator variant
+
+    Returns
+    -------
+    ethosu.vela.api.NpuShape3D :
+        The optimal block config for the operator
+    """
+    all_valid_block_configs = vapi.npu_find_block_configs(npu_op, accel_type)
+    return _get_optimal_block_config(all_valid_block_configs)
+
+
+def _get_optimal_block_config(all_valid_block_configs: List[vapi.NpuShape3D]) -> vapi.NpuShape3D:
+    """An internal function to get the block config with the largest depth
+    and then the highest volume/area"""
+    assert isinstance(all_valid_block_configs, list)
+    for block_cfg in all_valid_block_configs:
+        assert isinstance(block_cfg, vapi.NpuShape3D)
+
+    # Getting the largest volume block for benchmarking
+    all_valid_block_configs.sort(
+        key=lambda _cfg: _cfg.depth * _cfg.height * _cfg.width, reverse=True
+    )
+    largest_volume_block_config = all_valid_block_configs[0]
+    largest_volume = (
+        largest_volume_block_config.depth
+        * largest_volume_block_config.height
+        * largest_volume_block_config.width
+    )
+
+    all_valid_block_configs.sort(key=lambda _cfg: _cfg.depth, reverse=True)
+    max_d = all_valid_block_configs[0].depth
+    max_depth_block_configs = [_cfg for _cfg in all_valid_block_configs if _cfg.depth == max_d]
+    max_depth_block_configs.sort(key=lambda _cfg: _cfg.height * _cfg.width, reverse=True)
+    max_area = max_depth_block_configs[0].height * max_depth_block_configs[0].width
+    max_area_depth_block_configs = [
+        _cfg for _cfg in max_depth_block_configs if _cfg.height * _cfg.width == max_area
+    ]
+    # This is to get a deterministic answer every time
+    max_area_depth_block_configs.sort(key=lambda _cfg: _cfg.height, reverse=True)
+    assert len(max_area_depth_block_configs) > 0
+    current_volume = (
+        max_area_depth_block_configs[0].depth
+        * max_area_depth_block_configs[0].height
+        * max_area_depth_block_configs[0].width
+    )
+    logger.info("Using block config=%s", max_area_depth_block_configs[0])
+    logger.info(
+        "Quality of the block config w.r.t. max volume block config=%s",
+        100.0 * (current_volume / largest_volume),
+    )
+    return max_area_depth_block_configs[0]
+
+
+def compress_weights(
+    weights: np.ndarray,
+    weights_zp: int,
+    weights_layout: str,
+    ifm_bitdepth: int,
+    block_depth: int,
+    dilation: Tuple[int, int],
+    accel_type: vapi.NpuAccelerator,
+    is_depthwise: Optional[bool] = False,
+) -> bytearray:
+    """The NPU requires the weights to be compressed before they can be
+    executed. Therefore, this function calls into the Vela APIs to compress
+    the weights.
+
+    Parameters
+    ----------
+    weights : numpy.ndarray
+        The raw weights
+    weights_zp : int
+        The zero point of the weights
+    weights_layout : str
+        A string literal indicating the layout
+        Supported values : HWIO, HWOI, OHWI
+    ifm_bitdepth : int
+        The bit depth of the ifm the weights are used with
+    block_depth : int
+        The depth of the optimal block config for the operator
+    dilation : tuple
+        A tuple of 2 elements indicating dilation in h and w
+    accel_type : ethosu.vela.api.NpuAccelerator
+        The NPU accelerator variant
+    is_depthwise : bool, optional
+        This indicates whether the weights are compressed for depthwise convolution
+
+    Returns
+    -------
+    compressed_weights : bytearray
+        Compressed weights
+    """
+    layout_transform_indices = {"HWIO": (3, 0, 1, 2), "HWOI": (2, 0, 1, 3), "OHWI": (0, 1, 2, 3)}
+    assert weights_layout in layout_transform_indices.keys()
+    assert isinstance(weights_zp, np.int64)
+    weights = weights.astype(np.int64) - weights_zp
+    # Vela needs the weights in OHWI layout
+    weights_ohwi = np.transpose(weights, layout_transform_indices[weights_layout])
+    shape_ohwi = [
+        weights.shape[layout_transform_indices[weights_layout][0]],
+        weights.shape[layout_transform_indices[weights_layout][1]],
+        weights.shape[layout_transform_indices[weights_layout][2]],
+        weights.shape[layout_transform_indices[weights_layout][3]],
+    ]
+    block_traversal = calculate_block_traversal_mode(is_depthwise, shape_ohwi, ifm_bitdepth)
+    compressed_weights = vapi.npu_encode_weights(
+        accelerator=accel_type,
+        weights_volume=weights_ohwi,
+        dilation_xy=dilation,
+        ifm_bitdepth=ifm_bitdepth,
+        ofm_block_depth=block_depth,
+        is_depthwise=is_depthwise,
+        block_traversal=block_traversal,
+    )
+    return compressed_weights
+
+
+def calculate_block_traversal_mode(
+    is_depthwise: bool, weights_shape_ohwi: List[int], ifm_bitdepth: int
+) -> vapi.NpuBlockTraversal:
+    """Calculate the block traversal mode given whether the op is a depthwise
+    convolution, the shape of the weights and the bit-depth of the IFM.
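+
+    For example, with an int8 IFM and OHWI weights of shape (16, 3, 3, 40):
+    depth_utilization = 40 / round_up(40, 32) = 0.625 and
+    part_kernel_utilization = (40 / round_up(40, 8)) * (9 / round_up(9, 4)) = 0.75,
+    so PART_KERNEL_FIRST is chosen.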
+ """ + + if is_depthwise: + return vapi.NpuBlockTraversal.DEPTH_FIRST + # Determine which block traversal strategy has better DPU utilization + kernel_size = weights_shape_ohwi[1] * weights_shape_ohwi[2] + depth_utilization = weights_shape_ohwi[3] / util.round_up( + weights_shape_ohwi[3], 32 if ifm_bitdepth == 8 else 16 + ) + part_kernel_utilization = (weights_shape_ohwi[3] / util.round_up(weights_shape_ohwi[3], 8)) * ( + kernel_size / util.round_up(kernel_size, 4 if ifm_bitdepth == 8 else 2) + ) + if part_kernel_utilization >= depth_utilization or weights_shape_ohwi[3] <= 8: + # Part-kernel first is always better for ifm depths <= 8 + return vapi.NpuBlockTraversal.PART_KERNEL_FIRST + return vapi.NpuBlockTraversal.DEPTH_FIRST + + +def pack_biases( + biases: np.ndarray, + ifm_scale: float, + ifm_dtype: np.dtype, + weight_scales: np.ndarray, + ofm_scale: float, + is_activation_tanh_or_sigmoid: bool = False, +) -> np.ndarray: + """ + The NPU requires the each bias value to be packed with + output scale parameters in a 80-bit format (that is returned + via npu_encode_bias API). This function will pack such values + to a binary artifact that the NPU will use in the execution. + + + Parameters + ---------- + biases : numpy.ndarray + The values of biases + ifm_scale : float + The quantization scale parameter of input feature map + ifm_dtype : numpy.dtype + The data type of input feature map data. + weight_scales : numpy.ndarray + The quantization scale parameter of weight feature map + This could be a tuple if per-channel quantization is present. + ofm_scale : float + The quantization scale parameter of output feature map. + is_activation_tanh_or_sigmoid : bool + Indicates whether the fused activation function is tanh or sigmoid. + + Returns + ------- + scale_bias : numpy.ndarray + Packed scales/biases as the hardware requires them. + """ + # The BYOC infra should not partition anything else. + supported_ifm_dtypes = (np.uint8, np.int8, np.int16) + assert ifm_dtype in supported_ifm_dtypes + + if weight_scales.size == 1: + weight_scales = [weight_scales] * biases.size + + hw_bias_scales = _calculate_hw_bias_scales( + ifm_scale, weight_scales, ofm_scale, ifm_dtype, is_activation_tanh_or_sigmoid + ) + assert len(hw_bias_scales) == biases.size + biases = biases.astype("int64") + packed_biases = bytearray() + for idx, scale in enumerate(hw_bias_scales): + packed_biases.extend(vapi.npu_encode_bias(biases[idx], *scale)) + scale_bias = np.frombuffer(packed_biases, dtype=np.uint8) + scale_bias = np.reshape(scale_bias, (-1, 10)) + return scale_bias + + +def _quantize_scale(scale: float) -> Tuple[int, int]: + """Quantize floating point scale into 32-bit int scale with a 6-bit shift. + This is to be used with 8-bit data. 
+ """ + mantissa, exponent = math.frexp(scale) + mantissa_scaled = mantissa * (1 << 31) + mantissa_scaled = int(util.round_away_zero(mantissa_scaled)) + required_shift = 31 - exponent + assert 0 <= required_shift < (1 << 6) + return mantissa_scaled, required_shift + + +def _reduced_quantize_scale(scale: float) -> Tuple[int, int]: + """A reduction of precision is required for 16 bit data.""" + mantissa_scaled, required_shift = _quantize_scale(scale) + # This is max a signed 16-bit number could represent + max_reduced_mantissa_scaled = (1 << 15) - 1 + # if the current value is larger than pre-scaled max_reduced_mantissa_scaled + # we need to saturate the anwser to max_reduced_mantissa_scaled + if mantissa_scaled >= max_reduced_mantissa_scaled << 16: + reduced_mantissa_scaled = max_reduced_mantissa_scaled + else: + reduced_mantissa_scaled = (mantissa_scaled + (1 << 15)) >> 16 + reduced_shift = required_shift - 16 + return reduced_mantissa_scaled, reduced_shift + + +def _calculate_hw_bias_scales( + ifm_scale: float, + weight_scales: List[float], + ofm_scale: float, + ifm_dtype: np.dtype, + is_faf_tanh_sigmoid: bool = False, +) -> List[Tuple[int, int]]: + """This function will produce a scale that is calculated using scales of ifm, + weights and ofm. It is also important to note that if per-channel / per-value + quantization required they should go into hw bias scales""" + if is_faf_tanh_sigmoid: + ifm_scale = ifm_scale * 0x3000 + if ifm_dtype == np.uint8: + bias_scales = [np.double(ifm_scale * ws) / np.double(ofm_scale) for ws in weight_scales] + else: + assert ifm_dtype in (np.int8, np.int16) + ifm_scale_dbl = np.double(ifm_scale) + ofm_scale_dbl = np.double(ofm_scale) + bias_scales = [ifm_scale_dbl * np.double(ws) / ofm_scale_dbl for ws in weight_scales] + + if ifm_dtype == np.int16: + hw_bias_scales = [_reduced_quantize_scale(bs) for bs in bias_scales] + else: + assert ifm_dtype in (np.uint8, np.int8) + hw_bias_scales = [_quantize_scale(bs) for bs in bias_scales] + + return hw_bias_scales diff --git a/python/tvm/relay/op/contrib/ethosu.py b/python/tvm/relay/op/contrib/ethosu.py new file mode 100644 index 000000000000..0da81101c77b --- /dev/null +++ b/python/tvm/relay/op/contrib/ethosu.py @@ -0,0 +1,249 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+"""Arm(R) Ethos(TM)-U NPU supported operators.""" +from typing import List, Tuple, Callable +import numpy as np # type: ignore + +import tvm # type: ignore +from tvm.relay.expr import Constant # type: ignore +from tvm.relay.op.contrib.register import register_pattern_table # type: ignore +from tvm.relay.dataflow_pattern import wildcard, is_op, is_constant # type: ignore +from tvm.relay.backend.contrib.ethosu.util import QConv2DArgs # type: ignore +from tvm.relay.backend.contrib.ethosu.util import BiasAddArgs +from tvm.relay.backend.contrib.ethosu.util import RequantArgs +from tvm.relay.backend.contrib.ethosu.util import get_dim_value +from ethosu.vela import api as vapi # type: ignore + + +class TensorParams: + """ + This class will parse a tvm Expr along with quantization scale + and zero point to populate parameters that are required + for the creation of tensors in Vela. + """ + + def __init__(self, tensor, layout=None, scale=None, zero_point=None): + self.tensor = tensor + if isinstance(tensor, Constant): + self.values = tensor.data.asnumpy() + else: + self.values = None + self.dtype = tensor.checked_type.dtype + self.shape = [int(i) for i in tensor.checked_type.shape] + self.layout = layout + + if scale is not None and zero_point is not None: + self.q_params = vapi.NpuQuantization( + scale.data.asnumpy().astype("float32"), zero_point.data.asnumpy().astype(self.dtype) + ) + else: + # put default values + self.q_params = vapi.NpuQuantization(1.0, 0) + + +def check_strides(strides: List[int]) -> bool: + """This function checks whether strides are within the limits supported by the NPU""" + stride_range = (1, 3) + smin, smax = stride_range + if not smax >= strides[0] >= smin: + return False + if not smax >= strides[1] >= smin: + return False + return True + + +def check_valid_dtypes(tensor_params: List[TensorParams]) -> bool: + """This function checks whether dtypes are supported by the NPU""" + supported_dtypes = (np.uint8, np.int8) + for tep in tensor_params: + # Check for dtypes + if np.dtype(tep.dtype) not in supported_dtypes: + return False + # Check for shape sizes + if any(dimlen > 65536 for dimlen in tep.shape): + return False + return True + + +def check_weights(weights: TensorParams, dilation: List[int]): + """This function checks whether weight tensor is compatible with the NPU""" + dilated_height_range = (1, 64) + dilated_hxw_range = (1, 64 * 64) + weights_limit = 127 * 65536 + dilated_width = (weights.shape[get_dim_value(weights.layout, "W")] - 1) * dilation[0] + 1 + dilated_height = (weights.shape[get_dim_value(weights.layout, "H")] - 1) * dilation[1] + 1 + dh_min, dh_max = dilated_height_range + if not dh_min <= dilated_height <= dh_max: + return False + dilated_hxw = dilated_height * dilated_width + dhxw_min, dhxw_max = dilated_hxw_range + if not dhxw_min <= dilated_hxw <= dhxw_max: + return False + # A saturation upper bound check for accumulators + weights.values = weights.values - weights.q_params.zero_point + axis = ( + get_dim_value(weights.layout, "H"), + get_dim_value(weights.layout, "W"), + get_dim_value(weights.layout, "I"), + ) + sum_weights = np.amax(np.sum(np.absolute(weights.values), axis=axis)) + return sum_weights <= weights_limit + + +def check_bias(bias: TensorParams): + """This function checks whether the bias values fit in 40 bits""" + if bias and bias.dtype == np.dtype("int64"): + valid = all(len(bin(bias_value)[2:]) <= 40 for bias_value in bias.values) + return valid + return True + + +def check_batch_size(ifm: TensorParams): + """This function checks 
that the number of batches is one, which is all Vela currently supports"""
+    return ifm.shape[0] == 1
+
+
+def check_dilation(dilation: List[int]):
+    """This function checks whether dilation is within the limits supported by the NPU"""
+    dilation_range = (1, 2)
+    dmin, dmax = dilation_range
+    if not dmin <= dilation[0] <= dmax:
+        return False
+    if not dmin <= dilation[1] <= dmax:
+        return False
+    return True
+
+
+def check_padding(padding: List[int], bounds: List[int]):
+    """This function checks whether padding is within the limits supported by the NPU"""
+    if len(padding) != 4 or len(bounds) != 4:
+        return False
+    top, left, bottom, right = padding
+    topb, leftb, bottomb, rightb = bounds
+    return not (top > topb or left > leftb or bottom > bottomb or right > rightb)
+
+
+class QnnConv2DParams:
+    """
+    This class will parse a Call to an ethosu.qnn_conv2d composite function
+    and extract the quantization information of all the associated tensors.
+    """
+
+    composite_name = "ethosu.qnn_conv2d"
+    # The NPU only supports padding up to the following values
+    padding_bounds = [31, 31, 32, 32]
+    activation_map = {"clip": "CLIP"}
+
+    def __init__(self, func_body: tvm.relay.Function):
+        activation = None
+        if str(func_body.op) in self.activation_map.keys():
+            activation = func_body
+            requantize_op = activation.args[0]
+        else:
+            requantize_op = func_body
+        bias_add = requantize_op.args[0]
+        qnn_conv2d = bias_add.args[0]
+        data_layout = qnn_conv2d.attrs.data_layout
+        kernel_layout = qnn_conv2d.attrs.kernel_layout
+        # We consider the weights & biases as params since they should be Constants
+        self.weights = TensorParams(
+            qnn_conv2d.args[QConv2DArgs.WEIGHTS.value],
+            kernel_layout,
+            qnn_conv2d.args[QConv2DArgs.WEIGHTS_SCALE.value],
+            qnn_conv2d.args[QConv2DArgs.WEIGHTS_ZERO_POINT.value],
+        )
+
+        self.biases = TensorParams(
+            bias_add.args[BiasAddArgs.BIASES.value],
+            data_layout,
+            requantize_op.args[RequantArgs.IFM_SCALE.value],
+            requantize_op.args[RequantArgs.IFM_ZERO_POINT.value],
+        )
+        self.ifm = TensorParams(
+            qnn_conv2d.args[QConv2DArgs.IFM.value],
+            data_layout,
+            qnn_conv2d.args[QConv2DArgs.IFM_SCALE.value],
+            qnn_conv2d.args[QConv2DArgs.IFM_ZERO_POINT.value],
+        )
+        self.ofm = TensorParams(
+            func_body,
+            data_layout,
+            requantize_op.args[RequantArgs.OFM_SCALE.value],
+            requantize_op.args[RequantArgs.OFM_ZERO_POINT.value],
+        )
+        self.padding = qnn_conv2d.attrs.padding
+        self.strides = qnn_conv2d.attrs.strides
+        self.dilation = qnn_conv2d.attrs.dilation
+        self.activation = activation
+
+        # If groups equals the number of channels, it's a depthwise conv2d
+        self.groups = qnn_conv2d.attrs.groups
+        self.is_depthwise = False
+        channels_axis = {"HWIO": 3, "HWOI": 2}
+        if qnn_conv2d.attrs.groups == self.weights.shape[channels_axis[kernel_layout]]:
+            self.is_depthwise = True
+
+    def is_valid(self) -> bool:
+        """
+        This function checks whether QnnConv2D has compatible attributes with the NPU
+        """
+        tensor_params = [self.weights, self.ifm, self.ofm]
+        if not check_valid_dtypes(tensor_params):
+            return False
+        if not check_weights(self.weights, self.dilation):
+            return False
+        if not check_bias(self.biases):
+            return False
+        if not check_strides(self.strides):
+            return False
+        if not check_batch_size(self.ifm):
+            return False
+        if not check_dilation(self.dilation):
+            return False
+        if not check_padding(self.padding, self.padding_bounds):
+            return False
+        legal_groups = [1, self.ofm.shape[3]]
+        if self.groups not in legal_groups:
+            return False
+        # A depthwise conv2d should be a valid QnnDepthwise2DParams, not QnnConv2DParams
+        return not self.is_depthwise
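For intuition about the kernel limits these checks enforce, check_weights validates the dilated kernel extent, (kernel_dim - 1) * dilation + 1, per axis; a small illustrative check (not part of the patch):

    # A 3x3 kernel with dilation (2, 2) covers a 5x5 window.
    kernel_h, kernel_w = 3, 3
    dilation = (2, 2)  # dilation[0] pairs with width, dilation[1] with height
    dilated_w = (kernel_w - 1) * dilation[0] + 1  # 5
    dilated_h = (kernel_h - 1) * dilation[1] + 1  # 5
    assert 1 <= dilated_h <= 64                   # dilated_height_range
    assert 1 <= dilated_h * dilated_w <= 64 * 64  # dilated_hxw_range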
+
+
+def qnn_conv2d_pattern() -> tvm.relay.dataflow_pattern.DFPattern:
+    """
+    This function creates the pattern for qnn.conv2d with an optional fused RELU activation.
+    """
+    qnn_conv2d = is_op("qnn.conv2d")(
+        wildcard(), is_constant(), is_constant(), is_constant(), is_constant(), is_constant()
+    ).has_attr({"kernel_layout": "HWIO"})
+    bias_add = is_op("nn.bias_add")(qnn_conv2d, is_constant())
+    req = is_op("qnn.requantize")(
+        qnn_conv2d | bias_add, is_constant(), is_constant(), is_constant(), is_constant()
+    )
+    clip_or_req = req.optional(is_op("clip"))
+    return clip_or_req
+
+
+@register_pattern_table("ethosu")
+def pattern_table() -> List[Tuple[str, tvm.relay.dataflow_pattern.DFPattern, Callable]]:
+    return [
+        (
+            QnnConv2DParams.composite_name,
+            qnn_conv2d_pattern(),
+            lambda pat: QnnConv2DParams(pat).is_valid(),
+        )
+    ]
diff --git a/src/relay/backend/contrib/ethosu/preprocess.cc b/src/relay/backend/contrib/ethosu/preprocess.cc
new file mode 100644
index 000000000000..ac52844091b4
--- /dev/null
+++ b/src/relay/backend/contrib/ethosu/preprocess.cc
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#include "../../../op/make_op.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace ethosu {
+
+/*!
+ * \brief This expression rewriter will traverse the graph to find calls
+ * to all external functions. If they have multiple inputs and/or
+ * multiple outputs, the following has to be done :
+ * 1) If multiple inputs are present, they need to be concatenated before the call.
+ * 2) Inside the external function they need to be split again into their original inputs.
+ * 3) If there are multiple outputs, they need to be concatenated at the end of the
+ * external function.
+ * 4) Then, the concatenated output needs to be split again and made into the original
+ * tuple output in main.
+ */
+class ExternalFuncIOHandler : public ExprRewriter {
+ public:
+  explicit ExternalFuncIOHandler(const IRModule& module) : module_(module) {}
+  int count = 0;
+
+  Function InferType(const Function& expr, const IRModule& m) {
+    IRModule mod(m);
+    mod->Update(mod->GetGlobalVar("main"), expr);
+    mod = transform::InferType()(mod);
+    return Downcast(mod->Lookup("main"));
+  }
+
+  /*!
+   * \brief This function will take a shape and compute the scalar
+   * size value for it, to be used to create flat single-dimensional
+   * tensors.
+   */
+  int64_t CalcSize(const Array& shape) {
+    int size = 1;
+    for (auto dim_size : shape) {
+      size = size * Downcast(dim_size)->value;
+    }
+    return size;
+  }
+
+  /*!
+   * \brief This will take a tensor and create a flattened
+   * tensor to be used by the concat.
+   */
+  Expr CreateFlattenTensor(const Expr& input) {
+    auto ishape = Downcast>(Downcast(input->checked_type())->shape);
+    int flatten_size = CalcSize(ishape);
+    Array output_shape = {Integer(flatten_size)};
+    return MakeReshape(input, output_shape);
+  }
+
+  /*!
+   * \brief This will take flattened tensors and create
+   * a single concatenated tensor.
+   */
+  Expr CreateConcatTensor(const Array& inputs) {
+    auto tuple = Tuple(inputs);
+    return MakeConcatenate(tuple, 0);
+  }
+
+  /*!
+   * \brief This will take a flattened, concatenated tensor and use the original
+   * inputs' shapes to recreate a Tuple of the original set of tensors.
+   */
+  Expr CreateSplitReshapedTensors(const Expr& input, const Array& original_args) {
+    Array> shapes;
+    Array flatten_tensor_sizes;
+    Array split_indices;
+    Array rets;
+
+    int total_size = 0;
+    for (auto orig_arg : original_args) {
+      auto shape = Downcast>(Downcast(orig_arg->checked_type())->shape);
+      shapes.push_back(shape);
+      flatten_tensor_sizes.push_back(CalcSize(shape));
+      if (total_size != 0) {
+        split_indices.push_back(total_size);
+      }
+      total_size += CalcSize(shape);
+    }
+    auto split_outs = MakeSplit(input, split_indices, 0);
+    for (unsigned int i = 0; i < shapes.size(); i++) {
+      auto split_out = TupleGetItem(split_outs, i);
+      split_out->checked_type_ = original_args[i]->checked_type_;
+      rets.push_back(MakeReshape(split_out, shapes[i]));
+    }
+    return Tuple(rets);
+  }
+
+  /*!
+   * \brief Modify the external function to split the single input back into the
+   * inputs the original compute requires. Moreover, the outputs will be flattened
+   * and concatenated to make a single output. Finally, the external function will
+   * only have a single input and a single output.
+   */
+  Function ModifyExternalFunction(const Function& func, const GlobalVar& gv,
+                                  const DataType& dtype) {
+    Array inputs;
+    Var ifms;
+    if (func->params.size() > 1) {
+      Array> shapes;
+      Array flatten_tensor_sizes;
+      Array split_indices;
+
+      auto func_name = gv->name_hint;
+      int total_size = 0;
+      for (auto input : func->params) {
+        auto shape = Downcast>(Downcast(input->checked_type())->shape);
+        shapes.push_back(shape);
+        auto flat_size = CalcSize(shape);
+        flatten_tensor_sizes.push_back(flat_size);
+        if (total_size != 0) {
+          split_indices.push_back(total_size);
+        }
+        total_size += flat_size;
+      }
+      Array ifms_shape = {total_size};
+      ifms = Var(func_name + "_ifms", TensorType(ifms_shape, dtype));
+      auto split_outs = MakeSplit(ifms, split_indices, 0);
+      for (unsigned int i = 0; i < shapes.size(); i++) {
+        auto split_out = TupleGetItem(split_outs, i);
+        split_out->checked_type_ = func->params[i]->checked_type();
+        inputs.push_back(MakeReshape(split_out, shapes[i]));
+      }
+    } else {
+      CHECK_EQ(func->params.size(), 1);
+      inputs.push_back(func->params[0]);
+      ifms = func->params[0];
+    }
+    Map bind_map;
+    CHECK_EQ(func->params.size(), inputs.size());
+    for (size_t i = 0; i < inputs.size(); i++) {
+      bind_map.Set(func->params[i], inputs[i]);
+    }
+    auto core_compute_expr = Bind(func->body, bind_map);
+
+    // Creation of the wrapper inside the external function
+    Array params = {ifms};
+    if (func->body->IsInstance()) {
+      auto tuple_out = func->body.as();
+      Array reshaped_outputs;
+      for (unsigned int i = 0; i < tuple_out->fields.size(); i++) {
+        auto out = Downcast(core_compute_expr)->fields[i];
+        out->checked_type_ = tuple_out->fields[i]->checked_type_;
+        reshaped_outputs.push_back(CreateFlattenTensor(out));
+      }
+      auto concat_out = CreateConcatTensor(reshaped_outputs);
+      auto f = Function(params, concat_out, concat_out->checked_type_,
{}, func->attrs); + return InferType(f, this->module_); + } else { + auto f = + Function(params, core_compute_expr, core_compute_expr->checked_type_, {}, func->attrs); + return InferType(f, this->module_); + } + } + + Expr Rewrite_(const CallNode* call, const Expr& post) final { + auto post_call = Downcast(post); + + if (auto glb_var_node = post_call->op.as()) { + auto glb_var = GetRef(glb_var_node); + auto func = Downcast(module_->functions[glb_var]); + + // If the number of inputs and output are 1 --> no need to do anything + if (post_call->args.size() == 1 && !func->body->IsInstance()) { + return post; + } + if (auto compiler = func->GetAttr(attr::kCompiler)) { + if (compiler == "ethosu") { + auto ext_input = std::move(post_call->args[0]); + auto arg_dtype = Downcast(post_call->args[0]->checked_type())->dtype; + if (post_call->args.size() > 1) { + Array reshaped_inputs; + for (const auto& arg : post_call->args) { + // All arguments should be of same data type + CHECK_EQ(arg_dtype, Downcast(arg->checked_type())->dtype) + << "Currently NPU external functions require all inputs to be of same data " + "type"; + reshaped_inputs.push_back(CreateFlattenTensor(arg)); + } + ext_input = CreateConcatTensor(reshaped_inputs); + } + auto ext_func = ModifyExternalFunction(func, glb_var, arg_dtype); + Array new_args = {ext_input}; + module_->Add(glb_var, ext_func); + Expr new_call = Call(glb_var, new_args); + if (func->body->IsInstance()) { + auto orginal_tuple_out = Downcast(func->body); + new_call = CreateSplitReshapedTensors(new_call, orginal_tuple_out->fields); + } + return std::move(new_call); + } + } + } + return post; + } + + private: + IRModule module_; +}; + +IRModule PreprocessExternalFuncIO_(const IRModule& module) { + ExternalFuncIOHandler ex_func_io_handle(module); + auto func = GetRef(module->Lookup("main").as()); + auto preprocessed = PostOrderRewrite(func, &ex_func_io_handle); + module->Update(module->GetGlobalVar("main"), GetRef(preprocessed.as())); + return module; +} + +} // namespace ethosu +} // namespace contrib + +/*! + * \brief This is a pre-processing pass for all NPU external functions. + * Currently, the NPU runtime module expects a single input and a single output. + * Therefore, this pass will concat the inputs pre-call, split again inside ext. func, + * concat the output inside ext. func and re-split again after the call. + */ + +namespace transform { +Pass PreprocessExternalFuncIO() { + runtime::TypedPackedFunc pre_processed_ext_func = + [=](IRModule m, PassContext pc) { + auto _m = contrib::ethosu::PreprocessExternalFuncIO_(m); + return _m; + }; + auto preprocess_pass = + CreateModulePass(pre_processed_ext_func, 0, "PreprocessExternalFuncIO", {}); + return Sequential({preprocess_pass, InferType()}); +} + +TVM_REGISTER_GLOBAL("relay.ext.ethosu.PreprocessExternalFuncIO") + .set_body_typed(transform::PreprocessExternalFuncIO); + +} // namespace transform +} // namespace relay +} // namespace tvm diff --git a/src/relay/op/contrib/ethosu/common.cc b/src/relay/op/contrib/ethosu/common.cc new file mode 100644 index 000000000000..bdda81bc7708 --- /dev/null +++ b/src/relay/op/contrib/ethosu/common.cc @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file src/relay/op/contrib/ethosu/common.cc + * \brief A set of utilities and common functionality for Arm(R) Ethos(TM)-U NPU QNN ops. + */ + +#include "common.h" + +#include "../../op_common.h" + +namespace tvm { +namespace relay { +namespace op { +namespace contrib { +namespace ethosu { + +Array EthosuInferKernelOutput(Array ifm_shape, String ifm_layout, + String ofm_layout, Array kernel_shape, + IndexExpr ofm_channels, Array dilation, + Array strides, Array padding) { + // In the case of NHCWB16, convert the ifm shape to NHW (C not required for this function) + if (ifm_layout == "NHCWB16") { + ifm_shape = {ifm_shape[0], ifm_shape[1], ifm_shape[3]}; + } + Array output_shape({ifm_shape[0], 0, 0, ofm_channels}); + + IndexExpr dilated_ksize_y = 1 + (kernel_shape[0] - 1) * dilation[0]; + IndexExpr dilated_ksize_x = 1 + (kernel_shape[1] - 1) * dilation[1]; + IndexExpr pad_h, pad_w; + GetPaddingHeightWidth(padding, &pad_h, &pad_w); + output_shape.Set(1, indexdiv(ifm_shape[1] + pad_h - dilated_ksize_y, strides[0]) + 1); + output_shape.Set(2, indexdiv(ifm_shape[2] + pad_w - dilated_ksize_x, strides[1]) + 1); + + // If the ofm is NHCWB16, convert the layout + if (ofm_layout == "NHCWB16") { + int channel_bricks = 1 + (output_shape[3].as()->value - 1) / 16; + output_shape = {output_shape[0], output_shape[1], channel_bricks, output_shape[2], 16}; + } + + return output_shape; +} + +} // namespace ethosu +} // namespace contrib +} // namespace op +} // namespace relay +} // namespace tvm diff --git a/src/relay/op/contrib/ethosu/common.h b/src/relay/op/contrib/ethosu/common.h new file mode 100644 index 000000000000..b5377e6e8bdf --- /dev/null +++ b/src/relay/op/contrib/ethosu/common.h @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file src/relay/op/contrib/ethosu/common.h + * \brief Functions for all Arm(R) Ethos(TM)-U NPU operators to use. + */ + +#ifndef TVM_RELAY_OP_CONTRIB_ETHOSU_COMMON_H_ +#define TVM_RELAY_OP_CONTRIB_ETHOSU_COMMON_H_ + +#include + +namespace tvm { +namespace relay { +namespace op { +namespace contrib { +namespace ethosu { + +/*! 
\brief Infer the output tensor shape for convolution and pooling operators. + * \param ifm_shape The shape of Input Feature Map. + * \param ifm_layout The layout of the IFM (NHWC or NHCWB16). + * \param ofm_layout The layout of the OFM (NHWC or NHCWB16). + * \param kernel_shape Kernel shape in format (height, width). + * \param ofm_channels The number of Output Feature Map channels. + * \param dilation The 2-dimensional dilation as (dilation_height, dilation_width). + * \param strides The 2 dimensional strides as (stride_height, stride_width). + * \param padding The 4 dimensional padding as (pad_top, pad_left, pad_bottom, pad_right). + * \return The shape of the output tensor. + */ +Array EthosuInferKernelOutput(Array ifm_shape, String ifm_layout, + String ofm_layout, Array kernel_shape, + IndexExpr ofm_channels, Array dilation, + Array strides, Array padding); + +} // namespace ethosu +} // namespace contrib +} // namespace op +} // namespace relay +} // namespace tvm + +#endif // TVM_RELAY_OP_CONTRIB_ETHOSU_COMMON_H_ diff --git a/src/relay/op/contrib/ethosu/convolution.cc b/src/relay/op/contrib/ethosu/convolution.cc new file mode 100644 index 000000000000..bad10bf66f3a --- /dev/null +++ b/src/relay/op/contrib/ethosu/convolution.cc @@ -0,0 +1,212 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file src/relay/op/contrib/ethosu/convolution.cc + * \brief Operator definitions for the Arm(R) Ethos(TM)-U NPU convolution ops. + */ +#include "../../nn/convolution.h" + +#include +#include +#include +#include +#include + +#include "../../../qnn/utils.h" +#include "common.h" + +namespace tvm { +namespace relay { +namespace op { +namespace contrib { +namespace ethosu { + +/*! 
\brief Attributes used by the Ethos(TM)-U NPU convolution operator */ +struct EthosuConv2DAttrs : public tvm::AttrsNode { + double ifm_scale; + int ifm_zero_point; + int weight_zero_point; + double ofm_scale; + int ofm_zero_point; + Array kernel_shape; + IndexExpr ofm_channels; + Array strides; + Array padding; + Array dilation; + String activation; + int clip_min; + int clip_max; + String upscale; + String ifm_layout; + String ofm_layout; + + TVM_DECLARE_ATTRS(EthosuConv2DAttrs, "relay.attrs.EthosuConv2DAttrs") { + TVM_ATTR_FIELD(ifm_scale).describe("The quantization scale for the Input Feature Map tensor."); + TVM_ATTR_FIELD(ifm_zero_point) + .describe("The quantization zero point for the Input Feature Map tensor."); + TVM_ATTR_FIELD(weight_zero_point) + .describe("The quantization zero point for the weight tensor."); + TVM_ATTR_FIELD(ofm_scale).describe("The quantization scale for the Output Feature Map tensor."); + TVM_ATTR_FIELD(ofm_zero_point) + .describe("The quantization zero point for the Output Feature Map tensor."); + TVM_ATTR_FIELD(kernel_shape) + .describe("The 2 dimensional kernel shape as (kernel_height, kernel_width).") + .set_default(NullValue>()); + TVM_ATTR_FIELD(ofm_channels) + .describe("The number of OFM channels.") + .set_default(NullValue()); + TVM_ATTR_FIELD(strides) + .set_default(Array({1, 1})) + .describe("The 2 dimensional strides as (stride_height, stride_width)."); + TVM_ATTR_FIELD(padding) + .set_default(Array({0, 0, 0, 0})) + .describe("The 4 dimensional padding as (pad_top, pad_left, pad_bottom, pad_right)."); + TVM_ATTR_FIELD(dilation) + .set_default(Array({1, 1})) + .describe("The 2 dimensional dilation as (dilation_height, dilation_width)."); + TVM_ATTR_FIELD(activation) + .describe( + "The activation function to use. " + "'NONE' - no activation function. " + "'CLIP' - clip the output between clip_min and clip_max. " + "'TANH' - tanh activation function. " + "'SIGMOID' - sigmoid activation function. " + "'LUT' - use a look-up table to perform the activation function.") + .set_default("NONE"); + TVM_ATTR_FIELD(clip_min) + .describe("The minimum clipping value if activation = 'CLIP'.") + .set_default(0); + TVM_ATTR_FIELD(clip_max) + .describe("The maximum clipping value if activation = 'CLIP'.") + .set_default(0); + TVM_ATTR_FIELD(upscale) + .describe( + "The 2x2 upscaling mode to apply to the Input Feature Map tensor. " + "'NONE' - no upscaling. " + "'NEAREST' - upscale using nearest neighbour. " + "'ZEROS' - upscale using zeros.") + .set_default("NONE"); + TVM_ATTR_FIELD(ifm_layout) + .set_default("NHWC") + .describe("The layout of the Input Feature Map tensor. Can be 'NHWC' or 'NHCWB16'."); + TVM_ATTR_FIELD(ofm_layout) + .set_default("NHWC") + .describe("The layout of the Output Feature Map tensor. 
Can be 'NHWC' or 'NHCWB16'."); + } +}; + +TVM_REGISTER_NODE_TYPE(EthosuConv2DAttrs); + +bool EthosuConv2DRel(const Array& types, int num_inputs, const Attrs& attrs, + const TypeReporter& reporter) { + CHECK_EQ(types.size(), 5); + const auto* ifm = types[0].as(); + const auto* weight = types[1].as(); + const auto* scale_bias = types[2].as(); + if (ifm == nullptr || weight == nullptr) return false; + const auto* param = attrs.as(); + CHECK(param != nullptr) << "EthosuConv2DAttrs cannot be nullptr."; + CHECK(ifm->dtype == DataType::UInt(8) || ifm->dtype == DataType::Int(8)) + << "Expected ethosu_conv2d type(uint8) or type(int8) for ifm but was " << ifm->dtype; + CHECK(weight->dtype == DataType::UInt(8) || weight->dtype == DataType::Int(8)) + << "Expected ethosu_conv2d type(uint8) or type(int8) for weight but was " << weight->dtype; + CHECK(scale_bias->dtype == DataType::UInt(8)) + << "Expected ethosu_conv2d type(uint8) for scale_bias but was " << scale_bias->dtype; + + // The scale_bias should be provided as a tensor of size {ofm_channels, 10} + reporter->Assign(types[2], TensorType({weight->shape[0], 10}, DataType::UInt(8))); + + // Assign weight type {ofm_channels, kernel_height, kernel_width, ifm_channels} + reporter->Assign(types[1], TensorType({param->ofm_channels, param->kernel_shape[0], + param->kernel_shape[1], weight->shape[3]}, + weight->dtype)); + + // Assign ofm type + auto ofm_shape = + EthosuInferKernelOutput(ifm->shape, param->ifm_layout, param->ofm_layout, param->kernel_shape, + param->ofm_channels, param->dilation, param->strides, param->padding); + reporter->Assign(types[4], TensorType(ofm_shape, ifm->dtype)); + return true; +} + +Expr MakeEthosuConv2D(Expr ifm, Expr weight, Expr scale_bias, Expr lut, double ifm_scale, + int ifm_zero_point, int weight_zero_point, double ofm_scale, + int ofm_zero_point, Array kernel_shape, IndexExpr ofm_channels, + Array strides, Array padding, Array dilation, + String activation, int clip_min, int clip_max, String upscale, + String ifm_layout, String ofm_layout) { + auto attrs = make_object(); + attrs->ifm_scale = ifm_scale; + attrs->ifm_zero_point = ifm_zero_point; + attrs->weight_zero_point = weight_zero_point; + attrs->ofm_scale = ofm_scale; + attrs->ofm_zero_point = ofm_zero_point; + attrs->kernel_shape = std::move(kernel_shape); + attrs->ofm_channels = std::move(ofm_channels); + attrs->strides = std::move(strides); + attrs->padding = std::move(padding); + attrs->dilation = std::move(dilation); + attrs->activation = std::move(activation); + attrs->clip_min = clip_min; + attrs->clip_max = clip_max; + attrs->upscale = std::move(upscale); + attrs->ifm_layout = std::move(ifm_layout); + attrs->ofm_layout = std::move(ofm_layout); + static const Op& op = Op::Get("contrib.ethosu.conv2d"); + return Call(op, {ifm, weight, scale_bias, lut}, Attrs(attrs), {}); +} + +TVM_REGISTER_GLOBAL("relay.op._make.ethosu_conv2d").set_body_typed(MakeEthosuConv2D); + +RELAY_REGISTER_OP("contrib.ethosu.conv2d") + .describe(R"code(Arm(R) Ethos(TM)-U NPU 2D quantized convolution operator. + +This Relay operator corresponds to the hardware-implemented quantized +convolution operation found on Ethos(TM)-U NPUs. It accepts either NHWC +or NHCWB16 format for the input data (Input Feature Map, or IFM) and +OHWI format for the kernel weights. + +Reference: https://developer.arm.com/documentation/102420/0200/ + +Note that the per-channel weight scale and bias tensor must be packed together into +a combined tensor of uint80s. 
This is represented in TVM by a (channels, 10) tensor +of type uint8. For more detail, refer to the Technical Reference Manual linked above. + +- **ifm**: NHWC - (1, ifm_height, ifm_width, ifm_channels) + NHCWB16 - (1, ifm_height, ifm_channels // 16, ifm_width, 16) +- **weight**: (ofm_channels, kernel_shape[0], kernel_shape[1], ifm_channels) +- **scale_bias**: (ofm_channels, 10) +- **ofm**: (1, ofm_height, ofm_width, ofm_channels) + +)code" TVM_ADD_FILELINE) + .set_attrs_type() + .set_num_inputs(4) + .add_argument("ifm", "Tensor", "The Input Feature Map tensor (IFM).") + .add_argument("weight", "Tensor", "The weight tensor.") + .add_argument("scale_bias", "Tensor", "The packed per-channel weight scale and bias tensor.") + .add_argument("lut", "Tensor", "The look-up table values to use if activation = 'LUT'.") + .set_support_level(11) + .add_type_rel("EthosuConv2D", EthosuConv2DRel); + +} // namespace ethosu +} // namespace contrib +} // namespace op +} // namespace relay +} // namespace tvm diff --git a/tests/python/contrib/test_ethosu/relay_ir_builder.py b/tests/python/contrib/test_ethosu/relay_ir_builder.py new file mode 100644 index 000000000000..6169a3e46520 --- /dev/null +++ b/tests/python/contrib/test_ethosu/relay_ir_builder.py @@ -0,0 +1,295 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
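For reference, a rough sketch (not part of the patch) of constructing the operator registered above in convolution.cc through its packed function; the argument order mirrors MakeEthosuConv2D, and the quantization numbers here are arbitrary placeholders. The Python wrapper this patch adds under python/tvm/relay/backend/contrib/ethosu/op/convolution.py (not shown in this excerpt) is the intended entry point.

    import numpy as np
    import tvm
    from tvm import relay

    make_conv2d = tvm.get_global_func("relay.op._make.ethosu_conv2d")
    ifm = relay.var("ifm", shape=(1, 56, 56, 16), dtype="int8")
    weight = relay.const(np.zeros((32, 3, 3, 16), dtype="int8"))
    scale_bias = relay.const(np.zeros((32, 10), dtype="uint8"))
    lut = relay.const(np.zeros((256,), dtype="int8"))  # only read when activation == 'LUT'
    call = make_conv2d(
        ifm, weight, scale_bias, lut,
        0.5, 10, 12, 0.25, 14,         # ifm_scale, ifm_zp, weight_zp, ofm_scale, ofm_zp
        [3, 3], 32,                    # kernel_shape, ofm_channels
        [1, 1], [0, 0, 0, 0], [1, 1],  # strides, padding, dilation
        "NONE", 0, 0, "NONE",          # activation, clip_min, clip_max, upscale
        "NHWC", "NHWC",                # ifm_layout, ofm_layout
    )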
+"""Helper module to build relay operations for testing""" + +from pathlib import Path +import numpy as np +import math + +import tvm +from tvm import relay +from tvm.relay.op.contrib import get_pattern_table +from tvm.relay import qnn +from tvm.relay.backend.contrib.ethosu.util import get_range_for_dtype_str + + +class TensorType: + """A data structure to capture tensor parameters""" + + def __init__(self): + self.shape = None + self.dtype = None + self.zp = None + self.sc = None + self.layout = None + + def get_dim_size(self, dim): + for idx, char in enumerate(self.layout): + if dim == char: + return self.shape[idx] + return None + + def get_dim_index(self, dim): + for idx, char in enumerate(self.layout): + if dim == char: + return idx + return None + + +class QnnConv2DParams: + """A data structure to capture relay.qnn.op.conv2D parameters""" + + def __init__(self, dtype): + self.ifm = TensorType() + self.ofm = TensorType() + self.kernel = TensorType() + + # default values + self.ifm.dtype = dtype + self.ifm.layout = "NHWC" + ifm_min, ifm_max = get_range_for_dtype_str(self.ifm.dtype) + self.ifm.zp = relay.const(np.random.randint(ifm_min, ifm_max), "int32") + self.ifm.sc = relay.const(np.random.random() * 2, "float32") + self.kernel.dtype = dtype + self.kernel.layout = "HWIO" + kernel_min, kernel_max = get_range_for_dtype_str(self.kernel.dtype) + self.kernel.zp = relay.const(np.random.randint(kernel_min, kernel_max), "int32") + self.kernel.sc = relay.const(np.random.random() * 2, "float32") + self.ofm.layout = "NHWC" + self.ofm.dtype = dtype + ofm_min, ofm_max = get_range_for_dtype_str(self.ofm.dtype) + self.ofm.zp = relay.const(np.random.randint(ofm_min, ofm_max), "int32") + self.ofm.sc = relay.const(np.random.random() * 2, "float32") + self.dilation = (1, 1) + + self.strides = None + self.pad = None + self.activation = "NONE" + self.clip_min = 0 + self.clip_max = 0 + + def update_output_qnn_params( + self, input_dtype="uint8", kernel_dtype="uint8", output_dtype="uint8" + ): + _, dtype_max = get_range_for_dtype_str(input_dtype) + input_max = self.ifm.sc.data.asnumpy() * (dtype_max - self.ifm.zp.data.asnumpy()) + input_min = -self.ifm.sc.data.asnumpy() * self.ifm.zp.data.asnumpy() + _, dtype_max = get_range_for_dtype_str(kernel_dtype) + kernel_max = np.max( + self.kernel.sc.data.asnumpy() * (dtype_max - self.kernel.zp.data.asnumpy()) + ) + kernel_min = np.min(-self.kernel.sc.data.asnumpy() * self.kernel.zp.data.asnumpy()) + kernel_h = self.kernel.get_dim_size("H") + kernel_w = self.kernel.get_dim_size("W") + channels = self.kernel.get_dim_size("I") + output_limits = [ + kernel_max * kernel_h * kernel_w * channels * input_max, + kernel_min * kernel_h * kernel_w * channels * input_max, + kernel_min * kernel_h * kernel_w * channels * input_min, + kernel_max * kernel_h * kernel_w * channels * input_min, + ] + output_max = max(output_limits) + output_min = min(output_limits) + dtype_min, dtype_max = get_range_for_dtype_str(input_dtype) + self.ofm.sc = relay.const((output_max - output_min) / (dtype_max - dtype_min), "float32") + self.ofm.zp = relay.const(-int(output_min / self.ofm.sc.data.asnumpy()), "int32") + + +class PoolingParams: + """A data structure to capture relay.op.max_pool2d / + relay.op.avg_pool2d parameters + """ + + def __init__(self, dtype): + self.type = None + self.size = None + self.strides = None + self.pad = None + self.layout = None + self.ifm = TensorType() + self.ofm = TensorType() + + # default values + self.ifm.dtype = dtype + self.ifm.layout = "NHWC" + self.ifm.zp = 
relay.const(np.random.randint(0, 255), "int32") + self.ifm.sc = relay.const(np.random.random() * 2, "float32") + self.ofm.zp = relay.const(np.random.randint(0, 255), "int32") + self.ofm.sc = relay.const(np.random.random() * 2, "float32") + self.ofm.dtype = dtype + self.dilation = (1, 1) + + +class AddParams: + """A data structure to capture relay.qnn.op.add parameters""" + + def __init__(self, dtype): + self.ifm0 = TensorType() + self.ifm1 = TensorType() + self.ofm = TensorType() + + # default values + self.ifm0.dtype = dtype + self.ifm0.zp = relay.const(np.random.randint(0, 255), "int32") + self.ifm0.sc = relay.const(np.random.random() * 2, "float32") + self.ifm1.dtype = dtype + self.ifm1.zp = relay.const(np.random.randint(0, 255), "int32") + self.ifm1.sc = relay.const(np.random.random() * 2, "float32") + self.update_output_qnn_params() + self.ofm.dtype = dtype + + def update_output_qnn_params(self): + ti = np.iinfo(self.ifm0.dtype) + dtype_min, dtype_max = int(ti.min), int(ti.max) + input1_max = self.ifm0.sc.data.asnumpy() * (dtype_max - self.ifm0.zp.data.asnumpy()) + input1_min = (dtype_min - self.ifm0.sc.data.asnumpy()) * self.ifm0.zp.data.asnumpy() + input2_max = self.ifm1.sc.data.asnumpy() * (dtype_max - self.ifm1.zp.data.asnumpy()) + input2_min = (dtype_min - self.ifm1.sc.data.asnumpy()) * self.ifm1.zp.data.asnumpy() + output_max = input1_max + input2_max + output_min = input1_min + input2_min + self.ofm.sc = relay.const((output_max - output_min) / dtype_max, "float32") + self.ofm.zp = relay.const( + (dtype_min - int(output_min / self.ofm.sc.data.asnumpy())), "int32" + ) + + +def get_pad_value(data, kernel, stride): + """Get the pad tuple of value for SAME padding""" + + out = int(math.ceil(float(data) / float(stride))) + pad = max(0, (out - 1) * stride + kernel - data) + pad_before = pad // 2 + pad_after = pad - pad_before + return pad_before, pad_after + + +def create_qnn_conv2d(qnn_conv2d_params, ifm_expr): + """Create a relay.Expr of relay.qnn.conv2D given the parameters""" + v_params = list() + params = { + "kernel_size": [ + qnn_conv2d_params.kernel.get_dim_size("H"), + qnn_conv2d_params.kernel.get_dim_size("W"), + ], + "strides": [qnn_conv2d_params.strides[0], qnn_conv2d_params.strides[1]], + "dilation": [qnn_conv2d_params.dilation[0], qnn_conv2d_params.dilation[1]], + "padding": [0, 0, 0, 0], + "data_layout": qnn_conv2d_params.ifm.layout, + } + dilated_kernel_h = ( + qnn_conv2d_params.dilation[0] * (qnn_conv2d_params.kernel.get_dim_size("H") - 1) + 1 + ) + dilated_kernel_w = ( + qnn_conv2d_params.dilation[1] * (qnn_conv2d_params.kernel.get_dim_size("W") - 1) + 1 + ) + if qnn_conv2d_params.pad == "SAME": + pad_top, pad_bottom = get_pad_value( + qnn_conv2d_params.ifm.get_dim_size("H"), dilated_kernel_h, qnn_conv2d_params.strides[0] + ) + pad_left, pad_right = get_pad_value( + qnn_conv2d_params.ifm.get_dim_size("W"), dilated_kernel_w, qnn_conv2d_params.strides[1] + ) + do_pad = not (pad_top == 0 and pad_bottom == 0 and pad_left == 0 and pad_right == 0) + if do_pad: + params["padding"] = [pad_top, pad_left, pad_bottom, pad_right] + qnn_conv2d_params.pad = params["padding"] + params["input_zero_point"] = qnn_conv2d_params.ifm.zp + params["kernel_zero_point"] = qnn_conv2d_params.kernel.zp + params["out_dtype"] = "int32" + params["input_scale"] = qnn_conv2d_params.ifm.sc + params["kernel_scale"] = qnn_conv2d_params.kernel.sc + params["channels"] = int(qnn_conv2d_params.kernel.get_dim_size("O")) + params["kernel_layout"] = qnn_conv2d_params.kernel.layout + k_shape = 
qnn_conv2d_params.kernel.shape + k_dtype = qnn_conv2d_params.kernel.dtype + w = tvm.nd.array( + np.random.randint( + np.iinfo(k_dtype).min, high=np.iinfo(k_dtype).max, size=k_shape, dtype=k_dtype + ) + ) + weight_expr = relay.const(w, k_dtype) + v_params.append(w) + qnn_conv2d_expr = qnn.op.conv2d(ifm_expr, weight_expr, **params) + b = tvm.nd.array( + np.random.randint( + 0, high=10, size=(qnn_conv2d_params.kernel.get_dim_size("O")), dtype="int32" + ) + ) + v_params.append(b) + bias_expr = relay.const(b, "int32") + bias = relay.nn.bias_add( + qnn_conv2d_expr, bias_expr, axis=qnn_conv2d_params.ifm.get_dim_index("C") + ) + bias_scale = relay.const( + qnn_conv2d_params.ifm.sc.data.asnumpy() * qnn_conv2d_params.kernel.sc.data.asnumpy(), + "float32", + ) + req_expr = relay.qnn.op.requantize( + bias, + bias_scale, # input zero scale + relay.const(0, "int32"), # input zero point + qnn_conv2d_params.ofm.sc, # output zero scale + qnn_conv2d_params.ofm.zp, # output zero point + out_dtype=qnn_conv2d_params.ofm.dtype, + ) + if qnn_conv2d_params.activation != "NONE": + assert qnn_conv2d_params.activation == "CLIP" + clip_expr = relay.clip(req_expr, qnn_conv2d_params.clip_min, qnn_conv2d_params.clip_max) + return clip_expr, v_params + + return req_expr, v_params + + +def create_pool2d(pooling_params, ifm_expr): + """Create a relay pooling operation""" + assert pooling_params.ifm.layout == "NHWC" + params = { + "pool_size": (pooling_params.size[0], pooling_params.size[1]), + "strides": (pooling_params.strides[0], pooling_params.strides[1]), + "padding": [0, 0], + "layout": "NHWC", + } + if pooling_params.pad == "SAME": + pad_top, pad_bottom = get_pad_value( + pooling_params.ifm.shape[1], pooling_params.size[0], pooling_params.strides[0] + ) + pad_left, pad_right = get_pad_value( + pooling_params.ifm.shape[2], pooling_params.size[1], pooling_params.strides[1] + ) + params["padding"] = [pad_top, pad_left, pad_bottom, pad_right] + if pooling_params.type == "MAX": + out = relay.op.nn.max_pool2d(ifm_expr, **params) + else: + assert pooling_params.type == "AVG" + out = relay.op.cast(ifm_expr, dtype="int32") + out = relay.op.nn.avg_pool2d(out, **params) + out = relay.op.cast(out, dtype=pooling_params.ofm.dtype) + return out + + +def create_qnn_add(ifm0_expr, ifm1_expr, add_params): + add = relay.qnn.op.add( + lhs=ifm0_expr, + rhs=ifm1_expr, + lhs_scale=add_params.ifm0.sc, + lhs_zero_point=add_params.ifm0.zp, + rhs_scale=add_params.ifm1.sc, + rhs_zero_point=add_params.ifm1.zp, + output_scale=add_params.ofm.sc, + output_zero_point=add_params.ofm.zp, + ) + return add diff --git a/tests/python/contrib/test_ethosu/test_legalize.py b/tests/python/contrib/test_ethosu/test_legalize.py new file mode 100644 index 000000000000..52f6995c3aaa --- /dev/null +++ b/tests/python/contrib/test_ethosu/test_legalize.py @@ -0,0 +1,337 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint: disable=invalid-name, unused-argument + +import pytest + +pytest.importorskip("ethosu.vela") +import numpy as np + +import tvm +from tvm import relay +from tvm.relay.backend.contrib import ethosu +from tvm.relay.backend.contrib.ethosu import legalize, preprocess +from tvm.relay.dataflow_pattern import * +from tvm.relay.op.contrib.ethosu import * +import relay_ir_builder + + +def test_split_indices_legalize(): + def create_graph(axis): + x = relay.var("x", shape=(1, 50, 50, 3)) + x_relu = relay.nn.relu(x) + split_output = relay.split(x_relu, [5, 20, 45], axis).tuple_value + return relay.Function([x], split_output) + + def expected_mod_axis1(): + expected_ir_string = """ + #[version = "0.0.5"] + def @tvmgen_default_ethosu_main_0(%x: Tensor[(1, 50, 50, 3), float32]) -> (Tensor[(1, 5, 50, 3), float32],\ + Tensor[(1, 15, 50, 3), float32],\ + Tensor[(1, 25, 50, 3), float32],\ + Tensor[(1, 5, 50, 3), float32]) { + %0 = nn.relu(%x) /* ty=Tensor[(1, 50, 50, 3), float32] */; + %1 = strided_slice(%0, begin=[0, 0, 0, 0], end=[1, 5, 50, 3], strides=[1], axes=None)\ + /* ty=Tensor[(1, 5, 50, 3), float32] */; + %2 = strided_slice(%0, begin=[0, 5, 0, 0], end=[1, 20, 50, 3], strides=[1], axes=None)\ + /* ty=Tensor[(1, 15, 50, 3), float32] */; + %3 = strided_slice(%0, begin=[0, 20, 0, 0], end=[1, 45, 50, 3], strides=[1], axes=None)\ + /* ty=Tensor[(1, 25, 50, 3), float32] */; + %4 = strided_slice(%0, begin=[0, 45, 0, 0], end=[1, 50, 50, 3], strides=[1], axes=None)\ + /* ty=Tensor[(1, 5, 50, 3), float32] */; + (%1, %2, %3, %4) + } + """ + return tvm.parser.fromtext(expected_ir_string) + + def expected_mod_axis2(): + expected_ir_string = """ + #[version = "0.0.5"] + def @tvmgen_default_ethosu_main_0(%x: Tensor[(1, 50, 50, 3), float32]) -> (Tensor[(1, 50, 5, 3), float32],\ + Tensor[(1, 50, 15, 3), float32],\ + Tensor[(1, 50, 25, 3), float32],\ + Tensor[(1, 50, 5, 3), float32]) { + %0 = nn.relu(%x) /* ty=Tensor[(1, 50, 50, 3), float32] */; + %1 = strided_slice(%0, begin=[0, 0, 0, 0], end=[1, 50, 5, 3], strides=[1], axes=None)\ + /* ty=Tensor[(1, 50, 5, 3), float32] */; + %2 = strided_slice(%0, begin=[0, 0, 5, 0], end=[1, 50, 20, 3], strides=[1], axes=None)\ + /* ty=Tensor[(1, 50, 15, 3), float32] */; + %3 = strided_slice(%0, begin=[0, 0, 20, 0], end=[1, 50, 45, 3], strides=[1], axes=None)\ + /* ty=Tensor[(1, 50, 25, 3), float32] */; + %4 = strided_slice(%0, begin=[0, 0, 45, 0], end=[1, 50, 50, 3], strides=[1], axes=None)\ + /* ty=Tensor[(1, 50, 5, 3), float32] */; + (%1, %2, %3, %4) + } + """ + return tvm.parser.fromtext(expected_ir_string) + + mod_axis1 = tvm.IRModule() + mod_axis1["tvmgen_default_ethosu_main_0"] = create_graph(1) + mod_axis1 = legalize.LegalizeSplit()(mod_axis1) + expected_axis1 = expected_mod_axis1() + tvm.ir.assert_structural_equal(mod_axis1, expected_axis1) + + mod_axis2 = tvm.IRModule() + mod_axis2["tvmgen_default_ethosu_main_0"] = create_graph(2) + mod_axis2 = legalize.LegalizeSplit()(mod_axis2) + expected_axis2 = expected_mod_axis2() + tvm.ir.assert_structural_equal(mod_axis2, expected_axis2) + + +def test_split_sections_legalize(): + def create_graph(axis, sections): + x = relay.var("x", shape=(1, 50, 50, 3)) + x_abs = relay.abs(x) + split_output = relay.split(x_abs, sections, axis).tuple_value + outputs = list() + for section_idx in range(sections): + split_single_out = relay.TupleGetItem(split_output, section_idx) + tanh = relay.tanh(split_single_out) + 
outputs.append(tanh) + tuple_out = relay.Tuple(outputs) + return relay.Function([x], tuple_out) + + def expected_mod_axis1(): + expected_ir_string = """ + #[version = "0.0.5"] + def @tvmgen_default_ethosu_main_0(%x: Tensor[(1, 50, 50, 3), float32]) -> (Tensor[(1, 10, 50, 3), float32],\ + Tensor[(1, 10, 50, 3), float32],\ + Tensor[(1, 10, 50, 3), float32],\ + Tensor[(1, 10, 50, 3), float32],\ + Tensor[(1, 10, 50, 3), float32]) { + %0 = abs(%x) /* ty=Tensor[(1, 50, 50, 3), float32] */; + %1 = strided_slice(%0, begin=[0, 0, 0, 0], end=[1, 10, 50, 3], strides=[1], axes=None)\ + /* ty=Tensor[(1, 10, 50, 3), float32] */; + %2 = strided_slice(%0, begin=[0, 10, 0, 0], end=[1, 20, 50, 3], strides=[1], axes=None)\ + /* ty=Tensor[(1, 10, 50, 3), float32] */; + %3 = strided_slice(%0, begin=[0, 20, 0, 0], end=[1, 30, 50, 3], strides=[1], axes=None)\ + /* ty=Tensor[(1, 10, 50, 3), float32] */; + %4 = strided_slice(%0, begin=[0, 30, 0, 0], end=[1, 40, 50, 3], strides=[1], axes=None)\ + /* ty=Tensor[(1, 10, 50, 3), float32] */; + %5 = strided_slice(%0, begin=[0, 40, 0, 0], end=[1, 50, 50, 3], strides=[1], axes=None)\ + /* ty=Tensor[(1, 10, 50, 3), float32] */; + %6 = (%1, %2, %3, %4, %5); + %7 = %6.0; + %8 = tanh(%7) /* ty=Tensor[(1, 10, 50, 3), float32] */; + %9 = %6.1; + %10 = tanh(%9) /* ty=Tensor[(1, 10, 50, 3), float32] */; + %11 = %6.2; + %12 = tanh(%11) /* ty=Tensor[(1, 10, 50, 3), float32] */; + %13 = %6.3; + %14 = tanh(%13) /* ty=Tensor[(1, 10, 50, 3), float32] */; + %15 = %6.4; + %16 = tanh(%15) /* ty=Tensor[(1, 10, 50, 3), float32] */; + (%8, %10, %12, %14, %16) + } + """ + return tvm.parser.fromtext(expected_ir_string) + + def expected_mod_axis2(): + expected_ir_string = """ + #[version = "0.0.5"] + def @tvmgen_default_ethosu_main_0(%x: Tensor[(1, 50, 50, 3), float32]) -> (Tensor[(1, 50, 10, 3), float32],\ + Tensor[(1, 50, 10, 3), float32],\ + Tensor[(1, 50, 10, 3), float32],\ + Tensor[(1, 50, 10, 3), float32],\ + Tensor[(1, 50, 10, 3), float32]) { + %0 = abs(%x) /* ty=Tensor[(1, 50, 50, 3), float32] */; + %1 = strided_slice(%0, begin=[0, 0, 0, 0], end=[1, 50, 10, 3], strides=[1], axes=None)\ + /* ty=Tensor[(1, 50, 10, 3), float32] */; + %2 = strided_slice(%0, begin=[0, 0, 10, 0], end=[1, 50, 20, 3], strides=[1], axes=None)\ + /* ty=Tensor[(1, 50, 10, 3), float32] */; + %3 = strided_slice(%0, begin=[0, 0, 20, 0], end=[1, 50, 30, 3], strides=[1], axes=None)\ + /* ty=Tensor[(1, 50, 10, 3), float32] */; + %4 = strided_slice(%0, begin=[0, 0, 30, 0], end=[1, 50, 40, 3], strides=[1], axes=None)\ + /* ty=Tensor[(1, 50, 10, 3), float32] */; + %5 = strided_slice(%0, begin=[0, 0, 40, 0], end=[1, 50, 50, 3], strides=[1], axes=None)\ + /* ty=Tensor[(1, 50, 10, 3), float32] */; + %6 = (%1, %2, %3, %4, %5); + %7 = %6.0; + %8 = tanh(%7) /* ty=Tensor[(1, 50, 10, 3), float32] */; + %9 = %6.1; + %10 = tanh(%9) /* ty=Tensor[(1, 50, 10, 3), float32] */; + %11 = %6.2; + %12 = tanh(%11) /* ty=Tensor[(1, 50, 10, 3), float32] */; + %13 = %6.3; + %14 = tanh(%13) /* ty=Tensor[(1, 50, 10, 3), float32] */; + %15 = %6.4; + %16 = tanh(%15) /* ty=Tensor[(1, 50, 10, 3), float32] */; + (%8, %10, %12, %14, %16) + } + """ + return tvm.parser.fromtext(expected_ir_string) + + mod_axis1 = tvm.IRModule() + mod_axis1["tvmgen_default_ethosu_main_0"] = create_graph(1, 5) + mod_axis1 = legalize.LegalizeSplit()(mod_axis1) + expected_axis1 = expected_mod_axis1() + tvm.ir.assert_structural_equal(mod_axis1, expected_axis1) + + mod_axis2 = tvm.IRModule() + mod_axis2["tvmgen_default_ethosu_main_0"] = create_graph(2, 5) + mod_axis2 = 
legalize.LegalizeSplit()(mod_axis2) + expected_axis2 = expected_mod_axis2() + tvm.ir.assert_structural_equal(mod_axis2, expected_axis2) + + +def infer_type_function_pass(func): + mod = tvm.IRModule() + mod["test"] = func + mod = relay.transform.InferType()(mod) + return mod["test"] + + +def get_shape_expr(in_expr, out_expr): + main_f = relay.Function([in_expr], out_expr) + main_f = infer_type_function_pass(main_f) + shape = [int(i) for i in main_f.body.checked_type.shape] + return shape + + +INVERSE_LAYOUT_TRANSFORM_OHWI_MAP = { + "HWIO": [1, 2, 3, 0], + "HWOI": [1, 2, 0, 3], + "OWHI": [0, 1, 2, 3], +} + + +def test_ethosu_conv2d_legalize(): + def create_graph_single(input_tensor_name, input_tensor_shape, input_tensor_dtype): + c1_params = relay_ir_builder.QnnConv2DParams(input_tensor_dtype) + c1_params.ifm.shape = input_tensor_shape + c1_params.kernel.shape = (3, 3, c1_params.ifm.shape[3], 32) + c1_params.strides = (1, 1) + c1_params.pad = "VALID" + c1_params.activation = "CLIP" + c1_params.clip_min = 23 + c1_params.clip_max = 180 + input0 = relay.var(input_tensor_name, shape=c1_params.ifm.shape, dtype=c1_params.ifm.dtype) + c1, new_params = relay_ir_builder.create_qnn_conv2d(c1_params, input0) + c1_params.ofm.shape = get_shape_expr(input0, c1) + + f = relay.Function([input0], c1) + mod = tvm.IRModule() + mod["main"] = f + return mod, [c1_params] + + def create_graph_double(input_tensor_name, input_tensor_shape, input_tensor_dtype): + c1_params = relay_ir_builder.QnnConv2DParams(input_tensor_dtype) + c1_params.ifm.shape = input_tensor_shape + c1_params.kernel.shape = (7, 7, c1_params.ifm.shape[3], 8) + c1_params.strides = (2, 2) + c1_params.pad = "VALID" + c1_params.activation = "CLIP" + c1_params.clip_min = 10 + c1_params.clip_max = 240 + input0 = relay.var(input_tensor_name, shape=c1_params.ifm.shape, dtype=c1_params.ifm.dtype) + c1, new_params = relay_ir_builder.create_qnn_conv2d(c1_params, input0) + c1_params.ofm.shape = get_shape_expr(input0, c1) + + c2_params = relay_ir_builder.QnnConv2DParams(input_tensor_dtype) + c2_params.ifm.shape = c1_params.ofm.shape + c2_params.kernel.shape = (5, 5, c2_params.ifm.shape[3], 16) + c2_params.strides = (1, 1) + c2_params.pad = "SAME" + c2, new_params = relay_ir_builder.create_qnn_conv2d(c2_params, c1) + c2_params.ofm.shape = get_shape_expr(input0, c2) + + f = relay.Function([input0], c2) + mod = tvm.IRModule() + mod["main"] = f + return mod, [c2_params, c1_params] + + def verify_tensor(tensor_type, expr): + assert list(tensor_type.shape) == list(expr.checked_type.shape) + assert str(tensor_type.dtype) == str(expr.checked_type.dtype) + + def verify_linear(ext_func, conv2d_params): + op = ext_func.body + for param in conv2d_params: + verify_tensor(param.ifm, op.args[0]) + verify_tensor(param.ofm, op) + + # This will be in OHWI layout + weights_ohwi = op.args[1].data.asnumpy() + weights_layout = str(param.kernel.layout) + weights = np.transpose(weights_ohwi, INVERSE_LAYOUT_TRANSFORM_OHWI_MAP[weights_layout]) + assert weights.shape == param.kernel.shape + assert weights.dtype == param.kernel.dtype + + assert list(op.args[2].checked_type.shape)[0] == weights_ohwi.shape[0] + + assert float(op.attrs.ifm_scale) == float(param.ifm.sc.data.asnumpy()) + assert int(op.attrs.ifm_zero_point) == int(param.ifm.zp.data.asnumpy()) + assert int(op.attrs.weight_zero_point) == int(param.kernel.zp.data.asnumpy()) + assert float(op.attrs.ofm_scale) == float(param.ofm.sc.data.asnumpy()) + assert int(op.attrs.ofm_zero_point) == int(param.ofm.zp.data.asnumpy()) + assert 
int(op.attrs.ofm_channels) == int(weights_ohwi.shape[0]) + assert list(op.attrs.padding) == list(param.pad) + assert list(op.attrs.strides) == list(param.strides) + assert list(op.attrs.dilation) == list(param.dilation) + assert str(op.attrs.activation) == str(param.activation) + assert int(op.attrs.clip_min) == int(param.clip_min) + assert int(op.attrs.clip_max) == int(param.clip_max) + op = op.args[0] + + test_cases = [ + (create_graph_single, ["input", (1, 299, 299, 3), "uint8"]), + (create_graph_double, ["input", (1, 128, 256, 4), "uint8"]), + ] + for test_case in test_cases: + mod, conv_params = test_case[0](*test_case[1]) + mod = ethosu.partition_for_ethosu(mod) + mod = legalize.LegalizeEthosUConv2D()(mod) + verify_linear(mod["tvmgen_default_ethosu_main_0"], conv_params) + + +def test_ethosu_conv2d_legalize_errors(): + def create_graph_single_unsupported_ifm_layout( + input_tensor_name, input_tensor_shape, input_tensor_dtype + ): + c1_params = relay_ir_builder.QnnConv2DParams(input_tensor_dtype) + c1_params.ifm.shape = input_tensor_shape + c1_params.ifm.layout = "NCHW" + c1_params.kernel.shape = (3, 3, c1_params.ifm.shape[1], 32) + c1_params.strides = (1, 1) + c1_params.pad = "VALID" + c1_params.activation = "CLIP" + c1_params.clip_min = 23 + c1_params.clip_max = 180 + input0 = relay.var(input_tensor_name, shape=c1_params.ifm.shape, dtype=c1_params.ifm.dtype) + c1, new_params = relay_ir_builder.create_qnn_conv2d(c1_params, input0) + c1_params.ofm.shape = get_shape_expr(input0, c1) + + f = relay.Function([input0], c1) + mod = tvm.IRModule() + mod["main"] = f + return mod, [c1_params] + + test_cases = [ + (create_graph_single_unsupported_ifm_layout, ["input", (1, 3, 299, 299), "uint8"]), + ] + + for test_case in test_cases: + mod, conv_params = test_case[0](*test_case[1]) + mod = ethosu.partition_for_ethosu(mod) + with pytest.raises( + tvm._ffi.base.TVMError, match="EthosUCodegenError: Unsupported Layout NCHW" + ): + mod = legalize.LegalizeEthosUConv2D()(mod) + + +if __name__ == "__main__": + pytest.main([__file__]) diff --git a/tests/python/contrib/test_ethosu/test_preprocess.py b/tests/python/contrib/test_ethosu/test_preprocess.py new file mode 100644 index 000000000000..f2c7b0afafd8 --- /dev/null +++ b/tests/python/contrib/test_ethosu/test_preprocess.py @@ -0,0 +1,343 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint: disable=invalid-name, unused-argument + +import pytest + +pytest.importorskip("ethosu.vela") +import numpy as np + +import tvm +from tvm import relay +from tvm.relay.backend.contrib.ethosu import preprocess + + +def set_func_attr(func, compile_name, symbol_name): + """ + Helper function to attach attributes to the external function. 
+ """ + func = func.with_attr("Primitive", tvm.tir.IntImm("int32", 1)) + func = func.with_attr("Inline", tvm.tir.IntImm("int32", 1)) + func = func.with_attr("Compiler", compile_name) + func = func.with_attr("global_symbol", symbol_name) + return func + + +def test_single_io(): + """ + This test will test the pass wont touch external functions that + have a single input and a single output. + """ + + def create_graph(): + def create_external_func1(mod_, compiler_name, symbol_name): + x_int = relay.var("x_int", shape=(10, 10)) + z0 = relay.nn.relu(x_int) + f1 = relay.Function([x_int], z0) + f1 = set_func_attr(f1, compiler_name, symbol_name) + glb_f1 = relay.GlobalVar(symbol_name) + mod_[glb_f1] = f1 + mod_ = relay.transform.InferType()(mod_) + return glb_f1, mod_ + + mod = tvm.IRModule() + x = relay.var("x", shape=(10, 10)) + + glb_symbol_f1, mod = create_external_func1(mod, "ethosu", "ethosu_0") + r = relay.Call(glb_symbol_f1, [x]) + main = relay.Function([x], r) + mod["main"] = main + mod = relay.transform.InferType()(mod) + return mod + + mod = create_graph() + exp = create_graph() + mod = preprocess.preprocess_ext_io()(mod) + assert tvm.ir.structural_equal(mod, exp, map_free_vars=True) + + +def test_2ins_single_out(): + """ + The test is check two inputs and a single output of external function + """ + + def create_graph(): + def create_external_func1(mod_, compiler_name, symbol_name): + x_int = relay.var("x_int", shape=(10, 10)) + w0_int = relay.var("w0_int", shape=(10, 10)) + z0 = relay.add(x_int, w0_int) + + f1 = relay.Function([x_int, w0_int], z0) + f1 = set_func_attr(f1, compiler_name, symbol_name) + glb_f1 = relay.GlobalVar(symbol_name) + mod_[glb_f1] = f1 + mod_ = relay.transform.InferType()(mod_) + return glb_f1, mod_ + + mod = tvm.IRModule() + + x = relay.var("x", shape=(10, 10)) + w0 = relay.var("w0", shape=(10, 10)) + + glb_symbol_f1, mod = create_external_func1(mod, "ethosu", "ethosu_0") + r = relay.Call(glb_symbol_f1, [x, w0]) + main = relay.Function([x, w0], r) + mod["main"] = main + mod = relay.transform.InferType()(mod) + return mod + + def expected(): + def create_external_func1(mod_, compiler_name, symbol_name): + ifms_int = relay.var("ifms_int", shape=[200]) + + # splits + (x_int_flat, w0_int_flat) = relay.split(ifms_int, [100]) + # reshapes + x_int = relay.reshape(x_int_flat, newshape=(10, 10)) + w0_int = relay.reshape(w0_int_flat, newshape=(10, 10)) + + z0 = relay.add(x_int, w0_int) + f1 = relay.Function([ifms_int], z0) + f1 = set_func_attr(f1, compiler_name, symbol_name) + glb_f1 = relay.GlobalVar(symbol_name) + mod_[glb_f1] = f1 + mod_ = relay.transform.InferType()(mod_) + return glb_f1, mod_ + + mod = tvm.IRModule() + + x = relay.var("x", shape=(10, 10)) + w0 = relay.var("w0", shape=(10, 10)) + + # reshapes + x_reshaped = relay.reshape(x, newshape=100) + w0_reshaped = relay.reshape(w0, newshape=100) + + # concat + ifms = relay.concatenate((x_reshaped, w0_reshaped), 0) + + glb_symbol_f1, mod = create_external_func1(mod, "ethosu", "ethosu_0") + r = relay.Call(glb_symbol_f1, [ifms]) + main = relay.Function([x, w0], r) + mod["main"] = main + mod = relay.transform.InferType()(mod) + return mod + + mod = create_graph() + exp = expected() + mod = preprocess.preprocess_ext_io()(mod) + assert tvm.ir.structural_equal(mod, exp, map_free_vars=True) + + +def test_single_in_2outs(): + """ + The test is to check a single input and two outputs of external function + """ + + def create_graph(): + def create_external_func1(mod_, compiler_name, symbol_name): + x_int = 
relay.var("x_int", shape=(10, 10)) + + p0 = relay.nn.relu(x_int) + q0 = relay.tanh(x_int) + f1_o_tuple = relay.Tuple([p0, q0]) + + f1 = relay.Function([x_int], f1_o_tuple) + f1 = set_func_attr(f1, compiler_name, symbol_name) + glb_f1 = relay.GlobalVar(symbol_name) + mod_[glb_f1] = f1 + mod_ = relay.transform.InferType()(mod_) + return glb_f1, mod_ + + mod = tvm.IRModule() + x = relay.var("x", shape=(10, 10)) + glb_symbol_f1, mod = create_external_func1(mod, "ethosu", "ethosu_0") + pq_tuple = relay.Call(glb_symbol_f1, [x]) + p0 = relay.TupleGetItem(pq_tuple, 0) + q0 = relay.TupleGetItem(pq_tuple, 1) + r = relay.concatenate((p0, q0), axis=0) + main = relay.Function([x], r) + mod["main"] = main + mod = relay.transform.InferType()(mod) + return mod + + def expected(): + def create_external_func1(mod_, compiler_name, symbol_name): + x_int = relay.var("x_int", shape=(10, 10)) + + p0 = relay.nn.relu(x_int) + q0 = relay.tanh(x_int) + + # reshapes + p0_reshaped = relay.reshape(p0, newshape=100) + q0_reshaped = relay.reshape(q0, newshape=100) + ofms = relay.concatenate((p0_reshaped, q0_reshaped), 0) + + f1 = relay.Function([x_int], ofms) + f1 = set_func_attr(f1, compiler_name, symbol_name) + glb_f1 = relay.GlobalVar(symbol_name) + mod_[glb_f1] = f1 + mod_ = relay.transform.InferType()(mod_) + return glb_f1, mod_ + + mod = tvm.IRModule() + x = relay.var("x", shape=(10, 10)) + glb_symbol_f1, mod = create_external_func1(mod, "ethosu", "ethosu_0") + ofms = relay.Call(glb_symbol_f1, [x]) + + # splits + (p0_flat, q0_flat) = relay.split(ofms, [100]) + # reshapes + p0_flat_reshaped = relay.reshape(p0_flat, newshape=(10, 10)) + q0_flat_reshaped = relay.reshape(q0_flat, newshape=(10, 10)) + # original output + tuple_out = relay.Tuple([p0_flat_reshaped, q0_flat_reshaped]) + + p0 = relay.TupleGetItem(tuple_out, 0) + q0 = relay.TupleGetItem(tuple_out, 1) + r = relay.concatenate((p0, q0), axis=0) + main = relay.Function([x], r) + mod["main"] = main + mod = relay.transform.InferType()(mod) + return mod + + mod = create_graph() + exp = expected() + mod = relay.transform.InferType()(mod) + mod = preprocess.preprocess_ext_io()(mod) + assert tvm.ir.structural_equal(mod, exp, map_free_vars=True) + + +def test_4ins_2outs(): + """ + The test is to check a 4 inputs and two outputs of external function. + This just stand as a general test for multiple ins/outs. 
+ """ + + def create_graph(): + def create_external_func1(mod_, compiler_name, symbol_name): + x_int = relay.var("x_int", shape=(10, 10)) + w0_int = relay.var("w0_int", shape=(10, 10)) + w1_int = relay.var("w1_int", shape=(10, 10)) + w2_int = relay.var("w2_int", shape=(10, 10)) + + z0 = relay.add(x_int, w0_int) + p0 = relay.subtract(z0, w1_int) + q0 = relay.multiply(z0, w2_int) + f1_o_tuple = relay.Tuple([p0, q0]) + + f1 = relay.Function([x_int, w0_int, w1_int, w2_int], f1_o_tuple) + f1 = set_func_attr(f1, compiler_name, symbol_name) + glb_f1 = relay.GlobalVar(symbol_name) + mod_[glb_f1] = f1 + mod_ = relay.transform.InferType()(mod_) + return glb_f1, mod_ + + mod = tvm.IRModule() + + x = relay.var("x", shape=(10, 10)) + w0 = relay.var("w0", shape=(10, 10)) + w1 = relay.var("w1", shape=(10, 10)) + w2 = relay.var("w2", shape=(10, 10)) + + glb_symbol_f1, mod = create_external_func1(mod, "ethosu", "ethosu_0") + pq_tuple = relay.Call(glb_symbol_f1, [x, w0, w1, w2]) + + p0 = relay.TupleGetItem(pq_tuple, 0) + q0 = relay.TupleGetItem(pq_tuple, 1) + r = relay.concatenate((p0, q0), axis=0) + main = relay.Function([x, w0, w1, w2], r) + mod["main"] = main + mod = relay.transform.InferType()(mod) + return mod + + def expected(): + def create_external_func1(mod_, compiler_name, symbol_name): + ifms_int = relay.var("ifms_int", shape=[400]) + + # splits + (x_int_flat, w0_int_flat, w1_int_flat, w2_int_flat) = relay.split( + ifms_int, [100, 200, 300] + ) + # reshapes + x_int = relay.reshape(x_int_flat, newshape=(10, 10)) + w0_int = relay.reshape(w0_int_flat, newshape=(10, 10)) + w1_int = relay.reshape(w1_int_flat, newshape=(10, 10)) + w2_int = relay.reshape(w2_int_flat, newshape=(10, 10)) + + z0 = relay.add(x_int, w0_int) + p0 = relay.subtract(z0, w1_int) + q0 = relay.multiply(z0, w2_int) + # f1_o_tuple = relay.Tuple([p0, q0]) + + # reshapes + p0_reshaped = relay.reshape(p0, newshape=100) + q0_reshaped = relay.reshape(q0, newshape=100) + ofms = relay.concatenate((p0_reshaped, q0_reshaped), 0) + + f1 = relay.Function([ifms_int], ofms) + f1 = set_func_attr(f1, compiler_name, symbol_name) + glb_f1 = relay.GlobalVar(symbol_name) + mod_[glb_f1] = f1 + mod_ = relay.transform.InferType()(mod_) + return glb_f1, mod_ + + mod = tvm.IRModule() + + x = relay.var("x", shape=(10, 10)) + w0 = relay.var("w0", shape=(10, 10)) + w1 = relay.var("w1", shape=(10, 10)) + w2 = relay.var("w2", shape=(10, 10)) + + # reshapes + x_reshaped = relay.reshape(x, newshape=100) + w0_reshaped = relay.reshape(w0, newshape=100) + w1_reshaped = relay.reshape(w1, newshape=100) + w2_reshaped = relay.reshape(w2, newshape=100) + + # concat + ifms = relay.concatenate((x_reshaped, w0_reshaped, w1_reshaped, w2_reshaped), 0) + + # call + glb_func, mod = create_external_func1(mod, "ethosu", "ethosu_0") + ofms = relay.Call(glb_func, [ifms]) + + # splits + (p0_flat, q0_flat) = relay.split(ofms, [100]) + # reshapes + p0_flat_reshaped = relay.reshape(p0_flat, newshape=(10, 10)) + q0_flat_reshaped = relay.reshape(q0_flat, newshape=(10, 10)) + # original output + tuple_out = relay.Tuple([p0_flat_reshaped, q0_flat_reshaped]) + + p0 = relay.TupleGetItem(tuple_out, 0) + q0 = relay.TupleGetItem(tuple_out, 1) + + r = relay.concatenate((p0, q0), axis=0) + main = relay.Function([x, w0, w1, w2], r) + mod["main"] = main + mod = relay.transform.InferType()(mod) + return mod + + mod = create_graph() + exp = expected() + mod = preprocess.preprocess_ext_io()(mod) + assert tvm.ir.structural_equal(mod, exp, map_free_vars=True) + + +if __name__ == "__main__": + 
pytest.main([__file__]) diff --git a/tests/python/contrib/test_ethosu/test_vela_api.py b/tests/python/contrib/test_ethosu/test_vela_api.py new file mode 100644 index 000000000000..d9b22d10e9e2 --- /dev/null +++ b/tests/python/contrib/test_ethosu/test_vela_api.py @@ -0,0 +1,455 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +import pytest + +pytest.importorskip("ethosu.vela") +import numpy as np +from ethosu.vela import api as vapi +from unittest.mock import patch + +import tvm +from tvm import tir +from tvm.script import ty +from tvm.relay.backend.contrib.ethosu import vela_api + +ACCEL_TYPES = [ + vapi.NpuAccelerator.Ethos_U55_256, + vapi.NpuAccelerator.Ethos_U55_128, + vapi.NpuAccelerator.Ethos_U55_64, + vapi.NpuAccelerator.Ethos_U55_32, +] + + +"""Test case 1""" + + +@tvm.script.tir +class Module1: + def main( + placeholder: ty.handle, + placeholder_1: ty.handle, + placeholder_2: ty.handle, + ethosu_conv2d: ty.handle, + ) -> None: + # function attr dict + tir.func_attr({"global_symbol": "main", "tir.noalias": True}) + placeholder_3 = tir.match_buffer( + placeholder, [1, 8, 8, 3], dtype="uint8", elem_offset=0, align=128, offset_factor=1 + ) + placeholder_4 = tir.match_buffer( + placeholder_1, [48], dtype="uint8", elem_offset=0, align=128, offset_factor=1 + ) + placeholder_5 = tir.match_buffer( + placeholder_2, [16], dtype="int32", elem_offset=0, align=128, offset_factor=1 + ) + ethosu_conv2d_1 = tir.match_buffer( + ethosu_conv2d, [1, 8, 8, 16], dtype="uint8", elem_offset=0, align=128, offset_factor=1 + ) + # body + tir.evaluate( + tir.call_extern( + "ethosu_conv2d", + "uint8", + 8, + 8, + 3, + 8, + 0, + 8, + tir.load("uint8", placeholder_3.data, 0), + 0, + 0, + 0, + tir.float32(0.5), + 10, + "NHWC", + 24, + 3, + 1, + "uint8", + 8, + 8, + 16, + 8, + 0, + 8, + tir.load("uint8", ethosu_conv2d_1.data, 0), + 0, + 0, + 0, + tir.float32(0.25), + 14, + "NHWC", + 128, + 16, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + tir.load("uint8", placeholder_4.data, 0), + 0, + 12, + tir.load("uint8", placeholder_5.data, 0), + 0, + 0, + 0, + 0, + 0, + "CLIP", + 0, + 0, + "NONE", + dtype="uint8", + ) + ) + + __tvm_meta__ = None + + +"""Test case 2 with per-channel quantization""" + + +@tvm.script.tir +class Module2: + def main( + placeholder: ty.handle, + placeholder_1: ty.handle, + placeholder_2: ty.handle, + placeholder_6: ty.handle, + ethosu_conv2d: ty.handle, + ) -> None: + # function attr dict + tir.func_attr({"global_symbol": "main", "tir.noalias": True}) + placeholder_3 = tir.match_buffer( + placeholder, [1, 8, 8, 3], dtype="uint8", elem_offset=0, align=128, offset_factor=1 + ) + placeholder_4 = tir.match_buffer( + placeholder_1, [16, 1, 1, 3], dtype="uint8", elem_offset=0, align=128, offset_factor=1 + ) + placeholder_5 = tir.match_buffer( + placeholder_2, [16], 
dtype="int32", elem_offset=0, align=128, offset_factor=1 + ) + # Per-channel weight scales + placeholder_7 = tir.match_buffer( + placeholder_6, [16], dtype="float32", elem_offset=0, align=128, offset_factor=1 + ) + ethosu_conv2d_1 = tir.match_buffer( + ethosu_conv2d, [1, 8, 8, 16], dtype="uint8", elem_offset=0, align=128, offset_factor=1 + ) + # body + tir.evaluate( + tir.call_extern( + "ethosu_conv2d", + "uint8", + 8, + 8, + 3, + 8, + 0, + 8, + tir.load("uint8", placeholder_3.data, 0), + 0, + 0, + 0, + tir.float32(0.5), + 10, + "NHWC", + 24, + 3, + 1, + "uint8", + 8, + 8, + 16, + 8, + 0, + 8, + tir.load("uint8", ethosu_conv2d_1.data, 0), + 0, + 0, + 0, + tir.float32(0.25), + 14, + "NHWC", + 128, + 16, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + tir.load("uint8", placeholder_4.data, 0), + 0, + 12, + tir.load("uint8", placeholder_5.data, 0), + 0, + 0, + 0, + 0, + 0, + "CLIP", + 0, + 0, + "NONE", + dtype="uint8", + ) + ) + + __tvm_meta__ = None + + +def test_get_optimal_block_config(): + block_configs_cases = [ + { + "test": [ + vapi.NpuShape3D(10, 20, 8), + vapi.NpuShape3D(10, 30, 16), + vapi.NpuShape3D(10, 40, 32), + ], + "ref": vapi.NpuShape3D(10, 40, 32), + }, + { + "test": [ + vapi.NpuShape3D(10, 20, 8), + vapi.NpuShape3D(10, 50, 32), + vapi.NpuShape3D(10, 40, 32), + ], + "ref": vapi.NpuShape3D(10, 50, 32), + }, + { + "test": [ + vapi.NpuShape3D(50, 50, 8), + vapi.NpuShape3D(10, 30, 32), + vapi.NpuShape3D(8, 8, 64), + ], + "ref": vapi.NpuShape3D(8, 8, 64), + }, + ] + + for test_case in block_configs_cases: + assert vela_api._get_optimal_block_config(test_case["test"]) == test_case["ref"] + + +def test_compress_weights(): + test_vecs = [ + { + # Stimulus + "accel": vapi.NpuAccelerator.Ethos_U55_256, + "block_depth": 8, + "ifm_dtype": np.uint8, + "shape": (3, 3, 16, 64), + "layout": "HWIO", + "zero_point": np.int64(134), + "dilation": (1, 1), + "is_depthwise": False, + # Reference outputs + "block_traversal": vapi.NpuBlockTraversal.PART_KERNEL_FIRST, + }, + { + # Stimulus + "accel": vapi.NpuAccelerator.Ethos_U55_256, + "block_depth": 8, + "ifm_dtype": np.uint8, + "shape": (3, 3, 32, 64), + "layout": "HWIO", + "zero_point": np.int64(134), + "dilation": (1, 1), + "is_depthwise": False, + # Reference outputs + "block_traversal": vapi.NpuBlockTraversal.DEPTH_FIRST, + }, + { + # Stimulus + "accel": vapi.NpuAccelerator.Ethos_U55_256, + "block_depth": 8, + "ifm_dtype": np.int16, + "shape": (3, 3, 16, 64), + "layout": "HWIO", + "zero_point": np.int64(134), + "dilation": (1, 1), + "is_depthwise": False, + # Reference outputs + "block_traversal": vapi.NpuBlockTraversal.DEPTH_FIRST, + }, + # Pass-through value check + { + # Stimulus + "accel": vapi.NpuAccelerator.Ethos_U55_128, + "block_depth": 16, + "ifm_dtype": np.uint8, + "shape": (243, 152, 7, 1), + "layout": "HWOI", + "zero_point": np.int64(110), + "dilation": (2, 2), + "is_depthwise": True, + # Reference outputs + "block_traversal": vapi.NpuBlockTraversal.DEPTH_FIRST, + }, + { + # Stimulus + "accel": vapi.NpuAccelerator.Ethos_U55_128, + "block_depth": 32, + "ifm_dtype": np.uint8, + "shape": (64, 67, 35, 8), + "layout": "OHWI", + "zero_point": np.int64(100), + "dilation": (1, 2), + "is_depthwise": False, + # Reference outputs + "block_traversal": vapi.NpuBlockTraversal.PART_KERNEL_FIRST, + }, + ] + + def verify(test_vec, mock_obj): + layout_transform_indices = { + "HWIO": (3, 0, 1, 2), + "HWOI": (2, 0, 1, 3), + "OHWI": (0, 1, 2, 3), + } + + assert mock_obj + mock_obj.assert_called_once() + assert mock_obj.call_args[1]["accelerator"] == 
test_vec["accel"] + assert mock_obj.call_args[1]["accelerator"] == test_vec["accel"] + ishape = test_vec["shape"] + shape_owhi = ( + ishape[layout_transform_indices[test_vec["layout"]][0]], + ishape[layout_transform_indices[test_vec["layout"]][1]], + ishape[layout_transform_indices[test_vec["layout"]][2]], + ishape[layout_transform_indices[test_vec["layout"]][3]], + ) + assert mock_obj.call_args[1]["weights_volume"].shape == shape_owhi + assert mock_obj.call_args[1]["dilation_xy"] == test_vec["dilation"] + assert mock_obj.call_args[1]["ifm_bitdepth"] == np.iinfo(test_vec["ifm_dtype"]).bits + assert mock_obj.call_args[1]["ofm_block_depth"] == test_vec["block_depth"] + assert mock_obj.call_args[1]["is_depthwise"] == test_vec["is_depthwise"] + assert mock_obj.call_args[1]["block_traversal"] == test_vec["block_traversal"] + + def create_mock(test_vec): + with patch( + "tvm.relay.backend.contrib.ethosu.vela_api.vapi.npu_encode_weights" + ) as mock_npu_encode_weights: + ifm_bitdepth = np.iinfo(test_vec["ifm_dtype"]).bits + ifm_dtype = test_vec["ifm_dtype"] + max = np.iinfo(ifm_dtype).max + min = np.iinfo(ifm_dtype).min + values = np.random.randint(min, max, test_vec["shape"], ifm_dtype) + compressed_weights = vela_api.compress_weights( + weights=values, + weights_zp=test_vec["zero_point"], + weights_layout=test_vec["layout"], + ifm_bitdepth=ifm_bitdepth, + block_depth=test_vec["block_depth"], + dilation=test_vec["dilation"], + accel_type=test_vec["accel"], + is_depthwise=test_vec["is_depthwise"], + ) + return mock_npu_encode_weights + return None + + for tv in test_vecs: + mock_obj = create_mock(tv) + verify(tv, mock_obj) + + +def test_pack_biases(): + test_vecs = [ + { + # Stimulus + "bias_length": 3, + "ifm_scale": np.single(1.11111111), + "ifm_dtype": np.uint8, + "weight_scales": np.array( + [np.single(0.91111111), np.single(1.01111111), np.single(1.11111111)] + ), + "ofm_scale": np.single(1.2), + "is_activation_tanh_or_sigmoid": False, + # Reference outputs + "hw_scales": (1811663288, 2010504240, 1104672703), + "hw_shifts": (31, 31, 30), + }, + { + # Stimulus + "bias_length": 3, + "ifm_scale": np.single(1.11111111), + "ifm_dtype": np.int8, + "weight_scales": np.array( + [np.single(0.91111111), np.single(1.01111111), np.single(1.11111111)] + ), + "ofm_scale": np.single(1.2), + "is_activation_tanh_or_sigmoid": False, + # Reference outputs + "hw_scales": (1811663185, 2010504312, 1104672720), + "hw_shifts": (31, 31, 30), + }, + { + # Stimulus + "bias_length": 3, + "ifm_scale": np.single(1.11111111), + "ifm_dtype": np.int16, + "weight_scales": np.array( + [np.single(0.91111111), np.single(1.01111111), np.single(1.11111111)] + ), + "ofm_scale": np.single(1.2), + "is_activation_tanh_or_sigmoid": False, + # Reference outputs + "hw_scales": (27644, 30678, 16856), + "hw_shifts": (15, 15, 14), + }, + ] + + def verify(test_vec, mock_obj, packed_biases): + assert mock_obj + for idx, val in enumerate(test_vec["bias_values"]): + assert val == mock_obj.call_args_list[idx][0][0] + assert test_vec["hw_scales"][idx] == mock_obj.call_args_list[idx][0][1] + assert test_vec["hw_shifts"][idx] == mock_obj.call_args_list[idx][0][2] + + def create_mock(test_vec): + with patch( + "tvm.relay.backend.contrib.ethosu.vela_api.vapi.npu_encode_bias" + ) as mock_npu_encode_bias: + mock_npu_encode_bias.return_value = bytearray(10) + ifm_dtype = test_vec["ifm_dtype"] + max = np.iinfo(ifm_dtype).max + min = np.iinfo(ifm_dtype).min + # tvm will always create biases in int32 + biases = np.random.randint(min, max, 
test_vec["bias_length"], np.int32) + packed_biases = vela_api.pack_biases( + biases=biases, + ifm_scale=test_vec["ifm_scale"], + ifm_dtype=test_vec["ifm_dtype"], + weight_scales=test_vec["weight_scales"], + ofm_scale=test_vec["ofm_scale"], + is_activation_tanh_or_sigmoid=test_vec["is_activation_tanh_or_sigmoid"], + ) + test_vec["bias_values"] = biases + return mock_npu_encode_bias, packed_biases + return None + + for _test_vec in test_vecs: + mock_obj, packed_biases = create_mock(_test_vec) + verify(_test_vec, mock_obj, packed_biases) + + +if __name__ == "__main__": + pytest.main([__file__]) diff --git a/tests/scripts/task_config_build_cpu.sh b/tests/scripts/task_config_build_cpu.sh index 4c9d50ec90bb..ace88b46ae0c 100755 --- a/tests/scripts/task_config_build_cpu.sh +++ b/tests/scripts/task_config_build_cpu.sh @@ -48,3 +48,4 @@ echo set\(USE_VITIS_AI ON\) >> config.cmake echo set\(USE_VERILATOR ON\) >> config.cmake echo set\(USE_LIBBACKTRACE ON\) >> config.cmake echo set\(USE_CCACHE OFF\) >> config.cmake +echo set\(USE_ETHOSU ON\) >> config.cmake diff --git a/tests/scripts/task_mypy.sh b/tests/scripts/task_mypy.sh index b05acb090c2f..8507f311e9da 100755 --- a/tests/scripts/task_mypy.sh +++ b/tests/scripts/task_mypy.sh @@ -25,3 +25,6 @@ mypy --check-untyped-defs python/tvm/tir/analysis/ echo "Checking MyPy Type defs in the transofrm package." mypy --check-untyped-defs python/tvm/tir/transform/ + +echo "Checking MyPy Type defs in the tvm.relay.backend.contrib.ethosu package." +mypy --check-untyped-defs python/tvm/relay/backend/contrib/ethosu/