[microNPU] Add support for transpose convolution

Adds support for legalizing transpose convolution (deconvolution) to an NPU conv2d operation for the case when `strides==(2, 2)`, `dilation==(1, 1)` and no padding of the output is required. Change-Id: I473873f6bad526f093c6e9cdfe6e92ccf94a822c
apache · Jan 6, 2022 · 0e66db4 · 0e66db4
1 parent fc2fdab
commit 0e66db4
Show file tree

Hide file tree

Showing 12 changed files with 498 additions and 10 deletions.
diff --git a/python/tvm/relay/backend/contrib/ethosu/legalize.py b/python/tvm/relay/backend/contrib/ethosu/legalize.py
@@ -355,6 +355,87 @@ def __call__(self, *args, **kwargs):
         pass
 
 
+class Conv2DTransposeRewriter(DFPatternCallback):
+    """Convert conv2d_transpose related composite functions into
+    ethosu_conv2d_transpose operators."""
+
+    def __init__(self):
+        super().__init__(require_type=True)
+        self.pattern = (wildcard().has_attr({"Composite": "ethos-u.qnn_conv2d_transpose"}))(
+            wildcard()
+        )
+
+    def callback(
+        self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map
+    ) -> tvm.relay.Expr:
+        params = ethosu_patterns.QnnConv2DTransposeParams(post.op.body)
+        params.ifm.tensor = post.args[0]
+
+        weight_to_ohwi_transform_map = {"IOHW": [1, 2, 3, 0]}
+        weights_values = params.weights.values
+        weights_values_ohwi = np.transpose(
+            weights_values, weight_to_ohwi_transform_map[str(params.weights.layout)]
+        )
+        weights_values_ohwi = np.flip(weights_values_ohwi, (1, 2))
+        weights = relay.const(weights_values_ohwi, dtype=params.weights.values.dtype)
+
+        upscale_height = int(params.ofm.shape[1])
+        upscale_width = int(params.ofm.shape[2])
+
+        bias_values = (
+            params.biases.tensor.data.asnumpy()
+            if params.biases
+            else np.zeros((params.ifm.shape[-1]))
+        )
+        scale_bias = vela_api.pack_biases(
+            biases=bias_values,
+            ifm_scale=params.ifm.q_params.scale_f32,
+            ifm_dtype=np.dtype(params.ifm.dtype),
+            weight_scales=params.weights.q_params.scale_f32,
+            ofm_scale=params.ofm.q_params.scale_f32,
+            is_activation_tanh_or_sigmoid=False,
+        )
+
+        reduced_op = ethosu_ops.ethosu_conv2d(
+            ifm=post.args[0],
+            weight=weights,
+            scale_bias=relay.const(scale_bias, "uint8"),
+            lut=relay.const([], dtype="int8"),
+            ifm_scale=float(params.ifm.q_params.scale_f32),
+            ifm_zero_point=int(params.ifm.q_params.zero_point),
+            weight_zero_point=int(params.weights.q_params.zero_point),
+            ofm_scale=float(params.ofm.q_params.scale_f32),
+            ofm_zero_point=int(params.ofm.q_params.zero_point),
+            kernel_shape=params.kernel_shape,
+            ofm_channels=int(params.ofm.shape[-1]),
+            strides=(1, 1),
+            padding=params.legalize_padding,
+            dilation=params.dilation,
+            ifm_layout=str(params.ifm.layout),
+            ofm_layout=str(params.ofm.layout),
+            upscale="ZEROS",
+            upscale_height=upscale_height,
+            upscale_width=upscale_width,
+        )
+        return reduced_op
+
+
+@ir.transform.module_pass(opt_level=1)
+class LegalizeConv2DTranspose:
+    """This is the pass that wraps the Conv2DTransposeRewriter"""
+
+    def transform_module(
+        self, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext
+    ) -> tvm.ir.IRModule:
+        for global_var, func in mod.functions.items():
+            func = rewrite(Conv2DTransposeRewriter(), func)
+            mod.update_func(global_var, func)
+        return mod
+
+    def __call__(self, *args, **kwargs):
+        pass
+
+
 class DepthwiseConv2DRewriter(DFPatternCallback):
     """Convert ethosu.qnn_depthwise_conv2d composite functions to ethosu_depthwise_conv2d
     operators"""
@@ -1365,6 +1446,7 @@ def transform_module(
         """
         mod = LegalizeSplit()(mod)
         mod = LegalizeConv2D()(mod)
+        mod = LegalizeConv2DTranspose()(mod)
         mod = LegalizeDepthwiseConv2D()(mod)
         mod = LegalizeMaxPooling()(mod)
         mod = LegalizeAvgPooling()(mod)

diff --git a/python/tvm/relay/backend/contrib/ethosu/op/convolution.py b/python/tvm/relay/backend/contrib/ethosu/op/convolution.py
@@ -47,6 +47,8 @@ def _extract_ethosu_conv2d_params(attrs, args):
     clip_max = attrs.clip_max
     rounding_mode = attrs.rounding_mode
     upscale = attrs.upscale
+    upscale_height = attrs.upscale_height
+    upscale_width = attrs.upscale_width
     ifm_layout = attrs.ifm_layout
     ofm_layout = attrs.ofm_layout
 
@@ -68,6 +70,8 @@ def _extract_ethosu_conv2d_params(attrs, args):
         clip_max,
         rounding_mode,
         upscale,
+        upscale_height,
+        upscale_width,
         ifm_layout,
         ofm_layout,
     )
@@ -112,6 +116,8 @@ def ethosu_conv2d(
     clip_max: int = 0,
     rounding_mode: str = "TFL",
     upscale: str = "NONE",
+    upscale_height: int = 0,
+    upscale_width: int = 0,
     ifm_layout: str = "NHWC",
     ofm_layout: str = "NHWC",
 ) -> tvm.relay.Call:
@@ -177,6 +183,14 @@ def ethosu_conv2d(
             "NONE" - no upscaling.
             "NEAREST" - upscale using nearest neighbour.
             "ZEROS" - upscale using zeros.
+    upscale_height: int, optional
+        The height of the Output Feature Map after applying upscaling. A value of
+        0 means the height of the Input Feature Map will be used. This parameter
+        has no effect when upscale is "NONE".
+    upscale_width: int, optional
+        The width of the Output Feature Map after applying upscaling. A value of
+        0 means the width of the Input Feature Map will be used. This parameter
+        has no effect when upscale is "NONE".
     ifm_layout : str, optional
         The layout of the Input Feature Map tensor. Can be "NHWC" or "NHCWB16".
     ofm_layout : str, optional
@@ -208,6 +222,8 @@ def ethosu_conv2d(
         clip_max,
         rounding_mode,
         upscale,
+        upscale_height,
+        upscale_width,
         ifm_layout,
         ofm_layout,
     )
diff --git a/python/tvm/relay/backend/contrib/ethosu/te/convolution.py b/python/tvm/relay/backend/contrib/ethosu/te/convolution.py
@@ -20,6 +20,7 @@
 
 from tvm import te  # type: ignore
 from .dma import dma_ofm_compute, dma_ifm_compute
+from ..util import upscale_ofm
 
 
 def conv2d_compute(
@@ -40,6 +41,8 @@ def conv2d_compute(
     clip_max: int,
     rounding_mode: str,
     upscale: str,
+    upscale_height: int,
+    upscale_width: int,
     ifm_layout: str,
     ofm_layout: str,
 ) -> te.Tensor:
@@ -92,7 +95,14 @@ def conv2d_compute(
             "NONE" - no upscaling.
             "NEAREST" - upscale using nearest neighbour.
             "ZEROS" - upscale using zeros.
-            "NATURAL" - Round to nearest value, with x.5 rounded up towards +infinity.
+    upscale_height: int
+        The height of the Output Feature Map after applying upscaling. A value of
+        0 means the height of the Input Feature Map will be used. This parameter
+        has no effect when upscale is "NONE".
+    upscale_width: int
+        The width of the Output Feature Map after applying upscaling. A value of
+        0 means the width of the Input Feature Map will be used. This parameter
+        has no effect when upscale is "NONE".
     ifm_layout : str
         The layout of the Input Feature Map tensor. Can be "NHWC" or "NHCWB16".
     ofm_layout : str
@@ -122,6 +132,8 @@ def conv2d_compute(
     dilated_kernel_w = (kernel_w - 1) * dilation_w + 1
     ofm_height = (dmaed_ifm.shape[1] - dilated_kernel_h) // stride_h + 1
     ofm_width = (dmaed_ifm.shape[2] - dilated_kernel_w) // stride_w + 1
+    ofm_shape = (1, ofm_height, ofm_width, ofm_channels)
+    ofm_shape = upscale_ofm(ofm_shape, upscale_height, upscale_width)
     rc = te.reduce_axis((0, ifm_channels), name="rc")
     rh = te.reduce_axis((0, kernel_h), name="ry")
     rw = te.reduce_axis((0, kernel_w), name="rx")
@@ -150,7 +162,7 @@ def conv2d_compute(
         conv2d_attrs["lut"] = lut
 
     conv = te.compute(
-        (1, ofm_height, ofm_width, ofm_channels),
+        ofm_shape,
         lambda nn, hh, ww, cc: te.sum(
             dmaed_ifm(
                 nn, hh * stride_h + rh * dilation_h, ww * stride_w + rw * dilation_w, rc

diff --git a/python/tvm/relay/backend/contrib/ethosu/tir/convolution.py b/python/tvm/relay/backend/contrib/ethosu/tir/convolution.py
@@ -102,7 +102,7 @@ def get_conv2d_params(stmt, producers, consumers):
             padding=serial_padding,
             activation=serial_activation,
             rounding_mode=attrs["rounding_mode"],
-            upscale="NONE",
+            upscale=attrs["upscale"],
         ),
         output_pointer,
         replace_pointer,

diff --git a/python/tvm/relay/backend/contrib/ethosu/tir_to_cs_translator.py b/python/tvm/relay/backend/contrib/ethosu/tir_to_cs_translator.py
@@ -655,7 +655,7 @@ def _create_npu_resampling_mode(
     mode_map = {
         "NONE": vapi.NpuResamplingMode.NONE,
         "NEAREST": vapi.NpuResamplingMode.NEAREST,
-        "TRANSPOSE": vapi.NpuResamplingMode.TRANSPOSE,
+        "ZEROS": vapi.NpuResamplingMode.TRANSPOSE,
     }
     mode = str(mode.value)
     assert mode in mode_map.keys()

diff --git a/python/tvm/relay/backend/contrib/ethosu/util.py b/python/tvm/relay/backend/contrib/ethosu/util.py
@@ -47,6 +47,20 @@ class QConv2DArgs(Enum):
     WEIGHTS_SCALE = 5
 
 
+class QConv2DTransposeArgs(Enum):
+    """
+    This is a helper enum to obtain the correct index of
+    qnn.conv2d_transpose aruments.
+    """
+
+    IFM = 0
+    WEIGHTS = 1
+    IFM_ZERO_POINT = 2
+    WEIGHTS_ZERO_POINT = 3
+    IFM_SCALE = 4
+    WEIGHTS_SCALE = 5
+
+
 class RequantArgs(Enum):
     """
     This is a helper enum to obtain the correct index