[microNPU] Add support for transpose convolution (apache#9855)
Adds support for legalizing transpose convolution to a microNPU conv2d
operation for the case where strides == (2, 2), dilation == (1, 1), and no
output padding is required.

Change-Id: I485e2571913b3dcd7c75c46304f2f9a82f630ee0
lhutton1 authored and ylc committed Feb 16, 2022
1 parent 5b379e7 commit 2721698
Showing 11 changed files with 730 additions and 106 deletions.
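
Background (editorial): the legalization rewrites a stride-2 qnn.conv2d_transpose as a stride-1 NPU conv2d over a zero-upscaled IFM with a spatially flipped kernel, followed by a crop. A minimal single-channel NumPy sketch of that equivalence — illustrative only, assuming strides of (2, 2), no padding, and a kernel of size at least 2; it is not code from this commit:

    import numpy as np

    def conv2d_transpose_ref(x, w, s=2):
        # reference stride-s transpose convolution: scatter-accumulate
        h, k = x.shape[0], w.shape[0]
        out = np.zeros(((h - 1) * s + k, (h - 1) * s + k))
        for i in range(h):
            for j in range(h):
                out[i * s:i * s + k, j * s:j * s + k] += x[i, j] * w
        return out

    def legalized(x, w, s=2):
        # the rewrite used by this commit: zero-upscale, repad, stride-1 conv
        k = w.shape[0]
        up = np.zeros((x.shape[0] * s, x.shape[1] * s))
        up[::s, ::s] = x                                  # upscale="ZEROS"
        p = np.pad(up, ((k - 1, k - 2), (k - 1, k - 2)))  # legalize_padding, stride 2
        wf = w[::-1, ::-1]                                # np.flip over spatial axes
        n = p.shape[0] - k + 1
        out = np.zeros((n, n))
        for i in range(n):
            for j in range(n):
                out[i, j] = np.sum(p[i:i + k, j:j + k] * wf)
        return out

    x, w = np.random.rand(4, 4), np.random.rand(3, 3)  # square input for brevity
    assert np.allclose(conv2d_transpose_ref(x, w), legalized(x, w))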
82 changes: 82 additions & 0 deletions python/tvm/relay/backend/contrib/ethosu/legalize.py
@@ -353,6 +353,87 @@ def __call__(self, *args, **kwargs):
pass


class Conv2DTransposeRewriter(DFPatternCallback):
"""Convert conv2d_transpose related composite functions into
ethosu_conv2d_transpose operators."""

def __init__(self):
super().__init__(require_type=True)
self.pattern = (wildcard().has_attr({"Composite": "ethos-u.qnn_conv2d_transpose"}))(
wildcard()
)

def callback(
self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map
) -> tvm.relay.Expr:
params = ethosu_patterns.QnnConv2DTransposeParams(post.op.body)
params.ifm.tensor = post.args[0]

ofm_shape = params.ofm.shape
legalize_padding = params.legalize_padding

weight_to_ohwi_transform_map = {"IOHW": [1, 2, 3, 0]}
weights_values = params.weights.values
weights_values_ohwi = np.transpose(
weights_values, weight_to_ohwi_transform_map[str(params.weights.layout)]
)
weights_values_ohwi = np.flip(weights_values_ohwi, (1, 2))
weights = relay.const(weights_values_ohwi, dtype=params.weights.values.dtype)

bias_values = (
params.biases.tensor.data.asnumpy()
if params.biases
else np.zeros((params.ifm.shape[-1]))
)
scale_bias = vela_api.pack_biases(
biases=bias_values,
ifm_scale=params.ifm.q_params.scale_f32,
ifm_dtype=np.dtype(params.ifm.dtype),
weight_scales=params.weights.q_params.scale_f32,
ofm_scale=params.ofm.q_params.scale_f32,
is_activation_tanh_or_sigmoid=False,
)

reduced_op = ethosu_ops.ethosu_conv2d(
ifm=post.args[0],
weight=weights,
scale_bias=relay.const(scale_bias, "uint8"),
lut=relay.const([], dtype="int8"),
ifm_scale=float(params.ifm.q_params.scale_f32),
ifm_zero_point=int(params.ifm.q_params.zero_point),
weight_zero_point=int(params.weights.q_params.zero_point),
ofm_scale=float(params.ofm.q_params.scale_f32),
ofm_zero_point=int(params.ofm.q_params.zero_point),
kernel_shape=params.kernel_shape,
ofm_channels=int(ofm_shape[-1]),
strides=(1, 1),
padding=legalize_padding,
dilation=params.dilation,
ifm_layout=str(params.ifm.layout),
ofm_layout=str(params.ofm.layout),
upscale="ZEROS",
)

# Remove the additional padding by 'cropping' back to the expected OFM size
return relay.strided_slice(reduced_op, (0, 0, 0, 0), ofm_shape)
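
# Editorial sketch, not part of this commit: the IOHW-to-OHWI weight relayout
# and spatial flip above, checked in isolation with NumPy (shapes illustrative):
#
#     w_iohw = np.arange(3 * 8 * 2 * 2).reshape(3, 8, 2, 2)  # (ifm, ofm, h, w)
#     w_ohwi = np.transpose(w_iohw, [1, 2, 3, 0])            # -> (ofm, h, w, ifm)
#     w_ohwi = np.flip(w_ohwi, (1, 2))                       # flip h and w for conv2d
#     assert w_ohwi.shape == (8, 2, 2, 3)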


@ir.transform.module_pass(opt_level=1)
class LegalizeConv2DTranspose:
"""This is the pass that wraps the Conv2DTransposeRewriter"""

def transform_module(
self, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext
) -> tvm.ir.IRModule:
for global_var, func in mod.functions.items():
func = rewrite(Conv2DTransposeRewriter(), func)
mod.update_func(global_var, func)
return mod

def __call__(self, *args, **kwargs):
pass


class DepthwiseConv2DRewriter(DFPatternCallback):
"""Convert ethosu.qnn_depthwise_conv2d composite functions to ethosu_depthwise_conv2d
operators"""
@@ -1379,6 +1460,7 @@ def transform_module(
"""
mod = LegalizeSplit()(mod)
mod = LegalizeConv2D()(mod)
mod = LegalizeConv2DTranspose()(mod)
mod = LegalizeDepthwiseConv2D()(mod)
mod = LegalizeMaxPooling()(mod)
mod = LegalizeAvgPooling()(mod)
9 changes: 8 additions & 1 deletion python/tvm/relay/backend/contrib/ethosu/te/convolution.py
@@ -115,10 +115,17 @@ def conv2d_compute(
stride_h, stride_w = [int(v) for v in strides]
dilation_h, dilation_w = [int(v) for v in dilation]
ofm_channels, kernel_h, kernel_w, ifm_channels = [int(v) for v in weight.shape]
upscale_factor = 2 if upscale != "NONE" else 1

# Compute operation for the IFM DMA pipeline
dmaed_ifm = dma_ifm_compute(
- ifm, ifm_layout, ifm_zero_point, ifm_scale, weight.shape[3], padding
+ ifm,
+ ifm_layout,
+ ifm_zero_point,
+ ifm_scale,
+ weight.shape[3],
+ padding,
+ upscale_factor,
)
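
# Editorial note: with upscale="ZEROS" or "NEAREST" (upscale_factor == 2) the
# DMA stage presents the IFM to the convolution as if both spatial dimensions
# were doubled, e.g. an NHWC IFM of (1, 8, 8, 3) is consumed as (1, 16, 16, 3).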

# 2D Convolution compute operation
2 changes: 1 addition & 1 deletion python/tvm/relay/backend/contrib/ethosu/tir/convolution.py
@@ -102,7 +102,7 @@ def get_conv2d_params(stmt, producers, consumers):
padding=serial_padding,
activation=serial_activation,
rounding_mode=attrs["rounding_mode"],
upscale="NONE",
upscale=attrs["upscale"],
),
output_pointer,
replace_pointer,
14 changes: 14 additions & 0 deletions python/tvm/relay/backend/contrib/ethosu/util.py
@@ -47,6 +47,20 @@ class QConv2DArgs(Enum):
WEIGHTS_SCALE = 5


class QConv2DTransposeArgs(Enum):
"""
This is a helper enum to obtain the correct index
of qnn.conv2d_transpose arguments.
"""

IFM = 0
WEIGHTS = 1
IFM_ZERO_POINT = 2
WEIGHTS_ZERO_POINT = 3
IFM_SCALE = 4
WEIGHTS_SCALE = 5


class RequantArgs(Enum):
"""
This is a helper enum to obtain the correct index
152 changes: 152 additions & 0 deletions python/tvm/relay/op/contrib/ethosu.py
@@ -276,6 +276,137 @@ def is_valid(self) -> bool:
return not self.is_depthwise


class QnnConv2DTransposeParams:
"""
This class will parse a call to an ethos-u.qnn_conv2d_transpose composite
function and extract the quantization information of all the associated tensors.
"""

composite_name = "ethos-u.qnn_conv2d_transpose"
# The NPU only supports padding up to the following bounds
padding_bounds = [31, 31, 32, 32]

@requires_vela
def __init__(self, func_body: tvm.relay.Function):
from tvm.relay.backend.contrib.ethosu.util import QConv2DTransposeArgs # type: ignore
from tvm.relay.backend.contrib.ethosu.util import BiasAddArgs
from tvm.relay.backend.contrib.ethosu.util import RequantArgs

requantize = func_body
call = func_body.args[0]
if str(call.op) == "nn.bias_add":
bias_add = call
call = call.args[0]
else:
bias_add = None
qnn_conv2d_transpose = call

data_layout = qnn_conv2d_transpose.attrs.data_layout
self.kernel_layout = qnn_conv2d_transpose.attrs.kernel_layout

self.weights = TensorParams(
qnn_conv2d_transpose.args[QConv2DTransposeArgs.WEIGHTS.value],
self.kernel_layout,
qnn_conv2d_transpose.args[QConv2DTransposeArgs.WEIGHTS_SCALE.value],
qnn_conv2d_transpose.args[QConv2DTransposeArgs.WEIGHTS_ZERO_POINT.value],
)
self.biases = (
TensorParams(
bias_add.args[BiasAddArgs.BIASES.value],
data_layout,
requantize.args[RequantArgs.IFM_SCALE.value],
requantize.args[RequantArgs.IFM_ZERO_POINT.value],
)
if bias_add
else None
)
self.ifm = TensorParams(
qnn_conv2d_transpose.args[QConv2DTransposeArgs.IFM.value],
data_layout,
qnn_conv2d_transpose.args[QConv2DTransposeArgs.IFM_SCALE.value],
qnn_conv2d_transpose.args[QConv2DTransposeArgs.IFM_ZERO_POINT.value],
)
self.ofm = TensorParams(
func_body,
data_layout,
requantize.args[RequantArgs.OFM_SCALE.value],
requantize.args[RequantArgs.OFM_ZERO_POINT.value],
)

attrs = qnn_conv2d_transpose.attrs
self.strides = attrs.strides
self.dilation = attrs.dilation
self.padding = attrs.padding
self.channels = attrs.channels
self.groups = attrs.groups
self.output_padding = attrs.output_padding

kernel_size_map = {
"IOHW": self.weights.shape[2:4],
}
self.kernel_shape = kernel_size_map[str(self.weights.layout)]

# Different padding is used when legalizing conv2d_transpose to conv2d,
# so we calculate it here to check that the new size fits within the
# bounds of the NPU before offloading.
pad_top = int(self.kernel_shape[0]) - 1 - int(self.padding[0])
pad_left = int(self.kernel_shape[1]) - 1 - int(self.padding[1])
pad_bottom = int(self.kernel_shape[0]) - 1 - int(self.padding[2])
pad_right = int(self.kernel_shape[1]) - 1 - int(self.padding[3])
if self.strides == [2, 2]:
pad_bottom -= 1
pad_right -= 1
self.legalize_padding = [pad_top, pad_left, pad_bottom, pad_right]
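
# Worked example (editorial, not part of the original source): a 3x3 kernel
# with strides (2, 2) and padding [0, 0, 0, 0] gives
# pad_top = pad_left = 3 - 1 - 0 = 2 and, after the stride-2 adjustment,
# pad_bottom = pad_right = 3 - 1 - 0 - 1 = 1, i.e.
# legalize_padding == [2, 2, 1, 1].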

def is_valid(self) -> bool:
"""
This function checks whether QnnConv2DTranspose has compatible attributes with the NPU
"""

def check_compatible_output_size(ifm_shape, ofm_shape, padding, strides, kernel_shape):
is_valid_padding = padding == [0, 0, 0, 0]
if is_valid_padding:
expected_height = ifm_shape[1] * strides[0] + (kernel_shape[0] - strides[0])
expected_width = ifm_shape[2] * strides[1] + (kernel_shape[1] - strides[1])
else:
expected_height = ifm_shape[1] * strides[0]
expected_width = ifm_shape[2] * strides[1]
return ofm_shape[1] == expected_height and ofm_shape[2] == expected_width

tensor_params = [self.weights, self.ifm, self.ofm]
if not check_valid_dtypes(tensor_params, supported_dtypes=[np.int8]):
return False
if not check_weights(self.weights, self.dilation):
return False
if self.biases and not check_bias(self.biases):
return False
if not check_strides(self.strides, stride_range=(2, 2)):
return False
if not check_batch_size(self.ifm):
return False
if not check_dilation(self.dilation, dilation_range=(1, 1)):
return False
if not check_compatible_output_size(
self.ifm.shape,
self.ofm.shape,
[int(x) for x in self.padding],
self.strides,
self.kernel_shape,
):
return False
if not check_padding(self.legalize_padding, self.padding_bounds):
return False
if self.kernel_shape[0] - 2 - int(self.padding[2]) < 0:
return False
if self.kernel_shape[1] - 2 - int(self.padding[3]) < 0:
return False
if self.groups != 1:
return False
if list(self.output_padding) != [0, 0]:
return False
return True
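
# Worked example (editorial, not part of the original source): for an IFM of
# (1, 8, 8, 3) with a 3x3 kernel, strides (2, 2) and padding [0, 0, 0, 0], the
# compatible OFM spatial size is 8 * 2 + (3 - 2) = 17 in each dimension.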


class QnnDepthwiseConv2DParams(QnnConv2DParams):
"""
This class will parse a call to a ethosu.depthwise_conv2d composite function
@@ -348,6 +479,22 @@ def qnn_depthwise_conv2d_pattern() -> tvm.relay.dataflow_pattern.DFPattern:
return clip_or_req


def qnn_conv2d_transpose_pattern() -> tvm.relay.dataflow_pattern.DFPattern:
"""
This function creates the pattern for qnn.conv2d_transpose.
"""
qnn_conv2d_transpose = is_op("qnn.conv2d_transpose")(
wildcard(), is_constant(), is_constant(), is_constant(), is_constant(), is_constant()
).has_attr({"kernel_layout": "IOHW"})
optional_bias_add = (
is_op("nn.bias_add")(qnn_conv2d_transpose, is_constant()) | qnn_conv2d_transpose
)
req = is_op("qnn.requantize")(
optional_bias_add, is_constant(), is_constant(), is_constant(), is_constant()
)
return req
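
# Editorial sketch, not part of this commit: a graph this pattern is intended
# to match (shapes, scales and zero points are illustrative, and the qnn
# keyword names are assumed from the qnn.conv2d_transpose Python API):
#
#     x = relay.var("x", shape=(1, 8, 8, 3), dtype="int8")
#     w = relay.const(np.zeros((3, 8, 3, 3), dtype="int8"))  # IOHW
#     conv = relay.qnn.op.conv2d_transpose(
#         x, w,
#         input_zero_point=relay.const(0, "int32"),
#         kernel_zero_point=relay.const(0, "int32"),
#         input_scale=relay.const(0.5, "float32"),
#         kernel_scale=relay.const(0.5, "float32"),
#         channels=8, kernel_size=(3, 3), strides=(2, 2),
#         data_layout="NHWC", kernel_layout="IOHW", out_dtype="int32",
#     )
#     conv = relay.nn.bias_add(conv, relay.const(np.zeros(8, "int32")), axis=3)
#     out = relay.qnn.op.requantize(
#         conv,
#         relay.const(0.25, "float32"), relay.const(0, "int32"),
#         relay.const(0.125, "float32"), relay.const(0, "int32"),
#         out_dtype="int8",
#     )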


class MaxPool2DParams:
"""
This class will parse a call to a ethos-u.maxpool2d composite function
@@ -1299,6 +1446,11 @@ def pattern_table() -> List[Tuple[str, tvm.relay.dataflow_pattern.DFPattern, Cal
qnn_depthwise_conv2d_pattern(),
lambda pat: QnnDepthwiseConv2DParams(pat).is_valid(),
),
(
QnnConv2DTransposeParams.composite_name,
qnn_conv2d_transpose_pattern(),
lambda pat: QnnConv2DTransposeParams(pat).is_valid(),
),
(
MaxPool2DParams.composite_name,
qnn_maxpool2d_pattern(),
17 changes: 16 additions & 1 deletion src/relay/op/contrib/ethosu/convolution.cc
@@ -154,6 +154,15 @@ bool EthosuConv2DRel(const Array<Type>& types, int num_inputs, const Attrs& attr
return false;
}

const std::unordered_set<std::string> upscale_methods = {"NONE", "ZEROS", "NEAREST"};
if (upscale_methods.find(param->upscale) == upscale_methods.end()) {
reporter->GetDiagCtx().EmitFatal(Diagnostic::Error(reporter->GetSpan())
<< "Invalid operator: Expected upsample method to be 'NONE', "
"'ZEROS' or 'NEAREST' but got "
<< param->upscale);
return false;
}

// The scale_bias should be provided as a tensor of size {ofm_channels, 10}
reporter->Assign(types[2], TensorType({weight->shape[0], 10}, DataType::UInt(8)));

@@ -162,10 +171,16 @@ bool EthosuConv2DRel(const Array<Type>& types, int num_inputs, const Attrs& attr
param->kernel_shape[1], weight->shape[3]},
weight->dtype));

Array<IndexExpr> ifm_shape = ifm->shape;
if (param->upscale != "NONE") {
ifm_shape = EthosuInferUpscaledInput(ifm_shape, param->ifm_layout);
}

// Assign ofm type
auto ofm_shape =
- EthosuInferKernelOutput(ifm->shape, param->ifm_layout, param->ofm_layout, param->kernel_shape,
+ EthosuInferKernelOutput(ifm_shape, param->ifm_layout, param->ofm_layout, param->kernel_shape,
param->ofm_channels, param->dilation, param->strides, param->padding);

reporter->Assign(types[4], TensorType(ofm_shape, ifm->dtype));
return true;
}
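The upscale check added above admits three methods. A hedged NumPy model of the two non-trivial ones (editorial; the NPU semantics are modeled here, not exercised):

    import numpy as np

    ifm = np.array([[1, 2],
                    [3, 4]])
    zeros = np.zeros((4, 4), ifm.dtype)
    zeros[::2, ::2] = ifm                              # "ZEROS": interleave zeros
    nearest = ifm.repeat(2, axis=0).repeat(2, axis=1)  # "NEAREST": repeat values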
3 changes: 2 additions & 1 deletion tests/python/contrib/test_ethosu/infra.py
@@ -423,6 +423,7 @@ def make_ethosu_conv2d(
weight_dtype="int8",
scale_bias_dtype="uint8",
rounding_mode="TFL",
upscale="NONE",
):
# conv params
weight_shape = (ofm_channels, kernel_shape[0], kernel_shape[1], ifm_channels)
@@ -451,7 +452,7 @@
clip_min=10 if activation == "CLIP" else 0,
clip_max=100 if activation == "CLIP" else 0,
rounding_mode=rounding_mode,
upscale="NONE",
upscale=upscale,
ifm_layout=ifm_layout,
ofm_layout=ofm_layout,
)