diff --git a/python/tvm/relay/backend/contrib/ethosu/legalize.py b/python/tvm/relay/backend/contrib/ethosu/legalize.py
index eb029dac2cbc..6be03a6883fa 100644
--- a/python/tvm/relay/backend/contrib/ethosu/legalize.py
+++ b/python/tvm/relay/backend/contrib/ethosu/legalize.py
@@ -353,6 +353,87 @@ def __call__(self, *args, **kwargs):
         pass
 
 
+class Conv2DTransposeRewriter(DFPatternCallback):
+    """Convert conv2d_transpose related composite functions into an
+    equivalent ethosu_conv2d operation, cropped to the expected OFM size."""
+
+    def __init__(self):
+        super().__init__(require_type=True)
+        self.pattern = (wildcard().has_attr({"Composite": "ethos-u.qnn_conv2d_transpose"}))(
+            wildcard()
+        )
+
+    def callback(
+        self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map
+    ) -> tvm.relay.Expr:
+        params = ethosu_patterns.QnnConv2DTransposeParams(post.op.body)
+        params.ifm.tensor = post.args[0]
+
+        ofm_shape = params.ofm.shape
+        legalize_padding = params.legalize_padding
+
+        weight_to_ohwi_transform_map = {"IOHW": [1, 2, 3, 0]}
+        weights_values = params.weights.values
+        weights_values_ohwi = np.transpose(
+            weights_values, weight_to_ohwi_transform_map[str(params.weights.layout)]
+        )
+        weights_values_ohwi = np.flip(weights_values_ohwi, (1, 2))
+        weights = relay.const(weights_values_ohwi, dtype=params.weights.values.dtype)
+
+        bias_values = (
+            params.biases.tensor.data.asnumpy()
+            if params.biases
+            else np.zeros((params.ifm.shape[-1]))
+        )
+        scale_bias = vela_api.pack_biases(
+            biases=bias_values,
+            ifm_scale=params.ifm.q_params.scale_f32,
+            ifm_dtype=np.dtype(params.ifm.dtype),
+            weight_scales=params.weights.q_params.scale_f32,
+            ofm_scale=params.ofm.q_params.scale_f32,
+            is_activation_tanh_or_sigmoid=False,
+        )
+
+        reduced_op = ethosu_ops.ethosu_conv2d(
+            ifm=post.args[0],
+            weight=weights,
+            scale_bias=relay.const(scale_bias, "uint8"),
+            lut=relay.const([], dtype="int8"),
+            ifm_scale=float(params.ifm.q_params.scale_f32),
+            ifm_zero_point=int(params.ifm.q_params.zero_point),
+            weight_zero_point=int(params.weights.q_params.zero_point),
+            ofm_scale=float(params.ofm.q_params.scale_f32),
+            ofm_zero_point=int(params.ofm.q_params.zero_point),
+            kernel_shape=params.kernel_shape,
+            ofm_channels=int(ofm_shape[-1]),
+            strides=(1, 1),
+            padding=legalize_padding,
+            dilation=params.dilation,
+            ifm_layout=str(params.ifm.layout),
+            ofm_layout=str(params.ofm.layout),
+            upscale="ZEROS",
+        )
+
+        # Remove the additional padding by 'cropping' back to the expected OFM size
+        return relay.strided_slice(reduced_op, (0, 0, 0, 0), ofm_shape)
+
+
+@ir.transform.module_pass(opt_level=1)
+class LegalizeConv2DTranspose:
+    """This is the pass that wraps the Conv2DTransposeRewriter"""
+
+    def transform_module(
+        self, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext
+    ) -> tvm.ir.IRModule:
+        for global_var, func in mod.functions.items():
+            func = rewrite(Conv2DTransposeRewriter(), func)
+            mod.update_func(global_var, func)
+        return mod
+
+    def __call__(self, *args, **kwargs):
+        pass
+
+
 class DepthwiseConv2DRewriter(DFPatternCallback):
     """Convert ethosu.qnn_depthwise_conv2d composite functions to
     ethosu_depthwise_conv2d operators"""
@@ -1379,6 +1460,7 @@ def transform_module(
         """
         mod = LegalizeSplit()(mod)
         mod = LegalizeConv2D()(mod)
+        mod = LegalizeConv2DTranspose()(mod)
        mod = LegalizeDepthwiseConv2D()(mod)
        mod = LegalizeMaxPooling()(mod)
        mod = LegalizeAvgPooling()(mod)
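A note on the rewrite above: a stride-2 transpose convolution is equivalent to a stride-1 convolution over a zero-upscaled IFM, provided the kernel is spatially reversed. The weight handling is the subtle part, so here is a minimal NumPy sketch of just that transformation (the shapes are illustrative, not taken from the patch):

```python
import numpy as np

# Hypothetical IOHW kernel: 3 input channels, 2 output channels, 3x3 window.
weights_iohw = np.arange(3 * 2 * 3 * 3, dtype=np.int8).reshape((3, 2, 3, 3))

# The same two steps the rewriter performs: IOHW -> OHWI, then flip the
# spatial axes. A stride-1 conv2d over a zero-upscaled IFM only matches the
# transpose convolution if the kernel is spatially reversed.
weights_ohwi = np.transpose(weights_iohw, [1, 2, 3, 0])
weights_ohwi = np.flip(weights_ohwi, (1, 2))

assert weights_ohwi.shape == (2, 3, 3, 3)  # (O, H, W, I)
```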
diff --git a/python/tvm/relay/backend/contrib/ethosu/te/convolution.py b/python/tvm/relay/backend/contrib/ethosu/te/convolution.py
index 040d1e26fba9..77bc5a300cbe 100644
--- a/python/tvm/relay/backend/contrib/ethosu/te/convolution.py
+++ b/python/tvm/relay/backend/contrib/ethosu/te/convolution.py
@@ -115,10 +115,17 @@ def conv2d_compute(
     stride_h, stride_w = [int(v) for v in strides]
     dilation_h, dilation_w = [int(v) for v in dilation]
     ofm_channels, kernel_h, kernel_w, ifm_channels = [int(v) for v in weight.shape]
+    upscale_factor = 2 if upscale != "NONE" else 1
 
     # Compute operation for the IFM DMA pipeline
     dmaed_ifm = dma_ifm_compute(
-        ifm, ifm_layout, ifm_zero_point, ifm_scale, weight.shape[3], padding
+        ifm,
+        ifm_layout,
+        ifm_zero_point,
+        ifm_scale,
+        weight.shape[3],
+        padding,
+        upscale_factor,
     )
 
     # 2D Convolution compute operation
diff --git a/python/tvm/relay/backend/contrib/ethosu/tir/convolution.py b/python/tvm/relay/backend/contrib/ethosu/tir/convolution.py
index fe0b456727ec..50c27cc01689 100644
--- a/python/tvm/relay/backend/contrib/ethosu/tir/convolution.py
+++ b/python/tvm/relay/backend/contrib/ethosu/tir/convolution.py
@@ -102,7 +102,7 @@ def get_conv2d_params(stmt, producers, consumers):
             padding=serial_padding,
             activation=serial_activation,
             rounding_mode=attrs["rounding_mode"],
-            upscale="NONE",
+            upscale=attrs["upscale"],
         ),
         output_pointer,
         replace_pointer,
diff --git a/python/tvm/relay/backend/contrib/ethosu/util.py b/python/tvm/relay/backend/contrib/ethosu/util.py
index fcc8e9e9df30..8234dc047fd8 100644
--- a/python/tvm/relay/backend/contrib/ethosu/util.py
+++ b/python/tvm/relay/backend/contrib/ethosu/util.py
@@ -47,6 +47,20 @@ class QConv2DArgs(Enum):
     WEIGHTS_SCALE = 5
 
 
+class QConv2DTransposeArgs(Enum):
+    """
+    This is a helper enum to obtain the correct index
+    of qnn.conv2d_transpose arguments.
+    """
+
+    IFM = 0
+    WEIGHTS = 1
+    IFM_ZERO_POINT = 2
+    WEIGHTS_ZERO_POINT = 3
+    IFM_SCALE = 4
+    WEIGHTS_SCALE = 5
+
+
 class RequantArgs(Enum):
     """
     This is a helper enum to obtain the correct index
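For context on the `upscale_factor` now plumbed into the IFM DMA compute: with `upscale="ZEROS"` the NPU inserts zeros between input elements, doubling the spatial extent before the convolution runs. A standalone sketch of that behaviour for a single HxW plane (this illustrates the assumed hardware semantics, not the TE compute itself):

```python
import numpy as np

def zero_upscale_2x(plane: np.ndarray) -> np.ndarray:
    """2x 'ZEROS' upscaling of one HxW plane: every input element keeps its
    position on the even grid and the inserted rows/columns are zero."""
    height, width = plane.shape
    out = np.zeros((2 * height, 2 * width), dtype=plane.dtype)
    out[::2, ::2] = plane
    return out

x = np.array([[1, 2], [3, 4]], dtype=np.int8)
print(zero_upscale_2x(x))
# [[1 0 2 0]
#  [0 0 0 0]
#  [3 0 4 0]
#  [0 0 0 0]]
```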
+ """ + + composite_name = "ethos-u.qnn_conv2d_transpose" + # The NPU only supports padding upto the numbers as follows + padding_bounds = [31, 31, 32, 32] + + @requires_vela + def __init__(self, func_body: tvm.relay.Function): + from tvm.relay.backend.contrib.ethosu.util import QConv2DTransposeArgs # type: ignore + from tvm.relay.backend.contrib.ethosu.util import BiasAddArgs + from tvm.relay.backend.contrib.ethosu.util import RequantArgs + + requantize = func_body + call = func_body.args[0] + if str(call.op) == "nn.bias_add": + bias_add = call + call = call.args[0] + else: + bias_add = None + qnn_conv2d_transpose = call + + data_layout = qnn_conv2d_transpose.attrs.data_layout + self.kernel_layout = qnn_conv2d_transpose.attrs.kernel_layout + + self.weights = TensorParams( + qnn_conv2d_transpose.args[QConv2DTransposeArgs.WEIGHTS.value], + self.kernel_layout, + qnn_conv2d_transpose.args[QConv2DTransposeArgs.WEIGHTS_SCALE.value], + qnn_conv2d_transpose.args[QConv2DTransposeArgs.WEIGHTS_ZERO_POINT.value], + ) + self.biases = ( + TensorParams( + bias_add.args[BiasAddArgs.BIASES.value], + data_layout, + requantize.args[RequantArgs.IFM_SCALE.value], + requantize.args[RequantArgs.IFM_ZERO_POINT.value], + ) + if bias_add + else None + ) + self.ifm = TensorParams( + qnn_conv2d_transpose.args[QConv2DTransposeArgs.IFM.value], + data_layout, + qnn_conv2d_transpose.args[QConv2DTransposeArgs.IFM_SCALE.value], + qnn_conv2d_transpose.args[QConv2DTransposeArgs.IFM_ZERO_POINT.value], + ) + self.ofm = TensorParams( + func_body, + data_layout, + requantize.args[RequantArgs.OFM_SCALE.value], + requantize.args[RequantArgs.OFM_ZERO_POINT.value], + ) + + attrs = qnn_conv2d_transpose.attrs + self.strides = attrs.strides + self.dilation = attrs.dilation + self.padding = attrs.padding + self.channels = attrs.channels + self.groups = attrs.groups + self.output_padding = attrs.output_padding + + kernel_size_map = { + "IOHW": self.weights.shape[2:4], + } + self.kernel_shape = kernel_size_map[str(self.weights.layout)] + + # Different padding is used in the legalization from conv2d_transpose + # to conv2d, so we to calculate it here to check that the new size fits + # within the bounds of the NPU before offloading. 
+    def is_valid(self) -> bool:
+        """
+        This function checks whether qnn.conv2d_transpose has compatible attributes with the NPU
+        """
+
+        def check_compatible_output_size(ifm_shape, ofm_shape, padding, strides, kernel_shape):
+            is_valid_padding = padding == [0, 0, 0, 0]
+            if is_valid_padding:
+                expected_height = ifm_shape[1] * strides[0] + (kernel_shape[0] - strides[0])
+                expected_width = ifm_shape[2] * strides[1] + (kernel_shape[1] - strides[1])
+            else:
+                expected_height = ifm_shape[1] * strides[0]
+                expected_width = ifm_shape[2] * strides[1]
+            return ofm_shape[1] == expected_height and ofm_shape[2] == expected_width
+
+        tensor_params = [self.weights, self.ifm, self.ofm]
+        if not check_valid_dtypes(tensor_params, supported_dtypes=[np.int8]):
+            return False
+        if not check_weights(self.weights, self.dilation):
+            return False
+        if self.biases and not check_bias(self.biases):
+            return False
+        if not check_strides(self.strides, stride_range=(2, 2)):
+            return False
+        if not check_batch_size(self.ifm):
+            return False
+        if not check_dilation(self.dilation, dilation_range=(1, 1)):
+            return False
+        if not check_compatible_output_size(
+            self.ifm.shape,
+            self.ofm.shape,
+            [int(x) for x in self.padding],
+            self.strides,
+            self.kernel_shape,
+        ):
+            return False
+        if not check_padding(self.legalize_padding, self.padding_bounds):
+            return False
+        if self.kernel_shape[0] - 2 - int(self.padding[2]) < 0:
+            return False
+        if self.kernel_shape[1] - 2 - int(self.padding[3]) < 0:
+            return False
+        if self.groups != 1:
+            return False
+        if list(self.output_padding) != [0, 0]:
+            return False
+        return True
+
+
 class QnnDepthwiseConv2DParams(QnnConv2DParams):
     """
     This class will parse a call to a ethosu.depthwise_conv2d composite function
@@ -348,6 +479,22 @@ def qnn_depthwise_conv2d_pattern() -> tvm.relay.dataflow_pattern.DFPattern:
     return clip_or_req
 
 
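To see what the output-size compatibility check admits, here is the same predicate as a standalone function with a worked case from the tests in this patch (a sketch mirroring the inner helper above):

```python
def check_compatible_output_size(ifm_shape, ofm_shape, padding, strides, kernel_shape):
    # With VALID (zero) padding the output grows by the kernel overhang;
    # with SAME padding it is exactly ifm * stride.
    is_valid_padding = padding == [0, 0, 0, 0]
    if is_valid_padding:
        expected_height = ifm_shape[1] * strides[0] + (kernel_shape[0] - strides[0])
        expected_width = ifm_shape[2] * strides[1] + (kernel_shape[1] - strides[1])
    else:
        expected_height = ifm_shape[1] * strides[0]
        expected_width = ifm_shape[2] * strides[1]
    return ofm_shape[1] == expected_height and ofm_shape[2] == expected_width

# From the test cases below: a 2x2 IFM, 7x7 kernel, stride 2, VALID padding
# yields 2*2 + (7 - 2) = 9, so a (1, 9, 9, 1) OFM is accepted.
assert check_compatible_output_size((1, 2, 2, 1), (1, 9, 9, 1), [0, 0, 0, 0], (2, 2), (7, 7))
```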
+ """ + qnn_conv2d_transpose = is_op("qnn.conv2d_transpose")( + wildcard(), is_constant(), is_constant(), is_constant(), is_constant(), is_constant() + ).has_attr({"kernel_layout": "IOHW"}) + optional_bias_add = ( + is_op("nn.bias_add")(qnn_conv2d_transpose, is_constant()) | qnn_conv2d_transpose + ) + req = is_op("qnn.requantize")( + optional_bias_add, is_constant(), is_constant(), is_constant(), is_constant() + ) + return req + + class MaxPool2DParams: """ This class will parse a call to a ethos-u.maxpool2d composite function @@ -1299,6 +1446,11 @@ def pattern_table() -> List[Tuple[str, tvm.relay.dataflow_pattern.DFPattern, Cal qnn_depthwise_conv2d_pattern(), lambda pat: QnnDepthwiseConv2DParams(pat).is_valid(), ), + ( + QnnConv2DTransposeParams.composite_name, + qnn_conv2d_transpose_pattern(), + lambda pat: QnnConv2DTransposeParams(pat).is_valid(), + ), ( MaxPool2DParams.composite_name, qnn_maxpool2d_pattern(), diff --git a/src/relay/op/contrib/ethosu/convolution.cc b/src/relay/op/contrib/ethosu/convolution.cc index 0785bb6eb61c..7b11f61acc12 100644 --- a/src/relay/op/contrib/ethosu/convolution.cc +++ b/src/relay/op/contrib/ethosu/convolution.cc @@ -154,6 +154,15 @@ bool EthosuConv2DRel(const Array& types, int num_inputs, const Attrs& attr return false; } + const std::unordered_set upscale_methods = {"NONE", "ZEROS", "NEAREST"}; + if (upscale_methods.find(param->upscale) == upscale_methods.end()) { + reporter->GetDiagCtx().EmitFatal(Diagnostic::Error(reporter->GetSpan()) + << "Invalid operator: Expected upsample method to be 'NONE', " + "'ZEROS' or 'NEAREST' but got " + << param->upscale); + return false; + } + // The scale_bias should be provided as a tensor of size {ofm_channels, 10} reporter->Assign(types[2], TensorType({weight->shape[0], 10}, DataType::UInt(8))); @@ -162,10 +171,16 @@ bool EthosuConv2DRel(const Array& types, int num_inputs, const Attrs& attr param->kernel_shape[1], weight->shape[3]}, weight->dtype)); + Array ifm_shape = ifm->shape; + if (param->upscale != "NONE") { + ifm_shape = EthosuInferUpscaledInput(ifm_shape, param->ifm_layout); + } + // Assign ofm type auto ofm_shape = - EthosuInferKernelOutput(ifm->shape, param->ifm_layout, param->ofm_layout, param->kernel_shape, + EthosuInferKernelOutput(ifm_shape, param->ifm_layout, param->ofm_layout, param->kernel_shape, param->ofm_channels, param->dilation, param->strides, param->padding); + reporter->Assign(types[4], TensorType(ofm_shape, ifm->dtype)); return true; } diff --git a/tests/python/contrib/test_ethosu/infra.py b/tests/python/contrib/test_ethosu/infra.py index 52bc8ef69435..d5bd28039feb 100644 --- a/tests/python/contrib/test_ethosu/infra.py +++ b/tests/python/contrib/test_ethosu/infra.py @@ -423,6 +423,7 @@ def make_ethosu_conv2d( weight_dtype="int8", scale_bias_dtype="uint8", rounding_mode="TFL", + upscale="NONE", ): # conv params weight_shape = (ofm_channels, kernel_shape[0], kernel_shape[1], ifm_channels) @@ -451,7 +452,7 @@ def make_ethosu_conv2d( clip_min=10 if activation == "CLIP" else 0, clip_max=100 if activation == "CLIP" else 0, rounding_mode=rounding_mode, - upscale="NONE", + upscale=upscale, ifm_layout=ifm_layout, ofm_layout=ofm_layout, ) diff --git a/tests/python/contrib/test_ethosu/test_codegen.py b/tests/python/contrib/test_ethosu/test_codegen.py index 455be799a822..ebcd1a0ba1fc 100644 --- a/tests/python/contrib/test_ethosu/test_codegen.py +++ b/tests/python/contrib/test_ethosu/test_codegen.py @@ -1061,5 +1061,44 @@ def resize_model(x): _compare_tvm_with_tflite(resize_model, [ifm_shape], 
diff --git a/tests/python/contrib/test_ethosu/test_codegen.py b/tests/python/contrib/test_ethosu/test_codegen.py
index 455be799a822..ebcd1a0ba1fc 100644
--- a/tests/python/contrib/test_ethosu/test_codegen.py
+++ b/tests/python/contrib/test_ethosu/test_codegen.py
@@ -1061,5 +1061,44 @@ def resize_model(x):
     _compare_tvm_with_tflite(resize_model, [ifm_shape], accel_type, output_tolerance=1)
 
 
+@pytest.mark.parametrize("accel_type", ACCEL_TYPES)
+@pytest.mark.parametrize(
+    "ifm_shape,ofm_shape,kernel_shape,padding",
+    [
+        [(1, 2, 2, 1), (1, 4, 4, 1), (3, 3), "SAME"],
+        [(1, 2, 2, 1), (1, 9, 9, 1), (7, 7), "VALID"],
+        [(1, 2, 4, 3), (1, 4, 8, 3), (5, 3), "SAME"],
+        [(1, 10, 5, 3), (1, 21, 13, 3), (3, 5), "VALID"],
+    ],
+)
+@pytest.mark.parametrize("has_bias", [False, True])
+def test_tflite_transpose_convolution(
+    accel_type, ifm_shape, ofm_shape, kernel_shape, padding, has_bias
+):
+    dilations = (1, 1)
+    strides = (2, 2)
+
+    @tf.function
+    def conv2d_transpose(x):
+        weight_shape = [kernel_shape[0], kernel_shape[1], ifm_shape[3], ofm_shape[3]]
+        weight = tf.constant(np.random.uniform(size=weight_shape), dtype=tf.float32)
+        bias_shape = ofm_shape[3]
+        bias = tf.constant(np.random.uniform(size=bias_shape), dtype=tf.float32)
+        tf_strides = [1, strides[0], strides[1], 1]
+        op = tf.nn.conv2d_transpose(
+            x,
+            weight,
+            output_shape=ofm_shape,
+            strides=tf_strides,
+            padding=padding,
+            dilations=dilations,
+        )
+        if has_bias:
+            op = tf.nn.bias_add(op, bias)
+        return op
+
+    _compare_tvm_with_tflite(conv2d_transpose, [ifm_shape], accel_type=accel_type)
+
+
 if __name__ == "__main__":
     pytest.main([__file__])
diff --git a/tests/python/contrib/test_ethosu/test_legalize.py b/tests/python/contrib/test_ethosu/test_legalize.py
index f77e15c9334e..ab304a7b0c2b 100644
--- a/tests/python/contrib/test_ethosu/test_legalize.py
+++ b/tests/python/contrib/test_ethosu/test_legalize.py
@@ -33,6 +33,7 @@
 from tvm.relay.op.contrib import ethosu
 from tvm.relay.backend.contrib.ethosu import util
 from tvm.relay.build_module import bind_params_by_name
+from tvm.relay.frontend.tflite import get_pad_value
 
 from . import infra
 
@@ -1774,5 +1775,133 @@ def verify(ext_func):
     verify(mod["tvmgen_default_ethos_u_main_0"])
 
 
+@pytest.mark.parametrize(
+    "ifm_shape,ofm_shape,kernel_shape,padding",
+    [
+        [(1, 2, 2, 1), (1, 4, 4, 1), (3, 3), "SAME"],
+        [(1, 2, 2, 1), (1, 9, 9, 1), (7, 7), "VALID"],
+        [(1, 2, 4, 3), (1, 4, 8, 3), (3, 3), "SAME"],
+        [(1, 10, 5, 3), (1, 21, 13, 3), (3, 5), "VALID"],
+    ],
+)
+@pytest.mark.parametrize("has_bias", [False, True])
+def test_tflite_transpose_convolution(ifm_shape, ofm_shape, kernel_shape, padding, has_bias):
+    dtype = "int8"
+    dilations = (1, 1)
+    strides = (2, 2)
+
+    def create_tflite_graph():
+        @tf.function
+        def conv2d_transpose(x):
+            bias_shape = ofm_shape[3]
+            bias = tf.constant(np.random.uniform(size=bias_shape), dtype=tf.float32)
+            weight_shape = [kernel_shape[0], kernel_shape[1], ifm_shape[3], ofm_shape[3]]
+            weight = tf.constant(np.random.uniform(size=weight_shape), dtype=tf.float32)
+            tf_strides = [1, strides[0], strides[1], 1]
+            op = tf.nn.conv2d_transpose(
+                x,
+                weight,
+                output_shape=ofm_shape,
+                strides=tf_strides,
+                padding=padding,
+                dilations=dilations,
+            )
+            if has_bias:
+                op = tf.nn.bias_add(op, bias)
+            return op
+
+        concrete_func = conv2d_transpose.get_concrete_function(
+            tf.TensorSpec(ifm_shape, dtype=tf.float32)
+        )
+
+        def representative_dataset():
+            for _ in range(100):
+                data = np.random.rand(*tuple(ifm_shape))
+                yield [data.astype(np.float32)]
+
+        converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func])
+        converter.optimizations = [tf.lite.Optimize.DEFAULT]
+        converter.representative_dataset = representative_dataset
+        converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
+        converter.inference_input_type = tf.int8
+        converter.inference_output_type = tf.int8
+        tflite_model = converter.convert()
+        tflite_model = tflite.Model.Model.GetRootAsModel(tflite_model, 0)
+
+        mod, params = relay.frontend.from_tflite(
+            tflite_model,
+            shape_dict={"input": ifm_shape},
+            dtype_dict={"input": dtype},
+        )
+        return mod, params
+
+    def verify(ext_func):
+        strided_slice = ext_func.body
+        conv = strided_slice.args[0]
+        ofm_channels = conv.attrs.ofm_channels
+
+        # Check IFM
+        ifm = conv.args[0].checked_type
+        assert list(ifm.shape) == list(ifm_shape)
+        assert str(ifm.dtype) == dtype
+        assert ifm.shape[3] == ofm_channels
+
+        # Check OFM
+        ofm = strided_slice.checked_type
+        assert list(ofm.shape) == list(ofm_shape)
+        assert str(ofm.dtype) == dtype
+        assert ofm.shape[3] == ofm_channels
+
+        # Check weights
+        weights_ohwi = conv.args[1].data.asnumpy()
+        assert str(weights_ohwi.dtype) == dtype
+        assert list(weights_ohwi.shape) == [
+            ofm_channels,
+            kernel_shape[0],
+            kernel_shape[1],
+            ifm_shape[3],
+        ]
+
+        # Check that scale_bias matches weight tensor
+        assert list(conv.args[2].checked_type.shape)[0] == ofm_channels
+
+        # Calculate expected padding for conv2d op
+        if padding == "VALID":
+            expected_padding = [0, 0, 0, 0]
+        elif padding == "SAME":
+            pad_top, pad_bottom = get_pad_value(ofm_shape[1], kernel_shape[0], strides[0])
+            pad_left, pad_right = get_pad_value(ofm_shape[2], kernel_shape[1], strides[1])
+            expected_padding = [pad_top, pad_left, pad_bottom, pad_right]
+        pad_top = kernel_shape[0] - 1 - expected_padding[0]
+        pad_left = kernel_shape[1] - 1 - expected_padding[1]
+        pad_bottom = kernel_shape[0] - 1 - expected_padding[2]
+        pad_right = kernel_shape[1] - 1 - expected_padding[3]
+        if strides == [2, 2]:
+            pad_bottom -= 1
+            pad_right -= 1
+        expected_padding = [pad_top, pad_left, pad_bottom, pad_right]
+        assert list(conv.attrs.padding) == list(expected_padding)
+
+        assert list(conv.attrs.strides) == [1, 1]
+
+    rewriter = legalize.Conv2DTransposeRewriter()
+    pattern_table = [
+        (
+            ethosu.QnnConv2DTransposeParams.composite_name,
+            ethosu.qnn_conv2d_transpose_pattern(),
+            lambda pat: ethosu.QnnConv2DTransposeParams(pat).is_valid(),
+        ),
+    ]
+
+    mod, params = create_tflite_graph()
+    mod["main"] = bind_params_by_name(mod["main"], params)
+    mod = partition_ethosu_by_table(mod, pattern_table)
+
+    mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite(
+        rewriter, mod["tvmgen_default_ethos_u_main_0"]
+    )
+    verify(mod["tvmgen_default_ethos_u_main_0"])
+
+
 if __name__ == "__main__":
     pytest.main([__file__])
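The expected-padding arithmetic in `verify()` leans on `tvm.relay.frontend.tflite.get_pad_value`, which computes the SAME-padding split. A sketch of its assumed behaviour, tying the legalization test back to the worked padding example earlier in this patch:

```python
import math

def get_pad_value(data, kernel, stride):
    """SAME-padding helper (sketch of the assumed semantics of
    tvm.relay.frontend.tflite.get_pad_value): total padding needed so the
    output is ceil(data / stride), split with the extra row/column after."""
    out = int(math.ceil(float(data) / float(stride)))
    pad = max(0, (out - 1) * stride + kernel - data)
    pad_before = pad // 2
    pad_after = pad - pad_before
    return pad_before, pad_after

# For the (1, 2, 2, 1) -> (1, 4, 4, 1) SAME case with a 3x3 kernel and
# stride 2: get_pad_value(4, 3, 2) == (0, 1), so the transpose-conv padding
# is [0, 0, 1, 1] and the legalized conv2d padding becomes [2, 2, 0, 0].
assert get_pad_value(4, 3, 2) == (0, 1)
```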
diff --git a/tests/python/contrib/test_ethosu/test_replace_conv2d.py b/tests/python/contrib/test_ethosu/test_replace_conv2d.py
index 2136b9f6d1b3..67fb2c760962 100644
--- a/tests/python/contrib/test_ethosu/test_replace_conv2d.py
+++ b/tests/python/contrib/test_ethosu/test_replace_conv2d.py
@@ -26,78 +26,251 @@
 from .infra import make_ethosu_conv2d, get_convolutional_args
 
 
+def _create_serial_conv2d_params(
+    ifm_shape,
+    ifm_channels,
+    ofm_channels,
+    kernel_shape,
+    padding,
+    strides,
+    dilation,
+    activation="NONE",
+    ifm_layout="NHWC",
+    ofm_layout="NHWC",
+    rounding_mode="TFL",
+    upscale="NONE",
+):
+    dtype = "int8"
+    dilated_kernel_h = (kernel_shape[0] - 1) * dilation[0] + 1
+    dilated_kernel_w = (kernel_shape[1] - 1) * dilation[1] + 1
+    upscale_factor = 2 if upscale != "NONE" else 1
+
+    if ifm_layout == "NHWC":
+        ifm_stride_c = 1
+        ifm_stride_w = ifm_shape[3]
+        ifm_stride_h = ifm_shape[2] * ifm_shape[3]
+        ofm_height = (
+            ifm_shape[1] * upscale_factor - dilated_kernel_h + padding[0] + padding[2]
+        ) // strides[0] + 1
+        ofm_width = (
+            ifm_shape[2] * upscale_factor - dilated_kernel_w + padding[1] + padding[3]
+        ) // strides[1] + 1
+    else:
+        ifm_stride_w = 16
+        ifm_stride_c = 16 * ifm_shape[3]
+        ifm_stride_h = 16 * ifm_shape[2] * ifm_shape[3]
+        ofm_height = (
+            ifm_shape[1] * upscale_factor - dilated_kernel_h + padding[0] + padding[2]
+        ) // strides[0] + 1
+        ofm_width = (
+            ifm_shape[3] * upscale_factor - dilated_kernel_w + padding[1] + padding[3]
+        ) // strides[1] + 1
+
+    if ofm_layout == "NHWC":
+        ofm_stride_c = 1
+        ofm_stride_w = ofm_channels if ofm_width > 1 else 1
+        ofm_stride_h = ofm_channels * ofm_width if ofm_height > 1 else 1
+    else:
+        ofm_stride_w = 16
+        ofm_stride_c = 16 * ofm_width
+        ofm_stride_h = 16 * ofm_width * ((ofm_channels - 1) // 16 + 1)
+
+    return [
+        dtype,
+        ifm_shape[1],
+        ifm_shape[2] if ifm_layout == "NHWC" else ifm_shape[3],
+        ifm_channels,
+        ifm_shape[1],
+        0,
+        ifm_shape[2] if ifm_layout == "NHWC" else ifm_shape[3],
+        0,
+        0,
+        0,
+        0,
+        0.5,
+        10,
+        ifm_layout,
+        ifm_stride_h,
+        ifm_stride_w,
+        ifm_stride_c,
+        dtype,
+        ofm_height,
+        ofm_width,
+        ofm_channels,
+        ofm_height,
+        0,
+        ofm_width,
+        0,
+        0,
+        0,
+        0,
+        0.25,
+        14,
+        ofm_layout,
+        ofm_stride_h,
+        ofm_stride_w,
+        ofm_stride_c,
+        kernel_shape[1],
+        kernel_shape[0],
+        strides[1],
+        strides[0],
+        dilation[1],
+        dilation[0],
+        12,
+        padding[0],
+        padding[1],
+        padding[2],
+        padding[3],
+        activation,
+        10 if activation == "CLIP" else 0,
+        100 if activation == "CLIP" else 0,
+        rounding_mode,
+        upscale,
+    ]
+
+
 @pytest.mark.parametrize(
     "trial",
     [
-        [(1, 8, 8, 3), 3, 16, (1, 1), (2, 1), (1, 1), (1, 1), "CLIP", "NHWC", "NHWC", "TFL"],
-        [(1, 8, 8, 3), 3, 16, (1, 1), (0, 0), (1, 1), (1, 1), "NONE", "NHWC", "NHWC", "NATURAL"],
-        [(1, 1, 1, 1), 1, 16, (1, 1), (0, 0), (1, 1), (1, 1), "CLIP", "NHWC", "NHWC", "TRUNCATE"],
-        [(1, 7, 9, 4), 4, 13, (3, 2), (1, 2), (2, 1), (1, 2), "NONE", "NHWC", "NHWC", "TFL"],
+        [
+            (1, 8, 8, 3),
+            3,
+            16,
+            (1, 1),
+            (2, 1, 2, 1),
+            (1, 1),
+            (1, 1),
+            "CLIP",
+            "NHWC",
+            "NHWC",
+            "TFL",
+            "NONE",
+        ],
+        [
+            (1, 8, 8, 3),
+            3,
+            16,
+            (1, 1),
+            (0, 0, 0, 0),
+            (1, 1),
+            (1, 1),
+            "NONE",
+            "NHWC",
+            "NHWC",
+            "NATURAL",
+            "NONE",
+        ],
+        [
+            (1, 1, 1, 1),
+            1,
+            16,
+            (1, 1),
+            (0, 0, 0, 0),
+            (1, 1),
+            (1, 1),
+            "CLIP",
+            "NHWC",
+            "NHWC",
+            "TRUNCATE",
+            "NONE",
+        ],
+        [
+            (1, 7, 9, 4),
+            4,
+            13,
+            (3, 2),
+            (1, 2, 1, 2),
+            (2, 1),
+            (1, 2),
+            "NONE",
+            "NHWC",
+            "NHWC",
+            "TFL",
+            "NONE",
+        ],
         [
             (1, 8, 2, 8, 16),
             18,
             12,
             (1, 1),
-            (2, 1),
+            (2, 1, 2, 1),
             (1, 1),
             (1, 1),
             "CLIP",
             "NHCWB16",
             "NHWC",
             "NATURAL",
+            "ZEROS",
         ],
         [
             (1, 7, 9, 4),
             4,
             71,
             (3, 2),
-            (1, 2),
+            (1, 2, 0, 2),
             (2, 1),
             (1, 2),
             "CLIP",
             "NHWC",
             "NHCWB16",
             "TRUNCATE",
+            "ZEROS",
         ],
         [
             (1, 4, 12, 9, 16),
             182,
             67,
             (2, 3),
-            (6, 3),
+            (6, 3, 6, 2),
             (2, 2),
             (1, 1),
             "CLIP",
             "NHCWB16",
             "NHCWB16",
             "TFL",
+            "ZEROS",
+        ],
+        [
+            (1, 7, 9, 4),
+            4,
+            13,
+            (3, 2),
+            (1, 2, 0, 3),
+            (2, 1),
+            (2, 2),
+            "CLIP",
+            "NHWC",
+            "NHWC",
+            "NATURAL",
+            "NEAREST",
         ],
-        [(1, 7, 9, 4), 4, 13, (3, 2), (1, 2), (2, 1), (2, 2), "CLIP", "NHWC", "NHWC", "NATURAL"],
         [
             (1, 7, 9, 4),
             4,
             71,
             (3, 2),
-            (1, 2),
+            (1, 2, 0, 2),
             (2, 1),
             (2, 2),
             "CLIP",
             "NHWC",
             "NHCWB16",
             "TRUNCATE",
+            "NEAREST",
         ],
         [
             (1, 13, 12, 19, 16),
             182,
             67,
             (1, 3),
-            (5, 3),
+            (5, 3, 2, 3),
             (2, 1),
             (2, 1),
             "CLIP",
             "NHCWB16",
             "NHCWB16",
             "TFL",
+            "NEAREST",
         ],
     ],
 )
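The trial tuples above changed shape in two ways: padding is now an explicit [top, left, bottom, right] 4-tuple (several new entries are deliberately asymmetric), and a trailing upscale method was appended. For readers diffing the old and new tables, the old symmetric pairs map as follows (hypothetical helper, not part of the patch):

```python
def to_4_tuple(padding_2):
    """Expand the old (height, width) symmetric padding used by the previous
    trials into the explicit [top, left, bottom, right] form that the
    serialized conv2d parameters now carry."""
    pad_h, pad_w = padding_2
    return [pad_h, pad_w, pad_h, pad_w]

assert to_4_tuple((2, 1)) == [2, 1, 2, 1]  # matches the first rewritten trial
```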
@@ -114,6 +287,7 @@ def _get_func(
         ifm_layout,
         ofm_layout,
         rounding_mode,
+        upscale,
     ):
         ifm = relay.var("ifm", shape=ifm_shape, dtype="int8")
         conv = make_ethosu_conv2d(
@@ -128,6 +302,7 @@ def _get_func(
             ifm_layout=ifm_layout,
             ofm_layout=ofm_layout,
             rounding_mode=rounding_mode,
+            upscale=upscale,
         )
         func = relay.Function(relay.analysis.free_vars(conv), conv)
         func = run_opt_pass(func, relay.transform.InferType())
@@ -149,95 +324,8 @@ def _visit(stmt):
             data.append(get_convolutional_args(stmt, remove_constants=True))
 
     tvm.tir.stmt_functor.post_order_visit(mod["main"].body, _visit)
-    (
-        ifm_shape,
-        ifm_channels,
-        ofm_channels,
-        kernel_shape,
-        padding,
-        strides,
-        dilation,
-        activation,
-        ifm_layout,
-        ofm_layout,
-        rounding_mode,
-    ) = trial
-    dilated_kernel_h = (kernel_shape[0] - 1) * dilation[0] + 1
-    dilated_kernel_w = (kernel_shape[1] - 1) * dilation[1] + 1
-    if ifm_layout == "NHWC":
-        ifm_stride_c = 1
-        ifm_stride_w = ifm_shape[3]
-        ifm_stride_h = ifm_shape[2] * ifm_shape[3]
-        ofm_height = (ifm_shape[1] - dilated_kernel_h + padding[0] + padding[0]) // strides[0] + 1
-        ofm_width = (ifm_shape[2] - dilated_kernel_w + padding[1] + padding[1]) // strides[1] + 1
-    else:
-        ifm_stride_w = 16
-        ifm_stride_c = 16 * ifm_shape[3]
-        ifm_stride_h = 16 * ifm_shape[2] * ifm_shape[3]
-        ofm_height = (ifm_shape[1] - dilated_kernel_h + padding[0] + padding[0]) // strides[0] + 1
-        ofm_width = (ifm_shape[3] - dilated_kernel_w + padding[1] + padding[1]) // strides[1] + 1
-
-    if ofm_layout == "NHWC":
-        ofm_stride_c = 1
-        ofm_stride_w = ofm_channels if ofm_width > 1 else 1
-        ofm_stride_h = ofm_channels * ofm_width if ofm_height > 1 else 1
-    else:
-        ofm_stride_w = 16
-        ofm_stride_c = 16 * ofm_width
-        ofm_stride_h = 16 * ofm_width * ((ofm_channels - 1) // 16 + 1)
-    answer = [
-        "int8",
-        ifm_shape[1],
-        ifm_shape[2] if ifm_layout == "NHWC" else ifm_shape[3],
-        ifm_channels,
-        ifm_shape[1],
-        0,
-        ifm_shape[2] if ifm_layout == "NHWC" else ifm_shape[3],
-        0,
-        0,
-        0,
-        0,
-        0.5,
-        10,
-        ifm_layout,
-        ifm_stride_h,
-        ifm_stride_w,
-        ifm_stride_c,
-        "int8",
-        ofm_height,
-        ofm_width,
-        ofm_channels,
-        ofm_height,
-        0,
-        ofm_width,
-        0,
-        0,
-        0,
-        0,
-        0.25,
-        14,
-        ofm_layout,
-        ofm_stride_h,
-        ofm_stride_w,
-        ofm_stride_c,
-        kernel_shape[1],
-        kernel_shape[0],
-        strides[1],
-        strides[0],
-        dilation[1],
-        dilation[0],
-        12,
-        padding[0],
-        padding[1],
-        padding[0],
-        padding[1],
-        activation,
-        10 if activation == "CLIP" else 0,
-        100 if activation == "CLIP" else 0,
-        rounding_mode,
-        "NONE",
-    ]
+    answer = _create_serial_conv2d_params(*trial)
     assert data[0] == answer, data[0]
True, "global_symbol": "main", "tir.noalias": True}) + buffer = T.buffer_var("uint8", "") + buffer_1 = T.buffer_var("uint8", "") + buffer_2 = T.buffer_var("uint8", "") + buffer_3 = T.buffer_var("uint8", "") + # body + ethosu_write_1 = T.allocate([4096], "int8", "global", annotations={"disable_lower_builtin":True}) + T.evaluate(T.call_extern("ethosu_conv2d", "int8", 4, 8, 3, 4, 0, 8, T.load("int8", placeholder.data, 0), 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 3, 1, "int8", 8, 16, 32, 8, 0, 16, T.load("int8", ethosu_write_1, 0), 0, 0, 0, T.float32(0.25), 14, "NHWC", 512, 32, 1, 1, 1, 1, 1, 1, 1, T.load("uint8", buffer, 0), 160, 12, T.load("uint8", buffer_1, 0), 320, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "ZEROS", dtype="handle")) + T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 16, 32, 8, 0, 16, T.load("int8", ethosu_write_1, 0), 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 32, 8, 16, 0, 32, T.load("int8", ethosu_write.data, 0), 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 8, 1, 1, 1, 1, 1, 1, 1, T.load("uint8", buffer_2, 0), 304, 12, T.load("uint8", buffer_3, 0), 80, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "ZEROS", dtype="handle")) + T.evaluate(T.call_extern("ethosu_conv2d", "int8", 4, 8, 3, 4, 0, 8, T.load("int8", placeholder.data, 96), 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 3, 1, "int8", 8, 16, 32, 8, 0, 16, T.load("int8", ethosu_write_1, 0), 0, 0, 0, T.float32(0.25), 14, "NHWC", 512, 32, 1, 1, 1, 1, 1, 1, 1, T.load("uint8", buffer, 0), 160, 12, T.load("uint8", buffer_1, 0), 320, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "ZEROS", dtype="handle")) + T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 16, 32, 8, 0, 16, T.load("int8", ethosu_write_1, 0), 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 32, 8, 16, 0, 32, T.load("int8", ethosu_write.data, 4096), 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 8, 1, 1, 1, 1, 1, 1, 1, T.load("uint8", buffer_2, 0), 304, 12, T.load("uint8", buffer_3, 0), 80, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "ZEROS", dtype="handle")) + __tvm_meta__ = None + + +@tvm.script.ir_module +class Conv2dDoubleCascade6: + @T.prim_func + def main(placeholder: T.Buffer[(1, 8, 1, 8, 16), "int8"], ethosu_write: T.Buffer[(1, 32, 2, 32, 16), "int8"]) -> None: + # function attr dict + T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) + buffer = T.buffer_var("uint8", "") + buffer_1 = T.buffer_var("uint8", "") + buffer_2 = T.buffer_var("uint8", "") + buffer_3 = T.buffer_var("uint8", "") + # body + ethosu_write_1 = T.allocate([12288], "int8", "global", annotations={"disable_lower_builtin":True}) + T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 8, 3, 8, 0, 8, T.load("int8", placeholder.data, 0), 0, 0, 0, T.float32(0.5), 10, "NHCWB16", 128, 16, 1, "int8", 16, 16, 35, 16, 0, 16, T.load("int8", ethosu_write_1, 0), 0, 0, 0, T.float32(0.25), 14, "NHCWB16", 768, 16, 256, 3, 3, 1, 1, 1, 1, T.load("uint8", buffer, 0), 1456, 12, T.load("uint8", buffer_1, 0), 352, 1, 1, 1, 1, "NONE", 0, 0, "TFL", "NEAREST", dtype="handle")) + T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 35, 16, 0, 16, T.load("int8", ethosu_write_1, 0), 0, 0, 0, T.float32(0.5), 10, "NHCWB16", 768, 16, 256, "int8", 32, 32, 26, 32, 0, 32, T.load("int8", ethosu_write.data, 0), 0, 0, 0, T.float32(0.25), 14, "NHCWB16", 1024, 16, 512, 3, 3, 1, 1, 1, 1, T.load("uint8", buffer_2, 0), 11040, 12, T.load("uint8", buffer_3, 0), 272, 1, 1, 1, 1, "NONE", 0, 0, "TFL", "NEAREST", dtype="handle")) + __tvm_meta__ = None # fmt: on @@ -331,10 +455,11 @@ def main(placeholder_5: 
@@ -331,10 +455,11 @@ def main(placeholder_5: T.Buffer[(1, 8, 1, 8, 16), "int8"], ethosu_write_1: T.Bu
             32,
             8,
             (1, 1),
-            (0, 0),
+            (0, 0, 0, 0),
             (1, 1),
             (1, 1),
             "NHWC",
+            "NONE",
             (1, 8, 4, 8),
         ],
         [
@@ -344,10 +469,11 @@ def main(placeholder_5: T.Buffer[(1, 8, 1, 8, 16), "int8"], ethosu_write_1: T.Bu
             32,
             8,
             (3, 3),
-            (1, 1),
+            (1, 1, 1, 1),
             (1, 1),
             (1, 1),
             "NHWC",
+            "NONE",
             (1, 4, 8, 8),
         ],
         [
@@ -357,10 +483,11 @@ def main(placeholder_5: T.Buffer[(1, 8, 1, 8, 16), "int8"], ethosu_write_1: T.Bu
             32,
             8,
             (3, 2),
-            (2, 1),
+            (2, 1, 2, 1),
             (1, 2),
             (1, 2),
             "NHWC",
+            "NONE",
             (1, 8, 4, 8),
         ],
         [
@@ -370,12 +497,41 @@ def main(placeholder_5: T.Buffer[(1, 8, 1, 8, 16), "int8"], ethosu_write_1: T.Bu
             35,
             26,
             (3, 3),
-            (1, 1),
+            (1, 1, 1, 1),
             (1, 1),
             (1, 1),
             "NHCWB16",
+            "NONE",
             (1, 4, 2, 8, 16),
         ],
+        [
+            Conv2dDoubleCascade5,
+            (1, 8, 8, 3),
+            3,
+            32,
+            8,
+            (1, 1),
+            (0, 0, 0, 0),
+            (1, 1),
+            (1, 1),
+            "NHWC",
+            "ZEROS",
+            (1, 16, 32, 8),
+        ],
+        [
+            Conv2dDoubleCascade6,
+            (1, 8, 1, 8, 16),
+            3,
+            35,
+            26,
+            (3, 3),
+            (1, 1, 1, 1),
+            (1, 1),
+            (1, 1),
+            "NHCWB16",
+            "NEAREST",
+            (1, 32, 2, 32, 16),
+        ],
     ],
 )
 def test_conv2d_double_cascade(trial):
@@ -389,6 +545,7 @@ def _get_func(
         strides,
         dilation,
         layout,
+        upscale,
     ):
         ifm = relay.var("ifm", shape=ifm_shape, dtype="int8")
         conv1 = make_ethosu_conv2d(
@@ -402,6 +559,7 @@ def _get_func(
             activation="NONE",
             ifm_layout=layout,
             ofm_layout=layout,
+            upscale=upscale,
         )
         conv2 = make_ethosu_conv2d(
             conv1,
@@ -414,6 +572,7 @@ def _get_func(
             activation="NONE",
             ifm_layout=layout,
             ofm_layout=layout,
+            upscale=upscale,
         )
         func = relay.Function(relay.analysis.free_vars(conv2), conv2)
         func = run_opt_pass(func, relay.transform.InferType())
diff --git a/tests/python/contrib/test_ethosu/test_type_inference.py b/tests/python/contrib/test_ethosu/test_type_inference.py
index b25f92edc274..1f304c117ffd 100644
--- a/tests/python/contrib/test_ethosu/test_type_inference.py
+++ b/tests/python/contrib/test_ethosu/test_type_inference.py
@@ -91,6 +91,32 @@ def test_ethosu_conv2d_invalid_dtypes(ifm_dtype, weight_dtype, scale_bias_dtype)
     run_opt_pass(func, relay.transform.InferType())
 
 
+def test_ethosu_conv2d_invalid_upscale_method():
+    invalid_upscale_method = "FOO"
+    ifm_channels = 55
+    ofm_channels = 122
+    kernel_shape = (3, 2)
+    padding = (0, 1, 2, 3)
+    strides = (1, 2)
+    dilation = (2, 1)
+    ifm = relay.var("ifm", shape=(1, 56, 72, 55), dtype="int8")
+    conv2d = make_ethosu_conv2d(
+        ifm,
+        ifm_channels,
+        ofm_channels,
+        kernel_shape,
+        padding,
+        strides,
+        dilation,
+        weight_dtype="int8",
+        scale_bias_dtype="uint8",
+        upscale=invalid_upscale_method,
+    )
+    func = relay.Function([ifm], conv2d)
+    with pytest.raises(TVMError):
+        run_opt_pass(func, relay.transform.InferType())
+
+
 @pytest.mark.parametrize(
     "ifm_shape, ifm_layout", [((1, 46, 71, 55), "NHWC"), ((1, 46, 4, 71, 16), "NHCWB16")]
 )