From d590490a8b28232ac73e173f3b18ce34c4ca9d10 Mon Sep 17 00:00:00 2001
From: Dhruv Chauhan <89972057+dchauhan-arm@users.noreply.github.com>
Date: Thu, 9 Dec 2021 13:28:26 +0000
Subject: [PATCH] [microNPU] Update Conv2D Tests to Use TF API to Gen Test
 Cases (#9508)

* The current conv2d tests compare the conv2d operator against TVM's
  execution of the default conv2d schedule as defined in TOPI, which is not
  bit-exact with the TFLite runtime's implementation, so a tolerance of "1"
  in the quantized 8-bit domain had to be used.

* Converts the current conv2d tests to use TensorFlow APIs to create test
  cases for conv2d and compare against the TFLite runtime directly.
---
 .../relay/backend/contrib/ethosu/__init__.py  |   1 -
 .../relay/backend/contrib/ethosu/errors.py    |  35 ---
 .../relay/backend/contrib/ethosu/legalize.py  |   3 -
 .../contrib/test_ethosu/relay_ir_builder.py   | 295 -----------------
 .../contrib/test_ethosu/test_codegen.py       | 297 +++++++++++-------
 .../contrib/test_ethosu/test_legalize.py      | 226 +++++++------
 6 files changed, 295 insertions(+), 562 deletions(-)
 delete mode 100644 python/tvm/relay/backend/contrib/ethosu/errors.py
 delete mode 100644 tests/python/contrib/test_ethosu/relay_ir_builder.py

diff --git a/python/tvm/relay/backend/contrib/ethosu/__init__.py b/python/tvm/relay/backend/contrib/ethosu/__init__.py
index ed04c202d8af..c4948d54dc26 100644
--- a/python/tvm/relay/backend/contrib/ethosu/__init__.py
+++ b/python/tvm/relay/backend/contrib/ethosu/__init__.py
@@ -18,7 +18,6 @@
 from . import util
 from . import legalize
 from . import preprocess
-from . import errors
 from . import codegen
 from . import vela_api
 from . import tir_to_cs_translator
diff --git a/python/tvm/relay/backend/contrib/ethosu/errors.py b/python/tvm/relay/backend/contrib/ethosu/errors.py
deleted file mode 100644
index 65f3711838be..000000000000
--- a/python/tvm/relay/backend/contrib/ethosu/errors.py
+++ /dev/null
@@ -1,35 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# pylint: disable=super-init-not-called -"""This module defines all error types associated with the Arm(R) Ethos(TM)-U NPU code generator.""" - - -class EthosUCodegenError(Exception): - """Base class for all exceptions related to code generation""" - - def __init__(self, data): - self.message = "EthosUCodegenError:" + data - - def __str__(self): - return self.message - - -class UnsupportedLayout(EthosUCodegenError): - """Raised when unsupported layout is encountered during code generation.""" - - def __init__(self, layout): - super().__init__(f"Unsupported Layout {layout}") diff --git a/python/tvm/relay/backend/contrib/ethosu/legalize.py b/python/tvm/relay/backend/contrib/ethosu/legalize.py index b2264f32611e..0db8db912a51 100644 --- a/python/tvm/relay/backend/contrib/ethosu/legalize.py +++ b/python/tvm/relay/backend/contrib/ethosu/legalize.py @@ -30,7 +30,6 @@ from tvm.relay.dataflow_pattern import rewrite from tvm.relay.dataflow_pattern import CallPattern from tvm.relay.backend.contrib.ethosu import op as ethosu_ops # type: ignore -from tvm.relay.backend.contrib.ethosu.errors import UnsupportedLayout # type: ignore from tvm.relay.backend.contrib.ethosu import vela_api from tvm.relay.backend.contrib.ethosu import util from tvm.relay.op.contrib import ethosu as ethosu_patterns # type: ignore @@ -266,8 +265,6 @@ def callback( channels_map = { "NHWC": 3, } - if str(params.ofm.layout) not in channels_map.keys(): - raise UnsupportedLayout(str(params.ofm.layout)) kernel_size_map = { "HWIO": params.weights.shape[0:2], "OHWI": params.weights.shape[1:3], diff --git a/tests/python/contrib/test_ethosu/relay_ir_builder.py b/tests/python/contrib/test_ethosu/relay_ir_builder.py deleted file mode 100644 index 6169a3e46520..000000000000 --- a/tests/python/contrib/test_ethosu/relay_ir_builder.py +++ /dev/null @@ -1,295 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-"""Helper module to build relay operations for testing""" - -from pathlib import Path -import numpy as np -import math - -import tvm -from tvm import relay -from tvm.relay.op.contrib import get_pattern_table -from tvm.relay import qnn -from tvm.relay.backend.contrib.ethosu.util import get_range_for_dtype_str - - -class TensorType: - """A data structure to capture tensor parameters""" - - def __init__(self): - self.shape = None - self.dtype = None - self.zp = None - self.sc = None - self.layout = None - - def get_dim_size(self, dim): - for idx, char in enumerate(self.layout): - if dim == char: - return self.shape[idx] - return None - - def get_dim_index(self, dim): - for idx, char in enumerate(self.layout): - if dim == char: - return idx - return None - - -class QnnConv2DParams: - """A data structure to capture relay.qnn.op.conv2D parameters""" - - def __init__(self, dtype): - self.ifm = TensorType() - self.ofm = TensorType() - self.kernel = TensorType() - - # default values - self.ifm.dtype = dtype - self.ifm.layout = "NHWC" - ifm_min, ifm_max = get_range_for_dtype_str(self.ifm.dtype) - self.ifm.zp = relay.const(np.random.randint(ifm_min, ifm_max), "int32") - self.ifm.sc = relay.const(np.random.random() * 2, "float32") - self.kernel.dtype = dtype - self.kernel.layout = "HWIO" - kernel_min, kernel_max = get_range_for_dtype_str(self.kernel.dtype) - self.kernel.zp = relay.const(np.random.randint(kernel_min, kernel_max), "int32") - self.kernel.sc = relay.const(np.random.random() * 2, "float32") - self.ofm.layout = "NHWC" - self.ofm.dtype = dtype - ofm_min, ofm_max = get_range_for_dtype_str(self.ofm.dtype) - self.ofm.zp = relay.const(np.random.randint(ofm_min, ofm_max), "int32") - self.ofm.sc = relay.const(np.random.random() * 2, "float32") - self.dilation = (1, 1) - - self.strides = None - self.pad = None - self.activation = "NONE" - self.clip_min = 0 - self.clip_max = 0 - - def update_output_qnn_params( - self, input_dtype="uint8", kernel_dtype="uint8", output_dtype="uint8" - ): - _, dtype_max = get_range_for_dtype_str(input_dtype) - input_max = self.ifm.sc.data.asnumpy() * (dtype_max - self.ifm.zp.data.asnumpy()) - input_min = -self.ifm.sc.data.asnumpy() * self.ifm.zp.data.asnumpy() - _, dtype_max = get_range_for_dtype_str(kernel_dtype) - kernel_max = np.max( - self.kernel.sc.data.asnumpy() * (dtype_max - self.kernel.zp.data.asnumpy()) - ) - kernel_min = np.min(-self.kernel.sc.data.asnumpy() * self.kernel.zp.data.asnumpy()) - kernel_h = self.kernel.get_dim_size("H") - kernel_w = self.kernel.get_dim_size("W") - channels = self.kernel.get_dim_size("I") - output_limits = [ - kernel_max * kernel_h * kernel_w * channels * input_max, - kernel_min * kernel_h * kernel_w * channels * input_max, - kernel_min * kernel_h * kernel_w * channels * input_min, - kernel_max * kernel_h * kernel_w * channels * input_min, - ] - output_max = max(output_limits) - output_min = min(output_limits) - dtype_min, dtype_max = get_range_for_dtype_str(input_dtype) - self.ofm.sc = relay.const((output_max - output_min) / (dtype_max - dtype_min), "float32") - self.ofm.zp = relay.const(-int(output_min / self.ofm.sc.data.asnumpy()), "int32") - - -class PoolingParams: - """A data structure to capture relay.op.max_pool2d / - relay.op.avg_pool2d parameters - """ - - def __init__(self, dtype): - self.type = None - self.size = None - self.strides = None - self.pad = None - self.layout = None - self.ifm = TensorType() - self.ofm = TensorType() - - # default values - self.ifm.dtype = dtype - self.ifm.layout = "NHWC" - self.ifm.zp = 
relay.const(np.random.randint(0, 255), "int32") - self.ifm.sc = relay.const(np.random.random() * 2, "float32") - self.ofm.zp = relay.const(np.random.randint(0, 255), "int32") - self.ofm.sc = relay.const(np.random.random() * 2, "float32") - self.ofm.dtype = dtype - self.dilation = (1, 1) - - -class AddParams: - """A data structure to capture relay.qnn.op.add parameters""" - - def __init__(self, dtype): - self.ifm0 = TensorType() - self.ifm1 = TensorType() - self.ofm = TensorType() - - # default values - self.ifm0.dtype = dtype - self.ifm0.zp = relay.const(np.random.randint(0, 255), "int32") - self.ifm0.sc = relay.const(np.random.random() * 2, "float32") - self.ifm1.dtype = dtype - self.ifm1.zp = relay.const(np.random.randint(0, 255), "int32") - self.ifm1.sc = relay.const(np.random.random() * 2, "float32") - self.update_output_qnn_params() - self.ofm.dtype = dtype - - def update_output_qnn_params(self): - ti = np.iinfo(self.ifm0.dtype) - dtype_min, dtype_max = int(ti.min), int(ti.max) - input1_max = self.ifm0.sc.data.asnumpy() * (dtype_max - self.ifm0.zp.data.asnumpy()) - input1_min = (dtype_min - self.ifm0.sc.data.asnumpy()) * self.ifm0.zp.data.asnumpy() - input2_max = self.ifm1.sc.data.asnumpy() * (dtype_max - self.ifm1.zp.data.asnumpy()) - input2_min = (dtype_min - self.ifm1.sc.data.asnumpy()) * self.ifm1.zp.data.asnumpy() - output_max = input1_max + input2_max - output_min = input1_min + input2_min - self.ofm.sc = relay.const((output_max - output_min) / dtype_max, "float32") - self.ofm.zp = relay.const( - (dtype_min - int(output_min / self.ofm.sc.data.asnumpy())), "int32" - ) - - -def get_pad_value(data, kernel, stride): - """Get the pad tuple of value for SAME padding""" - - out = int(math.ceil(float(data) / float(stride))) - pad = max(0, (out - 1) * stride + kernel - data) - pad_before = pad // 2 - pad_after = pad - pad_before - return pad_before, pad_after - - -def create_qnn_conv2d(qnn_conv2d_params, ifm_expr): - """Create a relay.Expr of relay.qnn.conv2D given the parameters""" - v_params = list() - params = { - "kernel_size": [ - qnn_conv2d_params.kernel.get_dim_size("H"), - qnn_conv2d_params.kernel.get_dim_size("W"), - ], - "strides": [qnn_conv2d_params.strides[0], qnn_conv2d_params.strides[1]], - "dilation": [qnn_conv2d_params.dilation[0], qnn_conv2d_params.dilation[1]], - "padding": [0, 0, 0, 0], - "data_layout": qnn_conv2d_params.ifm.layout, - } - dilated_kernel_h = ( - qnn_conv2d_params.dilation[0] * (qnn_conv2d_params.kernel.get_dim_size("H") - 1) + 1 - ) - dilated_kernel_w = ( - qnn_conv2d_params.dilation[1] * (qnn_conv2d_params.kernel.get_dim_size("W") - 1) + 1 - ) - if qnn_conv2d_params.pad == "SAME": - pad_top, pad_bottom = get_pad_value( - qnn_conv2d_params.ifm.get_dim_size("H"), dilated_kernel_h, qnn_conv2d_params.strides[0] - ) - pad_left, pad_right = get_pad_value( - qnn_conv2d_params.ifm.get_dim_size("W"), dilated_kernel_w, qnn_conv2d_params.strides[1] - ) - do_pad = not (pad_top == 0 and pad_bottom == 0 and pad_left == 0 and pad_right == 0) - if do_pad: - params["padding"] = [pad_top, pad_left, pad_bottom, pad_right] - qnn_conv2d_params.pad = params["padding"] - params["input_zero_point"] = qnn_conv2d_params.ifm.zp - params["kernel_zero_point"] = qnn_conv2d_params.kernel.zp - params["out_dtype"] = "int32" - params["input_scale"] = qnn_conv2d_params.ifm.sc - params["kernel_scale"] = qnn_conv2d_params.kernel.sc - params["channels"] = int(qnn_conv2d_params.kernel.get_dim_size("O")) - params["kernel_layout"] = qnn_conv2d_params.kernel.layout - k_shape = 
qnn_conv2d_params.kernel.shape - k_dtype = qnn_conv2d_params.kernel.dtype - w = tvm.nd.array( - np.random.randint( - np.iinfo(k_dtype).min, high=np.iinfo(k_dtype).max, size=k_shape, dtype=k_dtype - ) - ) - weight_expr = relay.const(w, k_dtype) - v_params.append(w) - qnn_conv2d_expr = qnn.op.conv2d(ifm_expr, weight_expr, **params) - b = tvm.nd.array( - np.random.randint( - 0, high=10, size=(qnn_conv2d_params.kernel.get_dim_size("O")), dtype="int32" - ) - ) - v_params.append(b) - bias_expr = relay.const(b, "int32") - bias = relay.nn.bias_add( - qnn_conv2d_expr, bias_expr, axis=qnn_conv2d_params.ifm.get_dim_index("C") - ) - bias_scale = relay.const( - qnn_conv2d_params.ifm.sc.data.asnumpy() * qnn_conv2d_params.kernel.sc.data.asnumpy(), - "float32", - ) - req_expr = relay.qnn.op.requantize( - bias, - bias_scale, # input zero scale - relay.const(0, "int32"), # input zero point - qnn_conv2d_params.ofm.sc, # output zero scale - qnn_conv2d_params.ofm.zp, # output zero point - out_dtype=qnn_conv2d_params.ofm.dtype, - ) - if qnn_conv2d_params.activation != "NONE": - assert qnn_conv2d_params.activation == "CLIP" - clip_expr = relay.clip(req_expr, qnn_conv2d_params.clip_min, qnn_conv2d_params.clip_max) - return clip_expr, v_params - - return req_expr, v_params - - -def create_pool2d(pooling_params, ifm_expr): - """Create a relay pooling operation""" - assert pooling_params.ifm.layout == "NHWC" - params = { - "pool_size": (pooling_params.size[0], pooling_params.size[1]), - "strides": (pooling_params.strides[0], pooling_params.strides[1]), - "padding": [0, 0], - "layout": "NHWC", - } - if pooling_params.pad == "SAME": - pad_top, pad_bottom = get_pad_value( - pooling_params.ifm.shape[1], pooling_params.size[0], pooling_params.strides[0] - ) - pad_left, pad_right = get_pad_value( - pooling_params.ifm.shape[2], pooling_params.size[1], pooling_params.strides[1] - ) - params["padding"] = [pad_top, pad_left, pad_bottom, pad_right] - if pooling_params.type == "MAX": - out = relay.op.nn.max_pool2d(ifm_expr, **params) - else: - assert pooling_params.type == "AVG" - out = relay.op.cast(ifm_expr, dtype="int32") - out = relay.op.nn.avg_pool2d(out, **params) - out = relay.op.cast(out, dtype=pooling_params.ofm.dtype) - return out - - -def create_qnn_add(ifm0_expr, ifm1_expr, add_params): - add = relay.qnn.op.add( - lhs=ifm0_expr, - rhs=ifm1_expr, - lhs_scale=add_params.ifm0.sc, - lhs_zero_point=add_params.ifm0.zp, - rhs_scale=add_params.ifm1.sc, - rhs_zero_point=add_params.ifm1.zp, - output_scale=add_params.ofm.sc, - output_zero_point=add_params.ofm.zp, - ) - return add diff --git a/tests/python/contrib/test_ethosu/test_codegen.py b/tests/python/contrib/test_ethosu/test_codegen.py index 21e86c866512..0707ec27ca27 100644 --- a/tests/python/contrib/test_ethosu/test_codegen.py +++ b/tests/python/contrib/test_ethosu/test_codegen.py @@ -18,22 +18,25 @@ import pytest pytest.importorskip("ethosu.vela") + import numpy as np import tflite.Model import tvm import tensorflow as tf from tvm import relay + from tvm.relay.expr_functor import ExprMutator from tvm.relay.op.annotation import compiler_begin, compiler_end from tvm.relay.backend.contrib.ethosu import util from tvm.relay.backend.contrib.ethosu import preprocess + from tvm.relay.op.contrib.ethosu import partition_for_ethosu from tests.python.relay.aot.aot_test_utils import generate_ref_data -from . import relay_ir_builder from . 
import infra + ACCEL_TYPES = ["ethos-u55-256", "ethos-u55-128", "ethos-u55-64", "ethos-u55-32"] @@ -51,122 +54,192 @@ def get_shape_expr(in_expr, out_expr): return shape -@pytest.mark.parametrize( - "accel_type", - ACCEL_TYPES, -) -def test_ethosu_conv2d(accel_type): - def create_graph_single(input_tensor_name, input_tensor_shape, input_tensor_dtype): - c1_params = relay_ir_builder.QnnConv2DParams(input_tensor_dtype) - c1_params.ifm.shape = input_tensor_shape - c1_params.kernel.shape = (3, 3, c1_params.ifm.shape[3], 32) - c1_params.kernel.sc = relay.const(np.random.rand(32) * 2, "float32") - c1_params.strides = (1, 1) - c1_params.pad = "VALID" - c1_params.update_output_qnn_params( - input_tensor_dtype, input_tensor_dtype, input_tensor_dtype - ) - input0 = relay.var(input_tensor_name, shape=c1_params.ifm.shape, dtype=c1_params.ifm.dtype) - c1, new_params = relay_ir_builder.create_qnn_conv2d(c1_params, input0) - c1_params.ofm.shape = get_shape_expr(input0, c1) - - f = relay.Function([input0], c1) - mod = tvm.IRModule() - mod["main"] = f - return mod, [c1_params] - - def create_graph_double(input_tensor_name, input_tensor_shape, input_tensor_dtype): - c1_params = relay_ir_builder.QnnConv2DParams(input_tensor_dtype) - c1_params.ifm.shape = input_tensor_shape - c1_params.kernel.shape = (7, 7, c1_params.ifm.shape[3], 8) - c1_params.strides = (2, 2) - c1_params.pad = "VALID" - c1_params.update_output_qnn_params( - input_tensor_dtype, input_tensor_dtype, input_tensor_dtype - ) - input0 = relay.var(input_tensor_name, shape=c1_params.ifm.shape, dtype=c1_params.ifm.dtype) - c1, new_params = relay_ir_builder.create_qnn_conv2d(c1_params, input0) - c1_params.ofm.shape = get_shape_expr(input0, c1) - - c2_params = relay_ir_builder.QnnConv2DParams(input_tensor_dtype) - c2_params.ifm.shape = c1_params.ofm.shape - c2_params.kernel.shape = (5, 5, c2_params.ifm.shape[3], 16) - c2_params.strides = (1, 1) - c2_params.pad = "SAME" - c2_params.update_output_qnn_params() - c2, new_params = relay_ir_builder.create_qnn_conv2d(c2_params, c1) - c2_params.ofm.shape = get_shape_expr(input0, c2) - - f = relay.Function([input0], c2) - mod = tvm.IRModule() - mod["main"] = f - return mod, [c2_params, c1_params] - - def create_graph_activation(input_tensor_name, input_tensor_shape, input_tensor_dtype): - c1_params = relay_ir_builder.QnnConv2DParams(input_tensor_dtype) - c1_params.ifm.shape = input_tensor_shape - c1_params.kernel.shape = (7, 7, c1_params.ifm.shape[3], 8) - c1_params.strides = (2, 2) - c1_params.pad = "VALID" - c1_params.activation = "CLIP" - c1_params.clip_min = 90 - c1_params.clip_max = 110 - c1_params.update_output_qnn_params( - input_tensor_dtype, input_tensor_dtype, input_tensor_dtype - ) - input0 = relay.var(input_tensor_name, shape=c1_params.ifm.shape, dtype=c1_params.ifm.dtype) - c1, new_params = relay_ir_builder.create_qnn_conv2d(c1_params, input0) - c1_params.ofm.shape = get_shape_expr(input0, c1) - - c2_params = relay_ir_builder.QnnConv2DParams(input_tensor_dtype) - c2_params.ifm.shape = c1_params.ofm.shape - c2_params.kernel.shape = (5, 5, c2_params.ifm.shape[3], 16) - c2_params.strides = (1, 1) - c2_params.pad = "SAME" - c2_params.update_output_qnn_params() - c2, new_params = relay_ir_builder.create_qnn_conv2d(c2_params, c1) - c2_params.ofm.shape = get_shape_expr(input0, c2) - - f = relay.Function([input0], c2) - mod = tvm.IRModule() - mod["main"] = f - return mod, [c2_params, c1_params] - - test_cases = [ - (create_graph_single, ["input", (1, 300, 300, 3), "int8"]), - (create_graph_double, 
["input", (1, 128, 256, 4), "int8"]), - (create_graph_activation, ["input", (1, 64, 100, 4), "int8"]), - ] - np.random.seed(42) - for test_case in test_cases: - relay_module, conv_params = test_case[0](*test_case[1]) - input_tensor, input_shape, input_dtype = test_case[1] - mod = partition_for_ethosu(relay_module) - - # Generate reference data - in_min, in_max = util.get_range_for_dtype_str(input_dtype) - input_data = { - input_tensor: np.random.randint( - in_min, high=in_max, size=input_shape, dtype=input_dtype - ) - } - output_data = generate_ref_data(relay_module, input_data) +@pytest.mark.parametrize("ifm_shape", [(1, 299, 299, 3), (1, 55, 55, 3)]) +@pytest.mark.parametrize("kernel_shape", [(3, 2), (1, 3)]) +@pytest.mark.parametrize("strides, dilation", [((1, 1), (2, 1)), ((3, 2), (1, 1))]) +@pytest.mark.parametrize("padding", ["SAME", "VALID"]) +@pytest.mark.parametrize("accel_type", ACCEL_TYPES) +@pytest.mark.parametrize("activation", ["NONE", "RELU"]) +def test_ethosu_conv2d_single( + ifm_shape, + kernel_shape, + strides, + dilation, + padding, + accel_type, + activation, +): + dtype = "int8" + + def create_tflite_graph_single(): + class Model(tf.Module): + @tf.function + def tf_function(self, x): + # Use tf.nn API to create the model + tf_strides = [1, strides[0], strides[1], 1] + op = tf.nn.conv2d( + x, + filters=tf.constant( + np.random.uniform(size=[kernel_shape[0], kernel_shape[1], 3, 3]), + dtype=tf.float32, + ), + strides=tf_strides, + padding=padding, + dilations=dilation, + ) + if activation: + op = tf.nn.relu(op) + return op - compiled_models = infra.build_source( - mod, input_data, output_data, accel_type, output_tolerance=1 + model = Model() + concrete_func = model.tf_function.get_concrete_function( + tf.TensorSpec(ifm_shape, dtype=tf.float32) ) - # Assumes only two runtime.Modules are created -- i.e. single offload module - ethosu_module = ( - compiled_models[0].executor_factory.lib.imported_modules[0].imported_modules[0] + # Convert the model + def representative_dataset(): + for _ in range(100): + data = np.random.rand(*tuple(ifm_shape)) + yield [data.astype(np.float32)] + + converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func]) + converter.optimizations = [tf.lite.Optimize.DEFAULT] + converter.representative_dataset = representative_dataset + converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] + converter.inference_input_type = tf.int8 + converter.inference_output_type = tf.int8 + tflite_model = converter.convert() + return tflite_model + + tflite_graph = create_tflite_graph_single() + tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0) + + relay_module, params = relay.frontend.from_tflite( + tflite_model, + shape_dict={"input": ifm_shape}, + dtype_dict={"input": dtype}, + ) + mod = partition_for_ethosu(relay_module, params) + + # Generate reference data + input_data, output_data = infra.generate_ref_data_tflite(tflite_graph) + + compiled_models = infra.build_source( + mod, + input_data, + output_data, + accel_type, + ) + + # Assumes only two runtime.Modules are created -- i.e. 
single offload module + ethosu_module = compiled_models[0].executor_factory.lib.imported_modules[0].imported_modules[0] + + # Verify generated C source + get_artifacts = tvm._ffi.get_global_func("runtime.module.ethos-u.get_artifacts") + compilation_artifacts = get_artifacts(ethosu_module) + cmms = bytes.fromhex(compilation_artifacts[0].command_stream) + infra.print_payload(cmms) + infra.verify_source(compiled_models, accel_type) + + +@pytest.mark.parametrize("ifm_shape", [(1, 214, 227, 3), (1, 27, 42, 3)]) +@pytest.mark.parametrize("kernel_shape", [(3, 2), (1, 3)]) +@pytest.mark.parametrize("strides, dilation", [((1, 1), (2, 1)), ((3, 2), (1, 1))]) +@pytest.mark.parametrize("padding", ["SAME", "VALID"]) +@pytest.mark.parametrize("accel_type", ACCEL_TYPES) +@pytest.mark.parametrize("activation", ["NONE", "RELU"]) +def test_ethosu_conv2d_double( + ifm_shape, + kernel_shape, + strides, + dilation, + padding, + accel_type, + activation, +): + dtype = "int8" + + def create_tflite_graph_double(): + class Model(tf.Module): + @tf.function + def tf_function_double(self, x): + # Use tf.nn API to create the model with two convolutions + op = tf.nn.conv2d( + x, + filters=tf.constant( + np.random.uniform(size=[kernel_shape[0], kernel_shape[1], 3, 3]), + dtype=tf.float32, + ), + strides=strides, + padding=padding, + data_format="NHWC", + dilations=dilation, + ) + # Second convolution + op2 = tf.nn.conv2d( + op, + filters=tf.constant( + np.random.uniform(size=(kernel_shape[0], kernel_shape[1], 3, 3)), + dtype=tf.float32, + ), + strides=strides, + padding=padding, + data_format="NHWC", + dilations=dilation, + ) + if activation: + op2 = tf.nn.relu(op2) + return op2 + + model = Model() + concrete_func = model.tf_function_double.get_concrete_function( + tf.TensorSpec(ifm_shape, dtype=tf.float32) ) - # Verify generated C source - get_artifacts = tvm._ffi.get_global_func("runtime.module.ethos-u.get_artifacts") - compilation_artifacts = get_artifacts(ethosu_module) - cmms = bytes.fromhex(compilation_artifacts[0].command_stream) - infra.print_payload(cmms) - infra.verify_source(compiled_models, accel_type) + # Convert the model + def representative_dataset(): + for _ in range(100): + data = np.random.rand(*tuple(ifm_shape)) + yield [data.astype(np.float32)] + + converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func]) + converter.optimizations = [tf.lite.Optimize.DEFAULT] + converter.representative_dataset = representative_dataset + converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] + converter.inference_input_type = tf.int8 + converter.inference_output_type = tf.int8 + tflite_model = converter.convert() + return tflite_model + + tflite_graph = create_tflite_graph_double() + tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0) + + relay_module, params = relay.frontend.from_tflite( + tflite_model, + shape_dict={"input": ifm_shape}, + dtype_dict={"input": dtype}, + ) + mod = partition_for_ethosu(relay_module, params) + + # Generate reference data + input_data, output_data = infra.generate_ref_data_tflite(tflite_graph) + + compiled_models = infra.build_source( + mod, + input_data, + output_data, + accel_type, + ) + + # Assumes only two runtime.Modules are created -- i.e. 
single offload module + ethosu_module = compiled_models[0].executor_factory.lib.imported_modules[0].imported_modules[0] + + # Verify generated C source + get_artifacts = tvm._ffi.get_global_func("runtime.module.ethos-u.get_artifacts") + compilation_artifacts = get_artifacts(ethosu_module) + cmms = bytes.fromhex(compilation_artifacts[0].command_stream) + infra.print_payload(cmms) + infra.verify_source(compiled_models, accel_type) def _compare_ethosu_with_reference( diff --git a/tests/python/contrib/test_ethosu/test_legalize.py b/tests/python/contrib/test_ethosu/test_legalize.py index 946aa951679b..9dc94d96fb27 100644 --- a/tests/python/contrib/test_ethosu/test_legalize.py +++ b/tests/python/contrib/test_ethosu/test_legalize.py @@ -19,6 +19,8 @@ import pytest pytest.importorskip("ethosu.vela") + +import math import numpy as np import tensorflow as tf import tflite.Model @@ -31,7 +33,6 @@ from tvm.relay.backend.contrib.ethosu import util from tvm.relay.build_module import bind_params_by_name -from . import relay_ir_builder from . import infra @@ -229,128 +230,121 @@ def get_shape_expr(in_expr, out_expr): } -def test_ethosu_conv2d_legalize(): - def create_graph_single(input_tensor_name, input_tensor_shape, input_tensor_dtype): - c1_params = relay_ir_builder.QnnConv2DParams(input_tensor_dtype) - c1_params.ifm.shape = input_tensor_shape - c1_params.kernel.shape = (3, 3, c1_params.ifm.shape[3], 32) - c1_params.strides = (1, 1) - c1_params.pad = "VALID" - c1_params.activation = "CLIP" - c1_params.clip_min = 23 - c1_params.clip_max = 180 - input0 = relay.var(input_tensor_name, shape=c1_params.ifm.shape, dtype=c1_params.ifm.dtype) - c1, new_params = relay_ir_builder.create_qnn_conv2d(c1_params, input0) - c1_params.ofm.shape = get_shape_expr(input0, c1) - - f = relay.Function([input0], c1) - mod = tvm.IRModule() - mod["main"] = f - return mod, [c1_params] - - def create_graph_double(input_tensor_name, input_tensor_shape, input_tensor_dtype): - c1_params = relay_ir_builder.QnnConv2DParams(input_tensor_dtype) - c1_params.ifm.shape = input_tensor_shape - c1_params.kernel.shape = (7, 7, c1_params.ifm.shape[3], 8) - c1_params.strides = (2, 2) - c1_params.pad = "VALID" - c1_params.activation = "CLIP" - c1_params.clip_min = 10 - c1_params.clip_max = 240 - input0 = relay.var(input_tensor_name, shape=c1_params.ifm.shape, dtype=c1_params.ifm.dtype) - c1, new_params = relay_ir_builder.create_qnn_conv2d(c1_params, input0) - c1_params.ofm.shape = get_shape_expr(input0, c1) - - c2_params = relay_ir_builder.QnnConv2DParams(input_tensor_dtype) - c2_params.ifm.shape = c1_params.ofm.shape - c2_params.kernel.shape = (5, 5, c2_params.ifm.shape[3], 16) - c2_params.strides = (1, 1) - c2_params.pad = "SAME" - c2, new_params = relay_ir_builder.create_qnn_conv2d(c2_params, c1) - c2_params.ofm.shape = get_shape_expr(input0, c2) - - f = relay.Function([input0], c2) - mod = tvm.IRModule() - mod["main"] = f - return mod, [c2_params, c1_params] +@pytest.mark.parametrize("ifm_shape", [(1, 299, 299, 3), (1, 55, 55, 3)]) +@pytest.mark.parametrize("kernel_shape", [(3, 2), (1, 3)]) +@pytest.mark.parametrize("padding", ["SAME", "VALID"]) +@pytest.mark.parametrize("strides, dilation", [((1, 1), (2, 1)), ((3, 2), (1, 1))]) +@pytest.mark.parametrize("activation", [None, "RELU"]) +def test_tflite_conv2d_legalize(ifm_shape, kernel_shape, padding, strides, dilation, activation): + dtype = "int8" - def verify_tensor(tensor_type, expr): - assert list(tensor_type.shape) == list(expr.checked_type.shape) - assert str(tensor_type.dtype) == 
str(expr.checked_type.dtype) + def create_tflite_graph_single(): + class Model(tf.Module): + @tf.function + def tf_function(self, input_shape): + op = tf.nn.conv2d( + input_shape, + filters=tf.constant( + np.random.uniform(size=(kernel_shape[0], kernel_shape[1], 3, 3)), + dtype=tf.float32, + ), + strides=strides, + padding=padding, + data_format="NHWC", + dilations=dilation, + ) + if activation: + op = tf.nn.relu(op) + return op - def verify_linear(ext_func, conv2d_params): + model = Model() + concrete_func = model.tf_function.get_concrete_function( + tf.TensorSpec(ifm_shape, dtype=tf.float32) + ) + # Convert the model + def representative_dataset(): + for _ in range(100): + data = np.random.rand(*tuple(ifm_shape)) + yield [data.astype(np.float32)] + + converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func]) + converter.optimizations = [tf.lite.Optimize.DEFAULT] + converter.representative_dataset = representative_dataset + converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8] + converter.inference_input_type = tf.int8 + converter.inference_output_type = tf.int8 + tflite_model = converter.convert() + return tflite_model + + def verify(ext_func): op = ext_func.body - for param in conv2d_params: - verify_tensor(param.ifm, op.args[0]) - verify_tensor(param.ofm, op) - - # This will be in OHWI layout - weights_ohwi = op.args[1].data.asnumpy() - weights_layout = str(param.kernel.layout) - weights = np.transpose(weights_ohwi, INVERSE_LAYOUT_TRANSFORM_OHWI_MAP[weights_layout]) - assert weights.shape == param.kernel.shape - assert weights.dtype == param.kernel.dtype - - assert list(op.args[2].checked_type.shape)[0] == weights_ohwi.shape[0] - - assert float(op.attrs.ifm_scale) == float(param.ifm.sc.data.asnumpy()) - assert int(op.attrs.ifm_zero_point) == int(param.ifm.zp.data.asnumpy()) - assert int(op.attrs.weight_zero_point) == int(param.kernel.zp.data.asnumpy()) - assert float(op.attrs.ofm_scale) == float(param.ofm.sc.data.asnumpy()) - assert int(op.attrs.ofm_zero_point) == int(param.ofm.zp.data.asnumpy()) - assert int(op.attrs.ofm_channels) == int(weights_ohwi.shape[0]) - assert list(op.attrs.padding) == list(param.pad) - assert list(op.attrs.strides) == list(param.strides) - assert list(op.attrs.dilation) == list(param.dilation) - assert str(op.attrs.activation) == str(param.activation) - assert int(op.attrs.clip_min) == int(param.clip_min) - assert int(op.attrs.clip_max) == int(param.clip_max) - op = op.args[0] + ofm_channels = op.attrs.ofm_channels - test_cases = [ - (create_graph_single, ["input", (1, 299, 299, 3), "uint8"]), - (create_graph_double, ["input", (1, 128, 256, 4), "uint8"]), - ] - for test_case in test_cases: - mod, conv_params = test_case[0](*test_case[1]) - mod = ethosu.partition_for_ethosu(mod) - mod = legalize.LegalizeConv2D()(mod) - verify_linear(mod["tvmgen_default_ethos_u_main_0"], conv_params) - - -def test_ethosu_conv2d_legalize_errors(): - def create_graph_single_unsupported_ifm_layout( - input_tensor_name, input_tensor_shape, input_tensor_dtype - ): - c1_params = relay_ir_builder.QnnConv2DParams(input_tensor_dtype) - c1_params.ifm.shape = input_tensor_shape - c1_params.ifm.layout = "NCHW" - c1_params.kernel.shape = (3, 3, c1_params.ifm.shape[1], 32) - c1_params.strides = (1, 1) - c1_params.pad = "VALID" - c1_params.activation = "CLIP" - c1_params.clip_min = 23 - c1_params.clip_max = 180 - input0 = relay.var(input_tensor_name, shape=c1_params.ifm.shape, dtype=c1_params.ifm.dtype) - c1, new_params = 
relay_ir_builder.create_qnn_conv2d(c1_params, input0) - c1_params.ofm.shape = get_shape_expr(input0, c1) - - f = relay.Function([input0], c1) - mod = tvm.IRModule() - mod["main"] = f - return mod, [c1_params] + # check IFM + ifm = op.args[0].checked_type + assert list(ifm.shape) == list(ifm_shape) + assert str(ifm.dtype) == dtype + assert ifm.shape[3] == ofm_channels + + # check OFM + ofm = op.checked_type + expected_ofm_shape = infra.compute_ofm_shape( + ifm_shape, padding, kernel_shape, strides, dilation + ) + assert list(ofm.shape) == list(expected_ofm_shape) + assert str(ofm.dtype) == dtype + assert ofm.shape[3] == ofm_channels + + # check weights + weights_ohwi = op.args[1].data.asnumpy() + assert str(weights_ohwi.dtype) == dtype + assert weights_ohwi.shape[0] == ofm_channels + assert weights_ohwi.shape[1] == kernel_shape[0] + assert weights_ohwi.shape[2] == kernel_shape[1] + assert weights_ohwi.shape[3] == 3 - test_cases = [ - (create_graph_single_unsupported_ifm_layout, ["input", (1, 3, 299, 299), "uint8"]), + # Check that scale_bias matches weight tensor + assert list(op.args[2].checked_type.shape)[0] == ofm_channels + + expected_padding = infra.compute_padding_shape( + ifm_shape, + expected_ofm_shape, + padding, + (kernel_shape[0], kernel_shape[1]), + strides, + dilation, + ) + assert list(op.attrs.padding) == list(expected_padding) + assert list(op.attrs.strides) == list(strides) + assert list(op.attrs.dilation) == list(dilation) + if activation == "RELU": + assert str(op.attrs.activation) == "CLIP" + + conv2d_pattern_table = [ + ( + ethosu.QnnConv2DParams.composite_name, + ethosu.qnn_conv2d_pattern(), + lambda pat: ethosu.QnnConv2DParams(pat).is_valid(), + ) ] - for test_case in test_cases: - mod, conv_params = test_case[0](*test_case[1]) - mod = ethosu.partition_for_ethosu(mod) - with pytest.raises( - tvm._ffi.base.TVMError, match="EthosUCodegenError: Unsupported Layout NCHW" - ): - mod = legalize.LegalizeConv2D()(mod) + tflite_graph = create_tflite_graph_single() + tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0) + + mod, conv_params = relay.frontend.from_tflite( + tflite_model, + shape_dict={"input": ifm_shape}, + dtype_dict={"input": dtype}, + ) + + mod["main"] = bind_params_by_name(mod["main"], conv_params) + mod = partition_ethosu_by_table(mod, conv2d_pattern_table) + + mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( + legalize.Conv2DRewriter(), mod["tvmgen_default_ethos_u_main_0"] + ) + + verify(mod["tvmgen_default_ethos_u_main_0"]) @pytest.mark.parametrize("ifm_shape", [(1, 299, 299, 3), (1, 123, 17, 7)])
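

Note on the reference-data step: the converted tests lean on the helper
infra.generate_ref_data_tflite, whose body is not part of this patch. As a
rough illustration of what such a helper does, the sketch below runs the
quantized flatbuffer through the TFLite interpreter with random int8 inputs.
The function name generate_ref_data_tflite_sketch and the tensor-name-keyed
dictionaries are illustrative assumptions, not the actual infra implementation:

    import numpy as np
    import tensorflow as tf

    def generate_ref_data_tflite_sketch(tflite_graph: bytes):
        """Run a serialized TFLite model and return (input_data, output_data)."""
        # Drive the quantized model with the TFLite interpreter so that the
        # expected outputs come from the TFLite runtime itself.
        interpreter = tf.lite.Interpreter(model_content=tflite_graph)
        interpreter.allocate_tensors()

        # Feed random values spanning the input dtype's range, collected
        # into a dictionary keyed by tensor name.
        input_data = {}
        for detail in interpreter.get_input_details():
            info = np.iinfo(detail["dtype"])
            data = np.random.randint(
                info.min, high=info.max, size=detail["shape"], dtype=detail["dtype"]
            )
            interpreter.set_tensor(detail["index"], data)
            input_data[detail["name"]] = data

        interpreter.invoke()

        output_data = {
            detail["name"]: interpreter.get_tensor(detail["index"])
            for detail in interpreter.get_output_details()
        }
        return input_data, output_data

Because the reference outputs now come from the TFLite interpreter rather than
from TVM's default TOPI schedule, they are bit-exact with the TFLite runtime,
which is why the new infra.build_source calls drop the output_tolerance=1
argument the old tests needed.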