From b8c934721446829ba32b1109f493dfcf89e427bd Mon Sep 17 00:00:00 2001
From: Masahiro Masuda <masahi129@gmail.com>
Date: Wed, 17 Feb 2021 10:08:52 +0900
Subject: [PATCH 01/10] begin nhwc roi align

---
 python/tvm/relay/op/strategy/cuda.py     | 19 +++--
 python/tvm/relay/op/strategy/generic.py  | 19 +++--
 python/tvm/relay/op/strategy/x86.py      | 25 +++----
 python/tvm/topi/vision/rcnn/roi_align.py | 89 +++++++++++++++++++++++-
 4 files changed, 126 insertions(+), 26 deletions(-)

diff --git a/python/tvm/relay/op/strategy/cuda.py b/python/tvm/relay/op/strategy/cuda.py
index 032d2dd2c8f1..a7815f94a4d2 100644
--- a/python/tvm/relay/op/strategy/cuda.py
+++ b/python/tvm/relay/op/strategy/cuda.py
@@ -945,12 +945,19 @@ def roi_align_strategy_cuda(attrs, inputs, out_type, target):
     """roi_align cuda strategy"""
     strategy = _op.OpStrategy()
     layout = attrs.layout
-    assert layout == "NCHW", "only support nchw for now"
-    strategy.add_implementation(
-        wrap_compute_roi_align(topi.vision.rcnn.roi_align_nchw),
-        wrap_topi_schedule(topi.cuda.schedule_roi_align),
-        name="roi_align_nchw.cuda",
-    )
+
+    if layout == "NCHW":
+        strategy.add_implementation(
+            wrap_compute_roi_align(topi.vision.rcnn.roi_align_nchw),
+            wrap_topi_schedule(topi.cuda.schedule_roi_align),
+            name="roi_align_nchw.cuda",
+        )
+    else:
+        strategy.add_implementation(
+            wrap_compute_roi_align(topi.vision.rcnn.roi_align_nhwc),
+            wrap_topi_schedule(topi.cuda.schedule_roi_align),
+            name="roi_align_nhwc.cuda",
+        )
     return strategy
 
 
diff --git a/python/tvm/relay/op/strategy/generic.py b/python/tvm/relay/op/strategy/generic.py
index e744b8c9da83..508403c044ae 100644
--- a/python/tvm/relay/op/strategy/generic.py
+++ b/python/tvm/relay/op/strategy/generic.py
@@ -1039,7 +1039,6 @@ def wrap_compute_roi_align(topi_compute):
     """wrap roi_align topi compute"""
 
     def _compute_roi_align(attrs, inputs, out_type):
-        assert attrs.layout == "NCHW"
         pooled_size = get_const_tuple(attrs.pooled_size)
         mode = bytes(attrs.mode, "utf-8")
         return [
@@ -1061,12 +1060,18 @@ def roi_align_strategy(attrs, inputs, out_type, target):
     """roi_align generic strategy"""
     strategy = _op.OpStrategy()
     layout = attrs.layout
-    assert layout == "NCHW", "only support nchw for now"
-    strategy.add_implementation(
-        wrap_compute_roi_align(topi.vision.rcnn.roi_align_nchw),
-        wrap_topi_schedule(topi.generic.schedule_roi_align),
-        name="roi_align.generic",
-    )
+    if layout == "NCHW":
+        strategy.add_implementation(
+            wrap_compute_roi_align(topi.vision.rcnn.roi_align_nchw),
+            wrap_topi_schedule(topi.generic.schedule_roi_align),
+            name="roi_align.generic",
+        )
+    else:
+        strategy.add_implementation(
+            wrap_compute_roi_align(topi.vision.rcnn.roi_align_nhwc),
+            wrap_topi_schedule(topi.generic.schedule_roi_align),
+            name="roi_align.generic",
+        )
     return strategy
 
 
diff --git a/python/tvm/relay/op/strategy/x86.py b/python/tvm/relay/op/strategy/x86.py
index f33c45b248d6..b988bdecf4c9 100644
--- a/python/tvm/relay/op/strategy/x86.py
+++ b/python/tvm/relay/op/strategy/x86.py
@@ -476,18 +476,19 @@ def sparse_dense_strategy_cpu(attrs, inputs, out_type, target):
     return strategy
 
 
-@roi_align_strategy.register("cpu")
-def roi_align_strategy_cpu(attrs, inputs, out_type, target):
-    """roi_align x86 strategy"""
-    strategy = _op.OpStrategy()
-    layout = attrs.layout
-    assert layout == "NCHW", "only support nchw for now"
-    strategy.add_implementation(
-        wrap_compute_roi_align(topi.x86.roi_align_nchw),
-        wrap_topi_schedule(topi.generic.schedule_roi_align),
-        name="roi_align.x86",
-    )
-    return strategy
+# @roi_align_strategy.register("cpu")
+# def roi_align_strategy_cpu(attrs, inputs, out_type, target):
+#     """roi_align x86 strategy"""
+#     strategy = _op.OpStrategy()
+#     layout = attrs.layout
+#     assert layout == "NCHW", "only support nchw for now"
+#     if layout == "NHWC":
+#         strategy.add_implementation(
+#             wrap_compute_roi_align(topi.x86.roi_align_nchw),
+#             wrap_topi_schedule(topi.generic.schedule_roi_align),
+#             name="roi_align.x86",
+#         )
+#     return strategy
 
 
 @bitserial_conv2d_strategy.register("cpu")
diff --git a/python/tvm/topi/vision/rcnn/roi_align.py b/python/tvm/topi/vision/rcnn/roi_align.py
index 95f350084ba5..d987b853cd8e 100644
--- a/python/tvm/topi/vision/rcnn/roi_align.py
+++ b/python/tvm/topi/vision/rcnn/roi_align.py
@@ -19,7 +19,7 @@
 import tvm
 from tvm import te
 from ...utils import get_const_tuple
-from ...cpp.utils import bilinear_sample_nchw
+from ...cpp.utils import bilinear_sample_nchw, bilinear_sample_nhwc
 
 
 def roi_align_nchw(data, rois, pooled_size, spatial_scale, mode, sample_ratio=-1):
@@ -124,3 +124,90 @@ def _sample(i, c, ph, pw):
     return te.compute(
         (num_roi, channel, pooled_size_h, pooled_size_w), _sample, tag="pool,roi_align_nchw"
     )
+
+
+def roi_align_nhwc(data, rois, pooled_size, spatial_scale, sample_ratio=-1):
+    """ROI align operator in NHWC layout.
+
+    Parameters
+    ----------
+    data : tvm.te.Tensor
+        4-D with shape [batch, channel, height, width]
+
+    rois : tvm.te.Tensor
+        2-D with shape [num_roi, 5]. The last dimension should be in format of
+        [batch_index, w_start, h_start, w_end, h_end]
+
+    pooled_size : int or list/tuple of two ints
+        output size, or [out_height, out_width]
+
+    spatial_scale : float
+        Ratio of input feature map height (or w) to raw image height (or w). Equals the reciprocal
+        of total stride in convolutional layers, which should be in range (0.0, 1.0]
+
+    sample_ratio : int
+        Optional sampling ratio of ROI align, using adaptive size by default.
+
+    Returns
+    -------
+    output : tvm.te.Tensor
+        4-D with shape [num_roi, channel, pooled_size, pooled_size]
+    """
+    dtype = rois.dtype
+    _, height, width, channel = get_const_tuple(data.shape)
+    num_roi, _ = get_const_tuple(rois.shape)
+
+    if isinstance(pooled_size, int):
+        pooled_size_h = pooled_size_w = pooled_size
+    else:
+        pooled_size_h, pooled_size_w = pooled_size
+
+    def _bilinear(i, y, x, c):
+        outside = tvm.tir.any(y < -1.0, x < -1.0, y > height, x > width)
+        y = tvm.te.min(tvm.te.max(y, 0.0), height - 1)
+        x = tvm.te.min(tvm.te.max(x, 0.0), width - 1)
+        val = bilinear_sample_nhwc(data, (i, y, x, c), height - 1, width - 1)
+        return tvm.tir.if_then_else(outside, 0.0, val)
+
+    def _sample(i, ph, pw, c):
+        roi = rois[i]
+        batch_index = roi[0].astype("int32")
+        roi_start_w, roi_start_h, roi_end_w, roi_end_h = roi[1], roi[2], roi[3], roi[4]
+        roi_start_h *= spatial_scale
+        roi_end_h *= spatial_scale
+        roi_start_w *= spatial_scale
+        roi_end_w *= spatial_scale
+
+        # force malformed ROIs to be 1x1
+        roi_h = tvm.te.max(roi_end_h - roi_start_h, tvm.tir.const(1.0, dtype))
+        roi_w = tvm.te.max(roi_end_w - roi_start_w, tvm.tir.const(1.0, dtype))
+
+        bin_h = roi_h / pooled_size_h
+        bin_w = roi_w / pooled_size_w
+
+        if sample_ratio > 0:
+            roi_bin_grid_h = roi_bin_grid_w = tvm.tir.const(sample_ratio, "int32")
+        else:
+            roi_bin_grid_h = te.ceil(roi_h / pooled_size_h).astype("int32")
+            roi_bin_grid_w = te.ceil(roi_w / pooled_size_w).astype("int32")
+
+        count = roi_bin_grid_h * roi_bin_grid_w
+        rh = te.reduce_axis((0, roi_bin_grid_h))
+        rw = te.reduce_axis((0, roi_bin_grid_w))
+        roi_start_h += ph * bin_h
+        roi_start_w += pw * bin_w
+        return te.sum(
+            _bilinear(
+                batch_index,
+                roi_start_h + (rh + 0.5) * bin_h / roi_bin_grid_h,
+                roi_start_w + (rw + 0.5) * bin_w / roi_bin_grid_w,
+                c,
+            )
+            / count,
+            axis=[rh, rw],
+        )
+
+    print("nhwc roi align called")
+    return te.compute(
+        (num_roi, pooled_size_h, pooled_size_w, channel, ), _sample, tag="pool,roi_align_nhwc"
+    )

From bbaa206297a6df4a340a67afb0a0f13af1381852 Mon Sep 17 00:00:00 2001
From: Masahiro Masuda <masahi129@gmail.com>
Date: Wed, 17 Feb 2021 10:13:21 +0900
Subject: [PATCH 02/10] integrate mode change from upstream

---
 python/tvm/topi/vision/rcnn/roi_align.py | 35 ++++++++++++++++++------
 1 file changed, 26 insertions(+), 9 deletions(-)

diff --git a/python/tvm/topi/vision/rcnn/roi_align.py b/python/tvm/topi/vision/rcnn/roi_align.py
index d987b853cd8e..55eb468c471d 100644
--- a/python/tvm/topi/vision/rcnn/roi_align.py
+++ b/python/tvm/topi/vision/rcnn/roi_align.py
@@ -126,13 +126,13 @@ def _sample(i, c, ph, pw):
     )
 
 
-def roi_align_nhwc(data, rois, pooled_size, spatial_scale, sample_ratio=-1):
+def roi_align_nhwc(data, rois, pooled_size, spatial_scale, mode, sample_ratio=-1):
     """ROI align operator in NHWC layout.
 
     Parameters
     ----------
     data : tvm.te.Tensor
-        4-D with shape [batch, channel, height, width]
+        4-D with shape [batch, height, width, channel]
 
     rois : tvm.te.Tensor
         2-D with shape [num_roi, 5]. The last dimension should be in format of
@@ -145,14 +145,21 @@ def roi_align_nhwc(data, rois, pooled_size, spatial_scale, sample_ratio=-1):
         Ratio of input feature map height (or w) to raw image height (or w). Equals the reciprocal
         of total stride in convolutional layers, which should be in range (0.0, 1.0]
 
+    mode : int or str
+        There are two modes, average and max. For the average mode, you can pass b'avg' or 0, and
+        for the max mode, you can pass b'max' or 1.
+
     sample_ratio : int
         Optional sampling ratio of ROI align, using adaptive size by default.
 
     Returns
     -------
     output : tvm.te.Tensor
-        4-D with shape [num_roi, channel, pooled_size, pooled_size]
+        4-D with shape [num_roi, pooled_size, pooled_size, channel]
     """
+    avg_mode = mode in (b"avg", 0)
+    max_mode = mode in (b"max", 1)
+    assert avg_mode or max_mode, "Mode must be avg or max. Please pass in a valid mode."
     dtype = rois.dtype
     _, height, width, channel = get_const_tuple(data.shape)
     num_roi, _ = get_const_tuple(rois.shape)
@@ -196,18 +203,28 @@ def _sample(i, ph, pw, c):
         rw = te.reduce_axis((0, roi_bin_grid_w))
         roi_start_h += ph * bin_h
         roi_start_w += pw * bin_w
-        return te.sum(
+        if avg_mode:
+            return te.sum(
+                _bilinear(
+                    batch_index,
+                    roi_start_h + (rh + 0.5) * bin_h / roi_bin_grid_h,
+                    roi_start_w + (rw + 0.5) * bin_w / roi_bin_grid_w,
+                    c
+                )
+                / count,
+                axis=[rh, rw],
+            )
+        # max mode
+        return te.max(
             _bilinear(
                 batch_index,
                 roi_start_h + (rh + 0.5) * bin_h / roi_bin_grid_h,
                 roi_start_w + (rw + 0.5) * bin_w / roi_bin_grid_w,
-                c,
-            )
-            / count,
+                c
+            ),
             axis=[rh, rw],
         )
 
-    print("nhwc roi align called")
     return te.compute(
-        (num_roi, pooled_size_h, pooled_size_w, channel, ), _sample, tag="pool,roi_align_nhwc"
+        (num_roi, pooled_size_h, pooled_size_w, channel), _sample, tag="pool,roi_align_nchw"
     )

From 0ec68a19d8e18c5b14e9ecd4f7c7f892a94e9b13 Mon Sep 17 00:00:00 2001
From: Masahiro Masuda <masahi129@gmail.com>
Date: Wed, 17 Feb 2021 11:13:32 +0900
Subject: [PATCH 03/10] adding test

---
 python/tvm/topi/testing/__init__.py          |  2 +-
 python/tvm/topi/testing/roi_align_python.py  | 74 +++++++++++++++++++
 tests/python/topi/python/test_topi_vision.py | 76 ++++++++++++++++++--
 3 files changed, 144 insertions(+), 8 deletions(-)

diff --git a/python/tvm/topi/testing/__init__.py b/python/tvm/topi/testing/__init__.py
index 85f13a763c40..ef36b9e73446 100644
--- a/python/tvm/topi/testing/__init__.py
+++ b/python/tvm/topi/testing/__init__.py
@@ -39,7 +39,7 @@
 from .bilinear_resize_python import bilinear_resize_python
 from .trilinear_resize3d_python import trilinear_resize3d_python
 from .reorg_python import reorg_python
-from .roi_align_python import roi_align_nchw_python
+from .roi_align_python import roi_align_nchw_python, roi_align_nhwc_python
 from .roi_pool_python import roi_pool_nchw_python
 from .lrn_python import lrn_python
 from .l2_normalize_python import l2_normalize_python
diff --git a/python/tvm/topi/testing/roi_align_python.py b/python/tvm/topi/testing/roi_align_python.py
index 643a954b101b..a7f73d41bbc9 100644
--- a/python/tvm/topi/testing/roi_align_python.py
+++ b/python/tvm/topi/testing/roi_align_python.py
@@ -92,3 +92,77 @@ def _bilinear(n, c, y, x):
                                 total = max(total, _bilinear(batch_index, c, y, x))
                     b_np[i, c, ph, pw] = total
     return b_np
+
+
+def roi_align_nhwc_python(a_np, rois_np, pooled_size, spatial_scale, sample_ratio, mode=b"avg"):
+    """Roi align in python"""
+    avg_mode = mode in (b"avg", "avg", 0)
+    max_mode = mode in (b"max", "max", 1)
+    assert avg_mode or max_mode, "Mode must be average or max. Please pass a valid mode."
+    _, height, width, channel = a_np.shape
+    num_roi = rois_np.shape[0]
+    b_np = np.zeros((num_roi, pooled_size, pooled_size, channel), dtype=a_np.dtype)
+    if isinstance(pooled_size, int):
+        pooled_size_h = pooled_size_w = pooled_size
+    else:
+        pooled_size_h, pooled_size_w = pooled_size
+
+    def _bilinear(n, y, x, c):
+        if y < -1 or y > height or x < -1 or x > width:
+            return 0
+
+        y = min(max(y, 0), height - 1)
+        x = min(max(x, 0), width - 1)
+
+        y_low = int(math.floor(y))
+        x_low = int(math.floor(x))
+        y_high = y_low + 1
+        x_high = x_low + 1
+
+        wy_h = y - y_low
+        wx_h = x - x_low
+        wy_l = 1 - wy_h
+        wx_l = 1 - wx_h
+
+        val = 0
+        for wx, xp in zip((wx_l, wx_h), (x_low, x_high)):
+            for wy, yp in zip((wy_l, wy_h), (y_low, y_high)):
+                if 0 <= yp < height and 0 <= xp < width:
+                    val += wx * wy * a_np[n, yp, xp, c]
+        return val
+
+    for i in range(num_roi):
+        roi = rois_np[i]
+        batch_index = int(roi[0])
+        roi_start_w, roi_start_h, roi_end_w, roi_end_h = roi[1:] * spatial_scale
+        roi_h = max(roi_end_h - roi_start_h, 1.0)
+        roi_w = max(roi_end_w - roi_start_w, 1.0)
+
+        bin_h = roi_h / pooled_size_h
+        bin_w = roi_w / pooled_size_w
+
+        if sample_ratio > 0:
+            roi_bin_grid_h = roi_bin_grid_w = int(sample_ratio)
+        else:
+            roi_bin_grid_h = int(math.ceil(roi_h / pooled_size))
+            roi_bin_grid_w = int(math.ceil(roi_w / pooled_size))
+
+        count = roi_bin_grid_h * roi_bin_grid_w
+
+        for c in range(channel):
+            for ph in range(pooled_size_h):
+                for pw in range(pooled_size_w):
+                    if avg_mode:
+                        total = 0.0
+                    if max_mode:
+                        total = float("-inf")
+                    for iy in range(roi_bin_grid_h):
+                        for ix in range(roi_bin_grid_w):
+                            y = roi_start_h + ph * bin_h + (iy + 0.5) * bin_h / roi_bin_grid_h
+                            x = roi_start_w + pw * bin_w + (ix + 0.5) * bin_w / roi_bin_grid_w
+                            if avg_mode:
+                                total += _bilinear(batch_index, y, x, c) / count
+                            if max_mode:
+                                total = max(total, _bilinear(batch_index, y, x, c))
+                    b_np[i, ph, pw, c] = total
+    return b_np
diff --git a/tests/python/topi/python/test_topi_vision.py b/tests/python/topi/python/test_topi_vision.py
index 839356892ab1..160319ef0e64 100644
--- a/tests/python/topi/python/test_topi_vision.py
+++ b/tests/python/topi/python/test_topi_vision.py
@@ -49,12 +49,18 @@
     "gpu": (topi.cuda.multibox_detection, topi.cuda.schedule_multibox_detection),
 }
 
-_roi_align_implement = {
+_roi_align_implement_nchw = {
     "generic": (topi.vision.roi_align_nchw, topi.generic.schedule_roi_align),
     "cpu": (topi.x86.roi_align_nchw, topi.generic.schedule_roi_align),
     "gpu": (topi.vision.roi_align_nchw, topi.cuda.schedule_roi_align),
 }
 
+_roi_align_implement = {
+    "generic": (topi.vision.roi_align_nhwc, topi.generic.schedule_roi_align),
+    "cpu": (topi.vision.roi_align_nhwc, topi.generic.schedule_roi_align),
+    "gpu": (topi.vision.roi_align_nhwc, topi.cuda.schedule_roi_align),
+}
+
 _roi_pool_schedule = {
     "generic": topi.generic.schedule_roi_pool,
     "gpu": topi.cuda.schedule_roi_pool,
@@ -474,6 +480,62 @@ def check_device(device):
         check_device(device)
 
 
+def verify_roi_align(
+    batch, in_channel, in_size, num_roi, pooled_size, spatial_scale, sample_ratio, mode
+):  # For mode, 0 = avg, 1 = max
+    a_shape = (batch, in_size, in_size, in_channel)
+    rois_shape = (num_roi, 5)
+
+    a = te.placeholder(a_shape)
+    rois = te.placeholder(rois_shape)
+
+    @memoize("topi.tests.test_topi_vision.verify_roi_align")
+    def get_ref_data():
+        a_np = np.random.uniform(-1, 1, size=a_shape).astype("float32")
+        rois_np = np.random.uniform(-1, 1, size=rois_shape).astype("float32") * in_size
+        rois_np[:, 0] = np.random.randint(low=0, high=batch, size=num_roi)
+        b_np = tvm.topi.testing.roi_align_nhwc_python(
+            a_np,
+            rois_np,
+            pooled_size=pooled_size,
+            spatial_scale=spatial_scale,
+            sample_ratio=sample_ratio,
+            mode=mode,
+        )
+
+        return a_np, rois_np, b_np
+
+    a_np, rois_np, b_np = get_ref_data()
+
+    def check_device(device):
+        ctx = tvm.context(device, 0)
+        if not tvm.testing.device_enabled(device):
+            print("Skip because %s is not enabled" % device)
+            return
+        with tvm.target.Target(device):
+            fcompute, fschedule = tvm.topi.testing.dispatch(device, _roi_align_implement)
+            b = fcompute(
+                a,
+                rois,
+                pooled_size=pooled_size,
+                spatial_scale=spatial_scale,
+                sample_ratio=sample_ratio,
+                mode=mode,
+            )
+            s = fschedule(b)
+
+        tvm_a = tvm.nd.array(a_np, ctx)
+        tvm_rois = tvm.nd.array(rois_np, ctx)
+        tvm_b = tvm.nd.array(np.zeros(get_const_tuple(b.shape), dtype=b.dtype), ctx=ctx)
+        f = tvm.build(s, [a, rois, b], device)
+        f(tvm_a, tvm_rois, tvm_b)
+        tvm_val = tvm_b.asnumpy()
+        tvm.testing.assert_allclose(tvm_val, b_np, rtol=1e-3, atol=1e-4)
+
+    for device in ["llvm", "cuda", "opencl"]:
+        check_device(device)
+
+
 @tvm.testing.uses_gpu
 def test_roi_align():
     verify_roi_align(1, 16, 32, 64, 7, 1.0, -1, 0)
@@ -624,10 +686,10 @@ def test_proposal():
 
 
 if __name__ == "__main__":
-    test_get_valid_counts()
-    test_multibox_prior()
-    test_multibox_detection()
+    # test_get_valid_counts()
+    # test_multibox_prior()
+    # test_multibox_detection()
     test_roi_align()
-    test_roi_pool()
-    test_proposal()
-    test_non_max_suppression()
+    # test_roi_pool()
+    # test_proposal()
+    # test_non_max_suppression()

From 48314d9ab732b426d61faeeb38491a93394fb984 Mon Sep 17 00:00:00 2001
From: Masahiro Masuda <masahi129@gmail.com>
Date: Wed, 17 Feb 2021 14:19:20 +0900
Subject: [PATCH 04/10] support nhwc shape func

---
 python/tvm/relay/op/vision/_vision.py | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/python/tvm/relay/op/vision/_vision.py b/python/tvm/relay/op/vision/_vision.py
index 04676e24adf6..d43bccf4a661 100644
--- a/python/tvm/relay/op/vision/_vision.py
+++ b/python/tvm/relay/op/vision/_vision.py
@@ -86,7 +86,7 @@ def nms_shape_func(attrs, inputs, _):
 
 
 @script
-def _roi_align_shape_func(data_shape, rois_shape, pooled_size):
+def _roi_align_shape_func_nchw(data_shape, rois_shape, pooled_size):
     out = output_tensor((4,), "int64")
     out[0] = rois_shape[0]
     out[1] = data_shape[1]
@@ -95,6 +95,20 @@ def _roi_align_shape_func(data_shape, rois_shape, pooled_size):
     return out
 
 
+@script
+def _roi_align_shape_func_nhwc(data_shape, rois_shape, pooled_size):
+    out = output_tensor((4,), "int64")
+    out[0] = rois_shape[0]
+    out[1] = int64(pooled_size[0])
+    out[2] = int64(pooled_size[1])
+    out[3] = data_shape[3]
+    return out
+
+
 @reg.register_shape_func("vision.roi_align", False)
 def roi_align_shape_func(attrs, inputs, _):
-    return [_roi_align_shape_func(inputs[0], inputs[1], convert(attrs.pooled_size))]
+    if attrs.layout == "NCHW":
+        return [_roi_align_shape_func_nchw(inputs[0], inputs[1], convert(attrs.pooled_size))]
+    else:
+        assert attrs.layout == "NHWC"
+        return [_roi_align_shape_func_nhwc(inputs[0], inputs[1], convert(attrs.pooled_size))]

From 48359c190cf776c84d379bc4b8913a68ba0b919c Mon Sep 17 00:00:00 2001
From: Masahiro Masuda <masahi129@gmail.com>
Date: Wed, 17 Feb 2021 20:01:53 +0900
Subject: [PATCH 05/10] update strategy

---
 python/tvm/relay/op/strategy/cuda.py    |  1 +
 python/tvm/relay/op/strategy/generic.py |  1 +
 python/tvm/relay/op/strategy/x86.py     | 32 +++++++++++++++----------
 python/tvm/relay/op/vision/_vision.py   |  2 +-
 4 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/python/tvm/relay/op/strategy/cuda.py b/python/tvm/relay/op/strategy/cuda.py
index a7815f94a4d2..cb4688c4889e 100644
--- a/python/tvm/relay/op/strategy/cuda.py
+++ b/python/tvm/relay/op/strategy/cuda.py
@@ -953,6 +953,7 @@ def roi_align_strategy_cuda(attrs, inputs, out_type, target):
             name="roi_align_nchw.cuda",
         )
     else:
+        assert layout == "NHWC", "layout must be NCHW or NHWC."
         strategy.add_implementation(
             wrap_compute_roi_align(topi.vision.rcnn.roi_align_nhwc),
             wrap_topi_schedule(topi.cuda.schedule_roi_align),
diff --git a/python/tvm/relay/op/strategy/generic.py b/python/tvm/relay/op/strategy/generic.py
index 508403c044ae..f076176c5d8a 100644
--- a/python/tvm/relay/op/strategy/generic.py
+++ b/python/tvm/relay/op/strategy/generic.py
@@ -1067,6 +1067,7 @@ def roi_align_strategy(attrs, inputs, out_type, target):
             name="roi_align.generic",
         )
     else:
+        assert layout == "NHWC", "layout must be NCHW or NHWC."
         strategy.add_implementation(
             wrap_compute_roi_align(topi.vision.rcnn.roi_align_nhwc),
             wrap_topi_schedule(topi.generic.schedule_roi_align),
diff --git a/python/tvm/relay/op/strategy/x86.py b/python/tvm/relay/op/strategy/x86.py
index b988bdecf4c9..1f37a4f8e98c 100644
--- a/python/tvm/relay/op/strategy/x86.py
+++ b/python/tvm/relay/op/strategy/x86.py
@@ -476,19 +476,25 @@ def sparse_dense_strategy_cpu(attrs, inputs, out_type, target):
     return strategy
 
 
-# @roi_align_strategy.register("cpu")
-# def roi_align_strategy_cpu(attrs, inputs, out_type, target):
-#     """roi_align x86 strategy"""
-#     strategy = _op.OpStrategy()
-#     layout = attrs.layout
-#     assert layout == "NCHW", "only support nchw for now"
-#     if layout == "NHWC":
-#         strategy.add_implementation(
-#             wrap_compute_roi_align(topi.x86.roi_align_nchw),
-#             wrap_topi_schedule(topi.generic.schedule_roi_align),
-#             name="roi_align.x86",
-#         )
-#     return strategy
+@roi_align_strategy.register("cpu")
+def roi_align_strategy_cpu(attrs, inputs, out_type, target):
+    """roi_align x86 strategy"""
+    strategy = _op.OpStrategy()
+    layout = attrs.layout
+    if layout == "NCHW":
+        strategy.add_implementation(
+            wrap_compute_roi_align(topi.x86.roi_align_nchw),
+            wrap_topi_schedule(topi.generic.schedule_roi_align),
+            name="roi_align.x86",
+        )
+    else:
+        assert layout == "NHWC", "layout must be NCHW or NHWC."
+        strategy.add_implementation(
+            wrap_compute_roi_align(topi.vision.rcnn.roi_align_nhwc),
+            wrap_topi_schedule(topi.generic.schedule_roi_align),
+            name="roi_align.x86",
+        )
+    return strategy
 
 
 @bitserial_conv2d_strategy.register("cpu")
diff --git a/python/tvm/relay/op/vision/_vision.py b/python/tvm/relay/op/vision/_vision.py
index d43bccf4a661..9db7e0e71da6 100644
--- a/python/tvm/relay/op/vision/_vision.py
+++ b/python/tvm/relay/op/vision/_vision.py
@@ -110,5 +110,5 @@ def roi_align_shape_func(attrs, inputs, _):
     if attrs.layout == "NCHW":
         return [_roi_align_shape_func_nchw(inputs[0], inputs[1], convert(attrs.pooled_size))]
     else:
-        assert attrs.layout == "NHWC"
+        assert attrs.layout == "NHWC", "layout must be NCHW or NHWC."
         return [_roi_align_shape_func_nhwc(inputs[0], inputs[1], convert(attrs.pooled_size))]

From fe4e7c4d5e967e67a6b93b8ad933fe9af5f922ca Mon Sep 17 00:00:00 2001
From: Masahiro Masuda <masahi129@gmail.com>
Date: Wed, 17 Feb 2021 20:16:07 +0900
Subject: [PATCH 06/10] refactoring test

---
 python/tvm/topi/testing/roi_align_python.py | 130 ++++++++++++++------
 1 file changed, 90 insertions(+), 40 deletions(-)

diff --git a/python/tvm/topi/testing/roi_align_python.py b/python/tvm/topi/testing/roi_align_python.py
index a7f73d41bbc9..b149deaa4bac 100644
--- a/python/tvm/topi/testing/roi_align_python.py
+++ b/python/tvm/topi/testing/roi_align_python.py
@@ -20,42 +20,50 @@
 import numpy as np
 
 
-def roi_align_nchw_python(a_np, rois_np, pooled_size, spatial_scale, sample_ratio, mode=b"avg"):
-    """Roi align in python"""
-    avg_mode = mode in (b"avg", "avg", 0)
-    max_mode = mode in (b"max", "max", 1)
-    assert avg_mode or max_mode, "Mode must be average or max. Please pass a valid mode."
-    _, channel, height, width = a_np.shape
-    num_roi = rois_np.shape[0]
-    b_np = np.zeros((num_roi, channel, pooled_size, pooled_size), dtype=a_np.dtype)
-    if isinstance(pooled_size, int):
-        pooled_size_h = pooled_size_w = pooled_size
-    else:
-        pooled_size_h, pooled_size_w = pooled_size
-
-    def _bilinear(n, c, y, x):
-        if y < -1 or y > height or x < -1 or x > width:
-            return 0
-
-        y = min(max(y, 0), height - 1)
-        x = min(max(x, 0), width - 1)
-
-        y_low = int(math.floor(y))
-        x_low = int(math.floor(x))
-        y_high = y_low + 1
-        x_high = x_low + 1
-
-        wy_h = y - y_low
-        wx_h = x - x_low
-        wy_l = 1 - wy_h
-        wx_l = 1 - wx_h
-
-        val = 0
-        for wx, xp in zip((wx_l, wx_h), (x_low, x_high)):
-            for wy, yp in zip((wy_l, wy_h), (y_low, y_high)):
-                if 0 <= yp < height and 0 <= xp < width:
+def _bilinear(a_np, n, c, y, x, height, width, layout):
+    if y < -1 or y > height or x < -1 or x > width:
+        return 0
+
+    y = min(max(y, 0), height - 1)
+    x = min(max(x, 0), width - 1)
+
+    y_low = int(math.floor(y))
+    x_low = int(math.floor(x))
+    y_high = y_low + 1
+    x_high = x_low + 1
+
+    wy_h = y - y_low
+    wx_h = x - x_low
+    wy_l = 1 - wy_h
+    wx_l = 1 - wx_h
+
+    val = 0
+    for wx, xp in zip((wx_l, wx_h), (x_low, x_high)):
+        for wy, yp in zip((wy_l, wy_h), (y_low, y_high)):
+            if 0 <= yp < height and 0 <= xp < width:
+                if layout == "NCHW":
                     val += wx * wy * a_np[n, c, yp, xp]
-        return val
+                else:
+                    val += wx * wy * a_np[n, yp, xp, c]
+    return val
+
+
+def roi_align_common(
+    a_np,
+    b_np,
+    rois_np,
+    channel,
+    pooled_size_h,
+    pooled_size_w,
+    spatial_scale,
+    sample_ratio,
+    avg_mode,
+    max_mode,
+    height,
+    width,
+    layout,
+):
+    num_roi = rois_np.shape[0]
 
     for i in range(num_roi):
         roi = rois_np[i]
@@ -70,8 +78,8 @@ def _bilinear(n, c, y, x):
         if sample_ratio > 0:
             roi_bin_grid_h = roi_bin_grid_w = int(sample_ratio)
         else:
-            roi_bin_grid_h = int(math.ceil(roi_h / pooled_size))
-            roi_bin_grid_w = int(math.ceil(roi_w / pooled_size))
+            roi_bin_grid_h = int(math.ceil(roi_h / pooled_size_h))
+            roi_bin_grid_w = int(math.ceil(roi_w / pooled_size_w))
 
         count = roi_bin_grid_h * roi_bin_grid_w
 
@@ -87,10 +95,52 @@ def _bilinear(n, c, y, x):
                             y = roi_start_h + ph * bin_h + (iy + 0.5) * bin_h / roi_bin_grid_h
                             x = roi_start_w + pw * bin_w + (ix + 0.5) * bin_w / roi_bin_grid_w
                             if avg_mode:
-                                total += _bilinear(batch_index, c, y, x) / count
+                                total += (
+                                    _bilinear(a_np, batch_index, c, y, x, height, width, layout)
+                                    / count
+                                )
                             if max_mode:
-                                total = max(total, _bilinear(batch_index, c, y, x))
-                    b_np[i, c, ph, pw] = total
+                                total = max(
+                                    total,
+                                    _bilinear(a_np, batch_index, c, y, x, height, width, layout),
+                                )
+
+                    if layout == "NCHW":
+                        b_np[i, c, ph, pw] = total
+                    else:
+                        b_np[i, ph, pw, c] = total
+    return b_np
+
+
+def roi_align_nchw_python(a_np, rois_np, pooled_size, spatial_scale, sample_ratio, mode=b"avg"):
+    """Roi align in python"""
+    avg_mode = mode in (b"avg", "avg", 0)
+    max_mode = mode in (b"max", "max", 1)
+    assert avg_mode or max_mode, "Mode must be average or max. Please pass a valid mode."
+    _, channel, height, width = a_np.shape
+    if isinstance(pooled_size, int):
+        pooled_size_h = pooled_size_w = pooled_size
+    else:
+        pooled_size_h, pooled_size_w = pooled_size
+
+    num_roi = rois_np.shape[0]
+    b_np = np.zeros((num_roi, channel, pooled_size, pooled_size), dtype=a_np.dtype)
+    roi_align_common(
+        a_np,
+        b_np,
+        rois_np,
+        channel,
+        pooled_size_h,
+        pooled_size_w,
+        spatial_scale,
+        sample_ratio,
+        avg_mode,
+        max_mode,
+        height,
+        width,
+        "NCHW",
+    )
+
     return b_np
 
 

From 252453e16582ea0085d563c895f9878193694e3d Mon Sep 17 00:00:00 2001
From: Masahiro Masuda <masahi129@gmail.com>
Date: Wed, 17 Feb 2021 20:19:26 +0900
Subject: [PATCH 07/10] refactor test

---
 python/tvm/topi/testing/roi_align_python.py | 84 +++++----------------
 1 file changed, 20 insertions(+), 64 deletions(-)

diff --git a/python/tvm/topi/testing/roi_align_python.py b/python/tvm/topi/testing/roi_align_python.py
index b149deaa4bac..c4f6f8279efe 100644
--- a/python/tvm/topi/testing/roi_align_python.py
+++ b/python/tvm/topi/testing/roi_align_python.py
@@ -123,9 +123,9 @@ def roi_align_nchw_python(a_np, rois_np, pooled_size, spatial_scale, sample_rati
     else:
         pooled_size_h, pooled_size_w = pooled_size
 
-    num_roi = rois_np.shape[0]
-    b_np = np.zeros((num_roi, channel, pooled_size, pooled_size), dtype=a_np.dtype)
-    roi_align_common(
+    b_np = np.zeros((rois_np.shape[0], channel, pooled_size_h, pooled_size_w), dtype=a_np.dtype)
+
+    return roi_align_common(
         a_np,
         b_np,
         rois_np,
@@ -141,8 +141,6 @@ def roi_align_nchw_python(a_np, rois_np, pooled_size, spatial_scale, sample_rati
         "NCHW",
     )
 
-    return b_np
-
 
 def roi_align_nhwc_python(a_np, rois_np, pooled_size, spatial_scale, sample_ratio, mode=b"avg"):
     """Roi align in python"""
@@ -151,68 +149,26 @@ def roi_align_nhwc_python(a_np, rois_np, pooled_size, spatial_scale, sample_rati
     assert avg_mode or max_mode, "Mode must be average or max. Please pass a valid mode."
     _, height, width, channel = a_np.shape
     num_roi = rois_np.shape[0]
-    b_np = np.zeros((num_roi, pooled_size, pooled_size, channel), dtype=a_np.dtype)
+
     if isinstance(pooled_size, int):
         pooled_size_h = pooled_size_w = pooled_size
     else:
         pooled_size_h, pooled_size_w = pooled_size
 
-    def _bilinear(n, y, x, c):
-        if y < -1 or y > height or x < -1 or x > width:
-            return 0
-
-        y = min(max(y, 0), height - 1)
-        x = min(max(x, 0), width - 1)
-
-        y_low = int(math.floor(y))
-        x_low = int(math.floor(x))
-        y_high = y_low + 1
-        x_high = x_low + 1
+    b_np = np.zeros((num_roi, pooled_size_h, pooled_size_w, channel), dtype=a_np.dtype)
 
-        wy_h = y - y_low
-        wx_h = x - x_low
-        wy_l = 1 - wy_h
-        wx_l = 1 - wx_h
-
-        val = 0
-        for wx, xp in zip((wx_l, wx_h), (x_low, x_high)):
-            for wy, yp in zip((wy_l, wy_h), (y_low, y_high)):
-                if 0 <= yp < height and 0 <= xp < width:
-                    val += wx * wy * a_np[n, yp, xp, c]
-        return val
-
-    for i in range(num_roi):
-        roi = rois_np[i]
-        batch_index = int(roi[0])
-        roi_start_w, roi_start_h, roi_end_w, roi_end_h = roi[1:] * spatial_scale
-        roi_h = max(roi_end_h - roi_start_h, 1.0)
-        roi_w = max(roi_end_w - roi_start_w, 1.0)
-
-        bin_h = roi_h / pooled_size_h
-        bin_w = roi_w / pooled_size_w
-
-        if sample_ratio > 0:
-            roi_bin_grid_h = roi_bin_grid_w = int(sample_ratio)
-        else:
-            roi_bin_grid_h = int(math.ceil(roi_h / pooled_size))
-            roi_bin_grid_w = int(math.ceil(roi_w / pooled_size))
-
-        count = roi_bin_grid_h * roi_bin_grid_w
-
-        for c in range(channel):
-            for ph in range(pooled_size_h):
-                for pw in range(pooled_size_w):
-                    if avg_mode:
-                        total = 0.0
-                    if max_mode:
-                        total = float("-inf")
-                    for iy in range(roi_bin_grid_h):
-                        for ix in range(roi_bin_grid_w):
-                            y = roi_start_h + ph * bin_h + (iy + 0.5) * bin_h / roi_bin_grid_h
-                            x = roi_start_w + pw * bin_w + (ix + 0.5) * bin_w / roi_bin_grid_w
-                            if avg_mode:
-                                total += _bilinear(batch_index, y, x, c) / count
-                            if max_mode:
-                                total = max(total, _bilinear(batch_index, y, x, c))
-                    b_np[i, ph, pw, c] = total
-    return b_np
+    return roi_align_common(
+        a_np,
+        b_np,
+        rois_np,
+        channel,
+        pooled_size_h,
+        pooled_size_w,
+        spatial_scale,
+        sample_ratio,
+        avg_mode,
+        max_mode,
+        height,
+        width,
+        "NHWC",
+    )

From 9b2764d7ef3552277d4e128bd38fb2cd210b9f92 Mon Sep 17 00:00:00 2001
From: Masahiro Masuda <masahi129@gmail.com>
Date: Wed, 17 Feb 2021 20:30:22 +0900
Subject: [PATCH 08/10] refactoring

---
 python/tvm/topi/vision/rcnn/roi_align.py | 184 +++++++++++------------
 1 file changed, 91 insertions(+), 93 deletions(-)

diff --git a/python/tvm/topi/vision/rcnn/roi_align.py b/python/tvm/topi/vision/rcnn/roi_align.py
index 55eb468c471d..655ba2637d84 100644
--- a/python/tvm/topi/vision/rcnn/roi_align.py
+++ b/python/tvm/topi/vision/rcnn/roi_align.py
@@ -22,6 +22,70 @@
 from ...cpp.utils import bilinear_sample_nchw, bilinear_sample_nhwc
 
 
+def _sample_common(
+    i,
+    c,
+    ph,
+    pw,
+    rois,
+    pooled_size_h,
+    pooled_size_w,
+    spatial_scale,
+    sample_ratio,
+    dtype,
+    avg_mode,
+    bilinear_func,
+):
+    roi = rois[i]
+    batch_index = roi[0].astype("int32")
+    roi_start_w, roi_start_h, roi_end_w, roi_end_h = roi[1], roi[2], roi[3], roi[4]
+    roi_start_h *= spatial_scale
+    roi_end_h *= spatial_scale
+    roi_start_w *= spatial_scale
+    roi_end_w *= spatial_scale
+
+    # force malformed ROIs to be 1x1
+    roi_h = tvm.te.max(roi_end_h - roi_start_h, tvm.tir.const(1.0, dtype))
+    roi_w = tvm.te.max(roi_end_w - roi_start_w, tvm.tir.const(1.0, dtype))
+
+    bin_h = roi_h / pooled_size_h
+    bin_w = roi_w / pooled_size_w
+
+    if sample_ratio > 0:
+        roi_bin_grid_h = roi_bin_grid_w = tvm.tir.const(sample_ratio, "int32")
+    else:
+        roi_bin_grid_h = te.ceil(roi_h / pooled_size_h).astype("int32")
+        roi_bin_grid_w = te.ceil(roi_w / pooled_size_w).astype("int32")
+
+    count = roi_bin_grid_h * roi_bin_grid_w
+    rh = te.reduce_axis((0, roi_bin_grid_h))
+    rw = te.reduce_axis((0, roi_bin_grid_w))
+    roi_start_h += ph * bin_h
+    roi_start_w += pw * bin_w
+
+    if avg_mode:
+        return te.sum(
+            bilinear_func(
+                batch_index,
+                c,
+                roi_start_h + (rh + 0.5) * bin_h / roi_bin_grid_h,
+                roi_start_w + (rw + 0.5) * bin_w / roi_bin_grid_w,
+            )
+            / count,
+            axis=[rh, rw],
+        )
+    # max mode
+    return te.max(
+        bilinear_func(
+            batch_index,
+            c,
+            roi_start_h + (rh + 0.5) * bin_h / roi_bin_grid_h,
+            roi_start_w + (rw + 0.5) * bin_w / roi_bin_grid_w,
+        ),
+        axis=[rh, rw],
+    )
+
+
 def roi_align_nchw(data, rois, pooled_size, spatial_scale, mode, sample_ratio=-1):
     """ROI align operator in NCHW layout.
 
@@ -73,52 +137,19 @@ def _bilinear(i, c, y, x):
         return tvm.tir.if_then_else(outside, 0.0, val)
 
     def _sample(i, c, ph, pw):
-        roi = rois[i]
-        batch_index = roi[0].astype("int32")
-        roi_start_w, roi_start_h, roi_end_w, roi_end_h = roi[1], roi[2], roi[3], roi[4]
-        roi_start_h *= spatial_scale
-        roi_end_h *= spatial_scale
-        roi_start_w *= spatial_scale
-        roi_end_w *= spatial_scale
-
-        # force malformed ROIs to be 1x1
-        roi_h = tvm.te.max(roi_end_h - roi_start_h, tvm.tir.const(1.0, dtype))
-        roi_w = tvm.te.max(roi_end_w - roi_start_w, tvm.tir.const(1.0, dtype))
-
-        bin_h = roi_h / pooled_size_h
-        bin_w = roi_w / pooled_size_w
-
-        if sample_ratio > 0:
-            roi_bin_grid_h = roi_bin_grid_w = tvm.tir.const(sample_ratio, "int32")
-        else:
-            roi_bin_grid_h = te.ceil(roi_h / pooled_size_h).astype("int32")
-            roi_bin_grid_w = te.ceil(roi_w / pooled_size_w).astype("int32")
-
-        count = roi_bin_grid_h * roi_bin_grid_w
-        rh = te.reduce_axis((0, roi_bin_grid_h))
-        rw = te.reduce_axis((0, roi_bin_grid_w))
-        roi_start_h += ph * bin_h
-        roi_start_w += pw * bin_w
-        if avg_mode:
-            return te.sum(
-                _bilinear(
-                    batch_index,
-                    c,
-                    roi_start_h + (rh + 0.5) * bin_h / roi_bin_grid_h,
-                    roi_start_w + (rw + 0.5) * bin_w / roi_bin_grid_w,
-                )
-                / count,
-                axis=[rh, rw],
-            )
-        # max mode
-        return te.max(
-            _bilinear(
-                batch_index,
-                c,
-                roi_start_h + (rh + 0.5) * bin_h / roi_bin_grid_h,
-                roi_start_w + (rw + 0.5) * bin_w / roi_bin_grid_w,
-            ),
-            axis=[rh, rw],
+        return _sample_common(
+            i,
+            c,
+            ph,
+            pw,
+            rois,
+            pooled_size_h,
+            pooled_size_w,
+            spatial_scale,
+            sample_ratio,
+            dtype,
+            avg_mode,
+            _bilinear,
         )
 
     return te.compute(
@@ -169,7 +200,7 @@ def roi_align_nhwc(data, rois, pooled_size, spatial_scale, mode, sample_ratio=-1
     else:
         pooled_size_h, pooled_size_w = pooled_size
 
-    def _bilinear(i, y, x, c):
+    def _bilinear(i, c, y, x):
         outside = tvm.tir.any(y < -1.0, x < -1.0, y > height, x > width)
         y = tvm.te.min(tvm.te.max(y, 0.0), height - 1)
         x = tvm.te.min(tvm.te.max(x, 0.0), width - 1)
@@ -177,52 +208,19 @@ def _bilinear(i, y, x, c):
         return tvm.tir.if_then_else(outside, 0.0, val)
 
     def _sample(i, ph, pw, c):
-        roi = rois[i]
-        batch_index = roi[0].astype("int32")
-        roi_start_w, roi_start_h, roi_end_w, roi_end_h = roi[1], roi[2], roi[3], roi[4]
-        roi_start_h *= spatial_scale
-        roi_end_h *= spatial_scale
-        roi_start_w *= spatial_scale
-        roi_end_w *= spatial_scale
-
-        # force malformed ROIs to be 1x1
-        roi_h = tvm.te.max(roi_end_h - roi_start_h, tvm.tir.const(1.0, dtype))
-        roi_w = tvm.te.max(roi_end_w - roi_start_w, tvm.tir.const(1.0, dtype))
-
-        bin_h = roi_h / pooled_size_h
-        bin_w = roi_w / pooled_size_w
-
-        if sample_ratio > 0:
-            roi_bin_grid_h = roi_bin_grid_w = tvm.tir.const(sample_ratio, "int32")
-        else:
-            roi_bin_grid_h = te.ceil(roi_h / pooled_size_h).astype("int32")
-            roi_bin_grid_w = te.ceil(roi_w / pooled_size_w).astype("int32")
-
-        count = roi_bin_grid_h * roi_bin_grid_w
-        rh = te.reduce_axis((0, roi_bin_grid_h))
-        rw = te.reduce_axis((0, roi_bin_grid_w))
-        roi_start_h += ph * bin_h
-        roi_start_w += pw * bin_w
-        if avg_mode:
-            return te.sum(
-                _bilinear(
-                    batch_index,
-                    roi_start_h + (rh + 0.5) * bin_h / roi_bin_grid_h,
-                    roi_start_w + (rw + 0.5) * bin_w / roi_bin_grid_w,
-                    c
-                )
-                / count,
-                axis=[rh, rw],
-            )
-        # max mode
-        return te.max(
-            _bilinear(
-                batch_index,
-                roi_start_h + (rh + 0.5) * bin_h / roi_bin_grid_h,
-                roi_start_w + (rw + 0.5) * bin_w / roi_bin_grid_w,
-                c
-            ),
-            axis=[rh, rw],
+        return _sample_common(
+            i,
+            c,
+            ph,
+            pw,
+            rois,
+            pooled_size_h,
+            pooled_size_w,
+            spatial_scale,
+            sample_ratio,
+            dtype,
+            avg_mode,
+            _bilinear,
         )
 
     return te.compute(

From e4a0724c4d9caf6bcb058cb23a7126e569edcd69 Mon Sep 17 00:00:00 2001
From: Masahiro Masuda <masahi129@gmail.com>
Date: Wed, 17 Feb 2021 20:34:39 +0900
Subject: [PATCH 09/10] fix lint

---
 python/tvm/relay/op/vision/_vision.py       | 5 ++---
 python/tvm/topi/testing/roi_align_python.py | 5 +++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/python/tvm/relay/op/vision/_vision.py b/python/tvm/relay/op/vision/_vision.py
index 9db7e0e71da6..9c8c853fa3d2 100644
--- a/python/tvm/relay/op/vision/_vision.py
+++ b/python/tvm/relay/op/vision/_vision.py
@@ -109,6 +109,5 @@ def _roi_align_shape_func_nhwc(data_shape, rois_shape, pooled_size):
 def roi_align_shape_func(attrs, inputs, _):
     if attrs.layout == "NCHW":
         return [_roi_align_shape_func_nchw(inputs[0], inputs[1], convert(attrs.pooled_size))]
-    else:
-        assert attrs.layout == "NHWC", "layout must be NCHW or NHWC."
-        return [_roi_align_shape_func_nhwc(inputs[0], inputs[1], convert(attrs.pooled_size))]
+    assert attrs.layout == "NHWC", "layout must be NCHW or NHWC."
+    return [_roi_align_shape_func_nhwc(inputs[0], inputs[1], convert(attrs.pooled_size))]
diff --git a/python/tvm/topi/testing/roi_align_python.py b/python/tvm/topi/testing/roi_align_python.py
index c4f6f8279efe..986123b6c9c6 100644
--- a/python/tvm/topi/testing/roi_align_python.py
+++ b/python/tvm/topi/testing/roi_align_python.py
@@ -63,6 +63,7 @@ def roi_align_common(
     width,
     layout,
 ):
+    """Common code used by roi align NCHW and NHWC"""
     num_roi = rois_np.shape[0]
 
     for i in range(num_roi):
@@ -113,7 +114,7 @@ def roi_align_common(
 
 
 def roi_align_nchw_python(a_np, rois_np, pooled_size, spatial_scale, sample_ratio, mode=b"avg"):
-    """Roi align in python"""
+    """Roi align NCHW in python"""
     avg_mode = mode in (b"avg", "avg", 0)
     max_mode = mode in (b"max", "max", 1)
     assert avg_mode or max_mode, "Mode must be average or max. Please pass a valid mode."
@@ -143,7 +144,7 @@ def roi_align_nchw_python(a_np, rois_np, pooled_size, spatial_scale, sample_rati
 
 
 def roi_align_nhwc_python(a_np, rois_np, pooled_size, spatial_scale, sample_ratio, mode=b"avg"):
-    """Roi align in python"""
+    """Roi align NHWC in python"""
     avg_mode = mode in (b"avg", "avg", 0)
     max_mode = mode in (b"max", "max", 1)
     assert avg_mode or max_mode, "Mode must be average or max. Please pass a valid mode."

From c0d25f5dc58b9fcda1a316957c968568df1402a7 Mon Sep 17 00:00:00 2001
From: Masahiro Masuda <masahi129@gmail.com>
Date: Thu, 18 Feb 2021 03:39:02 +0900
Subject: [PATCH 10/10] update relay op tests

---
 tests/python/relay/test_op_level5.py         | 89 +++++++++++++++++---
 tests/python/topi/python/test_topi_vision.py | 76 ++---------------
 2 files changed, 83 insertions(+), 82 deletions(-)

diff --git a/tests/python/relay/test_op_level5.py b/tests/python/relay/test_op_level5.py
index 95cd537091f5..0a84667f8bdb 100644
--- a/tests/python/relay/test_op_level5.py
+++ b/tests/python/relay/test_op_level5.py
@@ -583,7 +583,18 @@ def test_threshold():
 
 @tvm.testing.uses_gpu
 def test_roi_align():
-    def verify_roi_align(data_shape, rois_shape, pooled_size, spatial_scale, sample_ratio, mode):
+    def verify_roi_align(
+        data_shape,
+        rois_shape,
+        channel,
+        in_size,
+        pooled_size,
+        spatial_scale,
+        sample_ratio,
+        mode,
+        layout,
+        ref_func,
+    ):
         data = relay.var("data", relay.ty.TensorType(data_shape, "float32"))
         rois = relay.var("rois", relay.ty.TensorType(rois_shape, "float32"))
         z = relay.vision.roi_align(
@@ -593,21 +604,27 @@ def verify_roi_align(data_shape, rois_shape, pooled_size, spatial_scale, sample_
             spatial_scale=spatial_scale,
             sample_ratio=sample_ratio,
             mode=mode,
-            layout="NCHW",
+            layout=layout,
         )
         zz = run_infer_type(z)
-        batch, channel, in_size, _ = data_shape
+
         num_roi = rois_shape[0]
-        assert zz.checked_type == relay.ty.TensorType(
-            (num_roi, channel, pooled_size, pooled_size), "float32"
-        )
+
+        if layout == "NCHW":
+            assert zz.checked_type == relay.ty.TensorType(
+                (num_roi, channel, pooled_size, pooled_size), "float32"
+            )
+        else:
+            assert zz.checked_type == relay.ty.TensorType(
+                (num_roi, pooled_size, pooled_size, channel), "float32"
+            )
 
         func = relay.Function([data, rois], z)
         func = run_infer_type(func)
         np_data = np.random.uniform(size=data_shape).astype("float32")
         np_rois = np.random.uniform(size=rois_shape).astype("float32") * in_size
-        np_rois[:, 0] = np.random.randint(low=0, high=batch, size=num_roi)
-        ref_res = tvm.topi.testing.roi_align_nchw_python(
+        np_rois[:, 0] = np.random.randint(low=0, high=data_shape[0], size=num_roi)
+        ref_res = ref_func(
             np_data,
             np_rois,
             pooled_size=pooled_size,
@@ -616,6 +633,7 @@ def verify_roi_align(data_shape, rois_shape, pooled_size, spatial_scale, sample_
             mode=mode,
         )
         for target, ctx in tvm.testing.enabled_targets():
+            print("test on", target)
             intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
             op_res1 = intrp1.evaluate(func)(np_data, np_rois)
             tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-4)
@@ -623,18 +641,64 @@ def verify_roi_align(data_shape, rois_shape, pooled_size, spatial_scale, sample_
             op_res2 = intrp2.evaluate(func)(np_data, np_rois)
             tvm.testing.assert_allclose(op_res2.asnumpy(), ref_res, rtol=1e-4)
 
-    verify_roi_align(
+    def verify_roi_align_nchw(
+        data_shape, rois_shape, pooled_size, spatial_scale, sample_ratio, mode
+    ):
+        _, channel, in_size, _ = data_shape
+        return verify_roi_align(
+            data_shape,
+            rois_shape,
+            channel,
+            in_size,
+            pooled_size,
+            spatial_scale,
+            sample_ratio,
+            mode,
+            "NCHW",
+            tvm.topi.testing.roi_align_nchw_python,
+        )
+
+    def verify_roi_align_nhwc(
+        data_shape, rois_shape, pooled_size, spatial_scale, sample_ratio, mode
+    ):
+        _, in_size, _, channel = data_shape
+        return verify_roi_align(
+            data_shape,
+            rois_shape,
+            channel,
+            in_size,
+            pooled_size,
+            spatial_scale,
+            sample_ratio,
+            mode,
+            "NHWC",
+            tvm.topi.testing.roi_align_nhwc_python,
+        )
+
+    verify_roi_align_nchw(
         (1, 4, 16, 16), (32, 5), pooled_size=7, spatial_scale=1.0, sample_ratio=-1, mode="avg"
     )
-    verify_roi_align(
+    verify_roi_align_nchw(
         (4, 4, 16, 16), (32, 5), pooled_size=7, spatial_scale=0.5, sample_ratio=2, mode="avg"
     )
-    verify_roi_align(
+    verify_roi_align_nchw(
         (1, 4, 16, 16), (32, 5), pooled_size=7, spatial_scale=1.0, sample_ratio=-1, mode="max"
     )
-    verify_roi_align(
+    verify_roi_align_nchw(
         (4, 4, 16, 16), (32, 5), pooled_size=7, spatial_scale=0.5, sample_ratio=2, mode="max"
     )
+    verify_roi_align_nhwc(
+        (1, 16, 16, 4), (32, 5), pooled_size=7, spatial_scale=1.0, sample_ratio=-1, mode="avg"
+    )
+    verify_roi_align_nhwc(
+        (4, 16, 16, 4), (32, 5), pooled_size=7, spatial_scale=0.5, sample_ratio=2, mode="avg"
+    )
+    verify_roi_align_nhwc(
+        (1, 16, 16, 4), (32, 5), pooled_size=7, spatial_scale=1.0, sample_ratio=-1, mode="max"
+    )
+    verify_roi_align_nhwc(
+        (4, 16, 16, 4), (32, 5), pooled_size=7, spatial_scale=0.5, sample_ratio=2, mode="max"
+    )
 
 
 @tvm.testing.uses_gpu
@@ -1262,7 +1326,6 @@ def verify_batch_to_space_nd(dshape, block_shape, crops):
     test_resize_infer_type()
     test_resize()
     test_resize3d_infer_type()
-    test_resize3d()
     test_crop_and_resize()
     test_multibox_prior()
     test_multibox_transform_loc()
diff --git a/tests/python/topi/python/test_topi_vision.py b/tests/python/topi/python/test_topi_vision.py
index 160319ef0e64..839356892ab1 100644
--- a/tests/python/topi/python/test_topi_vision.py
+++ b/tests/python/topi/python/test_topi_vision.py
@@ -49,18 +49,12 @@
     "gpu": (topi.cuda.multibox_detection, topi.cuda.schedule_multibox_detection),
 }
 
-_roi_align_implement_nchw = {
+_roi_align_implement = {
     "generic": (topi.vision.roi_align_nchw, topi.generic.schedule_roi_align),
     "cpu": (topi.x86.roi_align_nchw, topi.generic.schedule_roi_align),
     "gpu": (topi.vision.roi_align_nchw, topi.cuda.schedule_roi_align),
 }
 
-_roi_align_implement = {
-    "generic": (topi.vision.roi_align_nhwc, topi.generic.schedule_roi_align),
-    "cpu": (topi.vision.roi_align_nhwc, topi.generic.schedule_roi_align),
-    "gpu": (topi.vision.roi_align_nhwc, topi.cuda.schedule_roi_align),
-}
-
 _roi_pool_schedule = {
     "generic": topi.generic.schedule_roi_pool,
     "gpu": topi.cuda.schedule_roi_pool,
@@ -480,62 +474,6 @@ def check_device(device):
         check_device(device)
 
 
-def verify_roi_align(
-    batch, in_channel, in_size, num_roi, pooled_size, spatial_scale, sample_ratio, mode
-):  # For mode, 0 = avg, 1 = max
-    a_shape = (batch, in_size, in_size, in_channel)
-    rois_shape = (num_roi, 5)
-
-    a = te.placeholder(a_shape)
-    rois = te.placeholder(rois_shape)
-
-    @memoize("topi.tests.test_topi_vision.verify_roi_align")
-    def get_ref_data():
-        a_np = np.random.uniform(-1, 1, size=a_shape).astype("float32")
-        rois_np = np.random.uniform(-1, 1, size=rois_shape).astype("float32") * in_size
-        rois_np[:, 0] = np.random.randint(low=0, high=batch, size=num_roi)
-        b_np = tvm.topi.testing.roi_align_nhwc_python(
-            a_np,
-            rois_np,
-            pooled_size=pooled_size,
-            spatial_scale=spatial_scale,
-            sample_ratio=sample_ratio,
-            mode=mode,
-        )
-
-        return a_np, rois_np, b_np
-
-    a_np, rois_np, b_np = get_ref_data()
-
-    def check_device(device):
-        ctx = tvm.context(device, 0)
-        if not tvm.testing.device_enabled(device):
-            print("Skip because %s is not enabled" % device)
-            return
-        with tvm.target.Target(device):
-            fcompute, fschedule = tvm.topi.testing.dispatch(device, _roi_align_implement)
-            b = fcompute(
-                a,
-                rois,
-                pooled_size=pooled_size,
-                spatial_scale=spatial_scale,
-                sample_ratio=sample_ratio,
-                mode=mode,
-            )
-            s = fschedule(b)
-
-        tvm_a = tvm.nd.array(a_np, ctx)
-        tvm_rois = tvm.nd.array(rois_np, ctx)
-        tvm_b = tvm.nd.array(np.zeros(get_const_tuple(b.shape), dtype=b.dtype), ctx=ctx)
-        f = tvm.build(s, [a, rois, b], device)
-        f(tvm_a, tvm_rois, tvm_b)
-        tvm_val = tvm_b.asnumpy()
-        tvm.testing.assert_allclose(tvm_val, b_np, rtol=1e-3, atol=1e-4)
-
-    for device in ["llvm", "cuda", "opencl"]:
-        check_device(device)
-
-
 @tvm.testing.uses_gpu
 def test_roi_align():
     verify_roi_align(1, 16, 32, 64, 7, 1.0, -1, 0)
@@ -686,10 +624,10 @@ def test_proposal():
 
 
 if __name__ == "__main__":
-    # test_get_valid_counts()
-    # test_multibox_prior()
-    # test_multibox_detection()
+    test_get_valid_counts()
+    test_multibox_prior()
+    test_multibox_detection()
     test_roi_align()
-    # test_roi_pool()
-    # test_proposal()
-    # test_non_max_suppression()
+    test_roi_pool()
+    test_proposal()
+    test_non_max_suppression()