diff --git a/include/tvm/relay/attrs/vision.h b/include/tvm/relay/attrs/vision.h
index ca2c4a2b837d..4a96d391430e 100644
--- a/include/tvm/relay/attrs/vision.h
+++ b/include/tvm/relay/attrs/vision.h
@@ -124,6 +124,7 @@ struct ROIAlignAttrs : public tvm::AttrsNode<ROIAlignAttrs> {
   double spatial_scale;
   int sample_ratio;
   std::string layout;
+  std::string mode;
   TVM_DECLARE_ATTRS(ROIAlignAttrs, "relay.attrs.ROIAlignAttrs") {
     TVM_ATTR_FIELD(pooled_size).describe("Output size of roi align.");
     TVM_ATTR_FIELD(spatial_scale)
@@ -139,6 +140,8 @@ struct ROIAlignAttrs : public tvm::AttrsNode<ROIAlignAttrs> {
         "'N', 'C', 'H', 'W' stands for batch, channel, height, and width"
         "dimensions respectively. Convolution is applied on the 'H' and"
         "'W' dimensions.");
+    TVM_ATTR_FIELD(mode).set_default("avg").describe(
+        "Mode for ROI Align. Can be 'avg' or 'max'. The default mode is 'avg'.");
   }
 };
diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py
index fb3d1c923561..109e80c99783 100644
--- a/python/tvm/relay/frontend/onnx.py
+++ b/python/tvm/relay/frontend/onnx.py
@@ -1665,6 +1665,7 @@ def expand_shape(in_shape, shape):
             """
             in_dims = infer_shape(in_shape)[0]
             new_dims = infer_shape(shape)[0]
+
             if in_dims < new_dims:
                 in_shape = _op.concatenate(
                     [
@@ -2084,8 +2085,8 @@ def _impl_v1(cls, inputs, attr, params):
         rois = inputs[1]
         batch_indices = inputs[2]
         mode = attr.get("mode", b"avg")
-        if mode != b"avg":
-            raise ValueError("RoiAlign in Relay only uses avg mode")
+        if mode not in (b"avg", b"max"):
+            raise ValueError("RoiAlign in Relay only uses avg and max modes")
         output_height = attr.get("output_height", 1)
         output_width = attr.get("output_width", 1)
@@ -2097,7 +2098,7 @@ def _impl_v1(cls, inputs, attr, params):
         rois = _op.concatenate([batch_indices, rois], 1)

         return _vision.roi_align(
-            x, rois, [output_height, output_width], spatial_scale, sampling_ratio
+            x, rois, [output_height, output_width], spatial_scale, sampling_ratio, mode=mode
         )
diff --git a/python/tvm/relay/op/strategy/generic.py b/python/tvm/relay/op/strategy/generic.py
index 92a72f950615..2d69a2f6942e 100644
--- a/python/tvm/relay/op/strategy/generic.py
+++ b/python/tvm/relay/op/strategy/generic.py
@@ -1041,6 +1041,7 @@ def wrap_compute_roi_align(topi_compute):
     def _compute_roi_align(attrs, inputs, out_type):
         assert attrs.layout == "NCHW"
         pooled_size = get_const_tuple(attrs.pooled_size)
+        mode = bytes(attrs.mode, "utf-8")
         return [
             topi_compute(
                 inputs[0],
@@ -1048,6 +1049,7 @@ def _compute_roi_align(attrs, inputs, out_type):
                 pooled_size=pooled_size,
                 spatial_scale=attrs.spatial_scale,
                 sample_ratio=attrs.sample_ratio,
+                mode=mode,
             )
         ]
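Note: the strategy wrapper above turns the Relay string attribute into bytes because the TOPI kernels below compare mode against the byte literals b"avg" / b"max" (or the integer codes 0 / 1). A minimal standalone sketch of that convention, using made-up values that are not part of the patch:

    mode_attr = "max"                 # ROIAlignAttrs.mode is stored as a string on the Relay side
    mode = bytes(mode_attr, "utf-8")  # the TOPI kernels expect b"avg" / b"max" (or 0 / 1)
    assert mode in (b"avg", b"max")
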
diff --git a/python/tvm/relay/op/vision/rcnn.py b/python/tvm/relay/op/vision/rcnn.py
index b87eb07d7563..d25c5de89cee 100644
--- a/python/tvm/relay/op/vision/rcnn.py
+++ b/python/tvm/relay/op/vision/rcnn.py
@@ -18,7 +18,7 @@
 from . import _make


-def roi_align(data, rois, pooled_size, spatial_scale, sample_ratio=-1, layout="NCHW"):
+def roi_align(data, rois, pooled_size, spatial_scale, sample_ratio=-1, layout="NCHW", mode="avg"):
     """ROI align operator.

     Parameters
@@ -40,12 +40,15 @@ def roi_align(data, rois, pooled_size, spatial_scale, sample_ratio=-1, layout="N
     sample_ratio : int
         Optional sampling ratio of ROI align, using adaptive size by default.

+    mode : str, Optional
+        The pooling method. Relay supports two methods, 'avg' and 'max'. Default is 'avg'.
+
     Returns
     -------
     output : relay.Expr
         4-D tensor with shape [num_roi, channel, pooled_size, pooled_size]
     """
-    return _make.roi_align(data, rois, pooled_size, spatial_scale, sample_ratio, layout)
+    return _make.roi_align(data, rois, pooled_size, spatial_scale, sample_ratio, layout, mode)


 def roi_pool(data, rois, pooled_size, spatial_scale, layout="NCHW"):
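For reference, a minimal way to exercise the new argument through the Python API documented above; the shapes are illustrative only:

    import tvm
    from tvm import relay

    data = relay.var("data", shape=(1, 4, 16, 16), dtype="float32")
    rois = relay.var("rois", shape=(32, 5), dtype="float32")  # each row: [batch_index, x1, y1, x2, y2]
    out = relay.vision.roi_align(
        data, rois, pooled_size=(7, 7), spatial_scale=1.0, sample_ratio=2, layout="NCHW", mode="max"
    )
    func = relay.Function([data, rois], out)
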
diff --git a/python/tvm/te/hybrid/calls.py b/python/tvm/te/hybrid/calls.py
index 6785457c3bd7..462066106a9d 100644
--- a/python/tvm/te/hybrid/calls.py
+++ b/python/tvm/te/hybrid/calls.py
@@ -167,3 +167,17 @@ def max_num_threads(func_id, args):
     _internal_assert(isinstance(args[0], _expr.IntImm), "In tvm bool should be uint")
     res = Target.current(args[0].value).max_num_threads
     return convert(res)
+
+
+def inf(func_id, args):
+    """Infinity"""
+    _internal_assert(func_id == "inf", "This function cannot be directly invoked!")
+    _internal_assert(args.__len__() == 1, "One argument accepted!")
+    return tvm.tir.max_value(args[0])
+
+
+def ninf(func_id, args):
+    """Negative infinity"""
+    _internal_assert(func_id == "ninf", "This function cannot be directly invoked!")
+    _internal_assert(args.__len__() == 1, "One argument accepted!")
+    return tvm.tir.min_value(args[0])
diff --git a/python/tvm/te/hybrid/runtime.py b/python/tvm/te/hybrid/runtime.py
index 7b90f8729014..615bd7e43a7d 100644
--- a/python/tvm/te/hybrid/runtime.py
+++ b/python/tvm/te/hybrid/runtime.py
@@ -111,6 +111,14 @@ def max_num_threads(allow_none=True):
     return Target.current(allow_none).max_num_threads


+def inf(dtype):
+    return numpy.finfo(dtype).max
+
+
+def ninf(dtype):
+    return numpy.finfo(dtype).min
+
+
 HYBRID_GLOBALS = {
     "unroll": range,
     "vectorize": range,
@@ -142,6 +150,8 @@ def max_num_threads(allow_none=True):
     "float64": numpy.float64,
     "ceil_div": lambda a, b: (a + b - 1) // b,
     "max_num_threads": max_num_threads,
+    "inf": inf,
+    "ninf": ninf,
 }
diff --git a/python/tvm/topi/testing/roi_align_python.py b/python/tvm/topi/testing/roi_align_python.py
index abef25f0b994..643a954b101b 100644
--- a/python/tvm/topi/testing/roi_align_python.py
+++ b/python/tvm/topi/testing/roi_align_python.py
@@ -20,12 +20,14 @@
 import numpy as np


-def roi_align_nchw_python(a_np, rois_np, pooled_size, spatial_scale, sample_ratio):
+def roi_align_nchw_python(a_np, rois_np, pooled_size, spatial_scale, sample_ratio, mode=b"avg"):
     """Roi align in python"""
+    avg_mode = mode in (b"avg", "avg", 0)
+    max_mode = mode in (b"max", "max", 1)
+    assert avg_mode or max_mode, "Mode must be average or max. Please pass a valid mode."
     _, channel, height, width = a_np.shape
     num_roi = rois_np.shape[0]
     b_np = np.zeros((num_roi, channel, pooled_size, pooled_size), dtype=a_np.dtype)
-
     if isinstance(pooled_size, int):
         pooled_size_h = pooled_size_w = pooled_size
     else:
@@ -76,11 +78,17 @@ def _bilinear(n, c, y, x):
     for c in range(channel):
         for ph in range(pooled_size_h):
             for pw in range(pooled_size_w):
-                total = 0.0
+                if avg_mode:
+                    total = 0.0
+                if max_mode:
+                    total = float("-inf")
                 for iy in range(roi_bin_grid_h):
                     for ix in range(roi_bin_grid_w):
                         y = roi_start_h + ph * bin_h + (iy + 0.5) * bin_h / roi_bin_grid_h
                         x = roi_start_w + pw * bin_w + (ix + 0.5) * bin_w / roi_bin_grid_w
-                        total += _bilinear(batch_index, c, y, x)
-                b_np[i, c, ph, pw] = total / count
+                        if avg_mode:
+                            total += _bilinear(batch_index, c, y, x) / count
+                        if max_mode:
+                            total = max(total, _bilinear(batch_index, c, y, x))
+                b_np[i, c, ph, pw] = total
     return b_np
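The NumPy reference above can be sanity-checked on its own: because max mode takes the maximum of the same bilinear samples that avg mode averages, the max-mode output can never fall below the avg-mode output. A small sketch with hypothetical shapes (not part of the patch):

    import numpy as np
    from tvm.topi.testing import roi_align_nchw_python

    a_np = np.random.uniform(-1, 1, size=(1, 4, 16, 16)).astype("float32")
    rois_np = np.array([[0, 2.0, 2.0, 12.0, 12.0]], dtype="float32")  # [batch_index, x1, y1, x2, y2]
    avg_out = roi_align_nchw_python(a_np, rois_np, 7, 1.0, 2, mode=b"avg")
    max_out = roi_align_nchw_python(a_np, rois_np, 7, 1.0, 2, mode=b"max")
    assert (max_out >= avg_out - 1e-5).all()  # the max of a sample set is never below its mean
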
diff --git a/python/tvm/topi/vision/rcnn/roi_align.py b/python/tvm/topi/vision/rcnn/roi_align.py
index 30824770b7b2..95f350084ba5 100644
--- a/python/tvm/topi/vision/rcnn/roi_align.py
+++ b/python/tvm/topi/vision/rcnn/roi_align.py
@@ -22,7 +22,7 @@
 from ...cpp.utils import bilinear_sample_nchw


-def roi_align_nchw(data, rois, pooled_size, spatial_scale, sample_ratio=-1):
+def roi_align_nchw(data, rois, pooled_size, spatial_scale, mode, sample_ratio=-1):
     """ROI align operator in NCHW layout.

     Parameters
@@ -41,6 +41,10 @@ def roi_align_nchw(data, rois, pooled_size, spatial_scale, sample_ratio=-1):
         Ratio of input feature map height (or w) to raw image height (or w). Equals the reciprocal
         of total stride in convolutional layers, which should be in range (0.0, 1.0]

+    mode : int or str
+        There are two modes, average and max. For the average mode, you can pass b'avg' or 0, and
+        for the max mode, you can pass b'max' or 1.
+
     sample_ratio : int
         Optional sampling ratio of ROI align, using adaptive size by default.

@@ -49,6 +53,9 @@ def roi_align_nchw(data, rois, pooled_size, spatial_scale, sample_ratio=-1):
     output : tvm.te.Tensor
         4-D with shape [num_roi, channel, pooled_size, pooled_size]
     """
+    avg_mode = mode in (b"avg", 0)
+    max_mode = mode in (b"max", 1)
+    assert avg_mode or max_mode, "Mode must be avg or max. Please pass in a valid mode."
     dtype = rois.dtype
     _, channel, height, width = get_const_tuple(data.shape)
     num_roi, _ = get_const_tuple(rois.shape)
@@ -92,14 +99,25 @@ def _sample(i, c, ph, pw):
         rw = te.reduce_axis((0, roi_bin_grid_w))
         roi_start_h += ph * bin_h
         roi_start_w += pw * bin_w
-        return te.sum(
+        if avg_mode:
+            return te.sum(
+                _bilinear(
+                    batch_index,
+                    c,
+                    roi_start_h + (rh + 0.5) * bin_h / roi_bin_grid_h,
+                    roi_start_w + (rw + 0.5) * bin_w / roi_bin_grid_w,
+                )
+                / count,
+                axis=[rh, rw],
+            )
+        # max mode
+        return te.max(
             _bilinear(
                 batch_index,
                 c,
                 roi_start_h + (rh + 0.5) * bin_h / roi_bin_grid_h,
                 roi_start_w + (rw + 0.5) * bin_w / roi_bin_grid_w,
-            )
-            / count,
+            ),
             axis=[rh, rw],
         )
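The only structural change in the TE kernel above is the reducer: te.sum over the sampling grid becomes te.max, and the division by count is dropped. The reduction pattern itself, shown on a toy tensor (this snippet is illustrative and not part of the patch):

    import tvm
    from tvm import te

    data = te.placeholder((8, 4, 4), name="data")
    rh = te.reduce_axis((0, 4), name="rh")
    rw = te.reduce_axis((0, 4), name="rw")
    # One output element = max over a 4x4 window, mirroring the max-mode branch above.
    pooled = te.compute((8,), lambda i: te.max(data[i, rh, rw], axis=[rh, rw]), name="pooled")
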
diff --git a/python/tvm/topi/x86/roi_align.py b/python/tvm/topi/x86/roi_align.py
index ac2146b558f9..336a336f50e5 100644
--- a/python/tvm/topi/x86/roi_align.py
+++ b/python/tvm/topi/x86/roi_align.py
@@ -17,15 +17,17 @@
 # pylint: disable=invalid-name, no-member, too-many-locals, too-many-arguments, undefined-variable, too-many-nested-blocks, too-many-branches, too-many-statements
 """Non-maximum suppression operator for intel cpu"""
 import math
-import tvm

+import tvm
 from tvm.te import hybrid
 from ..tensor import full
 from ..utils import get_const_tuple


 @hybrid.script
-def roi_align_nchw_ir(data, rois, num_rois, w_pc, pos_pc, pooled_size, spatial_scale, sample_ratio):
+def roi_align_nchw_ir(
+    data, rois, num_rois, w_pc, pos_pc, pooled_size, spatial_scale, sample_ratio, mode
+):
     """Hybrid routine for ROI align operator in NCHW layout.

     Parameters
@@ -57,6 +59,10 @@ def roi_align_nchw_ir(data, rois, num_rois, w_pc, pos_pc, pooled_size, spatial_s
     sample_ratio : tvm.tir.const
         Sampling ratio of ROI align, using adaptive size by default.

+    mode : tvm.tir.const
+        Mode of RoiAlign. A value of 0 corresponds to b'avg', while a value of 1 corresponds to
+        b'max'.
+
     Returns
     -------
     output : tvm.te.Tensor or numpy NDArray
@@ -160,10 +166,12 @@ def roi_align_nchw_ir(data, rois, num_rois, w_pc, pos_pc, pooled_size, spatial_s
             pre_calc_index = 0
             for ph in range(pooled_size_h):
                 for pw in range(pooled_size_w):
-                    output_val = 0.0
+                    output_val = 0.0  # Avg mode
+                    if mode == 1:  # Max mode
+                        output_val = ninf("float32")
                     for iy in range(roi_bin_grid_h):
                         for ix in range(roi_bin_grid_w):
-                            output_val += (
+                            bilinear_val = (
                                 w_pc[n, pre_calc_index, 0]
                                 * data[
                                     roi_batch_index,
@@ -194,14 +202,15 @@ def roi_align_nchw_ir(data, rois, num_rois, w_pc, pos_pc, pooled_size, spatial_s
                                 ]
                             )
                             pre_calc_index += 1
-
-                    output_val /= count
-                    output[n, c, ph, pw] = output_val
-
+                            if mode == 0:  # Avg mode
+                                output_val += bilinear_val / count
+                            if mode == 1:  # Max mode
+                                output_val = max(output_val, bilinear_val)
+                    output[n, c, ph, pw] = output_val
     return output


-def roi_align_nchw(data, rois, pooled_size, spatial_scale, sample_ratio=-1):
+def roi_align_nchw(data, rois, pooled_size, spatial_scale, mode, sample_ratio=-1):
     """ROI align operator in NCHW layout.

     Parameters
@@ -220,6 +229,9 @@ def roi_align_nchw(data, rois, pooled_size, spatial_scale, sample_ratio=-1):
         Ratio of input feature map height (or w) to raw image height (or w). Equals the reciprocal
         of total stride in convolutional layers, which should be in range (0.0, 1.0]

+    mode : str
+        Mode of RoiAlign. Should be b'max' or b'avg'.
+
     sample_ratio : int
         Optional sampling ratio of ROI align, using adaptive size by default.

@@ -250,6 +262,21 @@ def roi_align_nchw(data, rois, pooled_size, spatial_scale, sample_ratio=-1):
     pooled_size = tvm.runtime.convert(pooled_size)
     spatial_scale = tvm.tir.const(spatial_scale, "float32")
     sample_ratio = tvm.tir.const(sample_ratio, "int32")
+    if mode in (b"avg", 0):
+        mode = tvm.tir.const(0, dtype="float32")
+    elif mode in (b"max", 1):
+        mode = tvm.tir.const(1, dtype="float32")
+    else:
+        raise ValueError("Value %s passed in for mode not supported" % mode)
+
     return roi_align_nchw_ir(
-        data, rois, num_rois, w_pc_buffer, pos_pc_buffer, pooled_size, spatial_scale, sample_ratio
+        data,
+        rois,
+        num_rois,
+        w_pc_buffer,
+        pos_pc_buffer,
+        pooled_size,
+        spatial_scale,
+        sample_ratio,
+        mode,
     )
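The new inf()/ninf() hybrid-script intrinsics exist so the kernel above can seed its running maximum; outside this patch they can be used in any hybrid script. A minimal sketch (the rowwise_max kernel below is hypothetical and not part of TVM):

    import tvm
    from tvm import te
    from tvm.te import hybrid

    @hybrid.script
    def rowwise_max(a):
        out = output_tensor((a.shape[0],), "float32")
        for i in range(a.shape[0]):
            out[i] = ninf("float32")  # negative-infinity seed, as in the max-mode branch above
            for j in range(a.shape[1]):
                out[i] = max(out[i], a[i, j])
        return out

    a = te.placeholder((4, 8), name="a")
    b = rowwise_max(a)  # a te.Tensor produced by the hybrid kernel
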
diff --git a/src/relay/op/vision/rcnn_op.cc b/src/relay/op/vision/rcnn_op.cc
index f7bbf378d09c..c899681733f8 100644
--- a/src/relay/op/vision/rcnn_op.cc
+++ b/src/relay/op/vision/rcnn_op.cc
@@ -76,12 +76,13 @@ Array<Array<Layout> > ROIAlignInferCorrectLayout(const Attrs& attrs,
 }

 Expr MakeROIAlign(Expr data, Expr rois, Array<IndexExpr> pooled_size, double spatial_scale,
-                  int sample_ratio, String layout) {
+                  int sample_ratio, String layout, String mode) {
   auto attrs = make_object<ROIAlignAttrs>();
   attrs->pooled_size = pooled_size;
   attrs->spatial_scale = spatial_scale;
   attrs->sample_ratio = sample_ratio;
   attrs->layout = layout;
+  attrs->mode = mode;
   static const Op& op = Op::Get("vision.roi_align");
   return Call(op, {data, rois}, Attrs(attrs), {});
 }
diff --git a/tests/python/frontend/onnx/test_forward.py b/tests/python/frontend/onnx/test_forward.py
index 27b91dd38f8e..59ecffe829df 100644
--- a/tests/python/frontend/onnx/test_forward.py
+++ b/tests/python/frontend/onnx/test_forward.py
@@ -3437,7 +3437,13 @@ def verify_topk(input_dims, K, axis=-1):
 @tvm.testing.uses_gpu
 def test_roi_align():
     def verify_roi_align(
-        input_dims, num_roi, output_height, output_width, sampling_ratio=0, spatial_scale=1.0
+        input_dims,
+        num_roi,
+        output_height,
+        output_width,
+        sampling_ratio=0,
+        spatial_scale=1.0,
+        mode="avg",
     ):
         output_dims = [num_roi, input_dims[1], output_height, output_width]

@@ -3445,7 +3451,7 @@ def verify_roi_align(
             "RoiAlign",
             inputs=["X", "rois", "batch_indicies"],
             outputs=["Y"],
-            mode="avg",
+            mode=mode,
             output_height=output_height,
             output_width=output_width,
             sampling_ratio=sampling_ratio,
@@ -3490,6 +3496,8 @@ def verify_roi_align(
     verify_roi_align((5, 4, 16, 14), 32, 7, 7, sampling_ratio=1, spatial_scale=1.0)
     verify_roi_align((1, 4, 16, 16), 32, 7, 7, sampling_ratio=2, spatial_scale=1.0)
+    # ONNX implementation of roi_align with max mode is incorrect, so we don't compare outputs here.
+    #


 @tvm.testing.uses_gpu
 def test_non_max_suppression():
diff --git a/tests/python/relay/test_op_level5.py b/tests/python/relay/test_op_level5.py
index 6d7d401d706b..95cd537091f5 100644
--- a/tests/python/relay/test_op_level5.py
+++ b/tests/python/relay/test_op_level5.py
@@ -583,7 +583,7 @@ def test_threshold():

 @tvm.testing.uses_gpu
 def test_roi_align():
-    def verify_roi_align(data_shape, rois_shape, pooled_size, spatial_scale, sample_ratio):
+    def verify_roi_align(data_shape, rois_shape, pooled_size, spatial_scale, sample_ratio, mode):
         data = relay.var("data", relay.ty.TensorType(data_shape, "float32"))
         rois = relay.var("rois", relay.ty.TensorType(rois_shape, "float32"))
         z = relay.vision.roi_align(
@@ -592,6 +592,7 @@ def verify_roi_align(data_shape, rois_shape, pooled_size, spatial_scale, sample_
             pooled_size=(pooled_size, pooled_size),
             spatial_scale=spatial_scale,
             sample_ratio=sample_ratio,
+            mode=mode,
             layout="NCHW",
         )
         zz = run_infer_type(z)
@@ -612,6 +613,7 @@ def verify_roi_align(data_shape, rois_shape, pooled_size, spatial_scale, sample_
             pooled_size=pooled_size,
             spatial_scale=spatial_scale,
             sample_ratio=sample_ratio,
+            mode=mode,
         )
         for target, ctx in tvm.testing.enabled_targets():
             intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
@@ -621,8 +623,18 @@ def verify_roi_align(data_shape, rois_shape, pooled_size, spatial_scale, sample_
             op_res2 = intrp2.evaluate(func)(np_data, np_rois)
             tvm.testing.assert_allclose(op_res2.asnumpy(), ref_res, rtol=1e-4)

-    verify_roi_align((1, 4, 16, 16), (32, 5), pooled_size=7, spatial_scale=1.0, sample_ratio=-1)
-    verify_roi_align((4, 4, 16, 16), (32, 5), pooled_size=7, spatial_scale=0.5, sample_ratio=2)
+    verify_roi_align(
+        (1, 4, 16, 16), (32, 5), pooled_size=7, spatial_scale=1.0, sample_ratio=-1, mode="avg"
+    )
+    verify_roi_align(
+        (4, 4, 16, 16), (32, 5), pooled_size=7, spatial_scale=0.5, sample_ratio=2, mode="avg"
+    )
+    verify_roi_align(
+        (1, 4, 16, 16), (32, 5), pooled_size=7, spatial_scale=1.0, sample_ratio=-1, mode="max"
+    )
+    verify_roi_align(
+        (4, 4, 16, 16), (32, 5), pooled_size=7, spatial_scale=0.5, sample_ratio=2, mode="max"
+    )


 @tvm.testing.uses_gpu
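To see the frontend change end to end, one can build an ONNX RoiAlign node with mode="max" and import it; as the comment in the ONNX test above notes, the onnxruntime output for max mode is not compared against, but the model still imports and compiles. A sketch with illustrative shapes, not part of the test suite:

    import onnx
    from onnx import TensorProto, helper
    from tvm import relay

    node = helper.make_node(
        "RoiAlign",
        inputs=["X", "rois", "batch_indicies"],
        outputs=["Y"],
        mode="max",
        output_height=7,
        output_width=7,
        sampling_ratio=2,
        spatial_scale=1.0,
    )
    graph = helper.make_graph(
        [node],
        "roi_align_max_sketch",
        inputs=[
            helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 4, 16, 16]),
            helper.make_tensor_value_info("rois", TensorProto.FLOAT, [32, 4]),
            helper.make_tensor_value_info("batch_indicies", TensorProto.INT64, [32]),
        ],
        outputs=[helper.make_tensor_value_info("Y", TensorProto.FLOAT, [32, 4, 7, 7])],
    )
    model = helper.make_model(graph, producer_name="roi_align_max_sketch")
    mod, params = relay.frontend.from_onnx(
        model, shape={"X": (1, 4, 16, 16), "rois": (32, 4), "batch_indicies": (32,)}
    )
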
diff --git a/tests/python/topi/python/test_topi_vision.py b/tests/python/topi/python/test_topi_vision.py
index 697ef8a24f67..839356892ab1 100644
--- a/tests/python/topi/python/test_topi_vision.py
+++ b/tests/python/topi/python/test_topi_vision.py
@@ -418,7 +418,9 @@ def check_device(device):
         check_device(device)


-def verify_roi_align(batch, in_channel, in_size, num_roi, pooled_size, spatial_scale, sample_ratio):
+def verify_roi_align(
+    batch, in_channel, in_size, num_roi, pooled_size, spatial_scale, sample_ratio, mode
+):
+    # For mode, 0 = avg, 1 = max
     a_shape = (batch, in_channel, in_size, in_size)
     rois_shape = (num_roi, 5)

@@ -427,8 +429,8 @@ def verify_roi_align(batch, in_channel, in_size, num_roi, pooled_size, spatial_s
     @memoize("topi.tests.test_topi_vision.verify_roi_align")
     def get_ref_data():
-        a_np = np.random.uniform(size=a_shape).astype("float32")
-        rois_np = np.random.uniform(size=rois_shape).astype("float32") * in_size
+        a_np = np.random.uniform(-1, 1, size=a_shape).astype("float32")
+        rois_np = np.random.uniform(-1, 1, size=rois_shape).astype("float32") * in_size
         rois_np[:, 0] = np.random.randint(low=0, high=batch, size=num_roi)
         b_np = tvm.topi.testing.roi_align_nchw_python(
             a_np,
@@ -436,6 +438,7 @@ def get_ref_data():
             pooled_size=pooled_size,
             spatial_scale=spatial_scale,
             sample_ratio=sample_ratio,
+            mode=mode,
         )
         return a_np, rois_np, b_np
@@ -447,8 +450,6 @@ def check_device(device):
         if not tvm.testing.device_enabled(device):
            print("Skip because %s is not enabled" % device)
            return
-        print("Running on target: %s" % device)
-
         with tvm.target.Target(device):
             fcompute, fschedule = tvm.topi.testing.dispatch(device, _roi_align_implement)
             b = fcompute(
@@ -457,6 +458,7 @@ def check_device(device):
                 pooled_size=pooled_size,
                 spatial_scale=spatial_scale,
                 sample_ratio=sample_ratio,
+                mode=mode,
             )
             s = fschedule(b)

@@ -465,7 +467,8 @@ def check_device(device):
         tvm_b = tvm.nd.array(np.zeros(get_const_tuple(b.shape), dtype=b.dtype), ctx=ctx)
         f = tvm.build(s, [a, rois, b], device)
         f(tvm_a, tvm_rois, tvm_b)
-        tvm.testing.assert_allclose(tvm_b.asnumpy(), b_np, rtol=1e-3)
+        tvm_val = tvm_b.asnumpy()
+        tvm.testing.assert_allclose(tvm_val, b_np, rtol=1e-3, atol=1e-4)

     for device in ["llvm", "cuda", "opencl"]:
         check_device(device)
@@ -473,10 +476,14 @@ def check_device(device):

 @tvm.testing.uses_gpu
 def test_roi_align():
-    verify_roi_align(1, 16, 32, 64, 7, 1.0, -1)
-    verify_roi_align(4, 16, 32, 64, 7, 0.5, 2)
-    verify_roi_align(1, 32, 32, 80, 8, 0.0625, 2)
-    verify_roi_align(1, 32, 500, 80, 8, 0.0625, 2)
+    verify_roi_align(1, 16, 32, 64, 7, 1.0, -1, 0)
+    verify_roi_align(4, 16, 32, 64, 7, 0.5, 2, 0)
+    verify_roi_align(1, 32, 32, 80, 8, 0.0625, 2, 0)
+    verify_roi_align(1, 32, 500, 80, 8, 0.0625, 2, 0)
+    verify_roi_align(1, 16, 32, 64, 7, 1.0, -1, 1)
+    verify_roi_align(4, 16, 32, 64, 7, 0.5, 2, 1)
+    verify_roi_align(1, 32, 32, 80, 8, 0.0625, 2, 1)
+    verify_roi_align(1, 32, 500, 80, 8, 0.0625, 2, 1)


 def verify_roi_pool(batch, in_channel, in_size, num_roi, pooled_size, spatial_scale):