diff --git a/include/tvm/relay/attrs/vision.h b/include/tvm/relay/attrs/vision.h
index ca2c4a2b837d..4a96d391430e 100644
--- a/include/tvm/relay/attrs/vision.h
+++ b/include/tvm/relay/attrs/vision.h
@@ -124,6 +124,7 @@ struct ROIAlignAttrs : public tvm::AttrsNode<ROIAlignAttrs> {
   double spatial_scale;
   int sample_ratio;
   std::string layout;
+  std::string mode;
   TVM_DECLARE_ATTRS(ROIAlignAttrs, "relay.attrs.ROIAlignAttrs") {
     TVM_ATTR_FIELD(pooled_size).describe("Output size of roi align.");
     TVM_ATTR_FIELD(spatial_scale)
@@ -139,6 +140,8 @@ struct ROIAlignAttrs : public tvm::AttrsNode<ROIAlignAttrs> {
         "'N', 'C', 'H', 'W' stands for batch, channel, height, and width"
         "dimensions respectively. Convolution is applied on the 'H' and"
         "'W' dimensions.");
+    TVM_ATTR_FIELD(mode).set_default("avg").describe(
+        "Mode for ROI Align. Can be 'avg' or 'max'. The default mode is 'avg'.");
   }
 };
diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py
index fb3d1c923561..109e80c99783 100644
--- a/python/tvm/relay/frontend/onnx.py
+++ b/python/tvm/relay/frontend/onnx.py
@@ -1665,6 +1665,7 @@ def expand_shape(in_shape, shape):
             """
             in_dims = infer_shape(in_shape)[0]
             new_dims = infer_shape(shape)[0]
+
             if in_dims < new_dims:
                 in_shape = _op.concatenate(
                     [
@@ -2084,8 +2085,8 @@ def _impl_v1(cls, inputs, attr, params):
         rois = inputs[1]
         batch_indices = inputs[2]
         mode = attr.get("mode", b"avg")
-        if mode != b"avg":
-            raise ValueError("RoiAlign in Relay only uses avg mode")
+        if mode not in (b"avg", b"max"):
+            raise ValueError("RoiAlign in Relay only uses avg and max modes")
         output_height = attr.get("output_height", 1)
         output_width = attr.get("output_width", 1)
@@ -2097,7 +2098,7 @@ def _impl_v1(cls, inputs, attr, params):
         rois = _op.concatenate([batch_indices, rois], 1)

         return _vision.roi_align(
-            x, rois, [output_height, output_width], spatial_scale, sampling_ratio
+            x, rois, [output_height, output_width], spatial_scale, sampling_ratio, mode=mode
         )
diff --git a/python/tvm/relay/op/strategy/generic.py b/python/tvm/relay/op/strategy/generic.py
index 92a72f950615..2d69a2f6942e 100644
--- a/python/tvm/relay/op/strategy/generic.py
+++ b/python/tvm/relay/op/strategy/generic.py
@@ -1041,6 +1041,7 @@ def wrap_compute_roi_align(topi_compute):
     def _compute_roi_align(attrs, inputs, out_type):
         assert attrs.layout == "NCHW"
         pooled_size = get_const_tuple(attrs.pooled_size)
+        mode = bytes(attrs.mode, "utf-8")
         return [
             topi_compute(
                 inputs[0],
@@ -1048,6 +1049,7 @@ def _compute_roi_align(attrs, inputs, out_type):
                 pooled_size=pooled_size,
                 spatial_scale=attrs.spatial_scale,
                 sample_ratio=attrs.sample_ratio,
+                mode=mode,
             )
         ]
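Note: the strategy wrapper above turns the Relay string attribute into bytes because the TOPI kernels below compare mode against the byte literals b"avg" / b"max" (or the integer codes 0 / 1). A minimal standalone sketch of that convention, using made-up values that are not part of the patch:

    mode_attr = "max"                 # ROIAlignAttrs.mode is stored as a string on the Relay side
    mode = bytes(mode_attr, "utf-8")  # the TOPI kernels expect b"avg" / b"max" (or 0 / 1)
    assert mode in (b"avg", b"max")
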
diff --git a/python/tvm/relay/op/vision/rcnn.py b/python/tvm/relay/op/vision/rcnn.py
index b87eb07d7563..d25c5de89cee 100644
--- a/python/tvm/relay/op/vision/rcnn.py
+++ b/python/tvm/relay/op/vision/rcnn.py
@@ -18,7 +18,7 @@
 from . import _make


-def roi_align(data, rois, pooled_size, spatial_scale, sample_ratio=-1, layout="NCHW"):
+def roi_align(data, rois, pooled_size, spatial_scale, sample_ratio=-1, layout="NCHW", mode="avg"):
     """ROI align operator.

     Parameters
@@ -40,12 +40,15 @@ def roi_align(data, rois, pooled_size, spatial_scale, sample_ratio=-1, layout="N
     sample_ratio : int
         Optional sampling ratio of ROI align, using adaptive size by default.

+    mode : str, Optional
+        The pooling method. Relay supports two methods, 'avg' and 'max'. Default is 'avg'.
+
     Returns
     -------
     output : relay.Expr
         4-D tensor with shape [num_roi, channel, pooled_size, pooled_size]
     """
-    return _make.roi_align(data, rois, pooled_size, spatial_scale, sample_ratio, layout)
+    return _make.roi_align(data, rois, pooled_size, spatial_scale, sample_ratio, layout, mode)


 def roi_pool(data, rois, pooled_size, spatial_scale, layout="NCHW"):
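For reference, a minimal way to exercise the new argument through the Python API documented above; the shapes are illustrative only:

    import tvm
    from tvm import relay

    data = relay.var("data", shape=(1, 4, 16, 16), dtype="float32")
    rois = relay.var("rois", shape=(32, 5), dtype="float32")  # each row: [batch_index, x1, y1, x2, y2]
    out = relay.vision.roi_align(
        data, rois, pooled_size=(7, 7), spatial_scale=1.0, sample_ratio=2, layout="NCHW", mode="max"
    )
    func = relay.Function([data, rois], out)
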
diff --git a/python/tvm/te/hybrid/calls.py b/python/tvm/te/hybrid/calls.py
index 6785457c3bd7..462066106a9d 100644
--- a/python/tvm/te/hybrid/calls.py
+++ b/python/tvm/te/hybrid/calls.py
@@ -167,3 +167,17 @@ def max_num_threads(func_id, args):
     _internal_assert(isinstance(args[0], _expr.IntImm), "In tvm bool should be uint")
     res = Target.current(args[0].value).max_num_threads
     return convert(res)
+
+
+def inf(func_id, args):
+    """Infinity"""
+    _internal_assert(func_id == "inf", "This function cannot be directly invoked!")
+    _internal_assert(args.__len__() == 1, "One argument accepted!")
+    return tvm.tir.max_value(args[0])
+
+
+def ninf(func_id, args):
+    """Negative infinity"""
+    _internal_assert(func_id == "ninf", "This function cannot be directly invoked!")
+    _internal_assert(args.__len__() == 1, "One argument accepted!")
+    return tvm.tir.min_value(args[0])
diff --git a/python/tvm/te/hybrid/runtime.py b/python/tvm/te/hybrid/runtime.py
index 7b90f8729014..615bd7e43a7d 100644
--- a/python/tvm/te/hybrid/runtime.py
+++ b/python/tvm/te/hybrid/runtime.py
@@ -111,6 +111,14 @@ def max_num_threads(allow_none=True):
     return Target.current(allow_none).max_num_threads


+def inf(dtype):
+    return numpy.finfo(dtype).max
+
+
+def ninf(dtype):
+    return numpy.finfo(dtype).min
+
+
 HYBRID_GLOBALS = {
     "unroll": range,
     "vectorize": range,
@@ -142,6 +150,8 @@ def max_num_threads(allow_none=True):
     "float64": numpy.float64,
     "ceil_div": lambda a, b: (a + b - 1) // b,
     "max_num_threads": max_num_threads,
+    "inf": inf,
+    "ninf": ninf,
 }
diff --git a/python/tvm/topi/testing/roi_align_python.py b/python/tvm/topi/testing/roi_align_python.py
index abef25f0b994..643a954b101b 100644
--- a/python/tvm/topi/testing/roi_align_python.py
+++ b/python/tvm/topi/testing/roi_align_python.py
@@ -20,12 +20,14 @@
 import numpy as np


-def roi_align_nchw_python(a_np, rois_np, pooled_size, spatial_scale, sample_ratio):
+def roi_align_nchw_python(a_np, rois_np, pooled_size, spatial_scale, sample_ratio, mode=b"avg"):
     """Roi align in python"""
+    avg_mode = mode in (b"avg", "avg", 0)
+    max_mode = mode in (b"max", "max", 1)
+    assert avg_mode or max_mode, "Mode must be average or max. Please pass a valid mode."
     _, channel, height, width = a_np.shape
     num_roi = rois_np.shape[0]
     b_np = np.zeros((num_roi, channel, pooled_size, pooled_size), dtype=a_np.dtype)
-
     if isinstance(pooled_size, int):
         pooled_size_h = pooled_size_w = pooled_size
     else:
@@ -76,11 +78,17 @@ def _bilinear(n, c, y, x):
     for c in range(channel):
         for ph in range(pooled_size_h):
             for pw in range(pooled_size_w):
-                total = 0.0
+                if avg_mode:
+                    total = 0.0
+                if max_mode:
+                    total = float("-inf")
                 for iy in range(roi_bin_grid_h):
                     for ix in range(roi_bin_grid_w):
                         y = roi_start_h + ph * bin_h + (iy + 0.5) * bin_h / roi_bin_grid_h
                         x = roi_start_w + pw * bin_w + (ix + 0.5) * bin_w / roi_bin_grid_w
-                        total += _bilinear(batch_index, c, y, x)
-                b_np[i, c, ph, pw] = total / count
+                        if avg_mode:
+                            total += _bilinear(batch_index, c, y, x) / count
+                        if max_mode:
+                            total = max(total, _bilinear(batch_index, c, y, x))
+                b_np[i, c, ph, pw] = total
     return b_np
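The NumPy reference above can be sanity-checked on its own: because max mode takes the maximum of the same bilinear samples that avg mode averages, the max-mode output can never fall below the avg-mode output. A small sketch with hypothetical shapes (not part of the patch):

    import numpy as np
    from tvm.topi.testing import roi_align_nchw_python

    a_np = np.random.uniform(-1, 1, size=(1, 4, 16, 16)).astype("float32")
    rois_np = np.array([[0, 2.0, 2.0, 12.0, 12.0]], dtype="float32")  # [batch_index, x1, y1, x2, y2]
    avg_out = roi_align_nchw_python(a_np, rois_np, 7, 1.0, 2, mode=b"avg")
    max_out = roi_align_nchw_python(a_np, rois_np, 7, 1.0, 2, mode=b"max")
    assert (max_out >= avg_out - 1e-5).all()  # the max of a sample set is never below its mean
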
diff --git a/python/tvm/topi/vision/rcnn/roi_align.py b/python/tvm/topi/vision/rcnn/roi_align.py
index 30824770b7b2..95f350084ba5 100644
--- a/python/tvm/topi/vision/rcnn/roi_align.py
+++ b/python/tvm/topi/vision/rcnn/roi_align.py
@@ -22,7 +22,7 @@
 from ...cpp.utils import bilinear_sample_nchw


-def roi_align_nchw(data, rois, pooled_size, spatial_scale, sample_ratio=-1):
+def roi_align_nchw(data, rois, pooled_size, spatial_scale, mode, sample_ratio=-1):
     """ROI align operator in NCHW layout.

     Parameters
@@ -41,6 +41,10 @@ def roi_align_nchw(data, rois, pooled_size, spatial_scale, sample_ratio=-1):
         Ratio of input feature map height (or w) to raw image height (or w). Equals the reciprocal
         of total stride in convolutional layers, which should be in range (0.0, 1.0]

+    mode : int or str
+        There are two modes, average and max. For the average mode, you can pass b'avg' or 0, and
+        for the max mode, you can pass b'max' or 1.
+
     sample_ratio : int
         Optional sampling ratio of ROI align, using adaptive size by default.

@@ -49,6 +53,9 @@ def roi_align_nchw(data, rois, pooled_size, spatial_scale, sample_ratio=-1):
     output : tvm.te.Tensor
         4-D with shape [num_roi, channel, pooled_size, pooled_size]
     """
+    avg_mode = mode in (b"avg", 0)
+    max_mode = mode in (b"max", 1)
+    assert avg_mode or max_mode, "Mode must be avg or max. Please pass in a valid mode."
     dtype = rois.dtype
     _, channel, height, width = get_const_tuple(data.shape)
     num_roi, _ = get_const_tuple(rois.shape)
@@ -92,14 +99,25 @@ def _sample(i, c, ph, pw):
         rw = te.reduce_axis((0, roi_bin_grid_w))
         roi_start_h += ph * bin_h
         roi_start_w += pw * bin_w
-        return te.sum(
+        if avg_mode:
+            return te.sum(
+                _bilinear(
+                    batch_index,
+                    c,
+                    roi_start_h + (rh + 0.5) * bin_h / roi_bin_grid_h,
+                    roi_start_w + (rw + 0.5) * bin_w / roi_bin_grid_w,
+                )
+                / count,
+                axis=[rh, rw],
+            )
+        # max mode
+        return te.max(
             _bilinear(
                 batch_index,
                 c,
                 roi_start_h + (rh + 0.5) * bin_h / roi_bin_grid_h,
                 roi_start_w + (rw + 0.5) * bin_w / roi_bin_grid_w,
-            )
-            / count,
+            ),
             axis=[rh, rw],
         )
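The only structural change in the TE kernel above is the reducer: te.sum over the sampling grid becomes te.max, and the division by count is dropped. The reduction pattern itself, shown on a toy tensor (this snippet is illustrative and not part of the patch):

    import tvm
    from tvm import te

    data = te.placeholder((8, 4, 4), name="data")
    rh = te.reduce_axis((0, 4), name="rh")
    rw = te.reduce_axis((0, 4), name="rw")
    # One output element = max over a 4x4 window, mirroring the max-mode branch above.
    pooled = te.compute((8,), lambda i: te.max(data[i, rh, rw], axis=[rh, rw]), name="pooled")
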
diff --git a/python/tvm/topi/x86/roi_align.py b/python/tvm/topi/x86/roi_align.py
index ac2146b558f9..336a336f50e5 100644
--- a/python/tvm/topi/x86/roi_align.py
+++ b/python/tvm/topi/x86/roi_align.py
@@ -17,15 +17,17 @@
 # pylint: disable=invalid-name, no-member, too-many-locals, too-many-arguments, undefined-variable, too-many-nested-blocks, too-many-branches, too-many-statements
 """Non-maximum suppression operator for intel cpu"""
 import math
-import tvm

+import tvm
 from tvm.te import hybrid
 from ..tensor import full
 from ..utils import get_const_tuple


 @hybrid.script
-def roi_align_nchw_ir(data, rois, num_rois, w_pc, pos_pc, pooled_size, spatial_scale, sample_ratio):
+def roi_align_nchw_ir(
+    data, rois, num_rois, w_pc, pos_pc, pooled_size, spatial_scale, sample_ratio, mode
+):
     """Hybrid routine for ROI align operator in NCHW layout.

     Parameters
@@ -57,6 +59,10 @@ def roi_align_nchw_ir(data, rois, num_rois, w_pc, pos_pc, pooled_size, spatial_s
     sample_ratio : tvm.tir.const
         Sampling ratio of ROI align, using adaptive size by default.

+    mode : tvm.tir.const
+        Mode of RoiAlign. A value of 0 corresponds to b'avg', while a value of 1 corresponds to
+        b'max'.
+
     Returns
     -------
     output : tvm.te.Tensor or numpy NDArray
@@ -160,10 +166,12 @@ def roi_align_nchw_ir(data, rois, num_rois, w_pc, pos_pc, pooled_size, spatial_s
             pre_calc_index = 0
             for ph in range(pooled_size_h):
                 for pw in range(pooled_size_w):
-                    output_val = 0.0
+                    output_val = 0.0  # Avg mode
+                    if mode == 1:  # Max mode
+                        output_val = ninf("float32")
                     for iy in range(roi_bin_grid_h):
                         for ix in range(roi_bin_grid_w):
-                            output_val += (
+                            bilinear_val = (
                                 w_pc[n, pre_calc_index, 0]
                                 * data[
                                     roi_batch_index,
@@ -194,14 +202,15 @@ def roi_align_nchw_ir(data, rois, num_rois, w_pc, pos_pc, pooled_size, spatial_s
                                 ]
                             )
                             pre_calc_index += 1
-
-                    output_val /= count
-                    output[n, c, ph, pw] = output_val
-
+                            if mode == 0:  # Avg mode
+                                output_val += bilinear_val / count
+                            if mode == 1:  # Max mode
+                                output_val = max(output_val, bilinear_val)
+                    output[n, c, ph, pw] = output_val
     return output


-def roi_align_nchw(data, rois, pooled_size, spatial_scale, sample_ratio=-1):
+def roi_align_nchw(data, rois, pooled_size, spatial_scale, mode, sample_ratio=-1):
     """ROI align operator in NCHW layout.

     Parameters
@@ -220,6 +229,9 @@ def roi_align_nchw(data, rois, pooled_size, spatial_scale, sample_ratio=-1):
         Ratio of input feature map height (or w) to raw image height (or w). Equals the reciprocal
         of total stride in convolutional layers, which should be in range (0.0, 1.0]

+    mode : str
+        Mode of RoiAlign. Should be b'max' or b'avg'.
+
     sample_ratio : int
         Optional sampling ratio of ROI align, using adaptive size by default.

@@ -250,6 +262,21 @@ def roi_align_nchw(data, rois, pooled_size, spatial_scale, sample_ratio=-1):
     pooled_size = tvm.runtime.convert(pooled_size)
     spatial_scale = tvm.tir.const(spatial_scale, "float32")
     sample_ratio = tvm.tir.const(sample_ratio, "int32")
+    if mode in (b"avg", 0):
+        mode = tvm.tir.const(0, dtype="float32")
+    elif mode in (b"max", 1):
+        mode = tvm.tir.const(1, dtype="float32")
+    else:
+        raise ValueError("Value %s passed in for mode not supported" % mode)
+
     return roi_align_nchw_ir(
-        data, rois, num_rois, w_pc_buffer, pos_pc_buffer, pooled_size, spatial_scale, sample_ratio
+        data,
+        rois,
+        num_rois,
+        w_pc_buffer,
+        pos_pc_buffer,
+        pooled_size,
+        spatial_scale,
+        sample_ratio,
+        mode,
     )
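The new inf()/ninf() hybrid-script intrinsics exist so the kernel above can seed its running maximum; outside this patch they can be used in any hybrid script. A minimal sketch (the rowwise_max kernel below is hypothetical and not part of TVM):

    import tvm
    from tvm import te
    from tvm.te import hybrid

    @hybrid.script
    def rowwise_max(a):
        out = output_tensor((a.shape[0],), "float32")
        for i in range(a.shape[0]):
            out[i] = ninf("float32")  # negative-infinity seed, as in the max-mode branch above
            for j in range(a.shape[1]):
                out[i] = max(out[i], a[i, j])
        return out

    a = te.placeholder((4, 8), name="a")
    b = rowwise_max(a)  # a te.Tensor produced by the hybrid kernel
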
diff --git a/src/relay/op/vision/rcnn_op.cc b/src/relay/op/vision/rcnn_op.cc
index f7bbf378d09c..c899681733f8 100644
--- a/src/relay/op/vision/rcnn_op.cc
+++ b/src/relay/op/vision/rcnn_op.cc
@@ -76,12 +76,13 @@ Array<Array<Layout> > ROIAlignInferCorrectLayout(const Attrs& attrs,
 }

 Expr MakeROIAlign(Expr data, Expr rois, Array<IndexExpr> pooled_size, double spatial_scale,
-                  int sample_ratio, String layout) {
+                  int sample_ratio, String layout, String mode) {
   auto attrs = make_object<ROIAlignAttrs>();
   attrs->pooled_size = pooled_size;
   attrs->spatial_scale = spatial_scale;
   attrs->sample_ratio = sample_ratio;
   attrs->layout = layout;
+  attrs->mode = mode;
   static const Op& op = Op::Get("vision.roi_align");
   return Call(op, {data, rois}, Attrs(attrs), {});
 }
diff --git a/tests/python/frontend/onnx/test_forward.py b/tests/python/frontend/onnx/test_forward.py
index 27b91dd38f8e..59ecffe829df 100644
--- a/tests/python/frontend/onnx/test_forward.py
+++ b/tests/python/frontend/onnx/test_forward.py
@@ -3437,7 +3437,13 @@ def verify_topk(input_dims, K, axis=-1):
 @tvm.testing.uses_gpu
 def test_roi_align():
     def verify_roi_align(
-        input_dims, num_roi, output_height, output_width, sampling_ratio=0, spatial_scale=1.0
+        input_dims,
+        num_roi,
+        output_height,
+        output_width,
+        sampling_ratio=0,
+        spatial_scale=1.0,
+        mode="avg",
     ):
         output_dims = [num_roi, input_dims[1], output_height, output_width]

@@ -3445,7 +3451,7 @@ def verify_roi_align(
             "RoiAlign",
             inputs=["X", "rois", "batch_indicies"],
             outputs=["Y"],
-            mode="avg",
+            mode=mode,
             output_height=output_height,
             output_width=output_width,
             sampling_ratio=sampling_ratio,
@@ -3490,6 +3496,8 @@ def verify_roi_align(
     verify_roi_align((5, 4, 16, 14), 32, 7, 7, sampling_ratio=1, spatial_scale=1.0)
     verify_roi_align((1, 4, 16, 16), 32, 7, 7, sampling_ratio=2, spatial_scale=1.0)
+    # ONNX implementation of roi_align with max mode is incorrect, so we don't compare outputs here.
+    #


 @tvm.testing.uses_gpu
 def test_non_max_suppression():
diff --git a/tests/python/relay/test_op_level5.py b/tests/python/relay/test_op_level5.py
index 6d7d401d706b..95cd537091f5 100644
--- a/tests/python/relay/test_op_level5.py
+++ b/tests/python/relay/test_op_level5.py
@@ -583,7 +583,7 @@ def test_threshold():

 @tvm.testing.uses_gpu
 def test_roi_align():
-    def verify_roi_align(data_shape, rois_shape, pooled_size, spatial_scale, sample_ratio):
+    def verify_roi_align(data_shape, rois_shape, pooled_size, spatial_scale, sample_ratio, mode):
         data = relay.var("data", relay.ty.TensorType(data_shape, "float32"))
         rois = relay.var("rois", relay.ty.TensorType(rois_shape, "float32"))
         z = relay.vision.roi_align(
@@ -592,6 +592,7 @@ def verify_roi_align(data_shape, rois_shape, pooled_size, spatial_scale, sample_
             pooled_size=(pooled_size, pooled_size),
             spatial_scale=spatial_scale,
             sample_ratio=sample_ratio,
+            mode=mode,
             layout="NCHW",
         )
         zz = run_infer_type(z)
@@ -612,6 +613,7 @@ def verify_roi_align(data_shape, rois_shape, pooled_size, spatial_scale, sample_
             pooled_size=pooled_size,
             spatial_scale=spatial_scale,
             sample_ratio=sample_ratio,
+            mode=mode,
         )
         for target, ctx in tvm.testing.enabled_targets():
             intrp1 = relay.create_executor("graph", ctx=ctx, target=target)
@@ -621,8 +623,18 @@ def verify_roi_align(data_shape, rois_shape, pooled_size, spatial_scale, sample_
             op_res2 = intrp2.evaluate(func)(np_data, np_rois)
             tvm.testing.assert_allclose(op_res2.asnumpy(), ref_res, rtol=1e-4)

-    verify_roi_align((1, 4, 16, 16), (32, 5), pooled_size=7, spatial_scale=1.0, sample_ratio=-1)
-    verify_roi_align((4, 4, 16, 16), (32, 5), pooled_size=7, spatial_scale=0.5, sample_ratio=2)
+    verify_roi_align(
+        (1, 4, 16, 16), (32, 5), pooled_size=7, spatial_scale=1.0, sample_ratio=-1, mode="avg"
+    )
+    verify_roi_align(
+        (4, 4, 16, 16), (32, 5), pooled_size=7, spatial_scale=0.5, sample_ratio=2, mode="avg"
+    )
+    verify_roi_align(
+        (1, 4, 16, 16), (32, 5), pooled_size=7, spatial_scale=1.0, sample_ratio=-1, mode="max"
+    )
+    verify_roi_align(
+        (4, 4, 16, 16), (32, 5), pooled_size=7, spatial_scale=0.5, sample_ratio=2, mode="max"
+    )


 @tvm.testing.uses_gpu
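To see the frontend change end to end, one can build an ONNX RoiAlign node with mode="max" and import it; as the comment in the ONNX test above notes, the onnxruntime output for max mode is not compared against, but the model still imports and compiles. A sketch with illustrative shapes, not part of the test suite:

    import onnx
    from onnx import TensorProto, helper
    from tvm import relay

    node = helper.make_node(
        "RoiAlign",
        inputs=["X", "rois", "batch_indicies"],
        outputs=["Y"],
        mode="max",
        output_height=7,
        output_width=7,
        sampling_ratio=2,
        spatial_scale=1.0,
    )
    graph = helper.make_graph(
        [node],
        "roi_align_max_sketch",
        inputs=[
            helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 4, 16, 16]),
            helper.make_tensor_value_info("rois", TensorProto.FLOAT, [32, 4]),
            helper.make_tensor_value_info("batch_indicies", TensorProto.INT64, [32]),
        ],
        outputs=[helper.make_tensor_value_info("Y", TensorProto.FLOAT, [32, 4, 7, 7])],
    )
    model = helper.make_model(graph, producer_name="roi_align_max_sketch")
    mod, params = relay.frontend.from_onnx(
        model, shape={"X": (1, 4, 16, 16), "rois": (32, 4), "batch_indicies": (32,)}
    )
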
diff --git a/tests/python/topi/python/test_topi_vision.py b/tests/python/topi/python/test_topi_vision.py
index 697ef8a24f67..839356892ab1 100644
--- a/tests/python/topi/python/test_topi_vision.py
+++ b/tests/python/topi/python/test_topi_vision.py
@@ -418,7 +418,9 @@ def check_device(device):
         check_device(device)


-def verify_roi_align(batch, in_channel, in_size, num_roi, pooled_size, spatial_scale, sample_ratio):
+def verify_roi_align(
+    batch, in_channel, in_size, num_roi, pooled_size, spatial_scale, sample_ratio, mode
+):
+    # For mode, 0 = avg, 1 = max
     a_shape = (batch, in_channel, in_size, in_size)
     rois_shape = (num_roi, 5)

@@ -427,8 +429,8 @@ def verify_roi_align(batch, in_channel, in_size, num_roi, pooled_size, spatial_s
     @memoize("topi.tests.test_topi_vision.verify_roi_align")
     def get_ref_data():
-        a_np = np.random.uniform(size=a_shape).astype("float32")
-        rois_np = np.random.uniform(size=rois_shape).astype("float32") * in_size
+        a_np = np.random.uniform(-1, 1, size=a_shape).astype("float32")
+        rois_np = np.random.uniform(-1, 1, size=rois_shape).astype("float32") * in_size
         rois_np[:, 0] = np.random.randint(low=0, high=batch, size=num_roi)
         b_np = tvm.topi.testing.roi_align_nchw_python(
             a_np,
@@ -436,6 +438,7 @@ def get_ref_data():
             pooled_size=pooled_size,
             spatial_scale=spatial_scale,
             sample_ratio=sample_ratio,
+            mode=mode,
         )
         return a_np, rois_np, b_np
@@ -447,8 +450,6 @@ def check_device(device):
         if not tvm.testing.device_enabled(device):
            print("Skip because %s is not enabled" % device)
            return
-        print("Running on target: %s" % device)
-
         with tvm.target.Target(device):
             fcompute, fschedule = tvm.topi.testing.dispatch(device, _roi_align_implement)
             b = fcompute(
@@ -457,6 +458,7 @@ def check_device(device):
                 pooled_size=pooled_size,
                 spatial_scale=spatial_scale,
                 sample_ratio=sample_ratio,
+                mode=mode,
             )
             s = fschedule(b)

@@ -465,7 +467,8 @@ def check_device(device):
         tvm_b = tvm.nd.array(np.zeros(get_const_tuple(b.shape), dtype=b.dtype), ctx=ctx)
         f = tvm.build(s, [a, rois, b], device)
         f(tvm_a, tvm_rois, tvm_b)
-        tvm.testing.assert_allclose(tvm_b.asnumpy(), b_np, rtol=1e-3)
+        tvm_val = tvm_b.asnumpy()
+        tvm.testing.assert_allclose(tvm_val, b_np, rtol=1e-3, atol=1e-4)

     for device in ["llvm", "cuda", "opencl"]:
         check_device(device)
@@ -473,10 +476,14 @@ def check_device(device):

 @tvm.testing.uses_gpu
 def test_roi_align():
-    verify_roi_align(1, 16, 32, 64, 7, 1.0, -1)
-    verify_roi_align(4, 16, 32, 64, 7, 0.5, 2)
-    verify_roi_align(1, 32, 32, 80, 8, 0.0625, 2)
-    verify_roi_align(1, 32, 500, 80, 8, 0.0625, 2)
+    verify_roi_align(1, 16, 32, 64, 7, 1.0, -1, 0)
+    verify_roi_align(4, 16, 32, 64, 7, 0.5, 2, 0)
+    verify_roi_align(1, 32, 32, 80, 8, 0.0625, 2, 0)
+    verify_roi_align(1, 32, 500, 80, 8, 0.0625, 2, 0)
+    verify_roi_align(1, 16, 32, 64, 7, 1.0, -1, 1)
+    verify_roi_align(4, 16, 32, 64, 7, 0.5, 2, 1)
+    verify_roi_align(1, 32, 32, 80, 8, 0.0625, 2, 1)
+    verify_roi_align(1, 32, 500, 80, 8, 0.0625, 2, 1)


 def verify_roi_pool(batch, in_channel, in_size, num_roi, pooled_size, spatial_scale):