From 73e49c11bcfd3b647eec0988d34d6ce0fec2e787 Mon Sep 17 00:00:00 2001
From: Matthew Brookhart
Date: Wed, 25 Nov 2020 11:21:30 -0700
Subject: [PATCH 1/6] Add sort op to relay
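
Adds a relay "sort" operator that returns the input data sorted along a
given axis, complementing "argsort", which returns the indices that would
sort the data. On CPU it lowers to a new "tvm.contrib.sort.sort" packed
function; on CUDA it reuses the sort IR and, when available, the thrust
backend. A minimal usage sketch, mirroring the unit tests added below (the
executor kind, target, and tolerance are illustrative, not part of the op
contract):

    import numpy as np
    import tvm
    from tvm import relay

    # Build a small Relay function that sorts its input along axis 1.
    x = relay.var("x", relay.TensorType((3, 4), "float32"))
    func = relay.Function([x], relay.sort(x, axis=1, is_ascend=True))
    mod = tvm.ir.IRModule.from_expr(func)

    # Run on CPU and compare against numpy's reference sort.
    x_data = np.random.uniform(size=(3, 4)).astype("float32")
    intrp = relay.create_executor("graph", mod=mod, target="llvm")
    op_res = intrp.evaluate()(x_data)
    np.testing.assert_allclose(op_res.asnumpy(), np.sort(x_data, axis=1), rtol=1e-5)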
---
 python/tvm/relay/op/_algorithm.py          |   4 +
 python/tvm/relay/op/algorithm.py           |  22 ++++
 python/tvm/relay/op/dyn/_algorithm.py      |   1 +
 python/tvm/relay/op/strategy/cuda.py       |  20 ++++
 python/tvm/relay/op/strategy/generic.py    |  24 +++++
 python/tvm/topi/cuda/sort.py               | 100 ++++++++++++++++++
 python/tvm/topi/generic/sort.py            |  17 +++
 python/tvm/topi/sort.py                    |  44 +++++++-
 src/relay/op/algorithm/sort.cc             |  65 ++++++++++++
 src/runtime/contrib/sort/sort.cc           |  63 ++++++++++-
 .../relay/dyn/test_dynamic_op_level6.py    |   4 +-
 tests/python/relay/test_op_level6.py       |  24 +++++
 tests/python/topi/python/test_topi_sort.py |  53 ++++++++++
 13 files changed, 436 insertions(+), 5 deletions(-)
 create mode 100644 src/relay/op/algorithm/sort.cc

diff --git a/python/tvm/relay/op/_algorithm.py b/python/tvm/relay/op/_algorithm.py
index 732d5016755a..3b15f0112e4e 100644
--- a/python/tvm/relay/op/_algorithm.py
+++ b/python/tvm/relay/op/_algorithm.py
@@ -26,6 +26,10 @@
 from .op import OpPattern, register_pattern
 from .op import register_strategy
 
+# sort
+register_strategy("sort", strategy.sort_strategy)
+register_pattern("sort", OpPattern.OPAQUE)
+
 # argsort
 register_strategy("argsort", strategy.argsort_strategy)
 register_pattern("argsort", OpPattern.OPAQUE)
diff --git a/python/tvm/relay/op/algorithm.py b/python/tvm/relay/op/algorithm.py
index e0550543f4b8..99140fcb3e11 100644
--- a/python/tvm/relay/op/algorithm.py
+++ b/python/tvm/relay/op/algorithm.py
@@ -22,6 +22,28 @@
 from ..expr import TupleWrapper, Expr, Constant
 
 
+def sort(data, axis=-1, is_ascend=1):
+    """Performs sorting along the given axis and returns data in sorted order.
+
+    Parameters
+    ----------
+    data : relay.Expr
+        The input data tensor.
+
+    axis : int, optional
+        Axis along which to sort the input tensor.
+
+    is_ascend : boolean, optional
+        Whether to sort in ascending or descending order.
+
+    Returns
+    -------
+    out : relay.Expr
+        Tensor of the same shape as data, sorted along the given axis.
+    """
+    return _make.sort(data, axis, is_ascend)
+
+
 def argsort(data, axis=-1, is_ascend=1, dtype="int32"):
     """Performs sorting along the given axis and returns an array of indicies
     having same shape as an input array that index data in sorted order.
diff --git a/python/tvm/relay/op/dyn/_algorithm.py b/python/tvm/relay/op/dyn/_algorithm.py
index ba903e680bbd..fa2678a459af 100644
--- a/python/tvm/relay/op/dyn/_algorithm.py
+++ b/python/tvm/relay/op/dyn/_algorithm.py
@@ -20,6 +20,7 @@
 from tvm.te.hybrid import script
 from tvm.runtime import convert
 
+from tvm import topi
 from .. import strategy
 from .. import op as _reg
diff --git a/python/tvm/relay/op/strategy/cuda.py b/python/tvm/relay/op/strategy/cuda.py
index 326a184579e0..c0fc43d9f558 100644
--- a/python/tvm/relay/op/strategy/cuda.py
+++ b/python/tvm/relay/op/strategy/cuda.py
@@ -770,6 +770,26 @@ def scatter_nd_cuda(attrs, inputs, out_type, target):
         name="scatter_nd.cuda",
         plevel=10,
     )
+
+
+@sort_strategy.register(["cuda", "gpu"])
+def sort_strategy_cuda(attrs, inputs, out_type, target):
+    """sort cuda strategy"""
+    strategy = _op.OpStrategy()
+    strategy.add_implementation(
+        wrap_compute_sort(topi.cuda.sort),
+        wrap_topi_schedule(topi.cuda.schedule_sort),
+        name="sort.cuda",
+    )
+    if target.kind.name == "cuda" and get_global_func(
+        "tvm.contrib.thrust.sort", allow_missing=True
+    ):
+        strategy.add_implementation(
+            wrap_compute_sort(topi.cuda.sort_thrust),
+            wrap_topi_schedule(topi.cuda.schedule_sort),
+            name="sort_thrust.cuda",
+            plevel=15,
+        )
     return strategy
diff --git a/python/tvm/relay/op/strategy/generic.py b/python/tvm/relay/op/strategy/generic.py
index 18b3dd7e41cc..24a81e4dfa1f 100644
--- a/python/tvm/relay/op/strategy/generic.py
+++ b/python/tvm/relay/op/strategy/generic.py
@@ -769,6 +769,30 @@ def schedule_sparse_transpose(attrs, outs, target):
         return topi.generic.schedule_sparse_transpose(outs)
 
 
+# sort
+def wrap_compute_sort(topi_compute):
+    """Wrap sort topi compute"""
+
+    def _compute_sort(attrs, inputs, _):
+        axis = get_const_int(attrs.axis)
+        is_ascend = bool(get_const_int(attrs.is_ascend))
+        return [topi_compute(inputs[0], axis=axis, is_ascend=is_ascend)]
+
+    return _compute_sort
+
+
+@override_native_generic_func("sort_strategy")
+def sort_strategy(attrs, inputs, out_type, target):
+    """sort generic strategy"""
+    strategy = _op.OpStrategy()
+    strategy.add_implementation(
+        wrap_compute_sort(topi.sort),
+        wrap_topi_schedule(topi.generic.schedule_sort),
+        name="sort.generic",
+    )
+    return strategy
+
+
 # argsort
 def wrap_compute_argsort(topi_compute):
     """Wrap argsort topi compute"""
diff --git a/python/tvm/topi/cuda/sort.py b/python/tvm/topi/cuda/sort.py
index 329f0fb897e5..47684b8f778e 100644
--- a/python/tvm/topi/cuda/sort.py
+++ b/python/tvm/topi/cuda/sort.py
@@ -316,6 +316,89 @@ def argsort_nms_thrust(data, valid_count, axis=-1, is_ascend=1, dtype="float32")
     return out[1]
 
 
+def sort(data, axis=-1, is_ascend=1):
+    """Performs sorting along the given axis and returns an array of indicies
+    having same shape as an input array that index data in sorted order.
+
+    Parameters
+    ----------
+    data: tvm.te.Tensor
+        The input array.
+
+    axis : int, optional
+        Axis along which to sort the input tensor.
+
+    is_ascend : boolean, optional
+        Whether to sort in ascending or descending order.
+
+    Returns
+    -------
+    out : tvm.te.Tensor
+        The output of this function.
+    """
+    dtype = "float32"
+    value_buf = tvm.tir.decl_buffer(data.shape, data.dtype, "value_buf", data_alignment=8)
+    indices_buf = tvm.tir.decl_buffer(data.shape, dtype, "out_buf", data_alignment=8)
+    out = te.extern(
+        [data.shape, data.shape],
+        [data],
+        lambda ins, outs: sort_ir(ins[0], outs[0], axis, is_ascend, indices_out=outs[1]),
+        out_buffers=[value_buf, indices_buf],
+        name="sort_gpu",
+        tag="sort_gpu",
+    )[0]
+    return out
+
+
+def sort_thrust(data, axis=-1, is_ascend=1):
+    """Performs sorting along the given axis and returns an array of indicies
+    having same shape as an input array that index data in sorted order.
+
+    Parameters
+    ----------
+    data: tvm.te.Tensor
+        The input array.
+
+    axis : int, optional
+        Axis along which to sort the input tensor.
+
+    is_ascend : boolean, optional
+        Whether to sort in ascending or descending order.
+
+    Returns
+    -------
+    out : tvm.te.Tensor
+        The output of this function.
+    """
+    dtype = "float32"
+
+    ndim = len(data.shape)
+    axis = ndim + axis if axis < 0 else axis
+
+    if axis != ndim - 1:
+        # Prepare for sorting along axis -1.
+        axes = swap(list(range(ndim)), axis)
+        data = transpose(data, axes)
+
+    value_buf = tvm.tir.decl_buffer(data.shape, data.dtype, "value_buf", data_alignment=8)
+    indices_buf = tvm.tir.decl_buffer(data.shape, dtype, "out_buf", data_alignment=8)
+    out = te.extern(
+        [data.shape, data.shape],
+        [data],
+        lambda ins, outs: tvm.tir.call_packed(
+            "tvm.contrib.thrust.sort", ins[0], outs[0], outs[1], is_ascend
+        ),
+        out_buffers=[value_buf, indices_buf],
+        name="sort_gpu",
+        tag="sort_gpu",
+    )[0]
+
+    if axis != ndim - 1:
+        axes = swap(list(range(ndim)), axis)
+        out = transpose(out, axes)
+    return out
+
+
 def argsort(data, valid_count=None, axis=-1, is_ascend=1, dtype="float32"):
     """Performs sorting along the given axis and returns an array of indicies
     having same shape as an input array that index data in sorted order.
@@ -408,6 +491,23 @@ def argsort_thrust(data, valid_count=None, axis=-1, is_ascend=1, dtype="float32"
     return out
 
 
+def schedule_sort(outs):
+    """Schedule for sort operator.
+
+    Parameters
+    ----------
+    outs: Array of Tensor
+        The computation graph description of sort
+        in the format of an array of tensors.
+
+    Returns
+    -------
+    s: Schedule
+        The computation schedule for the op.
+    """
+    return _schedule_sort(outs)
+
+
 def schedule_argsort(outs):
     """Schedule for argsort operator.
diff --git a/python/tvm/topi/generic/sort.py b/python/tvm/topi/generic/sort.py
index 16e6a5d70881..65df7a1a2569 100644
--- a/python/tvm/topi/generic/sort.py
+++ b/python/tvm/topi/generic/sort.py
@@ -20,6 +20,23 @@
 from .default import default_schedule as _default_schedule
 
 
+def schedule_sort(outs):
+    """Schedule for sort operator.
+
+    Parameters
+    ----------
+    outs: Array of Tensor
+        The computation graph description of sort
+        in the format of an array of tensors.
+
+    Returns
+    -------
+    s: Schedule
+        The computation schedule for the op.
+    """
+    return _default_schedule(outs, False)
+
+
 def schedule_argsort(outs):
     """Schedule for argsort operator.
diff --git a/python/tvm/topi/sort.py b/python/tvm/topi/sort.py
index 98a1080660fb..576d0424634f 100644
--- a/python/tvm/topi/sort.py
+++ b/python/tvm/topi/sort.py
@@ -14,13 +14,55 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-# pylint: disable=too-many-arguments
+# pylint: disable=too-many-arguments, unused-argument
 """Argsort operator"""
 import tvm
 from tvm import te
 
 from .utils import get_const_tuple
 
+def sort(data, axis=-1, is_ascend=1):
+    """Performs sorting along the given axis and returns an array
+    in sorted order.
+
+    Parameters
+    ----------
+    data : tvm.te.Tensor
+        The input tensor.
+
+    axis : int, optional
+        Axis along which to sort the input tensor.
+        The default is the last axis.
+
+    is_ascend : boolean, optional
+        Whether to sort in ascending or descending order.
+
+    dtype : string, optional
+        DType of the output indices.
+
+    Returns
+    -------
+    out : tvm.te.Tensor
+        The sorted tensor.
+
+    """
+    data_buf = tvm.tir.decl_buffer(data.shape, data.dtype, "data_buf", data_alignment=8)
+    out_buf = tvm.tir.decl_buffer(data.shape, data.dtype, "out_buf", data_alignment=8)
+    out = te.extern(
+        data.shape,
+        [data],
+        lambda ins, outs: tvm.tir.call_packed(
+            "tvm.contrib.sort.sort", ins[0], outs[0], axis, is_ascend
+        ),
+        dtype=data.dtype,
+        in_buffers=[data_buf],
+        out_buffers=out_buf,
+        name="sort_cpu",
+        tag="sort_cpu",
+    )
+    return out
+
+
 def argsort(data, valid_count=None, axis=-1, is_ascend=1, dtype="float32"):
     """Performs sorting along the given axis and returns an array
     of indices having the same shape as an input array that index
diff --git a/src/relay/op/algorithm/sort.cc b/src/relay/op/algorithm/sort.cc
new file mode 100644
index 000000000000..69a6ae55c71d
--- /dev/null
+++ b/src/relay/op/algorithm/sort.cc
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file sort.cc
+ * \brief Sort operators
+ */
+#include <tvm/relay/attrs/algorithm.h>
+#include <tvm/relay/op.h>
+
+namespace tvm {
+namespace relay {
+
+bool SortRel(const Array<Type>& types, int num_inputs, const Attrs& attrs,
+             const TypeReporter& reporter) {
+  // `types` contains: [data, result]
+  ICHECK_EQ(types.size(), 2);
+  const auto* data = types[0].as<TensorTypeNode>();
+  if (data == nullptr) {
+    ICHECK(types[0].as<IncompleteTypeNode>())
+        << "Sort: expect input type to be TensorType but get " << types[0];
+    return false;
+  }
+  reporter->Assign(types[1], TensorType(data->shape, data->dtype));
+  return true;
+}
+
+Expr MakeSort(Expr data, int axis, bool is_ascend) {
+  auto attrs = make_object<ArgsortAttrs>();
+  attrs->axis = axis;
+  attrs->is_ascend = is_ascend;
+  static const Op& op = Op::Get("sort");
+  return Call(op, {data}, Attrs(attrs), {});
+}
+
+TVM_REGISTER_GLOBAL("relay.op._make.sort").set_body_typed(MakeSort);
+
+RELAY_REGISTER_OP("sort")
+    .describe(R"doc(Returns the input data sorted along
+the given axis.
+)doc" TVM_ADD_FILELINE)
+    .set_num_inputs(1)
+    .set_attrs_type<ArgsortAttrs>()
+    .add_argument("data", "Tensor", "Input data.")
+    .set_support_level(6)
+    .add_type_rel("Sort", SortRel);
+
+}  // namespace relay
+}  // namespace tvm
diff --git a/src/runtime/contrib/sort/sort.cc b/src/runtime/contrib/sort/sort.cc
index 31cf38d7d7a5..fba57d923b38 100644
--- a/src/runtime/contrib/sort/sort.cc
+++ b/src/runtime/contrib/sort/sort.cc
@@ -125,7 +125,7 @@ TVM_REGISTER_GLOBAL("tvm.contrib.sort.argsort_nms").set_body([](TVMArgs args, TV
 });
 
 template <typename DataType, typename OutType>
-void argsort(DLTensor* input, DLTensor* output, int32_t axis, bool is_ascend) {
+void sort_impl(DLTensor* input, DLTensor* output, int32_t axis, bool is_ascend, bool is_argsort) {
   auto data_ptr = static_cast<DataType*>(input->data);
   auto out_ptr = static_cast<OutType*>(output->data);
   std::vector<std::pair<int64_t, DataType>> sorter;
@@ -153,13 +153,29 @@ void sort_impl(DLTensor* input, DLTensor* output, int32_t axis, bool is_ascend,
       } else {
         std::stable_sort(sorter.begin(), sorter.end(), CompareDescend<DataType>);
       }
-      for (int64_t k = 0; k < input->shape[axis]; ++k) {
-        out_ptr[base_idx + k * axis_mul_after] = static_cast<OutType>(sorter[k].first);
+      if (is_argsort) {
+        for (int64_t k = 0; k < input->shape[axis]; ++k) {
+          out_ptr[base_idx + k * axis_mul_after] = static_cast<OutType>(sorter[k].first);
+        }
+      } else {
+        for (int64_t k = 0; k < input->shape[axis]; ++k) {
+          out_ptr[base_idx + k * axis_mul_after] = static_cast<OutType>(sorter[k].second);
+        }
       }
     }
   }
 }
+
+template <typename DataType, typename OutType>
+void argsort(DLTensor* input, DLTensor* output, int32_t axis, bool is_ascend) {
+  return sort_impl<DataType, OutType>(input, output, axis, is_ascend, true);
+}
+
+template <typename DataType, typename OutType>
+void sort(DLTensor* input, DLTensor* output, int32_t axis, bool is_ascend) {
+  return sort_impl<DataType, OutType>(input, output, axis, is_ascend, false);
+}
+
 // Argsort implemented C library sort.
 // Return indices of sorted tensor.
 // By default, the last axis will be used to sort.
@@ -243,6 +259,47 @@ TVM_REGISTER_GLOBAL("tvm.contrib.sort.argsort").set_body([](TVMArgs args, TVMRet
   }
 });
 
+// Sort implemented C library sort.
+// Return sorted tensor.
+// By default, the last axis will be used to sort.
+// If the input tensor has dimension
+// (d0, d1, ..., d(k-1), dk, d(k+1), ..., d(n-1)) and the sort axis is dk,
+// the output tensor has the same shape and dtype as the input,
+// with the values along dk in sorted order.
+TVM_REGISTER_GLOBAL("tvm.contrib.sort.sort").set_body([](TVMArgs args, TVMRetValue* ret) {
+  DLTensor* input = args[0];
+  DLTensor* output = args[1];
+  int32_t axis = args[2];
+  bool is_ascend = args[3];
+  if (axis < 0) {
+    axis = input->ndim + axis;
+  }
+  ICHECK_LT(axis, input->ndim) << "Axis out of boundary for "
+                                  "input ndim "
+                               << input->ndim;
+
+  auto data_dtype = DLDataType2String(input->dtype);
+  auto out_dtype = DLDataType2String(output->dtype);
+
+  ICHECK_EQ(data_dtype, out_dtype);
+
+  if (data_dtype == "float32") {
+    sort<float, float>(input, output, axis, is_ascend);
+  } else if (data_dtype == "float64") {
+    sort<double, double>(input, output, axis, is_ascend);
+#if (__ARM_FEATURE_FP16_SCALAR_ARITHMETIC == 1)
+  } else if (data_dtype == "float16") {
+    sort<__fp16, __fp16>(input, output, axis, is_ascend);
+#endif
+  } else if (data_dtype == "int32") {
+    sort<int32_t, int32_t>(input, output, axis, is_ascend);
+  } else if (data_dtype == "int64") {
+    sort<int64_t, int64_t>(input, output, axis, is_ascend);
+  } else {
+    LOG(FATAL) << "Unsupported input dtype: " << data_dtype;
+  }
+});
+
 template <typename DataType, typename IndicesType>
 void topk(DLTensor* input, DLTensor* out_values, DLTensor* out_indices, int k, int axis,
           bool is_ascend) {
diff --git a/tests/python/relay/dyn/test_dynamic_op_level6.py b/tests/python/relay/dyn/test_dynamic_op_level6.py
index 52abbe2a15b6..ec6bd64d33e2 100644
--- a/tests/python/relay/dyn/test_dynamic_op_level6.py
+++ b/tests/python/relay/dyn/test_dynamic_op_level6.py
@@ -53,7 +53,9 @@ def verify_topk(k, axis, ret_type, is_ascend, dtype):
     np_indices = np_indices.astype(dtype)
 
     for target, ctx in tvm.testing.enabled_targets():
-        for kind in ["vm", "debug"]:
+        # only test with vm, graph runtime doesn't support dynamic shapes
+        # and debug runtime doesn't support legalization for cuda kernel
+        for kind in ["vm"]:
             mod = tvm.ir.IRModule.from_expr(func)
             intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
             op_res = intrp.evaluate()(np_data, np.array([k]).astype("float32"))
diff --git a/tests/python/relay/test_op_level6.py b/tests/python/relay/test_op_level6.py
index de51c1502603..6e687658c60c 100644
--- a/tests/python/relay/test_op_level6.py
+++ b/tests/python/relay/test_op_level6.py
@@ -23,6 +23,29 @@
 import tvm.testing
 
 
+@tvm.testing.uses_gpu
+def test_sort():
+    def verify_sort(shape, axis, is_ascend):
+        x = relay.var("x", relay.TensorType(shape, "float32"))
+        z = relay.sort(x, axis=axis, is_ascend=is_ascend)
+        func = relay.Function([x], z)
+        x_data = np.random.uniform(size=shape).astype("float32")
+        if is_ascend:
+            ref_res = np.sort(x_data, axis=axis)
+        else:
+            ref_res = -np.sort(-x_data, axis=axis)
+
+        for target, ctx in tvm.testing.enabled_targets():
+            for kind in ["graph", "debug"]:
+                intrp = relay.create_executor(kind, ctx=ctx, target=target)
+                op_res = intrp.evaluate(func)(x_data)
+                tvm.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=1e-5)
+
+    verify_sort((2, 3, 4), axis=0, is_ascend=False)
+    verify_sort((1, 4, 6), axis=1, is_ascend=True)
+    verify_sort((3, 5, 6), axis=-1, is_ascend=False)
+
+
 @tvm.testing.uses_gpu
 def test_argsort():
     def verify_argsort(shape, axis, is_ascend, dtype):
@@ -95,5 +118,6 @@ def verify_topk(k, axis, ret_type, is_ascend, dtype):
 
 
 if __name__ == "__main__":
+    test_sort()
     test_argsort()
     test_topk()
diff --git a/tests/python/topi/python/test_topi_sort.py b/tests/python/topi/python/test_topi_sort.py
index 7e0c982e0c42..626218f30144 100644
--- a/tests/python/topi/python/test_topi_sort.py
+++ b/tests/python/topi/python/test_topi_sort.py
@@ -23,6 +23,11 @@
 import tvm.topi.testing
 import tvm.testing
 
+_sort_implement = {
+    "generic": (topi.sort, topi.generic.schedule_sort),
+    "gpu": (topi.cuda.sort, topi.cuda.schedule_sort),
+}
+
 _argsort_implement = {
     "generic": (topi.argsort, topi.generic.schedule_argsort),
     "gpu": (topi.cuda.argsort, topi.cuda.schedule_argsort),
@@ -34,6 +39,46 @@
 }
 
 
+def verify_sort(axis, is_ascend):
+    dshape = (20, 100)
+    data_dtype = "float32"
+    data = te.placeholder(dshape, name="data", dtype=data_dtype)
+
+    perm = np.arange(dshape[0] * dshape[1], dtype=data_dtype)
+    np.random.shuffle(perm)
+    np_data = perm.reshape(dshape)
+
+    if is_ascend:
+        np_sort = np.sort(np_data, axis=axis)
+    else:
+        np_sort = -np.sort(-np_data, axis=axis)
+
+    if axis == 0:
+        np_sort = np_sort[: dshape[axis], :]
+    else:
+        np_sort = np_sort[:, : dshape[axis]]
+
+    def check_device(device):
+        if not tvm.testing.device_enabled(device):
+            print("Skip because %s is not enabled" % device)
+            return
+        ctx = tvm.context(device, 0)
+        print("Running on target: %s" % device)
+        with tvm.target.Target(device):
+            fcompute, fschedule = tvm.topi.testing.dispatch(device, _sort_implement)
+            out = fcompute(data, axis=axis, is_ascend=is_ascend)
+            s = fschedule(out)
+
+        tvm_data = tvm.nd.array(np_data, ctx)
+        tvm_out = tvm.nd.array(np.zeros(dshape, dtype=data_dtype), ctx)
+        f = tvm.build(s, [data, out], device)
+        f(tvm_data, tvm_out)
+        tvm.testing.assert_allclose(tvm_out.asnumpy(), np_sort, rtol=1e0)
+
+    for device in ["llvm", "cuda", "opencl"]:
+        check_device(device)
+
+
 def verify_argsort(axis, is_ascend):
     dshape = (20, 100)
     data_dtype = "float32"
@@ -126,6 +171,14 @@ def check_device(device):
         check_device(device)
 
 
+@tvm.testing.uses_gpu
+def test_sort():
+    np.random.seed(0)
+    for axis in [0, -1, 1]:
+        verify_sort(axis, True)
+        verify_sort(axis, False)
+
+
 @tvm.testing.uses_gpu
 def test_argsort():
     np.random.seed(0)

From b20e67162e451e6bfb51c8ccd062d75c2a319a5e Mon Sep 17 00:00:00 2001
From: mbrookhart
Date: Tue, 8 Dec 2020 14:12:28 -0700
Subject: [PATCH 2/6] fix lint

---
 python/tvm/relay/op/dyn/_algorithm.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/python/tvm/relay/op/dyn/_algorithm.py b/python/tvm/relay/op/dyn/_algorithm.py
index fa2678a459af..ba903e680bbd 100644
--- a/python/tvm/relay/op/dyn/_algorithm.py
+++ b/python/tvm/relay/op/dyn/_algorithm.py
@@ -20,7 +20,6 @@
 from tvm.te.hybrid import script
 from tvm.runtime import convert
 
-from tvm import topi
 from .. import strategy
 from .. import op as _reg

From d67cfdd931491c333422b7368c117702cb5774d2 Mon Sep 17 00:00:00 2001
From: mbrookhart
Date: Wed, 16 Dec 2020 12:12:58 -0700
Subject: [PATCH 3/6] fix sort docstring

---
 python/tvm/topi/cuda/sort.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/tvm/topi/cuda/sort.py b/python/tvm/topi/cuda/sort.py
index 47684b8f778e..b8873e0b546b 100644
--- a/python/tvm/topi/cuda/sort.py
+++ b/python/tvm/topi/cuda/sort.py
@@ -317,8 +317,8 @@ def argsort_nms_thrust(data, valid_count, axis=-1, is_ascend=1, dtype="float32")
 
 
 def sort(data, axis=-1, is_ascend=1):
-    """Performs sorting along the given axis and returns an array of indicies
-    having same shape as an input array that index data in sorted order.
+    """Performs sorting along the given axis and returns an array of
+    sorted values with teh same shape as the input data.
 
     Parameters
     ----------

From ea16f08de9e05d314028b32b23a437a20c166f40 Mon Sep 17 00:00:00 2001
From: mbrookhart
Date: Wed, 16 Dec 2020 15:39:59 -0700
Subject: [PATCH 4/6] fix docs

---
 python/tvm/topi/cuda/sort.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/tvm/topi/cuda/sort.py b/python/tvm/topi/cuda/sort.py
index b8873e0b546b..79f069d4cc5c 100644
--- a/python/tvm/topi/cuda/sort.py
+++ b/python/tvm/topi/cuda/sort.py
@@ -318,7 +318,7 @@ def argsort_nms_thrust(data, valid_count, axis=-1, is_ascend=1, dtype="float32")
 
 def sort(data, axis=-1, is_ascend=1):
     """Performs sorting along the given axis and returns an array of
-    sorted values with teh same shape as the input data.
+    sorted values with the same shape as the input data.
 
     Parameters
     ----------
@@ -351,8 +351,8 @@ def sort(data, axis=-1, is_ascend=1):
 
 
 def sort_thrust(data, axis=-1, is_ascend=1):
-    """Performs sorting along the given axis and returns an array of indicies
-    having same shape as an input array that index data in sorted order.
+    """Performs sorting along the given axis and returns an array of
+    sorted values with the same shape as the input data.
 
     Parameters
     ----------

From 79cee17b36355cd76346b6584edfe072e636bed2 Mon Sep 17 00:00:00 2001
From: mbrookhart
Date: Thu, 17 Dec 2020 09:10:32 -0700
Subject: [PATCH 5/6] add TODO, shape_func, cleanup

---
 python/tvm/relay/op/_algorithm.py                | 3 +++
 python/tvm/topi/cuda/sort.py                     | 3 +++
 python/tvm/topi/sort.py                          | 2 +-
 tests/python/relay/dyn/test_dynamic_op_level6.py | 4 +---
 4 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/python/tvm/relay/op/_algorithm.py b/python/tvm/relay/op/_algorithm.py
index 3b15f0112e4e..0f9a6e64f1ed 100644
--- a/python/tvm/relay/op/_algorithm.py
+++ b/python/tvm/relay/op/_algorithm.py
@@ -23,16 +23,19 @@
 
 from . import strategy
 from . import op as _reg
+from _tensor import elemwise_shape_func
 from .op import OpPattern, register_pattern
 from .op import register_strategy
 
 # sort
 register_strategy("sort", strategy.sort_strategy)
 register_pattern("sort", OpPattern.OPAQUE)
+register_shape_func("sort", False, elemwise_shape_func)
 
 # argsort
 register_strategy("argsort", strategy.argsort_strategy)
 register_pattern("argsort", OpPattern.OPAQUE)
+register_shape_func("argsort", False, elemwise_shape_func)
 
 # topk
 register_strategy("topk", strategy.topk_strategy)
diff --git a/python/tvm/topi/cuda/sort.py b/python/tvm/topi/cuda/sort.py
index 79f069d4cc5c..4162f53206a5 100644
--- a/python/tvm/topi/cuda/sort.py
+++ b/python/tvm/topi/cuda/sort.py
@@ -385,6 +385,9 @@ def sort_thrust(data, axis=-1, is_ascend=1):
     out = te.extern(
         [data.shape, data.shape],
         [data],
+        ## TODO(mbrookhart): This thrust function is actually doing argsort, not sort
+        ## For performance, we should probably rename the contrib function and add
+        ## a pure sort
         lambda ins, outs: tvm.tir.call_packed(
             "tvm.contrib.thrust.sort", ins[0], outs[0], outs[1], is_ascend
         ),
         out_buffers=[value_buf, indices_buf],
         name="sort_gpu",
         tag="sort_gpu",
     )[0]
diff --git a/python/tvm/topi/sort.py b/python/tvm/topi/sort.py
index 576d0424634f..8964e363b06f 100644
--- a/python/tvm/topi/sort.py
+++ b/python/tvm/topi/sort.py
@@ -14,7 +14,7 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-# pylint: disable=too-many-arguments, unused-argument
+# pylint: disable=too-many-arguments
 """Argsort operator"""
 import tvm
 from tvm import te
diff --git a/tests/python/relay/dyn/test_dynamic_op_level6.py b/tests/python/relay/dyn/test_dynamic_op_level6.py
index ec6bd64d33e2..52abbe2a15b6 100644
--- a/tests/python/relay/dyn/test_dynamic_op_level6.py
+++ b/tests/python/relay/dyn/test_dynamic_op_level6.py
@@ -53,9 +53,7 @@ def verify_topk(k, axis, ret_type, is_ascend, dtype):
     np_indices = np_indices.astype(dtype)
 
     for target, ctx in tvm.testing.enabled_targets():
-        # only test with vm, graph runtime doesn't support dynamic shapes
-        # and debug runtime doesn't support legalization for cuda kernel
-        for kind in ["vm"]:
+        for kind in ["vm", "debug"]:
             mod = tvm.ir.IRModule.from_expr(func)
             intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
             op_res = intrp.evaluate()(np_data, np.array([k]).astype("float32"))

From 2202b8477a4186622b76b45932cffac19c1ad32c Mon Sep 17 00:00:00 2001
From: mbrookhart
Date: Thu, 17 Dec 2020 09:26:50 -0700
Subject: [PATCH 6/6] add dynamic tests for sort and argsort

---
 python/tvm/relay/op/_algorithm.py    |  4 +--
 tests/python/relay/test_op_level6.py | 52 +++++++++++++++++++---------
 2 files changed, 37 insertions(+), 19 deletions(-)

diff --git a/python/tvm/relay/op/_algorithm.py b/python/tvm/relay/op/_algorithm.py
index 0f9a6e64f1ed..817f96b696df 100644
--- a/python/tvm/relay/op/_algorithm.py
+++ b/python/tvm/relay/op/_algorithm.py
@@ -23,9 +23,9 @@
 
 from . import strategy
 from . import op as _reg
-from _tensor import elemwise_shape_func
 from .op import OpPattern, register_pattern
-from .op import register_strategy
+from .op import register_strategy, register_shape_func
+from ._tensor import elemwise_shape_func
 
 # sort
 register_strategy("sort", strategy.sort_strategy)
diff --git a/tests/python/relay/test_op_level6.py b/tests/python/relay/test_op_level6.py
index 6e687658c60c..a5ce1fdcf589 100644
--- a/tests/python/relay/test_op_level6.py
+++ b/tests/python/relay/test_op_level6.py
@@ -25,8 +25,11 @@
 
 @tvm.testing.uses_gpu
 def test_sort():
-    def verify_sort(shape, axis, is_ascend):
-        x = relay.var("x", relay.TensorType(shape, "float32"))
+    def verify_sort(shape, axis, is_ascend, is_dyn=False):
+        if is_dyn:
+            x = relay.var("x", relay.TensorType([relay.Any()] * len(shape), "float32"))
+        else:
+            x = relay.var("x", relay.TensorType(shape, "float32"))
         z = relay.sort(x, axis=axis, is_ascend=is_ascend)
         func = relay.Function([x], z)
         x_data = np.random.uniform(size=shape).astype("float32")
@@ -35,21 +38,30 @@ def verify_sort(shape, axis, is_ascend):
         else:
             ref_res = -np.sort(-x_data, axis=axis)
 
+        if is_dyn:
+            backends = ["vm", "debug"]
+        else:
+            backends = ["graph", "debug"]
         for target, ctx in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                intrp = relay.create_executor(kind, ctx=ctx, target=target)
-                op_res = intrp.evaluate(func)(x_data)
+            for kind in backends:
+                mod = tvm.ir.IRModule.from_expr(func)
+                intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
+                op_res = intrp.evaluate()(x_data)
                 tvm.testing.assert_allclose(op_res.asnumpy(), ref_res, rtol=1e-5)
 
-    verify_sort((2, 3, 4), axis=0, is_ascend=False)
-    verify_sort((1, 4, 6), axis=1, is_ascend=True)
-    verify_sort((3, 5, 6), axis=-1, is_ascend=False)
+    for is_dyn in [False, True]:
+        verify_sort((2, 3, 4), axis=0, is_ascend=False, is_dyn=is_dyn)
+        verify_sort((1, 4, 6), axis=1, is_ascend=True, is_dyn=is_dyn)
+        verify_sort((3, 5, 6), axis=-1, is_ascend=False, is_dyn=is_dyn)
 
 
 @tvm.testing.uses_gpu
 def test_argsort():
-    def verify_argsort(shape, axis, is_ascend, dtype):
-        x = relay.var("x", relay.TensorType(shape, "float32"))
+    def verify_argsort(shape, axis, is_ascend, dtype, is_dyn=False):
+        if is_dyn:
+            x = relay.var("x", relay.TensorType([relay.Any()] * len(shape), "float32"))
+        else:
+            x = relay.var("x", relay.TensorType(shape, "float32"))
         z = relay.argsort(x, axis=axis, is_ascend=is_ascend, dtype=dtype)
         func = relay.Function([x], z)
         x_data = np.random.uniform(size=shape).astype("float32")
@@ -58,16 +70,22 @@ def verify_argsort(shape, axis, is_ascend, dtype):
         else:
             ref_res = np.argsort(-x_data, axis=axis)
 
+        if is_dyn:
+            backends = ["vm", "debug"]
+        else:
+            backends = ["graph", "debug"]
         for target, ctx in tvm.testing.enabled_targets():
-            for kind in ["graph", "debug"]:
-                intrp = relay.create_executor(kind, ctx=ctx, target=target)
-                op_res = intrp.evaluate(func)(x_data)
+            for kind in backends:
+                mod = tvm.ir.IRModule.from_expr(func)
+                intrp = relay.create_executor(kind, mod=mod, ctx=ctx, target=target)
+                op_res = intrp.evaluate()(x_data)
                 tvm.testing.assert_allclose(op_res.asnumpy(), ref_res.astype(dtype), rtol=1e-5)
 
-    for dtype in ["int32", "int64", "float32", "float64"]:
-        verify_argsort((2, 3, 4), axis=0, is_ascend=False, dtype=dtype)
-        verify_argsort((1, 4, 6), axis=1, is_ascend=True, dtype=dtype)
-        verify_argsort((3, 5, 6), axis=-1, is_ascend=False, dtype=dtype)
+    for is_dyn in [False, True]:
+        for dtype in ["int32", "int64", "float32", "float64"]:
+            verify_argsort((2, 3, 4), axis=0, is_ascend=False, dtype=dtype, is_dyn=is_dyn)
+            verify_argsort((1, 4, 6), axis=1, is_ascend=True, dtype=dtype, is_dyn=is_dyn)
+            verify_argsort((3, 5, 6), axis=-1, is_ascend=False, dtype=dtype, is_dyn=is_dyn)
 
 
 @tvm.testing.uses_gpu