From ee2363bf8131830cf0fb112890befd6be6a03f36 Mon Sep 17 00:00:00 2001
From: Masahiro Masuda
Date: Fri, 29 Jan 2021 11:44:02 +0900
Subject: [PATCH] enable extern lib offload for nvptx

---
 python/tvm/relay/op/strategy/cuda.py | 8 ++++----
 python/tvm/topi/cuda/nms.py          | 2 +-
 python/tvm/topi/cuda/scan.py         | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/python/tvm/relay/op/strategy/cuda.py b/python/tvm/relay/op/strategy/cuda.py
index 346e93445f1c..b8305f858e76 100644
--- a/python/tvm/relay/op/strategy/cuda.py
+++ b/python/tvm/relay/op/strategy/cuda.py
@@ -263,7 +263,7 @@ def conv2d_strategy_cuda(attrs, inputs, out_type, target):
         else:
             raise RuntimeError("Unsupported conv2d layout {} for CUDA".format(layout))
         # add cudnn implementation
-        if target.kind.name == "cuda" and "cudnn" in target.libs:
+        if target.kind.name in ["cuda", "nvptx"] and "cudnn" in target.libs:
             if layout in ["NCHW", "NHWC"] and padding[0] == padding[2] and padding[1] == padding[3]:
                 strategy.add_implementation(
                     wrap_compute_conv2d(
@@ -705,7 +705,7 @@ def dense_strategy_cuda(attrs, inputs, out_type, target):
                     name="dense_tensorcore.cuda",
                     plevel=20,
                 )
-    if target.kind.name == "cuda" and "cublas" in target.libs:
+    if target.kind.name in ["cuda", "nvptx"] and "cublas" in target.libs:
         strategy.add_implementation(
             wrap_compute_dense(topi.cuda.dense_cublas),
             wrap_topi_schedule(topi.cuda.schedule_dense_cublas),
@@ -858,7 +858,7 @@ def argsort_strategy_cuda(attrs, inputs, out_type, target):
         wrap_topi_schedule(topi.cuda.schedule_argsort),
         name="argsort.cuda",
     )
-    if target.kind.name == "cuda" and get_global_func(
+    if target.kind.name in ["cuda", "nvptx"] and get_global_func(
         "tvm.contrib.thrust.sort", allow_missing=True
     ):
         strategy.add_implementation(
@@ -879,7 +879,7 @@ def topk_strategy_cuda(attrs, inputs, out_type, target):
         wrap_topi_schedule(topi.cuda.schedule_topk),
         name="topk.cuda",
     )
-    if target.kind.name == "cuda" and get_global_func(
+    if target.kind.name in ["cuda", "nvptx"] and get_global_func(
         "tvm.contrib.thrust.sort", allow_missing=True
     ):
         strategy.add_implementation(
diff --git a/python/tvm/topi/cuda/nms.py b/python/tvm/topi/cuda/nms.py
index 2d6e1e464ef8..3b528dd6dded 100644
--- a/python/tvm/topi/cuda/nms.py
+++ b/python/tvm/topi/cuda/nms.py
@@ -610,7 +610,7 @@ def _get_sorted_indices(data, data_buf, score_index, score_shape):
     )
 
     target = tvm.target.Target.current()
-    if target and target.kind.name == "cuda" and is_thrust_available():
+    if target and target.kind.name in ["cuda", "nvptx"] and is_thrust_available():
         sort_tensor = argsort_thrust(score_tensor, axis=1, is_ascend=False, dtype="int32")
     else:
         sort_tensor = argsort(score_tensor, axis=1, is_ascend=False, dtype="int32")
diff --git a/python/tvm/topi/cuda/scan.py b/python/tvm/topi/cuda/scan.py
index 232d679840fd..7eed9cc0af3c 100644
--- a/python/tvm/topi/cuda/scan.py
+++ b/python/tvm/topi/cuda/scan.py
@@ -352,7 +352,7 @@ def exclusive_scan(

     def do_scan(data, output_dtype):
         target = tvm.target.Target.current()
-        if target and target.kind.name == "cuda" and is_thrust_available():
+        if target and target.kind.name in ["cuda", "nvptx"] and is_thrust_available():
             return scan_thrust(
                 data, output_dtype, exclusive=True, return_reduction=return_reduction, binop=binop
             )
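
Usage sketch (not part of the patch): with the strategy checks above relaxed, an "nvptx" target built with -libs picks up the cuBLAS/cuDNN/Thrust offloads the same way the "cuda" target does. The snippet below is a minimal, illustrative example only; the dense shapes and the "-mcpu=sm_70" value are assumptions, not anything this patch requires.

import tvm
from tvm import relay

# Illustrative only: a single dense op lowered for the NVPTX backend.
data = relay.var("data", shape=(16, 768), dtype="float32")
weight = relay.var("weight", shape=(3072, 768), dtype="float32")
mod = tvm.IRModule.from_expr(relay.Function([data, weight], relay.nn.dense(data, weight)))

# Before this patch, dense_strategy_cuda considered cuBLAS only when
# target.kind.name == "cuda"; with it, "nvptx -libs=cublas" also
# dispatches to topi.cuda.dense_cublas ("-mcpu=sm_70" is an assumed example).
target = tvm.target.Target("nvptx -mcpu=sm_70 -libs=cublas")
with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(mod, target=target)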