From ee2363bf8131830cf0fb112890befd6be6a03f36 Mon Sep 17 00:00:00 2001
From: Masahiro Masuda
Date: Fri, 29 Jan 2021 11:44:02 +0900
Subject: [PATCH] enable extern lib offload for nvptx

---
 python/tvm/relay/op/strategy/cuda.py | 8 ++++----
 python/tvm/topi/cuda/nms.py          | 2 +-
 python/tvm/topi/cuda/scan.py         | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/python/tvm/relay/op/strategy/cuda.py b/python/tvm/relay/op/strategy/cuda.py
index 346e93445f1c..b8305f858e76 100644
--- a/python/tvm/relay/op/strategy/cuda.py
+++ b/python/tvm/relay/op/strategy/cuda.py
@@ -263,7 +263,7 @@ def conv2d_strategy_cuda(attrs, inputs, out_type, target):
         else:
             raise RuntimeError("Unsupported conv2d layout {} for CUDA".format(layout))
         # add cudnn implementation
-        if target.kind.name == "cuda" and "cudnn" in target.libs:
+        if target.kind.name in ["cuda", "nvptx"] and "cudnn" in target.libs:
             if layout in ["NCHW", "NHWC"] and padding[0] == padding[2] and padding[1] == padding[3]:
                 strategy.add_implementation(
                     wrap_compute_conv2d(
@@ -705,7 +705,7 @@ def dense_strategy_cuda(attrs, inputs, out_type, target):
                     name="dense_tensorcore.cuda",
                     plevel=20,
                 )
-    if target.kind.name == "cuda" and "cublas" in target.libs:
+    if target.kind.name in ["cuda", "nvptx"] and "cublas" in target.libs:
         strategy.add_implementation(
             wrap_compute_dense(topi.cuda.dense_cublas),
             wrap_topi_schedule(topi.cuda.schedule_dense_cublas),
@@ -858,7 +858,7 @@ def argsort_strategy_cuda(attrs, inputs, out_type, target):
         wrap_topi_schedule(topi.cuda.schedule_argsort),
         name="argsort.cuda",
     )
-    if target.kind.name == "cuda" and get_global_func(
+    if target.kind.name in ["cuda", "nvptx"] and get_global_func(
         "tvm.contrib.thrust.sort", allow_missing=True
     ):
         strategy.add_implementation(
@@ -879,7 +879,7 @@ def topk_strategy_cuda(attrs, inputs, out_type, target):
         wrap_topi_schedule(topi.cuda.schedule_topk),
         name="topk.cuda",
     )
-    if target.kind.name == "cuda" and get_global_func(
+    if target.kind.name in ["cuda", "nvptx"] and get_global_func(
         "tvm.contrib.thrust.sort", allow_missing=True
     ):
         strategy.add_implementation(
diff --git a/python/tvm/topi/cuda/nms.py b/python/tvm/topi/cuda/nms.py
index 2d6e1e464ef8..3b528dd6dded 100644
--- a/python/tvm/topi/cuda/nms.py
+++ b/python/tvm/topi/cuda/nms.py
@@ -610,7 +610,7 @@ def _get_sorted_indices(data, data_buf, score_index, score_shape):
     )
 
     target = tvm.target.Target.current()
-    if target and target.kind.name == "cuda" and is_thrust_available():
+    if target and target.kind.name in ["cuda", "nvptx"] and is_thrust_available():
         sort_tensor = argsort_thrust(score_tensor, axis=1, is_ascend=False, dtype="int32")
     else:
         sort_tensor = argsort(score_tensor, axis=1, is_ascend=False, dtype="int32")
diff --git a/python/tvm/topi/cuda/scan.py b/python/tvm/topi/cuda/scan.py
index 232d679840fd..7eed9cc0af3c 100644
--- a/python/tvm/topi/cuda/scan.py
+++ b/python/tvm/topi/cuda/scan.py
@@ -352,7 +352,7 @@ def exclusive_scan(

     def do_scan(data, output_dtype):
         target = tvm.target.Target.current()
-        if target and target.kind.name == "cuda" and is_thrust_available():
+        if target and target.kind.name in ["cuda", "nvptx"] and is_thrust_available():
             return scan_thrust(
                 data, output_dtype, exclusive=True, return_reduction=return_reduction, binop=binop
             )
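
Usage sketch (not part of the patch): with the strategy checks above relaxed, an "nvptx" target built with -libs picks up the cuBLAS/cuDNN/Thrust offloads the same way the "cuda" target does. The snippet below is a minimal, illustrative example only; the dense shapes and the "-mcpu=sm_70" value are assumptions, not anything this patch requires.

import tvm
from tvm import relay

# Illustrative only: a single dense op lowered for the NVPTX backend.
data = relay.var("data", shape=(16, 768), dtype="float32")
weight = relay.var("weight", shape=(3072, 768), dtype="float32")
mod = tvm.IRModule.from_expr(relay.Function([data, weight], relay.nn.dense(data, weight)))

# Before this patch, dense_strategy_cuda considered cuBLAS only when
# target.kind.name == "cuda"; with it, "nvptx -libs=cublas" also
# dispatches to topi.cuda.dense_cublas ("-mcpu=sm_70" is an assumed example).
target = tvm.target.Target("nvptx -mcpu=sm_70 -libs=cublas")
with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(mod, target=target)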