
Commit 1f502d4
[CI] Don't include ATen/cuda/CUDAContext.h to avoid cusparse.h
tridao committed Dec 6, 2024
1 parent 5d39d51 commit 1f502d4
Showing 3 changed files with 5 additions and 9 deletions.
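
What the commit is doing: ATen/cuda/CUDAContext.h transitively includes CUDA library headers such as cusparse.h and cublas_v2.h, so compiling this extension required those headers to be present even though the kernels never call cuSPARSE. The C++ diff below swaps it for the two narrower c10 headers that cover everything the file actually uses. A minimal sketch of the resulting pattern (illustrative only; launch_on_device_of is a made-up name, not part of the commit):

    // The lean preamble the diff moves to: neither header drags in
    // cusparse.h the way ATen/cuda/CUDAContext.h does.
    #include <c10/cuda/CUDAGuard.h>
    #include <c10/cuda/CUDAStream.h>
    #include <torch/python.h>

    // Hypothetical helper mirroring the pattern in csrc/causal_conv1d.cpp.
    void launch_on_device_of(const at::Tensor &x) {
        // Guard so kernels run on x's device rather than cuda:0.
        at::cuda::CUDAGuard device_guard{x.device()};
        auto stream = at::cuda::getCurrentCUDAStream().stream();
        // ... enqueue kernels on `stream` ...
    }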
1 change: 0 additions & 1 deletion .github/workflows/publish.yaml
@@ -109,7 +109,6 @@ jobs:
           # We need the cuda libraries (e.g. cuSparse, cuSolver) for compiling PyTorch extensions,
           # not just nvcc
           sub-packages: '["nvcc"]'
-          non-cuda-sub-packages: '["libcublas", "libcusparse"]'
 
       - name: Install PyTorch ${{ matrix.torch-version }}+cu${{ matrix.cuda-version }}
         run: |
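
The deleted workflow line is the payoff of the C++ change further down: once csrc/causal_conv1d.cpp stops including ATen/cuda/CUDAContext.h, nothing in the extension pulls in cusparse.h, so the CI toolchain can get by with the nvcc sub-package alone. Roughly what the compiler saw before this commit (illustrative, not from the diff):

    // Pre-commit, the extension's translation unit effectively did this:
    #include <ATen/cuda/CUDAContext.h>  // transitively #includes <cusparse.h>,
                                        // <cublas_v2.h>, etc., which is why the
                                        // CI needed the libcusparse/libcublas
                                        // sub-packages on top of nvcc.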
2 changes: 1 addition & 1 deletion causal_conv1d/__init__.py
@@ -1,3 +1,3 @@
-__version__ = "1.5.0.post3"
+__version__ = "1.5.0.post4"
 
 from causal_conv1d.causal_conv1d_interface import causal_conv1d_fn, causal_conv1d_update
11 changes: 4 additions & 7 deletions csrc/causal_conv1d.cpp
@@ -2,8 +2,8 @@
  * Copyright (c) 2024, Tri Dao.
  ******************************************************************************/
 
-#include <ATen/cuda/CUDAContext.h>
 #include <c10/cuda/CUDAGuard.h>
+#include <c10/cuda/CUDAStream.h>
 #include <torch/python.h>
 #include <vector>

@@ -221,8 +221,7 @@ causal_conv1d_fwd(const at::Tensor &x, const at::Tensor &weight,
     }
 
     // Otherwise the kernel will be launched from cuda:0 device
-    // Cast to char to avoid compiler warning about narrowing
-    at::cuda::CUDAGuard device_guard{(char)x.get_device()};
+    at::cuda::CUDAGuard device_guard{x.device()};
     auto stream = at::cuda::getCurrentCUDAStream().stream();
     DISPATCH_ITYPE_FLOAT_AND_HALF_AND_BF16(x.scalar_type(), "causal_conv1d_fwd", [&] {
         DISPATCH_WTYPE_FLOAT_AND_HALF_AND_BF16(weight.scalar_type(), "causal_conv1d_fwd", [&] {
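
The same one-line guard rewrite repeats in all three hunks. Why the old code needed a cast (a sketch based on the c10 guard API as of recent PyTorch releases): at::cuda::CUDAGuard has one constructor taking c10::DeviceIndex, a narrow signed integer (int8_t at the time of writing), and one taking c10::Device. Tensor::get_device() returns int64_t, so the index overload triggered a narrowing warning, hence the (char) cast; Tensor::device() returns a c10::Device and matches the other overload with no cast:

    #include <c10/cuda/CUDAGuard.h>
    #include <torch/python.h>

    // Hypothetical side-by-side of the two styles (guard_styles is made up).
    void guard_styles(const at::Tensor &x) {
        // Old: int64_t narrowed to DeviceIndex, silenced with a (char) cast.
        at::cuda::CUDAGuard by_index{(char)x.get_device()};
        // New: the Device overload, no narrowing and no cast.
        at::cuda::CUDAGuard by_device{x.device()};
    }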
@@ -308,8 +307,7 @@ causal_conv1d_bwd(const at::Tensor &x, const at::Tensor &weight,
     }
 
     // Otherwise the kernel will be launched from cuda:0 device
-    // Cast to char to avoid compiler warning about narrowing
-    at::cuda::CUDAGuard device_guard{(char)x.get_device()};
+    at::cuda::CUDAGuard device_guard{x.device()};
 
     at::Tensor dweight = torch::zeros_like(weight, weight.options().dtype(at::kFloat));
     at::Tensor dbias;
@@ -462,8 +460,7 @@ causal_conv1d_update(const at::Tensor &x,
     }
 
     // Otherwise the kernel will be launched from cuda:0 device
-    // Cast to char to avoid compiler warning about narrowing
-    at::cuda::CUDAGuard device_guard{(char)x.get_device()};
+    at::cuda::CUDAGuard device_guard{x.device()};
     auto stream = at::cuda::getCurrentCUDAStream().stream();
     DISPATCH_ITYPE_FLOAT_AND_HALF_AND_BF16(x.scalar_type(), "causal_conv1d_update", [&] {
         DISPATCH_WTYPE_FLOAT_AND_HALF_AND_BF16(weight.scalar_type(), "causal_conv1d_update", [&] {
