diff --git a/csrc/quantization/squeezellm/quant_cuda_kernel.cu b/csrc/quantization/squeezellm/quant_cuda_kernel.cu index 40baac6108695..714907428a1ab 100644 --- a/csrc/quantization/squeezellm/quant_cuda_kernel.cu +++ b/csrc/quantization/squeezellm/quant_cuda_kernel.cu @@ -197,13 +197,13 @@ void squeezellm_gemm(torch::Tensor vec, torch::Tensor mat, torch::Tensor mul, const cudaStream_t stream = at::cuda::getCurrentCUDAStream(); vllm::squeezellm::NUQ4MatMulKernel<<>>( #ifndef USE_ROCM - (half2*)vec.data(), + (half2*)vec.data_ptr(), #else (__half2*)vec.data_ptr(), #endif mat.data_ptr(), #ifndef USE_ROCM - (half2*)mul.data(), (__half*)lookup_table.data(), + (half2*)mul.data(), (__half*)lookup_table.data_ptr(), #else (float2*)mul.data_ptr(), (__half*)lookup_table.data_ptr(),