Skip to content

Commit

Permalink
Need to debug CPU-only failure on CI from CUDA laptop, so add MOPS_CUDA option
Browse files Browse the repository at this point in the history
  • Loading branch information
frostedoyster committed Apr 14, 2024
1 parent 36de392 commit a346353
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 12 deletions.
17 changes: 9 additions & 8 deletions mops/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,18 +30,19 @@ project(mops VERSION ${MOPS_VERSION} LANGUAGES CXX)
include(CheckLanguage)
check_language(CUDA)

if(CMAKE_CUDA_COMPILER)
# User-facing build switches. Each one is a boolean cache entry and can be
# overridden at configure time with -D<NAME>=ON|OFF.
option(BUILD_SHARED_LIBS "Build shared libraries instead of static ones" OFF)

option(MOPS_OPENMP "Try to use OpenMP when compiling MOPs" ON)
option(MOPS_ARCH_NATIVE "Try to use -march=native when compiling MOPs" ON)
option(MOPS_CUDA "compile mops with CUDA support" ON)

# Enable the CUDA language only when check_language(CUDA) found a compiler
# AND the user has not opted out through the MOPS_CUDA option.
if(CMAKE_CUDA_COMPILER AND MOPS_CUDA)
    enable_language(CUDA)
    set(CUDA_USE_STATIC_CUDA_RUNTIME OFF CACHE BOOL "" FORCE)
elseif(NOT MOPS_CUDA)
    # A compiler may well be installed; report the real reason CUDA is off so
    # the configure log is not misleading.
    message(STATUS "CUDA support disabled by the user (MOPS_CUDA=OFF)")
else()
    message(STATUS "Could not find a CUDA compiler")
endif()

# Library type and optimisation switches (boolean cache entries).
option(BUILD_SHARED_LIBS "Build shared libraries instead of static ones" OFF)

option(MOPS_OPENMP "Try to use OpenMP when compiling MOPs" ON)
option(MOPS_ARCH_NATIVE "Try to use -march=native when compiling MOPs" ON)

# Install destinations. Each is a PATH cache entry interpreted relative to
# CMAKE_INSTALL_PREFIX and can be overridden by the user.
set(LIB_INSTALL_DIR "lib"
    CACHE PATH "Path relative to CMAKE_INSTALL_PREFIX where to install libraries"
)
set(BIN_INSTALL_DIR "bin"
    CACHE PATH "Path relative to CMAKE_INSTALL_PREFIX where to install DLL/binaries"
)
set(INCLUDE_INSTALL_DIR "include"
    CACHE PATH "Path relative to CMAKE_INSTALL_PREFIX where to install headers"
)
Expand Down Expand Up @@ -107,7 +108,7 @@ add_library(mops
"include/mops/sasaw.h"
)

if(CMAKE_CUDA_COMPILER)
if(CMAKE_CUDA_COMPILER AND MOPS_CUDA)
target_compile_definitions(mops PUBLIC MOPS_CUDA_ENABLED)
set_target_properties(mops PROPERTIES CUDA_ARCHITECTURES native)
set_target_properties(mops PROPERTIES CUDA_NVCC_FLAGS "-lineinfo")
Expand Down Expand Up @@ -135,7 +136,7 @@ set_target_properties(mops PROPERTIES
CXX_VISIBILITY_PRESET hidden
)

if (CMAKE_CUDA_COMPILER)
# CUDA-only target properties for mops, applied when a CUDA compiler is
# available and CUDA support was requested through MOPS_CUDA.
if(CMAKE_CUDA_COMPILER AND MOPS_CUDA)
    set_target_properties(mops PROPERTIES
        CUDA_SEPARABLE_COMPILATION ON
        CUDA_RESOLVE_DEVICE_SYMBOLS ON
    )
endif()
Expand Down
2 changes: 1 addition & 1 deletion mops/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.16)

foreach(tgt hpe)
# Create one benchmark executable per listed name. Each is built from
# <name>.cpp together with the shared utils.hpp header and links against mops.
foreach(benchmark IN ITEMS hpe opsa opsaw sap sasaw)
    add_executable(${benchmark} ${benchmark}.cpp utils.hpp)
    target_link_libraries(${benchmark} PRIVATE mops)
endforeach()
2 changes: 1 addition & 1 deletion mops/examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ add_executable(example_cpp example.cpp)
# Link the C++ example against mops. PRIVATE: an executable has no consumers
# that would need to inherit the dependency.
target_link_libraries(example_cpp PRIVATE mops)
# Pass the target name rather than a relative path: CMake substitutes the
# location of the built executable, so the test does not depend on the working
# directory or on the generator's output layout.
add_test(NAME example_cpp COMMAND example_cpp)

if (CMAKE_CUDA_COMPILER)
if (CMAKE_CUDA_COMPILER AND MOPS_CUDA)
add_executable(example_cuda example.cu)
set_target_properties(example_cuda PROPERTIES CUDA_ARCHITECTURES native)
target_link_libraries(example_cuda mops)
Expand Down
21 changes: 19 additions & 2 deletions mops/src/sasaw/sasaw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,25 @@ void mops::cuda::
}

template <typename scalar_t>
void mops::cuda::
sparse_accumulation_scatter_add_with_weights_vjp_vjp(Tensor<scalar_t, 3>, Tensor<scalar_t, 2>, Tensor<scalar_t, 2>, Tensor<scalar_t, 3>, Tensor<scalar_t, 2>, Tensor<scalar_t, 2>, Tensor<scalar_t, 3>, Tensor<scalar_t, 2>, Tensor<scalar_t, 2>, Tensor<scalar_t, 3>, Tensor<scalar_t, 3>, Tensor<scalar_t, 2>, Tensor<scalar_t, 2>, Tensor<scalar_t, 1>, Tensor<scalar_t, 3>, Tensor<int, 1>, Tensor<int, 1>, Tensor<int, 1>, Tensor<int, 1>, Tensor<int, 1>) {
void mops::cuda::sparse_accumulation_scatter_add_with_weights_vjp_vjp(
Tensor<scalar_t, 3> /*grad_grad_output*/,
Tensor<scalar_t, 2> /*grad_A_2*/,
Tensor<scalar_t, 2> /*grad_B_2*/,
Tensor<scalar_t, 3> /*grad_W_2*/,
Tensor<scalar_t, 2> /*grad_grad_A*/,
Tensor<scalar_t, 2> /*grad_grad_B*/,
Tensor<scalar_t, 3> /*grad_grad_W*/,
Tensor<scalar_t, 3> /*grad_output*/,
Tensor<scalar_t, 2> /*A*/,
Tensor<scalar_t, 2> /*B*/,
Tensor<scalar_t, 1> /*C*/,
Tensor<scalar_t, 3> /*W*/,
Tensor<int32_t, 1> /*indices_A*/,
Tensor<int32_t, 1> /*indices_W_1*/,
Tensor<int32_t, 1> /*indices_W_2*/,
Tensor<int32_t, 1> /*indices_output_1*/,
Tensor<int32_t, 1> /*indices_output_2*/
) {
// Stub implementation: every argument is ignored (parameter names are kept
// only as comments, so none of them is a named, unused parameter) and the
// call unconditionally throws to report that CUDA support is unavailable.
throw std::runtime_error("MOPS was not compiled with CUDA support");
}

Expand Down

0 comments on commit a346353

Please sign in to comment.