diff --git a/mops/CMakeLists.txt b/mops/CMakeLists.txt
index f6cdae52..846eb477 100644
--- a/mops/CMakeLists.txt
+++ b/mops/CMakeLists.txt
@@ -30,18 +30,19 @@ project(mops VERSION ${MOPS_VERSION} LANGUAGES CXX)
 include(CheckLanguage)
 check_language(CUDA)
 
-if(CMAKE_CUDA_COMPILER)
+option(BUILD_SHARED_LIBS "Build shared libraries instead of static ones" OFF)
+
+OPTION(MOPS_OPENMP "Try to use OpenMP when compiling MOPs" ON)
+OPTION(MOPS_ARCH_NATIVE "Try to use -march=native when compiling MOPs" ON)
+option(MOPS_CUDA "compile mops with CUDA support" ON)
+
+if(CMAKE_CUDA_COMPILER AND MOPS_CUDA)
     enable_language(CUDA)
     set(CUDA_USE_STATIC_CUDA_RUNTIME OFF CACHE BOOL "" FORCE)
 else()
     message(STATUS "Could not find a CUDA compiler")
 endif()
 
-option(BUILD_SHARED_LIBS "Build shared libraries instead of static ones" OFF)
-
-OPTION(MOPS_OPENMP "Try to use OpenMP when compiling MOPs" ON)
-OPTION(MOPS_ARCH_NATIVE "Try to use -march=native when compiling MOPs" ON)
-
 set(LIB_INSTALL_DIR "lib" CACHE PATH "Path relative to CMAKE_INSTALL_PREFIX where to install libraries")
 set(BIN_INSTALL_DIR "bin" CACHE PATH "Path relative to CMAKE_INSTALL_PREFIX where to install DLL/binaries")
 set(INCLUDE_INSTALL_DIR "include" CACHE PATH "Path relative to CMAKE_INSTALL_PREFIX where to install headers")
@@ -107,7 +108,7 @@ add_library(mops
     "include/mops/sasaw.h"
 )
 
-if(CMAKE_CUDA_COMPILER)
+if(CMAKE_CUDA_COMPILER AND MOPS_CUDA)
     target_compile_definitions(mops PUBLIC MOPS_CUDA_ENABLED)
     set_target_properties(mops PROPERTIES CUDA_ARCHITECTURES native)
     set_target_properties(mops PROPERTIES CUDA_NVCC_FLAGS "-lineinfo")
@@ -135,7 +136,7 @@ set_target_properties(mops PROPERTIES
     CXX_VISIBILITY_PRESET hidden
 )
 
-if (CMAKE_CUDA_COMPILER)
+if(CMAKE_CUDA_COMPILER AND MOPS_CUDA)
     set_target_properties(mops PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
     set_target_properties(mops PROPERTIES CUDA_RESOLVE_DEVICE_SYMBOLS ON)
 endif()
diff --git a/mops/benchmarks/CMakeLists.txt b/mops/benchmarks/CMakeLists.txt
index 61f38443..634e1102 100644
--- a/mops/benchmarks/CMakeLists.txt
+++ b/mops/benchmarks/CMakeLists.txt
@@ -1,6 +1,6 @@
 cmake_minimum_required(VERSION 3.16)
 
-foreach(tgt hpe)
+foreach(tgt hpe opsa opsaw sap sasaw)
     add_executable(${tgt} ${tgt}.cpp utils.hpp)
     target_link_libraries(${tgt} PRIVATE mops)
 endforeach()
diff --git a/mops/examples/CMakeLists.txt b/mops/examples/CMakeLists.txt
index 6b0adbb1..ddddae69 100644
--- a/mops/examples/CMakeLists.txt
+++ b/mops/examples/CMakeLists.txt
@@ -4,7 +4,7 @@ add_executable(example_cpp example.cpp)
 target_link_libraries(example_cpp mops)
 add_test(NAME example_cpp COMMAND ./example_cpp)
 
-if (CMAKE_CUDA_COMPILER)
+if (CMAKE_CUDA_COMPILER AND MOPS_CUDA)
     add_executable(example_cuda example.cu)
     set_target_properties(example_cuda PROPERTIES CUDA_ARCHITECTURES native)
     target_link_libraries(example_cuda mops)
diff --git a/mops/src/sasaw/sasaw.cpp b/mops/src/sasaw/sasaw.cpp
index de46b93d..9bc4a686 100644
--- a/mops/src/sasaw/sasaw.cpp
+++ b/mops/src/sasaw/sasaw.cpp
@@ -115,8 +115,25 @@ void mops::cuda::
 }
 
 template
-void mops::cuda::
-    sparse_accumulation_scatter_add_with_weights_vjp_vjp(Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor) {
+void mops::cuda::sparse_accumulation_scatter_add_with_weights_vjp_vjp(
+    Tensor /*grad_grad_output*/,
+    Tensor /*grad_A_2*/,
+    Tensor /*grad_B_2*/,
+    Tensor /*grad_W_2*/,
+    Tensor /*grad_grad_A*/,
+    Tensor /*grad_grad_B*/,
+    Tensor /*grad_grad_W*/,
+    Tensor /*grad_output*/,
+    Tensor /*A*/,
+    Tensor /*B*/,
+    Tensor /*C*/,
+    Tensor /*W*/,
+    Tensor /*indices_A*/,
+    Tensor /*indices_W_1*/,
+    Tensor /*indices_W_2*/,
+    Tensor /*indices_output_1*/,
+    Tensor /*indices_output_2*/
+) {
     throw std::runtime_error("MOPS was not compiled with CUDA support");
 }
 