Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Find CUDA libraries with NVHPC package #1194

Merged
merged 3 commits into from
Nov 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,13 @@ if(MINGW OR CYGWIN)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wa,-mbig-obj")
endif()

# For now, PGI/NVHPC nvc++ compiler doesn't seem to support
# `#pragma omp declare reduction`
if (${CMAKE_CXX_COMPILER_ID} MATCHES "PGI|NVHPC")
message(STATUS "OpenMP: Switching to OFF because PGI/NVHPC nvc++ compiler lacks important features.")
set(GINKGO_BUILD_OMP OFF)
endif()

set(GINKGO_CIRCULAR_DEPS_FLAGS "-Wl,--no-undefined")

# Use ccache as compilation launcher
Expand Down
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,12 @@ For Ginkgo core library:
* _clang 3.9+_
* _Intel compiler 2018+_
* _Apple LLVM 8.0+_
* _Cray Compiler 14.0.1+_
* _NVHPC Compiler 22.7+_

The Ginkgo CUDA module has the following __additional__ requirements:

* _CUDA 9.2+_
* _CUDA 9.2+_ or _NVHPC Package 22.7+_
* Any host compiler restrictions your version of CUDA may impose also apply
here. For the newest CUDA version, this information can be found in the
[CUDA installation guide for Linux](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html)
Expand Down
56 changes: 36 additions & 20 deletions cmake/cuda.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,44 @@ cas_variable_cuda_architectures(GINKGO_CUDA_ARCH_FLAGS
ARCHITECTURES ${GINKGO_CUDA_ARCHITECTURES}
UNSUPPORTED "20" "21")

if (${CMAKE_CXX_COMPILER_ID} MATCHES "PGI|NVHPC")
find_package(NVHPC REQUIRED
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this need to be required in this case, or we silently disable CUDA if not found?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not sure about PGI.
For NVHPC, I will assume it is always shipped with the cuda toolkit

Copy link
Member Author

@tcojean tcojean Nov 11, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the problem is the nvc++ compiler is identified as PGI at least on Perlmutter...

Also, if the user sets -DGINKGO_BUILD_CUDA=OFF it should still succeed even with a PGI compiler which doesn't include CUDA, as this would not be evaluated.

HINTS
$ENV{NVIDIA_PATH}
${CMAKE_CUDA_COMPILER}/../../..
)

set(CUDA_RUNTIME_LIBS_DYNAMIC ${NVHPC_CUDART_LIBRARY})
set(CUDA_RUNTIME_LIBS_STATIC ${NVHPC_CUDART_LIBRARY_STATIC})
set(CUBLAS ${NVHPC_CUBLAS_LIBRARY})
set(CUSPARSE ${NVHPC_CUSPARSE_LIBRARY})
set(CURAND ${NVHPC_CURAND_LIBRARY})
set(CUFFT ${NVHPC_CUFFT_LIBRARY})
else()
find_library(CUDA_RUNTIME_LIBS_DYNAMIC cudart
HINT ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
find_library(CUDA_RUNTIME_LIBS_STATIC cudart_static
HINT ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})

# CUDA 10.1/10.2 put cublas, cublasLt, cudnn in /usr/lib/<arch>-linux-gnu/, but
# other versions (<= 10.0 or >= 11) put them in CUDA's own directory.
# If the environment installs several CUDA versions including 10.1/10.2, CMake will
# find the 10.1/10.2 .so files when searching for other CUDA versions in the default path.
# CMake already puts /usr/lib/<arch>-linux-gnu/ after cuda own directory in the
# `CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES`, so we always put NO_DEFAULT_PATH here.
find_library(CUBLAS cublas
HINT ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES} NO_DEFAULT_PATH)
find_library(CUSPARSE cusparse
HINT ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
find_library(CURAND curand
HINT ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
find_library(CUFFT cufft
HINT ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
endif()

# MSVC nvcc uses the static cudart library by default, and other platforms use the shared cudart library.
# Add `-cudart shared` or `-cudart=shared` (depending on the system) to CMAKE_CUDA_FLAGS
# to force nvcc to use the dynamic cudart library on MSVC.
find_library(CUDA_RUNTIME_LIBS_DYNAMIC cudart
HINT ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
find_library(CUDA_RUNTIME_LIBS_STATIC cudart_static
HINT ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
if(MSVC)
if("${CMAKE_CUDA_FLAGS}" MATCHES "-cudart(=| )shared")
set(CUDA_RUNTIME_LIBS "${CUDA_RUNTIME_LIBS_DYNAMIC}" CACHE STRING "Path to a library" FORCE)
Expand All @@ -38,21 +69,6 @@ else()
set(CUDA_RUNTIME_LIBS "${CUDA_RUNTIME_LIBS_DYNAMIC}" CACHE STRING "Path to a library" FORCE)
endif()

# CUDA 10.1/10.2 put cublas, cublasLt, cudnn in /usr/lib/<arch>-linux-gnu/, but
# others (<= 10.0 or >= 11) put them in cuda own directory
# If the environment installs several cuda including 10.1/10.2, cmake will find
# the 10.1/10.2 .so files when searching others cuda in the default path.
# CMake already puts /usr/lib/<arch>-linux-gnu/ after cuda own directory in the
# `CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES`, so we always put NO_DEFAULT_PATH here.
find_library(CUBLAS cublas
HINT ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES} NO_DEFAULT_PATH)
find_library(CUSPARSE cusparse
HINT ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
find_library(CURAND curand
HINT ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
find_library(CUFFT cufft
HINT ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})

if (NOT CMAKE_CUDA_HOST_COMPILER AND NOT GINKGO_CUDA_DEFAULT_HOST_COMPILER)
set(CMAKE_CUDA_HOST_COMPILER "${CMAKE_CXX_COMPILER}" CACHE STRING "" FORCE)
elseif(GINKGO_CUDA_DEFAULT_HOST_COMPILER)
Expand All @@ -75,4 +91,4 @@ if (CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" AND CMAKE_CUDA_COMPILER_VERSION
message(FATAL_ERROR "There is a bug between nvcc 9.2 and clang 5.0 which create a compiling issue."
"Consider using a different CUDA host compiler or CUDA version.")
endif()
endif()
endif()
2 changes: 0 additions & 2 deletions include/ginkgo/core/stop/combined.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,6 @@ std::shared_ptr<const CriterionFactory> combine(FactoryContainer&& factories)
switch (factories.size()) {
case 0:
GKO_NOT_SUPPORTED(nullptr);
return nullptr;
case 1:
if (factories[0] == nullptr) {
GKO_NOT_SUPPORTED(nullptr);
Expand All @@ -135,7 +134,6 @@ std::shared_ptr<const CriterionFactory> combine(FactoryContainer&& factories)
if (factories[0] == nullptr) {
// first factory must be valid to capture executor
GKO_NOT_SUPPORTED(nullptr);
return nullptr;
} else {
auto exec = factories[0]->get_executor();
return Combined::build()
Expand Down
1 change: 1 addition & 0 deletions omp/distributed/partition_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ void build_starting_indices(std::shared_ptr<const DefaultExecutor> exec,
}
#pragma omp barrier
// exclusive prefix sum over local sizes
// FIXME: PGI/NVHPC(22.7) doesn't like reduction with references
#pragma omp for reduction(+ : num_empty_parts)
for (comm_index_type part = 0; part < num_parts; ++part) {
LocalIndexType size{};
Expand Down