From ed13bd8918b531817b350c9a5975aa6ecebeba0a Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Fri, 24 Sep 2021 10:47:26 -0400 Subject: [PATCH 01/16] Eliminate deprecated find_package(CUDA) from qmcpack Replace it with first-class language support and find_package(CUDAToolkit) Signed-off-by: Steven Hahn --- CMakeLists.txt | 59 +++++++++++------------- src/AFQMC/CMakeLists.txt | 11 ++--- src/Particle/CMakeLists.txt | 2 +- src/Platforms/CUDA/CMakeLists.txt | 8 ++-- src/Platforms/CUDA_legacy/CMakeLists.txt | 6 +-- src/Platforms/tests/CUDA/CMakeLists.txt | 2 +- src/QMCHamiltonians/CMakeLists.txt | 2 +- src/QMCWaveFunctions/CMakeLists.txt | 4 +- src/einspline/CMakeLists.txt | 2 +- src/einspline/tests/CMakeLists.txt | 2 +- 10 files changed, 45 insertions(+), 53 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 075ebe69aa..8dab5013f4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ ###################################################################### # CMake version and policies ###################################################################### -cmake_minimum_required(VERSION 3.15.0) +cmake_minimum_required(VERSION 3.17.0) # CMP0074: CMake find_package will use _ROOT CMake variable # and environment variable in search path. 
@@ -14,6 +14,7 @@ cmake_policy(SET CMP0075 NEW) ###################################################################### # QMCPACK project ###################################################################### + project( qmcpack VERSION 3.11.9 @@ -41,6 +42,17 @@ if(NOT QMC_CXX_STANDARD EQUAL 17) "Using other versions of the C++ standard is unsupported and done entirely at user's own risk.") endif() +#-------------------------------------------------------------------- +# Set CUDA standard +#-------------------------------------------------------------------- +set(QMC_CUDA_STANDARD + 14 + CACHE STRING "QMCPACK CUDA C++ language standard") +if(NOT QMC_CUDA_STANDARD EQUAL 14) + message(WARNING "C++14 is the only CUDA language standard officially supported by this QMCPACK version. " + "Using other versions of the CUDA C++ standard is unsupported and done entirely at user's own risk.") +endif() + #-------------------------------------------------------------------- # Programmind model related build options # MPI, OpenMP, GPU acceleration @@ -260,6 +272,8 @@ endif() set(CMAKE_CXX_STANDARD ${QMC_CXX_STANDARD}) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) +set(CMAKE_CUDA_STANDARD ${QMC_CUDA_STANDARD}) +set(CMAKE_CUDA_STANDARD_REQUIRED TRUE) # Check that a C++ compiler is compatible with the underlying libstdc++ include(Testlibstdc++) @@ -662,38 +676,19 @@ if(QMC_CUDA OR ENABLE_CUDA) if(QMC_CUDA2HIP) message(STATUS "CUDA2HIP enabled") # all the HIP and ROCm settings will be handled by ENABLE_ROCM else(QMC_CUDA2HIP) - # FindCUDA default CUDA_PROPAGATE_HOST_FLAGS to ON but we prefer OFF - # It happened -ffast-math from host caused numerical issue in CUDA kernels. 
- option(CUDA_PROPAGATE_HOST_FLAGS "Propagate C/CXX_FLAGS and friends to the host compiler via -Xcompile" OFF) - find_package(CUDA REQUIRED) - set(CUDA_LINK_LIBRARIES_KEYWORD PRIVATE) - #set(CUDA_NVCC_FLAGS - # "-arch=sm_20;-Drestrict=__restrict__;-DNO_CUDA_MAIN;-O3;-use_fast_math") - if(CUDA_NVCC_FLAGS MATCHES "arch") - # User defined NVCC flags - message(STATUS "Setting CUDA FLAGS=${CUDA_NVCC_FLAGS}") - else(CUDA_NVCC_FLAGS MATCHES "arch") - # Automatically set the default NVCC flags - set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Drestrict=__restrict__;-DNO_CUDA_MAIN;-std=c++14") - if(QMC_COMPLEX) - set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-DQMC_COMPLEX=${QMC_COMPLEX}") - endif() - if(CMAKE_BUILD_TYPE STREQUAL "DEBUG") - set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-g;-G") - else() - # Temporarily disable fast_math because it causes multiple test failures - # SET(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-O3;-use_fast_math") - set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-O3") - endif() - set(CUDA_ARCH - sm_70 - CACHE STRING "CUDA architecture sm_XX") - set(CUDA_NVCC_FLAGS "-arch=${CUDA_ARCH};${CUDA_NVCC_FLAGS}") - endif(CUDA_NVCC_FLAGS MATCHES "arch") - include_directories(${CUDA_INCLUDE_DIRS}) + enable_language(CUDA) + find_package(CUDAToolkit REQUIRED) + if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) + set(CMAKE_CUDA_ARCHITECTURES 70) + endif() + # Automatically set the default NVCC flags + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Drestrict=__restrict__ -DNO_CUDA_MAIN") + if(QMC_COMPLEX) + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -DQMC_COMPLEX=${QMC_COMPLEX}") + endif() set(HAVE_CUDA 1) - message(" CUDA_NVCC_FLAGS=${CUDA_NVCC_FLAGS}") - endif(QMC_CUDA2HIP) + message(" CMAKE_CUDA_FLAGS=${CMAKE_CUDA_FLAGS}") + endif() else(QMC_CUDA OR ENABLE_CUDA) if(QMC_CUDA2HIP) message(FATAL_ERROR "QMC_CUDA2HIP requires QMC_CUDA=ON or ENABLE_CUDA=ON.") diff --git a/src/AFQMC/CMakeLists.txt b/src/AFQMC/CMakeLists.txt index 0b8564a7e1..22e9cf7b98 100644 --- a/src/AFQMC/CMakeLists.txt +++ 
b/src/AFQMC/CMakeLists.txt @@ -97,16 +97,11 @@ elseif(ENABLE_HIP) Numerics/detail/HIP/Kernels/inplace_product.hip.cpp Numerics/detail/HIP/Kernels/get_diagonal.hip.cpp) set(AFQMC_SRCS ${AFQMC_SRCS} Memory/HIP/hip_utilities.cpp Memory/HIP/hip_arch.cpp Memory/HIP/hip_init.cpp) -else(ENABLE_CUDA) - -endif(ENABLE_CUDA) +endif() if(ENABLE_CUDA) - cuda_add_library(afqmc ${AFQMC_SRCS}) - cuda_add_cublas_to_target(afqmc) - target_link_libraries(afqmc PRIVATE ${CUDA_cusparse_LIBRARY}) - target_link_libraries(afqmc PRIVATE ${CUDA_cusolver_LIBRARY}) - target_link_libraries(afqmc PRIVATE ${CUDA_curand_LIBRARY}) + add_library(afqmc ${AFQMC_SRCS}) + target_link_libraries(afqmc PRIVATE CUDA::curand CUDA::cusparse CUDA::cusolver CUDA::cublas) elseif(ENABLE_HIP) set_source_files_properties(${AFQMC_HIP_SRCS} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1) hip_add_library(afqmc_hip_lib ${AFQMC_HIP_SRCS}) diff --git a/src/Particle/CMakeLists.txt b/src/Particle/CMakeLists.txt index bd23e31d67..a3e996bb67 100644 --- a/src/Particle/CMakeLists.txt +++ b/src/Particle/CMakeLists.txt @@ -63,7 +63,7 @@ target_link_libraries(qmcparticle PUBLIC qmcnumerics qmcutil platform_runtime) if(QMC_CUDA) if(NOT QMC_CUDA2HIP) - cuda_add_library(qmcparticle_cuda accept_kernel.cu) + add_library(qmcparticle_cuda accept_kernel.cu) else() hip_add_library(qmcparticle_cuda accept_kernel.cu) endif(NOT QMC_CUDA2HIP) diff --git a/src/Platforms/CUDA/CMakeLists.txt b/src/Platforms/CUDA/CMakeLists.txt index 11a34fc793..343016fe3d 100644 --- a/src/Platforms/CUDA/CMakeLists.txt +++ b/src/Platforms/CUDA/CMakeLists.txt @@ -13,9 +13,11 @@ set(CUDA_RT_SRCS CUDAfill.cpp CUDAallocator.cpp CUDAruntime.cpp) set(CUDA_LA_SRCS cuBLAS_missing_functions.cu) if(NOT QMC_CUDA2HIP) - cuda_add_library(platform_cuda_runtime ${CUDA_RT_SRCS}) - cuda_add_library(platform_cuda_LA ${CUDA_LA_SRCS}) - cuda_add_cublas_to_target(platform_cuda_LA) + add_library(platform_cuda_runtime ${CUDA_RT_SRCS}) + add_library(platform_cuda_LA ${CUDA_LA_SRCS}) + 
target_link_libraries(platform_cuda_LA PRIVATE CUDA::cublas) + target_include_directories(platform_cuda_runtime PUBLIC ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) + target_link_libraries(platform_cuda_runtime PRIVATE CUDA::cudart) else() hip_add_library(platform_cuda_runtime ${CUDA_RT_SRCS}) hip_add_library(platform_cuda_LA ${CUDA_LA_SRCS}) diff --git a/src/Platforms/CUDA_legacy/CMakeLists.txt b/src/Platforms/CUDA_legacy/CMakeLists.txt index 1c2ecc96a3..7eb121c67a 100644 --- a/src/Platforms/CUDA_legacy/CMakeLists.txt +++ b/src/Platforms/CUDA_legacy/CMakeLists.txt @@ -12,9 +12,9 @@ set(CUDA_LEGACY_SRCS cuda_inverse.cu gpu_vector.cpp gpu_misc.cpp) if(NOT QMC_CUDA2HIP) - cuda_add_library(platform_cuda_legacy ${CUDA_LEGACY_SRCS}) - cuda_add_cublas_to_target(platform_cuda_legacy) - target_link_libraries(platform_cuda_legacy PRIVATE ${CUDA_LIBRARIES}) + add_library(platform_cuda_legacy ${CUDA_LEGACY_SRCS}) + target_include_directories(platform_cuda_legacy PUBLIC ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) + target_link_libraries(platform_cuda_legacy PRIVATE CUDA::cublas) else() hip_add_library(platform_cuda_legacy ${CUDA_LEGACY_SRCS}) target_link_libraries(platform_cuda_legacy PUBLIC platform_rocm_runtime platform_rocm_LA) diff --git a/src/Platforms/tests/CUDA/CMakeLists.txt b/src/Platforms/tests/CUDA/CMakeLists.txt index 41cb8096be..82762f78ba 100644 --- a/src/Platforms/tests/CUDA/CMakeLists.txt +++ b/src/Platforms/tests/CUDA/CMakeLists.txt @@ -15,7 +15,7 @@ set(UTEST_EXE test_${SRC_DIR}) set(UTEST_NAME deterministic-unit_test_${SRC_DIR}) if(NOT QMC_CUDA2HIP) - cuda_add_library(cuda_device_value_test_kernels test_device_value_kernels.cu) + add_library(cuda_device_value_test_kernels test_device_value_kernels.cu) else() hip_add_library(cuda_device_value_test_kernels test_device_value_kernels.cu) endif() diff --git a/src/QMCHamiltonians/CMakeLists.txt b/src/QMCHamiltonians/CMakeLists.txt index f910b2f481..bf37f918fe 100644 --- a/src/QMCHamiltonians/CMakeLists.txt +++ 
b/src/QMCHamiltonians/CMakeLists.txt @@ -99,7 +99,7 @@ else() endif() if(QMC_CUDA) if(NOT QMC_CUDA2HIP) - cuda_add_library(qmcham_cuda ${HAMSRCS_CUDA}) + add_library(qmcham_cuda ${HAMSRCS_CUDA}) else() hip_add_library(qmcham_cuda ${HAMSRCS_CUDA}) endif(NOT QMC_CUDA2HIP) diff --git a/src/QMCWaveFunctions/CMakeLists.txt b/src/QMCWaveFunctions/CMakeLists.txt index 6c83e7edb5..ee111fbb50 100644 --- a/src/QMCWaveFunctions/CMakeLists.txt +++ b/src/QMCWaveFunctions/CMakeLists.txt @@ -187,7 +187,7 @@ endif(USE_OBJECT_TARGET) if(QMC_CUDA OR ENABLE_CUDA) if(NOT QMC_CUDA2HIP) - cuda_add_library(qmcwfs_cuda ${WFSSRCS_CUDA}) + add_library(qmcwfs_cuda ${WFSSRCS_CUDA}) else() hip_add_library(qmcwfs_cuda ${WFSSRCS_CUDA}) target_link_libraries(qmcwfs_cuda PUBLIC platform_LA) @@ -208,7 +208,7 @@ target_link_libraries(qmcwfs PRIVATE einspline platform_LA Math::FFTW3) if(ENABLE_CUDA) set(DIRECT_INVERSION_SRCS detail/CUDA/cuBLAS_LU.cu) if(NOT QMC_CUDA2HIP) - cuda_add_library(qmcwfs_direct_inversion_cuda "${DIRECT_INVERSION_SRCS}") + add_library(qmcwfs_direct_inversion_cuda "${DIRECT_INVERSION_SRCS}") else() hip_add_library(qmcwfs_direct_inversion_cuda "${DIRECT_INVERSION_SRCS}") endif() diff --git a/src/einspline/CMakeLists.txt b/src/einspline/CMakeLists.txt index e85e6955c5..34ca21049e 100644 --- a/src/einspline/CMakeLists.txt +++ b/src/einspline/CMakeLists.txt @@ -37,7 +37,7 @@ set(SRCS if(QMC_CUDA) set(SRCS ${SRCS} multi_bspline_create_cuda.cu bspline_create_cuda.cu) if(NOT QMC_CUDA2HIP) - cuda_add_library(einspline ${SRCS}) + add_library(einspline ${SRCS}) else() hip_add_library(einspline ${SRCS}) endif(NOT QMC_CUDA2HIP) diff --git a/src/einspline/tests/CMakeLists.txt b/src/einspline/tests/CMakeLists.txt index 1d114fe1a5..1f1400508a 100644 --- a/src/einspline/tests/CMakeLists.txt +++ b/src/einspline/tests/CMakeLists.txt @@ -18,7 +18,7 @@ set(SRCS test_one.cpp test_3d.cpp) if(QMC_CUDA) set(SRCS ${SRCS} test_cuda.cu) if(NOT QMC_CUDA2HIP) - cuda_add_library(cudatests test_cuda.cu) + 
add_library(cudatests test_cuda.cu) else() hip_add_library(cudatests test_cuda.cu) endif(NOT QMC_CUDA2HIP) From b607510cfc4dd3ab94579b5aaa70e7e4d86df766 Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Wed, 29 Sep 2021 15:06:19 -0400 Subject: [PATCH 02/16] Don't change required CMake version Signed-off-by: Steven Hahn --- CMakeLists.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8dab5013f4..14ab6dbb6f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ ###################################################################### # CMake version and policies ###################################################################### -cmake_minimum_required(VERSION 3.17.0) +cmake_minimum_required(VERSION 3.15.0) # CMP0074: CMake find_package will use _ROOT CMake variable # and environment variable in search path. @@ -676,6 +676,9 @@ if(QMC_CUDA OR ENABLE_CUDA) if(QMC_CUDA2HIP) message(STATUS "CUDA2HIP enabled") # all the HIP and ROCm settings will be handled by ENABLE_ROCM else(QMC_CUDA2HIP) + if (CMAKE_VERSION VERSION_LESS 3.17.0) + message(FATAL_ERROR "QMC_CUDA or ENABLE_CUDA require CMake 3.17.0 or later") + endif() enable_language(CUDA) find_package(CUDAToolkit REQUIRED) if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) From 761745bfaeb1bcd38c012169a8c5d4a98fac2506 Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Wed, 29 Sep 2021 17:36:10 -0400 Subject: [PATCH 03/16] Apply changes recommended by @ye-luo. 
Signed-off-by: Steven Hahn --- CMakeLists.txt | 22 +++++++--------------- src/Platforms/CUDA/CMakeLists.txt | 7 +++---- src/Platforms/CUDA_legacy/CMakeLists.txt | 3 +-- 3 files changed, 11 insertions(+), 21 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 14ab6dbb6f..24a7e9bfac 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,7 +14,6 @@ cmake_policy(SET CMP0075 NEW) ###################################################################### # QMCPACK project ###################################################################### - project( qmcpack VERSION 3.11.9 @@ -42,17 +41,6 @@ if(NOT QMC_CXX_STANDARD EQUAL 17) "Using other versions of the C++ standard is unsupported and done entirely at user's own risk.") endif() -#-------------------------------------------------------------------- -# Set CUDA standard -#-------------------------------------------------------------------- -set(QMC_CUDA_STANDARD - 14 - CACHE STRING "QMCPACK CUDA C++ language standard") -if(NOT QMC_CUDA_STANDARD EQUAL 14) - message(WARNING "C++14 is the only CUDA language standard officially supported by this QMCPACK version. 
" - "Using other versions of the CUDA C++ standard is unsupported and done entirely at user's own risk.") -endif() - #-------------------------------------------------------------------- # Programmind model related build options # MPI, OpenMP, GPU acceleration @@ -272,8 +260,9 @@ endif() set(CMAKE_CXX_STANDARD ${QMC_CXX_STANDARD}) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) -set(CMAKE_CUDA_STANDARD ${QMC_CUDA_STANDARD}) +set(CMAKE_CUDA_STANDARD ${QMC_CXX_STANDARD}) set(CMAKE_CUDA_STANDARD_REQUIRED TRUE) +set(CMAKE_CUDA_EXTENSIONS OFF) # Check that a C++ compiler is compatible with the underlying libstdc++ include(Testlibstdc++) @@ -679,11 +668,14 @@ if(QMC_CUDA OR ENABLE_CUDA) if (CMAKE_VERSION VERSION_LESS 3.17.0) message(FATAL_ERROR "QMC_CUDA or ENABLE_CUDA require CMake 3.17.0 or later") endif() - enable_language(CUDA) - find_package(CUDAToolkit REQUIRED) + if(DEFINED CUDA_ARCH) + message(FATAL_ERROR "Use CMAKE_CUDA_ARCHITECTURES instead of CUDA_ARCH variable") + endif() if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) set(CMAKE_CUDA_ARCHITECTURES 70) endif() + enable_language(CUDA) + find_package(CUDAToolkit REQUIRED) # Automatically set the default NVCC flags set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Drestrict=__restrict__ -DNO_CUDA_MAIN") if(QMC_COMPLEX) diff --git a/src/Platforms/CUDA/CMakeLists.txt b/src/Platforms/CUDA/CMakeLists.txt index 343016fe3d..a2cadf6019 100644 --- a/src/Platforms/CUDA/CMakeLists.txt +++ b/src/Platforms/CUDA/CMakeLists.txt @@ -15,9 +15,8 @@ set(CUDA_LA_SRCS cuBLAS_missing_functions.cu) if(NOT QMC_CUDA2HIP) add_library(platform_cuda_runtime ${CUDA_RT_SRCS}) add_library(platform_cuda_LA ${CUDA_LA_SRCS}) - target_link_libraries(platform_cuda_LA PRIVATE CUDA::cublas) - target_include_directories(platform_cuda_runtime PUBLIC ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) - target_link_libraries(platform_cuda_runtime PRIVATE CUDA::cudart) + target_link_libraries(platform_cuda_LA PRIVATE CUDA::cublas CUDA::cusolver) + 
target_link_libraries(platform_cuda_runtime PUBLIC CUDA::cudart) else() hip_add_library(platform_cuda_runtime ${CUDA_RT_SRCS}) hip_add_library(platform_cuda_LA ${CUDA_LA_SRCS}) @@ -25,4 +24,4 @@ else() target_link_libraries(platform_cuda_LA PUBLIC platform_rocm_LA) endif() -target_link_libraries(platform_cuda_LA PRIVATE ${CUDA_cusolver_LIBRARY} platform_cuda_runtime) +target_link_libraries(platform_cuda_LA PRIVATE platform_cuda_runtime) diff --git a/src/Platforms/CUDA_legacy/CMakeLists.txt b/src/Platforms/CUDA_legacy/CMakeLists.txt index 7eb121c67a..af20038d91 100644 --- a/src/Platforms/CUDA_legacy/CMakeLists.txt +++ b/src/Platforms/CUDA_legacy/CMakeLists.txt @@ -13,8 +13,7 @@ set(CUDA_LEGACY_SRCS cuda_inverse.cu gpu_vector.cpp gpu_misc.cpp) if(NOT QMC_CUDA2HIP) add_library(platform_cuda_legacy ${CUDA_LEGACY_SRCS}) - target_include_directories(platform_cuda_legacy PUBLIC ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) - target_link_libraries(platform_cuda_legacy PRIVATE CUDA::cublas) + target_link_libraries(platform_cuda_legacy PUBLIC CUDA::cublas) else() hip_add_library(platform_cuda_legacy ${CUDA_LEGACY_SRCS}) target_link_libraries(platform_cuda_legacy PUBLIC platform_rocm_runtime platform_rocm_LA) From 707977ed689a145a1a1dc5d03b276255780457fb Mon Sep 17 00:00:00 2001 From: Ye Luo Date: Wed, 29 Sep 2021 17:22:44 -0500 Subject: [PATCH 04/16] More accurate stopper message. --- CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 24a7e9bfac..215ada295d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -669,7 +669,8 @@ if(QMC_CUDA OR ENABLE_CUDA) message(FATAL_ERROR "QMC_CUDA or ENABLE_CUDA require CMake 3.17.0 or later") endif() if(DEFINED CUDA_ARCH) - message(FATAL_ERROR "Use CMAKE_CUDA_ARCHITECTURES instead of CUDA_ARCH variable") + unset(CUDA_ARCH CACHE) + message(FATAL_ERROR "CUDA_ARCH option has been removed. 
Use -DCMAKE_CUDA_ARCHITECTURES=80 if -DCUDA_ARCH=sm_80 was used.") endif() if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) set(CMAKE_CUDA_ARCHITECTURES 70) From 734415c99f811275bfdd14edf608986abb746e23 Mon Sep 17 00:00:00 2001 From: Ye Luo Date: Wed, 29 Sep 2021 17:24:08 -0500 Subject: [PATCH 05/16] CMAKE_CUDA_ARCHITECTURES needs CMake 3.18. --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 215ada295d..b8b013d4d9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -665,8 +665,8 @@ if(QMC_CUDA OR ENABLE_CUDA) if(QMC_CUDA2HIP) message(STATUS "CUDA2HIP enabled") # all the HIP and ROCm settings will be handled by ENABLE_ROCM else(QMC_CUDA2HIP) - if (CMAKE_VERSION VERSION_LESS 3.17.0) - message(FATAL_ERROR "QMC_CUDA or ENABLE_CUDA require CMake 3.17.0 or later") + if (CMAKE_VERSION VERSION_LESS 3.18.0) + message(FATAL_ERROR "QMC_CUDA or ENABLE_CUDA require CMake 3.18.0 or later") endif() if(DEFINED CUDA_ARCH) unset(CUDA_ARCH CACHE) From 57e55d33e21134d16ebc2d25f5d5c204f656b24f Mon Sep 17 00:00:00 2001 From: Ye Luo Date: Wed, 29 Sep 2021 17:30:50 -0500 Subject: [PATCH 06/16] Set back CUDA default to C++14. --- CMakeLists.txt | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b8b013d4d9..b86fd6e287 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -260,9 +260,6 @@ endif() set(CMAKE_CXX_STANDARD ${QMC_CXX_STANDARD}) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) -set(CMAKE_CUDA_STANDARD ${QMC_CXX_STANDARD}) -set(CMAKE_CUDA_STANDARD_REQUIRED TRUE) -set(CMAKE_CUDA_EXTENSIONS OFF) # Check that a C++ compiler is compatible with the underlying libstdc++ include(Testlibstdc++) @@ -672,6 +669,12 @@ if(QMC_CUDA OR ENABLE_CUDA) unset(CUDA_ARCH CACHE) message(FATAL_ERROR "CUDA_ARCH option has been removed. 
Use -DCMAKE_CUDA_ARCHITECTURES=80 if -DCUDA_ARCH=sm_80 was used.") endif() + # a few production machines use CUDA 10 which only supports C++14. + if(NOT DEFINED CMAKE_CUDA_STANDARD) + set(CMAKE_CUDA_STANDARD 14) + endif() + set(CMAKE_CUDA_STANDARD_REQUIRED TRUE) + set(CMAKE_CUDA_EXTENSIONS OFF) if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) set(CMAKE_CUDA_ARCHITECTURES 70) endif() From ebf78496ec445f5d5063288a7323c5fd2aafb667 Mon Sep 17 00:00:00 2001 From: Ye Luo Date: Wed, 29 Sep 2021 19:02:47 -0500 Subject: [PATCH 07/16] Make platform_cuda_legacy depend on CUDA::cudart --- src/Platforms/CUDA/CMakeLists.txt | 4 ++-- src/Platforms/CUDA_legacy/CMakeLists.txt | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Platforms/CUDA/CMakeLists.txt b/src/Platforms/CUDA/CMakeLists.txt index a2cadf6019..f84c63ee1c 100644 --- a/src/Platforms/CUDA/CMakeLists.txt +++ b/src/Platforms/CUDA/CMakeLists.txt @@ -14,14 +14,14 @@ set(CUDA_LA_SRCS cuBLAS_missing_functions.cu) if(NOT QMC_CUDA2HIP) add_library(platform_cuda_runtime ${CUDA_RT_SRCS}) + target_link_libraries(platform_cuda_runtime PUBLIC CUDA::cudart) add_library(platform_cuda_LA ${CUDA_LA_SRCS}) target_link_libraries(platform_cuda_LA PRIVATE CUDA::cublas CUDA::cusolver) - target_link_libraries(platform_cuda_runtime PUBLIC CUDA::cudart) else() hip_add_library(platform_cuda_runtime ${CUDA_RT_SRCS}) + target_link_libraries(platform_cuda_LA PUBLIC platform_rocm_LA) hip_add_library(platform_cuda_LA ${CUDA_LA_SRCS}) target_link_libraries(platform_cuda_runtime PUBLIC platform_rocm_runtime) - target_link_libraries(platform_cuda_LA PUBLIC platform_rocm_LA) endif() target_link_libraries(platform_cuda_LA PRIVATE platform_cuda_runtime) diff --git a/src/Platforms/CUDA_legacy/CMakeLists.txt b/src/Platforms/CUDA_legacy/CMakeLists.txt index af20038d91..d97c90c1de 100644 --- a/src/Platforms/CUDA_legacy/CMakeLists.txt +++ b/src/Platforms/CUDA_legacy/CMakeLists.txt @@ -13,7 +13,7 @@ set(CUDA_LEGACY_SRCS cuda_inverse.cu 
gpu_vector.cpp gpu_misc.cpp) if(NOT QMC_CUDA2HIP) add_library(platform_cuda_legacy ${CUDA_LEGACY_SRCS}) - target_link_libraries(platform_cuda_legacy PUBLIC CUDA::cublas) + target_link_libraries(platform_cuda_legacy PUBLIC CUDA::cublas CUDA::cudart) else() hip_add_library(platform_cuda_legacy ${CUDA_LEGACY_SRCS}) target_link_libraries(platform_cuda_legacy PUBLIC platform_rocm_runtime platform_rocm_LA) From b39aea6a7861a3779f9f641154eda250b226acd6 Mon Sep 17 00:00:00 2001 From: Ye Luo Date: Wed, 29 Sep 2021 19:06:57 -0500 Subject: [PATCH 08/16] Set CMAKE_CUDA_ARCHITECTURES early. --- CMake/ClangCompilers.cmake | 4 ++++ CMake/NVHPCCompilers.cmake | 8 ++++++-- CMakeLists.txt | 18 +++++++++++------- 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/CMake/ClangCompilers.cmake b/CMake/ClangCompilers.cmake index bed72d755d..7128c96c3b 100644 --- a/CMake/ClangCompilers.cmake +++ b/CMake/ClangCompilers.cmake @@ -19,6 +19,10 @@ if(QMC_OMP) CACHE STRING "Offload target architecture") set(OPENMP_OFFLOAD_COMPILE_OPTIONS "-fopenmp-targets=${OFFLOAD_TARGET}") + if(NOT DEFINED OFFLOAD_ARCH AND OFFLOAD_TARGET MATCHES "nvptx64" AND DEFINED CMAKE_CUDA_ARCHITECTURES) + set(OFFLOAD_ARCH sm_${CMAKE_CUDA_ARCHITECTURES}) + endif() + if(DEFINED OFFLOAD_ARCH) set(OPENMP_OFFLOAD_COMPILE_OPTIONS "${OPENMP_OFFLOAD_COMPILE_OPTIONS} -Xopenmp-target=${OFFLOAD_TARGET} -march=${OFFLOAD_ARCH}") diff --git a/CMake/NVHPCCompilers.cmake b/CMake/NVHPCCompilers.cmake index 6acb60d496..9128b3c9b6 100644 --- a/CMake/NVHPCCompilers.cmake +++ b/CMake/NVHPCCompilers.cmake @@ -8,8 +8,12 @@ if(QMC_OMP) if(ENABLE_OFFLOAD AND NOT CMAKE_SYSTEM_NAME STREQUAL "CrayLinuxEnvironment") message(WARNING "QMCPACK OpenMP offload is not ready for NVIDIA HPC compiler.") if(NOT DEFINED OFFLOAD_ARCH) - message(FATAL_ERROR "NVIDIA HPC compiler requires -gpu=ccXX option set based on the target GPU architecture! " - "Please add -DOFFLOAD_ARCH=ccXX to cmake. 
For example, cc70 is for Volta.") + if(DEFINED CMAKE_CUDA_ARCHITECTURES) + set(OFFLOAD_ARCH cc${CMAKE_CUDA_ARCHITECTURES}) + else() + message(FATAL_ERROR "NVIDIA HPC compiler requires -gpu=ccXX option set based on the target GPU architecture! " + "Please add -DOFFLOAD_ARCH=ccXX to cmake. For example, cc70 is for Volta.") + endif() endif() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mp=gpu") set(OPENMP_OFFLOAD_COMPILE_OPTIONS "-gpu=${OFFLOAD_ARCH}") diff --git a/CMakeLists.txt b/CMakeLists.txt index b86fd6e287..8c44c52e3f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -72,6 +72,17 @@ if(ENABLE_CUDA AND QMC_CUDA) message(FATAL_ERROR "ENABLE_CUDA=ON and QMC_CUDA=ON can not be set together!") endif(ENABLE_CUDA AND QMC_CUDA) +# set CMAKE_CUDA_ARCHITECTURES early such that offload compilers may take advantage of it +if(ENABLE_CUDA OR QMC_CUDA AND NOT QMC_CUDA2HIP) + if(DEFINED CUDA_ARCH) + unset(CUDA_ARCH CACHE) + message(FATAL_ERROR "CUDA_ARCH option has been removed. Use -DCMAKE_CUDA_ARCHITECTURES=80 if -DCUDA_ARCH=sm_80 was used.") + endif() + if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) + set(CMAKE_CUDA_ARCHITECTURES 70) + endif() +endif() + #-------------------------------------------------------------------- # Set compiler-time parameters # WALKER_MAX_PROPERTIES max number of observables + 12 or so standard @@ -665,19 +676,12 @@ if(QMC_CUDA OR ENABLE_CUDA) if (CMAKE_VERSION VERSION_LESS 3.18.0) message(FATAL_ERROR "QMC_CUDA or ENABLE_CUDA require CMake 3.18.0 or later") endif() - if(DEFINED CUDA_ARCH) - unset(CUDA_ARCH CACHE) - message(FATAL_ERROR "CUDA_ARCH option has been removed. Use -DCMAKE_CUDA_ARCHITECTURES=80 if -DCUDA_ARCH=sm_80 was used.") - endif() # a few production machines use CUDA 10 which only supports C++14. 
if(NOT DEFINED CMAKE_CUDA_STANDARD) set(CMAKE_CUDA_STANDARD 14) endif() set(CMAKE_CUDA_STANDARD_REQUIRED TRUE) set(CMAKE_CUDA_EXTENSIONS OFF) - if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) - set(CMAKE_CUDA_ARCHITECTURES 70) - endif() enable_language(CUDA) find_package(CUDAToolkit REQUIRED) # Automatically set the default NVCC flags From 058e4465180197b00157785a2d0012ea13df7369 Mon Sep 17 00:00:00 2001 From: Ye Luo Date: Thu, 30 Sep 2021 10:06:18 -0400 Subject: [PATCH 09/16] Update recipes under config for CUDA change. --- CMakeLists.txt | 2 +- config/build_olcf_summit.sh | 4 ++-- config/build_olcf_summit_Clang.sh | 15 ++++++++------- config/build_tulip.sh | 2 +- 4 files changed, 12 insertions(+), 11 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8c44c52e3f..9ff80f9b64 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -690,7 +690,7 @@ if(QMC_CUDA OR ENABLE_CUDA) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -DQMC_COMPLEX=${QMC_COMPLEX}") endif() set(HAVE_CUDA 1) - message(" CMAKE_CUDA_FLAGS=${CMAKE_CUDA_FLAGS}") + message("Project CUDA_FLAGS: ${CMAKE_CUDA_FLAGS}") endif() else(QMC_CUDA OR ENABLE_CUDA) if(QMC_CUDA2HIP) diff --git a/config/build_olcf_summit.sh b/config/build_olcf_summit.sh index dc40f90f57..2429214ae5 100755 --- a/config/build_olcf_summit.sh +++ b/config/build_olcf_summit.sh @@ -12,8 +12,8 @@ echo "Either source $BUILD_MODULES or load these same modules to run QMCPACK" declare -A builds=( ["cpu"]=" -DQMC_MATH_VENDOR=IBM_MASS -DMASS_ROOT=/sw/summit/xl/16.1.1-10/xlmass/9.1.1" \ ["complex_cpu"]="-DQMC_COMPLEX=1 -DQMC_MATH_VENDOR=IBM_MASS -DMASS_ROOT=/sw/summit/xl/16.1.1-10/xlmass/9.1.1" \ - ["legacy_gpu"]="-DQMC_CUDA=1 -DCUDA_ARCH=sm_70 " \ - ["complex_legacy_gpu"]="-DQMC_CUDA=1 -DQMC_COMPLEX=1 -DCUDA_ARCH=sm_70 " ) + ["legacy_gpu"]="-DQMC_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES=70 " \ + ["complex_legacy_gpu"]="-DQMC_CUDA=1 -DQMC_COMPLEX=1 -DCMAKE_CUDA_ARCHITECTURES=70 " ) mkdir bin diff --git a/config/build_olcf_summit_Clang.sh 
b/config/build_olcf_summit_Clang.sh index 1defa4ce34..598734a300 100755 --- a/config/build_olcf_summit_Clang.sh +++ b/config/build_olcf_summit_Clang.sh @@ -32,28 +32,29 @@ module load llvm/main-20210811-cuda10.1 TYPE=Release Compiler=Clang -source_folder=.. +source_folder=~/opt/qmcpack for name in offload_cuda_real_MP offload_cuda_real offload_cuda_cplx_MP offload_cuda_cplx \ cpu_real_MP cpu_real cpu_cplx_MP cpu_cplx do -CMAKE_FLAGS="-D CMAKE_BUILD_TYPE=$TYPE -D QMC_MATH_VENDOR=IBM_MASS -D MASS_ROOT=/sw/summit/xl/16.1.1-10/xlmass/9.1.1 -D MPIEXEC_EXECUTABLE=`which jsrun` -D MPIEXEC_NUMPROC_FLAG='-n' -D MPIEXEC_PREFLAGS='-c;16;-g;1;-b;packed:16;--smpiargs=off'" +CMAKE_FLAGS="-DCMAKE_BUILD_TYPE=$TYPE -DQMC_MATH_VENDOR=IBM_MASS -DMASS_ROOT=/sw/summit/xl/16.1.1-10/xlmass/9.1.1 -DMPIEXEC_EXECUTABLE=`which jsrun` -DMPIEXEC_NUMPROC_FLAG='-n' -DMPIEXEC_PREFLAGS='-c;16;-g;1;-b;packed:16;--smpiargs=off' -DCMAKE_CXX_STANDARD_LIBRARIES=/sw/summit/gcc/9.3.0-2/lib64/libstdc++.a" if [[ $name == *"cplx"* ]]; then - CMAKE_FLAGS="$CMAKE_FLAGS -D QMC_COMPLEX=1" + CMAKE_FLAGS="$CMAKE_FLAGS -DQMC_COMPLEX=ON" fi if [[ $name == *"_MP"* ]]; then - CMAKE_FLAGS="$CMAKE_FLAGS -D QMC_MIXED_PRECISION=1" + CMAKE_FLAGS="$CMAKE_FLAGS -DQMC_MIXED_PRECISION=ON" fi if [[ $name == *"offload"* ]]; then - CMAKE_FLAGS="$CMAKE_FLAGS -D ENABLE_OFFLOAD=ON -D USE_OBJECT_TARGET=ON -DOFFLOAD_ARCH=sm_70" + CMAKE_FLAGS="$CMAKE_FLAGS -DENABLE_OFFLOAD=ON -DUSE_OBJECT_TARGET=ON -DOFFLOAD_ARCH=sm_70" fi if [[ $name == *"cuda"* ]]; then - CMAKE_FLAGS="$CMAKE_FLAGS -D ENABLE_CUDA=1 -D CUDA_ARCH=sm_70 -D CUDA_HOST_COMPILER=/usr/bin/gcc -D CUDA_NVCC_FLAGS='-Xcompiler;-mno-float128'" + CMAKE_FLAGS="$CMAKE_FLAGS -DENABLE_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=70 -DCMAKE_CUDA_HOST_COMPILER=/usr/bin/g++" + CUDA_FLAGS="-Xcompiler -mno-float128" fi folder=build_summit_${Compiler}_${name} @@ -64,7 +65,7 @@ echo "**********************************" mkdir $folder cd $folder if [ ! 
-f CMakeCache.txt ] ; then -cmake $CMAKE_FLAGS -D CMAKE_C_COMPILER=mpicc -D CMAKE_CXX_COMPILER=mpicxx $source_folder +cmake $CMAKE_FLAGS -DCMAKE_C_COMPILER=mpicc -DCMAKE_CXX_COMPILER=mpicxx -DCMAKE_CUDA_FLAGS="$CUDA_FLAGS" $source_folder cmake . fi make -j16 diff --git a/config/build_tulip.sh b/config/build_tulip.sh index 717afc09bd..324c93bac3 100644 --- a/config/build_tulip.sh +++ b/config/build_tulip.sh @@ -47,7 +47,7 @@ elif [[ $build == *"MI60"* ]]; then fi if [[ $build == *"cuda"* ]]; then - CTEST_FLAGS="$CTEST_FLAGS -DENABLE_CUDA=ON -DCUDA_ARCH=sm_70 -DCUDA_TOOLKIT_ROOT_DIR=$CUDA_ROOT -DCUDA_HOST_COMPILER=`which gcc`" + CTEST_FLAGS="$CTEST_FLAGS -DENABLE_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=70 -DCUDAToolkit_ROOT=$CUDA_ROOT -DCMAKE_CUDA_HOST_COMPILER=`which g++`" fi if [[ $build == *"cplx"* ]]; then From afd22943d1aaccbc43ce2c861bab4030fdd7313c Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Thu, 30 Sep 2021 16:06:50 -0400 Subject: [PATCH 10/16] Check LLVM offload only contains one architecture Signed-off-by: Steven Hahn --- CMake/ClangCompilers.cmake | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CMake/ClangCompilers.cmake b/CMake/ClangCompilers.cmake index 7128c96c3b..9638b6aef1 100644 --- a/CMake/ClangCompilers.cmake +++ b/CMake/ClangCompilers.cmake @@ -20,6 +20,10 @@ if(QMC_OMP) set(OPENMP_OFFLOAD_COMPILE_OPTIONS "-fopenmp-targets=${OFFLOAD_TARGET}") if(NOT DEFINED OFFLOAD_ARCH AND OFFLOAD_TARGET MATCHES "nvptx64" AND DEFINED CMAKE_CUDA_ARCHITECTURES) + list(LENGTH CMAKE_CUDA_ARCHITECTURES NUMBER_CUDA_ARCHITECTURES) + if(NOT NUMBER_CUDA_ARCHITECTURES EQUAL "1") + message(FATAL_ERROR "LLVM does not support offload to multiple architectures!") + endif() set(OFFLOAD_ARCH sm_${CMAKE_CUDA_ARCHITECTURES}) endif() From 8f1b5af1742588d7fe0bf0ae11a34828bee138e8 Mon Sep 17 00:00:00 2001 From: Steven Hahn Date: Thu, 30 Sep 2021 16:30:05 -0400 Subject: [PATCH 11/16] update documentation Signed-off-by: Steven Hahn --- docs/installation.rst | 4 ++-- 1 file 
changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/installation.rst b/docs/installation.rst index b941e284c1..c218502bc2 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -289,7 +289,7 @@ the path to the source directory. particularly for large electron counts. ENABLE_CUDA ON/OFF(default). Enable CUDA code path for NVIDIA GPU acceleration. Production quality for AFQMC. Pre-production quality for real-space. - Use CUDA_ARCH, default sm_70, to set the actual GPU architecture. + Use CMAKE_CUDA_ARCHITECTURES, default 70, to set the actual GPU architecture. ENABLE_OFFLOAD ON/OFF(default). Enable OpenMP target offload for GPU acceleration. ENABLE_TIMERS ON(default)/OFF. Enable fine-grained timers. Timers are on by default but at level coarse to avoid potential slowdown in tiny systems. @@ -448,7 +448,7 @@ For example, using Clang 11 on Summit. :: - -D ENABLE_OFFLOAD=ON -D USE_OBJECT_TARGET=ON -D ENABLE_CUDA=ON -D CUDA_ARCH=sm_70 -D CUDA_HOST_COMPILER=`which gcc` + -D ENABLE_OFFLOAD=ON -D USE_OBJECT_TARGET=ON -D ENABLE_CUDA=ON -D CMAKE_CUDA_ARCHITECTURES=70 -D CUDA_HOST_COMPILER=`which gcc` Installation from CMake From be7893e67aeef6964fe9bfb0229e5e09698f6a92 Mon Sep 17 00:00:00 2001 From: Ye Luo Date: Thu, 30 Sep 2021 16:58:44 -0500 Subject: [PATCH 12/16] Our header-only wrappers need the CUDA include path. 
--- src/Platforms/CUDA/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Platforms/CUDA/CMakeLists.txt b/src/Platforms/CUDA/CMakeLists.txt index f84c63ee1c..ca1dd00921 100644 --- a/src/Platforms/CUDA/CMakeLists.txt +++ b/src/Platforms/CUDA/CMakeLists.txt @@ -16,7 +16,7 @@ if(NOT QMC_CUDA2HIP) add_library(platform_cuda_runtime ${CUDA_RT_SRCS}) target_link_libraries(platform_cuda_runtime PUBLIC CUDA::cudart) add_library(platform_cuda_LA ${CUDA_LA_SRCS}) - target_link_libraries(platform_cuda_LA PRIVATE CUDA::cublas CUDA::cusolver) + target_link_libraries(platform_cuda_LA PUBLIC CUDA::cublas CUDA::cusolver) else() hip_add_library(platform_cuda_runtime ${CUDA_RT_SRCS}) target_link_libraries(platform_cuda_LA PUBLIC platform_rocm_LA) @@ -24,4 +24,4 @@ else() target_link_libraries(platform_cuda_runtime PUBLIC platform_rocm_runtime) endif() -target_link_libraries(platform_cuda_LA PRIVATE platform_cuda_runtime) +target_link_libraries(platform_cuda_LA PUBLIC platform_cuda_runtime) From a9e69a14d3634299298796a333e3ff29089f53ae Mon Sep 17 00:00:00 2001 From: Ye Luo Date: Thu, 30 Sep 2021 18:43:55 -0500 Subject: [PATCH 13/16] Make NVHPC support CMAKE_CUDA_ARCHITECTURES as a list. 
--- CMake/ClangCompilers.cmake | 9 ++++++--- CMake/NVHPCCompilers.cmake | 8 +++++++- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/CMake/ClangCompilers.cmake b/CMake/ClangCompilers.cmake index 9638b6aef1..7c38a6295d 100644 --- a/CMake/ClangCompilers.cmake +++ b/CMake/ClangCompilers.cmake @@ -21,10 +21,13 @@ if(QMC_OMP) if(NOT DEFINED OFFLOAD_ARCH AND OFFLOAD_TARGET MATCHES "nvptx64" AND DEFINED CMAKE_CUDA_ARCHITECTURES) list(LENGTH CMAKE_CUDA_ARCHITECTURES NUMBER_CUDA_ARCHITECTURES) - if(NOT NUMBER_CUDA_ARCHITECTURES EQUAL "1") - message(FATAL_ERROR "LLVM does not support offload to multiple architectures!") + if(NUMBER_CUDA_ARCHITECTURES EQUAL "1") + set(OFFLOAD_ARCH sm_${CMAKE_CUDA_ARCHITECTURES}) + else() + message(FATAL_ERROR "LLVM does not yet support offload to multiple architectures! " + "Deriving OFFLOAD_ARCH from CMAKE_CUDA_ARCHITECTURES failed. " + "Please keep only one entry in CMAKE_CUDA_ARCHITECTURES or set OFFLOAD_ARCH") endif() - set(OFFLOAD_ARCH sm_${CMAKE_CUDA_ARCHITECTURES}) endif() if(DEFINED OFFLOAD_ARCH) diff --git a/CMake/NVHPCCompilers.cmake b/CMake/NVHPCCompilers.cmake index 9128b3c9b6..c02531cc5a 100644 --- a/CMake/NVHPCCompilers.cmake +++ b/CMake/NVHPCCompilers.cmake @@ -9,7 +9,13 @@ if(QMC_OMP) message(WARNING "QMCPACK OpenMP offload is not ready for NVIDIA HPC compiler.") if(NOT DEFINED OFFLOAD_ARCH) if(DEFINED CMAKE_CUDA_ARCHITECTURES) - set(OFFLOAD_ARCH cc${CMAKE_CUDA_ARCHITECTURES}) + list(LENGTH CMAKE_CUDA_ARCHITECTURES NUMBER_CUDA_ARCHITECTURES) + if(NUMBER_CUDA_ARCHITECTURES EQUAL "1") + set(OFFLOAD_ARCH cc${CMAKE_CUDA_ARCHITECTURES}) + else() + string(REPLACE ";" ",cc" OFFLOAD_ARCH "${CMAKE_CUDA_ARCHITECTURES}") + set(OFFLOAD_ARCH "cc${OFFLOAD_ARCH}") + endif() else() message(FATAL_ERROR "NVIDIA HPC compiler requires -gpu=ccXX option set based on the target GPU architecture! " "Please add -DOFFLOAD_ARCH=ccXX to cmake. 
For example, cc70 is for Volta.") From fe7fba7087f1f44cd011217e141db38444720216 Mon Sep 17 00:00:00 2001 From: Ye Luo Date: Thu, 30 Sep 2021 19:00:23 -0500 Subject: [PATCH 14/16] Allow OFFLOAD_ARCH not being set for NVHPC. --- CMake/NVHPCCompilers.cmake | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/CMake/NVHPCCompilers.cmake b/CMake/NVHPCCompilers.cmake index c02531cc5a..f4ef80aba4 100644 --- a/CMake/NVHPCCompilers.cmake +++ b/CMake/NVHPCCompilers.cmake @@ -7,22 +7,24 @@ if(QMC_OMP) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mp=allcores") if(ENABLE_OFFLOAD AND NOT CMAKE_SYSTEM_NAME STREQUAL "CrayLinuxEnvironment") message(WARNING "QMCPACK OpenMP offload is not ready for NVIDIA HPC compiler.") - if(NOT DEFINED OFFLOAD_ARCH) - if(DEFINED CMAKE_CUDA_ARCHITECTURES) - list(LENGTH CMAKE_CUDA_ARCHITECTURES NUMBER_CUDA_ARCHITECTURES) - if(NUMBER_CUDA_ARCHITECTURES EQUAL "1") - set(OFFLOAD_ARCH cc${CMAKE_CUDA_ARCHITECTURES}) - else() - string(REPLACE ";" ",cc" OFFLOAD_ARCH "${CMAKE_CUDA_ARCHITECTURES}") - set(OFFLOAD_ARCH "cc${OFFLOAD_ARCH}") - endif() + if(NOT DEFINED OFFLOAD_ARCH AND DEFINED CMAKE_CUDA_ARCHITECTURES) + list(LENGTH CMAKE_CUDA_ARCHITECTURES NUMBER_CUDA_ARCHITECTURES) + if(NUMBER_CUDA_ARCHITECTURES EQUAL "1") + set(OFFLOAD_ARCH cc${CMAKE_CUDA_ARCHITECTURES}) else() + string(REPLACE ";" ",cc" OFFLOAD_ARCH "${CMAKE_CUDA_ARCHITECTURES}") + set(OFFLOAD_ARCH "cc${OFFLOAD_ARCH}") + endif() + endif() + + if(DEFINED OFFLOAD_ARCH) + if(NOT OFFLOAD_ARCH MATCHES "cc") message(FATAL_ERROR "NVIDIA HPC compiler requires -gpu=ccXX option set based on the target GPU architecture! " "Please add -DOFFLOAD_ARCH=ccXX to cmake. 
For example, cc70 is for Volta.") endif() + set(OPENMP_OFFLOAD_COMPILE_OPTIONS "-gpu=${OFFLOAD_ARCH}") endif() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mp=gpu") - set(OPENMP_OFFLOAD_COMPILE_OPTIONS "-gpu=${OFFLOAD_ARCH}") else() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mp=allcores") endif() From 587634d816b20c12836ccb321ad02ac99ac064b2 Mon Sep 17 00:00:00 2001 From: Ye Luo Date: Thu, 30 Sep 2021 19:10:31 -0500 Subject: [PATCH 15/16] Update installation.rst --- CMake/ClangCompilers.cmake | 2 +- docs/installation.rst | 10 ++-------- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/CMake/ClangCompilers.cmake b/CMake/ClangCompilers.cmake index 7c38a6295d..eab5cf13df 100644 --- a/CMake/ClangCompilers.cmake +++ b/CMake/ClangCompilers.cmake @@ -26,7 +26,7 @@ if(QMC_OMP) else() message(FATAL_ERROR "LLVM does not yet support offload to multiple architectures! " "Deriving OFFLOAD_ARCH from CMAKE_CUDA_ARCHITECTURES failed. " - "Please keep only one entry in CMAKE_CUDA_ARCHITECTURES or set OFFLOAD_ARCH") + "Please keep only one entry in CMAKE_CUDA_ARCHITECTURES or set OFFLOAD_ARCH.") endif() endif() diff --git a/docs/installation.rst b/docs/installation.rst index c218502bc2..a1c46191f9 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -416,13 +416,7 @@ to be reached. The following compilers have been verified: :: OFFLOAD_TARGET for the offload target. default nvptx64-nvidia-cuda. - OFFLOAD_ARCH for the target architecture if not using the compiler default. - -- IBM XL 16.1. Support NVIDIA GPUs. - - :: - - -D ENABLE_OFFLOAD=ON + OFFLOAD_ARCH for the target architecture (sm_80, gfx906, ...) if not using the compiler default. - AMD AOMP Clang 11.8. Support AMD GPUs. @@ -448,7 +442,7 @@ For example, using Clang 11 on Summit. 
:: - -D ENABLE_OFFLOAD=ON -D USE_OBJECT_TARGET=ON -D ENABLE_CUDA=ON -D CMAKE_CUDA_ARCHITECTURES=70 -D CUDA_HOST_COMPILER=`which gcc` + -D ENABLE_OFFLOAD=ON -D USE_OBJECT_TARGET=ON -D ENABLE_CUDA=ON -D CMAKE_CUDA_ARCHITECTURES=70 -D CMAKE_CUDA_HOST_COMPILER=`which gcc` Installation from CMake From d947ca1ae5c58d9f6a364f1c3571c317e7cf0c78 Mon Sep 17 00:00:00 2001 From: Ye Luo Date: Fri, 1 Oct 2021 12:08:26 -0400 Subject: [PATCH 16/16] Update build_olcf_summit_Clang.sh --- config/build_olcf_summit_Clang.sh | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/config/build_olcf_summit_Clang.sh b/config/build_olcf_summit_Clang.sh index 598734a300..65f254982d 100755 --- a/config/build_olcf_summit_Clang.sh +++ b/config/build_olcf_summit_Clang.sh @@ -32,7 +32,18 @@ module load llvm/main-20210811-cuda10.1 TYPE=Release Compiler=Clang -source_folder=~/opt/qmcpack +if [[ $# -eq 0 ]]; then + source_folder=`pwd` +else + source_folder=$1 +fi + +if [[ -f $source_folder/CMakeLists.txt ]]; then + echo Using QMCPACK source directory $source_folder +else + echo "Source directory $source_folder doesn't contain CMakeLists.txt. Pass QMCPACK source directory as the first argument." + exit +fi for name in offload_cuda_real_MP offload_cuda_real offload_cuda_cplx_MP offload_cuda_cplx \ cpu_real_MP cpu_real cpu_cplx_MP cpu_cplx