Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Eliminate deprecated find_package(CUDA) from qmcpack #3492

Merged
merged 19 commits into from
Oct 1, 2021
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CMake/ClangCompilers.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ if(QMC_OMP)
CACHE STRING "Offload target architecture")
set(OPENMP_OFFLOAD_COMPILE_OPTIONS "-fopenmp-targets=${OFFLOAD_TARGET}")

# When the user did not pass OFFLOAD_ARCH and the offload target is an nvptx64 one,
# derive OFFLOAD_ARCH from CMAKE_CUDA_ARCHITECTURES by prefixing it with "sm_".
# NOTE(review): the value is pasted verbatim after "sm_"; presumably a single numeric
# architecture (e.g. 70) is expected -- confirm behaviour for list values such as "70;80".
if(NOT DEFINED OFFLOAD_ARCH AND OFFLOAD_TARGET MATCHES "nvptx64" AND DEFINED CMAKE_CUDA_ARCHITECTURES)
set(OFFLOAD_ARCH sm_${CMAKE_CUDA_ARCHITECTURES})
quantumsteve marked this conversation as resolved.
Show resolved Hide resolved
endif()

if(DEFINED OFFLOAD_ARCH)
set(OPENMP_OFFLOAD_COMPILE_OPTIONS
"${OPENMP_OFFLOAD_COMPILE_OPTIONS} -Xopenmp-target=${OFFLOAD_TARGET} -march=${OFFLOAD_ARCH}")
Expand Down
8 changes: 6 additions & 2 deletions CMake/NVHPCCompilers.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,12 @@ if(QMC_OMP)
if(ENABLE_OFFLOAD AND NOT CMAKE_SYSTEM_NAME STREQUAL "CrayLinuxEnvironment")
message(WARNING "QMCPACK OpenMP offload is not ready for NVIDIA HPC compiler.")
if(NOT DEFINED OFFLOAD_ARCH)
message(FATAL_ERROR "NVIDIA HPC compiler requires -gpu=ccXX option set based on the target GPU architecture! "
"Please add -DOFFLOAD_ARCH=ccXX to cmake. For example, cc70 is for Volta.")
if(DEFINED CMAKE_CUDA_ARCHITECTURES)
set(OFFLOAD_ARCH cc${CMAKE_CUDA_ARCHITECTURES})
else()
message(FATAL_ERROR "NVIDIA HPC compiler requires -gpu=ccXX option set based on the target GPU architecture! "
"Please add -DOFFLOAD_ARCH=ccXX to cmake. For example, cc70 is for Volta.")
endif()
endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mp=gpu")
set(OPENMP_OFFLOAD_COMPILE_OPTIONS "-gpu=${OFFLOAD_ARCH}")
Expand Down
60 changes: 29 additions & 31 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,17 @@ if(ENABLE_CUDA AND QMC_CUDA)
message(FATAL_ERROR "ENABLE_CUDA=ON and QMC_CUDA=ON can not be set together!")
endif(ENABLE_CUDA AND QMC_CUDA)

# Set CMAKE_CUDA_ARCHITECTURES early so that the offload compiler setup can derive its
# own target architecture from it.
# Only real CUDA builds are affected: either CUDA switch may be ON, but QMC_CUDA2HIP must
# be OFF. The grouping is spelled out with parentheses instead of relying on the
# evaluation order of AND/OR inside if().
if((ENABLE_CUDA OR QMC_CUDA) AND NOT QMC_CUDA2HIP)
  # CUDA_ARCH (sm_XX) is no longer accepted. Drop it from the cache so that a re-run
  # without -DCUDA_ARCH succeeds, then stop with a migration hint.
  if(DEFINED CUDA_ARCH)
    unset(CUDA_ARCH CACHE)
    message(FATAL_ERROR "CUDA_ARCH option has been removed. Use -DCMAKE_CUDA_ARCHITECTURES=80 if -DCUDA_ARCH=sm_80 was used.")
  endif()
  # Fall back to 70 when the user did not choose an architecture.
  if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
    set(CMAKE_CUDA_ARCHITECTURES 70)
  endif()
endif()

#--------------------------------------------------------------------
# Set compiler-time parameters
# WALKER_MAX_PROPERTIES max number of observables + 12 or so standard
Expand Down Expand Up @@ -662,38 +673,25 @@ if(QMC_CUDA OR ENABLE_CUDA)
if(QMC_CUDA2HIP)
message(STATUS "CUDA2HIP enabled") # all the HIP and ROCm settings will be handled by ENABLE_ROCM
else(QMC_CUDA2HIP)
# FindCUDA default CUDA_PROPAGATE_HOST_FLAGS to ON but we prefer OFF
# It happened -ffast-math from host caused numerical issue in CUDA kernels.
option(CUDA_PROPAGATE_HOST_FLAGS "Propagate C/CXX_FLAGS and friends to the host compiler via -Xcompile" OFF)
find_package(CUDA REQUIRED)
set(CUDA_LINK_LIBRARIES_KEYWORD PRIVATE)
#set(CUDA_NVCC_FLAGS
# "-arch=sm_20;-Drestrict=__restrict__;-DNO_CUDA_MAIN;-O3;-use_fast_math")
if(CUDA_NVCC_FLAGS MATCHES "arch")
# User defined NVCC flags
message(STATUS "Setting CUDA FLAGS=${CUDA_NVCC_FLAGS}")
else(CUDA_NVCC_FLAGS MATCHES "arch")
# Automatically set the default NVCC flags
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Drestrict=__restrict__;-DNO_CUDA_MAIN;-std=c++14")
if(QMC_COMPLEX)
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-DQMC_COMPLEX=${QMC_COMPLEX}")
endif()
if(CMAKE_BUILD_TYPE STREQUAL "DEBUG")
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-g;-G")
else()
# Temporarily disable fast_math because it causes multiple test failures
# SET(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-O3;-use_fast_math")
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-O3")
endif()
set(CUDA_ARCH
sm_70
CACHE STRING "CUDA architecture sm_XX")
set(CUDA_NVCC_FLAGS "-arch=${CUDA_ARCH};${CUDA_NVCC_FLAGS}")
endif(CUDA_NVCC_FLAGS MATCHES "arch")
include_directories(${CUDA_INCLUDE_DIRS})
if (CMAKE_VERSION VERSION_LESS 3.18.0)
message(FATAL_ERROR "QMC_CUDA or ENABLE_CUDA require CMake 3.18.0 or later")
endif()
# a few production machines use CUDA 10 which only supports C++14.
if(NOT DEFINED CMAKE_CUDA_STANDARD)
set(CMAKE_CUDA_STANDARD 14)
endif()
set(CMAKE_CUDA_STANDARD_REQUIRED TRUE)
set(CMAKE_CUDA_EXTENSIONS OFF)
enable_language(CUDA)
find_package(CUDAToolkit REQUIRED)
# Automatically set the default NVCC flags
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Drestrict=__restrict__ -DNO_CUDA_MAIN")
if(QMC_COMPLEX)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -DQMC_COMPLEX=${QMC_COMPLEX}")
endif()
set(HAVE_CUDA 1)
message(" CUDA_NVCC_FLAGS=${CUDA_NVCC_FLAGS}")
endif(QMC_CUDA2HIP)
message("Project CUDA_FLAGS: ${CMAKE_CUDA_FLAGS}")
endif()
else(QMC_CUDA OR ENABLE_CUDA)
if(QMC_CUDA2HIP)
message(FATAL_ERROR "QMC_CUDA2HIP requires QMC_CUDA=ON or ENABLE_CUDA=ON.")
Expand Down
4 changes: 2 additions & 2 deletions config/build_olcf_summit.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ echo "Either source $BUILD_MODULES or load these same modules to run QMCPACK"

declare -A builds=( ["cpu"]=" -DQMC_MATH_VENDOR=IBM_MASS -DMASS_ROOT=/sw/summit/xl/16.1.1-10/xlmass/9.1.1" \
["complex_cpu"]="-DQMC_COMPLEX=1 -DQMC_MATH_VENDOR=IBM_MASS -DMASS_ROOT=/sw/summit/xl/16.1.1-10/xlmass/9.1.1" \
["legacy_gpu"]="-DQMC_CUDA=1 -DCUDA_ARCH=sm_70 " \
["complex_legacy_gpu"]="-DQMC_CUDA=1 -DQMC_COMPLEX=1 -DCUDA_ARCH=sm_70 " )
["legacy_gpu"]="-DQMC_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES=70 " \
["complex_legacy_gpu"]="-DQMC_CUDA=1 -DQMC_COMPLEX=1 -DCMAKE_CUDA_ARCHITECTURES=70 " )

mkdir bin

Expand Down
15 changes: 8 additions & 7 deletions config/build_olcf_summit_Clang.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,28 +32,29 @@ module load llvm/main-20210811-cuda10.1
TYPE=Release
Compiler=Clang

source_folder=..
source_folder=~/opt/qmcpack
ye-luo marked this conversation as resolved.
Show resolved Hide resolved

for name in offload_cuda_real_MP offload_cuda_real offload_cuda_cplx_MP offload_cuda_cplx \
cpu_real_MP cpu_real cpu_cplx_MP cpu_cplx
do

CMAKE_FLAGS="-D CMAKE_BUILD_TYPE=$TYPE -D QMC_MATH_VENDOR=IBM_MASS -D MASS_ROOT=/sw/summit/xl/16.1.1-10/xlmass/9.1.1 -D MPIEXEC_EXECUTABLE=`which jsrun` -D MPIEXEC_NUMPROC_FLAG='-n' -D MPIEXEC_PREFLAGS='-c;16;-g;1;-b;packed:16;--smpiargs=off'"
CMAKE_FLAGS="-DCMAKE_BUILD_TYPE=$TYPE -DQMC_MATH_VENDOR=IBM_MASS -DMASS_ROOT=/sw/summit/xl/16.1.1-10/xlmass/9.1.1 -DMPIEXEC_EXECUTABLE=`which jsrun` -DMPIEXEC_NUMPROC_FLAG='-n' -DMPIEXEC_PREFLAGS='-c;16;-g;1;-b;packed:16;--smpiargs=off' -DCMAKE_CXX_STANDARD_LIBRARIES=/sw/summit/gcc/9.3.0-2/lib64/libstdc++.a"

if [[ $name == *"cplx"* ]]; then
CMAKE_FLAGS="$CMAKE_FLAGS -D QMC_COMPLEX=1"
CMAKE_FLAGS="$CMAKE_FLAGS -DQMC_COMPLEX=ON"
fi

if [[ $name == *"_MP"* ]]; then
CMAKE_FLAGS="$CMAKE_FLAGS -D QMC_MIXED_PRECISION=1"
CMAKE_FLAGS="$CMAKE_FLAGS -DQMC_MIXED_PRECISION=ON"
fi

if [[ $name == *"offload"* ]]; then
CMAKE_FLAGS="$CMAKE_FLAGS -D ENABLE_OFFLOAD=ON -D USE_OBJECT_TARGET=ON -DOFFLOAD_ARCH=sm_70"
CMAKE_FLAGS="$CMAKE_FLAGS -DENABLE_OFFLOAD=ON -DUSE_OBJECT_TARGET=ON -DOFFLOAD_ARCH=sm_70"
fi

if [[ $name == *"cuda"* ]]; then
CMAKE_FLAGS="$CMAKE_FLAGS -D ENABLE_CUDA=1 -D CUDA_ARCH=sm_70 -D CUDA_HOST_COMPILER=/usr/bin/gcc -D CUDA_NVCC_FLAGS='-Xcompiler;-mno-float128'"
CMAKE_FLAGS="$CMAKE_FLAGS -DENABLE_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=70 -DCMAKE_CUDA_HOST_COMPILER=/usr/bin/g++"
CUDA_FLAGS="-Xcompiler -mno-float128"
fi

folder=build_summit_${Compiler}_${name}
Expand All @@ -64,7 +65,7 @@ echo "**********************************"
mkdir $folder
cd $folder
if [ ! -f CMakeCache.txt ] ; then
cmake $CMAKE_FLAGS -D CMAKE_C_COMPILER=mpicc -D CMAKE_CXX_COMPILER=mpicxx $source_folder
cmake $CMAKE_FLAGS -DCMAKE_C_COMPILER=mpicc -DCMAKE_CXX_COMPILER=mpicxx -DCMAKE_CUDA_FLAGS="$CUDA_FLAGS" $source_folder
cmake .
fi
make -j16
Expand Down
2 changes: 1 addition & 1 deletion config/build_tulip.sh
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ elif [[ $build == *"MI60"* ]]; then
fi

if [[ $build == *"cuda"* ]]; then
CTEST_FLAGS="$CTEST_FLAGS -DENABLE_CUDA=ON -DCUDA_ARCH=sm_70 -DCUDA_TOOLKIT_ROOT_DIR=$CUDA_ROOT -DCUDA_HOST_COMPILER=`which gcc`"
CTEST_FLAGS="$CTEST_FLAGS -DENABLE_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=70 -DCUDAToolkit_ROOT=$CUDA_ROOT -DCMAKE_CUDA_HOST_COMPILER=`which g++`"
fi

if [[ $build == *"cplx"* ]]; then
Expand Down
11 changes: 3 additions & 8 deletions src/AFQMC/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -97,16 +97,11 @@ elseif(ENABLE_HIP)
Numerics/detail/HIP/Kernels/inplace_product.hip.cpp
Numerics/detail/HIP/Kernels/get_diagonal.hip.cpp)
set(AFQMC_SRCS ${AFQMC_SRCS} Memory/HIP/hip_utilities.cpp Memory/HIP/hip_arch.cpp Memory/HIP/hip_init.cpp)
else(ENABLE_CUDA)

endif(ENABLE_CUDA)
endif()

if(ENABLE_CUDA)
cuda_add_library(afqmc ${AFQMC_SRCS})
cuda_add_cublas_to_target(afqmc)
target_link_libraries(afqmc PRIVATE ${CUDA_cusparse_LIBRARY})
target_link_libraries(afqmc PRIVATE ${CUDA_cusolver_LIBRARY})
target_link_libraries(afqmc PRIVATE ${CUDA_curand_LIBRARY})
add_library(afqmc ${AFQMC_SRCS})
target_link_libraries(afqmc PRIVATE CUDA::curand CUDA::cusparse CUDA::cusolver CUDA::cublas)
elseif(ENABLE_HIP)
set_source_files_properties(${AFQMC_HIP_SRCS} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
hip_add_library(afqmc_hip_lib ${AFQMC_HIP_SRCS})
Expand Down
2 changes: 1 addition & 1 deletion src/Particle/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ target_link_libraries(qmcparticle PUBLIC qmcnumerics qmcutil platform_runtime)

if(QMC_CUDA)
if(NOT QMC_CUDA2HIP)
cuda_add_library(qmcparticle_cuda accept_kernel.cu)
add_library(qmcparticle_cuda accept_kernel.cu)
else()
hip_add_library(qmcparticle_cuda accept_kernel.cu)
endif(NOT QMC_CUDA2HIP)
Expand Down
11 changes: 6 additions & 5 deletions src/Platforms/CUDA/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,15 @@ set(CUDA_RT_SRCS CUDAfill.cpp CUDAallocator.cpp CUDAruntime.cpp)
set(CUDA_LA_SRCS cuBLAS_missing_functions.cu)

if(NOT QMC_CUDA2HIP)
cuda_add_library(platform_cuda_runtime ${CUDA_RT_SRCS})
cuda_add_library(platform_cuda_LA ${CUDA_LA_SRCS})
cuda_add_cublas_to_target(platform_cuda_LA)
add_library(platform_cuda_runtime ${CUDA_RT_SRCS})
target_link_libraries(platform_cuda_runtime PUBLIC CUDA::cudart)
add_library(platform_cuda_LA ${CUDA_LA_SRCS})
target_link_libraries(platform_cuda_LA PRIVATE CUDA::cublas CUDA::cusolver)
else()
hip_add_library(platform_cuda_runtime ${CUDA_RT_SRCS})
target_link_libraries(platform_cuda_LA PUBLIC platform_rocm_LA)
hip_add_library(platform_cuda_LA ${CUDA_LA_SRCS})
target_link_libraries(platform_cuda_runtime PUBLIC platform_rocm_runtime)
target_link_libraries(platform_cuda_LA PUBLIC platform_rocm_LA)
endif()

target_link_libraries(platform_cuda_LA PRIVATE ${CUDA_cusolver_LIBRARY} platform_cuda_runtime)
target_link_libraries(platform_cuda_LA PRIVATE platform_cuda_runtime)
5 changes: 2 additions & 3 deletions src/Platforms/CUDA_legacy/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,8 @@
set(CUDA_LEGACY_SRCS cuda_inverse.cu gpu_vector.cpp gpu_misc.cpp)

if(NOT QMC_CUDA2HIP)
cuda_add_library(platform_cuda_legacy ${CUDA_LEGACY_SRCS})
cuda_add_cublas_to_target(platform_cuda_legacy)
target_link_libraries(platform_cuda_legacy PRIVATE ${CUDA_LIBRARIES})
add_library(platform_cuda_legacy ${CUDA_LEGACY_SRCS})
target_link_libraries(platform_cuda_legacy PUBLIC CUDA::cublas CUDA::cudart)
else()
hip_add_library(platform_cuda_legacy ${CUDA_LEGACY_SRCS})
target_link_libraries(platform_cuda_legacy PUBLIC platform_rocm_runtime platform_rocm_LA)
Expand Down
2 changes: 1 addition & 1 deletion src/Platforms/tests/CUDA/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ set(UTEST_EXE test_${SRC_DIR})
set(UTEST_NAME deterministic-unit_test_${SRC_DIR})

if(NOT QMC_CUDA2HIP)
cuda_add_library(cuda_device_value_test_kernels test_device_value_kernels.cu)
add_library(cuda_device_value_test_kernels test_device_value_kernels.cu)
else()
hip_add_library(cuda_device_value_test_kernels test_device_value_kernels.cu)
endif()
Expand Down
2 changes: 1 addition & 1 deletion src/QMCHamiltonians/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ else()
endif()
if(QMC_CUDA)
if(NOT QMC_CUDA2HIP)
cuda_add_library(qmcham_cuda ${HAMSRCS_CUDA})
add_library(qmcham_cuda ${HAMSRCS_CUDA})
else()
hip_add_library(qmcham_cuda ${HAMSRCS_CUDA})
endif(NOT QMC_CUDA2HIP)
Expand Down
4 changes: 2 additions & 2 deletions src/QMCWaveFunctions/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ endif(USE_OBJECT_TARGET)

if(QMC_CUDA OR ENABLE_CUDA)
if(NOT QMC_CUDA2HIP)
cuda_add_library(qmcwfs_cuda ${WFSSRCS_CUDA})
add_library(qmcwfs_cuda ${WFSSRCS_CUDA})
else()
hip_add_library(qmcwfs_cuda ${WFSSRCS_CUDA})
target_link_libraries(qmcwfs_cuda PUBLIC platform_LA)
Expand All @@ -208,7 +208,7 @@ target_link_libraries(qmcwfs PRIVATE einspline platform_LA Math::FFTW3)
if(ENABLE_CUDA)
set(DIRECT_INVERSION_SRCS detail/CUDA/cuBLAS_LU.cu)
if(NOT QMC_CUDA2HIP)
cuda_add_library(qmcwfs_direct_inversion_cuda "${DIRECT_INVERSION_SRCS}")
add_library(qmcwfs_direct_inversion_cuda "${DIRECT_INVERSION_SRCS}")
else()
hip_add_library(qmcwfs_direct_inversion_cuda "${DIRECT_INVERSION_SRCS}")
endif()
Expand Down
2 changes: 1 addition & 1 deletion src/einspline/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ set(SRCS
if(QMC_CUDA)
set(SRCS ${SRCS} multi_bspline_create_cuda.cu bspline_create_cuda.cu)
if(NOT QMC_CUDA2HIP)
cuda_add_library(einspline ${SRCS})
add_library(einspline ${SRCS})
else()
hip_add_library(einspline ${SRCS})
endif(NOT QMC_CUDA2HIP)
Expand Down
2 changes: 1 addition & 1 deletion src/einspline/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ set(SRCS test_one.cpp test_3d.cpp)
if(QMC_CUDA)
set(SRCS ${SRCS} test_cuda.cu)
if(NOT QMC_CUDA2HIP)
cuda_add_library(cudatests test_cuda.cu)
add_library(cudatests test_cuda.cu)
else()
hip_add_library(cudatests test_cuda.cu)
endif(NOT QMC_CUDA2HIP)
Expand Down