diff --git a/CMake/CheckSIMDAlignement.cmake b/CMake/CheckSIMDAlignement.cmake new file mode 100644 index 0000000000..45af9936b2 --- /dev/null +++ b/CMake/CheckSIMDAlignement.cmake @@ -0,0 +1,12 @@ +# Check if AVX512 is activated in the compilation and set default_alignment to 64 (AVX512) or 32 bytes. +# Since cross-compiling is not unusual on HPC systems (Cray), the probe is only compiled via try_compile and never executed. +# CMAKE_FLAGS entries must have the -DVAR:TYPE=VALUE form, so the C++ flags are forwarded as a cache definition. +try_compile(CXX_COMPILER_HAVE_AVX512_MACRO ${CMAKE_BINARY_DIR} + ${PROJECT_CMAKE}/try_compile_sources/checkAVX512.cxx + CMAKE_FLAGS "-DCMAKE_CXX_FLAGS:STRING=${CMAKE_CXX_FLAGS}") + +if (CXX_COMPILER_HAVE_AVX512_MACRO) + set(default_alignment 64) +else() + set(default_alignment 32) +endif() diff --git a/CMake/ClangCompilers.cmake b/CMake/ClangCompilers.cmake index 5eb01213cb..12025da282 100644 --- a/CMake/ClangCompilers.cmake +++ b/CMake/ClangCompilers.cmake @@ -16,14 +16,14 @@ IF(QMC_OMP) SET(ENABLE_OPENMP 1) IF(ENABLE_OFFLOAD AND NOT CMAKE_SYSTEM_NAME STREQUAL "CrayLinuxEnvironment") SET(OFFLOAD_TARGET "nvptx64-nvidia-cuda" CACHE STRING "Offload target architecture") - SET(CLANG_OPENMP_OFFLOAD_FLAGS "-fopenmp-targets=${OFFLOAD_TARGET}") + SET(OPENMP_OFFLOAD_COMPILE_OPTIONS "-fopenmp-targets=${OFFLOAD_TARGET}") IF(DEFINED OFFLOAD_ARCH) - SET(CLANG_OPENMP_OFFLOAD_FLAGS "${CLANG_OPENMP_OFFLOAD_FLAGS} -Xopenmp-target=${OFFLOAD_TARGET} -march=${OFFLOAD_ARCH}") + SET(OPENMP_OFFLOAD_COMPILE_OPTIONS "${OPENMP_OFFLOAD_COMPILE_OPTIONS} -Xopenmp-target=${OFFLOAD_TARGET} -march=${OFFLOAD_ARCH}") ENDIF() IF(OFFLOAD_TARGET MATCHES "nvptx64") - SET(CLANG_OPENMP_OFFLOAD_FLAGS "${CLANG_OPENMP_OFFLOAD_FLAGS} -Wno-unknown-cuda-version") + SET(OPENMP_OFFLOAD_COMPILE_OPTIONS "${OPENMP_OFFLOAD_COMPILE_OPTIONS} -Wno-unknown-cuda-version") ENDIF() # Intel clang compiler needs a different flag for the host side OpenMP library when offload is used. @@ -118,12 +118,6 @@ ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64" OR CMAKE_SYSTEM_PROCESSOR MATCHES ENDIF() ENDIF() -# Add OpenMP offload flags -# This step is intentionally put after the -march parsing for CPUs. 
-IF(DEFINED CLANG_OPENMP_OFFLOAD_FLAGS) - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CLANG_OPENMP_OFFLOAD_FLAGS}") -ENDIF() - # Add static flags if necessary IF(QMC_BUILD_STATIC) SET(CMAKE_CXX_LINK_FLAGS " -static") diff --git a/CMake/GNUCompilers.cmake b/CMake/GNUCompilers.cmake index 00aa17b019..87365d3f8f 100644 --- a/CMake/GNUCompilers.cmake +++ b/CMake/GNUCompilers.cmake @@ -10,11 +10,10 @@ SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99") IF(QMC_OMP) SET(ENABLE_OPENMP 1) SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fopenmp") - IF(ENABLE_OFFLOAD) + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp") + IF(ENABLE_OFFLOAD AND NOT CMAKE_SYSTEM_NAME STREQUAL "CrayLinuxEnvironment") SET(OFFLOAD_TARGET "nvptx-none" CACHE STRING "Offload target architecture") - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp -foffload=${OFFLOAD_TARGET} -foffload=\"-lm -latomic\"") - ELSE() - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp") + SET(OPENMP_OFFLOAD_COMPILE_OPTIONS "-foffload=${OFFLOAD_TARGET} -foffload=\"-lm -latomic\"") ENDIF() ENDIF(QMC_OMP) diff --git a/CMake/IBMCompilers.cmake b/CMake/IBMCompilers.cmake index 32c66154b9..266c2efbe0 100644 --- a/CMake/IBMCompilers.cmake +++ b/CMake/IBMCompilers.cmake @@ -26,10 +26,10 @@ SET( CMAKE_CXX_FLAGS_RELWITHDEBINFO "-g -O3" ) IF(QMC_OMP) SET(ENABLE_OPENMP 1) + SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -qsmp=omp") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -qsmp=omp") IF(ENABLE_OFFLOAD) - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -qsmp=omp -qoffload") - ELSE(ENABLE_OFFLOAD) - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -qsmp=omp") + set(OPENMP_OFFLOAD_COMPILE_OPTIONS "-qoffload") ENDIF(ENABLE_OFFLOAD) ELSE(QMC_OMP) SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -qnothreaded") diff --git a/CMake/PGICompilers.cmake b/CMake/PGICompilers.cmake index 0ff5336183..93d7ed50b7 100644 --- a/CMake/PGICompilers.cmake +++ b/CMake/PGICompilers.cmake @@ -15,7 +15,7 @@ IF(QMC_OMP) MESSAGE(FATAL_ERROR "NVIDIA HPC compiler requires -gpu=ccXX option set based on the target GPU 
architecture! " "Please add -DOFFLOAD_ARCH=ccXX to cmake. For example, cc70 is for Volta.") ENDIF() - SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mp=gpu -gpu=${OFFLOAD_ARCH}") + SET(OPENMP_OFFLOAD_COMPILE_OPTIONS "-mp=gpu -gpu=${OFFLOAD_ARCH}") ELSE() SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mp=allcores") ENDIF() diff --git a/CMake/try_compile_sources/checkAVX512.cxx b/CMake/try_compile_sources/checkAVX512.cxx new file mode 100644 index 0000000000..2865e6e0e4 --- /dev/null +++ b/CMake/try_compile_sources/checkAVX512.cxx @@ -0,0 +1,8 @@ +// Probe for try_compile: builds only when the compiler predefines __AVX512F__ (AVX512 activated) +int main() +{ +#if !defined(__AVX512F__) +#error "AVX512 not found" +#endif + return 0; +} diff --git a/CMakeLists.txt b/CMakeLists.txt index d20aab97b2..85b66d4f14 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -144,7 +144,6 @@ SET(OHMMS_DIM 3 CACHE STRING "Select physical dimension") SET(OHMMS_INDEXTYPE int) MESSAGE(STATUS "defining the float point precision") SET(OHMMS_PRECISION_FULL double) -set(QMC_CLINE 32 CACHE STRING "Cache/SIMD alignment in bytes") IF(QMC_CUDA) SET(QMC_MIXED_PRECISION 1 CACHE BOOL "Enable/disable mixed precision") @@ -429,6 +428,26 @@ IF (BUILD_AFQMC AND NOT APPLE) LINK_LIBRARIES("rt") ENDIF() +#--------------------------------------------------------------------------- +# Check SIMD alignment for CPU only +# This is intentionally placed before adding OpenMP offload compile options +# to avoid contamination from device compilation pass +#--------------------------------------------------------------------------- +include(CMake/CheckSIMDAlignement.cmake) +set(QMC_CLINE ${default_alignment} CACHE STRING "Cache/SIMD alignment in bytes") +math(EXPR alignment_remainder "${QMC_CLINE} % ${default_alignment}") +if (alignment_remainder) + message(FATAL_ERROR "QMC_CLINE must be a multiple of ${default_alignment}. 
Bad cached value is ${QMC_CLINE}") +endif() +message(STATUS "Setting QMC_CLINE to ${QMC_CLINE}") + +#--------------------------------------------------------- +# Set up OpenMP offload compile options +#--------------------------------------------------------- +if (ENABLE_OFFLOAD AND DEFINED OPENMP_OFFLOAD_COMPILE_OPTIONS) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OPENMP_OFFLOAD_COMPILE_OPTIONS}") +endif() + #------------------------------------------------------------------- # check MPI installation #-------------------------------------------------------------------