Merge branch 'develop' into matrix_update_engines_direct_2

PDoakORNL · Oct 6, 2021 · fea01c4 · fea01c4
2 parents 4e8b3f4 + f7874e4
commit fea01c4
Show file tree

Hide file tree

Showing 70 changed files with 1,092 additions and 3,972 deletions.
diff --git a/.github/workflows/ci-github-actions.yaml b/.github/workflows/ci-github-actions.yaml
@@ -1,16 +1,14 @@
-
 name: GitHub Actions CI
 
-on: 
+on:
   push:
-    branches: 
-    - develop
+    branches:
+      - develop
   pull_request:
-    branches: 
-    - develop
+    branches:
+      - develop
 
 jobs:
-
   linux:
     runs-on: ubuntu-latest
     container: ${{ matrix.container }}
@@ -20,79 +18,112 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        jobname: [
-          gcc-openmpi-real-coverage,
-          gcc-openmpi-complex-coverage,
-          gcc11-real-werror,
-          gcc11-complex-werror,
-          clang-real-asan,
-          clang-complex-asan,
-          clang-openmpi-real-ubsan,
-          clang-latest-openmp-offload
-        ]
+        jobname:
+          [
+            gcc-openmpi-real-coverage,
+            gcc-openmpi-complex-coverage,
+            gcc11-real-werror,
+            gcc11-complex-werror,
+            clang-real-asan,
+            clang-complex-asan,
+            clang-openmpi-real-ubsan,
+            clang-latest-openmp-offload,
+          ]
         include:
-        - jobname: gcc-openmpi-real-coverage
-          container: 
-            image: williamfgc/qmcpack-ci:ubuntu20-openmpi
-            options: -u 1001
-
-        - jobname: gcc-openmpi-complex-coverage
-          container: 
-            image: williamfgc/qmcpack-ci:ubuntu20-openmpi
-            options: -u 1001
-
-        - jobname: gcc11-real-werror
-          container: 
-            image: williamfgc/qmcpack-ci:ubuntu2110-serial
-            options: -u 1001
-
-        - jobname: gcc11-complex-werror
-          container: 
-            image: williamfgc/qmcpack-ci:ubuntu2110-serial
-            options: -u 1001
-
-        - jobname: clang-real-asan
-          container: 
-            image: williamfgc/qmcpack-ci:ubuntu20-openmpi
-            options: -u 1001
-
-        - jobname: clang-complex-asan
-          container: 
-            image: williamfgc/qmcpack-ci:ubuntu20-openmpi
-            options: -u 1001
-
-        - jobname: clang-openmpi-real-ubsan
-          container: 
-            image: williamfgc/qmcpack-ci:ubuntu20-openmpi
-            options: -u 1001
-
-        - jobname: clang-latest-openmp-offload
-          container: 
-            image: williamfgc/qmcpack-ci:ubuntu20-clang-latest
-            options: -u 1001
+          - jobname: gcc-openmpi-real-coverage
+            container:
+              image: williamfgc/qmcpack-ci:ubuntu20-openmpi
+              options: -u 1001
+
+          - jobname: gcc-openmpi-complex-coverage
+            container:
+              image: williamfgc/qmcpack-ci:ubuntu20-openmpi
+              options: -u 1001
+
+          - jobname: gcc11-real-werror
+            container:
+              image: williamfgc/qmcpack-ci:ubuntu2110-serial
+              options: -u 1001
+
+          - jobname: gcc11-complex-werror
+            container:
+              image: williamfgc/qmcpack-ci:ubuntu2110-serial
+              options: -u 1001
+
+          - jobname: clang-real-asan
+            container:
+              image: williamfgc/qmcpack-ci:ubuntu20-openmpi
+              options: -u 1001
+
+          - jobname: clang-complex-asan
+            container:
+              image: williamfgc/qmcpack-ci:ubuntu20-openmpi
+              options: -u 1001
+
+          - jobname: clang-openmpi-real-ubsan
+            container:
+              image: williamfgc/qmcpack-ci:ubuntu20-openmpi
+              options: -u 1001
+
+          - jobname: clang-latest-openmp-offload
+            container:
+              image: williamfgc/qmcpack-ci:ubuntu20-clang-latest
+              options: -u 1001
+
+    steps:
+      - name: Checkout Action
+        uses: actions/checkout@v1
+
+      - name: Configure
+        run: tests/test_automation/github-actions/ci/run_step.sh configure
+
+      - name: Build
+        run: tests/test_automation/github-actions/ci/run_step.sh build
+
+      - name: Test
+        run: tests/test_automation/github-actions/ci/run_step.sh test
+
+      - name: Coverage
+        if: contains(matrix.jobname, 'coverage')
+        run: tests/test_automation/github-actions/ci/run_step.sh coverage
+
+      - name: Upload Coverage
+        if: contains(matrix.jobname, 'coverage') && github.repository_owner == 'QMCPACK'
+        uses: codecov/codecov-action@v1
+        with:
+          file: ../qmcpack-build/coverage.xml
+          flags: tests-deterministic # optional
+          name: codecov-QMCPACK # optional
+          fail_ci_if_error: true # optional (default = false)
+
+  macos:
+    runs-on: macos-latest
+    env:
+      GH_JOBNAME: ${{ matrix.jobname }}
+      GH_OS: macOS
+
+    strategy:
+      fail-fast: false
+      matrix:
+        jobname: [macOS-gcc11-real]
 
     steps:
-    - name: Checkout Action
-      uses: actions/checkout@v1
-
-    - name: Configure
-      run: tests/test_automation/github-actions/ci/run_step.sh configure
-
-    - name: Build
-      run: tests/test_automation/github-actions/ci/run_step.sh build
-
-    - name: Test
-      run: tests/test_automation/github-actions/ci/run_step.sh test
-
-    - name: Coverage
-      if: contains(matrix.jobname, 'coverage')
-      run: tests/test_automation/github-actions/ci/run_step.sh coverage
-
-    - name: Upload Coverage
-      if: contains(matrix.jobname, 'coverage') && github.repository_owner == 'QMCPACK'
-      uses: codecov/codecov-action@v1
-      with:
-        file:  ../qmcpack-build/coverage.xml
-        flags: tests-deterministic # optional
-        name: codecov-QMCPACK # optional
-        fail_ci_if_error: true # optional (default = false)
+      - name: Checkout Action
+        uses: actions/checkout@v2
+
+      # Install packages on the runner with Homebrew and pip3.
+      - name: Setup Dependencies
+        run: |
+          brew install ninja hdf5 fftw boost
+          pip3 install numpy h5py pandas
+
+      - name: Configure
+        run: tests/test_automation/github-actions/ci/run_step.sh configure
+
+      - name: Build
+        run: tests/test_automation/github-actions/ci/run_step.sh build
+
+      - name: Test
+        run: tests/test_automation/github-actions/ci/run_step.sh test
+
+      - name: Install
+        run: tests/test_automation/github-actions/ci/run_step.sh install
diff --git a/CMake/ClangCompilers.cmake b/CMake/ClangCompilers.cmake
@@ -19,6 +19,17 @@ if(QMC_OMP)
         CACHE STRING "Offload target architecture")
     set(OPENMP_OFFLOAD_COMPILE_OPTIONS "-fopenmp-targets=${OFFLOAD_TARGET}")
 
+    # No explicit OFFLOAD_ARCH for an nvptx64 target: derive it from CMAKE_CUDA_ARCHITECTURES when that is defined.
+    if(DEFINED CMAKE_CUDA_ARCHITECTURES AND OFFLOAD_TARGET MATCHES "nvptx64" AND NOT DEFINED OFFLOAD_ARCH)
+      list(LENGTH CMAKE_CUDA_ARCHITECTURES NUMBER_CUDA_ARCHITECTURES)
+      # Only a single-entry list can be mapped to one sm_XX value; anything else is rejected.
+      if(NOT NUMBER_CUDA_ARCHITECTURES EQUAL "1")
+        message(FATAL_ERROR "LLVM does not yet support offload to multiple architectures! "
+                            "Deriving OFFLOAD_ARCH from CMAKE_CUDA_ARCHITECTURES failed. "
+                            "Please keep only one entry in CMAKE_CUDA_ARCHITECTURES or set OFFLOAD_ARCH.")
+      else()
+        set(OFFLOAD_ARCH sm_${CMAKE_CUDA_ARCHITECTURES})
+      endif()
+    endif()
+
     if(DEFINED OFFLOAD_ARCH)
       set(OPENMP_OFFLOAD_COMPILE_OPTIONS
           "${OPENMP_OFFLOAD_COMPILE_OPTIONS} -Xopenmp-target=${OFFLOAD_TARGET} -march=${OFFLOAD_ARCH}")

diff --git a/CMake/NVHPCCompilers.cmake b/CMake/NVHPCCompilers.cmake
@@ -7,12 +7,24 @@ if(QMC_OMP)
   set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mp=allcores")
   if(ENABLE_OFFLOAD AND NOT CMAKE_SYSTEM_NAME STREQUAL "CrayLinuxEnvironment")
     message(WARNING "QMCPACK OpenMP offload is not ready for NVIDIA HPC compiler.")
-    if(NOT DEFINED OFFLOAD_ARCH)
-      message(FATAL_ERROR "NVIDIA HPC compiler requires -gpu=ccXX option set based on the target GPU architecture! "
-                          "Please add -DOFFLOAD_ARCH=ccXX to cmake. For example, cc70 is for Volta.")
+    # No explicit OFFLOAD_ARCH: build the ccXX list from CMAKE_CUDA_ARCHITECTURES when that is defined.
+    if(NOT DEFINED OFFLOAD_ARCH AND DEFINED CMAKE_CUDA_ARCHITECTURES)
+      # Prefix every entry with "cc" and join with commas: "70" -> "cc70", "70;80" -> "cc70,cc80".
+      # The same two steps cover the single-entry case, so no branch on the list length is needed.
+      string(REPLACE ";" ",cc" OFFLOAD_ARCH "${CMAKE_CUDA_ARCHITECTURES}")
+      set(OFFLOAD_ARCH "cc${OFFLOAD_ARCH}")
+    endif()
+
+    if(DEFINED OFFLOAD_ARCH)
+      if(NOT OFFLOAD_ARCH MATCHES "cc")
+        message(FATAL_ERROR "NVIDIA HPC compiler requires -gpu=ccXX option set based on the target GPU architecture! "
+                            "Please add -DOFFLOAD_ARCH=ccXX to cmake. For example, cc70 is for Volta.")
+      endif()
+      set(OPENMP_OFFLOAD_COMPILE_OPTIONS "-gpu=${OFFLOAD_ARCH}")
     endif()
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mp=gpu")
-    set(OPENMP_OFFLOAD_COMPILE_OPTIONS "-gpu=${OFFLOAD_ARCH}")
   else()
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mp=allcores")
   endif()

diff --git a/CMake/ctest_script.cmake b/CMake/ctest_script.cmake
@@ -202,8 +202,8 @@ if(DEFINED QMC_MIXED_PRECISION)
   set(CTEST_OPTIONS "${CTEST_OPTIONS};-DQMC_MIXED_PRECISION=${QMC_MIXED_PRECISION}")
 endif()
 
-if(DEFINED CUDA_ARCH)
-  set(CTEST_OPTIONS "${CTEST_OPTIONS};-DCUDA_ARCH='${CUDA_ARCH}'")
+if(DEFINED CMAKE_CUDA_ARCHITECTURES)
+  set(CTEST_OPTIONS "${CTEST_OPTIONS};-DCMAKE_CUDA_ARCHITECTURES='${CMAKE_CUDA_ARCHITECTURES}'")
 endif()
 
 if(DEFINED BUILD_AFQMC)

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -72,6 +72,17 @@ if(ENABLE_CUDA AND QMC_CUDA)
   message(FATAL_ERROR "ENABLE_CUDA=ON and QMC_CUDA=ON can not be set together!")
 endif(ENABLE_CUDA AND QMC_CUDA)
 
+# Set CMAKE_CUDA_ARCHITECTURES early so that offload compilers may take advantage of it.
+# Parentheses make the grouping explicit: CMake evaluates AND/OR left to right with equal precedence.
+if((ENABLE_CUDA OR QMC_CUDA) AND NOT QMC_CUDA2HIP)
+  if(DEFINED CUDA_ARCH)
+    # Remove the cache entry before aborting (presumably so a stale cached CUDA_ARCH does not re-trigger this error).
+    unset(CUDA_ARCH CACHE)
+    message(FATAL_ERROR "CUDA_ARCH option has been removed. Use -DCMAKE_CUDA_ARCHITECTURES=80 if -DCUDA_ARCH=sm_80 was used.")
+  endif()
+  # Fall back to architecture 70 when the user did not choose one.
+  if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
+    set(CMAKE_CUDA_ARCHITECTURES 70)
+  endif()
+endif()
+
 #--------------------------------------------------------------------
 # Set compiler-time parameters
 # WALKER_MAX_PROPERTIES max number of observables + 12 or so standard
@@ -662,38 +673,25 @@ if(QMC_CUDA OR ENABLE_CUDA)
   if(QMC_CUDA2HIP)
     message(STATUS "CUDA2HIP enabled") # all the HIP and ROCm settings will be handled by ENABLE_ROCM
   else(QMC_CUDA2HIP)
-    # FindCUDA default CUDA_PROPAGATE_HOST_FLAGS to ON but we prefer OFF
-    # It happened -ffast-math from host caused numerical issue in CUDA kernels.
-    option(CUDA_PROPAGATE_HOST_FLAGS "Propagate C/CXX_FLAGS and friends to the host compiler via -Xcompile" OFF)
-    find_package(CUDA REQUIRED)
-    set(CUDA_LINK_LIBRARIES_KEYWORD PRIVATE)
-    #set(CUDA_NVCC_FLAGS
-    #  "-arch=sm_20;-Drestrict=__restrict__;-DNO_CUDA_MAIN;-O3;-use_fast_math")
-    if(CUDA_NVCC_FLAGS MATCHES "arch")
-      # User defined NVCC flags
-      message(STATUS "Setting CUDA FLAGS=${CUDA_NVCC_FLAGS}")
-    else(CUDA_NVCC_FLAGS MATCHES "arch")
-      # Automatically set the default NVCC flags
-      set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Drestrict=__restrict__;-DNO_CUDA_MAIN;-std=c++14")
-      if(QMC_COMPLEX)
-        set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-DQMC_COMPLEX=${QMC_COMPLEX}")
-      endif()
-      if(CMAKE_BUILD_TYPE STREQUAL "DEBUG")
-        set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-g;-G")
-      else()
-        # Temporarily disable fast_math because it causes multiple test failures
-        # SET(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-O3;-use_fast_math")
-        set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-O3")
-      endif()
-      set(CUDA_ARCH
-          sm_70
-          CACHE STRING "CUDA architecture sm_XX")
-      set(CUDA_NVCC_FLAGS "-arch=${CUDA_ARCH};${CUDA_NVCC_FLAGS}")
-    endif(CUDA_NVCC_FLAGS MATCHES "arch")
-    include_directories(${CUDA_INCLUDE_DIRS})
+    if (CMAKE_VERSION VERSION_LESS 3.18.0)
+      message(FATAL_ERROR "QMC_CUDA or ENABLE_CUDA require CMake 3.18.0 or later")
+    endif()
+    # a few production machines use CUDA 10 which only supports C++14.
+    if(NOT DEFINED CMAKE_CUDA_STANDARD)
+      set(CMAKE_CUDA_STANDARD 14)
+    endif()
+    set(CMAKE_CUDA_STANDARD_REQUIRED TRUE)
+    set(CMAKE_CUDA_EXTENSIONS OFF)
+    enable_language(CUDA)
+    find_package(CUDAToolkit REQUIRED)
+    # Automatically set the default NVCC flags
+    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Drestrict=__restrict__ -DNO_CUDA_MAIN")
+    if(QMC_COMPLEX)
+      set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -DQMC_COMPLEX=${QMC_COMPLEX}")
+    endif()
     set(HAVE_CUDA 1)
-    message("   CUDA_NVCC_FLAGS=${CUDA_NVCC_FLAGS}")
-  endif(QMC_CUDA2HIP)
+    message("Project CUDA_FLAGS: ${CMAKE_CUDA_FLAGS}")
+  endif()
 else(QMC_CUDA OR ENABLE_CUDA)
   if(QMC_CUDA2HIP)
     message(FATAL_ERROR "QMC_CUDA2HIP requires QMC_CUDA=ON or ENABLE_CUDA=ON.")

diff --git a/README.md b/README.md
@@ -166,7 +166,7 @@ make -j 8
                          and use float and double for CUDA base and full precision.
      ENABLE_CUDA         ON/OFF(default). Enable CUDA code path for NVIDIA GPU acceleration.
                          Production quality for AFQMC. Pre-production quality for real-space.
-                         Use CUDA_ARCH, default sm_70, to set the actual GPU architecture.
+                         Use CMAKE_CUDA_ARCHITECTURES, default 70, to set the actual GPU architecture.
      ENABLE_OFFLOAD      ON/OFF(default). Experimental feature. Enable OpenMP target offload for GPU acceleration.
      ENABLE_TIMERS       ON(default)/OFF. Enable fine-grained timers. Timers are on by default but at level coarse
                          to avoid potential slowdown in tiny systems.

diff --git a/config/build_olcf_summit.sh b/config/build_olcf_summit.sh
@@ -12,8 +12,8 @@ echo "Either source $BUILD_MODULES or load these same modules to run QMCPACK"
 
 declare -A builds=( ["cpu"]=" -DQMC_MATH_VENDOR=IBM_MASS -DMASS_ROOT=/sw/summit/xl/16.1.1-10/xlmass/9.1.1" \
                     ["complex_cpu"]="-DQMC_COMPLEX=1  -DQMC_MATH_VENDOR=IBM_MASS -DMASS_ROOT=/sw/summit/xl/16.1.1-10/xlmass/9.1.1" \
-                    ["legacy_gpu"]="-DQMC_CUDA=1 -DCUDA_ARCH=sm_70 " \
-		    ["complex_legacy_gpu"]="-DQMC_CUDA=1 -DQMC_COMPLEX=1 -DCUDA_ARCH=sm_70 " )
+                    ["legacy_gpu"]="-DQMC_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES=70 " \
+                    ["complex_legacy_gpu"]="-DQMC_CUDA=1 -DQMC_COMPLEX=1 -DCMAKE_CUDA_ARCHITECTURES=70 " )
 
 mkdir bin