Squashed commit of the following:

commit 22cc717 Author: Peter Doak <doakpw@ornl.gov> Date: Wed Oct 6 13:05:22 2021 -0400 propagating mw_invertPsiM const API changes commit fea01c4 Merge: 4e8b3f4 f7874e4 Author: Peter Doak <doakpw@ornl.gov> Date: Wed Oct 6 12:38:07 2021 -0400 Merge branch 'develop' into matrix_update_engines_direct_2 commit f7874e4 Merge: 319938f 3a1fc75 Author: Ye Luo <yeluo@anl.gov> Date: Wed Oct 6 10:57:22 2021 -0500 Merge pull request QMCPACK#3514 from ye-luo/fix-omp-nocuda Fix omp offload without cuda commit 3a1fc75 Merge: 4b2ef72 319938f Author: Ye Luo <yeluo@anl.gov> Date: Tue Oct 5 21:49:12 2021 -0500 Merge branch 'develop' into fix-omp-nocuda commit 319938f Merge: c093820 1a71e25 Author: Ye Luo <yeluo@anl.gov> Date: Tue Oct 5 21:27:24 2021 -0500 Merge pull request QMCPACK#3510 from camelto2/dirac_converter_msd Dirac converter with MSD wave functions commit 4b2ef72 Author: Ye Luo <yeluo@anl.gov> Date: Tue Oct 5 20:43:26 2021 -0500 Mark all the RefVector const. commit 3191d09 Author: Ye Luo <yeluo@anl.gov> Date: Tue Oct 5 20:38:22 2021 -0500 Revert integration with DiracMatrixComputeOMPTarget commit 1a71e25 Merge: 9414af2 c093820 Author: Ye Luo <yeluo@anl.gov> Date: Tue Oct 5 18:36:14 2021 -0500 Merge branch 'develop' into dirac_converter_msd commit 9414af2 Author: camelto2 <cmelton@sandia.gov> Date: Tue Oct 5 17:28:37 2021 -0600 fix libxml leak commit c093820 Merge: 132b560 309afa1 Author: Ye Luo <yeluo@anl.gov> Date: Tue Oct 5 16:55:34 2021 -0500 Merge pull request QMCPACK#3513 from quantumsteve/available_supported cif2cell_supported not set commit 309afa1 Merge: 94b98f5 132b560 Author: Ye Luo <yeluo@anl.gov> Date: Tue Oct 5 15:51:21 2021 -0500 Merge branch 'develop' into available_supported commit 132b560 Merge: 67490d9 486884f Author: Ye Luo <yeluo@anl.gov> Date: Tue Oct 5 15:28:08 2021 -0500 Merge pull request QMCPACK#3512 from ye-luo/fix-StlPrettyPrint Collapsed style in StlPrettyPrint. 
commit 94b98f5 Author: Steven Hahn <hahnse@ornl.gov> Date: Tue Oct 5 15:02:00 2021 -0400 copy/paste error Signed-off-by: Steven Hahn <hahnse@ornl.gov> commit 486884f Author: Ye Luo <yeluo@anl.gov> Date: Tue Oct 5 13:47:02 2021 -0500 Remove unused header inclusion. commit a468c4a Author: Ye Luo <yeluo@anl.gov> Date: Tue Oct 5 01:12:10 2021 -0500 Collapsed style in StlPrettyPrint. commit 67490d9 Merge: aca1bf6 4ea1d47 Author: Ye Luo <yeluo@anl.gov> Date: Tue Oct 5 12:22:50 2021 -0500 Merge pull request QMCPACK#3511 from quantumsteve/cif2cell_available Check if cif2cell is available before running ntest_nexus_structure commit 4ea1d47 Author: Steven Hahn <hahnse@ornl.gov> Date: Tue Oct 5 11:02:22 2021 -0400 Add Python 3.10.0 Signed-off-by: Steven Hahn <hahnse@ornl.gov> commit 20d2e84 Author: Steven Hahn <hahnse@ornl.gov> Date: Tue Oct 5 10:03:43 2021 -0400 Check if cif2cell is available before running test Signed-off-by: Steven Hahn <hahnse@ornl.gov> commit bbfe24f Author: camelto2 <cmelton@sandia.gov> Date: Mon Oct 4 16:09:36 2021 -0600 fix uninnitialized warning commit ee35c16 Author: camelto2 <cmelton@sandia.gov> Date: Mon Oct 4 14:51:18 2021 -0600 remove unused code in DiracParser commit 0983b0a Author: camelto2 <cmelton@sandia.gov> Date: Mon Oct 4 14:18:26 2021 -0600 update manual entry for convert4qmc with DIRAC commit 8e064a0 Author: camelto2 <cmelton@sandia.gov> Date: Mon Oct 4 14:11:11 2021 -0600 clang formatting commit 806d493 Author: camelto2 <cmelton@sandia.gov> Date: Mon Oct 4 14:04:21 2021 -0600 update unit and converter tests to reflect changes in MSD for dirac I updated the gold.orbs.h5 to use convert4qmc -dirac df_Bi.out -TargetState 14 -prefix gold -nojastrow This tests the converter to make sure it can grab an arbitrary excited state to store in the hdf5. However, this change made it so that I needed to update the multi_slater unit tests, which relied on this gold.orbs.h5 file. 
Also, added to the unit test is reading the CI coeffs directly from the h5 file commit 3c48afd Merge: a4ebddb aca1bf6 Author: camelto2 <cmelton@sandia.gov> Date: Mon Oct 4 09:30:53 2021 -0600 Merge remote-tracking branch 'upstream/develop' into dirac_converter_msd commit aca1bf6 Merge: 618cdb2 2318723 Author: Paul R. C. Kent <kentpr@ornl.gov> Date: Sun Oct 3 13:02:42 2021 -0400 Merge pull request QMCPACK#3505 from anbenali/Convert4qmcGuessH5Format Convert4qmc guess h5 format commit 2318723 Author: Anouar Benali <abenali.sci@gmail.com> Date: Sun Oct 3 01:09:41 2021 -0500 fixing passing an hdf5 file without the -orbitals tag. closing bug # QMCPACK#3503 commit 273ecb0 Author: Anouar Benali <abenali.sci@gmail.com> Date: Sat Oct 2 23:57:51 2021 -0500 Change default behavior of convert4qmc: Will only generate inputs with Jastrows unless -nojastrow is specified, then only inputs with no jastrow will be generated commit 618cdb2 Merge: c0ed5f1 0a13c01 Author: Paul R. C. Kent <kentpr@ornl.gov> Date: Sat Oct 2 13:32:16 2021 -0400 Merge pull request QMCPACK#3502 from ye-luo/fix-nightly Fix and update nightly test scripts. commit 0a13c01 Author: Ye Luo <yeluo@anl.gov> Date: Sat Oct 2 11:29:44 2021 -0500 Fix and update nightly test scripts. commit c0ed5f1 Merge: 4b9b609 b7ce2c8 Author: Ye Luo <yeluo@anl.gov> Date: Fri Oct 1 18:33:06 2021 -0500 Merge pull request QMCPACK#3501 from ye-luo/fix-hip Fix HIP CMake commit b7ce2c8 Author: Ye Luo <yeluo@anl.gov> Date: Fri Oct 1 17:27:49 2021 -0500 Fix HIP CMake. 
commit 4b9b609 Merge: 91fa888 4042431 Author: Ye Luo <yeluo@anl.gov> Date: Fri Oct 1 15:37:41 2021 -0500 Merge pull request QMCPACK#3489 from prckent/ppconvertcmakeclean Fix ppconvert memory bugs and enable in testing commit 4042431 Merge: d48bd76 91fa888 Author: Ye Luo <yeluo@anl.gov> Date: Fri Oct 1 14:37:23 2021 -0500 Merge remote-tracking branch 'origin/develop' into ppconvertcmakeclean commit d48bd76 Author: Ye Luo <yeluo@anl.gov> Date: Fri Oct 1 14:37:06 2021 -0500 Update README.md about CUDA_ARCH commit 91fa888 Author: William F Godoy <williamfgc@yahoo.com> Date: Wed Sep 29 11:10:37 2021 -0400 Add docs for macOS CI commit fe5aa2d Author: William F Godoy <williamfgc@yahoo.com> Date: Wed Sep 29 09:46:20 2021 -0400 Add Python dependencies on macOS runner commit 60724b9 Author: William F Godoy <williamfgc@yahoo.com> Date: Tue Sep 28 17:06:13 2021 -0400 Reduce brew dependencies commit 72d4c01 Author: William F Godoy <williamfgc@yahoo.com> Date: Tue Sep 28 16:39:07 2021 -0400 Add macOS CI on GitHub Actions gcc-11 real build brew dependencies commit 7ee1a1d Merge: d008890 a4bb505 Author: Paul R. C. 
Kent <kentpr@ornl.gov> Date: Fri Oct 1 13:37:48 2021 -0400 Merge pull request QMCPACK#3492 from quantumsteve/cudatoolkit Eliminate deprecated find_package(CUDA) from qmcpack commit a4ebddb Author: camelto2 <cmelton@sandia.gov> Date: Fri Oct 1 10:43:24 2021 -0600 fix for spinors in h5 and xml for MSD commit a4bb505 Merge: d947ca1 d008890 Author: Ye Luo <yeluo@anl.gov> Date: Fri Oct 1 12:08:40 2021 -0400 Merge remote-tracking branch 'origin/develop' into cudatoolkit commit d947ca1 Author: Ye Luo <yeluo@anl.gov> Date: Fri Oct 1 12:08:26 2021 -0400 Update build_olcf_summit_Clang.sh commit d008890 Merge: a556925 d546e4e Author: Ye Luo <yeluo@anl.gov> Date: Fri Oct 1 11:08:04 2021 -0500 Merge pull request QMCPACK#3498 from ye-luo/opt-J2 Fix and optimize offload J2 commit 3884a95 Author: camelto2 <cmelton@sandia.gov> Date: Thu Sep 30 22:27:46 2021 -0600 correct MSD xml generated commit d546e4e Author: Ye Luo <yeluo@anl.gov> Date: Thu Sep 30 22:19:53 2021 -0500 Minimize recompute in J2. commit 517dea7 Author: Ye Luo <yeluo@anl.gov> Date: Thu Sep 30 01:14:49 2021 -0500 Cure non-determinisitic offload J2. Reproducer: NiO a64 batched_driver performance test. Run 1 VMC step with 1 thread over and over. The scalar.dat is not deterministic. Kinetic is different. mw_updateVGL. Inject print before and after the offload region. walker 13 and electron 741. Sometimes the value is not updated even if a walker is accepted. commit 587634d Author: Ye Luo <yeluo@anl.gov> Date: Thu Sep 30 19:10:31 2021 -0500 Update installation.rst commit f8ed8db Merge: fe7fba7 a556925 Author: Ye Luo <yeluo@anl.gov> Date: Thu Sep 30 19:01:19 2021 -0500 Merge remote-tracking branch 'origin/develop' into cudatoolkit commit fe7fba7 Author: Ye Luo <yeluo@anl.gov> Date: Thu Sep 30 19:00:23 2021 -0500 Allow OFFLOAD_ARCH not being set for NVHPC. commit a9e69a1 Author: Ye Luo <yeluo@anl.gov> Date: Thu Sep 30 18:43:55 2021 -0500 Make NVHPC support CMAKE_CUDA_ARCHITECTURES as a list. 
commit be7893e Author: Ye Luo <yeluo@anl.gov> Date: Thu Sep 30 16:58:44 2021 -0500 Our header only wrappers needs cuda include path. commit 9b36461 Merge: 575ea73 34953ef Author: camelto2 <cmelton@sandia.gov> Date: Thu Sep 30 14:38:47 2021 -0600 Merge remote-tracking branch 'upstream/develop' into dirac_converter_msd commit 575ea73 Author: camelto2 <cmelton@sandia.gov> Date: Thu Sep 30 14:37:43 2021 -0600 enable complete open shell CI parsing for DIRAC converter commit 8f1b5af Author: Steven Hahn <hahnse@ornl.gov> Date: Thu Sep 30 16:30:05 2021 -0400 update documentation Signed-off-by: Steven Hahn <hahnse@ornl.gov> commit afd2294 Author: Steven Hahn <hahnse@ornl.gov> Date: Thu Sep 30 16:06:50 2021 -0400 Check LLVM offload only contains one architecture Signed-off-by: Steven Hahn <hahnse@ornl.gov> commit 3ca4e31 Merge: 3ba75e9 b5571ec Author: Paul R. C. Kent <kentpr@ornl.gov> Date: Thu Sep 30 14:51:29 2021 -0400 Merge pull request #1 from williamfgc/ppconvertcmakeclean-fix-leaks Ppconvertcmakeclean fix memory leaks commit b5571ec Author: William F Godoy <williamfgc@yahoo.com> Date: Thu Sep 30 13:46:49 2021 -0400 Revert clang-format related changes commit 9b2e2fb Author: William F Godoy <williamfgc@yahoo.com> Date: Thu Sep 30 11:47:07 2021 -0400 Fix leaks associated with Grid raw pointer Use std::shared_ptr to enable copy constructors commit 216d9dd Author: William F Godoy <williamfgc@yahoo.com> Date: Thu Sep 30 10:47:27 2021 -0400 Address leaks in XMLWriterClass Use std::shared_ptr to allow for deep copy constructors commit 058e446 Author: Ye Luo <yeluo@anl.gov> Date: Thu Sep 30 10:06:18 2021 -0400 Update recipes under config for CUDA change. commit b39aea6 Author: Ye Luo <yeluo@anl.gov> Date: Wed Sep 29 19:06:57 2021 -0500 Set CMAKE_CUDA_ARCHITECTURES early. 
commit ebf7849 Author: Ye Luo <yeluo@anl.gov> Date: Wed Sep 29 19:02:47 2021 -0500 Make platform_cuda_legacy depend on CUDA::cudart commit 81457ad Merge: 57e55d3 ed14d86 Author: Ye Luo <yeluo@anl.gov> Date: Wed Sep 29 17:52:01 2021 -0500 Merge remote-tracking branch 'origin/develop' into cudatoolkit commit 57e55d3 Author: Ye Luo <yeluo@anl.gov> Date: Wed Sep 29 17:30:50 2021 -0500 Set back CUDA default to C++14. commit 734415c Author: Ye Luo <yeluo@anl.gov> Date: Wed Sep 29 17:24:08 2021 -0500 CMAKE_CUDA_ARCHITECTURES needs CMake 3.18. commit 707977e Author: Ye Luo <yeluo@anl.gov> Date: Wed Sep 29 17:22:44 2021 -0500 More accurate stopper message. commit 761745b Author: Steven Hahn <hahnse@ornl.gov> Date: Wed Sep 29 17:36:10 2021 -0400 Apply changes recommended by @ye-lou. Signed-off-by: Steven Hahn <hahnse@ornl.gov> commit b607510 Author: Steven Hahn <hahnse@ornl.gov> Date: Wed Sep 29 15:06:19 2021 -0400 Don't change required CMake version Signed-off-by: Steven Hahn <hahnse@ornl.gov> commit ed13bd8 Author: Steven Hahn <hahnse@ornl.gov> Date: Fri Sep 24 10:47:26 2021 -0400 Eliminate depricated find_package(CUDA) from qmcpack Replace it with first-class language support and find_package(CUDAToolkit) Signed-off-by: Steven Hahn <hahnse@ornl.gov> commit 3ba75e9 Author: Paul Kent <kentpr@ornl.gov> Date: Tue Sep 28 22:04:05 2021 -0400 Test label commit dc39dba Author: Paul Kent <kentpr@ornl.gov> Date: Tue Sep 28 21:55:32 2021 -0400 Try public in cmake
PDoakORNL · Oct 6, 2021 · 0a34412 · 0a34412
1 parent 4e8b3f4
commit 0a34412
Show file tree

Hide file tree

Showing 73 changed files with 1,103 additions and 3,983 deletions.
diff --git a/.github/workflows/ci-github-actions.yaml b/.github/workflows/ci-github-actions.yaml
@@ -1,16 +1,14 @@
-
 name: GitHub Actions CI
 
-on: 
+on:
   push:
-    branches: 
-    - develop
+    branches:
+      - develop
   pull_request:
-    branches: 
-    - develop
+    branches:
+      - develop
 
 jobs:
-
   linux:
     runs-on: ubuntu-latest
     container: ${{ matrix.container }}
@@ -20,79 +18,112 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        jobname: [
-          gcc-openmpi-real-coverage,
-          gcc-openmpi-complex-coverage,
-          gcc11-real-werror,
-          gcc11-complex-werror,
-          clang-real-asan,
-          clang-complex-asan,
-          clang-openmpi-real-ubsan,
-          clang-latest-openmp-offload
-        ]
+        jobname:
+          [
+            gcc-openmpi-real-coverage,
+            gcc-openmpi-complex-coverage,
+            gcc11-real-werror,
+            gcc11-complex-werror,
+            clang-real-asan,
+            clang-complex-asan,
+            clang-openmpi-real-ubsan,
+            clang-latest-openmp-offload,
+          ]
         include:
-        - jobname: gcc-openmpi-real-coverage
-          container: 
-            image: williamfgc/qmcpack-ci:ubuntu20-openmpi
-            options: -u 1001
-
-        - jobname: gcc-openmpi-complex-coverage
-          container: 
-            image: williamfgc/qmcpack-ci:ubuntu20-openmpi
-            options: -u 1001
-
-        - jobname: gcc11-real-werror
-          container: 
-            image: williamfgc/qmcpack-ci:ubuntu2110-serial
-            options: -u 1001
-
-        - jobname: gcc11-complex-werror
-          container: 
-            image: williamfgc/qmcpack-ci:ubuntu2110-serial
-            options: -u 1001
-
-        - jobname: clang-real-asan
-          container: 
-            image: williamfgc/qmcpack-ci:ubuntu20-openmpi
-            options: -u 1001
-
-        - jobname: clang-complex-asan
-          container: 
-            image: williamfgc/qmcpack-ci:ubuntu20-openmpi
-            options: -u 1001
-
-        - jobname: clang-openmpi-real-ubsan
-          container: 
-            image: williamfgc/qmcpack-ci:ubuntu20-openmpi
-            options: -u 1001
-
-        - jobname: clang-latest-openmp-offload
-          container: 
-            image: williamfgc/qmcpack-ci:ubuntu20-clang-latest
-            options: -u 1001
+          - jobname: gcc-openmpi-real-coverage
+            container:
+              image: williamfgc/qmcpack-ci:ubuntu20-openmpi
+              options: -u 1001
+
+          - jobname: gcc-openmpi-complex-coverage
+            container:
+              image: williamfgc/qmcpack-ci:ubuntu20-openmpi
+              options: -u 1001
+
+          - jobname: gcc11-real-werror
+            container:
+              image: williamfgc/qmcpack-ci:ubuntu2110-serial
+              options: -u 1001
+
+          - jobname: gcc11-complex-werror
+            container:
+              image: williamfgc/qmcpack-ci:ubuntu2110-serial
+              options: -u 1001
+
+          - jobname: clang-real-asan
+            container:
+              image: williamfgc/qmcpack-ci:ubuntu20-openmpi
+              options: -u 1001
+
+          - jobname: clang-complex-asan
+            container:
+              image: williamfgc/qmcpack-ci:ubuntu20-openmpi
+              options: -u 1001
+
+          - jobname: clang-openmpi-real-ubsan
+            container:
+              image: williamfgc/qmcpack-ci:ubuntu20-openmpi
+              options: -u 1001
+
+          - jobname: clang-latest-openmp-offload
+            container:
+              image: williamfgc/qmcpack-ci:ubuntu20-clang-latest
+              options: -u 1001
+
+    steps:
+      - name: Checkout Action
+        uses: actions/checkout@v1
+
+      - name: Configure
+        run: tests/test_automation/github-actions/ci/run_step.sh configure
+
+      - name: Build
+        run: tests/test_automation/github-actions/ci/run_step.sh build
+
+      - name: Test
+        run: tests/test_automation/github-actions/ci/run_step.sh test
+
+      - name: Coverage
+        if: contains(matrix.jobname, 'coverage')
+        run: tests/test_automation/github-actions/ci/run_step.sh coverage
+
+      - name: Upload Coverage
+        if: contains(matrix.jobname, 'coverage') && github.repository_owner == 'QMCPACK'
+        uses: codecov/codecov-action@v1
+        with:
+          file: ../qmcpack-build/coverage.xml
+          flags: tests-deterministic # optional
+          name: codecov-QMCPACK # optional
+          fail_ci_if_error: true # optional (default = false)
+
+  macos:
+    runs-on: macos-latest
+    env:
+      GH_JOBNAME: ${{ matrix.jobname }}
+      GH_OS: macOS
+
+    strategy:
+      fail-fast: false
+      matrix:
+        jobname: [macOS-gcc11-real]
 
     steps:
-    - name: Checkout Action
-      uses: actions/checkout@v1
-
-    - name: Configure
-      run: tests/test_automation/github-actions/ci/run_step.sh configure
-
-    - name: Build
-      run: tests/test_automation/github-actions/ci/run_step.sh build
-
-    - name: Test
-      run: tests/test_automation/github-actions/ci/run_step.sh test
-
-    - name: Coverage
-      if: contains(matrix.jobname, 'coverage')
-      run: tests/test_automation/github-actions/ci/run_step.sh coverage
-
-    - name: Upload Coverage
-      if: contains(matrix.jobname, 'coverage') && github.repository_owner == 'QMCPACK'
-      uses: codecov/codecov-action@v1
-      with:
-        file:  ../qmcpack-build/coverage.xml
-        flags: tests-deterministic # optional
-        name: codecov-QMCPACK # optional
-        fail_ci_if_error: true # optional (default = false)
+      - name: Checkout Action
+        uses: actions/checkout@v2
+
+      - name: Setup Dependencies
+        run: |
+             brew install ninja hdf5 fftw boost
+             pip3 install numpy h5py pandas
+
+      - name: Configure
+        run: tests/test_automation/github-actions/ci/run_step.sh configure
+
+      - name: Build
+        run: tests/test_automation/github-actions/ci/run_step.sh build
+
+      - name: Test
+        run: tests/test_automation/github-actions/ci/run_step.sh test
+
+      - name: Install
+        run: tests/test_automation/github-actions/ci/run_step.sh install
diff --git a/CMake/ClangCompilers.cmake b/CMake/ClangCompilers.cmake
@@ -19,6 +19,17 @@ if(QMC_OMP)
         CACHE STRING "Offload target architecture")
     set(OPENMP_OFFLOAD_COMPILE_OPTIONS "-fopenmp-targets=${OFFLOAD_TARGET}")
 
+    if(NOT DEFINED OFFLOAD_ARCH AND OFFLOAD_TARGET MATCHES "nvptx64" AND DEFINED CMAKE_CUDA_ARCHITECTURES)
+      list(LENGTH CMAKE_CUDA_ARCHITECTURES NUMBER_CUDA_ARCHITECTURES)
+      if(NUMBER_CUDA_ARCHITECTURES EQUAL "1")
+        set(OFFLOAD_ARCH sm_${CMAKE_CUDA_ARCHITECTURES})
+      else()
+        message(FATAL_ERROR "LLVM does not yet support offload to multiple architectures! "
+                            "Deriving OFFLOAD_ARCH from CMAKE_CUDA_ARCHITECTURES failed. "
+                            "Please keep only one entry in CMAKE_CUDA_ARCHITECTURES or set OFFLOAD_ARCH.")
+      endif()
+    endif()
+
     if(DEFINED OFFLOAD_ARCH)
       set(OPENMP_OFFLOAD_COMPILE_OPTIONS
           "${OPENMP_OFFLOAD_COMPILE_OPTIONS} -Xopenmp-target=${OFFLOAD_TARGET} -march=${OFFLOAD_ARCH}")

diff --git a/CMake/NVHPCCompilers.cmake b/CMake/NVHPCCompilers.cmake
@@ -7,12 +7,24 @@ if(QMC_OMP)
   set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mp=allcores")
   if(ENABLE_OFFLOAD AND NOT CMAKE_SYSTEM_NAME STREQUAL "CrayLinuxEnvironment")
     message(WARNING "QMCPACK OpenMP offload is not ready for NVIDIA HPC compiler.")
-    if(NOT DEFINED OFFLOAD_ARCH)
-      message(FATAL_ERROR "NVIDIA HPC compiler requires -gpu=ccXX option set based on the target GPU architecture! "
-                          "Please add -DOFFLOAD_ARCH=ccXX to cmake. For example, cc70 is for Volta.")
+    if(NOT DEFINED OFFLOAD_ARCH AND DEFINED CMAKE_CUDA_ARCHITECTURES)
+      list(LENGTH CMAKE_CUDA_ARCHITECTURES NUMBER_CUDA_ARCHITECTURES)
+      if(NUMBER_CUDA_ARCHITECTURES EQUAL "1")
+        set(OFFLOAD_ARCH cc${CMAKE_CUDA_ARCHITECTURES})
+      else()
+        string(REPLACE ";" ",cc" OFFLOAD_ARCH "${CMAKE_CUDA_ARCHITECTURES}")
+        set(OFFLOAD_ARCH "cc${OFFLOAD_ARCH}")
+      endif()
+    endif()
+
+    if(DEFINED OFFLOAD_ARCH)
+      if(NOT OFFLOAD_ARCH MATCHES "cc")
+        message(FATAL_ERROR "NVIDIA HPC compiler requires -gpu=ccXX option set based on the target GPU architecture! "
+                            "Please add -DOFFLOAD_ARCH=ccXX to cmake. For example, cc70 is for Volta.")
+      endif()
+      set(OPENMP_OFFLOAD_COMPILE_OPTIONS "-gpu=${OFFLOAD_ARCH}")
     endif()
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mp=gpu")
-    set(OPENMP_OFFLOAD_COMPILE_OPTIONS "-gpu=${OFFLOAD_ARCH}")
   else()
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mp=allcores")
   endif()

diff --git a/CMake/ctest_script.cmake b/CMake/ctest_script.cmake
@@ -202,8 +202,8 @@ if(DEFINED QMC_MIXED_PRECISION)
   set(CTEST_OPTIONS "${CTEST_OPTIONS};-DQMC_MIXED_PRECISION=${QMC_MIXED_PRECISION}")
 endif()
 
-if(DEFINED CUDA_ARCH)
-  set(CTEST_OPTIONS "${CTEST_OPTIONS};-DCUDA_ARCH='${CUDA_ARCH}'")
+if(DEFINED CMAKE_CUDA_ARCHITECTURES)
+  set(CTEST_OPTIONS "${CTEST_OPTIONS};-DCMAKE_CUDA_ARCHITECTURES='${CMAKE_CUDA_ARCHITECTURES}'")
 endif()
 
 if(DEFINED BUILD_AFQMC)

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -72,6 +72,17 @@ if(ENABLE_CUDA AND QMC_CUDA)
   message(FATAL_ERROR "ENABLE_CUDA=ON and QMC_CUDA=ON can not be set together!")
 endif(ENABLE_CUDA AND QMC_CUDA)
 
+# Set CMAKE_CUDA_ARCHITECTURES early so offload compiler setup can derive OFFLOAD_ARCH from it (not for CUDA2HIP).
+if((ENABLE_CUDA OR QMC_CUDA) AND NOT QMC_CUDA2HIP)
+  if(DEFINED CUDA_ARCH)
+    unset(CUDA_ARCH CACHE)
+    message(FATAL_ERROR "CUDA_ARCH option has been removed. Use -DCMAKE_CUDA_ARCHITECTURES=80 if -DCUDA_ARCH=sm_80 was used.")
+  endif()
+  if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
+    set(CMAKE_CUDA_ARCHITECTURES 70)
+  endif()
+endif()
+
 #--------------------------------------------------------------------
 # Set compiler-time parameters
 # WALKER_MAX_PROPERTIES max number of observables + 12 or so standard
@@ -662,38 +673,25 @@ if(QMC_CUDA OR ENABLE_CUDA)
   if(QMC_CUDA2HIP)
     message(STATUS "CUDA2HIP enabled") # all the HIP and ROCm settings will be handled by ENABLE_ROCM
   else(QMC_CUDA2HIP)
-    # FindCUDA default CUDA_PROPAGATE_HOST_FLAGS to ON but we prefer OFF
-    # It happened -ffast-math from host caused numerical issue in CUDA kernels.
-    option(CUDA_PROPAGATE_HOST_FLAGS "Propagate C/CXX_FLAGS and friends to the host compiler via -Xcompile" OFF)
-    find_package(CUDA REQUIRED)
-    set(CUDA_LINK_LIBRARIES_KEYWORD PRIVATE)
-    #set(CUDA_NVCC_FLAGS
-    #  "-arch=sm_20;-Drestrict=__restrict__;-DNO_CUDA_MAIN;-O3;-use_fast_math")
-    if(CUDA_NVCC_FLAGS MATCHES "arch")
-      # User defined NVCC flags
-      message(STATUS "Setting CUDA FLAGS=${CUDA_NVCC_FLAGS}")
-    else(CUDA_NVCC_FLAGS MATCHES "arch")
-      # Automatically set the default NVCC flags
-      set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-Drestrict=__restrict__;-DNO_CUDA_MAIN;-std=c++14")
-      if(QMC_COMPLEX)
-        set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-DQMC_COMPLEX=${QMC_COMPLEX}")
-      endif()
-      if(CMAKE_BUILD_TYPE STREQUAL "DEBUG")
-        set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-g;-G")
-      else()
-        # Temporarily disable fast_math because it causes multiple test failures
-        # SET(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-O3;-use_fast_math")
-        set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS};-O3")
-      endif()
-      set(CUDA_ARCH
-          sm_70
-          CACHE STRING "CUDA architecture sm_XX")
-      set(CUDA_NVCC_FLAGS "-arch=${CUDA_ARCH};${CUDA_NVCC_FLAGS}")
-    endif(CUDA_NVCC_FLAGS MATCHES "arch")
-    include_directories(${CUDA_INCLUDE_DIRS})
+    if(CMAKE_VERSION VERSION_LESS 3.18.0)
+      message(FATAL_ERROR "QMC_CUDA or ENABLE_CUDA require CMake 3.18.0 or later")
+    endif()
+    # Default to C++14 unless overridden: a few production machines use CUDA 10 which only supports C++14.
+    if(NOT DEFINED CMAKE_CUDA_STANDARD)
+      set(CMAKE_CUDA_STANDARD 14)
+    endif()
+    set(CMAKE_CUDA_STANDARD_REQUIRED TRUE)
+    set(CMAKE_CUDA_EXTENSIONS OFF)
+    enable_language(CUDA)
+    find_package(CUDAToolkit REQUIRED)
+    # Append the default project definitions to the CUDA compile flags
+    string(APPEND CMAKE_CUDA_FLAGS " -Drestrict=__restrict__ -DNO_CUDA_MAIN")
+    if(QMC_COMPLEX)
+      string(APPEND CMAKE_CUDA_FLAGS " -DQMC_COMPLEX=${QMC_COMPLEX}")
+    endif()
     set(HAVE_CUDA 1)
-    message("   CUDA_NVCC_FLAGS=${CUDA_NVCC_FLAGS}")
-  endif(QMC_CUDA2HIP)
+    message(STATUS "Project CUDA_FLAGS: ${CMAKE_CUDA_FLAGS}")
+  endif()
 else(QMC_CUDA OR ENABLE_CUDA)
   if(QMC_CUDA2HIP)
     message(FATAL_ERROR "QMC_CUDA2HIP requires QMC_CUDA=ON or ENABLE_CUDA=ON.")

diff --git a/README.md b/README.md
@@ -166,7 +166,7 @@ make -j 8
                          and use float and double for CUDA base and full precision.
      ENABLE_CUDA         ON/OFF(default). Enable CUDA code path for NVIDIA GPU acceleration.
                          Production quality for AFQMC. Pre-production quality for real-space.
-                         Use CUDA_ARCH, default sm_70, to set the actual GPU architecture.
+                         Use CMAKE_CUDA_ARCHITECTURES, default 70, to set the actual GPU architecture.
      ENABLE_OFFLOAD      ON/OFF(default). Experimental feature. Enable OpenMP target offload for GPU acceleration.
      ENABLE_TIMERS       ON(default)/OFF. Enable fine-grained timers. Timers are on by default but at level coarse
                          to avoid potential slowdown in tiny systems.

diff --git a/config/build_olcf_summit.sh b/config/build_olcf_summit.sh
@@ -12,8 +12,8 @@ echo "Either source $BUILD_MODULES or load these same modules to run QMCPACK"
 
 declare -A builds=( ["cpu"]=" -DQMC_MATH_VENDOR=IBM_MASS -DMASS_ROOT=/sw/summit/xl/16.1.1-10/xlmass/9.1.1" \
                     ["complex_cpu"]="-DQMC_COMPLEX=1  -DQMC_MATH_VENDOR=IBM_MASS -DMASS_ROOT=/sw/summit/xl/16.1.1-10/xlmass/9.1.1" \
-                    ["legacy_gpu"]="-DQMC_CUDA=1 -DCUDA_ARCH=sm_70 " \
-		    ["complex_legacy_gpu"]="-DQMC_CUDA=1 -DQMC_COMPLEX=1 -DCUDA_ARCH=sm_70 " )
+                    ["legacy_gpu"]="-DQMC_CUDA=1 -DCMAKE_CUDA_ARCHITECTURES=70 " \
+                    ["complex_legacy_gpu"]="-DQMC_CUDA=1 -DQMC_COMPLEX=1 -DCMAKE_CUDA_ARCHITECTURES=70 " )
 
 mkdir bin