
Add OMP parallelization for cusp correction #1643

Merged: 10 commits, Jun 19, 2019
117 changes: 79 additions & 38 deletions src/QMCWaveFunctions/lcao/CuspCorrectionConstruction.cpp
@@ -27,6 +27,11 @@ void applyCuspCorrection(const Matrix<CuspCorrectionParameters>& info,
{
typedef QMCTraits::RealType RealType;

NewTimer* cuspApplyTimer =
TimerManager.createTimer("CuspCorrectionConstruction::applyCuspCorrection", timer_level_medium);

ScopedTimer cuspApplyTimerWrapper(cuspApplyTimer);

LCAOrbitalSet phi = LCAOrbitalSet(lcwc.myBasisSet);
phi.setOrbitalSetSize(lcwc.OrbitalSetSize);
phi.BasisSetSize = lcwc.BasisSetSize;
@@ -188,6 +193,13 @@ void generateCuspInfo(int orbital_set_size,
{
typedef QMCTraits::RealType RealType;

NewTimer* cuspCreateTimer =
TimerManager.createTimer("CuspCorrectionConstruction::createCuspParameters", timer_level_medium);
NewTimer* splitPhiEtaTimer = TimerManager.createTimer("CuspCorrectionConstruction::splitPhiEta", timer_level_fine);
NewTimer* computeTimer = TimerManager.createTimer("CuspCorrectionConstruction::computeCorrection", timer_level_fine);

ScopedTimer createCuspTimerWrapper(cuspCreateTimer);

LCAOrbitalSet phi = LCAOrbitalSet(lcwc.myBasisSet);
phi.setOrbitalSetSize(lcwc.OrbitalSetSize);
phi.BasisSetSize = lcwc.BasisSetSize;
@@ -211,55 +223,84 @@ void generateCuspInfo(int orbital_set_size,
int start_mo = offset[Comm.rank()];
int end_mo = offset[Comm.rank() + 1];
app_log() << " Number of molecular orbitals to compute correction on this rank: " << end_mo - start_mo << std::endl;
for (int mo_idx = start_mo; mo_idx < end_mo; mo_idx++)

#pragma omp parallel
Contributor:
I'd like to see us move toward abstract or C++ concurrency constructions. The implicit capture of variables by the omp parallel pragma contributes to hazy scope and fears about race conditions. It makes it very convenient to write monster methods that build up a huge local namespace.
Is the potential confusion about thread scope worth it just to make what I think is initialization code run faster?
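
For illustration, a minimal self-contained sketch (not code from this PR; num_mo, num_threads, and result are placeholder names) of the kind of explicit-capture C++ construction being suggested, where each worker lists exactly what it reads or writes in the lambda capture list:

// Explicit captures make the shared/private split visible at the call site.
#include <thread>
#include <vector>
#include <iostream>

int main()
{
  const int num_mo      = 16;                    // hypothetical orbital count
  const int num_threads = 4;
  std::vector<double> result(num_mo, 0.0);       // one slot per orbital, no sharing

  std::vector<std::thread> workers;
  for (int t = 0; t < num_threads; ++t)
  {
    // Captures are explicit: t, num_mo, num_threads by value; result by reference.
    workers.emplace_back([t, num_mo, num_threads, &result]() {
      for (int mo = t; mo < num_mo; mo += num_threads)
        result[mo] = 2.0 * mo;                   // placeholder for per-orbital work
    });
  }
  for (auto& w : workers)
    w.join();

  std::cout << "result[1] = " << result[1] << std::endl;
  return 0;
}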

Contributor:
Background info: The Cusp Correction construction/calculation is distressingly slow, so maintainable improvements are definitely welcome imo.

Contributor (author):
Integrated the parallel scope with the for loop scope. This will result in additional allocations/deallocations with the objects, but it shouldn't affect the run time.
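
For illustration, a minimal self-contained sketch (assumed shape, not the exact diff) of what integrating the parallel scope with the for-loop scope means: the loop itself is the parallel region, so the helper object becomes a per-iteration local and each iteration pays one construction/destruction.

// Work stands in for the LCAOrbitalSet/ParticleSet copies in the real code.
#include <cstdio>
#include <vector>

struct Work
{
  std::vector<double> buffer;
  Work() : buffer(1024, 0.0) {}       // allocation happens in the constructor
};

int main()
{
  const int n = 8;
  std::vector<double> out(n, 0.0);

  #pragma omp parallel for schedule(dynamic)
  for (int i = 0; i < n; ++i)
  {
    Work local;                       // constructed and destroyed every iteration
    local.buffer[0] = 2.0 * i;
    out[i] = local.buffer[0];
  }

  for (int i = 0; i < n; ++i)
    std::printf("%g ", out[i]);
  std::printf("\n");
  return 0;
}

The trade-off is extra allocation per iteration in exchange for not needing an explicit parallel region around the loop.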

Contributor:
Sorry, I still have a question about the last change.

#pragma omp parallel
{
  ParticleSet localTargetPtcl(targetPtcl);
  #pragma omp for
  for(..)
  {
    do something on localTargetPtcl;
  }
}

This is valid OpenMP code, and it is a recommended optimization to avoid allocation and deallocation. OpenMP experts have corrected me a couple of times: my expectation that "#pragma omp for" means concurrent execution of the loop iterations was wrong. "#pragma omp for" is a worksharing construct; it indicates that the loop can be distributed among the threads spawned by the parallel region, not that the loop iterations are independent and can be executed concurrently. OpenMP 5.0 instead introduces the loop construct, which does state that the loop iterations are independent; that definition aligns with the concept of the concurrent loop in C++ and Fortran.

Going back to the example: the thread scope is the parallel region, and localTargetPtcl is defined in the right scope.
It should behave no differently from the same code without OpenMP.

{
  ParticleSet localTargetPtcl;
  for(..) // The effect of "omp for" is to change the lower and upper bounds of the loop, depending on the scheduling
  {
    do something on localTargetPtcl;
  }
}

The actual code did suffer from allocation/deallocation, and the imbalance between iterations amplified all of that overhead. I remember that when I profiled the cusp correction, the constructor and destructor took a lot of time. For this reason, I think the old way from @markdewing of doing the parallel region is preferable.
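
For reference, a minimal self-contained sketch of the OpenMP 5.0 loop construct mentioned above (my reading of the 5.0 spec; it requires an OpenMP 5.0 capable compiler), combined with a per-thread object in the parallel region in the same spirit as localTargetPtcl:

// Unlike "omp for", "omp loop" asserts that the iterations are independent
// and may execute concurrently.
#include <cstdio>
#include <vector>

int main()
{
  const int n = 100;
  std::vector<double> x(n, 1.0), y(n, 2.0);

  #pragma omp parallel
  {
    std::vector<double> scratch(n, 0.0);   // one copy per thread, lives for the whole region
    #pragma omp loop
    for (int i = 0; i < n; ++i)
    {
      scratch[i] = 2.0 * x[i];             // iterations declared independent
      y[i] += scratch[i];
    }
  }

  std::printf("y[0] = %g\n", y[0]);
  return 0;
}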

Contributor:
@PDoakORNL can we be a bit more flexible and allow this optimization to reduce overhead? It does introduce more data in the thread scope, but the scope and lifetime are quite clear.

Contributor (author):
I did some profiling on my laptop, and there doesn't seem to be significant overhead in creating the LCAOrbitalSet copies.

Contributor:
@ye-luo I would rather leave the code as is, less dependent on OpenMP syntax. Is that acceptable?

I also think that we'd be better off refactoring egregious design issues when they are the cause of performance issues, although according to Mark this isn't really the hotspot. @markdewing can you tell what it is?

Contributor:
As @markdewing confirmed, the overhead is small. Putting the objects within the innermost scope is the cleanest way. If this really impacts a workload, we can revisit it.

Contributor (author):
The bulk of the time (85%) is in DGEMM, called from LCAOrbitalSet::evaluate, called from OneMolecularOrbital::phi_vgl, called from getCurrentLocalEnergy.
This is for a larger system, and I killed it partway through the cusp correction (so pretty much all of the run time was spent doing cusp correction).

Contributor:
This is not the type of overhead I was worried about. I mean measuring the time of the cusp construction with and without the copy optimization.

{
app_log() << " Working on MO: " << mo_idx << std::endl;
ParticleSet localTargetPtcl(targetPtcl);
ParticleSet localSourcePtcl(sourcePtcl);

LCAOrbitalSet local_phi(phi);
local_phi.myBasisSet = phi.myBasisSet->makeClone();
local_phi.IsCloned = true;
local_phi.C = nullptr;
local_phi.setIdentity(false);

LCAOrbitalSet local_eta(eta);
local_eta.myBasisSet = eta.myBasisSet->makeClone();
local_eta.IsCloned = true;
local_eta.C = nullptr;
local_eta.setIdentity(false);

// Specify dynamic scheduling explicitly for load balancing. Each iteration should take enough
// time that scheduling overhead is not an issue.
#pragma omp for schedule(dynamic) collapse(2)
for (int center_idx = 0; center_idx < num_centers; center_idx++)
{
*(eta.C) = *(lcwc.C);
*(phi.C) = *(lcwc.C);
splitPhiEta(center_idx, corrCenter, phi, eta);

bool corrO = false;
auto& cref(*(phi.C));
for (int ip = 0; ip < cref.cols(); ip++)
{
if (std::abs(cref(mo_idx, ip)) > 0)
{
corrO = true;
break;
}
}

if (corrO)
for (int mo_idx = start_mo; mo_idx < end_mo; mo_idx++)
{
OneMolecularOrbital etaMO(&targetPtcl, &sourcePtcl, &eta);
etaMO.changeOrbital(center_idx, mo_idx);
app_log() << " Working on MO: " << mo_idx << " Center: " << center_idx << std::endl;

OneMolecularOrbital phiMO(&targetPtcl, &sourcePtcl, &phi);
phiMO.changeOrbital(center_idx, mo_idx);
splitPhiEtaTimer->start();

SpeciesSet& tspecies(sourcePtcl.getSpeciesSet());
int iz = tspecies.addAttribute("charge");
RealType Z = tspecies(iz, sourcePtcl.GroupID[center_idx]);
*(local_eta.C) = *(lcwc.C);
*(local_phi.C) = *(lcwc.C);
splitPhiEta(center_idx, corrCenter, local_phi, local_eta);

RealType Rc_max = 0.2;
RealType rc = 0.1;
splitPhiEtaTimer->stop();

RealType dx = rc * 1.2 / npts;
ValueVector_t pos(npts);
ValueVector_t ELideal(npts);
ValueVector_t ELcurr(npts);
for (int i = 0; i < npts; i++)
bool corrO = false;
auto& cref(*(local_phi.C));
for (int ip = 0; ip < cref.cols(); ip++)
{
pos[i] = (i + 1.0) * dx;
if (std::abs(cref(mo_idx, ip)) > 0)
{
corrO = true;
break;
}
}

RealType eta0 = etaMO.phi(0.0);
ValueVector_t ELorig(npts);
CuspCorrection cusp(info(center_idx, mo_idx));
minimizeForRc(cusp, phiMO, Z, rc, Rc_max, eta0, pos, ELcurr, ELideal);
info(center_idx, mo_idx) = cusp.cparam;
if (corrO)
{
OneMolecularOrbital etaMO(&localTargetPtcl, &localSourcePtcl, &local_eta);
etaMO.changeOrbital(center_idx, mo_idx);

OneMolecularOrbital phiMO(&localTargetPtcl, &localSourcePtcl, &local_phi);
phiMO.changeOrbital(center_idx, mo_idx);

SpeciesSet& tspecies(localSourcePtcl.getSpeciesSet());
int iz = tspecies.addAttribute("charge");
RealType Z = tspecies(iz, localSourcePtcl.GroupID[center_idx]);

RealType Rc_max = 0.2;
RealType rc = 0.1;

RealType dx = rc * 1.2 / npts;
ValueVector_t pos(npts);
ValueVector_t ELideal(npts);
ValueVector_t ELcurr(npts);
for (int i = 0; i < npts; i++)
{
pos[i] = (i + 1.0) * dx;
}

RealType eta0 = etaMO.phi(0.0);
ValueVector_t ELorig(npts);
CuspCorrection cusp(info(center_idx, mo_idx));
computeTimer->start();
minimizeForRc(cusp, phiMO, Z, rc, Rc_max, eta0, pos, ELcurr, ELideal);
computeTimer->stop();
info(center_idx, mo_idx) = cusp.cparam;
}
}
}
}