Skip to content

Commit

Permalink
Cure non-deterministic offload J2.
Browse files Browse the repository at this point in the history
Reproducer:
NiO a64 batched_driver performance test. Run 1 VMC step with 1 thread over and over.
The scalar.dat output is not deterministic: the Kinetic value differs from run to run.
Debugging mw_updateVGL: inject a print before and after the offload region and watch walker 13, electron 741.
Sometimes the value is not updated even though the walker is accepted.
  • Loading branch information
ye-luo committed Oct 1, 2021
1 parent a556925 commit 517dea7
Showing 1 changed file with 5 additions and 3 deletions.
8 changes: 5 additions & 3 deletions src/QMCWaveFunctions/Jastrow/BsplineFunctor.h
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@ struct BsplineFunctor : public OptimizableFunctorBase
PRAGMA_OFFLOAD("omp parallel for reduction(+: val_sum, grad_x, grad_y, grad_z, lapl)")
for (int j = 0; j < n_src; j++)
{
if (j == iat) continue;
const int ig = grp_ids[j];
const T* coefs = mw_coefs[ig];
T DeltaRInv = mw_DeltaRInv[ig];
Expand All @@ -227,7 +228,7 @@ struct BsplineFunctor : public OptimizableFunctorBase
T u(0);
T dudr(0);
T d2udr2(0);
if (j != iat && r < cutoff_radius)
if (r < cutoff_radius)
{
u = evaluate_impl(dist[j], coefs, DeltaRInv, dudr, d2udr2);
dudr *= T(1) / r;
Expand Down Expand Up @@ -541,7 +542,7 @@ struct BsplineFunctor : public OptimizableFunctorBase
T* mw_DeltaRInv = reinterpret_cast<T*>(transfer_buffer_ptr + sizeof(T*) * num_groups);
T* mw_cutoff_radius = mw_DeltaRInv + num_groups;
int* accepted_indices = reinterpret_cast<int*>(transfer_buffer_ptr + (sizeof(T*) + sizeof(T) * 2) * num_groups);
int ip = accepted_indices[iw];
const int ip = accepted_indices[iw];

const T* dist_new = mw_dist + ip * dist_stride;
const T* dipl_x_new = dist_new + n_padded;
Expand All @@ -564,6 +565,7 @@ struct BsplineFunctor : public OptimizableFunctorBase
PRAGMA_OFFLOAD("omp parallel for")
for (int j = 0; j < n_src; j++)
{
if (j == iat) continue;
const int ig = grp_ids[j];
const T* coefs = mw_coefs[ig];
T DeltaRInv = mw_DeltaRInv[ig];
Expand All @@ -573,7 +575,7 @@ struct BsplineFunctor : public OptimizableFunctorBase
T u(0);
T dudr(0);
T d2udr2(0);
if (j != iat && r < cutoff_radius)
if (r < cutoff_radius)
{
u = evaluate_impl(dist_old[j], coefs, DeltaRInv, dudr, d2udr2);
dudr *= T(1) / r;
Expand Down

0 comments on commit 517dea7

Please sign in to comment.