From 67ac55c41890c15fc657069ee817179d7f370480 Mon Sep 17 00:00:00 2001
From: Paul Young <yyang173@illinois.edu>
Date: Thu, 12 Apr 2018 18:03:10 -0500
Subject: [PATCH 1/5] add estimator-sofk tests

add tests to check consistency between scalar.dat and stat.h5 output
paths
---
 tests/estimator/CMakeLists.txt                | 30 ++++++
 tests/estimator/sofk/allp_dat-h5.xml          | 91 ++++++++++++++++++
 .../sofk/check_collectables_h5dat-allp.py     | 92 ++++++++++++++++++
 .../sofk/check_collectables_h5dat-pbyp.py     | 92 ++++++++++++++++++
 .../sofk/check_properties_h5dat-allp.py       | 96 +++++++++++++++++++
 .../sofk/check_properties_h5dat-pbyp.py       | 96 +++++++++++++++++++
 tests/estimator/sofk/pbyp_dat-h5.xml          | 92 ++++++++++++++++++
 7 files changed, 589 insertions(+)
 create mode 100644 tests/estimator/sofk/allp_dat-h5.xml
 create mode 100755 tests/estimator/sofk/check_collectables_h5dat-allp.py
 create mode 100755 tests/estimator/sofk/check_collectables_h5dat-pbyp.py
 create mode 100755 tests/estimator/sofk/check_properties_h5dat-allp.py
 create mode 100755 tests/estimator/sofk/check_properties_h5dat-pbyp.py
 create mode 100644 tests/estimator/sofk/pbyp_dat-h5.xml
diff --git a/tests/estimator/CMakeLists.txt b/tests/estimator/CMakeLists.txt
index 96d517556b..51f9fe9b3c 100644
--- a/tests/estimator/CMakeLists.txt
+++ b/tests/estimator/CMakeLists.txt
@@ -46,3 +46,33 @@ if (add_test)
     latdev_check.py
   )
 endif()
+
+set(sofk_python_reqs numpy;pandas;h5py)
+CHECK_PYTHON_REQS(sofk_python_reqs estimator-sofk add_test)
+
+if (add_test)
+  SIMPLE_RUN_AND_CHECK(estimator-sofk_pbyp-properties
+    "${CMAKE_SOURCE_DIR}/tests/estimator/sofk"
+    pbyp_dat-h5.xml
+    1 16
+    check_properties_h5dat-pbyp.py
+  )
+  SIMPLE_RUN_AND_CHECK(estimator-sofk_pbyp-collectables
+    "${CMAKE_SOURCE_DIR}/tests/estimator/sofk"
+    pbyp_dat-h5.xml
+    1 16
+    check_collectables_h5dat-pbyp.py
+  )
+  SIMPLE_RUN_AND_CHECK(estimator-sofk_allp-properties
+    "${CMAKE_SOURCE_DIR}/tests/estimator/sofk"
+    allp_dat-h5.xml
+    1 16
+    check_properties_h5dat-allp.py
+  )
+  SIMPLE_RUN_AND_CHECK(estimator-sofk_allp-collectables
+    "${CMAKE_SOURCE_DIR}/tests/estimator/sofk"
+    allp_dat-h5.xml
+    1 16
+    check_collectables_h5dat-allp.py
+  )
+endif()
diff --git a/tests/estimator/sofk/allp_dat-h5.xml b/tests/estimator/sofk/allp_dat-h5.xml
new file mode 100644
index 0000000000..c531707ebd
--- /dev/null
+++ b/tests/estimator/sofk/allp_dat-h5.xml
@@ -0,0 +1,91 @@
+<?xml version="1.0"?>
+<simulation>
+   <project id="dat-h5_allp" series="0">
+      <application name="qmcapp" role="molecu" class="serial" version="1.0"/>
+   </project>
+   <qmcsystem>
+      <simulationcell>
+         <parameter name="lattice" units="bohr">
+                  3.77945227        0.00000000        0.00000000
+                 -0.00000000        3.77945227        0.00000000
+                 -0.00000000       -0.00000000        3.77945227
+         </parameter>
+         <parameter name="bconds">
+            p p p
+         </parameter>
+         <parameter name="LR_dim_cutoff"       >    15                 </parameter>
+      </simulationcell>
+      <particleset name="e" random="yes">
+         <group name="u" size="1" mass="1.0">
+            <parameter name="charge"              >    -1                    </parameter>
+            <parameter name="mass"                >    1.0                   </parameter>
+         </group>
+         <group name="d" size="1" mass="1.0">
+            <parameter name="charge"              >    -1                    </parameter>
+            <parameter name="mass"                >    1.0                   </parameter>
+         </group>
+      </particleset>
+      <particleset name="ion0">
+         <group name="H" size="2" mass="1837.36221934">
+            <parameter name="charge"              >    1                     </parameter>
+            <parameter name="valence"             >    1                     </parameter>
+            <parameter name="atomicnumber"        >    1                     </parameter>
+            <parameter name="mass"                >    1837.36221934            </parameter>
+            <attrib name="position" datatype="posArray" condition="0">
+                     0.00000000        0.00000000        0.00000000
+                     1.88972614        1.88972614        1.88972614
+            </attrib>
+         </group>
+      </particleset>
+      <wavefunction name="psi0" target="e">
+         <determinantset type="einspline" href="pwscf.pwscf.h5" tilematrix="1 0 0 0 1 0 0 0 1" twistnum="0" source="ion0" meshfactor="1.0" precision="float">
+            <slaterdeterminant>
+               <determinant id="updet" size="1">
+                  <occupation mode="ground" spindataset="0"/>
+               </determinant>
+               <determinant id="downdet" size="1">
+                  <occupation mode="ground" spindataset="0"/>
+               </determinant>
+            </slaterdeterminant>
+         </determinantset>
+         <jastrow type="One-Body" name="J1" function="bspline" source="ion0" print="yes">
+            <correlation elementType="H" size="8" cusp="1.0">
+               <coefficients id="eH" type="Array">                  
+0.00206602038 -0.002841926986 0.0036266191 -0.001913930279 8.457152991e-06 
+0.0007380321824 3.635172529e-05 0.0001299635851
+               </coefficients>
+            </correlation>
+         </jastrow>
+         <jastrow type="Two-Body" name="J2" function="bspline" print="yes">
+            <correlation speciesA="u" speciesB="d" size="8">
+               <coefficients id="ud" type="Array">                  
+0.5954603818 0.5062051797 0.3746940461 0.2521010502 0.1440163317 0.07796688253 
+0.03804420551 0.01449320872
+               </coefficients>
+            </correlation>
+         </jastrow>
+      </wavefunction>
+      <hamiltonian name="h0" type="generic" target="e">
+         <pairpot type="coulomb" name="ElecElec" source="e" target="e"/>
+         <pairpot type="coulomb" name="IonIon" source="ion0" target="ion0"/>
+         <pairpot type="coulomb" name="ElecIon" source="ion0" target="e"/>
+         <estimator type="sk" name="sk" hdf5="no"/>
+         <estimator type="sk" name="h5sk" hdf5="yes"/>
+      </hamiltonian>
+   </qmcsystem>
+   <qmc method="vmc" move="byp">
+      <parameter name="walkers"             >    64              </parameter>
+      <parameter name="blocks"              >    16            </parameter>
+      <parameter name="steps"               >    2            </parameter>
+      <parameter name="subSteps"            >    2               </parameter>
+      <parameter name="timestep"            >    2.0             </parameter>
+      <parameter name="warmupSteps"         >    16             </parameter>
+   </qmc>
+   <qmc method="dmc" move="byp">
+      <parameter name="targetwalkers"       >    64              </parameter>
+      <parameter name="blocks"              >    16            </parameter>
+      <parameter name="steps"               >    1            </parameter>
+      <parameter name="timestep"            >    2.0             </parameter>
+      <parameter name="warmupSteps"         >    0             </parameter>
+   </qmc>
+</simulation>
diff --git a/tests/estimator/sofk/check_collectables_h5dat-allp.py b/tests/estimator/sofk/check_collectables_h5dat-allp.py
new file mode 100755
index 0000000000..dd2f17e772
--- /dev/null
+++ b/tests/estimator/sofk/check_collectables_h5dat-allp.py
@@ -0,0 +1,92 @@
+#!/usr/bin/env python
+import sys
+import os
+import h5py
+import numpy as np
+from check_properties_h5dat import read
+
+
+def get_last_sk(fdat,fh5):
+  """ extract S(k) at the longest k vector from scalar.dat and stat.h5
+
+  Args:
+    fdat (str): name of scalar.dat file
+    fh5  (str): name of stat.h5 file
+  Return:
+    tuple: (myy, h5y), S(k_max) at each block from scalar.dat and stat.h5
+  """
+
+  # get S(k) from scalar.dat: last column whose label starts with 'sk'
+  df = read(fdat)
+  sk_cols = [col for col in df.columns if col.startswith('sk')]
+  myy = df[sk_cols[-1]].values
+
+  # get S(k) from stat.h5; [()] replaces Dataset.value (removed in h5py 3)
+  with h5py.File(fh5, 'r') as fp:  # closed even if the dataset is missing
+    h5y = fp['h5sk/value'][()].T[-1]
+  # end with
+
+  return myy, h5y
+# end def
+
+
+def show_scalar_trace(data, seriesl):
+  import matplotlib.pyplot as plt
+  method_map = {0:'VMC',1:'DMC'}
+  fig,ax_arr = plt.subplots(1, 2, sharey=True)
+  ax_arr[0].set_ylabel('S(k->inf)')
+  iplot = 0
+  for iseries in seriesl:
+    ax = ax_arr[iplot]
+    ax.set_title(method_map[iseries])
+    ax.set_xlabel('block')
+    ax.set_ylim(0.3, 1.2)
+
+    entry = data[iseries]
+    daty  = entry['daty']
+    h5y   = entry['h5y']
+
+    sline = ax.plot(daty)
+    hline = ax.plot(h5y, ls='--', lw=2, alpha=0.8)
+
+    ax.legend(
+      handles = [sline[0], hline[0]]
+     ,labels  = ['scalar.dat', 'stat.h5']
+     ,loc=0
+    )
+
+    iplot += 1
+  # end for iseries
+  plt.show()
+
+
+if __name__ == '__main__':
+
+  prefix = 'dat-h5_allp'
+  seriesl= [0,1]  # a list of series IDs to check
+
+  # check Properties v.s. Collectables
+  collectable_success_map = {}
+  data = {}
+  for iseries in seriesl:
+
+    # define files to read
+    fdat = '%s.s00%d.scalar.dat' % (prefix, iseries)
+    fh5  = '%s.s00%d.stat.h5' % (prefix, iseries)
+
+    daty, h5y = get_last_sk(fdat, fh5)
+    success   = np.allclose(daty, h5y, atol=0.1)
+    collectable_success_map[iseries] = success
+
+    # save data for plotting
+    data[iseries] = {'daty':daty, 'h5y':h5y}
+  # end for 
+  all_success = np.all( list(collectable_success_map.values()) )  # list(): a py3 dict view is always truthy in np.all
+
+  if all_success:
+    sys.exit(0)
+  else:
+    #show_scalar_trace(data, seriesl)
+    sys.exit(1)
+
+# end __main__
diff --git a/tests/estimator/sofk/check_collectables_h5dat-pbyp.py b/tests/estimator/sofk/check_collectables_h5dat-pbyp.py
new file mode 100755
index 0000000000..ee790c9678
--- /dev/null
+++ b/tests/estimator/sofk/check_collectables_h5dat-pbyp.py
@@ -0,0 +1,92 @@
+#!/usr/bin/env python
+import sys
+import os
+import h5py
+import numpy as np
+from check_properties_h5dat import read
+
+
+def get_last_sk(fdat,fh5):
+  """ extract S(k) at the longest k vector from scalar.dat and stat.h5
+
+  Args:
+    fdat (str): name of scalar.dat file
+    fh5  (str): name of stat.h5 file
+  Return:
+    tuple: (myy, h5y), S(k_max) at each block from scalar.dat and stat.h5
+  """
+
+  # get S(k) from scalar.dat
+  df = read(fdat)
+  sk_cols = [col for col in df.columns if col.startswith('sk')]
+  myy = df[sk_cols[-1]].values
+
+  # get S(k) from stat.h5
+  fp = h5py.File(fh5, 'r')
+  h5y = fp['h5sk/value'].value.T[-1]
+  fp.close()
+
+  return myy, h5y
+# end def
+
+
+def show_scalar_trace(data, seriesl):
+  import matplotlib.pyplot as plt
+  method_map = {0:'VMC',1:'DMC'}
+  fig,ax_arr = plt.subplots(1, 2, sharey=True)
+  ax_arr[0].set_ylabel('S(k->inf)')
+  iplot = 0
+  for iseries in seriesl:
+    ax = ax_arr[iplot]
+    ax.set_title(method_map[iseries])
+    ax.set_xlabel('block')
+    ax.set_ylim(0.3, 1.2)
+
+    entry = data[iseries]
+    daty  = entry['daty']
+    h5y   = entry['h5y']
+
+    sline = ax.plot(daty)
+    hline = ax.plot(h5y, ls='--', lw=2, alpha=0.8)
+
+    ax.legend(
+      handles = [sline[0], hline[0]]
+     ,labels  = ['scalar.dat', 'stat.h5']
+     ,loc=0
+    )
+
+    iplot += 1
+  # end for iseries
+  plt.show()
+
+
+if __name__ == '__main__':
+
+  prefix = 'dat-h5_pbyp'
+  seriesl= [0,1]  # a list of series IDs to check
+
+  # check Properties v.s. Collectables
+  collectable_success_map = {}
+  data = {}
+  for iseries in seriesl:
+
+    # define files to read
+    fdat = '%s.s00%d.scalar.dat' % (prefix, iseries)
+    fh5  = '%s.s00%d.stat.h5' % (prefix, iseries)
+
+    daty, h5y = get_last_sk(fdat, fh5)
+    success   = np.allclose(daty, h5y, atol=0.1)
+    collectable_success_map[iseries] = success
+
+    # save data for plotting
+    data[iseries] = {'daty':daty, 'h5y':h5y}
+  # end for 
+  all_success = np.all( collectable_success_map.values() )
+
+  if all_success:
+    sys.exit(0)
+  else:
+    #show_scalar_trace(data, seriesl)
+    sys.exit(1)
+
+# end __main__
diff --git a/tests/estimator/sofk/check_properties_h5dat-allp.py b/tests/estimator/sofk/check_properties_h5dat-allp.py
new file mode 100755
index 0000000000..7e4ee225ca
--- /dev/null
+++ b/tests/estimator/sofk/check_properties_h5dat-allp.py
@@ -0,0 +1,96 @@
+#!/usr/bin/env python
+import os
+import sys
+import h5py
+import numpy as np
+import pandas as pd
+
+
+def read(fdat):
+  """ read the scalar.dat file in table format readable by numpy.loadtxt.
+
+   The header line should start with '#' and contain column labels.
+
+  Args:
+    fdat (str): name of input file
+  Return:
+    pd.DataFrame: df containing the table of data
+  """
+  with open(fdat, 'r') as fp:
+    header = fp.readline()
+  # end with
+  cols = header.replace('#', '').split()
+  df = pd.read_table(fdat, sep=r'\s+', comment='#', header=None, names=cols)
+  return df
+# end def read
+
+
+def compare_columns_dat_h5(fdat, fh5):
+  """ compare mutual data columns in scalar.dat and stat.h5 files
+
+  Args:
+    fdat (str): name of scalar.dat file
+    fh5  (str): name of stat.h5 file
+  Return:
+    dict: mutual column name -> True if .dat and .h5 values agree
+  Raises:
+    RuntimeError: if the two files have no column name in common
+  """
+
+  # open database
+  df = read(fdat)
+  dat_cols = df.columns
+
+  agree_map = {}  # keep track of which columns agree
+  with h5py.File(fh5,'r') as fp:  # closed even if a comparison raises
+
+    # compare mutual columns in .dat v.s. .h5
+    for col in fp.keys():
+      if col not in dat_cols:
+        continue
+
+      # check if col agree between .dat and .h5
+
+      # get .h5 values; HDF5 paths always use '/', and [()] replaces
+      #  the Dataset.value attribute that was removed in h5py 3
+      h5_loc = '%s/value' % col
+      h5y  = fp[h5_loc][()][:,-1]
+
+      # get .dat values
+      daty = df.loc[:,col].values
+      agree_map[col] = np.allclose(h5y,daty)
+    # end for col
+  # end with
+
+  if len(agree_map) == 0:
+    raise RuntimeError('%s and %s have no mutual column' % (fdat, fh5))
+
+  return agree_map
+# end def
+
+
+if __name__ == '__main__':
+
+  prefix = 'dat-h5_allp'
+  seriesl= [0,1]
+
+  # check Properties
+  series_success_map = {}
+  for iseries in seriesl:
+
+    # define files to read
+    fdat = '%s.s00%d.scalar.dat' % (prefix, iseries)
+    fh5  = '%s.s00%d.stat.h5' % (prefix, iseries)
+
+    agree_map = compare_columns_dat_h5(fdat, fh5)
+    success = np.all( list(agree_map.values()) )  # list(): py3 dict view
+    series_success_map[iseries] = success
+  # end for iseries
+  
+  all_success = np.all( list(series_success_map.values()) )
+  if all_success:
+    sys.exit(0)
+  else:
+    sys.exit(1)
+  
+# end __main__
diff --git a/tests/estimator/sofk/check_properties_h5dat-pbyp.py b/tests/estimator/sofk/check_properties_h5dat-pbyp.py
new file mode 100755
index 0000000000..fd501e1f69
--- /dev/null
+++ b/tests/estimator/sofk/check_properties_h5dat-pbyp.py
@@ -0,0 +1,96 @@
+#!/usr/bin/env python
+import os
+import sys
+import h5py
+import numpy as np
+import pandas as pd
+
+
+def read(fdat):
+  """ read the scalar.dat file in table format readable by numpy.loadtxt.
+
+   The header line should start with '#' and contain column labels.
+
+  Args:
+    dat_fname (str): name of input file
+  Return:
+    pd.DataFrame: df containing the table of data
+  """
+  with open(fdat, 'r') as fp:
+    header = fp.readline()
+  # end with
+  cols = header.replace('#', '').split()
+  df = pd.read_table(fdat, sep='\s+', comment='#', header=None, names=cols)
+  return df
+# end def read
+
+
+def compare_columns_dat_h5(fdat, fh5):
+  """ compare mutual data columns in scalar.dat and stat.h5 files
+
+  Args:
+    fdat (str): name of scalar.dat file
+    fh5  (str): name of stat.h5 file
+  Return:
+    dict: a dictionary holding mutual columns names as key
+  """
+
+  # open database
+  df = read(fdat)
+  dat_cols = df.columns
+
+  fp = h5py.File(fh5,'r')
+  h5_cols = fp.keys()
+
+  # compare mutual columns in .dat v.s. .h5
+  agree_map = {}  # keep track of which columns agree
+  for col in h5_cols:
+    if col not in dat_cols:
+      continue
+
+    # check if col agree between .dat and .h5
+
+    # get .h5 values
+    h5_loc = os.path.join(col, 'value')
+    h5y  = fp[h5_loc].value[:,-1]
+
+    # get .dat values
+    daty = df.loc[:,col].values
+    agree_map[col] = np.allclose(h5y,daty)
+  # end for col
+   
+  # close database
+  fp.close()
+
+  if len(agree_map) == 0:
+    raise RuntimeError('%s and %s have no mutual column' % (fdat, fh5))
+
+  return agree_map
+# end def
+
+
+if __name__ == '__main__':
+
+  prefix = 'dat-h5_pbyp'
+  seriesl= [0,1]
+
+  # check Properties
+  series_success_map = {}
+  for iseries in seriesl:
+
+    # define files to read
+    fdat = '%s.s00%d.scalar.dat' % (prefix, iseries)
+    fh5  = '%s.s00%d.stat.h5' % (prefix, iseries)
+
+    agree_map = compare_columns_dat_h5(fdat, fh5)
+    success = np.all( agree_map.values() )
+    series_success_map[iseries] = success
+  # end for iseries
+  
+  all_success = np.all( series_success_map.values() )
+  if all_success:
+    sys.exit(0)
+  else:
+    sys.exit(1)
+  
+# end __main__
diff --git a/tests/estimator/sofk/pbyp_dat-h5.xml b/tests/estimator/sofk/pbyp_dat-h5.xml
new file mode 100644
index 0000000000..4384ffd0c6
--- /dev/null
+++ b/tests/estimator/sofk/pbyp_dat-h5.xml
@@ -0,0 +1,92 @@
+<?xml version="1.0"?>
+<simulation>
+   <project id="dat-h5_pbyp" series="0">
+      <application name="qmcapp" role="molecu" class="serial" version="1.0"/>
+   </project>
+   <qmcsystem>
+      <simulationcell>
+         <parameter name="lattice" units="bohr">
+                  3.77945227        0.00000000        0.00000000
+                 -0.00000000        3.77945227        0.00000000
+                 -0.00000000       -0.00000000        3.77945227
+         </parameter>
+         <parameter name="bconds">
+            p p p
+         </parameter>
+         <parameter name="LR_dim_cutoff"       >    15                 </parameter>
+      </simulationcell>
+      <particleset name="e" random="yes">
+         <group name="u" size="1" mass="1.0">
+            <parameter name="charge"              >    -1                    </parameter>
+            <parameter name="mass"                >    1.0                   </parameter>
+         </group>
+         <group name="d" size="1" mass="1.0">
+            <parameter name="charge"              >    -1                    </parameter>
+            <parameter name="mass"                >    1.0                   </parameter>
+         </group>
+      </particleset>
+      <particleset name="ion0">
+         <group name="H" size="2" mass="1837.36221934">
+            <parameter name="charge"              >    1                     </parameter>
+            <parameter name="valence"             >    1                     </parameter>
+            <parameter name="atomicnumber"        >    1                     </parameter>
+            <parameter name="mass"                >    1837.36221934            </parameter>
+            <attrib name="position" datatype="posArray" condition="0">
+                     0.00000000        0.00000000        0.00000000
+                     1.88972614        1.88972614        1.88972614
+            </attrib>
+         </group>
+      </particleset>
+      <wavefunction name="psi0" target="e">
+         <determinantset type="einspline" href="pwscf.pwscf.h5" tilematrix="1 0 0 0 1 0 0 0 1" twistnum="0" source="ion0" meshfactor="1.0" precision="float">
+            <slaterdeterminant>
+               <determinant id="updet" size="1">
+                  <occupation mode="ground" spindataset="0"/>
+               </determinant>
+               <determinant id="downdet" size="1">
+                  <occupation mode="ground" spindataset="0"/>
+               </determinant>
+            </slaterdeterminant>
+         </determinantset>
+         <jastrow type="One-Body" name="J1" function="bspline" source="ion0" print="yes">
+            <correlation elementType="H" size="8" cusp="1.0">
+               <coefficients id="eH" type="Array">                  
+0.00206602038 -0.002841926986 0.0036266191 -0.001913930279 8.457152991e-06 
+0.0007380321824 3.635172529e-05 0.0001299635851
+               </coefficients>
+            </correlation>
+         </jastrow>
+         <jastrow type="Two-Body" name="J2" function="bspline" print="yes">
+            <correlation speciesA="u" speciesB="d" size="8">
+               <coefficients id="ud" type="Array">                  
+0.5954603818 0.5062051797 0.3746940461 0.2521010502 0.1440163317 0.07796688253 
+0.03804420551 0.01449320872
+               </coefficients>
+            </correlation>
+         </jastrow>
+      </wavefunction>
+      <hamiltonian name="h0" type="generic" target="e">
+         <pairpot type="coulomb" name="ElecElec" source="e" target="e"/>
+         <pairpot type="coulomb" name="IonIon" source="ion0" target="ion0"/>
+         <pairpot type="coulomb" name="ElecIon" source="ion0" target="e"/>
+         <estimator type="sk" name="sk" hdf5="no"/>
+         <estimator type="sk" name="h5sk" hdf5="yes"/>
+      </hamiltonian>
+   </qmcsystem>
+   <qmc method="vmc" move="pbyp">
+      <parameter name="walkers"             >    64              </parameter>
+      <parameter name="blocks"              >    16            </parameter>
+      <parameter name="steps"               >    2            </parameter>
+      <parameter name="subSteps"            >    2               </parameter>
+      <parameter name="timestep"            >    2.0             </parameter>
+      <parameter name="warmupSteps"         >    16             </parameter>
+   </qmc>
+   
+   <qmc method="dmc" move="pbyp">
+      <parameter name="targetwalkers"       >    64              </parameter>
+      <parameter name="blocks"              >    16            </parameter>
+      <parameter name="steps"               >    1            </parameter>
+      <parameter name="timestep"            >    2.0             </parameter>
+      <parameter name="warmupSteps"         >    0             </parameter>
+   </qmc>
+</simulation>

From 8948125dc02f3242052cf34868c029b9bdc600a3 Mon Sep 17 00:00:00 2001
From: Paul Young <yyang173@illinois.edu>
Date: Thu, 12 Apr 2018 18:06:04 -0500
Subject: [PATCH 2/5] add a symbolic link to bccH-1x1x1 wf h5

---
 tests/estimator/sofk/pwscf.pwscf.h5 | 1 +
 1 file changed, 1 insertion(+)
 create mode 120000 tests/estimator/sofk/pwscf.pwscf.h5

diff --git a/tests/estimator/sofk/pwscf.pwscf.h5 b/tests/estimator/sofk/pwscf.pwscf.h5
new file mode 120000
index 0000000000..7140f8e389
--- /dev/null
+++ b/tests/estimator/sofk/pwscf.pwscf.h5
@@ -0,0 +1 @@
+../../solids/bccH_1x1x1_ae/pwscf.pwscf.h5
\ No newline at end of file

From c95318c115e6f0d6e3c9024c6bf1ca426b3c2368 Mon Sep 17 00:00:00 2001
From: Paul Young <yyang173@illinois.edu>
Date: Fri, 13 Apr 2018 14:37:08 -0500
Subject: [PATCH 3/5] pass argument to check script

---
 CMake/macros.cmake                            |  2 +-
 tests/estimator/CMakeLists.txt                |  8 +-
 .../sofk/check_collectables_h5dat-pbyp.py     | 92 ------------------
 ...at-allp.py => check_collectables_h5dat.py} |  2 +-
 .../sofk/check_properties_h5dat-pbyp.py       | 96 -------------------
 ...5dat-allp.py => check_properties_h5dat.py} |  2 +-
 6 files changed, 7 insertions(+), 195 deletions(-)
 delete mode 100755 tests/estimator/sofk/check_collectables_h5dat-pbyp.py
 rename tests/estimator/sofk/{check_collectables_h5dat-allp.py => check_collectables_h5dat.py} (98%)
 delete mode 100755 tests/estimator/sofk/check_properties_h5dat-pbyp.py
 rename tests/estimator/sofk/{check_properties_h5dat-allp.py => check_properties_h5dat.py} (98%)

diff --git a/CMake/macros.cmake b/CMake/macros.cmake
index c403ff7b95..cbedffe597 100644
--- a/CMake/macros.cmake
+++ b/CMake/macros.cmake
@@ -247,7 +247,7 @@ function(SIMPLE_RUN_AND_CHECK base_name base_dir input_file procs threads check_
   set(work_dir "${CMAKE_CURRENT_BINARY_DIR}/${full_name}")
   #message(${work_dir})
   add_test(NAME "${test_name}"
-    COMMAND "${check_cmd}"
+    COMMAND "${check_cmd}" ${ARGN}
     WORKING_DIRECTORY "${work_dir}"
   )
 
diff --git a/tests/estimator/CMakeLists.txt b/tests/estimator/CMakeLists.txt
index 51f9fe9b3c..1ad39d650b 100644
--- a/tests/estimator/CMakeLists.txt
+++ b/tests/estimator/CMakeLists.txt
@@ -55,24 +55,24 @@ if (add_test)
     "${CMAKE_SOURCE_DIR}/tests/estimator/sofk"
     pbyp_dat-h5.xml
     1 16
-    check_properties_h5dat-pbyp.py
+    check_properties_h5dat.py dat-h5_pbyp
   )
   SIMPLE_RUN_AND_CHECK(estimator-sofk_pbyp-collectables
     "${CMAKE_SOURCE_DIR}/tests/estimator/sofk"
     pbyp_dat-h5.xml
     1 16
-    check_collectables_h5dat-pbyp.py
+    check_collectables_h5dat.py dat-h5_pbyp
   )
   SIMPLE_RUN_AND_CHECK(estimator-sofk_allp-properties
     "${CMAKE_SOURCE_DIR}/tests/estimator/sofk"
     allp_dat-h5.xml
     1 16
-    check_properties_h5dat-allp.py
+    check_properties_h5dat.py dat-h5_allp
   )
   SIMPLE_RUN_AND_CHECK(estimator-sofk_allp-collectables
     "${CMAKE_SOURCE_DIR}/tests/estimator/sofk"
     allp_dat-h5.xml
     1 16
-    check_collectables_h5dat-allp.py
+    check_collectables_h5dat.py dat-h5_allp
   )
 endif()
diff --git a/tests/estimator/sofk/check_collectables_h5dat-pbyp.py b/tests/estimator/sofk/check_collectables_h5dat-pbyp.py
deleted file mode 100755
index ee790c9678..0000000000
--- a/tests/estimator/sofk/check_collectables_h5dat-pbyp.py
+++ /dev/null
@@ -1,92 +0,0 @@
-#!/usr/bin/env python
-import sys
-import os
-import h5py
-import numpy as np
-from check_properties_h5dat import read
-
-
-def get_last_sk(fdat,fh5):
-  """ extract S(k) at the longest k vector from scalar.dat and stat.h5
-
-  Args:
-    fdat (str): name of scalar.dat file
-    fh5  (str): name of stat.h5 file
-  Return:
-    tuple: (myy, h5y), S(k_max) at each block from scalar.dat and stat.h5
-  """
-
-  # get S(k) from scalar.dat
-  df = read(fdat)
-  sk_cols = [col for col in df.columns if col.startswith('sk')]
-  myy = df[sk_cols[-1]].values
-
-  # get S(k) from stat.h5
-  fp = h5py.File(fh5, 'r')
-  h5y = fp['h5sk/value'].value.T[-1]
-  fp.close()
-
-  return myy, h5y
-# end def
-
-
-def show_scalar_trace(data, seriesl):
-  import matplotlib.pyplot as plt
-  method_map = {0:'VMC',1:'DMC'}
-  fig,ax_arr = plt.subplots(1, 2, sharey=True)
-  ax_arr[0].set_ylabel('S(k->inf)')
-  iplot = 0
-  for iseries in seriesl:
-    ax = ax_arr[iplot]
-    ax.set_title(method_map[iseries])
-    ax.set_xlabel('block')
-    ax.set_ylim(0.3, 1.2)
-
-    entry = data[iseries]
-    daty  = entry['daty']
-    h5y   = entry['h5y']
-
-    sline = ax.plot(daty)
-    hline = ax.plot(h5y, ls='--', lw=2, alpha=0.8)
-
-    ax.legend(
-      handles = [sline[0], hline[0]]
-     ,labels  = ['scalar.dat', 'stat.h5']
-     ,loc=0
-    )
-
-    iplot += 1
-  # end for iseries
-  plt.show()
-
-
-if __name__ == '__main__':
-
-  prefix = 'dat-h5_pbyp'
-  seriesl= [0,1]  # a list of series IDs to check
-
-  # check Properties v.s. Collectables
-  collectable_success_map = {}
-  data = {}
-  for iseries in seriesl:
-
-    # define files to read
-    fdat = '%s.s00%d.scalar.dat' % (prefix, iseries)
-    fh5  = '%s.s00%d.stat.h5' % (prefix, iseries)
-
-    daty, h5y = get_last_sk(fdat, fh5)
-    success   = np.allclose(daty, h5y, atol=0.1)
-    collectable_success_map[iseries] = success
-
-    # save data for plotting
-    data[iseries] = {'daty':daty, 'h5y':h5y}
-  # end for 
-  all_success = np.all( collectable_success_map.values() )
-
-  if all_success:
-    sys.exit(0)
-  else:
-    #show_scalar_trace(data, seriesl)
-    sys.exit(1)
-
-# end __main__
diff --git a/tests/estimator/sofk/check_collectables_h5dat-allp.py b/tests/estimator/sofk/check_collectables_h5dat.py
similarity index 98%
rename from tests/estimator/sofk/check_collectables_h5dat-allp.py
rename to tests/estimator/sofk/check_collectables_h5dat.py
index dd2f17e772..5b358439eb 100755
--- a/tests/estimator/sofk/check_collectables_h5dat-allp.py
+++ b/tests/estimator/sofk/check_collectables_h5dat.py
@@ -62,7 +62,7 @@ def show_scalar_trace(data, seriesl):
 
 if __name__ == '__main__':
 
-  prefix = 'dat-h5_allp'
+  prefix = sys.argv[1]
   seriesl= [0,1]  # a list of series IDs to check
 
   # check Properties v.s. Collectables
diff --git a/tests/estimator/sofk/check_properties_h5dat-pbyp.py b/tests/estimator/sofk/check_properties_h5dat-pbyp.py
deleted file mode 100755
index fd501e1f69..0000000000
--- a/tests/estimator/sofk/check_properties_h5dat-pbyp.py
+++ /dev/null
@@ -1,96 +0,0 @@
-#!/usr/bin/env python
-import os
-import sys
-import h5py
-import numpy as np
-import pandas as pd
-
-
-def read(fdat):
-  """ read the scalar.dat file in table format readable by numpy.loadtxt.
-
-   The header line should start with '#' and contain column labels.
-
-  Args:
-    dat_fname (str): name of input file
-  Return:
-    pd.DataFrame: df containing the table of data
-  """
-  with open(fdat, 'r') as fp:
-    header = fp.readline()
-  # end with
-  cols = header.replace('#', '').split()
-  df = pd.read_table(fdat, sep='\s+', comment='#', header=None, names=cols)
-  return df
-# end def read
-
-
-def compare_columns_dat_h5(fdat, fh5):
-  """ compare mutual data columns in scalar.dat and stat.h5 files
-
-  Args:
-    fdat (str): name of scalar.dat file
-    fh5  (str): name of stat.h5 file
-  Return:
-    dict: a dictionary holding mutual columns names as key
-  """
-
-  # open database
-  df = read(fdat)
-  dat_cols = df.columns
-
-  fp = h5py.File(fh5,'r')
-  h5_cols = fp.keys()
-
-  # compare mutual columns in .dat v.s. .h5
-  agree_map = {}  # keep track of which columns agree
-  for col in h5_cols:
-    if col not in dat_cols:
-      continue
-
-    # check if col agree between .dat and .h5
-
-    # get .h5 values
-    h5_loc = os.path.join(col, 'value')
-    h5y  = fp[h5_loc].value[:,-1]
-
-    # get .dat values
-    daty = df.loc[:,col].values
-    agree_map[col] = np.allclose(h5y,daty)
-  # end for col
-   
-  # close database
-  fp.close()
-
-  if len(agree_map) == 0:
-    raise RuntimeError('%s and %s have no mutual column' % (fdat, fh5))
-
-  return agree_map
-# end def
-
-
-if __name__ == '__main__':
-
-  prefix = 'dat-h5_pbyp'
-  seriesl= [0,1]
-
-  # check Properties
-  series_success_map = {}
-  for iseries in seriesl:
-
-    # define files to read
-    fdat = '%s.s00%d.scalar.dat' % (prefix, iseries)
-    fh5  = '%s.s00%d.stat.h5' % (prefix, iseries)
-
-    agree_map = compare_columns_dat_h5(fdat, fh5)
-    success = np.all( agree_map.values() )
-    series_success_map[iseries] = success
-  # end for iseries
-  
-  all_success = np.all( series_success_map.values() )
-  if all_success:
-    sys.exit(0)
-  else:
-    sys.exit(1)
-  
-# end __main__
diff --git a/tests/estimator/sofk/check_properties_h5dat-allp.py b/tests/estimator/sofk/check_properties_h5dat.py
similarity index 98%
rename from tests/estimator/sofk/check_properties_h5dat-allp.py
rename to tests/estimator/sofk/check_properties_h5dat.py
index 7e4ee225ca..3e5c801353 100755
--- a/tests/estimator/sofk/check_properties_h5dat-allp.py
+++ b/tests/estimator/sofk/check_properties_h5dat.py
@@ -71,7 +71,7 @@ def compare_columns_dat_h5(fdat, fh5):
 
 if __name__ == '__main__':
 
-  prefix = 'dat-h5_allp'
+  prefix = sys.argv[1]
   seriesl= [0,1]
 
   # check Properties

From 9fac440b0655319c6b0b2da860dbf8df50c1ce34 Mon Sep 17 00:00:00 2001
From: Paul Young <yyang173@illinois.edu>
Date: Fri, 13 Apr 2018 14:42:26 -0500
Subject: [PATCH 4/5] move math macros out of backflow section

---
 manual/backflow_implementation.tex | 2 --
 manual/qmcpack_manual.tex          | 2 ++
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/manual/backflow_implementation.tex b/manual/backflow_implementation.tex
index 62aa80a4ad..23ce179091 100644
--- a/manual/backflow_implementation.tex
+++ b/manual/backflow_implementation.tex
@@ -1,5 +1,3 @@
-\newcommand{\bs}{\boldsymbol}
-\newcommand{\tr}{\text{tr}}
 \section{Slater-Backflow Wavefunction Implementation Details}
 
 For simplicity, consider $N$ identical fermions of the same spin (e.g. up electrons) at spatial locations $\{\bs{r}_1,\bs{r}_2,\dots,\bs{r}_{N}\}$. Then the Slater determinant can be written as
diff --git a/manual/qmcpack_manual.tex b/manual/qmcpack_manual.tex
index 6b45c18ac9..704978eec1 100644
--- a/manual/qmcpack_manual.tex
+++ b/manual/qmcpack_manual.tex
@@ -140,6 +140,8 @@
 \newcommand{\overlap}[2]{\langle #1 \lvert #2 \rangle}
 \newcommand{\operator}[3]{\ket{#1} #2 \bra{#3}}
 \newcommand{\idop}{\hat{\mathbb{1}}}
+\newcommand{\bs}{\boldsymbol}
+\newcommand{\tr}{\text{tr}} % trace
 
 
 \begin{document}

From ec6778f925549c865d1d7f4a4894b21e021e9f72 Mon Sep 17 00:00:00 2001
From: Paul Young <yyang173@illinois.edu>
Date: Fri, 13 Apr 2018 14:42:54 -0500
Subject: [PATCH 5/5] add estimator manager documentation

---
 manual/developing.tex        |   1 +
 manual/estimator_manager.tex | 298 +++++++++++++++++++++++++++++++++++
 2 files changed, 299 insertions(+)
 create mode 100644 manual/estimator_manager.tex

diff --git a/manual/developing.tex b/manual/developing.tex
index e26af168e9..33a291eb9a 100644
--- a/manual/developing.tex
+++ b/manual/developing.tex
@@ -4,4 +4,5 @@ \chapter{Development Guide}
 The section gives guidance on how to extend the functionality of QMCPACK. Future examples will likely include topics such as the addition of a jastrow function or add a new QMC method.
 
 \input{estimator_implementation}
+\input{estimator_manager}
 \input{backflow_implementation}
diff --git a/manual/estimator_manager.tex b/manual/estimator_manager.tex
new file mode 100644
index 0000000000..e947a45903
--- /dev/null
+++ b/manual/estimator_manager.tex
@@ -0,0 +1,298 @@
+\section{Estimator Output}
+\subsection{Estimator Definition}
+For simplicity, consider a local property $O(\bs{R})$, where $\bs{R}$ is the collection of all particle coordinates. An \textit{estimator} for $O(\bs{R}) $ is a weighted average over walkers
+\begin{align}
+E[O] = \left(\sum\limits_{i=1}^{N^{tot}_{walker}} w_i O(\bs{R}_i) \right) / \left( \sum \limits_{i=1}^{N^{tot}_{walker}} w_i \right). \label{eq:estimator}
+\end{align}
+$N^{tot}_{walker}$ is the total number of walkers collected in the entire simulation. Notice, $N^{tot}_{walker}$ is typically far larger than the number of walkers held in memory at any given simulation step. $w_i$ is the weight of walker $i$.
+
+In a VMC simulation, the weight of every walker is 1.0. Further, the number of walkers is constant at each step. Therefore, eq.~(\ref{eq:estimator}) simplifies to
+\begin{align}
+E_{VMC}[O] = \frac{1}{N_{step}N_{walker}^{ensemble}} \sum_{s,e} O(\bs{R}_{s,e}).
+\end{align}
+Each walker $\bs{R}_{s,e}$ is labeled by \textit{step index} $s$ and \textit{ensemble index} $e$.
+
+In a DMC simulation, the weight of each walker is different and may change from step to step. Further, the ensemble size varies from step to step. Therefore, eq.~(\ref{eq:estimator}) simplifies to
+\begin{align}
+E_{DMC}[O] = \frac{1}{N_{step}} \sum_{s} \left\{ \left(\sum_e w_{s,e} O(\bs{R}_{s,e})  \right) / \left( \sum \limits_{e} w_{s,e} \right)  \right\}.
+\end{align}
+
+We will refer to the average inside the $\{\}$ as the \textit{ensemble average} and to the remaining average as the \textit{block average}. The process of calculating $O(\bs{R})$ is called \textit{evaluate}.
+
+\subsection{Class Relations}
+A large number of classes are involved in the estimator collection process. They often have misleading class or method names. The following list documents the gotchas:
+\begin{enumerate}
+\item \verb|EstimatorManager| is an unused copy of \verb|EstimatorManagerBase|. \verb|EstimatorManagerBase| is the class used in the QMC drivers. (PR \#371 explains this)
+\item \verb|EstimatorManagerBase::Estimators| is completely different from \verb|QMCDriver::Estimators|, which is subtly different from \verb|QMCHamiltonianBase::Estimators|. The first is a list of pointers to \verb|ScalarEstimatorBase|. The second is the master estimator (one per MPI group). The third is the slave estimator that exists one per OpenMP thread.
+\item \verb|QMCHamiltonian| is NOT a parent class of \verb|QMCHamiltonianBase|. Instead, \verb|QMCHamiltonian| owns two lists of \verb|QMCHamiltonianBase| named \verb|H| and \verb|auxH|.
+\item \verb|QMCDriver::H| is NOT the same as \verb|QMCHamiltonian::H|. The first is a pointer to a \verb|QMCHamiltonian|. \verb|QMCHamiltonian::H| is a list.
+\item \verb|EstimatorManager::stopBlock(std::vector)| is completely different from \verb|EstimatorManager::|
+\verb|stopBlock(RealType)|, which is the same as \verb|stopBlock(RealType, true)|, but is subtly different from \verb|stopBlock(RealType, false)|. The first three methods are intended to be called by the master estimator which exists one per MPI group. The last method is intended to be called by the slave estimator which exists one per OpenMP thread.
+\end{enumerate}
+
+\subsection{Estimator Output Stages}
+%In QMCPACK, evaluation is done by \verb|QMCHamiltonianBase|; ensemble average is done either by a ``CloneDriver'' (e.g. \verb|VMCSingleOMP|, \verb|DMCOMP|) or \verb|ScalarEstimatorBase|; block average is done by \verb|ScalarEstimatorBase| or \verb|EstimatorManagerBase|. Walkers can be accessed by ``CloneDriver'' and \verb|QMCHamiltonianBase| but not by \verb|EstimatorManagerBase| or \verb|ScalarEstimatorBase|. Output files can be accessed by the latter two classes but not the former two. Therefore, in order to output estimators to file, data must be transferred from \textit{evaluate} classes to \textit{average} classes.
+
+Estimators take four conceptual stages to propagate to the output files: evaluate, load ensemble, unload ensemble, and collect. They are easier to understand in reverse order.
+
+\subsubsection{Collect Stage}
+File output is performed by the master \verb|EstimatorManager| owned by \verb|QMCDriver|. The first 8+ entries in \verb|EstimatorManagerBase::AverageCache| will be written to scalar.dat. The remaining entries in \verb|AverageCache| will be written to stat.h5. File writing is triggered by \verb|EstimatorManagerBase|\\ \verb|::collectBlockAverages| inside \verb|EstimatorManagerBase::stopBlock|.
+
+\begin{lstlisting}
+// In EstimatorManagerBase.cpp::collectBlockAverages
+  if(Archive)
+  {
+    *Archive << std::setw(10) << RecordCount;
+    int maxobjs=std::min(BlockAverages.size(),max4ascii);
+    for(int j=0; j<maxobjs; j++)
+      *Archive << std::setw(FieldWidth) << AverageCache[j];
+    for(int j=0; j<PropertyCache.size(); j++)
+      *Archive << std::setw(FieldWidth) << PropertyCache[j];
+    *Archive << std::endl;
+    for(int o=0; o<h5desc.size(); ++o)
+      h5desc[o]->write(AverageCache.data(),SquaredAverageCache.data());
+    H5Fflush(h_file,H5F_SCOPE_LOCAL);
+  }
+\end{lstlisting}
+
+\verb|EstimatorManagerBase::collectBlockAverages| is triggered from the master-thread estimator via either \verb|stopBlock(std::vector)| or \verb|stopBlock(RealType, true)|. Notice, file writing is NOT triggered by the slave-thread estimator method \verb|stopBlock(RealType, false)|.
+
+\begin{lstlisting}
+// In EstimatorManagerBase.cpp
+void EstimatorManagerBase::stopBlock(RealType accept, bool collectall)
+{
+  //take block averages and update properties per block
+  PropertyCache[weightInd]=BlockWeight;
+  PropertyCache[cpuInd] = MyTimer.elapsed();
+  PropertyCache[acceptInd] = accept;
+  for(int i=0; i<Estimators.size(); i++)
+    Estimators[i]->takeBlockAverage(AverageCache.begin(),SquaredAverageCache.begin());
+  if(Collectables)
+  { 
+    Collectables->takeBlockAverage(AverageCache.begin(),SquaredAverageCache.begin());
+  }
+  if(collectall)
+    collectBlockAverages(1);
+}
+\end{lstlisting}
+
+\begin{lstlisting}
+// In ScalarEstimatorBase.h
+template<typename IT>
+inline void takeBlockAverage(IT first, IT first_sq)
+{
+  first += FirstIndex;
+  first_sq += FirstIndex;
+  for(int i=0; i<scalars.size(); i++)
+  {
+    *first++ = scalars[i].mean();
+    *first_sq++ = scalars[i].mean2();
+    scalars_saved[i]=scalars[i]; //save current block
+    scalars[i].clear();
+  }
+}
+\end{lstlisting}
+
+At the collect stage, \verb|ScalarEstimatorBase::scalars| must be populated with ensemble-averaged data. Two derived classes of \verb|ScalarEstimatorBase| are crucial: \verb|LocalEnergyEstimator| will carry \verb|Properties|, whereas \verb|CollectablesEstimator| will carry \verb|Collectables|.
+
+\subsubsection{Unload Ensemble Stage}
+\verb|LocalEnergyEstimator::scalars| are populated by \verb|ScalarEstimatorBase::accumulate|, whereas \verb|CollectablesEstimator::scalars| are populated by \verb|CollectablesEstimator::|
+\verb|accumulate_all|. Both accumulate methods are triggered by \verb|EstimatorManagerBase::accumulate|. One confusing aspect about the unload stage is that \verb|EstimatorManagerBase::accumulate| has a master and a slave call signature. A slave estimator such as \verb|QMCUpdateBase::Estimators| should unload a subset of walkers. Thus, the slave estimator should call \verb|accumulate(W,it,it_end)|. However, the master estimator, such as \verb|SimpleFixedNodeBranch::myEstimator|, should unload data from the entire walker ensemble. This is achieved by calling \verb|accumulate(W)|.
+
+\begin{lstlisting}
+void EstimatorManagerBase::accumulate(MCWalkerConfiguration& W)
+{ // intended to be called by master estimator only
+  BlockWeight += W.getActiveWalkers();
+  RealType norm=1.0/W.getGlobalNumWalkers();
+  for(int i=0; i< Estimators.size(); i++)
+    Estimators[i]->accumulate(W,W.begin(),W.end(),norm);
+  if(Collectables)//collectables are normalized by QMC drivers
+    Collectables->accumulate_all(W.Collectables,1.0);
+}
+\end{lstlisting}
+
+\begin{lstlisting}
+void EstimatorManagerBase::accumulate(MCWalkerConfiguration& W
+ , MCWalkerConfiguration::iterator it
+ , MCWalkerConfiguration::iterator it_end)
+{ // intended to be called by slave estimator only
+  BlockWeight += it_end-it;
+  RealType norm=1.0/W.getGlobalNumWalkers();
+  for(int i=0; i< Estimators.size(); i++)
+    Estimators[i]->accumulate(W,it,it_end,norm);
+  if(Collectables)
+    Collectables->accumulate_all(W.Collectables,1.0);
+}
+\end{lstlisting}
+
+\begin{lstlisting}
+// In LocalEnergyEstimator.h
+inline void accumulate(const Walker_t& awalker, RealType wgt)
+{ // ensemble average W.Properties
+  // expect ePtr to be W.Properties; expect wgt = 1/GlobalNumberOfWalkers
+  const RealType* restrict ePtr = awalker.getPropertyBase();
+  RealType wwght= wgt* awalker.Weight;
+  scalars[0](ePtr[LOCALENERGY],wwght);
+  scalars[1](ePtr[LOCALENERGY]*ePtr[LOCALENERGY],wwght);
+  scalars[2](ePtr[LOCALPOTENTIAL],wwght);
+  for(int target=3, source=FirstHamiltonian; target<scalars.size(); ++target, ++source)
+    scalars[target](ePtr[source],wwght);
+}
+\end{lstlisting}
+
+\begin{lstlisting}
+// In CollectablesEstimator.h
+inline void accumulate_all(const MCWalkerConfiguration::Buffer_t& data, RealType wgt)
+{ // ensemble average W.Collectables
+  // expect data to be W.Collectables; expect wgt = 1.0
+  for(int i=0; i<data.size(); ++i)
+    scalars[i](data[i], wgt);
+}
+\end{lstlisting}
+
+At the unload ensemble stage, the data structures \verb|Properties| and \verb|Collectables| must be populated by appropriately normalized values so that the ensemble average can be correctly taken. \verb|QMCDriver| is responsible for the correct loading of data onto the walker ensemble.
+
+\subsubsection{Load Ensemble Stage}
+\verb|Properties| in the Monte Carlo ensemble of walkers \verb|QMCDriver::W| is populated by \verb|QMCHamiltonian|\\ \verb|::saveProperty|. The master \verb|QMCHamiltonian::LocalEnergy|, \verb|::KineticEnergy|, and \verb|::Observables| must be properly populated at the end of the evaluate stage.
+\begin{lstlisting}
+// In QMCHamiltonian.h
+  template<class IT>
+  inline
+  void saveProperty(IT first)
+  { // expect first to be W.Properties
+    first[LOCALPOTENTIAL]= LocalEnergy-KineticEnergy;
+    copy(Observables.begin(),Observables.end(),first+myIndex);
+  }
+\end{lstlisting}
+
+\verb|Collectables|'s load stage is combined with its evaluate stage.
+
+\subsubsection{Evaluate Stage}
+
+The master \verb|QMCHamiltonian::Observables| is populated by slave \verb|QMCHamiltonianBase|
+\verb|::setObservables|. However, the call signature must be \verb|QMCHamiltonianBase::setObservables(|
+\verb|QMCHamiltonian::Observables)|. This call signature is enforced by \verb|QMCHamiltonian::evaluate| and \verb|QMCHamiltonian::auxHevaluate|.
+
+\begin{lstlisting}
+// In QMCHamiltonian.cpp
+QMCHamiltonian::Return_t
+QMCHamiltonian::evaluate(ParticleSet& P)
+{
+  LocalEnergy = 0.0;
+  for(int i=0; i<H.size(); ++i)
+  {
+    myTimers[i]->start();
+    LocalEnergy += H[i]->evaluate(P);
+    H[i]->setObservables(Observables);
+#if !defined(REMOVE_TRACEMANAGER)
+    H[i]->collect_scalar_traces();
+#endif
+    myTimers[i]->stop();
+    H[i]->setParticlePropertyList(P.PropertyList,myIndex);
+  }
+  KineticEnergy=H[0]->Value;
+  P.PropertyList[LOCALENERGY]=LocalEnergy;
+  P.PropertyList[LOCALPOTENTIAL]=LocalEnergy-KineticEnergy;
+  // auxHevaluate(P);
+  return LocalEnergy;
+}
+\end{lstlisting}
+
+\begin{lstlisting}
+// In QMCHamiltonian.cpp
+void QMCHamiltonian::auxHevaluate(ParticleSet& P, Walker_t& ThisWalker)
+{
+#if !defined(REMOVE_TRACEMANAGER)
+  collect_walker_traces(ThisWalker,P.current_step);
+#endif
+  for(int i=0; i<auxH.size(); ++i)
+  {
+    auxH[i]->setHistories(ThisWalker);
+    RealType sink = auxH[i]->evaluate(P);
+    auxH[i]->setObservables(Observables);
+#if !defined(REMOVE_TRACEMANAGER)
+    auxH[i]->collect_scalar_traces();
+#endif
+    auxH[i]->setParticlePropertyList(P.PropertyList,myIndex);
+  }
+}
+\end{lstlisting}
+
+\subsection{Estimator Use Cases}
+
+\subsubsection{VMCSingleOMP pseudo code}
+\begin{lstlisting}
+bool VMCSingleOMP::run()
+{
+  masterEstimator->start(nBlocks);
+  for (int ip=0; ip<NumThreads; ++ip)
+    Movers[ip]->startRun(nBlocks,false);  // slaveEstimator->start(blocks, record)
+  
+  do // block
+  {
+    #pragma omp parallel
+    {
+      Movers[ip]->startBlock(nSteps);  // slaveEstimator->startBlock(steps)
+      RealType cnorm = 1.0/static_cast<RealType>(wPerNode[ip+1]-wPerNode[ip]);
+      do // step
+      {
+        wClones[ip]->resetCollectables();
+        Movers[ip]->advanceWalkers(wit, wit_end, recompute);
+        wClones[ip]->Collectables *= cnorm;
+        Movers[ip]->accumulate(wit, wit_end);
+      } // end step
+      Movers[ip]->stopBlock(false);  // slaveEstimator->stopBlock(acc, false)
+    } // end omp
+    masterEstimator->stopBlock(estimatorClones);  // write files
+  } // end block
+  masterEstimator->stop(estimatorClones);
+}
+\end{lstlisting}
+
+\subsubsection{DMCOMP  pseudo code}
+\begin{lstlisting}
+bool DMCOMP::run()
+{
+  masterEstimator->setCollectionMode(true);
+  
+  masterEstimator->start(nBlocks);
+  for(int ip=0; ip<NumThreads; ip++)
+    Movers[ip]->startRun(nBlocks,false);  // slaveEstimator->start(blocks, record)
+  
+  do // block
+  {
+    masterEstimator->startBlock(nSteps);
+    for(int ip=0; ip<NumThreads; ip++)
+      Movers[ip]->startBlock(nSteps);  // slaveEstimator->startBlock(steps)
+    
+    do // step
+    {
+      #pragma omp parallel
+      {
+      wClones[ip]->resetCollectables();
+      // advanceWalkers
+      } // end omp
+      
+      //branchEngine->branch
+      { // In WalkerControlMPI.cpp::branch
+      wgt_inv=WalkerController->NumContexts/WalkerController->EnsembleProperty.Weight;
+      walkers.Collectables *= wgt_inv;
+      slaveEstimator->accumulate(walkers);
+      }
+      masterEstimator->stopBlock(acc)  // write files
+    }  // end for step
+  }  // end for block
+  
+  masterEstimator->stop();
+}
+\end{lstlisting}
+
+\subsection{Summary}
+
+Two ensemble-level data structures \verb|ParticleSet::Properties| and \verb|::Collectables| serve as intermediaries between evaluate classes and output classes to scalar.dat and stat.h5. \verb|Properties| appears in both scalar.dat and stat.h5, whereas \verb|Collectables| appears only in stat.h5. \verb|Properties| is overwritten by \verb|QMCHamiltonian::Observables| at the end of each step. \verb|QMCHamiltonian::Observables| is filled upon call to \verb|QMCHamiltonian::evaluate| and \verb|::auxHevaluate|. \verb|Collectables| is zeroed at the beginning of each step and accumulated upon call to \verb|::auxHevaluate|.
+
+Data are written to scalar.dat in four stages: evaluate, load, unload, and collect. In the evaluate stage, \verb|QMCHamiltonian::Observables| is populated by a list of \verb|QMCHamiltonianBase|. In the load stage, \verb|QMCHamiltonian::Observables| is transferred to \verb|Properties| by \verb|QMCDriver|. In the unload stage, \verb|Properties| is copied to \verb|LocalEnergyEstimator::scalars|. In the collect stage, \verb|LocalEnergyEstimator::scalars| is block-averaged to \verb|EstimatorManagerBase|\\ \verb|::AverageCache| and dumped to file. For \verb|Collectables|, the evaluate and load stages are combined in a call to \verb|QMCHamiltonian::auxHevaluate|. In the unload stage, \verb|Collectables| is copied to \verb|CollectablesEstimator::scalars|. In the collect stage, \verb|CollectablesEstimator|\\ \verb|::scalars| is block-averaged to \verb|EstimatorManagerBase::AverageCache| and dumped to file.
+
+\subsection{Appendix: dmc.dat}
+
+There is an additional data structure \verb|ParticleSet::EnsembleProperty|, which is managed by \verb|WalkerControlBase::EnsembleProperty| and directly dumped to dmc.dat via its own averaging procedure. dmc.dat is written by \verb|WalkerControlBase::measureProperties|, which is called by \verb|WalkerControlBase::branch|, which is called by \verb|SimpleFixedNodeBranch|\\ \verb|::branch| for example.