Merge pull request #62 from harlor/reduced_potential_pr

Add pV and U to reduced potential when they are given. Fixes #59.
alchemistry · Nov 26, 2018 · 82ce951 · 82ce951
2 parents eba1f9a + bfbe0b0
commit 82ce951
Show file tree

Hide file tree

Showing 3 changed files with 100 additions and 11 deletions.
diff --git a/src/alchemlyb/parsing/gmx.py b/src/alchemlyb/parsing/gmx.py
@@ -29,7 +29,9 @@ def extract_u_nk(xvg, T):
 
     """
 
-    col_match = r"\xD\f{}H \xl\f{}"
+    h_col_match = r"\xD\f{}H \xl\f{}"
+    pv_col_match = 'pV'
+    u_col_match = ['Total Energy', 'Potential Energy']
     beta = 1/(k_b * T)
 
     state, lambdas, statevec = _extract_state(xvg)
@@ -43,22 +45,31 @@ def extract_u_nk(xvg, T):
     times = df[df.columns[0]]
 
     # want to grab only dH columns
-    DHcols = [col for col in df.columns if (col_match in col)]
+    DHcols = [col for col in df.columns if (h_col_match in col)]
     dH = df[DHcols]
 
-    # not entirely sure if we need to get potentials relative to
-    # the state actually sampled, but perhaps needed to stack
-    # samples from all states?
-    U = df[df.columns[1]]
-
     # gromacs also gives us pV directly; need this for reduced potential
-    pV = df[df.columns[-1]]
+    pv_cols = [col for col in df.columns if (pv_col_match in col)]
+    pv = None
+    if pv_cols:
+        pv = df[pv_cols[0]]
+
+    # gromacs may also give us the total or potential energy U; if present, it is needed for the reduced potential
+    u_cols = [col for col in df.columns if any(single_u_col_match in col for single_u_col_match in u_col_match)]
+    u = None
+    if u_cols:
+        u = df[u_cols[0]]
 
     u_k = dict()
-    cols= list()
+    cols = list()
     for col in dH:
         u_col = eval(col.split('to')[1])
-        u_k[u_col] = beta * (dH[col].values + U.values + pV.values)
+        # reduced potential: u_k = beta * (dH + pV + U); pV and U are added only if present
+        u_k[u_col] = beta * dH[col].values
+        if pv_cols:
+            u_k[u_col] += beta * pv.values
+        if u_cols:
+            u_k[u_col] += beta * u.values
         cols.append(u_col)
 
     u_k = pd.DataFrame(u_k, columns=cols,

diff --git a/src/alchemlyb/tests/parsing/test_gmx.py b/src/alchemlyb/tests/parsing/test_gmx.py
@@ -5,6 +5,10 @@
 from alchemlyb.parsing.gmx import extract_dHdl, extract_u_nk
 from alchemtest.gmx import load_benzene
 from alchemtest.gmx import load_expanded_ensemble_case_1, load_expanded_ensemble_case_2, load_expanded_ensemble_case_3
+from alchemtest.gmx import load_water_particle_with_total_energy
+from alchemtest.gmx import load_water_particle_with_potential_energy
+from alchemtest.gmx import load_water_particle_without_energy
+from numpy.testing import assert_almost_equal
 
 
 def test_dHdl():
@@ -103,3 +107,77 @@ def test_dHdl_case3():
 
             assert dHdl.index.names == ['time', 'fep-lambda', 'coul-lambda', 'vdw-lambda', 'restraint-lambda']
             assert dHdl.shape == (2500, 4)
+
+def test_u_nk_with_total_energy():
+    """Test that the reduced potential is calculated correctly when the total energy is given.
+
+    """
+
+    # Load dataset
+    dataset = load_water_particle_with_total_energy()
+
+    # Check if the sum of values on the diagonal has the correct value
+    assert_almost_equal(_diag_sum(dataset), 47611377946.58586, decimal=4)
+
+    # Check one specific value in the dataframe
+    assert_almost_equal(
+        extract_u_nk(dataset['data']['AllStates'][0], T=300).iloc[0][0],
+        -11211.578357345974,
+        decimal=6
+    )
+
+def test_u_nk_with_potential_energy():
+    """Test that the reduced potential is calculated correctly when the potential energy is given.
+
+    """
+
+    # Load dataset
+    dataset = load_water_particle_with_potential_energy()
+
+    # Check if the sum of values on the diagonal has the correct value
+    assert_almost_equal(_diag_sum(dataset), 16674041445589.646, decimal=2)
+
+    # Check one specific value in the dataframe
+    assert_almost_equal(
+        extract_u_nk(dataset['data']['AllStates'][0], T=300).iloc[0][0],
+        -15656.558227621246,
+        decimal=6
+    )
+
+
+def test_u_nk_without_energy():
+    """Test that the reduced potential is calculated correctly when no energy is given.
+
+    """
+
+    # Load dataset
+    dataset = load_water_particle_without_energy()
+
+    # Check if the sum of values on the diagonal has the correct value
+    assert_almost_equal(_diag_sum(dataset), 20572988148877.555, decimal=2)
+
+    # Check one specific value in the dataframe
+    assert_almost_equal(
+        extract_u_nk(dataset['data']['AllStates'][0], T=300).iloc[0][0],
+        0.0,
+        decimal=6
+    )
+
+
+def _diag_sum(dataset):
+    """Sum u_nk.iloc[i][i] over all files of all legs, with i in range(number of files in the leg).
+
+    """
+
+    # Initialize the sum variable
+    ds = 0.0
+
+    for leg in dataset['data']:
+        for filename in dataset['data'][leg]:
+            u_nk = extract_u_nk(filename, T=300)
+
+            # Calculate the sum of diagonal elements:
+            for i in range(len(dataset['data'][leg])):
+                ds += u_nk.iloc[i][i]
+
+    return ds
diff --git a/src/alchemlyb/tests/test_preprocessing.py b/src/alchemlyb/tests/test_preprocessing.py
@@ -100,7 +100,7 @@ def slicer(self, *args, **kwargs):
                                  (True, gmx_benzene_dHdl(), 2001),  # 0.00:  g = 1.0559445620585415
                                  (True, gmx_benzene_u_nk(), 2001),  # 'fep': g = 1.0560203916559594
                                  (False, gmx_benzene_dHdl(), 3789),
-                                 (False, gmx_benzene_u_nk(), 3789),
+                                 (False, gmx_benzene_u_nk(), 3571),
                              ])
     def test_conservative(self, data, size, conservative):
         sliced = self.slicer(data, series=data.iloc[:, 0], conservative=conservative)