Merge pull request #1 from hejamu/feature

Added functionality and some fixes
alchemistry · Nov 6, 2022 · 853ae2c · 853ae2c
2 parents b3e66c5 + 33ad6c5
commit 853ae2c
Show file tree

Hide file tree

Showing 10 changed files with 222 additions and 58 deletions.
diff --git a/README.md b/README.md
@@ -1,6 +1,6 @@
 # Flamel
 
-The aim of the project is to develop a foundation for a new version of [alchemical-analysis](https://github.com/MobleyLab/alchemical-analysis)
+The aim of this project is to develop the foundation for a new version of [alchemical-analysis](https://github.com/MobleyLab/alchemical-analysis)
 that uses the well tested [alchemlyb](https://github.com/alchemistry/alchemlyb) library.
 
 # Installation
@@ -18,8 +18,9 @@ git clone git@github.com:alchemistry/flamel.git
 # Usage
 Currently only Gromacs parser and uncorrelation by dH/dl is supported!
 ```
-usage: flamel.py [-h] [-t TEMPERATURE] [-p PREFIX] [-q SUFFIX] [-e ESTIMATORS]
-                 [-n UNCORR] [-r DECIMAL] [-o OUTPUT] [-a SOFTWARE]
+usage: flamel.py [-h] [-t TEMPERATURE] [-p PREFIX] [-d DATAFILE_DIRECTORY]
+                 [-q SUFFIX] [-e ESTIMATORS] [-n UNCORR] [-j RESULTFILENAME]
+                 [-u UNIT] [-r DECIMAL] [-o OUTPUT] [-a SOFTWARE]
                  [-s EQUILTIME]
 
 Collect data and estimate free energy differences
@@ -31,6 +32,9 @@ optional arguments:
   -p PREFIX, --prefix PREFIX
                         Prefix for datafile sets, i.e.'dhdl' (default).
                         (default: dhdl)
+  -d DATAFILE_DIRECTORY, --dir DATAFILE_DIRECTORY
+                        Directory in which data files are stored. Default:
+                        Current directory. (default: .)
   -q SUFFIX, --suffix SUFFIX
                         Suffix for datafile sets, i.e. 'xvg' (default).
                         (default: xvg)
@@ -43,6 +47,11 @@ optional arguments:
                         default) or 'dE'. In the latter case the energy
                         differences dE_{i,i+1} (dE_{i,i-1} for the last
                         lambda) are used. (default: dhdl)
+  -j RESULTFILENAME, --resultfilename RESULTFILENAME
+                        custom defined result filename prefix. Default:
+                        results (default: results)
+  -u UNIT, --unit UNIT  Unit to report energies: 'kJ', 'kcal', and 'kT'.
+                        Default: 'kJ' (default: kJ)
   -r DECIMAL, --decimal DECIMAL
                         The number of decimal places the free energies are to
                         be reported with. No worries, this is for the text
@@ -67,19 +76,41 @@ flamel.py -p lambda_
 
 You should get a similar overview as [alchemical-analysis](https://github.com/MobleyLab/alchemical-analysis).
 
+You also get a text file `results.txt` with the state overview and a pickle file `results.pickle` containing the full-precision values along with complementary information about the analysis.
+
+Example:
+```
+>>> import pandas as pd
+>>> data = pd.read_pickle('results.pickle')
+>>> data.
+data.dF                  data.datafile_directory  data.decimal             data.estimators          data.prefix              data.software            data.temperature         data.unit
+data.dFs                 data.ddFs                data.equiltime           data.output              data.resultfilename      data.suffix              data.uncorr              data.when_analyzed
+>>> data.when_analyzed
+'Wed Nov 11 15:22:32 2020'
+>>> data.equiltime
+0
+>>> data.software
+'Gromacs'
+>>> data.dF['TI']
+{'coul-lambda': (-15.633404527627823, 0.03466623342555742), 'vdw-lambda': (3.8237866774171514, 0.02952686840637163), 'total': (-11.809617850210671, 0.04553661930581169)}
+>>> data.dF['MBAR']['coul-lambda']
+(-15.617280704605726, 0.03241377327730135)
+>>> 
+```
+
 # How it works
 - Step 1: Read the necessary data
 - Step 2: Uncorrelate the data
 - Step 3: Estimate Free energy differences
 - Step 4: Output
 
-Each step is performed in Plugins which can easyly be be replaced by other plugins. 
+Each step is performed by a plugin, which can easily be replaced by another plugin.
 
 # Name
-In the tradition to associate free energy estimations with alchemnistry it's named after: [Nicolas Flamel](https://en.wikipedia.org/wiki/Nicolas_Flamel)
+In keeping with the tradition of associating free energy estimation with alchemy, the ancient craft of transmuting one element into another, the project is named after [Nicolas Flamel](https://en.wikipedia.org/wiki/Nicolas_Flamel).
 
-# Sate of development:
-Eventhoug alchemical-analysis is not fully covered by Flamel, it can already reproduce some results calculated using alchemical-analysis:
+# State of development
+Even though Flamel does not yet cover everything alchemical-analysis offers, it can already reproduce some results calculated using alchemical-analysis:
 
 In fact for TI, BAR, MBAR you should get exactly the same results:
 
@@ -111,8 +142,12 @@ Alchemical Analysis with the same input files:
     TOTAL:      -29.154  +-  0.241    -29.067  +-  0.170    -29.074  +-  0.220
 ```
 
-# Planed features:
-- **Output of statistical inefficiencies**
+# Planned features
+- [ ] **plotting** 
+Add support for plotting the dHdls of states and the BAR/MBAR overlap matrix (preliminary feature in alchemlyb).
+- [x] **pickle and txt output**
+alchemical-analysis outputs the simple result table as a text file as well as the full precision calculations as a numpy-compatible pickle file.
+- [ ] **Output of statistical inefficiencies**
 alchemical-analysis offers information about the statistical inefficiencies of the input datasets.
-- **Uncorrelation threshold**
-In alchemical-analysis it is possible to specify a threshold for the number of samples to keep in the uncorrelation process.
+- [ ] **Uncorrelation threshold**
+In alchemical-analysis it is possible to specify a threshold for the number of samples to keep in the uncorrelation process.
diff --git a/estimator/bar.py b/estimator/bar.py
@@ -40,4 +40,4 @@ def get_plugin():
 #c29613d34ffafa133c3dc5a90a92ce3a84cbcd0c
 #03649d469383a55c305c1daa55de7792c88a22d3
 #2d3a3ffc3dcf66f311c5c03a8a3214c0d0158554
-#d38701718853261c7667ca50fcbe16ec501310b2
+#d38701718853261c7667ca50fcbe16ec501310b2
diff --git a/flamel.py b/flamel.py
@@ -18,7 +18,7 @@ def get_available_plugin_ids(type):
     if type == 'uncorrelate':
         return ['statistical_inefficiency_dhdl', 'statistical_inefficiency_dhdl_all']
     if type == 'output':
-        return ['simple', 'alchemical_analysis']
+        return ['simple', 'alchemical_analysis', 'pickle']
     if type == 'parser':
         return ['gmx']
 
@@ -90,14 +90,18 @@ def load_plugins(type, selected, *args):
 
 
 def main():
+
     parser = argparse.ArgumentParser(description="""
                     Collect data and estimate free energy differences
                     """, formatter_class=argparse.ArgumentDefaultsHelpFormatter)
     parser.add_argument('-t', '--temperature', dest='temperature', help="Temperature in K. Default: 298 K.", default=298.0, type=float)
     parser.add_argument('-p', '--prefix', dest='prefix', help='Prefix for datafile sets, i.e.\'dhdl\' (default).', default='dhdl')
+    parser.add_argument('-d', '--dir', dest = 'datafile_directory', help = 'Directory in which data files are stored. Default: Current directory.', default = '.')
     parser.add_argument('-q', '--suffix', dest='suffix', help='Suffix for datafile sets, i.e. \'xvg\' (default).', default='xvg')
     parser.add_argument('-e', dest='estimators', type=str, default=None, help="Comma separated Estimator methods")
     parser.add_argument('-n', '--uncorr', dest='uncorr', help='The observable to be used for the autocorrelation analysis; either \'dhdl_all\' (obtained as a sum over all energy components) or \'dhdl\' (obtained as a sum over those energy components that are changing; default) or \'dE\'. In the latter case the energy differences dE_{i,i+1} (dE_{i,i-1} for the last lambda) are used.', default='dhdl')
+    parser.add_argument('-j', '--resultfilename', dest = 'resultfilename', help = 'custom defined result filename prefix. Default: results', default = 'results')
+    parser.add_argument('-u', '--unit', dest = 'unit', help = 'Unit to report energies: \'kJ\', \'kcal\', and \'kT\'. Default: \'kJ\'', default = 'kJ')
     parser.add_argument('-r', '--decimal', dest='decimal', help='The number of decimal places the free energies are to be reported with. No worries, this is for the text output only; the full-precision data will be stored in \'results.pickle\'. Default: 3.', default=3, type=int)
     parser.add_argument('-o', '--output', dest='output', type=str, default=None, help="Output methods")
     parser.add_argument('-a', '--software', dest='software', help='Package\'s name the data files come from: Gromacs, Sire, Desmond, or AMBER. Default: Gromacs.', default='Gromacs')
@@ -122,9 +126,9 @@ def main():
     dhdls = None
     u_nks = None
     if do_dhdl:
-        dhdls = parser.get_dhdls()
+        dhdls = parser.get_dhdls(args)
     if do_u_nks:
-        u_nks = parser.get_u_nks()
+        u_nks = parser.get_u_nks(args)
 
     # Step 2: Uncorrelate the data
     if uncorrelator.needs_dhdls:

diff --git a/output/alchemical_analysis.py b/output/alchemical_analysis.py
@@ -1,9 +1,8 @@
 import numpy as np
-
+import alchemlyb.postprocessors.units as units
 
 class AlchemicalAnalysis:
-    name = 'alchemical-analysis'
-    k_b = 8.3144621E-3
+    name = 'alchemical_analysis'
 
     @classmethod
     def lenr(cls, text, l=21):
@@ -128,9 +127,8 @@ def output(self,  estimators, args):
             Lambdas
         :return:
         """
-        t = args.temperature
+
         seglen = 2 * args.decimal + 15
-        beta = 1.0 / t / self.k_b
         out = ''
         segments = self.segments(estimators)
 
@@ -143,7 +141,7 @@ def output(self,  estimators, args):
         # Labels
         out += self.lenc('States', 12)
         for estimator in estimators:
-            out += self.lenr(estimator.name + ' (kJ/mol)' + ' '*args.decimal, seglen)
+            out += self.lenr(estimator.name + ' (' + args.unit + ')' + ' '*args.decimal, seglen)
         out += "\n"
 
         # Second ----
@@ -157,11 +155,11 @@ def output(self,  estimators, args):
             out += self.lenc(str(i) + ' -- ' + str(i+1), 12)
 
             for estimator in estimators:
-                df = estimator.delta_f
-                ddf = estimator.d_delta_f
+                df = units.get_unit_converter(args.unit)(estimator.delta_f)
+                ddf = units.get_unit_converter(args.unit)(estimator.d_delta_f)
                 out += self.lenr('%s  +-  %s' % (
-                    self.prepare_value(df.values[i, i+1] / beta, args.decimal),
-                    self.prepare_value(ddf.values[i, i+1] / beta, args.decimal)
+                    self.prepare_value(df.values[i, i+1], args.decimal),
+                    self.prepare_value(ddf.values[i, i+1], args.decimal)
                 ), seglen)
             out += "\n"
 
@@ -175,26 +173,28 @@ def output(self,  estimators, args):
             # Segment Energies
             out += self.lenr('%s:  ' % l_name[:-7], 12)
             for estimator in estimators:
-                df = estimator.delta_f
-                ddf = estimator.d_delta_f
+                df = units.get_unit_converter(args.unit)(estimator.delta_f)
+                ddf = units.get_unit_converter(args.unit)(estimator.d_delta_f)
                 out += self.lenr('%s  +-  %s' % (
-                    self.prepare_value(df.values[segstart, segend] / beta, args.decimal),
-                    self.prepare_value(ddf.values[segstart, segend] / beta, args.decimal)
+                    self.prepare_value(df.values[segstart, segend], args.decimal),
+                    self.prepare_value(ddf.values[segstart, segend], args.decimal)
                 ), seglen)
             out += "\n"
 
         # TOTAL Energies
         out += self.lenr('TOTAL:  ', 12)
         for estimator in estimators:
-            df = estimator.delta_f
-            ddf = estimator.d_delta_f
+            df = units.get_unit_converter(args.unit)(estimator.delta_f)
+            ddf = units.get_unit_converter(args.unit)(estimator.d_delta_f)
             out += self.lenr('%s  +-  %s' % (
-                self.prepare_value(df.values[0, -1] / beta, args.decimal),
-                self.prepare_value(ddf.values[0, -1] / beta, args.decimal)
+                self.prepare_value(df.values[0, -1], args.decimal),
+                self.prepare_value(ddf.values[0, -1], args.decimal)
             ), seglen)
         out += "\n"
-
         print(out)
+        txt_file = open(args.resultfilename + '.txt', 'w')
+        txt_file.write(out)
+        txt_file.close()
 
 
 def get_plugin():

diff --git a/output/pickle.py b/output/pickle.py
@@ -0,0 +1,118 @@
+import numpy as np
+import pickle
+import time
+import os
+
+import alchemlyb.postprocessors.units as units
+
+class Pickle:
+    name = 'pickle'
+
+    @classmethod
+    def ls(cls, estimators):
+        """
+        Return a list of lambda values
+        :param estimators: Series
+            List of estimator plugins
+        :return:
+            The list of lambda values
+        """
+        ls = []
+        if estimators:
+            if estimators[0].needs_dhdls:
+                means = estimators[0].dhdls.mean(level=estimators[0].dhdls.index.names[1:])
+                ls = np.array(means.reset_index()[means.index.names[:]])
+            elif estimators[0].needs_u_nks:
+                means = estimators[0].u_nks.mean(level=estimators[0].u_nks.index.names[1:])
+                ls = np.array(means.reset_index()[means.index.names[:]])
+
+        return ls
+
+    @classmethod
+    def l_types(cls, estimators):
+        """
+        Return a list of lambda types
+        :param estimators: Series
+            List of estimator plugins
+        :return:
+            The list of lambda types
+        """
+        l_types = []
+        if estimators:
+            if estimators[0].needs_dhdls:
+                l_types = estimators[0].dhdls.index.names[1:]
+            elif estimators[0].needs_u_nks:
+                l_types = estimators[0].u_nks.index.names[1:]
+
+        return l_types
+
+    @classmethod
+    def segments(cls, estimators):
+        """
+        Collect and prepare values from different `estimators` into a series of values.
+         :param estimators: Series
+            List of estimator plugins
+        :return:
+            Segments of values to output
+        """
+        segments = []
+        l_types = cls.l_types(estimators)
+        ls = cls.ls(estimators)
+        if estimators:
+            segstart = 0
+            ill = [0] * len(l_types)
+            nl = 0
+            for i in range(len(ls)):
+                l = ls[i]
+                if (i < len(ls) - 1 and list(np.array(ls[i + 1], dtype=bool)).count(True) > nl) or i == len(ls) - 1:
+                    if nl > 0:
+                        inl = np.array(np.array(l, dtype=bool), dtype=int)
+                        l_name = l_types[list(inl - ill).index(1)]
+                        ill = inl
+                        segments.append((segstart, i, l_name))
+
+                    if i + 1 < len(ls):
+                        nl = list(np.array(ls[i + 1], dtype=bool)).count(True)
+                    segstart = i
+        return segments
+
+    def output(self,  estimators, args):
+
+        P = args
+
+        P.datafile_directory = os.getcwd()
+        P.when_analyzed = time.asctime()
+        P.dFs = {}
+        P.ddFs = {}
+        P.dF = {}
+
+        segments = self.segments(estimators)
+
+        for estimator in estimators:
+
+            data = {}
+
+            df = units.get_unit_converter(args.unit)(estimator.delta_f)
+            ddf = units.get_unit_converter(args.unit)(estimator.d_delta_f)
+
+            for segstart, segend, l_name in reversed(segments):
+                data[l_name] = (df.values[segstart, segend],
+                        ddf.values[segstart, segend])
+
+            data['total'] = (df.values[0, -1], ddf.values[0, -1])
+
+            P.dFs[estimator.name] = df
+            P.ddFs[estimator.name] = ddf
+
+            P.dF[estimator.name] = data
+
+        pickle.dump(P, open(args.resultfilename + '.pickle', 'wb'))
+
+
+def get_plugin():
+    """
+    Get pickle output plugin
+    :return:
+        pickle output plugin
+    """
+    return Pickle()
diff --git a/output/simple.py b/output/simple.py
@@ -1,11 +1,7 @@
-import alchemlyb.preprocessing
-import pandas
-import numpy as np
-
+import alchemlyb.postprocessors.units as units
 
 class Simple:
     name = 'simple'
-    k_b = 8.3144621E-3
 
     def output(self,  estimators, args):
         """
@@ -18,13 +14,13 @@ def output(self,  estimators, args):
             Lambdas
         :return:
         """
-        t = args.temperature
+
         for estimator in estimators:
-            df = estimator.delta_f
-            ddf = estimator.d_delta_f
-            beta = 1.0 / t / self.k_b
-            dfv = df.values[0, -1] / beta
-            ddfv = ddf.values[0, -1] / beta
+            df = units.get_unit_converter(args.unit)(estimator.delta_f)
+            ddf = units.get_unit_converter(args.unit)(estimator.d_delta_f)
+
+            dfv = df.values[0, -1]
+            ddfv = ddf.values[0, -1]
             print("%s: %f +- %f" % (estimator.name, dfv, ddfv))