From cd2ff92b6a72c1f545e8ee9b5aeff1be26e50d9f Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sat, 25 Sep 2021 20:53:01 +0100 Subject: [PATCH 01/18] Create convergence.py --- src/alchemlyb/postprocessors/convergence.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 src/alchemlyb/postprocessors/convergence.py diff --git a/src/alchemlyb/postprocessors/convergence.py b/src/alchemlyb/postprocessors/convergence.py new file mode 100644 index 00000000..e69de29b From aa3054e8b4d64fadc447f95b0f3e19a3267b06fe Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sat, 25 Sep 2021 20:53:21 +0100 Subject: [PATCH 02/18] Update convergence.py --- src/alchemlyb/postprocessors/convergence.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/alchemlyb/postprocessors/convergence.py b/src/alchemlyb/postprocessors/convergence.py index e69de29b..618d76eb 100644 --- a/src/alchemlyb/postprocessors/convergence.py +++ b/src/alchemlyb/postprocessors/convergence.py @@ -0,0 +1,2 @@ +def test(): + pass \ No newline at end of file From ef21b2d9d6a2343538e1faee00bfb9375a627b77 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sat, 25 Sep 2021 21:14:59 +0100 Subject: [PATCH 03/18] Update convergence.py --- src/alchemlyb/postprocessors/convergence.py | 139 +++++++++++++++++++- 1 file changed, 137 insertions(+), 2 deletions(-) diff --git a/src/alchemlyb/postprocessors/convergence.py b/src/alchemlyb/postprocessors/convergence.py index 618d76eb..641685b9 100644 --- a/src/alchemlyb/postprocessors/convergence.py +++ b/src/alchemlyb/postprocessors/convergence.py @@ -1,2 +1,137 @@ -def test(): - pass \ No newline at end of file +import pandas as pd +def forward_backward_convergence(df_list, estimator='mbar', num=10): + ''' The forward and backward convergence of the free energy estimate. + + Generate the free energy change as a function of time in both + directions, with the specified number of points in the time. 
+ + Parameters + ---------- + df_list : list + List of DataFrame of either dHdl or u_nk. + estimator : {'mbar', 'bar', 'ti'} + Name of the estimators. + num : int + The number of time points. + + Returns + ------- + DataFrame + The DataFrame with convergence data. :: + Forward F. Error Backward B. Error + 0 33.988935 0.334676 35.666128 0.324426 + 1 35.075489 0.232150 35.382850 0.230944 + 2 34.919988 0.190424 35.156028 0.189489 + 3 34.929927 0.165316 35.242255 0.164400 + 4 34.957007 0.147852 35.247704 0.147191 + 5 35.003660 0.134952 35.214658 0.134458 + 6 35.070199 0.124956 35.178422 0.124664 + 7 35.019853 0.116970 35.096870 0.116783 + 8 35.035123 0.110147 35.225907 0.109742 + 9 35.113417 0.104280 35.113417 0.104280 + + ''' + self.logger.info('Start convergence analysis.') + self.logger.info('Check data availability.') + + try: + dHdl_list = self.dHdl_sample_list + self.logger.info('Subsampled dHdl is available.') + except AttributeError: + try: + dHdl_list = self.dHdl_list + self.logger.info('Subsampled dHdl not available, ' + 'use original data instead.') + except AttributeError: # pragma: no cover + self.logger.warning('dHdl is not available.') + + try: + u_nk_list = self.u_nk_sample_list + self.logger.info('Subsampled u_nk is available.') + except AttributeError: + try: + u_nk_list = self.u_nk_list + self.logger.info('Subsampled u_nk not available, ' + 'use original data instead.') + except AttributeError: # pragma: no cover + self.logger.warning('u_nk is not available.') + + if estimator.lower() == 'mbar': + self.logger.info('Use MBAR estimator for convergence analysis.') + estimator_fit = MBAR().fit + elif estimator.lower() == 'bar': + self.logger.info('Use BAR estimator for convergence analysis.') + estimator_fit = BAR().fit + elif estimator.lower() == 'ti': + self.logger.info('Use TI estimator for convergence analysis.') + estimator_fit = TI().fit + else: # pragma: no cover + self.logger.warning( + '{} is not a valid estimator.'.format(estimator)) + + 
converter = get_unit_converter(self.units) + + self.logger.info('Begin forward analysis') + forward_list = [] + forward_error_list = [] + for i in range(1, forwrev + 1): + self.logger.info('Forward analysis: {:.2f}%'.format(i / forwrev)) + sample = [] + if estimator.lower() in ['mbar', 'bar']: + for data in u_nk_list: + sample.append(data[:len(data) // forwrev * i]) + elif estimator.lower() == 'ti': + for data in dHdl_list: + sample.append(data[:len(data) // forwrev * i]) + else: # pragma: no cover + raise NameError( + '{} is not a valid estimator.'.format(estimator)) + sample = concat(sample) + result = estimator_fit(sample) + forward_list.append(converter(result.delta_f_).iloc[0, -1]) + if estimator.lower() == 'bar': + error = np.sqrt(sum( + [converter(result.d_delta_f_).iloc[i, i + 1] ** 2 + for i in range(len(result.d_delta_f_) - 1)])) + forward_error_list.append(error) + else: + forward_error_list.append(converter(result.d_delta_f_).iloc[ + 0, -1]) + self.logger.info('{:.2f} +/- {:.2f} kT'.format(forward_list[-1], + forward_error_list[-1])) + + self.logger.info('Begin backward analysis') + backward_list = [] + backward_error_list = [] + for i in range(1, forwrev + 1): + self.logger.info('Backward analysis: {:.2f}%'.format(i / forwrev)) + sample = [] + if estimator.lower() in ['mbar', 'bar']: + for data in u_nk_list: + sample.append(data[-len(data) // forwrev * i:]) + elif estimator.lower() == 'ti': + for data in dHdl_list: + sample.append(data[-len(data) // forwrev * i:]) + else: # pragma: no cover + raise NameError( + '{} is not a valid estimator.'.format(estimator)) + sample = concat(sample) + result = estimator_fit(sample) + backward_list.append(converter(result.delta_f_).iloc[0, -1]) + if estimator.lower() == 'bar': + error = np.sqrt(sum( + [converter(result.d_delta_f_).iloc[i, i + 1] ** 2 + for i in range(len(result.d_delta_f_) - 1)])) + backward_error_list.append(error) + else: + backward_error_list.append(converter( + result.d_delta_f_).iloc[0, -1]) + 
self.logger.info('{:.2f} +/- {:.2f} kT'.format(backward_list[-1], + backward_error_list[ + -1])) + + convergence = pd.DataFrame( + {'Forward ({})'.format(self.units): forward_list, + 'F. Error ({})'.format(self.units): forward_error_list, + 'Backward ({})'.format(self.units): backward_list, + 'B. Error ({})'.format(self.units): backward_error_list}) \ No newline at end of file From 7238298fd978a294b3d1e64fe6cb069cd4e1cb85 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sun, 26 Sep 2021 12:10:50 +0100 Subject: [PATCH 04/18] update --- src/alchemlyb/postprocessors/__init__.py | 3 + src/alchemlyb/postprocessors/convergence.py | 111 +++++++------------- src/alchemlyb/tests/test_convergence.py | 38 +++++++ 3 files changed, 81 insertions(+), 71 deletions(-) create mode 100644 src/alchemlyb/tests/test_convergence.py diff --git a/src/alchemlyb/postprocessors/__init__.py b/src/alchemlyb/postprocessors/__init__.py index 6e769ac4..563c2196 100644 --- a/src/alchemlyb/postprocessors/__init__.py +++ b/src/alchemlyb/postprocessors/__init__.py @@ -1,3 +1,6 @@ +from .convergence import forward_backward_convergence + __all__ = [ 'units', + 'forward_backward_convergence' ] diff --git a/src/alchemlyb/postprocessors/convergence.py b/src/alchemlyb/postprocessors/convergence.py index 641685b9..7ecc07ea 100644 --- a/src/alchemlyb/postprocessors/convergence.py +++ b/src/alchemlyb/postprocessors/convergence.py @@ -1,4 +1,10 @@ import pandas as pd +import logging +import numpy as np + +from ..estimators import MBAR, BAR, TI +from .. import concat + def forward_backward_convergence(df_list, estimator='mbar', num=10): ''' The forward and backward convergence of the free energy estimate. 
@@ -31,107 +37,70 @@ def forward_backward_convergence(df_list, estimator='mbar', num=10): 9 35.113417 0.104280 35.113417 0.104280 ''' - self.logger.info('Start convergence analysis.') - self.logger.info('Check data availability.') - - try: - dHdl_list = self.dHdl_sample_list - self.logger.info('Subsampled dHdl is available.') - except AttributeError: - try: - dHdl_list = self.dHdl_list - self.logger.info('Subsampled dHdl not available, ' - 'use original data instead.') - except AttributeError: # pragma: no cover - self.logger.warning('dHdl is not available.') - - try: - u_nk_list = self.u_nk_sample_list - self.logger.info('Subsampled u_nk is available.') - except AttributeError: - try: - u_nk_list = self.u_nk_list - self.logger.info('Subsampled u_nk not available, ' - 'use original data instead.') - except AttributeError: # pragma: no cover - self.logger.warning('u_nk is not available.') + logger = logging.getLogger('alchemlyb.postprocessors.' + 'forward_backward_convergence') + logger.info('Start convergence analysis.') + logger.info('Check data availability.') if estimator.lower() == 'mbar': - self.logger.info('Use MBAR estimator for convergence analysis.') + logger.info('Use MBAR estimator for convergence analysis.') estimator_fit = MBAR().fit elif estimator.lower() == 'bar': - self.logger.info('Use BAR estimator for convergence analysis.') + logger.info('Use BAR estimator for convergence analysis.') estimator_fit = BAR().fit elif estimator.lower() == 'ti': - self.logger.info('Use TI estimator for convergence analysis.') + logger.info('Use TI estimator for convergence analysis.') estimator_fit = TI().fit else: # pragma: no cover - self.logger.warning( + logger.warning( '{} is not a valid estimator.'.format(estimator)) - converter = get_unit_converter(self.units) - - self.logger.info('Begin forward analysis') + logger.info('Begin forward analysis') forward_list = [] forward_error_list = [] - for i in range(1, forwrev + 1): - self.logger.info('Forward analysis: 
{:.2f}%'.format(i / forwrev)) + for i in range(1, num + 1): + logger.info('Forward analysis: {:.2f}%'.format(i / num)) sample = [] - if estimator.lower() in ['mbar', 'bar']: - for data in u_nk_list: - sample.append(data[:len(data) // forwrev * i]) - elif estimator.lower() == 'ti': - for data in dHdl_list: - sample.append(data[:len(data) // forwrev * i]) - else: # pragma: no cover - raise NameError( - '{} is not a valid estimator.'.format(estimator)) + for data in df_list: + sample.append(data[:len(data) // num * i]) sample = concat(sample) result = estimator_fit(sample) - forward_list.append(converter(result.delta_f_).iloc[0, -1]) + forward_list.append(result.delta_f_.iloc[0, -1]) if estimator.lower() == 'bar': error = np.sqrt(sum( - [converter(result.d_delta_f_).iloc[i, i + 1] ** 2 + [result.d_delta_f_.iloc[i, i + 1] ** 2 for i in range(len(result.d_delta_f_) - 1)])) forward_error_list.append(error) else: - forward_error_list.append(converter(result.d_delta_f_).iloc[ - 0, -1]) - self.logger.info('{:.2f} +/- {:.2f} kT'.format(forward_list[-1], - forward_error_list[-1])) + forward_error_list.append(result.d_delta_f_.iloc[0, -1]) + logger.info('{:.2f} +/- {:.2f} kT'.format(forward_list[-1], + forward_error_list[-1])) - self.logger.info('Begin backward analysis') + logger.info('Begin backward analysis') backward_list = [] backward_error_list = [] - for i in range(1, forwrev + 1): - self.logger.info('Backward analysis: {:.2f}%'.format(i / forwrev)) + for i in range(1, num + 1): + logger.info('Backward analysis: {:.2f}%'.format(i / num)) sample = [] - if estimator.lower() in ['mbar', 'bar']: - for data in u_nk_list: - sample.append(data[-len(data) // forwrev * i:]) - elif estimator.lower() == 'ti': - for data in dHdl_list: - sample.append(data[-len(data) // forwrev * i:]) - else: # pragma: no cover - raise NameError( - '{} is not a valid estimator.'.format(estimator)) + for data in df_list: + sample.append(data[-len(data) // num * i:]) sample = concat(sample) result = 
estimator_fit(sample) - backward_list.append(converter(result.delta_f_).iloc[0, -1]) + backward_list.append(result.delta_f_.iloc[0, -1]) if estimator.lower() == 'bar': error = np.sqrt(sum( - [converter(result.d_delta_f_).iloc[i, i + 1] ** 2 + [result.d_delta_f_.iloc[i, i + 1] ** 2 for i in range(len(result.d_delta_f_) - 1)])) backward_error_list.append(error) else: - backward_error_list.append(converter( - result.d_delta_f_).iloc[0, -1]) - self.logger.info('{:.2f} +/- {:.2f} kT'.format(backward_list[-1], - backward_error_list[ - -1])) + backward_error_list.append(result.d_delta_f_.iloc[0, -1]) + logger.info('{:.2f} +/- {:.2f} kT'.format(backward_list[-1], + backward_error_list[-1])) convergence = pd.DataFrame( - {'Forward ({})'.format(self.units): forward_list, - 'F. Error ({})'.format(self.units): forward_error_list, - 'Backward ({})'.format(self.units): backward_list, - 'B. Error ({})'.format(self.units): backward_error_list}) \ No newline at end of file + {'Forward': forward_list, + 'F. Error': forward_error_list, + 'Backward': backward_list, + 'B. 
Error': backward_error_list}) + convergence.attrs = df_list[0].attrs + return convergence \ No newline at end of file diff --git a/src/alchemlyb/tests/test_convergence.py b/src/alchemlyb/tests/test_convergence.py new file mode 100644 index 00000000..2f5df840 --- /dev/null +++ b/src/alchemlyb/tests/test_convergence.py @@ -0,0 +1,38 @@ +import pytest + +from alchemtest.gmx import load_benzene +from alchemlyb.parsing import gmx +from alchemlyb.postprocessors import forward_backward_convergence + +@pytest.fixture() +def gmx_benzene(): + dataset = load_benzene() + return [gmx.extract_dHdl(dhdl, T=300) for dhdl in dataset['data']['Coulomb']], \ + [gmx.extract_u_nk(dhdl, T=300) for dhdl in dataset['data']['Coulomb']] + +def test_convergence_ti(gmx_benzene): + dHdl, u_nk = gmx_benzene + convergence = forward_backward_convergence(dHdl, 'TI') + assert convergence.shape == (10, 4) + assert convergence.iloc[0, 0] == pytest.approx(3.07, 0.01) + assert convergence.iloc[0, 2] == pytest.approx(3.11, 0.01) + assert convergence.iloc[-1, 0] == pytest.approx(3.09, 0.01) + assert convergence.iloc[-1, 2] == pytest.approx(3.09, 0.01) + +def test_convergence_mbar(gmx_benzene): + dHdl, u_nk = gmx_benzene + convergence = forward_backward_convergence(u_nk, 'MBAR') + assert convergence.shape == (10, 4) + assert convergence.iloc[0, 0] == pytest.approx(3.02, 0.01) + assert convergence.iloc[0, 2] == pytest.approx(3.06, 0.01) + assert convergence.iloc[-1, 0] == pytest.approx(3.05, 0.01) + assert convergence.iloc[-1, 2] == pytest.approx(3.04, 0.01) + +def test_convergence_bar(gmx_benzene): + dHdl, u_nk = gmx_benzene + convergence = forward_backward_convergence(u_nk, 'BAR') + assert convergence.shape == (10, 4) + assert convergence.iloc[0, 0] == pytest.approx(3.02, 0.01) + assert convergence.iloc[0, 2] == pytest.approx(3.06, 0.01) + assert convergence.iloc[-1, 0] == pytest.approx(3.05, 0.01) + assert convergence.iloc[-1, 2] == pytest.approx(3.04, 0.01) From 8ad6bb2f198772812f41d0b274a564135e4f57c6 
Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sun, 26 Sep 2021 12:40:55 +0100 Subject: [PATCH 05/18] update --- docs/postprocessing.rst | 28 +++++++++++++++++++ ...chemlyb.visualisation.plot_convergence.rst | 10 +++++-- src/alchemlyb/postprocessors/convergence.py | 2 +- src/alchemlyb/tests/test_visualisation.py | 9 ++++++ src/alchemlyb/visualisation/convergence.py | 21 ++++++++++++-- 5 files changed, 63 insertions(+), 7 deletions(-) diff --git a/docs/postprocessing.rst b/docs/postprocessing.rst index d7451108..87f28b61 100644 --- a/docs/postprocessing.rst +++ b/docs/postprocessing.rst @@ -5,6 +5,34 @@ Tools for postprocessing Tools are available for postprocessing the dataframes. +Time Convergence +---------------- +One way of determining the simulation end point is to compute and plot the +forward and backward convergence of the estimate using +:func:`~alchemlyb.postprocessors.forward_backward_convergence` and +:func:`~alchemlyb.visualisation.plot_convergence`. :: + + >>> import pandas as pd + >>> from alchemtest.gmx import load_benzene + >>> from alchemlyb.parsing.gmx import extract_u_nk + >>> from alchemlyb.estimators import MBAR + >>> from alchemlyb.visualisation import plot_convergence + >>> from alchemlyb.postprocessors import forward_backward_convergence + + >>> bz = load_benzene().data + >>> data_list = [extract_u_nk(xvg, T=300) for xvg in bz['Coulomb']] + >>> df = forward_backward_convergence(data_list, 'mbar') + >>> ax = plot_convergence(dataframe=df) + >>> ax.figure.savefig('dF_t.pdf') + +Will give a plot looks like this + +.. figure:: images/dF_t.png + + A convergence plot of showing that the forward and backward has converged + fully. + +.. 
autofunction:: alchemlyb.postprocessors.forward_backward_convergence Unit Conversion --------------- diff --git a/docs/visualisation/alchemlyb.visualisation.plot_convergence.rst b/docs/visualisation/alchemlyb.visualisation.plot_convergence.rst index ea532ea2..c58a7d90 100644 --- a/docs/visualisation/alchemlyb.visualisation.plot_convergence.rst +++ b/docs/visualisation/alchemlyb.visualisation.plot_convergence.rst @@ -5,9 +5,13 @@ Plot the Forward and Backward Convergence The function :func:`~alchemlyb.visualisation.plot_convergence` allows the user to visualise the convergence by plotting the free energy change -computed using the equilibrated snapshots between the proper target time frames -in both forward (data points are stored in `forward` and `forward_error`) and -reverse (data points are stored in `backward` and `backward_error`) directions. +computed using the equilibrated snapshots between the proper target time +frames. The data could be provided as a Dataframe as the output from +:func:`alchemlyb.postprocessors.forward_backward_convergence` or provided +explicitly in both forward (data points are stored in `forward` and +`forward_error`) and reverse (data points are stored in `backward` and +`backward_error`) directions. + The unit in the y axis could be labelled to other units by setting *units*, which by default is :math:`kT`. The user can pass :class:`matplotlib.axes.Axes` into the function to have the convergence drawn on a specific axes. diff --git a/src/alchemlyb/postprocessors/convergence.py b/src/alchemlyb/postprocessors/convergence.py index 7ecc07ea..d0c9523d 100644 --- a/src/alchemlyb/postprocessors/convergence.py +++ b/src/alchemlyb/postprocessors/convergence.py @@ -103,4 +103,4 @@ def forward_backward_convergence(df_list, estimator='mbar', num=10): 'Backward': backward_list, 'B. 
Error': backward_error_list}) convergence.attrs = df_list[0].attrs - return convergence \ No newline at end of file + return convergence diff --git a/src/alchemlyb/tests/test_visualisation.py b/src/alchemlyb/tests/test_visualisation.py index f45dac77..32d8db12 100644 --- a/src/alchemlyb/tests/test_visualisation.py +++ b/src/alchemlyb/tests/test_visualisation.py @@ -12,6 +12,7 @@ from alchemlyb.visualisation.ti_dhdl import plot_ti_dhdl from alchemlyb.visualisation.dF_state import plot_dF_state from alchemlyb.visualisation import plot_convergence +from alchemlyb.postprocessors import forward_backward_convergence def test_plot_mbar_omatrix(): '''Just test if the plot runs''' @@ -126,6 +127,14 @@ def test_plot_dF_state(): assert isinstance(fig, matplotlib.figure.Figure) plt.close(fig) +def test_plot_convergence_dataframe(): + bz = load_benzene().data + data_list = [extract_u_nk(xvg, T=300) for xvg in bz['Coulomb']] + df = forward_backward_convergence(data_list, 'mbar') + ax = plot_convergence(dataframe=df) + assert isinstance(ax, matplotlib.axes.Axes) + plt.close(ax.figure) + def test_plot_convergence(): bz = load_benzene().data data_list = [extract_u_nk(xvg, T=300) for xvg in bz['Coulomb']] diff --git a/src/alchemlyb/visualisation/convergence.py b/src/alchemlyb/visualisation/convergence.py index e93bfe8a..823787b9 100644 --- a/src/alchemlyb/visualisation/convergence.py +++ b/src/alchemlyb/visualisation/convergence.py @@ -2,7 +2,10 @@ from matplotlib.font_manager import FontProperties as FP import numpy as np -def plot_convergence(forward, forward_error, backward, backward_error, +from ..postprocessors.units import get_unit_converter + +def plot_convergence(forward=None, forward_error=None, backward=None, + backward_error=None, dataframe=None, units='kT', ax=None): """Plot the forward and backward convergence. @@ -16,6 +19,11 @@ def plot_convergence(forward, forward_error, backward, backward_error, A list of free energy estimate from the last X% of data. 
backward_error : List A list of error from the last X% of data. + dataframe : Dataframe + Output Dataframe from + :func:`~alchemlyb.postprocessors.forward_backward_convergence`. If + Dataframe is provided, `forward`, `forward_error`, `backward`, + `backward_error` will be ignored. units : str The label for the unit of the estimate. Default: "kT" ax : matplotlib.axes.Axes @@ -32,12 +40,19 @@ def plot_convergence(forward, forward_error, backward, backward_error, The code is taken and modified from `Alchemical Analysis `_. - The units variable is for labelling only. Changing it doesn't change the - unit of the underlying variable. + If `dataframe` is not provide, the units variable is for labelling only. + Changing it doesn't change the unit of the underlying variable. .. versionadded:: 0.4.0 """ + if dataframe is not None: + dataframe = get_unit_converter(units)(dataframe) + forward = dataframe['Forward'].to_numpy() + forward_error = dataframe['F. Error'].to_numpy() + backward = dataframe['Backward'].to_numpy() + backward_error = dataframe['B. Error'].to_numpy() + if ax is None: # pragma: no cover fig, ax = plt.subplots(figsize=(8, 6)) From e87867fa1facc181aaed5833b953f106a576c95a Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sun, 26 Sep 2021 13:43:20 +0100 Subject: [PATCH 06/18] update doc --- docs/postprocessing.rst | 2 -- src/alchemlyb/postprocessors/convergence.py | 5 ++++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/postprocessing.rst b/docs/postprocessing.rst index 87f28b61..55915280 100644 --- a/docs/postprocessing.rst +++ b/docs/postprocessing.rst @@ -12,10 +12,8 @@ forward and backward convergence of the estimate using :func:`~alchemlyb.postprocessors.forward_backward_convergence` and :func:`~alchemlyb.visualisation.plot_convergence`. 
:: - >>> import pandas as pd >>> from alchemtest.gmx import load_benzene >>> from alchemlyb.parsing.gmx import extract_u_nk - >>> from alchemlyb.estimators import MBAR >>> from alchemlyb.visualisation import plot_convergence >>> from alchemlyb.postprocessors import forward_backward_convergence diff --git a/src/alchemlyb/postprocessors/convergence.py b/src/alchemlyb/postprocessors/convergence.py index d0c9523d..01c891b8 100644 --- a/src/alchemlyb/postprocessors/convergence.py +++ b/src/alchemlyb/postprocessors/convergence.py @@ -24,7 +24,8 @@ def forward_backward_convergence(df_list, estimator='mbar', num=10): ------- DataFrame The DataFrame with convergence data. :: - Forward F. Error Backward B. Error + + Forward F. Error Backward B. Error 0 33.988935 0.334676 35.666128 0.324426 1 35.075489 0.232150 35.382850 0.230944 2 34.919988 0.190424 35.156028 0.189489 @@ -36,6 +37,8 @@ def forward_backward_convergence(df_list, estimator='mbar', num=10): 8 35.035123 0.110147 35.225907 0.109742 9 35.113417 0.104280 35.113417 0.104280 + + ''' logger = logging.getLogger('alchemlyb.postprocessors.' 
'forward_backward_convergence') From 7d2b936c0e221c45130d6b5e6b511f19a9c2e0e8 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sun, 26 Sep 2021 13:51:43 +0100 Subject: [PATCH 07/18] update doc --- docs/visualisation/alchemlyb.visualisation.plot_convergence.rst | 2 +- src/alchemlyb/postprocessors/convergence.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/docs/visualisation/alchemlyb.visualisation.plot_convergence.rst b/docs/visualisation/alchemlyb.visualisation.plot_convergence.rst index c58a7d90..74a85e89 100644 --- a/docs/visualisation/alchemlyb.visualisation.plot_convergence.rst +++ b/docs/visualisation/alchemlyb.visualisation.plot_convergence.rst @@ -6,7 +6,7 @@ Plot the Forward and Backward Convergence The function :func:`~alchemlyb.visualisation.plot_convergence` allows the user to visualise the convergence by plotting the free energy change computed using the equilibrated snapshots between the proper target time -frames. The data could be provided as a Dataframe as the output from +frames. The data could be provided as a Dataframe from :func:`alchemlyb.postprocessors.forward_backward_convergence` or provided explicitly in both forward (data points are stored in `forward` and `forward_error`) and reverse (data points are stored in `backward` and diff --git a/src/alchemlyb/postprocessors/convergence.py b/src/alchemlyb/postprocessors/convergence.py index 01c891b8..efc3b32f 100644 --- a/src/alchemlyb/postprocessors/convergence.py +++ b/src/alchemlyb/postprocessors/convergence.py @@ -37,8 +37,6 @@ def forward_backward_convergence(df_list, estimator='mbar', num=10): 8 35.035123 0.110147 35.225907 0.109742 9 35.113417 0.104280 35.113417 0.104280 - - ''' logger = logging.getLogger('alchemlyb.postprocessors.' 
'forward_backward_convergence') From 36f9adc73aea833e6432ef689d2be7ed83387628 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Mon, 4 Oct 2021 21:25:07 +0100 Subject: [PATCH 08/18] update --- docs/api_principles.rst | 6 +- docs/convergence.rst | 34 ++++++ docs/index.rst | 1 + docs/postprocessing.rst | 26 ----- src/alchemlyb/convergence/__init__.py | 1 + src/alchemlyb/convergence/convergence.py | 113 ++++++++++++++++++++ src/alchemlyb/postprocessors/__init__.py | 3 - src/alchemlyb/postprocessors/convergence.py | 107 ------------------ src/alchemlyb/tests/test_convergence.py | 2 +- src/alchemlyb/tests/test_visualisation.py | 4 +- src/alchemlyb/visualisation/convergence.py | 34 +++--- 11 files changed, 172 insertions(+), 159 deletions(-) create mode 100644 docs/convergence.rst delete mode 100644 src/alchemlyb/postprocessors/convergence.py diff --git a/docs/api_principles.rst b/docs/api_principles.rst index d629b50c..492ed944 100644 --- a/docs/api_principles.rst +++ b/docs/api_principles.rst @@ -57,7 +57,8 @@ The library is structured as follows, following a similar style to │   └── ... ├── postprocessors │   ├── ... - │   └── units.py + │   └── convergence.py + │   └── units.py ├── visualisation │   ├── convergence.py │   ├── dF_state.py @@ -83,8 +84,7 @@ The :mod:`~alchemlyb.estimators` module features classes *a la* **scikit-learn** MBAR, BAR, and thermodynamic integration (TI) as the major methods are all implemented. Correct error estimates require the use of time series with independent samples. -The :mod:`~alchemlyb.convergence` submodule will feature convenience functions/classes for doing convergence analysis using a given dataset and a chosen estimator, though the form of this is not yet thought-out. -However, the `gist a41e5756a58e1775e3e3a915f07bfd37`_ shows an example for how this can be done already in practice. 
+The :mod:`~alchemlyb.convergence` submodule will feature convenience functions/classes for doing convergence analysis using a given dataset and a chosen estimator. The :mod:`~alchemlyb.postprocessing` submodule contains functions to calculate new quantities or express data in different units. diff --git a/docs/convergence.rst b/docs/convergence.rst new file mode 100644 index 00000000..be20a657 --- /dev/null +++ b/docs/convergence.rst @@ -0,0 +1,34 @@ +Using functions to estimate Convergence +======================================= + +For a result to be valid, we need to ensure that longer simulation time +would not result in different results. Various functions are provided in +this module to estimate the convergence of the data and help the user determine +the simulation end point. + +Time Convergence +---------------- +One way of determining the simulation end point is to compute and plot the +forward and backward convergence of the estimate using +:func:`~alchemlyb.convergence.forward_backward_convergence` and +:func:`~alchemlyb.visualisation.plot_convergence`. :: + + >>> from alchemtest.gmx import load_benzene + >>> from alchemlyb.parsing.gmx import extract_u_nk + >>> from alchemlyb.visualisation import plot_convergence + >>> from alchemlyb.convergence import forward_backward_convergence + + >>> bz = load_benzene().data + >>> data_list = [extract_u_nk(xvg, T=300) for xvg in bz['Coulomb']] + >>> df = forward_backward_convergence(data_list, 'mbar') + >>> ax = plot_convergence(dataframe=df) + >>> ax.figure.savefig('dF_t.pdf') + +This will give a plot that looks like this + +.. figure:: images/dF_t.png + + A convergence plot showing that the forward and backward estimates have + converged fully. + +.. autofunction:: alchemlyb.convergence.forward_backward_convergence \ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst index dfcabf6a..f9b36240 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -70,6 +70,7 @@ Contributions are very welcome.
If you have bug reports or feature requests or q parsing preprocessing estimators + convergence postprocessing visualisation diff --git a/docs/postprocessing.rst b/docs/postprocessing.rst index 55915280..d7451108 100644 --- a/docs/postprocessing.rst +++ b/docs/postprocessing.rst @@ -5,32 +5,6 @@ Tools for postprocessing Tools are available for postprocessing the dataframes. -Time Convergence ----------------- -One way of determining the simulation end point is to compute and plot the -forward and backward convergence of the estimate using -:func:`~alchemlyb.postprocessors.forward_backward_convergence` and -:func:`~alchemlyb.visualisation.plot_convergence`. :: - - >>> from alchemtest.gmx import load_benzene - >>> from alchemlyb.parsing.gmx import extract_u_nk - >>> from alchemlyb.visualisation import plot_convergence - >>> from alchemlyb.postprocessors import forward_backward_convergence - - >>> bz = load_benzene().data - >>> data_list = [extract_u_nk(xvg, T=300) for xvg in bz['Coulomb']] - >>> df = forward_backward_convergence(data_list, 'mbar') - >>> ax = plot_convergence(dataframe=df) - >>> ax.figure.savefig('dF_t.pdf') - -Will give a plot looks like this - -.. figure:: images/dF_t.png - - A convergence plot of showing that the forward and backward has converged - fully. - -.. 
autofunction:: alchemlyb.postprocessors.forward_backward_convergence Unit Conversion --------------- diff --git a/src/alchemlyb/convergence/__init__.py b/src/alchemlyb/convergence/__init__.py index e69de29b..6469cee2 100644 --- a/src/alchemlyb/convergence/__init__.py +++ b/src/alchemlyb/convergence/__init__.py @@ -0,0 +1 @@ +from convergence import forward_backward_convergence diff --git a/src/alchemlyb/convergence/convergence.py b/src/alchemlyb/convergence/convergence.py index e69de29b..8a4124c4 100644 --- a/src/alchemlyb/convergence/convergence.py +++ b/src/alchemlyb/convergence/convergence.py @@ -0,0 +1,113 @@ +import pandas as pd +import logging +import numpy as np + +from ..estimators import MBAR, BAR, TI +from .. import concat + + +def forward_backward_convergence(df_list, estimator='mbar', num=10): + ''' The forward and backward convergence of the free energy estimate. + + Generate the free energy change as a function of time in both + directions, with the specified number of points in the time. + + Parameters + ---------- + df_list : list + List of DataFrame of either dHdl or u_nk. + estimator : {'mbar', 'bar', 'ti'} + Name of the estimators. + num : int + The number of time points. + + Returns + ------- + DataFrame + The DataFrame with convergence data. :: + + Forward Forward_Error Backward Backward_Error + t_fraction + 1/10 3.067943 0.070175 3.111035 0.067088 + 2/10 3.122223 0.049303 3.126450 0.048173 + 3/10 3.117742 0.039916 3.094115 0.039099 + 4/10 3.091870 0.034389 3.101558 0.033783 + 5/10 3.093778 0.030814 3.082714 0.030148 + 6/10 3.079128 0.027999 3.085972 0.027652 + 7/10 3.086951 0.025847 3.077004 0.025610 + 8/10 3.079147 0.024122 3.081519 0.023968 + 9/10 3.086575 0.022778 3.090475 0.022633 + 10/10 3.088821 0.021573 3.089027 0.021568 + + + .. versionadded:: 0.6.0 + ''' + logger = logging.getLogger('alchemlyb.postprocessors.' 
+ 'forward_backward_convergence') + logger.info('Start convergence analysis.') + logger.info('Check data availability.') + + if estimator.lower() == 'mbar': + logger.info('Use MBAR estimator for convergence analysis.') + estimator_fit = MBAR().fit + elif estimator.lower() == 'bar': + logger.info('Use BAR estimator for convergence analysis.') + estimator_fit = BAR().fit + elif estimator.lower() == 'ti': + logger.info('Use TI estimator for convergence analysis.') + estimator_fit = TI().fit + else: # pragma: no cover + logger.warning( + '{} is not a valid estimator.'.format(estimator)) + + logger.info('Begin forward analysis') + forward_list = [] + forward_error_list = [] + for i in range(1, num + 1): + logger.info('Forward analysis: {:.2f}%'.format(i / num)) + sample = [] + for data in df_list: + sample.append(data[:len(data) // num * i]) + sample = concat(sample) + result = estimator_fit(sample) + forward_list.append(result.delta_f_.iloc[0, -1]) + if estimator.lower() == 'bar': + error = np.sqrt(sum( + [result.d_delta_f_.iloc[i, i + 1] ** 2 + for i in range(len(result.d_delta_f_) - 1)])) + forward_error_list.append(error) + else: + forward_error_list.append(result.d_delta_f_.iloc[0, -1]) + logger.info('{:.2f} +/- {:.2f} kT'.format(forward_list[-1], + forward_error_list[-1])) + + logger.info('Begin backward analysis') + backward_list = [] + backward_error_list = [] + for i in range(1, num + 1): + logger.info('Backward analysis: {:.2f}%'.format(i / num)) + sample = [] + for data in df_list: + sample.append(data[-len(data) // num * i:]) + sample = concat(sample) + result = estimator_fit(sample) + backward_list.append(result.delta_f_.iloc[0, -1]) + if estimator.lower() == 'bar': + error = np.sqrt(sum( + [result.d_delta_f_.iloc[i, i + 1] ** 2 + for i in range(len(result.d_delta_f_) - 1)])) + backward_error_list.append(error) + else: + backward_error_list.append(result.d_delta_f_.iloc[0, -1]) + logger.info('{:.2f} +/- {:.2f} kT'.format(backward_list[-1], + 
backward_error_list[-1])) + + convergence = pd.DataFrame( + {'Forward': forward_list, + 'Forward_Error': forward_error_list, + 'Backward': backward_list, + 'Backward_Error': backward_error_list}, + index=['{}/{}'.format(i, num) for i in range(1, num + 1)]) + convergence.index.name = 't_fraction' + convergence.attrs = df_list[0].attrs + return convergence diff --git a/src/alchemlyb/postprocessors/__init__.py b/src/alchemlyb/postprocessors/__init__.py index 563c2196..6e769ac4 100644 --- a/src/alchemlyb/postprocessors/__init__.py +++ b/src/alchemlyb/postprocessors/__init__.py @@ -1,6 +1,3 @@ -from .convergence import forward_backward_convergence - __all__ = [ 'units', - 'forward_backward_convergence' ] diff --git a/src/alchemlyb/postprocessors/convergence.py b/src/alchemlyb/postprocessors/convergence.py deleted file mode 100644 index efc3b32f..00000000 --- a/src/alchemlyb/postprocessors/convergence.py +++ /dev/null @@ -1,107 +0,0 @@ -import pandas as pd -import logging -import numpy as np - -from ..estimators import MBAR, BAR, TI -from .. import concat - -def forward_backward_convergence(df_list, estimator='mbar', num=10): - ''' The forward and backward convergence of the free energy estimate. - - Generate the free energy change as a function of time in both - directions, with the specified number of points in the time. - - Parameters - ---------- - df_list : list - List of DataFrame of either dHdl or u_nk. - estimator : {'mbar', 'bar', 'ti'} - Name of the estimators. - num : int - The number of time points. - - Returns - ------- - DataFrame - The DataFrame with convergence data. :: - - Forward F. Error Backward B. 
Error - 0 33.988935 0.334676 35.666128 0.324426 - 1 35.075489 0.232150 35.382850 0.230944 - 2 34.919988 0.190424 35.156028 0.189489 - 3 34.929927 0.165316 35.242255 0.164400 - 4 34.957007 0.147852 35.247704 0.147191 - 5 35.003660 0.134952 35.214658 0.134458 - 6 35.070199 0.124956 35.178422 0.124664 - 7 35.019853 0.116970 35.096870 0.116783 - 8 35.035123 0.110147 35.225907 0.109742 - 9 35.113417 0.104280 35.113417 0.104280 - - ''' - logger = logging.getLogger('alchemlyb.postprocessors.' - 'forward_backward_convergence') - logger.info('Start convergence analysis.') - logger.info('Check data availability.') - - if estimator.lower() == 'mbar': - logger.info('Use MBAR estimator for convergence analysis.') - estimator_fit = MBAR().fit - elif estimator.lower() == 'bar': - logger.info('Use BAR estimator for convergence analysis.') - estimator_fit = BAR().fit - elif estimator.lower() == 'ti': - logger.info('Use TI estimator for convergence analysis.') - estimator_fit = TI().fit - else: # pragma: no cover - logger.warning( - '{} is not a valid estimator.'.format(estimator)) - - logger.info('Begin forward analysis') - forward_list = [] - forward_error_list = [] - for i in range(1, num + 1): - logger.info('Forward analysis: {:.2f}%'.format(i / num)) - sample = [] - for data in df_list: - sample.append(data[:len(data) // num * i]) - sample = concat(sample) - result = estimator_fit(sample) - forward_list.append(result.delta_f_.iloc[0, -1]) - if estimator.lower() == 'bar': - error = np.sqrt(sum( - [result.d_delta_f_.iloc[i, i + 1] ** 2 - for i in range(len(result.d_delta_f_) - 1)])) - forward_error_list.append(error) - else: - forward_error_list.append(result.d_delta_f_.iloc[0, -1]) - logger.info('{:.2f} +/- {:.2f} kT'.format(forward_list[-1], - forward_error_list[-1])) - - logger.info('Begin backward analysis') - backward_list = [] - backward_error_list = [] - for i in range(1, num + 1): - logger.info('Backward analysis: {:.2f}%'.format(i / num)) - sample = [] - for data in 
df_list: - sample.append(data[-len(data) // num * i:]) - sample = concat(sample) - result = estimator_fit(sample) - backward_list.append(result.delta_f_.iloc[0, -1]) - if estimator.lower() == 'bar': - error = np.sqrt(sum( - [result.d_delta_f_.iloc[i, i + 1] ** 2 - for i in range(len(result.d_delta_f_) - 1)])) - backward_error_list.append(error) - else: - backward_error_list.append(result.d_delta_f_.iloc[0, -1]) - logger.info('{:.2f} +/- {:.2f} kT'.format(backward_list[-1], - backward_error_list[-1])) - - convergence = pd.DataFrame( - {'Forward': forward_list, - 'F. Error': forward_error_list, - 'Backward': backward_list, - 'B. Error': backward_error_list}) - convergence.attrs = df_list[0].attrs - return convergence diff --git a/src/alchemlyb/tests/test_convergence.py b/src/alchemlyb/tests/test_convergence.py index 2f5df840..5fae5841 100644 --- a/src/alchemlyb/tests/test_convergence.py +++ b/src/alchemlyb/tests/test_convergence.py @@ -2,7 +2,7 @@ from alchemtest.gmx import load_benzene from alchemlyb.parsing import gmx -from alchemlyb.postprocessors import forward_backward_convergence +from alchemlyb.convergence import forward_backward_convergence @pytest.fixture() def gmx_benzene(): diff --git a/src/alchemlyb/tests/test_visualisation.py b/src/alchemlyb/tests/test_visualisation.py index 32d8db12..bcaf4a32 100644 --- a/src/alchemlyb/tests/test_visualisation.py +++ b/src/alchemlyb/tests/test_visualisation.py @@ -12,7 +12,7 @@ from alchemlyb.visualisation.ti_dhdl import plot_ti_dhdl from alchemlyb.visualisation.dF_state import plot_dF_state from alchemlyb.visualisation import plot_convergence -from alchemlyb.postprocessors import forward_backward_convergence +from alchemlyb.convergence import forward_backward_convergence def test_plot_mbar_omatrix(): '''Just test if the plot runs''' @@ -131,7 +131,7 @@ def test_plot_convergence_dataframe(): bz = load_benzene().data data_list = [extract_u_nk(xvg, T=300) for xvg in bz['Coulomb']] df = 
forward_backward_convergence(data_list, 'mbar') - ax = plot_convergence(dataframe=df) + ax = plot_convergence(df) assert isinstance(ax, matplotlib.axes.Axes) plt.close(ax.figure) diff --git a/src/alchemlyb/visualisation/convergence.py b/src/alchemlyb/visualisation/convergence.py index 823787b9..d3cccf43 100644 --- a/src/alchemlyb/visualisation/convergence.py +++ b/src/alchemlyb/visualisation/convergence.py @@ -1,29 +1,20 @@ import matplotlib.pyplot as plt +import pandas as pd from matplotlib.font_manager import FontProperties as FP import numpy as np from ..postprocessors.units import get_unit_converter -def plot_convergence(forward=None, forward_error=None, backward=None, - backward_error=None, dataframe=None, - units='kT', ax=None): +def plot_convergence(*data, units='kT', ax=None): """Plot the forward and backward convergence. Parameters ---------- - forward : List - A list of free energy estimate from the first X% of data. - forward_error : List - A list of error from the first X% of data. - backward : List - A list of free energy estimate from the last X% of data. - backward_error : List - A list of error from the last X% of data. - dataframe : Dataframe + data : Dataframe or 4 Lists Output Dataframe from - :func:`~alchemlyb.postprocessors.forward_backward_convergence`. If - Dataframe is provided, `forward`, `forward_error`, `backward`, - `backward_error` will be ignored. + :func:`~alchemlyb.postprocessors.convergence.forward_backward_convergence`. + Or given explicitly as `forward`, `forward_error`, `backward`, + `backward_error` see :ref:`plot_convergence `. units : str The label for the unit of the estimate. Default: "kT" ax : matplotlib.axes.Axes @@ -44,14 +35,23 @@ def plot_convergence(forward=None, forward_error=None, backward=None, Changing it doesn't change the unit of the underlying variable. + .. versionchanged:: 0.6.0 + data now takes in dataframe + .. 
versionadded:: 0.4.0 """ - if dataframe is not None: - dataframe = get_unit_converter(units)(dataframe) + if len(data) == 1 and isinstance(data[0], pd.DataFrame): + dataframe = get_unit_converter(units)(data) forward = dataframe['Forward'].to_numpy() forward_error = dataframe['F. Error'].to_numpy() backward = dataframe['Backward'].to_numpy() backward_error = dataframe['B. Error'].to_numpy() + else: + try: + forward, forward_error, backward, backward_error = data + except ValueError: + raise ValueError('Ensure all four of forward, forward_error, ' + 'backward, backward_error are supplied.') if ax is None: # pragma: no cover fig, ax = plt.subplots(figsize=(8, 6)) From f9d08bc8bd865adf0ea3fb4de53da7a595e547b3 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Mon, 4 Oct 2021 21:32:15 +0100 Subject: [PATCH 09/18] update --- docs/api_principles.rst | 1 - docs/convergence.rst | 6 +++--- .../alchemlyb.visualisation.plot_convergence.rst | 2 +- src/alchemlyb/convergence/__init__.py | 2 +- 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/docs/api_principles.rst b/docs/api_principles.rst index 492ed944..8d912d08 100644 --- a/docs/api_principles.rst +++ b/docs/api_principles.rst @@ -57,7 +57,6 @@ The library is structured as follows, following a similar style to │   └── ... ├── postprocessors │   ├── ... - │   └── convergence.py │   └── units.py ├── visualisation │   ├── convergence.py diff --git a/docs/convergence.rst b/docs/convergence.rst index be20a657..14a1b480 100644 --- a/docs/convergence.rst +++ b/docs/convergence.rst @@ -2,8 +2,8 @@ Using functions to estimate Convergence ======================================= For a result to be valid, we need to ensure that longer simulation time -would not result in different results. Various functions are provided in -this module to estimate the convergence of the data and help user determine +would not result in different results. 
Various functions will be provided in +this module to estimate the convergence of the estimate and help user determine the simulation end point. Time Convergence @@ -21,7 +21,7 @@ forward and backward convergence of the estimate using >>> bz = load_benzene().data >>> data_list = [extract_u_nk(xvg, T=300) for xvg in bz['Coulomb']] >>> df = forward_backward_convergence(data_list, 'mbar') - >>> ax = plot_convergence(dataframe=df) + >>> ax = plot_convergence(df) >>> ax.figure.savefig('dF_t.pdf') Will give a plot looks like this diff --git a/docs/visualisation/alchemlyb.visualisation.plot_convergence.rst b/docs/visualisation/alchemlyb.visualisation.plot_convergence.rst index 74a85e89..3ca9abbf 100644 --- a/docs/visualisation/alchemlyb.visualisation.plot_convergence.rst +++ b/docs/visualisation/alchemlyb.visualisation.plot_convergence.rst @@ -7,7 +7,7 @@ The function :func:`~alchemlyb.visualisation.plot_convergence` allows the user to visualise the convergence by plotting the free energy change computed using the equilibrated snapshots between the proper target time frames. The data could be provided as a Dataframe from -:func:`alchemlyb.postprocessors.forward_backward_convergence` or provided +:func:`alchemlyb.convergence.forward_backward_convergence` or provided explicitly in both forward (data points are stored in `forward` and `forward_error`) and reverse (data points are stored in `backward` and `backward_error`) directions. 
diff --git a/src/alchemlyb/convergence/__init__.py b/src/alchemlyb/convergence/__init__.py index 6469cee2..e8dd32b3 100644 --- a/src/alchemlyb/convergence/__init__.py +++ b/src/alchemlyb/convergence/__init__.py @@ -1 +1 @@ -from convergence import forward_backward_convergence +from .convergence import forward_backward_convergence From 305ceac8a84cd4285fc23a18a9fa46354ed7fe60 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Thu, 7 Oct 2021 20:09:20 +0100 Subject: [PATCH 10/18] update --- CHANGES | 4 +++ src/alchemlyb/convergence/convergence.py | 36 ++++++++++------------ src/alchemlyb/tests/test_convergence.py | 8 ++++- src/alchemlyb/visualisation/convergence.py | 13 +++++--- 4 files changed, 36 insertions(+), 25 deletions(-) diff --git a/CHANGES b/CHANGES index 91e5ae66..d7ccf870 100644 --- a/CHANGES +++ b/CHANGES @@ -20,6 +20,10 @@ The rules for this file: Changes Enhancements + - Time convergence function forward_backward_convergence + introduced and plot_convergence now takes dataframe from + forward_backward_convergence as input (PR #168). + Fixes - Subsampling now works with bounds and step (PR #167, issue #166). diff --git a/src/alchemlyb/convergence/convergence.py b/src/alchemlyb/convergence/convergence.py index 8a4124c4..3e3ec682 100644 --- a/src/alchemlyb/convergence/convergence.py +++ b/src/alchemlyb/convergence/convergence.py @@ -26,18 +26,17 @@ def forward_backward_convergence(df_list, estimator='mbar', num=10): DataFrame The DataFrame with convergence data. 
:: - Forward Forward_Error Backward Backward_Error - t_fraction - 1/10 3.067943 0.070175 3.111035 0.067088 - 2/10 3.122223 0.049303 3.126450 0.048173 - 3/10 3.117742 0.039916 3.094115 0.039099 - 4/10 3.091870 0.034389 3.101558 0.033783 - 5/10 3.093778 0.030814 3.082714 0.030148 - 6/10 3.079128 0.027999 3.085972 0.027652 - 7/10 3.086951 0.025847 3.077004 0.025610 - 8/10 3.079147 0.024122 3.081519 0.023968 - 9/10 3.086575 0.022778 3.090475 0.022633 - 10/10 3.088821 0.021573 3.089027 0.021568 + Forward Forward_Error Backward Backward_Error data_fraction + 0 3.016442 0.052748 3.065176 0.051036 0.1 + 1 3.078106 0.037170 3.078567 0.036640 0.2 + 2 3.072561 0.030186 3.047357 0.029775 0.3 + 3 3.048325 0.026070 3.057527 0.025743 0.4 + 4 3.049769 0.023359 3.037454 0.023001 0.5 + 5 3.034078 0.021260 3.040484 0.021075 0.6 + 6 3.043274 0.019642 3.032495 0.019517 0.7 + 7 3.035460 0.018340 3.036670 0.018261 0.8 + 8 3.042032 0.017319 3.046597 0.017233 0.9 + 9 3.044149 0.016405 3.044385 0.016402 1.0 .. versionadded:: 0.6.0 @@ -56,15 +55,15 @@ def forward_backward_convergence(df_list, estimator='mbar', num=10): elif estimator.lower() == 'ti': logger.info('Use TI estimator for convergence analysis.') estimator_fit = TI().fit - else: # pragma: no cover - logger.warning( + else: + raise ValueError( '{} is not a valid estimator.'.format(estimator)) logger.info('Begin forward analysis') forward_list = [] forward_error_list = [] for i in range(1, num + 1): - logger.info('Forward analysis: {:.2f}%'.format(i / num)) + logger.info('Forward analysis: {:.2f}%'.format(100 * i / num)) sample = [] for data in df_list: sample.append(data[:len(data) // num * i]) @@ -85,7 +84,7 @@ def forward_backward_convergence(df_list, estimator='mbar', num=10): backward_list = [] backward_error_list = [] for i in range(1, num + 1): - logger.info('Backward analysis: {:.2f}%'.format(i / num)) + logger.info('Backward analysis: {:.2f}%'.format(100 * i / num)) sample = [] for data in df_list: 
sample.append(data[-len(data) // num * i:]) @@ -106,8 +105,7 @@ def forward_backward_convergence(df_list, estimator='mbar', num=10): {'Forward': forward_list, 'Forward_Error': forward_error_list, 'Backward': backward_list, - 'Backward_Error': backward_error_list}, - index=['{}/{}'.format(i, num) for i in range(1, num + 1)]) - convergence.index.name = 't_fraction' + 'Backward_Error': backward_error_list, + 'data_fraction': [i / num for i in range(1, num + 1)]}) convergence.attrs = df_list[0].attrs return convergence diff --git a/src/alchemlyb/tests/test_convergence.py b/src/alchemlyb/tests/test_convergence.py index 5fae5841..ab533a92 100644 --- a/src/alchemlyb/tests/test_convergence.py +++ b/src/alchemlyb/tests/test_convergence.py @@ -31,8 +31,14 @@ def test_convergence_mbar(gmx_benzene): def test_convergence_bar(gmx_benzene): dHdl, u_nk = gmx_benzene convergence = forward_backward_convergence(u_nk, 'BAR') - assert convergence.shape == (10, 4) + assert convergence.shape == (10, 5) assert convergence.iloc[0, 0] == pytest.approx(3.02, 0.01) assert convergence.iloc[0, 2] == pytest.approx(3.06, 0.01) assert convergence.iloc[-1, 0] == pytest.approx(3.05, 0.01) assert convergence.iloc[-1, 2] == pytest.approx(3.04, 0.01) + +def test_convergence_wrong_estimator(gmx_benzene): + dHdl, u_nk = gmx_benzene + with pytest.raises(ValueError): + convergence = forward_backward_convergence(u_nk, 'www') + \ No newline at end of file diff --git a/src/alchemlyb/visualisation/convergence.py b/src/alchemlyb/visualisation/convergence.py index d3cccf43..a30585b5 100644 --- a/src/alchemlyb/visualisation/convergence.py +++ b/src/alchemlyb/visualisation/convergence.py @@ -10,9 +10,9 @@ def plot_convergence(*data, units='kT', ax=None): Parameters ---------- - data : Dataframe or 4 Lists + data : Dataframe or 4 array_like objects Output Dataframe from - :func:`~alchemlyb.postprocessors.convergence.forward_backward_convergence`. + :func:`~alchemlyb.convergence.forward_backward_convergence`. 
Or given explicitly as `forward`, `forward_error`, `backward`, `backward_error` see :ref:`plot_convergence `. units : str @@ -31,7 +31,10 @@ def plot_convergence(*data, units='kT', ax=None): The code is taken and modified from `Alchemical Analysis `_. - If `dataframe` is not provide, the units variable is for labelling only. + If `data` is not an :class:pandas.Dataframe` produced by + :func:`~alchemlyb.convergence.forward_backward_convergence`, + the unit will be adjusted accoridng to the units + variable. Otherwise, the units variable is for labelling only. Changing it doesn't change the unit of the underlying variable. @@ -43,9 +46,9 @@ def plot_convergence(*data, units='kT', ax=None): if len(data) == 1 and isinstance(data[0], pd.DataFrame): dataframe = get_unit_converter(units)(data) forward = dataframe['Forward'].to_numpy() - forward_error = dataframe['F. Error'].to_numpy() + forward_error = dataframe['Forward_Error'].to_numpy() backward = dataframe['Backward'].to_numpy() - backward_error = dataframe['B. 
Error'].to_numpy() + backward_error = dataframe['Backward_Error'].to_numpy() else: try: forward, forward_error, backward, backward_error = data From d28158253a83a942e31bbe0b55a0608fcd098bbd Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Thu, 7 Oct 2021 20:16:06 +0100 Subject: [PATCH 11/18] Update test_convergence.py --- src/alchemlyb/tests/test_convergence.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/alchemlyb/tests/test_convergence.py b/src/alchemlyb/tests/test_convergence.py index 5f70c24a..f1b1a43c 100644 --- a/src/alchemlyb/tests/test_convergence.py +++ b/src/alchemlyb/tests/test_convergence.py @@ -13,7 +13,7 @@ def gmx_benzene(): def test_convergence_ti(gmx_benzene): dHdl, u_nk = gmx_benzene convergence = forward_backward_convergence(dHdl, 'TI') - assert convergence.shape == (10, 4) + assert convergence.shape == (10, 5) assert convergence.iloc[0, 0] == pytest.approx(3.07, 0.01) assert convergence.iloc[0, 2] == pytest.approx(3.11, 0.01) assert convergence.iloc[-1, 0] == pytest.approx(3.09, 0.01) @@ -22,7 +22,7 @@ def test_convergence_ti(gmx_benzene): def test_convergence_mbar(gmx_benzene): dHdl, u_nk = gmx_benzene convergence = forward_backward_convergence(u_nk, 'MBAR') - assert convergence.shape == (10, 4) + assert convergence.shape == (10, 5) assert convergence.iloc[0, 0] == pytest.approx(3.02, 0.01) assert convergence.iloc[0, 2] == pytest.approx(3.06, 0.01) assert convergence.iloc[-1, 0] == pytest.approx(3.05, 0.01) From f6ce6660866bb9bcaddf1497398f58cc0316b475 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Thu, 7 Oct 2021 20:24:38 +0100 Subject: [PATCH 12/18] update --- src/alchemlyb/convergence/convergence.py | 6 +++++- src/alchemlyb/visualisation/convergence.py | 11 +++++++++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/alchemlyb/convergence/convergence.py b/src/alchemlyb/convergence/convergence.py index 3e3ec682..db693c76 100644 --- a/src/alchemlyb/convergence/convergence.py +++ 
b/src/alchemlyb/convergence/convergence.py @@ -10,7 +10,11 @@ def forward_backward_convergence(df_list, estimator='mbar', num=10): ''' The forward and backward convergence of the free energy estimate. Generate the free energy change as a function of time in both - directions, with the specified number of points in the time. + directions, with the specified number of points in the time. For example, + setting `num` to 10 would give the forward convergence which is the free + energy estimate from the first 10%, 20%, 30% .. of the data. The + Backward would give the estimate from the last 10%, 20%, 30% .. of the + data. Parameters ---------- diff --git a/src/alchemlyb/visualisation/convergence.py b/src/alchemlyb/visualisation/convergence.py index a30585b5..1af944ac 100644 --- a/src/alchemlyb/visualisation/convergence.py +++ b/src/alchemlyb/visualisation/convergence.py @@ -8,6 +8,13 @@ def plot_convergence(*data, units='kT', ax=None): """Plot the forward and backward convergence. + The input could be the result from + :func:`~alchemlyb.convergence.forward_backward_convergence` or it could + be given explicitly as `forward`, `forward_error`, `backward`, + `backward_error`. These four array_like objects should have the same + shape and can be used as input for the + :func:`matplotlib.pyplot.errorbar`. + Parameters ---------- data : Dataframe or 4 array_like objects @@ -33,7 +40,7 @@ def plot_convergence(*data, units='kT', ax=None): If `data` is not an :class:pandas.Dataframe` produced by :func:`~alchemlyb.convergence.forward_backward_convergence`, - the unit will be adjusted accoridng to the units + the unit will be adjusted according to the units variable. Otherwise, the units variable is for labelling only. Changing it doesn't change the unit of the underlying variable. @@ -44,7 +51,7 @@ def plot_convergence(*data, units='kT', ax=None): .. 
versionadded:: 0.4.0 """ if len(data) == 1 and isinstance(data[0], pd.DataFrame): - dataframe = get_unit_converter(units)(data) + dataframe = get_unit_converter(units)(data[0]) forward = dataframe['Forward'].to_numpy() forward_error = dataframe['Forward_Error'].to_numpy() backward = dataframe['Backward'].to_numpy() From e9601fb95021425ac5f674019476193228fccf7d Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Thu, 7 Oct 2021 20:35:34 +0100 Subject: [PATCH 13/18] Update convergence.py --- src/alchemlyb/visualisation/convergence.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/alchemlyb/visualisation/convergence.py b/src/alchemlyb/visualisation/convergence.py index 1af944ac..a39ae76d 100644 --- a/src/alchemlyb/visualisation/convergence.py +++ b/src/alchemlyb/visualisation/convergence.py @@ -59,7 +59,7 @@ def plot_convergence(*data, units='kT', ax=None): else: try: forward, forward_error, backward, backward_error = data - except ValueError: + except ValueError: # pragma: no cover raise ValueError('Ensure all four of forward, forward_error, ' 'backward, backward_error are supplied.') From a4d379abd08c4bcef505a945725aa873ebf09e7b Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Thu, 7 Oct 2021 20:38:48 +0100 Subject: [PATCH 14/18] Update convergence.py --- src/alchemlyb/convergence/convergence.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/alchemlyb/convergence/convergence.py b/src/alchemlyb/convergence/convergence.py index db693c76..a4c48630 100644 --- a/src/alchemlyb/convergence/convergence.py +++ b/src/alchemlyb/convergence/convergence.py @@ -9,7 +9,7 @@ def forward_backward_convergence(df_list, estimator='mbar', num=10): ''' The forward and backward convergence of the free energy estimate. - Generate the free energy change as a function of time in both + Generate the free energy estimate as a function of time in both directions, with the specified number of points in the time. 
For example, setting `num` to 10 would give the forward convergence which is the free energy estimate from the first 10%, 20%, 30% .. of the data. The From 134036772ed3478d6c0974e41fb046aab6e2fbb0 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Fri, 8 Oct 2021 17:32:48 +0100 Subject: [PATCH 15/18] Update convergence.py --- src/alchemlyb/convergence/convergence.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/alchemlyb/convergence/convergence.py b/src/alchemlyb/convergence/convergence.py index a4c48630..4ece921c 100644 --- a/src/alchemlyb/convergence/convergence.py +++ b/src/alchemlyb/convergence/convergence.py @@ -45,7 +45,7 @@ def forward_backward_convergence(df_list, estimator='mbar', num=10): .. versionadded:: 0.6.0 ''' - logger = logging.getLogger('alchemlyb.postprocessors.' + logger = logging.getLogger('alchemlyb.convergence.' 'forward_backward_convergence') logger.info('Start convergence analysis.') logger.info('Check data availability.') From b58db4a945c6a729bd87ea484703425b1e0388e1 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sun, 10 Oct 2021 09:03:04 +0100 Subject: [PATCH 16/18] Apply suggestions from code review Co-authored-by: Oliver Beckstein --- CHANGES | 4 ++-- src/alchemlyb/tests/test_convergence.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGES b/CHANGES index b0c8d17d..0b0c2236 100644 --- a/CHANGES +++ b/CHANGES @@ -22,8 +22,8 @@ Changes Enhancements - Convenience function for decorrelating u_nk and dHdl (PR #163). - Time convergence function forward_backward_convergence - introduced and plot_convergence now takes dataframe from - forward_backward_convergence as input (PR #168). + introduced and plot_convergence now takes dataframe from + forward_backward_convergence as input (PR #168). Fixes - Subsampling now works with bounds and step (PR #167, issue #166). 
diff --git a/src/alchemlyb/tests/test_convergence.py b/src/alchemlyb/tests/test_convergence.py index f1b1a43c..c14ed36e 100644 --- a/src/alchemlyb/tests/test_convergence.py +++ b/src/alchemlyb/tests/test_convergence.py @@ -39,5 +39,5 @@ def test_convergence_bar(gmx_benzene): def test_convergence_wrong_estimator(gmx_benzene): dHdl, u_nk = gmx_benzene - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="{} is not a valid estimator".format("www")): convergence = forward_backward_convergence(u_nk, 'www') From afc8d689c2b6e6bfb8b4da8cc80707f41e5dea15 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sun, 10 Oct 2021 09:23:56 +0100 Subject: [PATCH 17/18] Update convergence.py --- src/alchemlyb/visualisation/convergence.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/alchemlyb/visualisation/convergence.py b/src/alchemlyb/visualisation/convergence.py index a39ae76d..19091da4 100644 --- a/src/alchemlyb/visualisation/convergence.py +++ b/src/alchemlyb/visualisation/convergence.py @@ -11,7 +11,14 @@ def plot_convergence(*data, units='kT', ax=None): The input could be the result from :func:`~alchemlyb.convergence.forward_backward_convergence` or it could be given explicitly as `forward`, `forward_error`, `backward`, - `backward_error`. These four array_like objects should have the same + `backward_error`. + + `forward`: A list of free energy estimate from the first X% of data, + where `forward_error` is the corresponding error. + `backward`: A list of free energy estimate from the last X% of data., + where `backward_error` is the corresponding error. + + These four array_like objects should have the same shape and can be used as input for the :func:`matplotlib.pyplot.errorbar`. 
From a0ade5631ca2dc5cd61fc27404900d69b7f093f9 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Sun, 10 Oct 2021 09:26:19 +0100 Subject: [PATCH 18/18] Update convergence.py --- src/alchemlyb/visualisation/convergence.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/alchemlyb/visualisation/convergence.py b/src/alchemlyb/visualisation/convergence.py index 19091da4..13e4ea34 100644 --- a/src/alchemlyb/visualisation/convergence.py +++ b/src/alchemlyb/visualisation/convergence.py @@ -15,6 +15,7 @@ def plot_convergence(*data, units='kT', ax=None): `forward`: A list of free energy estimate from the first X% of data, where `forward_error` is the corresponding error. + `backward`: A list of free energy estimate from the last X% of data., where `backward_error` is the corresponding error.