diff --git a/CHANGES b/CHANGES index c1f1e1f0..40cb9eaf 100644 --- a/CHANGES +++ b/CHANGES @@ -19,6 +19,8 @@ The rules for this file: Enhancements - Blackfy the codebase (PR #280). + - Refactor the tests so that all parsing is done at the conftest level + (issue #206, PR #278). Fixes - Remove most of the iloc in the tests (issue #202, PR #254). diff --git a/src/alchemlyb/__init__.py b/src/alchemlyb/__init__.py index d73b1822..8417bcb2 100644 --- a/src/alchemlyb/__init__.py +++ b/src/alchemlyb/__init__.py @@ -53,7 +53,13 @@ def concat(objs, *args, **kwargs): pandas.concat - .. versionadded:: 0.5.0""" + .. versionadded:: 0.5.0 + .. versionchanged:: 1.0.1 + When the input is a single DataFrame or Series, it is returned directly. + + """ + if isinstance(objs, (pd.DataFrame, pd.Series)): + return objs # Sanity check try: attrs = objs[0].attrs diff --git a/src/alchemlyb/tests/conftest.py b/src/alchemlyb/tests/conftest.py new file mode 100644 index 00000000..88235545 --- /dev/null +++ b/src/alchemlyb/tests/conftest.py @@ -0,0 +1,271 @@ +"""Store the fixtures to be used for the tests. Note that this file will only contain +fixtures that are made directly from parsing the files. 
Any additional operations like +concat should be done at local level.""" + +import pytest +from alchemtest.amber import load_bace_example, load_simplesolvated +from alchemtest.gmx import ( + load_benzene, + load_expanded_ensemble_case_1, + load_expanded_ensemble_case_2, + load_expanded_ensemble_case_3, + load_water_particle_with_total_energy, + load_water_particle_with_potential_energy, + load_water_particle_without_energy, + load_ABFE, +) +from alchemtest.gomc import load_benzene as gomc_load_benzene +from alchemtest.namd import ( + load_tyr2ala, + load_idws, + load_restarted, + load_restarted_reversed, +) + +from alchemlyb.parsing import gmx, amber, gomc, namd + + +@pytest.fixture +def gmx_benzene(): + dataset = load_benzene() + return dataset["data"] + + +@pytest.fixture +def gmx_benzene_Coulomb_dHdl(gmx_benzene): + return [gmx.extract_dHdl(file, T=300) for file in gmx_benzene["Coulomb"]] + + +@pytest.fixture +def gmx_benzene_VDW_dHdl(gmx_benzene): + return [gmx.extract_dHdl(file, T=300) for file in gmx_benzene["VDW"]] + + +@pytest.fixture +def gmx_benzene_Coulomb_u_nk(gmx_benzene): + return [gmx.extract_u_nk(file, T=300) for file in gmx_benzene["Coulomb"]] + + +@pytest.fixture +def gmx_benzene_VDW_u_nk(gmx_benzene): + return [gmx.extract_u_nk(file, T=300) for file in gmx_benzene["VDW"]] + + +@pytest.fixture +def gmx_benzene_VDW_dHdl(gmx_benzene): + return [gmx.extract_dHdl(file, T=300) for file in gmx_benzene["VDW"]] + + +@pytest.fixture +def gmx_ABFE(): + dataset = load_ABFE() + return dataset["data"] + + +@pytest.fixture +def gmx_ABFE_complex_n_uk(gmx_ABFE): + return [gmx.extract_u_nk(file, T=300) for file in gmx_ABFE["complex"]] + + +@pytest.fixture +def gmx_ABFE_complex_dHdl(gmx_ABFE): + return [gmx.extract_dHdl(file, T=300) for file in gmx_ABFE["complex"]] + + +@pytest.fixture +def gmx_expanded_ensemble_case_1(): + dataset = load_expanded_ensemble_case_1() + + return [ + gmx.extract_u_nk(filename, T=300, filter=False) + for filename in 
dataset["data"]["AllStates"] + ] + + +@pytest.fixture +def gmx_expanded_ensemble_case_1_dHdl(): + dataset = load_expanded_ensemble_case_1() + + return [ + gmx.extract_dHdl(filename, T=300, filter=False) + for filename in dataset["data"]["AllStates"] + ] + + +@pytest.fixture +def gmx_expanded_ensemble_case_2(): + dataset = load_expanded_ensemble_case_2() + + return [ + gmx.extract_u_nk(filename, T=300, filter=False) + for filename in dataset["data"]["AllStates"] + ] + + +@pytest.fixture +def gmx_expanded_ensemble_case_2_dHdl(): + dataset = load_expanded_ensemble_case_2() + + return [ + gmx.extract_dHdl(filename, T=300, filter=False) + for filename in dataset["data"]["AllStates"] + ] + + +@pytest.fixture +def gmx_expanded_ensemble_case_3(): + dataset = load_expanded_ensemble_case_3() + + return [ + gmx.extract_u_nk(filename, T=300, filter=False) + for filename in dataset["data"]["AllStates"] + ] + + +@pytest.fixture +def gmx_expanded_ensemble_case_3_dHdl(): + dataset = load_expanded_ensemble_case_3() + + return [ + gmx.extract_dHdl(filename, T=300, filter=False) + for filename in dataset["data"]["AllStates"] + ] + + +@pytest.fixture +def gmx_water_particle_with_total_energy(): + dataset = load_water_particle_with_total_energy() + + return [ + gmx.extract_u_nk(filename, T=300) for filename in dataset["data"]["AllStates"] + ] + + +@pytest.fixture +def gmx_water_particle_with_total_energy_dHdl(): + dataset = load_water_particle_with_total_energy() + + return [ + gmx.extract_dHdl(filename, T=300) for filename in dataset["data"]["AllStates"] + ] + + +@pytest.fixture +def gmx_water_particle_with_potential_energy(): + dataset = load_water_particle_with_potential_energy() + + return [ + gmx.extract_u_nk(filename, T=300) for filename in dataset["data"]["AllStates"] + ] + + +@pytest.fixture +def gmx_water_particle_with_potential_energy_dHdl(): + dataset = load_water_particle_with_potential_energy() + + return [ + gmx.extract_dHdl(filename, T=300) for filename in 
dataset["data"]["AllStates"] + ] + + +@pytest.fixture +def gmx_water_particle_without_energy(): + dataset = load_water_particle_without_energy() + + return [ + gmx.extract_u_nk(filename, T=300) for filename in dataset["data"]["AllStates"] + ] + + +@pytest.fixture +def gmx_water_particle_without_energy_dHdl(): + dataset = load_water_particle_without_energy() + + return [ + gmx.extract_dHdl(filename, T=300) for filename in dataset["data"]["AllStates"] + ] + + +@pytest.fixture +def amber_simplesolvated(): + dataset = load_simplesolvated() + return dataset["data"] + + +@pytest.fixture +def amber_simplesolvated_charge_dHdl(amber_simplesolvated): + return [ + amber.extract_dHdl(filename, T=298.0) + for filename in amber_simplesolvated["charge"] + ] + + +@pytest.fixture +def amber_simplesolvated_vdw_dHdl(amber_simplesolvated): + return [ + amber.extract_dHdl(filename, T=298.0) + for filename in amber_simplesolvated["vdw"] + ] + + +@pytest.fixture +def amber_bace_example_complex_vdw(): + dataset = load_bace_example() + + return [ + amber.extract_u_nk(filename, T=298.0) + for filename in dataset["data"]["complex"]["vdw"] + ] + + +@pytest.fixture +def gomc_benzene(): + dataset = gomc_load_benzene() + return dataset["data"] + + +@pytest.fixture +def gomc_benzene_u_nk(gomc_benzene): + return [gomc.extract_u_nk(filename, T=298) for filename in gomc_benzene] + + +@pytest.fixture +def gomc_benzene_dHdl(gomc_benzene): + return [gomc.extract_dHdl(filename, T=298) for filename in gomc_benzene] + + +@pytest.fixture +def namd_tyr2ala(): + dataset = load_tyr2ala() + u_nk1 = namd.extract_u_nk(dataset["data"]["forward"][0], T=300) + u_nk2 = namd.extract_u_nk(dataset["data"]["backward"][0], T=300) + + # combine dataframes of fwd and rev directions + u_nk1[u_nk1.isna()] = u_nk2 + u_nk = u_nk1.sort_index(level=u_nk1.index.names[1:]) + + return u_nk + + +@pytest.fixture +def namd_idws(): + dataset = load_idws() + u_nk = namd.extract_u_nk(dataset["data"]["forward"], T=300) + + return u_nk + + 
+@pytest.fixture +def namd_idws_restarted(): + dataset = load_restarted() + u_nk = namd.extract_u_nk(dataset["data"]["both"], T=300) + + return u_nk + + +@pytest.fixture +def namd_idws_restarted_reversed(): + dataset = load_restarted_reversed() + u_nk = namd.extract_u_nk(dataset["data"]["both"], T=300) + + return u_nk diff --git a/src/alchemlyb/tests/test_convergence.py b/src/alchemlyb/tests/test_convergence.py index 3a774c31..fd29822a 100644 --- a/src/alchemlyb/tests/test_convergence.py +++ b/src/alchemlyb/tests/test_convergence.py @@ -8,17 +8,8 @@ from alchemlyb.parsing import gmx -@pytest.fixture() -def gmx_benzene(): - dataset = load_benzene() - return [gmx.extract_dHdl(dhdl, T=300) for dhdl in dataset["data"]["Coulomb"]], [ - gmx.extract_u_nk(dhdl, T=300) for dhdl in dataset["data"]["Coulomb"] - ] - - -def test_convergence_ti(gmx_benzene): - dHdl, u_nk = gmx_benzene - convergence = forward_backward_convergence(dHdl, "TI") +def test_convergence_ti(gmx_benzene_Coulomb_dHdl): + convergence = forward_backward_convergence(gmx_benzene_Coulomb_dHdl, "TI") assert convergence.shape == (10, 5) assert convergence.loc[0, "Forward"] == pytest.approx(3.07, 0.01) @@ -28,9 +19,8 @@ def test_convergence_ti(gmx_benzene): @pytest.mark.parametrize("estimator", ["MBAR", "BAR"]) -def test_convergence_fep(gmx_benzene, estimator): - dHdl, u_nk = gmx_benzene - convergence = forward_backward_convergence(u_nk, estimator) +def test_convergence_fep(gmx_benzene_Coulomb_u_nk, estimator): + convergence = forward_backward_convergence(gmx_benzene_Coulomb_u_nk, estimator) assert convergence.shape == (10, 5) assert convergence.loc[0, "Forward"] == pytest.approx(3.02, 0.01) assert convergence.loc[0, "Backward"] == pytest.approx(3.06, 0.01) @@ -38,21 +28,20 @@ def test_convergence_fep(gmx_benzene, estimator): assert convergence.loc[9, "Backward"] == pytest.approx(3.04, 0.01) -def test_convergence_wrong_estimator(gmx_benzene): - dHdl, u_nk = gmx_benzene +def 
test_convergence_wrong_estimator(gmx_benzene_Coulomb_dHdl): with pytest.raises(ValueError, match="is not available in"): - forward_backward_convergence(u_nk, "WWW") + forward_backward_convergence(gmx_benzene_Coulomb_dHdl, "WWW") -def test_convergence_wrong_cases(gmx_benzene): - dHdl, u_nk = gmx_benzene +def test_convergence_wrong_cases(gmx_benzene_Coulomb_u_nk): with pytest.warns(DeprecationWarning, match="Using lower-case strings for"): - forward_backward_convergence(u_nk, "mbar") + forward_backward_convergence(gmx_benzene_Coulomb_u_nk, "mbar") -def test_convergence_method(gmx_benzene): - dHdl, u_nk = gmx_benzene - convergence = forward_backward_convergence(u_nk, "MBAR", num=2, method="adaptive") +def test_convergence_method(gmx_benzene_Coulomb_u_nk): + convergence = forward_backward_convergence( + gmx_benzene_Coulomb_u_nk, "MBAR", num=2, method="adaptive" + ) assert len(convergence) == 2 @@ -75,12 +64,7 @@ def test_cummean_long_none_integter(): def test_R_c_converged(): - data = pd.Series( - data=[ - 0, - ] - * 100 - ) + data = pd.Series(data=[0] * 100) data.attrs["temperature"] = 310 data.attrs["energy_unit"] = "kcal/mol" value, running_average = fwdrev_cumavg_Rc(data) @@ -96,17 +80,7 @@ def test_R_c_notconverged(): def test_R_c_real(): - data = pd.Series( - data=np.hstack( - ( - range(10), - [ - 4.5, - ] - * 10, - ) - ) - ) + data = pd.Series(data=np.hstack((range(10), [4.5] * 10))) data.attrs["temperature"] = 310 data.attrs["energy_unit"] = "kcal/mol" value, running_average = fwdrev_cumavg_Rc(data, tol=2.0) @@ -114,24 +88,8 @@ def test_R_c_real(): def test_A_c_real(): - data = pd.Series( - data=np.hstack( - ( - range(10), - [ - 4.5, - ] - * 10, - ) - ) - ) + data = pd.Series(data=np.hstack((range(10), [4.5] * 10))) data.attrs["temperature"] = 310 data.attrs["energy_unit"] = "kcal/mol" - value = A_c( - [ - data, - ] - * 2, - tol=2.0, - ) + value = A_c([data] * 2, tol=2.0) np.testing.assert_allclose(value, 0.65) diff --git 
a/src/alchemlyb/tests/test_fep_estimators.py b/src/alchemlyb/tests/test_fep_estimators.py index d399c3d6..9d041eb8 100644 --- a/src/alchemlyb/tests/test_fep_estimators.py +++ b/src/alchemlyb/tests/test_fep_estimators.py @@ -1,163 +1,12 @@ """Tests for all FEP-based estimators in ``alchemlyb``. """ -import alchemtest.amber -import alchemtest.gmx -import alchemtest.gomc -import alchemtest.namd import numpy as np import pytest from alchemtest.generic import load_MBAR_BGFS -from alchemtest.gmx import load_benzene, load_ABFE import alchemlyb from alchemlyb.estimators import MBAR, BAR, AutoMBAR -from alchemlyb.parsing import gmx, amber, namd, gomc -from alchemlyb.parsing.gmx import extract_u_nk - - -def gmx_benzene_coul_u_nk(): - dataset = alchemtest.gmx.load_benzene() - - u_nk = alchemlyb.concat( - [gmx.extract_u_nk(filename, T=300) for filename in dataset["data"]["Coulomb"]] - ) - - return u_nk - - -def gmx_benzene_vdw_u_nk(): - dataset = alchemtest.gmx.load_benzene() - - u_nk = alchemlyb.concat( - [gmx.extract_u_nk(filename, T=300) for filename in dataset["data"]["VDW"]] - ) - - return u_nk - - -def gmx_expanded_ensemble_case_1(): - dataset = alchemtest.gmx.load_expanded_ensemble_case_1() - - u_nk = alchemlyb.concat( - [ - gmx.extract_u_nk(filename, T=300, filter=False) - for filename in dataset["data"]["AllStates"] - ] - ) - - return u_nk - - -def gmx_expanded_ensemble_case_2(): - dataset = alchemtest.gmx.load_expanded_ensemble_case_2() - - u_nk = alchemlyb.concat( - [ - gmx.extract_u_nk(filename, T=300, filter=False) - for filename in dataset["data"]["AllStates"] - ] - ) - - return u_nk - - -def gmx_expanded_ensemble_case_3(): - dataset = alchemtest.gmx.load_expanded_ensemble_case_3() - - u_nk = alchemlyb.concat( - [ - gmx.extract_u_nk(filename, T=300, filter=False) - for filename in dataset["data"]["AllStates"] - ] - ) - - return u_nk - - -def gmx_water_particle_with_total_energy(): - dataset = alchemtest.gmx.load_water_particle_with_total_energy() - - u_nk = 
alchemlyb.concat( - [gmx.extract_u_nk(filename, T=300) for filename in dataset["data"]["AllStates"]] - ) - - return u_nk - - -def gmx_water_particle_with_potential_energy(): - dataset = alchemtest.gmx.load_water_particle_with_potential_energy() - - u_nk = alchemlyb.concat( - [gmx.extract_u_nk(filename, T=300) for filename in dataset["data"]["AllStates"]] - ) - - return u_nk - - -def gmx_water_particle_without_energy(): - dataset = alchemtest.gmx.load_water_particle_without_energy() - - u_nk = alchemlyb.concat( - [gmx.extract_u_nk(filename, T=300) for filename in dataset["data"]["AllStates"]] - ) - - return u_nk - - -def amber_bace_example_complex_vdw(): - dataset = alchemtest.amber.load_bace_example() - - u_nk = alchemlyb.concat( - [ - amber.extract_u_nk(filename, T=298.0) - for filename in dataset["data"]["complex"]["vdw"] - ] - ) - return u_nk - - -def gomc_benzene_u_nk(): - dataset = alchemtest.gomc.load_benzene() - - u_nk = alchemlyb.concat( - [gomc.extract_u_nk(filename, T=298) for filename in dataset["data"]] - ) - - return u_nk - - -def namd_tyr2ala(): - dataset = alchemtest.namd.load_tyr2ala() - u_nk1 = namd.extract_u_nk(dataset["data"]["forward"][0], T=300) - u_nk2 = namd.extract_u_nk(dataset["data"]["backward"][0], T=300) - - # combine dataframes of fwd and rev directions - u_nk1[u_nk1.isna()] = u_nk2 - u_nk = u_nk1.sort_index(level=u_nk1.index.names[1:]) - - return u_nk - - -def namd_idws(): - dataset = alchemtest.namd.load_idws() - u_nk = namd.extract_u_nk(dataset["data"]["forward"], T=300) - - return u_nk - - -def namd_idws_restarted(): - dataset = alchemtest.namd.load_restarted() - u_nk = namd.extract_u_nk(dataset["data"]["both"], T=300) - - return u_nk - - -def namd_idws_restarted_reversed(): - dataset = alchemtest.namd.load_restarted_reversed() - u_nk = namd.extract_u_nk(dataset["data"]["both"], T=300) - - return u_nk class FEPestimatorMixin: @@ -182,23 +31,22 @@ class TestMBAR(FEPestimatorMixin): cls = MBAR @pytest.fixture( - scope="class", 
params=[ - (gmx_benzene_coul_u_nk, 3.041, 0.02088), - (gmx_benzene_vdw_u_nk, -3.007, 0.04519), - (gmx_expanded_ensemble_case_1, 75.923, 0.14124), - (gmx_expanded_ensemble_case_2, 75.915, 0.14372), - (gmx_expanded_ensemble_case_3, 76.173, 0.11345), - (gmx_water_particle_with_total_energy, -11.680, 0.083655), - (gmx_water_particle_with_potential_energy, -11.675, 0.083589), - (gmx_water_particle_without_energy, -11.654, 0.083415), - (amber_bace_example_complex_vdw, 2.41149, 0.0620658), - (gomc_benzene_u_nk, -0.79994, 0.091579), + ("gmx_benzene_Coulomb_u_nk", 3.041, 0.02088), + ("gmx_benzene_VDW_u_nk", -3.007, 0.04519), + ("gmx_expanded_ensemble_case_1", 75.923, 0.14124), + ("gmx_expanded_ensemble_case_2", 75.915, 0.14372), + ("gmx_expanded_ensemble_case_3", 76.173, 0.11345), + ("gmx_water_particle_with_total_energy", -11.680, 0.083655), + ("gmx_water_particle_with_potential_energy", -11.675, 0.083589), + ("gmx_water_particle_without_energy", -11.654, 0.083415), + ("amber_bace_example_complex_vdw", 2.41149, 0.0620658), + ("gomc_benzene_u_nk", -0.79994, 0.091579), ], ) def X_delta_f(self, request): get_unk, E, dE = request.param - return get_unk(), E, dE + return alchemlyb.concat(request.getfixturevalue(get_unk)), E, dE def test_mbar(self, X_delta_f): self.compare_delta_f(X_delta_f) @@ -209,16 +57,11 @@ class TestAutoMBAR(TestMBAR): class TestMBAR_fail: - @pytest.fixture(scope="class") - def n_uk_list(self): - n_uk_list = [ - gmx.extract_u_nk(dhdl, T=300) for dhdl in load_ABFE()["data"]["complex"] - ] - return n_uk_list - - def test_failback_adaptive(self, n_uk_list): + def test_failback_adaptive(self, gmx_ABFE_complex_n_uk): # The hybr will fail on this while adaptive will work - mbar = AutoMBAR().fit(alchemlyb.concat([n_uk[:2] for n_uk in n_uk_list])) + mbar = AutoMBAR().fit( + alchemlyb.concat([n_uk[:2] for n_uk in gmx_ABFE_complex_n_uk]) + ) assert np.isclose( mbar.d_delta_f_[(0.0, 0.0, 0.0)][(1.0, 1.0, 1.0)], 1.76832, 0.1 ) @@ -241,27 +84,26 @@ class 
TestBAR(FEPestimatorMixin): cls = BAR @pytest.fixture( - scope="class", params=[ - (gmx_benzene_coul_u_nk, 3.044, 0.01640), - (gmx_benzene_vdw_u_nk, -3.033, 0.03438), - (gmx_expanded_ensemble_case_1, 75.993, 0.11056), - (gmx_expanded_ensemble_case_2, 76.009, 0.11220), - (gmx_expanded_ensemble_case_3, 76.219, 0.08886), - (gmx_water_particle_with_total_energy, -11.675, 0.065055), - (gmx_water_particle_with_potential_energy, -11.724, 0.064964), - (gmx_water_particle_without_energy, -11.660, 0.064914), - (amber_bace_example_complex_vdw, 2.39294, 0.051192), - (namd_tyr2ala, 11.0044, 0.10235), - (namd_idws, 0.221147, 0.041003), - (namd_idws_restarted, 7.081127, 0.0344211), - (namd_idws_restarted_reversed, -4.18405, 0.03457), - (gomc_benzene_u_nk, -0.87095, 0.071263), + ("gmx_benzene_Coulomb_u_nk", 3.044, 0.01640), + ("gmx_benzene_VDW_u_nk", -3.033, 0.03438), + ("gmx_expanded_ensemble_case_1", 75.993, 0.11056), + ("gmx_expanded_ensemble_case_2", 76.009, 0.11220), + ("gmx_expanded_ensemble_case_3", 76.219, 0.08886), + ("gmx_water_particle_with_total_energy", -11.675, 0.065055), + ("gmx_water_particle_with_potential_energy", -11.724, 0.064964), + ("gmx_water_particle_without_energy", -11.660, 0.064914), + ("amber_bace_example_complex_vdw", 2.39294, 0.051192), + ("namd_tyr2ala", 11.0044, 0.10235), + ("namd_idws", 0.221147, 0.041003), + ("namd_idws_restarted", 7.081127, 0.0344211), + ("namd_idws_restarted_reversed", -4.18405, 0.03457), + ("gomc_benzene_u_nk", -0.87095, 0.071263), ], ) def X_delta_f(self, request): get_unk, E, dE = request.param - return get_unk(), E, dE + return alchemlyb.concat(request.getfixturevalue(get_unk)), E, dE def test_bar(self, X_delta_f): self.compare_delta_f(X_delta_f) @@ -280,14 +122,9 @@ class Test_Units: """Test the units.""" @staticmethod - @pytest.fixture(scope="class") - def u_nk(): - bz = load_benzene().data - u_nk_coul = alchemlyb.concat( - [extract_u_nk(xvg, T=300) for xvg in bz["Coulomb"]] - ) - u_nk_coul.attrs = 
extract_u_nk(load_benzene().data["Coulomb"][0], T=300).attrs - return u_nk_coul + @pytest.fixture() + def u_nk(gmx_benzene_Coulomb_u_nk): + return alchemlyb.concat(gmx_benzene_Coulomb_u_nk) def test_bar(self, u_nk): bar = BAR().fit(u_nk) diff --git a/src/alchemlyb/tests/test_preprocessing.py b/src/alchemlyb/tests/test_preprocessing.py index 0d204cdb..ea8231ab 100644 --- a/src/alchemlyb/tests/test_preprocessing.py +++ b/src/alchemlyb/tests/test_preprocessing.py @@ -1,16 +1,11 @@ """Tests for preprocessing functions. """ -import alchemtest.gmx import numpy as np import pytest -from alchemtest.gmx import load_benzene -from alchemtest.namd import load_idws from numpy.testing import assert_allclose import alchemlyb -from alchemlyb.parsing import gmx, namd -from alchemlyb.parsing.gmx import extract_u_nk, extract_dHdl from alchemlyb.preprocessing import ( slicing, statistical_inefficiency, @@ -22,71 +17,35 @@ ) -def gmx_benzene_dHdl(): - dataset = alchemtest.gmx.load_benzene() - return gmx.extract_dHdl(dataset["data"]["Coulomb"][0], T=300) - - -# When issue #206 is addressed make the gmx_benzene_dHdl() function the -# fixture, remove the wrapper below, and replace -# gmx_benzene_dHdl_fixture --> gmx_benzene_dHdl -@pytest.fixture() -def gmx_benzene_dHdl_fixture(): - return gmx_benzene_dHdl() +def _check_data_is_outside_bounds(data, lower, upper): + """ + Helper function to make sure that `data` has entries that are + below the `lower` bound, and above the `upper` bound. + This is used by slicing tests to make sure that the data + provided is appropriate for the tests. 
+ """ + assert any(data.reset_index()["time"] < lower) + assert any(data.reset_index()["time"] > upper) @pytest.fixture() -def gmx_ABFE(): - dataset = alchemtest.gmx.load_ABFE() - return gmx.extract_u_nk(dataset["data"]["complex"][0], T=300) +def dHdl(gmx_benzene_Coulomb_dHdl): + return gmx_benzene_Coulomb_dHdl[0] @pytest.fixture() -def gmx_ABFE_dhdl(): - dataset = alchemtest.gmx.load_ABFE() - return gmx.extract_dHdl(dataset["data"]["complex"][0], T=300) +def u_nk(gmx_benzene_Coulomb_u_nk): + return gmx_benzene_Coulomb_u_nk[0] @pytest.fixture() -def gmx_ABFE_u_nk(): - dataset = alchemtest.gmx.load_ABFE() - return gmx.extract_u_nk(dataset["data"]["complex"][-1], T=300) +def multi_index_u_nk(gmx_ABFE_complex_n_uk): + return gmx_ABFE_complex_n_uk[0] @pytest.fixture() -def gmx_benzene_u_nk_fixture(): - dataset = alchemtest.gmx.load_benzene() - return gmx.extract_u_nk(dataset["data"]["Coulomb"][0], T=300) - - -def gmx_benzene_u_nk(): - dataset = alchemtest.gmx.load_benzene() - return gmx.extract_u_nk(dataset["data"]["Coulomb"][0], T=300) - - -def gmx_benzene_dHdl_full(): - dataset = alchemtest.gmx.load_benzene() - return alchemlyb.concat( - [gmx.extract_dHdl(i, T=300) for i in dataset["data"]["Coulomb"]] - ) - - -def gmx_benzene_u_nk_full(): - dataset = alchemtest.gmx.load_benzene() - return alchemlyb.concat( - [gmx.extract_u_nk(i, T=300) for i in dataset["data"]["Coulomb"]] - ) - - -def _check_data_is_outside_bounds(data, lower, upper): - """ - Helper function to make sure that `data` has entries that are - below the `lower` bound, and above the `upper` bound. - This is used by slicing tests to make sure that the data - provided is appropriate for the tests. 
- """ - assert any(data.reset_index()["time"] < lower) - assert any(data.reset_index()["time"] > upper) +def multi_index_dHdl(gmx_ABFE_complex_dHdl): + return gmx_ABFE_complex_dHdl[0] class TestSlicing: @@ -95,20 +54,22 @@ class TestSlicing: def slicer(self, *args, **kwargs): return slicing(*args, **kwargs) - @pytest.mark.parametrize( - ("data", "size"), [(gmx_benzene_dHdl(), 661), (gmx_benzene_u_nk(), 661)] - ) - def test_basic_slicing(self, data, size): - assert len(self.slicer(data, lower=1000, upper=34000, step=5)) == size + @pytest.mark.parametrize(("data", "size"), [("dHdl", 661), ("u_nk", 661)]) + def test_basic_slicing(self, data, size, request): + assert ( + len( + self.slicer( + request.getfixturevalue(data), lower=1000, upper=34000, step=5 + ) + ) + == size + ) - def test_unchanged(self): + def test_unchanged(self, namd_idws): # NAMD energy files only have dE for adjacent lambdas, this ensures # that the slicer will not drop these rows as they have NaN values. - file = load_idws().data["forward"][0] - u_nk = namd.extract_u_nk(file, 298) - # Do the pre-processing as the u_nk are from all lambdas - groups = u_nk.groupby("fep-lambda") + groups = namd_idws.groupby("fep-lambda") for key, group in groups: group = group[~group.index.duplicated(keep="first")] df = self.slicer(group, None, None, None) @@ -117,8 +78,8 @@ def test_unchanged(self): @pytest.mark.parametrize( ("dataloader", "lower", "upper"), [ - ("gmx_benzene_dHdl_fixture", 1000, 34000), - ("gmx_benzene_u_nk_fixture", 1000, 34000), + ("dHdl", 1000, 34000), + ("u_nk", 1000, 34000), ], ) def test_data_is_unchanged(self, dataloader, lower, upper, request): @@ -138,8 +99,8 @@ def test_data_is_unchanged(self, dataloader, lower, upper, request): @pytest.mark.parametrize( ("dataloader", "lower", "upper"), [ - ("gmx_benzene_dHdl_fixture", 1000, 34000), - ("gmx_benzene_u_nk_fixture", 1000, 34000), + ("dHdl", 1000, 34000), + ("u_nk", 1000, 34000), ], ) def test_lower_and_upper_bound(self, dataloader, lower, 
upper, request): @@ -157,9 +118,10 @@ def test_lower_and_upper_bound(self, dataloader, lower, upper, request): assert all(sliced.reset_index()["time"] >= lower) assert all(sliced.reset_index()["time"] <= upper) - @pytest.mark.parametrize("data", [gmx_benzene_dHdl(), gmx_benzene_u_nk()]) - def test_disordered_exception(self, data): + @pytest.mark.parametrize("dataloader", ["dHdl", "u_nk"]) + def test_disordered_exception(self, dataloader, request): """Test that a shuffled DataFrame yields a KeyError.""" + data = request.getfixturevalue(dataloader) indices = data.index.values np.random.shuffle(indices) @@ -168,66 +130,71 @@ def test_disordered_exception(self, data): with pytest.raises(KeyError): self.slicer(df, lower=200) - @pytest.mark.parametrize("data", [gmx_benzene_dHdl_full(), gmx_benzene_u_nk_full()]) - def test_duplicated_exception(self, data): + @pytest.mark.parametrize( + "dataloader", ["gmx_benzene_Coulomb_dHdl", "gmx_benzene_Coulomb_u_nk"] + ) + def test_duplicated_exception(self, dataloader, request): """Test that a DataFrame with duplicate times yields a KeyError.""" + data = alchemlyb.concat(request.getfixturevalue(dataloader)) with pytest.raises(KeyError): self.slicer(data.sort_index(axis=0), lower=200) - def test_subsample_bounds_and_step(self, gmx_ABFE): + def test_subsample_bounds_and_step(self, multi_index_u_nk): """Make sure that slicing the series also works""" subsample = statistical_inefficiency( - gmx_ABFE, gmx_ABFE.sum(axis=1), lower=100, upper=400, step=2 + multi_index_u_nk, multi_index_u_nk.sum(axis=1), lower=100, upper=400, step=2 ) assert len(subsample) == 76 - def test_multiindex_duplicated(self, gmx_ABFE): - subsample = statistical_inefficiency(gmx_ABFE, gmx_ABFE.sum(axis=1)) + def test_multiindex_duplicated(self, multi_index_u_nk): + subsample = statistical_inefficiency( + multi_index_u_nk, multi_index_u_nk.sum(axis=1) + ) assert len(subsample) == 501 - def test_sort_off(self, gmx_ABFE): - unsorted = alchemlyb.concat([gmx_ABFE[-500:], 
gmx_ABFE[:500]]) + def test_sort_off(self, multi_index_u_nk): + unsorted = alchemlyb.concat([multi_index_u_nk[-500:], multi_index_u_nk[:500]]) with pytest.raises(KeyError): statistical_inefficiency(unsorted, unsorted.sum(axis=1), sort=False) - def test_sort_on(self, gmx_ABFE): - unsorted = alchemlyb.concat([gmx_ABFE[-500:], gmx_ABFE[:500]]) + def test_sort_on(self, multi_index_u_nk): + unsorted = alchemlyb.concat([multi_index_u_nk[-500:], multi_index_u_nk[:500]]) subsample = statistical_inefficiency(unsorted, unsorted.sum(axis=1), sort=True) assert subsample.reset_index(0)["time"].is_monotonic_increasing - def test_sort_on_noseries(self, gmx_ABFE): - unsorted = alchemlyb.concat([gmx_ABFE[-500:], gmx_ABFE[:500]]) + def test_sort_on_noseries(self, multi_index_u_nk): + unsorted = alchemlyb.concat([multi_index_u_nk[-500:], multi_index_u_nk[:500]]) subsample = statistical_inefficiency(unsorted, None, sort=True) assert subsample.reset_index(0)["time"].is_monotonic_increasing - def test_duplication_off(self, gmx_ABFE): - duplicated = alchemlyb.concat([gmx_ABFE, gmx_ABFE]) + def test_duplication_off(self, multi_index_u_nk): + duplicated = alchemlyb.concat([multi_index_u_nk, multi_index_u_nk]) with pytest.raises(KeyError): statistical_inefficiency( duplicated, duplicated.sum(axis=1), drop_duplicates=False ) - def test_duplication_on_dataframe(self, gmx_ABFE): - duplicated = alchemlyb.concat([gmx_ABFE, gmx_ABFE]) + def test_duplication_on_dataframe(self, multi_index_u_nk): + duplicated = alchemlyb.concat([multi_index_u_nk, multi_index_u_nk]) subsample = statistical_inefficiency( duplicated, duplicated.sum(axis=1), drop_duplicates=True ) assert len(subsample) < 1000 - def test_duplication_on_dataframe_noseries(self, gmx_ABFE): - duplicated = alchemlyb.concat([gmx_ABFE, gmx_ABFE]) + def test_duplication_on_dataframe_noseries(self, multi_index_u_nk): + duplicated = alchemlyb.concat([multi_index_u_nk, multi_index_u_nk]) subsample = statistical_inefficiency(duplicated, None, 
drop_duplicates=True) assert len(subsample) == 1001 - def test_duplication_on_series(self, gmx_ABFE): - duplicated = alchemlyb.concat([gmx_ABFE, gmx_ABFE]) + def test_duplication_on_series(self, multi_index_u_nk): + duplicated = alchemlyb.concat([multi_index_u_nk, multi_index_u_nk]) subsample = statistical_inefficiency( duplicated.sum(axis=1), duplicated.sum(axis=1), drop_duplicates=True ) assert len(subsample) < 1000 - def test_duplication_on_series_noseries(self, gmx_ABFE): - duplicated = alchemlyb.concat([gmx_ABFE, gmx_ABFE]) + def test_duplication_on_series_noseries(self, multi_index_u_nk): + duplicated = alchemlyb.concat([multi_index_u_nk, multi_index_u_nk]) subsample = statistical_inefficiency( duplicated.sum(axis=1), None, drop_duplicates=True ) @@ -235,18 +202,18 @@ def test_duplication_on_series_noseries(self, gmx_ABFE): class CorrelatedPreprocessors: - @pytest.mark.parametrize( - ("data", "size"), [(gmx_benzene_dHdl(), 4001), (gmx_benzene_u_nk(), 4001)] - ) - def test_subsampling(self, data, size): + @pytest.mark.parametrize(("dataloader", "size"), [("dHdl", 4001), ("u_nk", 4001)]) + def test_subsampling(self, dataloader, size, request): """Basic test for execution; resulting size of dataset sensitive to machine and depends on algorithm. 
""" + data = request.getfixturevalue(dataloader) assert len(self.slicer(data, series=data.loc[:, data.columns[0]])) <= size - @pytest.mark.parametrize("data", [gmx_benzene_dHdl(), gmx_benzene_u_nk()]) - def test_no_series(self, data): + @pytest.mark.parametrize("dataloader", ["dHdl", "u_nk"]) + def test_no_series(self, dataloader, request): """Check that we get the same result as simple slicing with no Series.""" + data = request.getfixturevalue(dataloader) df_sub = self.slicer(data, lower=200, upper=5000, step=2) df_sliced = slicing(data, lower=200, upper=5000, step=2) @@ -258,17 +225,16 @@ def slicer(self, *args, **kwargs): return statistical_inefficiency(*args, **kwargs) @pytest.mark.parametrize( - ("conservative", "data", "size"), + ("conservative", "dataloader", "size"), [ - (True, gmx_benzene_dHdl(), 2001), - # 0.00: g = 1.0559445620585415 - (True, gmx_benzene_u_nk(), 2001), - # 'fep': g = 1.0560203916559594 - (False, gmx_benzene_dHdl(), 3789), - (False, gmx_benzene_u_nk(), 3571), + (True, "dHdl", 2001), # 0.00: g = 1.0559445620585415 + (True, "u_nk", 2001), # 'fep': g = 1.0560203916559594 + (False, "dHdl", 3789), + (False, "u_nk", 3571), ], ) - def test_conservative(self, data, size, conservative): + def test_conservative(self, dataloader, size, conservative, request): + data = request.getfixturevalue(dataloader) sliced = self.slicer( data, series=data.loc[:, data.columns[0]], conservative=conservative ) @@ -279,22 +245,22 @@ def test_conservative(self, data, size, conservative): assert len(sliced) == size @pytest.mark.parametrize( - "series", + "dataloader,end,step", [ - gmx_benzene_dHdl()["fep"][:20], # wrong length - gmx_benzene_dHdl()["fep"][::-1], # wrong time stamps (reversed) + ("dHdl", 20, None), # wrong length + ("dHdl", None, -1), # wrong time stamps (reversed) ], ) - def test_raise_ValueError_for_mismatched_data(self, series): - data = gmx_benzene_dHdl() + def test_raise_ValueError_for_mismatched_data(self, dataloader, end, step, request): + data 
= request.getfixturevalue(dataloader) with pytest.raises(ValueError): - self.slicer(data, series=series) + self.slicer(data, series=data["fep"][:end:step]) @pytest.mark.parametrize( ("dataloader", "lower", "upper"), [ - ("gmx_benzene_dHdl_fixture", 1000, 34000), - ("gmx_benzene_u_nk_fixture", 1000, 34000), + ("dHdl", 1000, 34000), + ("u_nk", 1000, 34000), ], ) @pytest.mark.parametrize("use_series", [True, False]) @@ -333,8 +299,8 @@ def test_data_is_unchanged( @pytest.mark.parametrize( ("dataloader", "lower", "upper"), [ - ("gmx_benzene_dHdl_fixture", 1000, 34000), - ("gmx_benzene_u_nk_fixture", 1000, 34000), + ("dHdl", 1000, 34000), + ("u_nk", 1000, 34000), ], ) @pytest.mark.parametrize("use_series", [True, False]) @@ -374,8 +340,8 @@ def test_lower_and_upper_bound_slicer( @pytest.mark.parametrize( ("dataloader", "lower", "upper"), [ - ("gmx_benzene_dHdl_fixture", 1000, 34000), - ("gmx_benzene_u_nk_fixture", 1000, 34000), + ("dHdl", 1000, 34000), + ("u_nk", 1000, 34000), ], ) @pytest.mark.parametrize("conservative", [True, False]) @@ -418,55 +384,38 @@ def slicer(self, *args, **kwargs): class Test_Units: """Test the preprocessing module.""" - @staticmethod - @pytest.fixture(scope="class") - def dhdl(): - dataset = load_benzene() - dhdl = extract_dHdl(dataset["data"]["Coulomb"][0], 310) - return dhdl - - def test_slicing(self, dhdl): + def test_slicing(self, u_nk): """Test if extract_u_nk assign the attr correctly""" - dataset = load_benzene() - u_nk = extract_u_nk(dataset["data"]["Coulomb"][0], 310) new_u_nk = slicing(u_nk) - assert new_u_nk.attrs["temperature"] == 310 + assert new_u_nk.attrs["temperature"] == 300 assert new_u_nk.attrs["energy_unit"] == "kT" - def test_statistical_inefficiency(self, dhdl): + def test_statistical_inefficiency(self, dHdl): """Test if extract_u_nk assign the attr correctly""" - dataset = load_benzene() - dhdl = extract_dHdl(dataset["data"]["Coulomb"][0], 310) - new_dhdl = statistical_inefficiency(dhdl) - assert 
new_dhdl.attrs["temperature"] == 310 + new_dhdl = statistical_inefficiency(dHdl) + assert new_dhdl.attrs["temperature"] == 300 assert new_dhdl.attrs["energy_unit"] == "kT" - def test_equilibrium_detection(self, dhdl): + def test_equilibrium_detection(self, dHdl): """Test if extract_u_nk assign the attr correctly""" - dataset = load_benzene() - dhdl = extract_dHdl(dataset["data"]["Coulomb"][0], 310) - new_dhdl = equilibrium_detection(dhdl) - assert new_dhdl.attrs["temperature"] == 310 + new_dhdl = equilibrium_detection(dHdl) + assert new_dhdl.attrs["temperature"] == 300 assert new_dhdl.attrs["energy_unit"] == "kT" @pytest.mark.parametrize(("method", "size"), [("all", 2001), ("dE", 2001)]) -def test_decorrelate_u_nk_single_l(gmx_benzene_u_nk_fixture, method, size): +def test_decorrelate_u_nk_single_l(u_nk, method, size): assert ( - len( - decorrelate_u_nk( - gmx_benzene_u_nk_fixture, method=method, drop_duplicates=True, sort=True - ) - ) + len(decorrelate_u_nk(u_nk, method=method, drop_duplicates=True, sort=True)) == size ) -def test_decorrelate_u_nk_burnin(gmx_benzene_u_nk_fixture): +def test_decorrelate_u_nk_burnin(u_nk): assert ( len( decorrelate_u_nk( - gmx_benzene_u_nk_fixture, + u_nk, method="dE", drop_duplicates=True, sort=True, @@ -477,11 +426,11 @@ def test_decorrelate_u_nk_burnin(gmx_benzene_u_nk_fixture): ) -def test_decorrelate_dhdl_burnin(gmx_benzene_dHdl_fixture): +def test_decorrelate_dhdl_burnin(dHdl): assert ( len( decorrelate_dhdl( - gmx_benzene_dHdl_fixture, + dHdl, drop_duplicates=True, sort=True, remove_burnin=True, @@ -491,12 +440,12 @@ def test_decorrelate_dhdl_burnin(gmx_benzene_dHdl_fixture): ) -@pytest.mark.parametrize(("method", "size"), [("all", 1001), ("dE", 334)]) -def test_decorrelate_u_nk_multiple_l(gmx_ABFE_u_nk, method, size): +@pytest.mark.parametrize(("method", "size"), [("all", 501), ("dE", 501)]) +def test_decorrelate_u_nk_multiple_l(multi_index_u_nk, method, size): assert ( len( decorrelate_u_nk( - gmx_ABFE_u_nk, + 
multi_index_u_nk, method=method, ) ) @@ -504,60 +453,43 @@ def test_decorrelate_u_nk_multiple_l(gmx_ABFE_u_nk, method, size): ) -def test_decorrelate_dhdl_single_l(gmx_benzene_u_nk_fixture): - assert ( - len(decorrelate_dhdl(gmx_benzene_u_nk_fixture, drop_duplicates=True, sort=True)) - == 2001 - ) +def test_decorrelate_dhdl_single_l(u_nk): + assert len(decorrelate_dhdl(u_nk, drop_duplicates=True, sort=True)) == 2001 -def test_decorrelate_dhdl_multiple_l(gmx_ABFE_dhdl): +def test_decorrelate_dhdl_multiple_l(multi_index_dHdl): assert ( len( decorrelate_dhdl( - gmx_ABFE_dhdl, + multi_index_dHdl, ) ) == 501 ) -def test_raise_non_uk(gmx_ABFE_dhdl): +def test_raise_non_uk(multi_index_dHdl): with pytest.raises(ValueError): decorrelate_u_nk( - gmx_ABFE_dhdl, + multi_index_dHdl, ) class TestDhdl2series: - @staticmethod - @pytest.fixture(scope="class") - def dhdl(): - dataset = load_benzene() - dhdl = extract_dHdl(dataset["data"]["Coulomb"][0], 300) - return dhdl - @pytest.mark.parametrize("methodargs", [{}, {"method": "all"}]) - def test_dhdl2series(self, dhdl, methodargs): - series = dhdl2series(dhdl, **methodargs) - assert len(series) == len(dhdl) - assert_allclose(series, dhdl.sum(axis=1)) + def test_dhdl2series(self, dHdl, methodargs): + series = dhdl2series(dHdl, **methodargs) + assert len(series) == len(dHdl) + assert_allclose(series, dHdl.sum(axis=1)) - def test_other_method_ValueError(self, dhdl): + def test_other_method_ValueError(self, dHdl): with pytest.raises( ValueError, match="Only method='all' is supported for dhdl2series()." 
): - dhdl2series(dhdl, method="dE") + dhdl2series(dHdl, method="dE") class TestU_nk2series: - @staticmethod - @pytest.fixture(scope="class") - def u_nk(): - dataset = load_benzene() - u_nk = extract_u_nk(dataset["data"]["Coulomb"][0], 300) - return u_nk - @pytest.mark.parametrize( "methodargs,reference", # reference = sum [ diff --git a/src/alchemlyb/tests/test_ti_estimators.py b/src/alchemlyb/tests/test_ti_estimators.py index b510a09b..1fe2648b 100644 --- a/src/alchemlyb/tests/test_ti_estimators.py +++ b/src/alchemlyb/tests/test_ti_estimators.py @@ -1,138 +1,80 @@ """Tests for all TI-based estimators in ``alchemlyb``. """ -import alchemtest.amber -import alchemtest.gmx -import alchemtest.gomc import pandas as pd import pytest from alchemtest.gmx import load_benzene, load_ABFE import alchemlyb from alchemlyb.estimators import TI -from alchemlyb.parsing import gmx, amber, gomc -from alchemlyb.parsing.gmx import extract_dHdl +from alchemlyb.parsing import amber -def gmx_benzene_coul_dHdl(): - dataset = alchemtest.gmx.load_benzene() - - dHdl = alchemlyb.concat( - [gmx.extract_dHdl(filename, T=300) for filename in dataset["data"]["Coulomb"]] - ) - +@pytest.fixture +def Coulomb(gmx_benzene_Coulomb_dHdl): + dHdl = alchemlyb.concat(gmx_benzene_Coulomb_dHdl) return dHdl -def gmx_benzene_vdw_dHdl(): - dataset = alchemtest.gmx.load_benzene() - - dHdl = alchemlyb.concat( - [gmx.extract_dHdl(filename, T=300) for filename in dataset["data"]["VDW"]] - ) - +@pytest.fixture +def VDW(gmx_benzene_VDW_dHdl): + dHdl = alchemlyb.concat(gmx_benzene_VDW_dHdl) return dHdl -def gmx_expanded_ensemble_case_1_dHdl(): - dataset = alchemtest.gmx.load_expanded_ensemble_case_1() - - dHdl = alchemlyb.concat( - [ - gmx.extract_dHdl(filename, T=300, filter=False) - for filename in dataset["data"]["AllStates"] - ] - ) - +@pytest.fixture +def expanded_ensemble_case_1(gmx_expanded_ensemble_case_1_dHdl): + dHdl = alchemlyb.concat(gmx_expanded_ensemble_case_1_dHdl) return dHdl -def 
gmx_expanded_ensemble_case_2_dHdl(): - dataset = alchemtest.gmx.load_expanded_ensemble_case_2() - - dHdl = alchemlyb.concat( - [ - gmx.extract_dHdl(filename, T=300, filter=False) - for filename in dataset["data"]["AllStates"] - ] - ) - +@pytest.fixture +def expanded_ensemble_case_2(gmx_expanded_ensemble_case_2_dHdl): + dHdl = alchemlyb.concat(gmx_expanded_ensemble_case_2_dHdl) return dHdl -def gmx_expanded_ensemble_case_3_dHdl(): - dataset = alchemtest.gmx.load_expanded_ensemble_case_3() - - dHdl = alchemlyb.concat( - [ - gmx.extract_dHdl(filename, T=300, filter=False) - for filename in dataset["data"]["AllStates"] - ] - ) - +@pytest.fixture +def expanded_ensemble_case_3(gmx_expanded_ensemble_case_3_dHdl): + dHdl = alchemlyb.concat(gmx_expanded_ensemble_case_3_dHdl) return dHdl -def gmx_water_particle_with_total_energy_dHdl(): - dataset = alchemtest.gmx.load_water_particle_with_total_energy() - - dHdl = alchemlyb.concat( - [gmx.extract_dHdl(filename, T=300) for filename in dataset["data"]["AllStates"]] - ) - +@pytest.fixture +def water_particle_with_total_energy(gmx_water_particle_with_total_energy_dHdl): + dHdl = alchemlyb.concat(gmx_water_particle_with_total_energy_dHdl) return dHdl -def gmx_water_particle_with_potential_energy_dHdl(): - dataset = alchemtest.gmx.load_water_particle_with_potential_energy() - - dHdl = alchemlyb.concat( - [gmx.extract_dHdl(filename, T=300) for filename in dataset["data"]["AllStates"]] - ) - +@pytest.fixture +def water_particle_with_potential_energy( + gmx_water_particle_with_potential_energy_dHdl, +): + dHdl = alchemlyb.concat(gmx_water_particle_with_potential_energy_dHdl) return dHdl -def gmx_water_particle_without_energy_dHdl(): - dataset = alchemtest.gmx.load_water_particle_without_energy() - - dHdl = alchemlyb.concat( - [gmx.extract_dHdl(filename, T=300) for filename in dataset["data"]["AllStates"]] - ) - +@pytest.fixture +def water_particle_without_energy(gmx_water_particle_without_energy_dHdl): + dHdl = 
alchemlyb.concat(gmx_water_particle_without_energy_dHdl) return dHdl -def amber_simplesolvated_charge_dHdl(): - dataset = alchemtest.amber.load_simplesolvated() - - dHdl = alchemlyb.concat( - [ - amber.extract_dHdl(filename, T=298.0) - for filename in dataset["data"]["charge"] - ] - ) - +@pytest.fixture +def simplesolvated_charge(amber_simplesolvated_charge_dHdl): + dHdl = alchemlyb.concat(amber_simplesolvated_charge_dHdl) return dHdl -def amber_simplesolvated_vdw_dHdl(): - dataset = alchemtest.amber.load_simplesolvated() - - dHdl = alchemlyb.concat( - [amber.extract_dHdl(filename, T=298.0) for filename in dataset["data"]["vdw"]] - ) - +@pytest.fixture +def simplesolvated_vdw(amber_simplesolvated_vdw_dHdl): + dHdl = alchemlyb.concat(amber_simplesolvated_vdw_dHdl) return dHdl -def gomc_benzene_dHdl(): - dataset = alchemtest.gomc.load_benzene() - - dHdl = alchemlyb.concat( - [gomc.extract_dHdl(filename, T=298) for filename in dataset["data"]] - ) - +@pytest.fixture +def benzene(gomc_benzene_dHdl): + dHdl = alchemlyb.concat(gomc_benzene_dHdl) return dHdl @@ -158,34 +100,33 @@ class TestTI(TIestimatorMixin): kT_amber = amber.k_b * T @pytest.fixture( - scope="class", params=[ - (gmx_benzene_coul_dHdl, 3.089, 0.02157), - (gmx_benzene_vdw_dHdl, -3.056, 0.04863), - (gmx_expanded_ensemble_case_1_dHdl, 76.220, 0.15568), - (gmx_expanded_ensemble_case_2_dHdl, 76.247, 0.15889), - (gmx_expanded_ensemble_case_3_dHdl, 76.387, 0.12532), - (gmx_water_particle_with_total_energy_dHdl, -11.696, 0.091775), - (gmx_water_particle_with_potential_energy_dHdl, -11.751, 0.091149), - (gmx_water_particle_without_energy_dHdl, -11.687, 0.091604), - (amber_simplesolvated_charge_dHdl, -60.114 / kT_amber, 0.08186 / kT_amber), - (amber_simplesolvated_vdw_dHdl, 3.824 / kT_amber, 0.13254 / kT_amber), + ("Coulomb", 3.089, 0.02157), + ("VDW", -3.056, 0.04863), + ("expanded_ensemble_case_1", 76.220, 0.15568), + ("expanded_ensemble_case_2", 76.247, 0.15889), + ("expanded_ensemble_case_3", 76.387, 0.12532), 
+ ("water_particle_with_total_energy", -11.696, 0.091775), + ("water_particle_with_potential_energy", -11.751, 0.091149), + ("water_particle_without_energy", -11.687, 0.091604), + ("simplesolvated_charge", -60.114 / kT_amber, 0.08186 / kT_amber), + ("simplesolvated_vdw", 3.824 / kT_amber, 0.13254 / kT_amber), ], ) def X_delta_f(self, request): get_dHdl, E, dE = request.param - return get_dHdl(), E, dE + return request.getfixturevalue(get_dHdl), E, dE -def test_TI_separate_dhdl_multiple_column(): - dHdl = gomc_benzene_dHdl() +def test_TI_separate_dhdl_multiple_column(benzene): + dHdl = benzene estimator = TI().fit(dHdl) assert all([isinstance(dhdl, pd.Series) for dhdl in estimator.separate_dhdl()]) assert sorted([len(dhdl) for dhdl in estimator.separate_dhdl()]) == [8, 16] -def test_TI_separate_dhdl_single_column(): - dHdl = gmx_benzene_coul_dHdl() +def test_TI_separate_dhdl_single_column(Coulomb): + dHdl = Coulomb estimator = TI().fit(dHdl) assert all([isinstance(dhdl, pd.Series) for dhdl in estimator.separate_dhdl()]) assert [len(dhdl) for dhdl in estimator.separate_dhdl()] == [ @@ -193,47 +134,22 @@ def test_TI_separate_dhdl_single_column(): ] -def test_TI_separate_dhdl_no_pertubed(): +def test_TI_separate_dhdl_no_pertubed(Coulomb): """The test for the case where two lambda are there and one is not pertubed""" - dHdl = gmx_benzene_coul_dHdl() - dHdl.insert( - 1, - "bound-lambda", - [ - 1.0, - ] - * len(dHdl), - ) - dHdl.insert( - 1, - "bound", - [ - 1.0, - ] - * len(dHdl), - ) + dHdl = Coulomb + dHdl.insert(1, "bound-lambda", [1.0] * len(dHdl)) + dHdl.insert(1, "bound", [1.0] * len(dHdl)) dHdl.set_index("bound-lambda", append=True, inplace=True) estimator = TI().fit(dHdl) assert all([isinstance(dhdl, pd.Series) for dhdl in estimator.separate_dhdl()]) - assert [len(dhdl) for dhdl in estimator.separate_dhdl()] == [ - 5, - ] + assert [len(dhdl) for dhdl in estimator.separate_dhdl()] == [5] class Test_Units: """Test the units.""" - @staticmethod - 
@pytest.fixture(scope="class") - def dhdl(): - bz = load_benzene().data - dHdl_coul = alchemlyb.concat( - [extract_dHdl(xvg, T=300) for xvg in bz["Coulomb"]] - ) - return dHdl_coul - - def test_ti(self, dhdl): - ti = TI().fit(dhdl) + def test_ti(self, Coulomb): + ti = TI().fit(Coulomb) assert ti.delta_f_.attrs["temperature"] == 300 assert ti.delta_f_.attrs["energy_unit"] == "kT" assert ti.d_delta_f_.attrs["temperature"] == 300 @@ -241,8 +157,8 @@ def test_ti(self, dhdl): assert ti.dhdl.attrs["temperature"] == 300 assert ti.dhdl.attrs["energy_unit"] == "kT" - def test_ti_separate_dhdl(self, dhdl): - ti = TI().fit(dhdl) + def test_ti_separate_dhdl(self, Coulomb): + ti = TI().fit(Coulomb) dhdl_list = ti.separate_dhdl() for dhdl in dhdl_list: assert dhdl.attrs["temperature"] == 300 @@ -253,10 +169,9 @@ class Test_MultipleColumnUnits: """Test the case where the index has multiple columns""" @staticmethod - @pytest.fixture(scope="class") - def dhdl(): - data = load_ABFE()["data"]["complex"] - dhdl = alchemlyb.concat([extract_dHdl(data[i], 300) for i in range(30)]) + @pytest.fixture + def dhdl(gmx_ABFE_complex_dHdl): + dhdl = alchemlyb.concat(gmx_ABFE_complex_dHdl) return dhdl def test_ti_separate_dhdl(self, dhdl): diff --git a/src/alchemlyb/tests/test_units.py b/src/alchemlyb/tests/test_units.py index 6fcfe5b9..e3c11ea4 100644 --- a/src/alchemlyb/tests/test_units.py +++ b/src/alchemlyb/tests/test_units.py @@ -1,10 +1,8 @@ import pandas as pd import pytest -from alchemtest.gmx import load_benzene import alchemlyb from alchemlyb import pass_attrs -from alchemlyb.parsing.gmx import extract_dHdl, extract_u_nk from alchemlyb.postprocessors.units import to_kT from alchemlyb.preprocessing import ( dhdl2series, @@ -17,22 +15,28 @@ ) -def test_noT(): +@pytest.fixture +def dHdl(gmx_benzene_Coulomb_dHdl): + return gmx_benzene_Coulomb_dHdl[0] + + +@pytest.fixture +def u_nk(gmx_benzene_Coulomb_u_nk): + return gmx_benzene_Coulomb_u_nk[0] + + +def test_noT(dHdl): """Test no temperature 
error""" - dataset = load_benzene() - dhdl = extract_dHdl(dataset["data"]["Coulomb"][0], 310) - dhdl.attrs.pop("temperature", None) + dHdl.attrs.pop("temperature", None) with pytest.raises(TypeError): - to_kT(dhdl) + to_kT(dHdl) -def test_nounit(): +def test_nounit(dHdl): """Test no unit error""" - dataset = load_benzene() - dhdl = extract_dHdl(dataset["data"]["Coulomb"][0], 310) - dhdl.attrs.pop("energy_unit", None) + dHdl.attrs.pop("energy_unit", None) with pytest.raises(TypeError): - to_kT(dhdl) + to_kT(dHdl) def test_concat(): @@ -63,45 +67,38 @@ def test_setT(): class Test_Conversion: """Test the preprocessing module.""" - @staticmethod - @pytest.fixture(scope="class") - def dhdl(): - dataset = load_benzene() - dhdl = extract_dHdl(dataset["data"]["Coulomb"][0], 310) - return dhdl - - def test_kt2kt_number(self, dhdl): - new_dhdl = to_kT(dhdl) + def test_kt2kt_number(self, dHdl): + new_dhdl = to_kT(dHdl) assert 12.9 == pytest.approx(new_dhdl.loc[(0.0, 0.0)], 0.1) - def test_kt2kt_unit(self, dhdl): - new_dhdl = to_kT(dhdl) + def test_kt2kt_unit(self, dHdl): + new_dhdl = to_kT(dHdl) assert new_dhdl.attrs["energy_unit"] == "kT" - def test_kj2kt_unit(self, dhdl): - dhdl.attrs["energy_unit"] = "kJ/mol" - new_dhdl = to_kT(dhdl) + def test_kj2kt_unit(self, dHdl): + dHdl.attrs["energy_unit"] = "kJ/mol" + new_dhdl = to_kT(dHdl) assert new_dhdl.attrs["energy_unit"] == "kT" - def test_kj2kt_number(self, dhdl): - dhdl.attrs["energy_unit"] = "kJ/mol" - new_dhdl = to_kT(dhdl) + def test_kj2kt_number(self, dHdl): + dHdl.attrs["energy_unit"] = "kJ/mol" + new_dhdl = to_kT(dHdl) assert 5.0 == pytest.approx(new_dhdl.loc[(0.0, 0.0)], 0.1) - def test_kcal2kt_unit(self, dhdl): - dhdl.attrs["energy_unit"] = "kcal/mol" - new_dhdl = to_kT(dhdl) + def test_kcal2kt_unit(self, dHdl): + dHdl.attrs["energy_unit"] = "kcal/mol" + new_dhdl = to_kT(dHdl) assert new_dhdl.attrs["energy_unit"] == "kT" - def test_kcal2kt_number(self, dhdl): - dhdl.attrs["energy_unit"] = "kcal/mol" - new_dhdl = 
to_kT(dhdl) + def test_kcal2kt_number(self, dHdl): + dHdl.attrs["energy_unit"] = "kcal/mol" + new_dhdl = to_kT(dHdl) assert 21.0 == pytest.approx(new_dhdl.loc[(0.0, 0.0)], 0.1) - def test_unknown2kt(self, dhdl): - dhdl.attrs["energy_unit"] = "ddd" + def test_unknown2kt(self, dHdl): + dHdl.attrs["energy_unit"] = "ddd" with pytest.raises(ValueError): - to_kT(dhdl) + to_kT(dHdl) def test_pd_concat(): @@ -146,30 +143,16 @@ class TestRetainUnit: """This test tests if the functions that should retain the unit would actually retain the units.""" - @staticmethod - @pytest.fixture(scope="class") - def dhdl(): - dataset = load_benzene() - dhdl = extract_dHdl(dataset["data"]["Coulomb"][0], 310) - return dhdl - - @staticmethod - @pytest.fixture(scope="class") - def u_nk(): - dataset = load_benzene() - u_nk = extract_u_nk(dataset["data"]["Coulomb"][0], 310) - return u_nk - @pytest.mark.parametrize( "func,fixture_in", [ - (dhdl2series, "dhdl"), + (dhdl2series, "dHdl"), (u_nk2series, "u_nk"), (decorrelate_u_nk, "u_nk"), - (decorrelate_dhdl, "dhdl"), - (slicing, "dhdl"), - (statistical_inefficiency, "dhdl"), - (equilibrium_detection, "dhdl"), + (decorrelate_dhdl, "dHdl"), + (slicing, "dHdl"), + (statistical_inefficiency, "dHdl"), + (equilibrium_detection, "dHdl"), ], ) def test_function(self, func, fixture_in, request): diff --git a/src/alchemlyb/tests/test_visualisation.py b/src/alchemlyb/tests/test_visualisation.py index 8866822b..c8f522f9 100644 --- a/src/alchemlyb/tests/test_visualisation.py +++ b/src/alchemlyb/tests/test_visualisation.py @@ -8,17 +8,15 @@ import alchemlyb from alchemlyb.convergence import forward_backward_convergence from alchemlyb.estimators import MBAR, TI, BAR -from alchemlyb.parsing.gmx import extract_u_nk, extract_dHdl from alchemlyb.visualisation import plot_convergence from alchemlyb.visualisation.dF_state import plot_dF_state from alchemlyb.visualisation.mbar_matrix import plot_mbar_overlap_matrix from alchemlyb.visualisation.ti_dhdl import 
plot_ti_dhdl -def test_plot_mbar_omatrix(): +def test_plot_mbar_omatrix(gmx_benzene_Coulomb_u_nk): """Just test if the plot runs""" - bz = load_benzene().data - u_nk_coul = alchemlyb.concat([extract_u_nk(xvg, T=300) for xvg in bz["Coulomb"]]) + u_nk_coul = alchemlyb.concat(gmx_benzene_Coulomb_u_nk) mbar_coul = MBAR() mbar_coul.fit(u_nk_coul) @@ -42,10 +40,9 @@ def test_plot_mbar_omatrix(): assert isinstance(plot_mbar_overlap_matrix(overlap_maxtrix), matplotlib.axes.Axes) -def test_plot_ti_dhdl(): +def test_plot_ti_dhdl(gmx_benzene_Coulomb_dHdl, gmx_benzene_VDW_dHdl): """Just test if the plot runs""" - bz = load_benzene().data - dHdl_coul = alchemlyb.concat([extract_dHdl(xvg, T=300) for xvg in bz["Coulomb"]]) + dHdl_coul = alchemlyb.concat(gmx_benzene_Coulomb_dHdl) ti_coul = TI() ti_coul.fit(dHdl_coul) @@ -61,7 +58,7 @@ def test_plot_ti_dhdl(): ) plt.close(fig) - dHdl_vdw = alchemlyb.concat([extract_dHdl(xvg, T=300) for xvg in bz["VDW"]]) + dHdl_vdw = alchemlyb.concat(gmx_benzene_VDW_dHdl) ti_vdw = TI().fit(dHdl_vdw) ax = plot_ti_dhdl([ti_coul, ti_vdw]) assert isinstance(ax, matplotlib.axes.Axes) @@ -76,13 +73,18 @@ def test_plot_ti_dhdl(): plt.close(ax.figure) -def test_plot_dF_state(): +def test_plot_dF_state( + gmx_benzene_Coulomb_dHdl, + gmx_benzene_Coulomb_u_nk, + gmx_benzene_VDW_u_nk, + gmx_benzene_VDW_dHdl, +): """Just test if the plot runs""" bz = load_benzene().data - u_nk_coul = alchemlyb.concat([extract_u_nk(xvg, T=300) for xvg in bz["Coulomb"]]) - dHdl_coul = alchemlyb.concat([extract_dHdl(xvg, T=300) for xvg in bz["Coulomb"]]) - u_nk_vdw = alchemlyb.concat([extract_u_nk(xvg, T=300) for xvg in bz["VDW"]]) - dHdl_vdw = alchemlyb.concat([extract_dHdl(xvg, T=300) for xvg in bz["VDW"]]) + u_nk_coul = alchemlyb.concat(gmx_benzene_Coulomb_u_nk) + dHdl_coul = alchemlyb.concat(gmx_benzene_Coulomb_dHdl) + u_nk_vdw = alchemlyb.concat(gmx_benzene_VDW_u_nk) + dHdl_vdw = alchemlyb.concat(gmx_benzene_VDW_dHdl) ti_coul = TI().fit(dHdl_coul) ti_vdw = TI().fit(dHdl_vdw) 
@@ -144,10 +146,8 @@ def test_plot_dF_state(): plt.close(fig) -def test_plot_convergence_dataframe(): - bz = load_benzene().data - data_list = [extract_u_nk(xvg, T=300) for xvg in bz["Coulomb"]] - df = forward_backward_convergence(data_list, "MBAR") +def test_plot_convergence_dataframe(gmx_benzene_Coulomb_u_nk): + df = forward_backward_convergence(gmx_benzene_Coulomb_u_nk, "MBAR") ax = plot_convergence(df) assert isinstance(ax, matplotlib.axes.Axes) plt.close(ax.figure) @@ -168,9 +168,8 @@ def test_plot_convergence_dataframe_noerr(): plt.close(ax.figure) -def test_plot_convergence(): - bz = load_benzene().data - data_list = [extract_u_nk(xvg, T=300) for xvg in bz["Coulomb"]] +def test_plot_convergence(gmx_benzene_Coulomb_u_nk): + data_list = gmx_benzene_Coulomb_u_nk forward = [] forward_error = [] backward = [] @@ -205,17 +204,11 @@ def test_plot_convergence(): class Test_Units: @staticmethod - @pytest.fixture(scope="class") - def estimaters(): - bz = load_benzene().data - dHdl_coul = alchemlyb.concat( - [extract_dHdl(xvg, T=300) for xvg in bz["Coulomb"]] - ) + @pytest.fixture() + def estimaters(gmx_benzene_Coulomb_dHdl, gmx_benzene_Coulomb_u_nk): + dHdl_coul = alchemlyb.concat(gmx_benzene_Coulomb_dHdl) ti = TI().fit(dHdl_coul) - - u_nk_coul = alchemlyb.concat( - [extract_u_nk(xvg, T=300) for xvg in bz["Coulomb"]] - ) + u_nk_coul = alchemlyb.concat(gmx_benzene_Coulomb_u_nk) mbar = MBAR().fit(u_nk_coul) return ti, mbar