Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Clean up the iloc in the tests #254

Merged
merged 16 commits into from
Dec 6, 2022
1 change: 1 addition & 0 deletions CHANGES
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ The rules for this file:
* 1.0.1

Fixes
- Remove most of the iloc in the tests (issue #202, PR #254).
- AMBER parser now raises ValueError when the initial simulation time
is not found (issue #272, PR #273).
- The regex in the AMBER parser now reads also 'field=value' pairs where
Expand Down
11 changes: 6 additions & 5 deletions src/alchemlyb/tests/parsing/test_gmx.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def test_u_nk_with_total_energy():

# Check one specific value in the dataframe
assert_almost_equal(
extract_u_nk(dataset['data']['AllStates'][0], T=300).iloc[0][0],
extract_u_nk(dataset['data']['AllStates'][0], T=300).loc[0][(0.0,0.0)].values[0],
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PEP8 and using .loc

Suggested change
extract_u_nk(dataset['data']['AllStates'][0], T=300).loc[0][(0.0,0.0)].values[0],
extract_u_nk(dataset['data']['AllStates'][0], T=300).loc[0, (0.0, 0.0)].values[0],

-11211.577658852531,
decimal=6
)
Expand All @@ -142,7 +142,7 @@ def test_u_nk_with_potential_energy():

# Check one specific value in the dataframe
assert_almost_equal(
extract_u_nk(dataset['data']['AllStates'][0], T=300).iloc[0][0],
extract_u_nk(dataset['data']['AllStates'][0], T=300).loc[0][(0.0,0.0)].values[0],
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

see above

-15656.557252200757,
decimal=6
)
Expand All @@ -161,7 +161,7 @@ def test_u_nk_without_energy():

# Check one specific value in the dataframe
assert_almost_equal(
extract_u_nk(dataset['data']['AllStates'][0], T=300).iloc[0][0],
extract_u_nk(dataset['data']['AllStates'][0], T=300).loc[0][(0.0,0.0)].values[0],
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

see above

0.0,
decimal=6
)
Expand All @@ -180,8 +180,9 @@ def _diag_sum(dataset):
u_nk = extract_u_nk(filename, T=300)

# Calculate the sum of diagonal elements:
for i in range(len(dataset['data'][leg])):
ds += u_nk.iloc[i][i]
for i, lambda_ in enumerate(u_nk.columns):
#18.6 is the time step
ds += u_nk.loc[i*186/10][lambda_].values[0]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this is a proper .loc, can't we use

Suggested change
ds += u_nk.loc[i*186/10][lambda_].values[0]
ds += u_nk.loc[i*186/10, lambda_].values[0]

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not a proper .loc.
the .loc[1.0] could in theory match both dataframe with row index (1.0, 0.0, 0.0) and (1.0).
But .loc[1.0, :] would only strictly match (1.0).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, after looking at the data I see that this is a multiindex. But in this case wouldn't be

ds += u_nk.loc[i*186/10].loc[lambda_].values[0]

be cleaner?

(I have no idea how to get something like ".loc[lambda_, (0.0, 0.0)]" instead of the values[0] but I find pandas indexing confusing...)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, but we cannot get lambda_ beforehand and the lambda_ could be (0.0, 0.0) or (0.0, 0.0, 0.0), which is why for some of them, I could only do iloc.
In some sense, we could get them by analysing the dataframe but that would make the unit test too complicated so I think the current form is a good balance.


return ds

Expand Down
38 changes: 11 additions & 27 deletions src/alchemlyb/tests/test_convergence.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,37 +18,21 @@ def test_convergence_ti(gmx_benzene):
dHdl, u_nk = gmx_benzene
convergence = forward_backward_convergence(dHdl, 'TI')
assert convergence.shape == (10, 5)
assert convergence.iloc[0, 0] == pytest.approx(3.07, 0.01)
assert convergence.iloc[0, 2] == pytest.approx(3.11, 0.01)
assert convergence.iloc[-1, 0] == pytest.approx(3.09, 0.01)
assert convergence.iloc[-1, 2] == pytest.approx(3.09, 0.01)

def test_convergence_mbar(gmx_benzene):
dHdl, u_nk = gmx_benzene
convergence = forward_backward_convergence(u_nk, 'MBAR')
assert convergence.shape == (10, 5)
assert convergence.iloc[0, 0] == pytest.approx(3.02, 0.01)
assert convergence.iloc[0, 2] == pytest.approx(3.06, 0.01)
assert convergence.iloc[-1, 0] == pytest.approx(3.05, 0.01)
assert convergence.iloc[-1, 2] == pytest.approx(3.04, 0.01)

def test_convergence_autombar(gmx_benzene):
dHdl, u_nk = gmx_benzene
convergence = forward_backward_convergence(u_nk, 'MBAR')
assert convergence.shape == (10, 5)
assert convergence.iloc[0, 0] == pytest.approx(3.02, 0.01)
assert convergence.iloc[0, 2] == pytest.approx(3.06, 0.01)
assert convergence.iloc[-1, 0] == pytest.approx(3.05, 0.01)
assert convergence.iloc[-1, 2] == pytest.approx(3.04, 0.01)
assert convergence.loc[0, 'Forward'] == pytest.approx(3.07, 0.01)
assert convergence.loc[0, 'Backward'] == pytest.approx(3.11, 0.01)
assert convergence.loc[9, 'Forward'] == pytest.approx(3.09, 0.01)
assert convergence.loc[9, 'Backward'] == pytest.approx(3.09, 0.01)

def test_convergence_bar(gmx_benzene):
@pytest.mark.parametrize('estimator', ['MBAR', 'BAR'])
def test_convergence_fep(gmx_benzene, estimator):
dHdl, u_nk = gmx_benzene
convergence = forward_backward_convergence(u_nk, 'BAR')
convergence = forward_backward_convergence(u_nk, estimator)
assert convergence.shape == (10, 5)
assert convergence.iloc[0, 0] == pytest.approx(3.02, 0.01)
assert convergence.iloc[0, 2] == pytest.approx(3.06, 0.01)
assert convergence.iloc[-1, 0] == pytest.approx(3.05, 0.01)
assert convergence.iloc[-1, 2] == pytest.approx(3.04, 0.01)
assert convergence.loc[0, 'Forward'] == pytest.approx(3.02, 0.01)
assert convergence.loc[0, 'Backward'] == pytest.approx(3.06, 0.01)
assert convergence.loc[9, 'Forward'] == pytest.approx(3.05, 0.01)
assert convergence.loc[9, 'Backward'] == pytest.approx(3.04, 0.01)

def test_convergence_wrong_estimator(gmx_benzene):
dHdl, u_nk = gmx_benzene
Expand Down
6 changes: 5 additions & 1 deletion src/alchemlyb/tests/test_fep_estimators.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,8 @@ def compare_delta_f(self, X_delta_f):
assert X_delta_f[2] == pytest.approx(d_delta_f, rel=1e-3)

def get_delta_f(self, est):
# Use .iloc[0, -1] as we want to cater for both
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks for the comment

# delta_f_.loc[0.0, 1.0] and delta_f_.loc[(0.0, 0.0), (0.0, 1.0)]
return est.delta_f_.iloc[0, -1], est.d_delta_f_.iloc[0, -1]


Expand Down Expand Up @@ -181,7 +183,7 @@ def test_failback_adaptive(self, n_uk_list):
# The hybr will fail on this while adaptive will work
mbar = AutoMBAR().fit(alchemlyb.concat([n_uk[:2] for n_uk in
n_uk_list]))
assert np.isclose(mbar.d_delta_f_.iloc[0, -1], 1.76832, 0.1)
assert np.isclose(mbar.d_delta_f_[(0.0, 0.0, 0.0)][(1.0, 1.0, 1.0)], 1.76832, 0.1)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we write this as

Suggested change
assert np.isclose(mbar.d_delta_f_[(0.0, 0.0, 0.0)][(1.0, 1.0, 1.0)], 1.76832, 0.1)
assert np.isclose(mbar.d_delta_f_[(0.0, 0.0, 0.0), (1.0, 1.0, 1.0)], 1.76832, 0.1)


def test_AutoMBAR_BGFS():
# A case where only BFGS would work
Expand Down Expand Up @@ -227,6 +229,8 @@ def get_delta_f(self, est):

for i in range(len(est.d_delta_f_) - 1):
ee += est.d_delta_f_.values[i][i+1]**2
# Use .iloc[0, -1] as we want to cater for both
# delta_f_.loc[0.0, 1.0] and delta_f_.loc[(0.0, 0.0), (0.0, 1.0)]
return est.delta_f_.iloc[0, -1], ee**0.5

class Test_Units():
Expand Down
8 changes: 4 additions & 4 deletions src/alchemlyb/tests/test_preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,10 +150,10 @@ def test_disordered_exception(self, data):
"""Test that a shuffled DataFrame yields a KeyError.

"""
indices = np.arange(len(data))
indices = data.index.values
np.random.shuffle(indices)

df = data.iloc[indices]
df = data.loc[indices]

with pytest.raises(KeyError):
self.slicer(df, lower=200)
Expand Down Expand Up @@ -246,7 +246,7 @@ def test_subsampling(self, data, size):
"""Basic test for execution; resulting size of dataset sensitive to
machine and depends on algorithm.
"""
assert len(self.slicer(data, series=data.iloc[:, 0])) <= size
assert len(self.slicer(data, series=data.loc[:, data.columns[0]])) <= size

@pytest.mark.parametrize('data', [gmx_benzene_dHdl(),
gmx_benzene_u_nk()])
Expand All @@ -273,7 +273,7 @@ def slicer(self, *args, **kwargs):
(False, gmx_benzene_u_nk(), 3571),
])
def test_conservative(self, data, size, conservative):
sliced = self.slicer(data, series=data.iloc[:, 0], conservative=conservative)
sliced = self.slicer(data, series=data.loc[:, data.columns[0]], conservative=conservative)
# results can vary slightly with different machines
# so possibly do
# delta = 10
Expand Down
2 changes: 2 additions & 0 deletions src/alchemlyb/tests/test_ti_estimators.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,8 @@ class TIestimatorMixin:
def test_get_delta_f(self, X_delta_f):
dHdl, E, dE = X_delta_f
est = self.cls().fit(dHdl)
# Use .iloc[0, -1] as we want to cater for both
# delta_f_.loc[0.0, 1.0] and delta_f_.loc[(0.0, 0.0), (0.0, 1.0)]
delta_f = est.delta_f_.iloc[0, -1]
d_delta_f = est.d_delta_f_.iloc[0, -1]

Expand Down
6 changes: 3 additions & 3 deletions src/alchemlyb/tests/test_units.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def dhdl():

def test_kt2kt_number(self, dhdl):
new_dhdl = to_kT(dhdl)
assert 12.9 == pytest.approx(new_dhdl.iloc[0, 0], 0.1)
assert 12.9 == pytest.approx(new_dhdl.loc[(0.0,0.0)], 0.1)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pep8 space after comma

Suggested change
assert 12.9 == pytest.approx(new_dhdl.loc[(0.0,0.0)], 0.1)
assert 12.9 == pytest.approx(new_dhdl.loc[(0.0, 0.0)], 0.1)


def test_kt2kt_unit(self, dhdl):
new_dhdl = to_kT(dhdl)
Expand All @@ -74,7 +74,7 @@ def test_kj2kt_unit(self, dhdl):
def test_kj2kt_number(self, dhdl):
dhdl.attrs['energy_unit'] = 'kJ/mol'
new_dhdl = to_kT(dhdl)
assert 5.0 == pytest.approx(new_dhdl.iloc[0, 0], 0.1)
assert 5.0 == pytest.approx(new_dhdl.loc[(0.0,0.0)], 0.1)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pep8 space after comma


def test_kcal2kt_unit(self, dhdl):
dhdl.attrs['energy_unit'] = 'kcal/mol'
Expand All @@ -84,7 +84,7 @@ def test_kcal2kt_unit(self, dhdl):
def test_kcal2kt_number(self, dhdl):
dhdl.attrs['energy_unit'] = 'kcal/mol'
new_dhdl = to_kT(dhdl)
assert 21.0 == pytest.approx(new_dhdl.iloc[0, 0], 0.1)
assert 21.0 == pytest.approx(new_dhdl.loc[(0.0,0.0)], 0.1)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pep8 space after comma


def test_unknown2kt(self, dhdl):
dhdl.attrs['energy_unit'] = 'ddd'
Expand Down
8 changes: 4 additions & 4 deletions src/alchemlyb/tests/test_visualisation.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,13 +158,13 @@ def test_plot_convergence():
slice = int(len(data_list[0])/num_points*i)
u_nk_coul = alchemlyb.concat([data[:slice] for data in data_list])
estimate = MBAR().fit(u_nk_coul)
forward.append(estimate.delta_f_.iloc[0,-1])
forward_error.append(estimate.d_delta_f_.iloc[0,-1])
forward.append(estimate.delta_f_.loc[0.0,1.0])
forward_error.append(estimate.d_delta_f_.loc[0.0,1.0])
Comment on lines +161 to +162
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pep8 space after comma

# Do the backward
u_nk_coul = alchemlyb.concat([data[-slice:] for data in data_list])
estimate = MBAR().fit(u_nk_coul)
backward.append(estimate.delta_f_.iloc[0,-1])
backward_error.append(estimate.d_delta_f_.iloc[0,-1])
backward.append(estimate.delta_f_.loc[0.0,1.0])
backward_error.append(estimate.d_delta_f_.loc[0.0,1.0])
Comment on lines +166 to +167
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pep8 space after comma


df = pd.DataFrame(data={'Forward': forward,
'Forward_Error': forward_error,
Expand Down