Skip to content

Commit

Permalink
test for correct TIP3P resname in hbond analysis table (#801)
Browse files Browse the repository at this point in the history
  • Loading branch information
orbeckst committed May 10, 2017
1 parent 1c8f87d commit 0764299
Show file tree
Hide file tree
Showing 2 changed files with 118 additions and 19 deletions.
41 changes: 24 additions & 17 deletions package/MDAnalysis/analysis/hbonds/hbond_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -620,7 +620,7 @@ def __init__(self, universe, selection1='protein', selection2='all', selection1_
elif self.selection1_type not in ('both', 'donor', 'acceptor'):
raise ValueError('HydrogenBondAnalysis: Invalid selection type {0!s}'.format(self.selection1_type))

self.timeseries = None # final result
self._timeseries = None # final result accessed as self.timeseries
self.timesteps = None # time for each frame

self.table = None # placeholder for output table
Expand Down Expand Up @@ -898,7 +898,7 @@ def run(self, **kwargs):
if not self.debug:
logger.debug("HBond analysis: For full step-by-step debugging output use debug=True")

self.timeseries = []
self._timeseries = []
self.timesteps = []

logger.info("checking trajectory...") # n_frames can take a while!
Expand Down Expand Up @@ -968,8 +968,8 @@ def _get_timestep():
#self.logger_debug("S1-D: %r <-> S2-A: %r %f A, %f DEG" % (h, a, dist, angle))
frame_results.append(
[h.index + 1, a.index + 1, h.index, a.index,
'{0!s}{1!s}:{2!s}'.format(h.resname, repr(h.resid), h.name),
'{0!s}{1!s}:{2!s}'.format(a.resname, repr(a.resid), a.name),
(h.resname, h.resid, h.name),
(a.resname, a.resid, a.name),
dist, angle])

already_found[(h.index + 1, a.index + 1)] = True
Expand All @@ -994,11 +994,11 @@ def _get_timestep():
#self.logger_debug("S1-A: %r <-> S2-D: %r %f A, %f DEG" % (a, h, dist, angle))
frame_results.append(
[h.index + 1, a.index + 1, h.index, a.index,
'{0!s}{1!s}:{2!s}'.format(h.resname, repr(h.resid), h.name),
'{0!s}{1!s}:{2!s}'.format(a.resname, repr(a.resid), a.name),
(h.resname, h.resid, h.name),
(a.resname, a.resid, a.name),
dist, angle])

self.timeseries.append(frame_results)
self._timeseries.append(frame_results)

logger.info("HBond analysis: complete; timeseries with %d hbonds in %s.timeseries",
self.count_by_time().count.sum(), self.__class__.__name__)
Expand All @@ -1017,6 +1017,12 @@ def calc_eucl_distance(a1, a2):
"""Calculate the Euclidean distance between two atoms. """
return norm(a2.position - a1.position)

@property
def timeseries(self):
    """Time series of hydrogen bonds.

    Returns the internal ``self._timeseries`` container unchanged.  It is
    ``None`` until :meth:`run` has been called; :meth:`run` resets it to an
    empty list and appends one list of hydrogen bond records for each
    ``frame_results`` it collects.  Each record has the layout ::

        [donor_idx, acceptor_idx, donor_index, acceptor_index,
         (donor_resname, donor_resid, donor_name),
         (acceptor_resname, acceptor_resid, acceptor_name),
         distance, angle]

    where ``*_idx`` is the atom ``index + 1`` and ``*_index`` is the atom
    ``index`` itself.

    NOTE(review): no setter is visible in this excerpt, so the attribute
    appears to be read-only from the outside -- confirm.
    """

    return self._timeseries

def generate_table(self):
"""Generate a normalised table of the results.
Expand All @@ -1042,13 +1048,13 @@ def generate_table(self):
.. _recsql: http://pypi.python.org/pypi/RecSQL
"""
if self.timeseries is None:
if self._timeseries is None:
msg = "No timeseries computed, do run() first."
warnings.warn(msg, category=MissingDataWarning)
logger.warn(msg)
return

num_records = np.sum([len(hframe) for hframe in self.timeseries])
num_records = np.sum([len(hframe) for hframe in self._timeseries])
# build empty output table
dtype = [
("time", float), ("donor_idx", int), ("acceptor_idx", int),
Expand All @@ -1060,11 +1066,12 @@ def generate_table(self):
# and speedups of ~x10 can be achieved by filling a standard array, like this:
out = np.empty((num_records,), dtype=dtype)
cursor = 0 # current row
for t, hframe in zip(self.timesteps, self.timeseries):
for t, hframe in zip(self.timesteps, self._timeseries):
for (donor_idx, acceptor_idx, donor_index, acceptor_index, donor,
acceptor, distance, angle) in hframe:
# donor|acceptor = (resname, resid, atom name)
out[cursor] = (t, donor_idx, acceptor_idx, donor_index, acceptor_index) + \
parse_residue(donor) + parse_residue(acceptor) + (distance, angle)
donor + acceptor + (distance, angle)
cursor += 1
assert cursor == num_records, "Internal Error: Not all HB records stored"
self.table = out.view(np.recarray)
Expand All @@ -1089,15 +1096,15 @@ def count_by_time(self):
:Returns: a class:`numpy.recarray`
"""

if self.timeseries is None:
if self._timeseries is None:
msg = "No timeseries computed, do run() first."
warnings.warn(msg, category=MissingDataWarning)
logger.warn(msg)
return

out = np.empty((len(self.timesteps),), dtype=[('time', float), ('count', int)])
for cursor, time_count in enumerate(zip(self.timesteps,
(len(series) for series in self.timeseries))):
(len(series) for series in self._timeseries))):
out[cursor] = time_count
return out.view(np.recarray)

Expand All @@ -1112,14 +1119,14 @@ def count_by_type(self):
:Returns: a class:`numpy.recarray`
"""
if self.timeseries is None:
if self._timeseries is None:
msg = "No timeseries computed, do run() first."
warnings.warn(msg, category=MissingDataWarning)
logger.warn(msg)
return

hbonds = defaultdict(int)
for hframe in self.timeseries:
for hframe in self._timeseries:
for (donor_idx, acceptor_idx, donor_index, acceptor_index, donor,
acceptor, distance, angle) in hframe:
donor_resnm, donor_resid, donor_atom = parse_residue(donor)
Expand Down Expand Up @@ -1173,14 +1180,14 @@ def timesteps_by_type(self):
:Returns: a class:`numpy.recarray`
"""

if self.timeseries is None:
if self._timeseries is None:
msg = "No timeseries computed, do run() first."
warnings.warn(msg, category=MissingDataWarning)
logger.warn(msg)
return

hbonds = defaultdict(list)
for (t, hframe) in zip(self.timesteps, self.timeseries):
for (t, hframe) in zip(self.timesteps, self._timeseries):
for (donor_idx, acceptor_idx, donor_index, acceptor_index, donor,
acceptor, distance, angle) in hframe:
donor_resnm, donor_resid, donor_atom = parse_residue(donor)
Expand Down
96 changes: 94 additions & 2 deletions testsuite/MDAnalysisTests/analysis/test_hbonds.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,17 @@
from MDAnalysis import SelectionError, SelectionWarning

from numpy.testing import (assert_, assert_equal, assert_array_equal,
assert_raises)
assert_almost_equal, assert_array_almost_equal,
assert_raises, dec)
import numpy as np

import itertools
import warnings
from six import StringIO

from MDAnalysisTests.datafiles import PDB_helix, GRO, XTC
from MDAnalysisTests import parser_not_found
from MDAnalysisTests.datafiles import PDB_helix, GRO, XTC, waterPSF, waterDCD

# For type guessing:
from MDAnalysis.topology.core import guess_atom_type
from MDAnalysis.core.topologyattrs import Atomtypes
Expand Down Expand Up @@ -226,3 +229,92 @@ def run_HBA_dynamic_selections(*args):
yield run_HBA_dynamic_selections, s1, s2, s1type
finally:
self._tearDown()


class TestHydrogenBondAnalysisTIP3P(object):
    """Hydrogen bond analysis of a water system with ``TIP3`` residues.

    Checks that the time series and the table produced by
    :class:`HydrogenBondAnalysis` agree with each other and with stored
    reference values, and that residue names in the table are reported as
    ``TIP3`` (Issue #801,
    https://github.com/MDAnalysis/mdanalysis/issues/801).
    """

    @dec.skipif(parser_not_found('DCD'),
                'DCD parser not available. Are you using python 3?')
    def setUp(self):
        # Run the analysis once and keep the results for all tests.
        self.universe = MDAnalysis.Universe(waterPSF, waterDCD)
        self.kwargs = {
            'selection1': 'all',
            'selection2': 'all',
            'detect_hydrogens': "distance",
            'distance': 3.0,
            'angle': 120.0,
        }
        self.h = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(
            self.universe, **self.kwargs)
        self.h.run(verbose=False)
        self.h.generate_table()
        self.normalized_timeseries = self._normalize_timeseries()

        # keys are the names in the h.table
        self.reference = {
            'distance': {'mean': 2.0208776, 'std': 0.31740859},
            'angle': {'mean': 155.13521, 'std': 12.98955},
        }

        # reference values for the table only
        self.reference_table = {
            'donor_resnm': ["TIP3"] * len(self.normalized_timeseries),
            'acceptor_resnm': ["TIP3"] * len(self.normalized_timeseries),
        }

        # index into timeseries (ADJUST ONCE donor_idx and acceptor_idx
        # are removed) with keys being field names in h.table
        self.columns = {
            'time': 0,
            'donor_idx': 1,
            'acceptor_idx': 2,
            'donor_index': 3,
            'acceptor_index': 4,
            'distance': 7,
            'angle': 8,
        }

        # hackish way to allow looping over self.reference and generating
        # tests: maps the quantity names used in self.reference to the
        # function that computes them
        self._functions = {
            'mean': np.mean,
            'std': np.std,
        }

    def _normalize_timeseries(self):
        """Flatten ``h.timeseries`` into one list of records.

        Each record is the time of the frame followed by the fields of a
        single hydrogen bond::

            (t, d_indx1, a_indx1, d_index0, a_index0, donor, acceptor, dist, angle)
             0  1        2        3         4         5      6         7     8
        """
        timeseries = [[t] + item
                      for t, hframe in zip(self.h.timesteps, self.h.timeseries)
                      for item in hframe]
        return timeseries

    def test_timeseries(self):
        # number of frames and total number of hydrogen bonds
        h = self.h
        assert_equal(len(h.timeseries), 10)
        assert_equal(len(self.normalized_timeseries), 29)

        # summary statistics of each observable must match the reference
        for observable in self.reference:
            idx = self.columns[observable]
            values = [item[idx] for item in self.normalized_timeseries]
            for quantity, reference in self.reference[observable].items():
                func = self._functions[quantity]
                assert_almost_equal(
                    func(values), reference,
                    decimal=5,
                    err_msg="{quantity}({observable}) does not match "
                            "reference".format(quantity=quantity,
                                               observable=observable))

    def test_table_atoms(self):
        h = self.h
        table = h.table

        assert_equal(len(table), len(self.normalized_timeseries))

        # test that timeseries and table agree on index data and
        # hydrogen bond information at atom level
        for name, idx in self.columns.items():
            assert_array_almost_equal(
                table.field(name),
                [data[idx] for data in self.normalized_timeseries],
                err_msg="table[{name}] and timeseries[{idx}] do not "
                        "agree".format(name=name, idx=idx))

        # test at residue level (issue #801
        # https://github.com/MDAnalysis/mdanalysis/issues/801)
        for name, ref in self.reference_table.items():
            assert_array_equal(
                table.field(name), ref,
                err_msg="resname for {0} do not match "
                        "(Issue #801)".format(name))

0 comments on commit 0764299

Please sign in to comment.