From 4c4a510b7d6ba8ba739d7b6295726b19ef55ae7e Mon Sep 17 00:00:00 2001 From: Sean Seyler Date: Fri, 5 Apr 2019 11:29:19 -0700 Subject: [PATCH] improved PSA module - fix #1006 - Updated docs for fit_to_reference; added doc for postfix parameter and note about prefix - Added prefix parameter to Path.run to match optional prefix parameter accepted by fit_to_reference. Updated docs for Path.run() to match Path.fit_to_reference. - Code to pass prefix parameter in Path.run to Path.fit_to_reference added - Fixed typo in PSAnalysis.generate_paths referring incorrectly to directory for fitted trajectories as trj_fit rather than fitted_trajs - Added default_path_basename parameter to PSAnalysis and inits to "path" - Added default prefix in PSAnalysis for fitted paths and trajectories; fixed save_paths to use default prefix and path basename if prefix and filename are not provided - test_load now passes pytest - Updated changelog with fix to PSA --- package/CHANGELOG | 5 +- package/MDAnalysis/analysis/psa.py | 65 +++++++++++++------ .../MDAnalysisTests/analysis/test_psa.py | 1 - 3 files changed, 47 insertions(+), 24 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index 4b08daeafae..1c941f350e7 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -15,7 +15,7 @@ The rules for this file: ------------------------------------------------------------------------------ mm/dd/yy micaela-matta, xiki-tempula, zemanj, mattwthompson, orbeckst, aliehlen, dpadula85, jbarnoud, manuel.nuno.melo, richardjgowers, mattwthompson, - ayushsuhane, picocentauri, NinadBhat, bieniekmateusz, p-j-smith + ayushsuhane, picocentauri, NinadBhat, bieniekmateusz, p-j-smith, sseyler * 0.20.0 @@ -52,7 +52,7 @@ Enhancements * added functionality to write files in compressed form(gz,bz2). 
(Issue #2216, PR #2221) * survival probability additions: residues, intermittency, step with performance, - (PR #2226) + (PR #2226) Changes * added official support for Python 3.7 (PR #1963) @@ -64,6 +64,7 @@ Changes * changed fudge_factor in guess_bonds to deal with new vdw radii (#2138, PR #2142) Fixes + * Fixed default saving and loading behavior of Path and PSAnalysis (Issue #2049) * fixed the segmentation fault in capped_distances (Issue #2164, PR #2169) * fixed gcc support in MacOS (Issue #2162, PR #2163) * fixed error when reading bonds/angles/dihedrals from gsd file (Issue #2152, diff --git a/package/MDAnalysis/analysis/psa.py b/package/MDAnalysis/analysis/psa.py index d0d8d36d3d6..99d2ae3fcc6 100644 --- a/package/MDAnalysis/analysis/psa.py +++ b/package/MDAnalysis/analysis/psa.py @@ -837,10 +837,10 @@ def __init__(self, universe, reference, ref_select='name CA', self.natoms = None - def fit_to_reference(self, filename=None, prefix='', postfix='_fit', + def fit_to_reference(self, filename=None, prefix='psafit_', postfix='', rmsdfile=None, targetdir=os.path.curdir, weights=None, tol_mass=0.1): - """Align each trajectory frame to the reference structure + """Align each trajectory frame to the reference structure. 
Parameters ---------- @@ -849,7 +849,11 @@ def fit_to_reference(self, filename=None, prefix='', postfix='_fit', original trajectory filename (from :attr:`Path.u_original`) with `prefix` prepended prefix : str (optional) - prefix for auto-generating the new output filename + prefix for output filenames (sets optional `prefix` parameter in + :class:`MDAnalysis.analysis.align.AlignTraj`) + postfix : str (optional) + postfix for output filenames (appended to the filename to be given to + :class:`MDAnalysis.analysis.align.AlignTraj`) rmsdfile : str (optional) file name for writing the RMSD time series [``None``] weights : {"mass", ``None``} or array_like (optional) @@ -949,7 +953,7 @@ def to_path(self, fitted=False, select=None, flat=False): return np.array([atoms.positions for _ in frames]) - def run(self, align=False, filename=None, postfix='_fit', rmsdfile=None, + def run(self, align=False, filename=None, prefix='psafit_', postfix='', rmsdfile=None, targetdir=os.path.curdir, weights=None, tol_mass=0.1, flat=False): r"""Generate a path from a trajectory and reference structure. 
@@ -980,8 +984,12 @@ def run(self, align=False, filename=None, postfix='_fit', rmsdfile=None, filename for the RMS-fitted trajectory or pdb; defaults to the original trajectory filename (from :attr:`Path.u_original`) with *prefix* prepended + prefix : str (optional) + prefix for output filenames (passed to optional `prefix` parameter in + :class:`MDAnalysis.analysis.align.AlignTraj`) postfix : str (optional) - prefix for auto-generating the new output filename + postfix for output filenames (appended to the filename given to + :class:`MDAnalysis.analysis.align.AlignTraj`) rmsdfile : str (optional) file name for writing the RMSD time series [``None``] weights : {"mass", ``None``} or array_like (optional) @@ -1013,7 +1021,7 @@ def run(self, align=False, filename=None, postfix='_fit', rmsdfile=None, """ if align: self.u_fitted = self.fit_to_reference( - filename=filename, postfix=postfix, + filename=filename, prefix=prefix, postfix=postfix, rmsdfile=rmsdfile, targetdir=targetdir, weights=weights, tol_mass=0.1) self.path = self.to_path(fitted=align, flat=flat) @@ -1373,6 +1381,8 @@ def __init__(self, universes, reference=None, ref_select='name CA', trj_names.append(filename) self.trj_names = trj_names self.fit_trj_names = None + self.default_fitted_path_prefix = 'psafit_' + self.default_path_basename = 'path' self.path_names = None self.top_name = self.universes[0].filename if len(universes) != 0 else None self.labels = labels or self.trj_names @@ -1401,8 +1411,9 @@ def __init__(self, universes, reference=None, ref_select='name CA', self._psa_pairs = None # (distance vector order) list of all PSAPairs - def generate_paths(self, align=False, filename='fitted', infix='', weights=None, - tol_mass=False, ref_frame=None, flat=False, save=True, store=True): + def generate_paths(self, align=False, filename=None, prefix=None, postfix='', + weights=None, tol_mass=False, ref_frame=None, flat=False, + save=True, store=True): """Generate paths, aligning each to reference structure 
if necessary. Parameters @@ -1411,10 +1422,13 @@ def generate_paths(self, align=False, filename='fitted', infix='', weights=None, Align trajectories to atom selection :attr:`PSAnalysis.ref_select` of :attr:`PSAnalysis.u_reference` [``False``] filename : str - strings representing base filename for fitted trajectories and - paths [``None``] - infix : str - additional tag string that is inserted into the output filename of + string representing base filename for fitted path and trajectory + files [``None``] + prefix : str + additional tag string that is prepended to the output filename of + the fitted path and trajectory files [``None``] + postfix : str + additional tag string that is appended to the output filename of the fitted trajectory files [''] weights : {"mass", ``None``} or array_like (optional) choose weights. With ``"mass"`` uses masses as weights; with @@ -1441,7 +1455,7 @@ def generate_paths(self, align=False, filename='fitted', infix='', weights=None, The fitted trajectories are written to new files in the - "/trj_fit" subdirectory in :attr:`PSAnalysis.targetdir` named + "/fitted_trajs" subdirectory in :attr:`PSAnalysis.targetdir` named "filename(*trajectory*)XXX*infix*_psa", where "XXX" is a number between 000 and 999; the extension of each file is the same as its original.
Optionally, the trajectories can also be saved in numpy compressed npz @@ -1460,15 +1474,17 @@ def generate_paths(self, align=False, filename='fitted', infix='', weights=None, if ref_frame is None: ref_frame = self.ref_frame + prefix = prefix or self.default_fitted_path_prefix + filename = filename or self.default_path_basename paths = [] fit_trj_names = [] for i, u in enumerate(self.universes): p = Path(u, self.u_reference, ref_select=self.ref_select, path_select=self.path_select, ref_frame=ref_frame) trj_dir = os.path.join(self.targetdir, self.datadirs['fitted_trajs']) - postfix = '{0}{1}{2:03n}'.format(infix, '_psa', i+1) + postfix = '{0}{1:03n}'.format(postfix, i+1) top_name, fit_trj_name = p.run(align=align, filename=filename, - postfix=postfix, + prefix=prefix, postfix=postfix, targetdir=trj_dir, weights=weights, tol_mass=tol_mass, flat=flat) @@ -1646,7 +1662,7 @@ def save_result(self, filename=None): return filename - def save_paths(self, filename=None): + def save_paths(self, filename=None, prefix=None): """Save fitted :attr:`PSAnalysis.paths` to numpy compressed npz files. 
The data are saved with :func:`numpy.savez_compressed` in the directory @@ -1654,8 +1670,12 @@ def save_paths(self, filename=None): Parameters ---------- - filename : str - specifies filename [``None``] + filename : str (optional) + specifies filename; defaults to default path basename + [``None``] + prefix : str (optional) + prefix for output filenames of paths; defaults to default path prefix + [``None``] Returns ------- @@ -1666,9 +1686,12 @@ def save_paths(self, filename=None): load """ - filename = filename or 'path_psa' + + prefix = prefix or self.default_fitted_path_prefix + filename = filename or self.default_path_basename head = os.path.join(self.targetdir, self.datadirs['paths']) - outfile = os.path.join(head, filename) + tail = prefix + filename + outfile = os.path.join(head, tail) if self.paths is None: raise NoDataError("Paths have not been calculated yet") path_names = [] @@ -1680,7 +1703,7 @@ def save_paths(self, filename=None): self.path_names = path_names with open(self._paths_pkl, 'wb') as output: cPickle.dump(self.path_names, output) - return filename + return tail def load(self): diff --git a/testsuite/MDAnalysisTests/analysis/test_psa.py b/testsuite/MDAnalysisTests/analysis/test_psa.py index f3f5b553b0f..2f0722f71e8 100644 --- a/testsuite/MDAnalysisTests/analysis/test_psa.py +++ b/testsuite/MDAnalysisTests/analysis/test_psa.py @@ -167,7 +167,6 @@ def test_nearest_neighbors(self, psa): psa.run_pairs_analysis(neighbors=True) assert len(psa.nearest_neighbors) == 3 - @pytest.mark.xfail def test_load(self, psa): """Test that the automatically saved files can be loaded""" expected_path_names = psa.path_names[:]