From a00840e9f033bc614c5b3830243c5ee34ac34ef1 Mon Sep 17 00:00:00 2001 From: richardjgowers Date: Wed, 14 Nov 2018 18:16:06 -0600 Subject: [PATCH 01/21] added experimental pickling support to Universe --- package/MDAnalysis/core/universe.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/package/MDAnalysis/core/universe.py b/package/MDAnalysis/core/universe.py index f869ffad945..b8ec883676d 100644 --- a/package/MDAnalysis/core/universe.py +++ b/package/MDAnalysis/core/universe.py @@ -768,11 +768,15 @@ def __repr__(self): return "".format( n_atoms=len(self.atoms)) - def __getstate__(self): - raise NotImplementedError + @classmethod + def _unpickle_U(cls, top, traj, anchor): + u = cls(top, anchor_name=anchor) + u.load_new(traj) + + return u - def __setstate__(self, state): - raise NotImplementedError + def __reduce__(self): + return (self._unpickle_U, (self._topology, self.trajectory.filename, self.anchor_name)) # Properties @property From 133072f90c585f4be47e1a64141457bd1b4dd5b1 Mon Sep 17 00:00:00 2001 From: richardjgowers Date: Sat, 17 Nov 2018 16:10:08 -0600 Subject: [PATCH 02/21] fixed pickle test --- testsuite/MDAnalysisTests/core/test_universe.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/testsuite/MDAnalysisTests/core/test_universe.py b/testsuite/MDAnalysisTests/core/test_universe.py index 20d5f2a3293..05663c5af6b 100644 --- a/testsuite/MDAnalysisTests/core/test_universe.py +++ b/testsuite/MDAnalysisTests/core/test_universe.py @@ -283,10 +283,14 @@ def test_load_multiple_args(self): assert_equal(len(u.atoms), 3341, "Loading universe failed somehow") assert_equal(u.trajectory.n_frames, 2 * ref.trajectory.n_frames) - def test_pickle_raises_NotImplementedError(self): + def test_pickle(self): u = mda.Universe(PSF, DCD) - with pytest.raises(NotImplementedError): - cPickle.dumps(u, protocol = cPickle.HIGHEST_PROTOCOL) + + s = cPickle.dumps(u, protocol = cPickle.HIGHEST_PROTOCOL) + + new_u = 
cPickle.loads(s) + + assert_equal(u.atoms.names, new_u.atoms.names) def test_set_dimensions(self): u = mda.Universe(PSF, DCD) From aa595f58500cf1a30afba9b6de0b658a63150eb0 Mon Sep 17 00:00:00 2001 From: richardjgowers Date: Wed, 21 Nov 2018 13:50:53 -0600 Subject: [PATCH 03/21] wip of reader pickling --- package/MDAnalysis/coordinates/base.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/package/MDAnalysis/coordinates/base.py b/package/MDAnalysis/coordinates/base.py index d29b741fd75..87fe76be39d 100644 --- a/package/MDAnalysis/coordinates/base.py +++ b/package/MDAnalysis/coordinates/base.py @@ -2078,6 +2078,20 @@ def __init__(self, filename, convert_units=None, **kwargs): self._ts_kwargs = ts_kwargs + @classmethod + def _unpickle_Reader(cls, filename, timestep, auxs, trans): + new_R = cls(filename) + new_R.add_transformations(trans) + for auxname, auxdata in auxs.items(): + new_R.add_aux(auxname, auxdata) + + def __reduce__(self): + return (self._unpickle_Reader, + (self.filename, self.ts, self._auxs, self._transformations)) + + def __len__(self): + return self.n_frames + def copy(self): """Return independent copy of this Reader. 
From ba1a13876001e4472914b463432fa3e313bfff24 Mon Sep 17 00:00:00 2001 From: richardjgowers Date: Sat, 16 Feb 2019 10:51:59 -0600 Subject: [PATCH 04/21] simplified serialisation support more --- package/MDAnalysis/coordinates/base.py | 11 ----------- package/MDAnalysis/core/universe.py | 15 +++++++++++---- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/package/MDAnalysis/coordinates/base.py b/package/MDAnalysis/coordinates/base.py index 87fe76be39d..0a08e7b35d5 100644 --- a/package/MDAnalysis/coordinates/base.py +++ b/package/MDAnalysis/coordinates/base.py @@ -2078,17 +2078,6 @@ def __init__(self, filename, convert_units=None, **kwargs): self._ts_kwargs = ts_kwargs - @classmethod - def _unpickle_Reader(cls, filename, timestep, auxs, trans): - new_R = cls(filename) - new_R.add_transformations(trans) - for auxname, auxdata in auxs.items(): - new_R.add_aux(auxname, auxdata) - - def __reduce__(self): - return (self._unpickle_Reader, - (self.filename, self.ts, self._auxs, self._transformations)) - def __len__(self): return self.n_frames diff --git a/package/MDAnalysis/core/universe.py b/package/MDAnalysis/core/universe.py index b8ec883676d..27e2b7946a1 100644 --- a/package/MDAnalysis/core/universe.py +++ b/package/MDAnalysis/core/universe.py @@ -738,7 +738,7 @@ def _gen_anchor_hash(self): return self._anchor_uuid except AttributeError: # store this so we can later recall it if needed - self._anchor_uuid = uuid.uuid4() + self._anchor_uuid = str(uuid.uuid4()) return self._anchor_uuid @property @@ -770,13 +770,20 @@ def __repr__(self): @classmethod def _unpickle_U(cls, top, traj, anchor): - u = cls(top, anchor_name=anchor) - u.load_new(traj) + """Special method used by __reduce__ to deserialise a Universe""" + # top is a Topology object at this point, but Universe can handle that + u = cls(top) + u.anchor_name = anchor + # maybe this is None, but that's still cool + u.trajectory = traj return u def __reduce__(self): - return (self._unpickle_U, 
(self._topology, self.trajectory.filename, self.anchor_name)) + # Can't quite use __setstate__/__getstate__ so go via __reduce__ + # Universe's two "legs" of topology and traj both serialise themselves + # the only other state held in Universe is anchor name? + return (self._unpickle_U, (self._topology, self._trajectory, self.anchor_name)) # Properties @property From caa588d1394df57d1d3f662be02dc66ae3b70ac1 Mon Sep 17 00:00:00 2001 From: richardjgowers Date: Sat, 16 Feb 2019 11:14:30 -0600 Subject: [PATCH 05/21] make AuxReader not seralise (until tested) --- package/MDAnalysis/auxiliary/base.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/package/MDAnalysis/auxiliary/base.py b/package/MDAnalysis/auxiliary/base.py index 50c3c3bf7c9..e769cfde883 100644 --- a/package/MDAnalysis/auxiliary/base.py +++ b/package/MDAnalysis/auxiliary/base.py @@ -307,6 +307,10 @@ def __init__(self, represent_ts_as='closest', auxname=None, cutoff=-1, self.auxstep._dt = self.time - self.initial_time self.rewind() + def __getstate__(self): + # probably works fine, but someone needs to write tests to confirm + return NotImplementedError + def copy(self): raise NotImplementedError("Copy not implemented for AuxReader") From e21361d509457ade46cda96c04083ae3c16101a4 Mon Sep 17 00:00:00 2001 From: richardjgowers Date: Sat, 16 Feb 2019 11:15:07 -0600 Subject: [PATCH 06/21] add tests for multiprocessing --- .../MDAnalysisTests/parallelism/__init__.py | 0 .../parallelism/test_multiprocessing.py | 38 +++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 testsuite/MDAnalysisTests/parallelism/__init__.py create mode 100644 testsuite/MDAnalysisTests/parallelism/test_multiprocessing.py diff --git a/testsuite/MDAnalysisTests/parallelism/__init__.py b/testsuite/MDAnalysisTests/parallelism/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/testsuite/MDAnalysisTests/parallelism/test_multiprocessing.py 
b/testsuite/MDAnalysisTests/parallelism/test_multiprocessing.py new file mode 100644 index 00000000000..dfcf57ab3f4 --- /dev/null +++ b/testsuite/MDAnalysisTests/parallelism/test_multiprocessing.py @@ -0,0 +1,38 @@ +"""Test that MDAnalysis plays nicely with multiprocessing + +""" +import multiprocessing +import numpy as np +import pytest + +import MDAnalysis as mda +from MDAnalysisTests.datafiles import ( + PSF, DCD +) + +from numpy.testing import assert_equal + + +@pytest.fixture +def u(): + return mda.Universe(PSF, DCD) + + +def cog(u, ag, frame_id): + u.trajectory[frame_id] + + return ag.center_of_geometry() + + +def test_multiprocess_COM(u): + ag = u.atoms[10:20] + + ref = np.array([cog(u, ag, i) + for i in range(4)]) + + p = multiprocessing.Pool(2) + + res = np.array([p.apply(cog, args=(u, ag, i)) + for i in range(4)]) + + assert_equal(ref, res) From 99bdd08b4cd8ffa329cbe1c75969f3f03f1b94bf Mon Sep 17 00:00:00 2001 From: richardjgowers Date: Sat, 16 Feb 2019 11:24:45 -0600 Subject: [PATCH 07/21] added more formats to multiprocessing tests and broke everything --- .../parallelism/test_multiprocessing.py | 44 ++++++++++++++++--- 1 file changed, 37 insertions(+), 7 deletions(-) diff --git a/testsuite/MDAnalysisTests/parallelism/test_multiprocessing.py b/testsuite/MDAnalysisTests/parallelism/test_multiprocessing.py index dfcf57ab3f4..7705e8a7823 100644 --- a/testsuite/MDAnalysisTests/parallelism/test_multiprocessing.py +++ b/testsuite/MDAnalysisTests/parallelism/test_multiprocessing.py @@ -7,32 +7,62 @@ import MDAnalysis as mda from MDAnalysisTests.datafiles import ( - PSF, DCD + PSF, DCD, + GRO, XTC, + PDB, + XYZ, ) from numpy.testing import assert_equal -@pytest.fixture -def u(): - return mda.Universe(PSF, DCD) - +@pytest.fixture(params=[ + (PSF, DCD), + (GRO, XTC), + (PDB,), + (XYZ,), +]) +def u(request): + if len(request.param) == 1: + f = request.param + return mda.Universe(f) + else: + top, trj = request.param + return mda.Universe(top, trj) +# Define target 
functions here +# inside test functions doesn't work def cog(u, ag, frame_id): u.trajectory[frame_id] return ag.center_of_geometry() -def test_multiprocess_COM(u): +def getnames(u, ix): + # Check topology stuff works + return u.atoms[ix].name + + +def test_multiprocess_COG(u): ag = u.atoms[10:20] ref = np.array([cog(u, ag, i) for i in range(4)]) p = multiprocessing.Pool(2) - res = np.array([p.apply(cog, args=(u, ag, i)) for i in range(4)]) + p.close() + assert_equal(ref, res) + + +def test_multiprocess_names(u): + ref = [getnames(u, i) + for i in range(10)] + + p = multiprocessing.Pool(2) + res = [p.apply(getnames, args=(u, i)) + for i in range(10)] + p.close() assert_equal(ref, res) From f1a7d5aec969d30adc31b719d637b1bcfe386024 Mon Sep 17 00:00:00 2001 From: richardjgowers Date: Wed, 20 Feb 2019 16:33:55 -0600 Subject: [PATCH 08/21] pickling all readers now works... --- package/MDAnalysis/coordinates/DLPoly.py | 37 ++--- package/MDAnalysis/coordinates/GMS.py | 22 +-- package/MDAnalysis/coordinates/PDB.py | 12 +- package/MDAnalysis/coordinates/TRJ.py | 144 ++++++++++-------- package/MDAnalysis/coordinates/TRZ.py | 40 ++--- package/MDAnalysis/coordinates/XYZ.py | 20 +-- package/MDAnalysis/coordinates/base.py | 38 ++++- package/MDAnalysis/coordinates/chain.py | 12 +- .../parallelism/test_multiprocessing.py | 73 ++++++++- 9 files changed, 261 insertions(+), 137 deletions(-) diff --git a/package/MDAnalysis/coordinates/DLPoly.py b/package/MDAnalysis/coordinates/DLPoly.py index 5e322e4ffc1..7901d05f59c 100644 --- a/package/MDAnalysis/coordinates/DLPoly.py +++ b/package/MDAnalysis/coordinates/DLPoly.py @@ -37,6 +37,7 @@ from . import base from . import core +from ..lib import util _DLPOLY_UNITS = {'length': 'Angstrom', 'velocity': 'Angstrom/ps', 'time': 'ps'} @@ -141,7 +142,7 @@ def _read_first_frame(self): ts.frame = 0 -class HistoryReader(base.ReaderBase): +class HistoryReader(base.ReaderBase, base._AsciiPickle): """Reads DLPoly format HISTORY files .. 
versionadded:: 0.11.0 @@ -154,9 +155,9 @@ def __init__(self, filename, **kwargs): super(HistoryReader, self).__init__(filename, **kwargs) # "private" file handle - self._file = open(self.filename, 'r') - self.title = self._file.readline().strip() - self._levcfg, self._imcon, self.n_atoms = np.int64(self._file.readline().split()[:3]) + self._f = util.anyopen(self.filename, 'r') + self.title = self._f.readline().strip() + self._levcfg, self._imcon, self.n_atoms = np.int64(self._f.readline().split()[:3]) self._has_vels = True if self._levcfg > 0 else False self._has_forces = True if self._levcfg == 2 else False @@ -170,20 +171,20 @@ def _read_next_timestep(self, ts=None): if ts is None: ts = self.ts - line = self._file.readline() # timestep line + line = self._f.readline() # timestep line if not line.startswith('timestep'): raise IOError if not self._imcon == 0: - ts._unitcell[0] = self._file.readline().split() - ts._unitcell[1] = self._file.readline().split() - ts._unitcell[2] = self._file.readline().split() + ts._unitcell[0] = self._f.readline().split() + ts._unitcell[1] = self._f.readline().split() + ts._unitcell[2] = self._f.readline().split() # If ids are given, put them in here # and later sort by them ids = [] for i in range(self.n_atoms): - line = self._file.readline().strip() # atom info line + line = self._f.readline().strip() # atom info line try: idx = int(line.split()[1]) except IndexError: @@ -192,11 +193,11 @@ def _read_next_timestep(self, ts=None): ids.append(idx) # Read in this order for now, then later reorder in place - ts._pos[i] = self._file.readline().split() + ts._pos[i] = self._f.readline().split() if self._has_vels: - ts._velocities[i] = self._file.readline().split() + ts._velocities[i] = self._f.readline().split() if self._has_forces: - ts._forces[i] = self._file.readline().split() + ts._forces[i] = self._f.readline().split() if ids: ids = np.array(ids) @@ -214,7 +215,7 @@ def _read_next_timestep(self, ts=None): def _read_frame(self, frame): 
"""frame is 0 based, error checking is done in base.getitem""" - self._file.seek(self._offsets[frame]) + self._f.seek(self._offsets[frame]) self.ts.frame = frame - 1 # gets +1'd in read_next_frame return self._read_next_timestep() @@ -234,7 +235,7 @@ def _read_n_frames(self): """ offsets = self._offsets = [] - with open(self.filename, 'r') as f: + with util.anyopen(self.filename, 'r') as f: n_frames = 0 f.readline() @@ -262,10 +263,10 @@ def _read_n_frames(self): def _reopen(self): self.close() - self._file = open(self.filename, 'r') - self._file.readline() # header is 2 lines - self._file.readline() + self._f = util.anyopen(self.filename, 'r') + self._f.readline() # header is 2 lines + self._f.readline() self.ts.frame = -1 def close(self): - self._file.close() + self._f.close() diff --git a/package/MDAnalysis/coordinates/GMS.py b/package/MDAnalysis/coordinates/GMS.py index 46be3a34c09..36251bfa086 100644 --- a/package/MDAnalysis/coordinates/GMS.py +++ b/package/MDAnalysis/coordinates/GMS.py @@ -47,7 +47,7 @@ import MDAnalysis.lib.util as util -class GMSReader(base.ReaderBase): +class GMSReader(base.ReaderBase, base._AsciiPickle): """Reads from an GAMESS output file :Data: @@ -82,7 +82,7 @@ def __init__(self, outfilename, **kwargs): super(GMSReader, self).__init__(outfilename, **kwargs) # the filename has been parsed to be either b(g)zipped or not - self.outfile = util.anyopen(self.filename) + self._f = util.anyopen(self.filename) # note that, like for xtc and trr files, _n_atoms and _n_frames are used quasi-private variables # to prevent the properties being recalculated @@ -177,7 +177,7 @@ def _read_out_n_frames(self): return len(offsets) def _read_frame(self, frame): - self.outfile.seek(self._offsets[frame]) + self._f.seek(self._offsets[frame]) self.ts.frame = frame - 1 # gets +1'd in _read_next return self._read_next_timestep() @@ -186,7 +186,7 @@ def _read_next_timestep(self, ts=None): if ts is None: ts = self.ts # check that the outfile object exists; if not 
reopen the trajectory - if self.outfile is None: + if self._f is None: self.open_trajectory() x = [] y = [] @@ -195,7 +195,7 @@ def _read_next_timestep(self, ts=None): flag = 0 counter = 0 - for line in self.outfile: + for line in self._f: if self.runtyp == 'optimize': if (flag == 0) and (re.match(r'^.NSERCH=.*', line) is not None): flag = 1 @@ -246,22 +246,22 @@ def _reopen(self): self.open_trajectory() def open_trajectory(self): - if self.outfile is not None: + if self._f is not None: raise IOError(errno.EALREADY, 'GMS file already opened', self.filename) if not os.path.exists(self.filename): # must check; otherwise might segmentation fault raise IOError(errno.ENOENT, 'GMS file not found', self.filename) - self.outfile = util.anyopen(self.filename) + self._f = util.anyopen(self.filename) # reset ts ts = self.ts ts.frame = -1 - return self.outfile + return self._f def close(self): """Close out trajectory file if it was open.""" - if self.outfile is None: + if self._f is None: return - self.outfile.close() - self.outfile = None + self._f.close() + self._f = None diff --git a/package/MDAnalysis/coordinates/PDB.py b/package/MDAnalysis/coordinates/PDB.py index 4506ebc7cce..534a4a189fd 100644 --- a/package/MDAnalysis/coordinates/PDB.py +++ b/package/MDAnalysis/coordinates/PDB.py @@ -168,7 +168,7 @@ # Pairs of residue name / atom name in use to deduce PDB formatted atom names Pair = collections.namedtuple('Atom', 'resname name') -class PDBReader(base.ReaderBase): +class PDBReader(base.ReaderBase, base._BAsciiPickle): """PDBReader that reads a `PDB-formatted`_ file, no frills. 
The following *PDB records* are parsed (see `PDB coordinate section`_ for @@ -277,7 +277,7 @@ def __init__(self, filename, **kwargs): if isinstance(filename, util.NamedStream) and isinstance(filename.stream, StringIO): filename.stream = BytesIO(filename.stream.getvalue().encode()) - pdbfile = self._pdbfile = util.anyopen(filename, 'rb') + pdbfile = self._f = util.anyopen(filename, 'rb') line = "magical" while line: @@ -345,7 +345,7 @@ def _reopen(self): # Pretend the current TS is -1 (in 0 based) so "next" is the # 0th frame self.close() - self._pdbfile = util.anyopen(self.filename, 'rb') + self._f = util.anyopen(self.filename, 'rb') self.ts.frame = -1 def _read_next_timestep(self, ts=None): @@ -371,8 +371,8 @@ def _read_frame(self, frame): occupancy = np.ones(self.n_atoms) # Seek to start and read until start of next frame - self._pdbfile.seek(start) - chunk = self._pdbfile.read(stop - start).decode() + self._f.seek(start) + chunk = self._f.read(stop - start).decode() tmp_buf = [] for line in chunk.splitlines(): @@ -411,7 +411,7 @@ def _read_frame(self, frame): return self.ts def close(self): - self._pdbfile.close() + self._f.close() class PDBWriter(base.WriterBase): diff --git a/package/MDAnalysis/coordinates/TRJ.py b/package/MDAnalysis/coordinates/TRJ.py index 867d38ede20..8a16fcba713 100644 --- a/package/MDAnalysis/coordinates/TRJ.py +++ b/package/MDAnalysis/coordinates/TRJ.py @@ -188,7 +188,7 @@ class Timestep(base.Timestep): order = 'C' -class TRJReader(base.ReaderBase): +class TRJReader(base.ReaderBase, base._AsciiPickle): """AMBER trajectory reader. Reads the ASCII formatted `AMBER TRJ format`_. Periodic box information @@ -223,7 +223,7 @@ def __init__(self, filename, n_atoms=None, **kwargs): self._n_atoms = n_atoms self._n_frames = None - self.trjfile = None # have _read_next_timestep() open it properly! + self._f = None # have _read_next_timestep() open it properly! 
self.ts = self._Timestep(self.n_atoms, **self._ts_kwargs) # FORMAT(10F8.3) (X(i), Y(i), Z(i), i=1,NATOM) @@ -248,22 +248,22 @@ def __init__(self, filename, n_atoms=None, **kwargs): self._read_next_timestep() def _read_frame(self, frame): - if self.trjfile is None: + if self._f is None: self.open_trajectory() - self.trjfile.seek(self._offsets[frame]) + self._f.seek(self._offsets[frame]) self.ts.frame = frame - 1 # gets +1'd in _read_next return self._read_next_timestep() def _read_next_timestep(self): # FORMAT(10F8.3) (X(i), Y(i), Z(i), i=1,NATOM) ts = self.ts - if self.trjfile is None: + if self._f is None: self.open_trajectory() # Read coordinat frame: # coordinates = numpy.zeros(3*self.n_atoms, dtype=np.float32) _coords = [] - for number, line in enumerate(self.trjfile): + for number, line in enumerate(self._f): try: _coords.extend(self.default_line_parser.read(line)) except ValueError: @@ -278,7 +278,7 @@ def _read_next_timestep(self): # Read box information if self.periodic: - line = next(self.trjfile) + line = next(self._f) box = self.box_line_parser.read(line) ts._unitcell[:3] = np.array(box, dtype=np.float32) ts._unitcell[3:] = [90., 90., 90.] # assumed @@ -325,7 +325,7 @@ def _detect_amber_box(self): self._read_next_timestep() ts = self.ts # TODO: what do we do with 1-frame trajectories? Try..except EOFError? 
- line = next(self.trjfile) + line = next(self._f) nentries = self.default_line_parser.number_of_matches(line) if nentries == 3: self.periodic = True @@ -376,8 +376,8 @@ def _reopen(self): def open_trajectory(self): """Open the trajectory for reading and load first frame.""" - self.trjfile = util.anyopen(self.filename) - self.header = self.trjfile.readline() # ignore first line + self._f = util.anyopen(self.filename) + self.header = self._f.readline() # ignore first line if len(self.header.rstrip()) > 80: # Chimera uses this check raise OSError( @@ -387,14 +387,14 @@ def open_trajectory(self): ts = self.ts ts.frame = -1 - return self.trjfile + return self._f def close(self): """Close trj trajectory file if it was open.""" - if self.trjfile is None: + if self._f is None: return - self.trjfile.close() - self.trjfile = None + self._f.close() + self._f = None class NCDFReader(base.ReaderBase): @@ -465,36 +465,28 @@ def __init__(self, filename, n_atoms=None, mmap=None, **kwargs): super(NCDFReader, self).__init__(filename, **kwargs) - self.trjfile = scipy.io.netcdf.netcdf_file(self.filename, - mmap=self._mmap) + self._f = scipy.io.netcdf.netcdf_file(self.filename, + mmap=self._mmap) - if not ('AMBER' in self.trjfile.Conventions.decode('utf-8').split(',') or - 'AMBER' in self.trjfile.Conventions.decode('utf-8').split()): + if not ('AMBER' in self._f.Conventions.decode('utf-8').split(',') or + 'AMBER' in self._f.Conventions.decode('utf-8').split()): errmsg = ("NCDF trajectory {0} does not conform to AMBER " "specifications, http://ambermd.org/netcdf/nctraj.xhtml " "('AMBER' must be one of the tokens in attribute " "Conventions)".format(self.filename)) logger.fatal(errmsg) raise TypeError(errmsg) - if not self.trjfile.ConventionVersion.decode('utf-8') == self.version: + if not self._f.ConventionVersion.decode('utf-8') == self.version: wmsg = ("NCDF trajectory format is {0!s} but the reader " "implements format {1!s}".format( - self.trjfile.ConventionVersion, self.version)) 
+ self._f.ConventionVersion, self.version)) warnings.warn(wmsg) logger.warning(wmsg) - self.n_atoms = self.trjfile.dimensions['atom'] - self.n_frames = self.trjfile.dimensions['frame'] - # example trajectory when read with scipy.io.netcdf has - # dimensions['frame'] == None (indicating a record dimension that can - # grow) whereas if read with netCDF4 I get len(dimensions['frame']) == - # 10: in any case, we need to get the number of frames from somewhere - # such as the time variable: - if self.n_frames is None: - self.n_frames = self.trjfile.variables['time'].shape[0] + self.n_atoms = self._f.dimensions['atom'] try: - self.remarks = self.trjfile.title + self.remarks = self._f.title except AttributeError: self.remarks = "" # other metadata (*= requd): @@ -505,27 +497,27 @@ def __init__(self, filename, n_atoms=None, mmap=None, **kwargs): # checks for not-implemented features (other units would need to be # hacked into MDAnalysis.units) - if self.trjfile.variables['time'].units.decode('utf-8') != "picosecond": + if self._f.variables['time'].units.decode('utf-8') != "picosecond": raise NotImplementedError( "NETCDFReader currently assumes that the trajectory was written " "with a time unit of picoseconds and not {0}.".format( - self.trjfile.variables['time'].units)) - if self.trjfile.variables['coordinates'].units.decode('utf-8') != "angstrom": + self._f.variables['time'].units)) + if self._f.variables['coordinates'].units.decode('utf-8') != "angstrom": raise NotImplementedError( "NETCDFReader currently assumes that the trajectory was written " "with a length unit of Angstroem and not {0}.".format( - self.trjfile.variables['coordinates'].units)) - if hasattr(self.trjfile.variables['coordinates'], 'scale_factor'): + self._f.variables['coordinates'].units)) + if hasattr(self._f.variables['coordinates'], 'scale_factor'): raise NotImplementedError("scale_factors are not implemented") if n_atoms is not None and n_atoms != self.n_atoms: raise ValueError("Supplied n_atoms 
({0}) != natom from ncdf ({1}). " "Note: n_atoms can be None and then the ncdf value " "is used!".format(n_atoms, self.n_atoms)) - self.has_velocities = 'velocities' in self.trjfile.variables - self.has_forces = 'forces' in self.trjfile.variables + self.has_velocities = 'velocities' in self._f.variables + self.has_forces = 'forces' in self._f.variables - self.periodic = 'cell_lengths' in self.trjfile.variables + self.periodic = 'cell_lengths' in self._f.variables self._current_frame = 0 self.ts = self._Timestep(self.n_atoms, @@ -537,6 +529,30 @@ def __init__(self, filename, n_atoms=None, mmap=None, **kwargs): # load first data frame self._read_frame(0) + def __getstate__(self): + state = self.__dict__.copy() + del state['_f'] + + return state + + def __setstate__(self, state): + self.__dict__.update(state) + self._f = scipy.io.netcdf.netcdf_file(self.filename, + mmap=self._mmap) + + @property + def n_frames(self): + n_frames = self._f.dimensions['frame'] + # example trajectory when read with scipy.io.netcdf has + # dimensions['frame'] == None (indicating a record dimension that can + # grow) whereas if read with netCDF4 I get len(dimensions['frame']) == + # 10: in any case, we need to get the number of frames from somewhere + # such as the time variable: + if n_frames is None: + n_frames = self._f.variables['time'].shape[0] + + return n_frames + @staticmethod def parse_n_atoms(filename, **kwargs): with scipy.io.netcdf.netcdf_file(filename, mmap=None) as f: @@ -546,7 +562,7 @@ def parse_n_atoms(filename, **kwargs): def _read_frame(self, frame): ts = self.ts - if self.trjfile is None: + if self._f is None: raise IOError("Trajectory is closed") if np.dtype(type(frame)) != np.dtype(int): # convention... 
for netcdf could also be a slice @@ -554,16 +570,16 @@ def _read_frame(self, frame): if frame >= self.n_frames or frame < 0: raise IndexError("frame index must be 0 <= frame < {0}".format( self.n_frames)) - # note: self.trjfile.variables['coordinates'].shape == (frames, n_atoms, 3) - ts._pos[:] = self.trjfile.variables['coordinates'][frame] - ts.time = self.trjfile.variables['time'][frame] + # note: self._f.variables['coordinates'].shape == (frames, n_atoms, 3) + ts._pos[:] = self._f.variables['coordinates'][frame] + ts.time = self._f.variables['time'][frame] if self.has_velocities: - ts._velocities[:] = self.trjfile.variables['velocities'][frame] + ts._velocities[:] = self._f.variables['velocities'][frame] if self.has_forces: - ts._forces[:] = self.trjfile.variables['forces'][frame] + ts._forces[:] = self._f.variables['forces'][frame] if self.periodic: - ts._unitcell[:3] = self.trjfile.variables['cell_lengths'][frame] - ts._unitcell[3:] = self.trjfile.variables['cell_angles'][frame] + ts._unitcell[:3] = self._f.variables['cell_lengths'][frame] + ts._unitcell[3:] = self._f.variables['cell_angles'][frame] if self.convert_units: self.convert_pos_from_native(ts._pos) # in-place ! self.convert_time_from_native( @@ -592,8 +608,8 @@ def _read_next_timestep(self, ts=None): raise IOError def _get_dt(self): - t1 = self.trjfile.variables['time'][1] - t0 = self.trjfile.variables['time'][0] + t1 = self._f.variables['time'][1] + t0 = self._f.variables['time'][0] return t1 - t0 def close(self): @@ -605,9 +621,9 @@ def close(self): before the file can be closed. """ - if self.trjfile is not None: - self.trjfile.close() - self.trjfile = None + if self._f is not None: + self._f.close() + self._f = None def Writer(self, filename, **kwargs): """Returns a NCDFWriter for `filename` with the same parameters as this NCDF. @@ -763,7 +779,7 @@ def __init__(self, self.ts = None # when/why would this be assigned?? 
self._first_frame = True # signals to open trajectory - self.trjfile = None # open on first write with _init_netcdf() + self._f = None # open on first write with _init_netcdf() self.periodic = None # detect on first write self.has_velocities = kwargs.get('velocities', False) self.has_forces = kwargs.get('forces', False) @@ -862,7 +878,7 @@ def _init_netcdf(self, periodic=True): ncfile.sync() self._first_frame = False - self.trjfile = ncfile + self._f = ncfile def is_periodic(self, ts=None): """Test if `Timestep` contains a periodic trajectory. @@ -902,7 +918,7 @@ def write_next_timestep(self, ts=None): raise IOError( "NCDFWriter: Timestep does not have the correct number of atoms") - if self.trjfile is None: + if self._f is None: # first time step: analyze data and open trajectory accordingly self._init_netcdf(periodic=self.is_periodic(ts)) @@ -938,12 +954,12 @@ def _write_next_timestep(self, ts): unitcell = self.convert_dimensions_to_unitcell(ts) # write step - self.trjfile.variables['coordinates'][self.curr_frame, :, :] = pos - self.trjfile.variables['time'][self.curr_frame] = time + self._f.variables['coordinates'][self.curr_frame, :, :] = pos + self._f.variables['time'][self.curr_frame] = time if self.periodic: - self.trjfile.variables['cell_lengths'][ + self._f.variables['cell_lengths'][ self.curr_frame, :] = unitcell[:3] - self.trjfile.variables['cell_angles'][ + self._f.variables['cell_angles'][ self.curr_frame, :] = unitcell[3:] if self.has_velocities: @@ -951,19 +967,19 @@ def _write_next_timestep(self, ts): if self.convert_units: velocities = self.convert_velocities_to_native( velocities, inplace=False) - self.trjfile.variables['velocities'][self.curr_frame, :, :] = velocities + self._f.variables['velocities'][self.curr_frame, :, :] = velocities if self.has_forces: forces = ts._forces if self.convert_units: forces = self.convert_forces_to_native( forces, inplace=False) - self.trjfile.variables['forces'][self.curr_frame, :, :] = forces + 
self._f.variables['forces'][self.curr_frame, :, :] = forces - self.trjfile.sync() + self._f.sync() self.curr_frame += 1 def close(self): - if self.trjfile is not None: - self.trjfile.close() - self.trjfile = None + if self._f is not None: + self._f.close() + self._f = None diff --git a/package/MDAnalysis/coordinates/TRZ.py b/package/MDAnalysis/coordinates/TRZ.py index a469ee09aba..80123e41d45 100644 --- a/package/MDAnalysis/coordinates/TRZ.py +++ b/package/MDAnalysis/coordinates/TRZ.py @@ -126,7 +126,7 @@ def dimensions(self, box): self._unitcell[:] = triclinic_vectors(box).reshape(9) -class TRZReader(base.ReaderBase): +class TRZReader(base.ReaderBase, base._BAsciiPickle): """Reads an IBIsCO or YASP trajectory file Attributes @@ -170,7 +170,7 @@ def __init__(self, trzfilename, n_atoms=None, **kwargs): if n_atoms is None: raise ValueError('TRZReader requires the n_atoms keyword') - self.trzfile = util.anyopen(self.filename, 'rb') + self._f = util.anyopen(self.filename, 'rb') self._cache = dict() self._n_atoms = n_atoms @@ -234,7 +234,7 @@ def _read_trz_header(self): ('p2', '<2i4'), ('force', ' Date: Wed, 20 May 2020 10:42:54 +0200 Subject: [PATCH 09/21] start of gsoc 2020 project, serialize --- README.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 00000000000..08dafd70628 --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +**This is the branch for GSoC 2020 project, serialization of universes** From 59f55e8ee4292e4b9a55f83a7db90331248bb482 Mon Sep 17 00:00:00 2001 From: yuxuan Date: Tue, 26 May 2020 17:15:29 +0200 Subject: [PATCH 10/21] fix depreciate core.flag --- package/MDAnalysis/coordinates/base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/package/MDAnalysis/coordinates/base.py b/package/MDAnalysis/coordinates/base.py index 3fb7241ffe8..77fb0ee1e12 100644 --- a/package/MDAnalysis/coordinates/base.py +++ b/package/MDAnalysis/coordinates/base.py @@ -218,7 +218,6 @@ from .. 
import units from ..auxiliary.base import AuxReader from ..auxiliary.core import auxreader -from ..core import flags from ..lib.util import asiterable, Namespace, anyopen From dce2dd4308b0b111dfeb1bd390dfc6861bed71f0 Mon Sep 17 00:00:00 2001 From: yuxuan Date: Wed, 27 May 2020 10:47:51 +0200 Subject: [PATCH 11/21] fix netcdf trjfile --- package/MDAnalysis/coordinates/TRJ.py | 24 +++++++++---------- .../coordinates/test_netcdf.py | 6 ++--- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/package/MDAnalysis/coordinates/TRJ.py b/package/MDAnalysis/coordinates/TRJ.py index 814cfc6c9aa..7a12c2a9f11 100644 --- a/package/MDAnalysis/coordinates/TRJ.py +++ b/package/MDAnalysis/coordinates/TRJ.py @@ -639,18 +639,18 @@ def __setstate__(self, state): self._f = scipy.io.netcdf.netcdf_file(self.filename, mmap=self._mmap) - @property - def n_frames(self): - n_frames = self._f.dimensions['frame'] - # example trajectory when read with scipy.io.netcdf has - # dimensions['frame'] == None (indicating a record dimension that can - # grow) whereas if read with netCDF4 I get len(dimensions['frame']) == - # 10: in any case, we need to get the number of frames from somewhere - # such as the time variable: - if n_frames is None: - n_frames = self._f.variables['time'].shape[0] - - return n_frames +# @property +# def n_frames(self): +# n_frames = self._f.dimensions['frame'] +# # example trajectory when read with scipy.io.netcdf has +# # dimensions['frame'] == None (indicating a record dimension that can +# # grow) whereas if read with netCDF4 I get len(dimensions['frame']) == +# # 10: in any case, we need to get the number of frames from somewhere +# # such as the time variable: +# if n_frames is None: +# n_frames = self._f.variables['time'].shape[0] +# +# return n_frames @staticmethod def _verify_units(eval_unit, expected_units): diff --git a/testsuite/MDAnalysisTests/coordinates/test_netcdf.py b/testsuite/MDAnalysisTests/coordinates/test_netcdf.py index 
de040e9a092..48e78ef9a1b 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_netcdf.py +++ b/testsuite/MDAnalysisTests/coordinates/test_netcdf.py @@ -57,7 +57,7 @@ def test_slice_iteration(self, universe): err_msg="slicing did not produce the expected frames") def test_metadata(self, universe): - data = universe.trajectory.trjfile + data = universe.trajectory._f assert_equal(data.Conventions.decode('utf-8'), 'AMBER') assert_equal(data.ConventionVersion.decode('utf-8'), '1.0') @@ -687,8 +687,8 @@ def _check_new_traj(self, universe, outfile): self.prec, err_msg="unitcells are not identical") # check that the NCDF data structures are the same - nc_orig = universe.trajectory.trjfile - nc_copy = uw.trajectory.trjfile + nc_orig = universe.trajectory._f + nc_copy = uw.trajectory._f # note that here 'dimensions' is a specific netcdf data structure and # not the unit cell dimensions in MDAnalysis From c730e798b5db3314c567d081676b0d3dcc803d75 Mon Sep 17 00:00:00 2001 From: yuxuan Date: Wed, 27 May 2020 19:45:08 +0200 Subject: [PATCH 12/21] fix txyz trjfile --- package/MDAnalysis/coordinates/TXYZ.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/package/MDAnalysis/coordinates/TXYZ.py b/package/MDAnalysis/coordinates/TXYZ.py index 26cfce894a9..ad15093b992 100644 --- a/package/MDAnalysis/coordinates/TXYZ.py +++ b/package/MDAnalysis/coordinates/TXYZ.py @@ -58,7 +58,7 @@ from ..lib.util import openany, cached -class TXYZReader(base.ReaderBase): +class TXYZReader(base.ReaderBase, base._AsciiPickle): """Reads from a TXYZ file""" @@ -74,7 +74,7 @@ def __init__(self, filename, **kwargs): # coordinates::core.py so the last file extension will tell us if it is # bzipped or not root, ext = os.path.splitext(self.filename) - self.xyzfile = util.anyopen(self.filename) + self._f = util.anyopen(self.filename) self._cache = dict() # Check if file has box information saved with util.openany(self.filename) as inp: @@ -131,7 +131,7 @@ def 
_read_xyz_n_frames(self): return n_frames def _read_frame(self, frame): - self.xyzfile.seek(self._offsets[frame]) + self._f.seek(self._offsets[frame]) self.ts.frame = frame - 1 # gets +1'd in next return self._read_next_timestep() @@ -140,7 +140,7 @@ def _read_next_timestep(self, ts=None): if ts is None: ts = self.ts - f = self.xyzfile + f = self._f try: # we assume that there is only one header line per frame @@ -162,21 +162,21 @@ def _reopen(self): self.open_trajectory() def open_trajectory(self): - if self.xyzfile is not None: + if self._f is not None: raise IOError( errno.EALREADY, 'TXYZ file already opened', self.filename) - self.xyzfile = util.anyopen(self.filename) + self._f = util.anyopen(self.filename) # reset ts ts = self.ts ts.frame = -1 - return self.xyzfile + return self._f def close(self): """Close arc trajectory file if it was open.""" - if self.xyzfile is None: + if self._f is None: return - self.xyzfile.close() - self.xyzfile = None + self._f.close() + self._f = None From 598a671d737e5cc5a2d368d4497152ccce014fea Mon Sep 17 00:00:00 2001 From: yuxuan Date: Fri, 29 May 2020 20:29:55 +0200 Subject: [PATCH 13/21] rm README.md --- README.md | 1 - 1 file changed, 1 deletion(-) delete mode 100644 README.md diff --git a/README.md b/README.md deleted file mode 100644 index 08dafd70628..00000000000 --- a/README.md +++ /dev/null @@ -1 +0,0 @@ -**This is the branch for GSoC 2020 project, serialization of universes** From 2c2883a7532a5f86851a8347fd1dd94a5c7e01a3 Mon Sep 17 00:00:00 2001 From: yuxuan Date: Mon, 1 Jun 2020 09:51:58 +0200 Subject: [PATCH 14/21] add absolute_import --- .../parallelism/test_multiprocessing.py | 26 +++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/testsuite/MDAnalysisTests/parallelism/test_multiprocessing.py b/testsuite/MDAnalysisTests/parallelism/test_multiprocessing.py index b88cfad1acf..8ccd49445bd 100644 --- a/testsuite/MDAnalysisTests/parallelism/test_multiprocessing.py +++ 
b/testsuite/MDAnalysisTests/parallelism/test_multiprocessing.py @@ -1,6 +1,28 @@ -"""Test that MDAnalysis plays nicely with multiprocessing +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 fileencoding=utf-8 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 
32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# + +from __future__ import absolute_import -""" import multiprocessing import numpy as np import pytest From 7f1f4a12e3581a7f569a5bf36d40c0cf116ca33a Mon Sep 17 00:00:00 2001 From: yuxuan Date: Tue, 2 Jun 2020 11:19:19 +0200 Subject: [PATCH 15/21] add txyz, lammpsdump formats to test --- testsuite/MDAnalysisTests/parallelism/test_multiprocessing.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/testsuite/MDAnalysisTests/parallelism/test_multiprocessing.py b/testsuite/MDAnalysisTests/parallelism/test_multiprocessing.py index 8ccd49445bd..e5bee48981a 100644 --- a/testsuite/MDAnalysisTests/parallelism/test_multiprocessing.py +++ b/testsuite/MDAnalysisTests/parallelism/test_multiprocessing.py @@ -41,6 +41,7 @@ GMS_ASYMOPT, GRO, LAMMPSdata_mini, + LAMMPSDUMP, mol2_molecules, MMTF, NCDF, @@ -50,6 +51,7 @@ TRR, TRJ, TRZ, + TXYZ, XTC, XPDB_small, XYZ_mini, XYZ, @@ -118,6 +120,7 @@ def test_multiprocess_names(u): ('CONFIG', DLP_CONFIG, dict()), ('HISTORY', DLP_HISTORY, dict()), ('INPCRD', INPCRD, dict()), + ('LAMMPSDUMP', LAMMPSDUMP, dict()), ('GMS', GMS_ASYMOPT, dict()), ('GRO', GRO, dict()), ('MMTF', MMTF, dict()), @@ -132,6 +135,7 @@ def test_multiprocess_names(u): ('XPDB', XPDB_small, dict()), ('XYZ', XYZ_mini, dict()), ('NCDF', NCDF, dict()), + ('TXYZ', TXYZ, dict()), ('memory', np.arange(60).reshape(2, 10, 3).astype(np.float64), dict()), ('CHAIN', [GRO, GRO, GRO], dict()), ]) From 33bf8884caf00d91858e1e9129cecfbf42e18b42 Mon Sep 17 00:00:00 2001 From: yuxuan Date: Tue, 2 Jun 2020 11:29:40 +0200 Subject: [PATCH 16/21] add lammpsdump support for pickle --- package/MDAnalysis/coordinates/LAMMPS.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/package/MDAnalysis/coordinates/LAMMPS.py b/package/MDAnalysis/coordinates/LAMMPS.py index 7dd70a26a94..cbea8e6e44c 100644 --- a/package/MDAnalysis/coordinates/LAMMPS.py +++ b/package/MDAnalysis/coordinates/LAMMPS.py @@ -454,7 +454,7 @@ def 
write(self, selection, frame=None): self._write_velocities(atoms) -class DumpReader(base.ReaderBase): +class DumpReader(base.ReaderBase, base._BAsciiPickle): """Reads the default `LAMMPS dump format`_ Expects trajectories produced by the default 'atom' style dump. @@ -478,7 +478,7 @@ def __init__(self, filename, **kwargs): def _reopen(self): self.close() - self._file = util.anyopen(self.filename) + self._f = util.anyopen(self.filename, 'rb') self.ts = self._Timestep(self.n_atoms, **self._ts_kwargs) self.ts.frame = -1 @@ -510,17 +510,17 @@ def n_frames(self): return len(self._offsets) def close(self): - if hasattr(self, '_file'): - self._file.close() + if hasattr(self, '_f'): + self._f.close() def _read_frame(self, frame): - self._file.seek(self._offsets[frame]) + self._f.seek(self._offsets[frame]) self.ts.frame = frame - 1 # gets +1'd in next return self._read_next_timestep() def _read_next_timestep(self): - f = self._file + f = self._f ts = self.ts ts.frame += 1 if ts.frame >= len(self): From 90d8f78c42cf0d39fb06c816695ff14a5efa51e9 Mon Sep 17 00:00:00 2001 From: yuxuan Date: Tue, 2 Jun 2020 11:34:41 +0200 Subject: [PATCH 17/21] add pickle test for gsd --- testsuite/MDAnalysisTests/parallelism/test_multiprocessing.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/testsuite/MDAnalysisTests/parallelism/test_multiprocessing.py b/testsuite/MDAnalysisTests/parallelism/test_multiprocessing.py index e5bee48981a..0c83f9b4345 100644 --- a/testsuite/MDAnalysisTests/parallelism/test_multiprocessing.py +++ b/testsuite/MDAnalysisTests/parallelism/test_multiprocessing.py @@ -40,6 +40,7 @@ INPCRD, GMS_ASYMOPT, GRO, + GSD, LAMMPSdata_mini, LAMMPSDUMP, mol2_molecules, @@ -123,6 +124,7 @@ def test_multiprocess_names(u): ('LAMMPSDUMP', LAMMPSDUMP, dict()), ('GMS', GMS_ASYMOPT, dict()), ('GRO', GRO, dict()), + ('GSD', GSD, dict()), ('MMTF', MMTF, dict()), ('MOL2', mol2_molecules, dict()), ('PDB', PDB_small, dict()), From 31e08a4db7e5548717285ea281256ecd6a612122 Mon Sep 17 
00:00:00 2001 From: yuxuan Date: Tue, 2 Jun 2020 16:11:32 +0200 Subject: [PATCH 18/21] need gsd>2.1.1 for pickle --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index d526eb12d93..5ee4bb89ad2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -27,7 +27,7 @@ env: - MAIN_CMD="pytest ${PYTEST_LIST}" - SETUP_CMD="${PYTEST_FLAGS}" - BUILD_CMD="pip install -e package/ && (cd testsuite/ && python setup.py build)" - - CONDA_MIN_DEPENDENCIES="mmtf-python mock six biopython networkx cython matplotlib scipy griddataformats hypothesis gsd codecov" + - CONDA_MIN_DEPENDENCIES="mmtf-python mock six biopython networkx cython matplotlib scipy griddataformats hypothesis gsd>=2.1.1 codecov" - CONDA_DEPENDENCIES="${CONDA_MIN_DEPENDENCIES} seaborn>=0.7.0 clustalw=2.1 netcdf4 scikit-learn joblib>=0.12 chemfiles tqdm>=4.43.0" - CONDA_CHANNELS='biobuilds conda-forge' - CONDA_CHANNEL_PRIORITY=True From 5bbc9b26e8916be512eb52df50229b7bf42c2306 Mon Sep 17 00:00:00 2001 From: yuxuan Date: Wed, 3 Jun 2020 11:08:31 +0200 Subject: [PATCH 19/21] rely on GSDReader for pickling instead, gsd>2 dependency removed --- .travis.yml | 2 +- package/MDAnalysis/coordinates/GSD.py | 26 +++++++++++++++++++------- package/MDAnalysis/coordinates/base.py | 21 +++++++++++++++++++++ 3 files changed, 41 insertions(+), 8 deletions(-) diff --git a/.travis.yml b/.travis.yml index 5ee4bb89ad2..d526eb12d93 100644 --- a/.travis.yml +++ b/.travis.yml @@ -27,7 +27,7 @@ env: - MAIN_CMD="pytest ${PYTEST_LIST}" - SETUP_CMD="${PYTEST_FLAGS}" - BUILD_CMD="pip install -e package/ && (cd testsuite/ && python setup.py build)" - - CONDA_MIN_DEPENDENCIES="mmtf-python mock six biopython networkx cython matplotlib scipy griddataformats hypothesis gsd>=2.1.1 codecov" + - CONDA_MIN_DEPENDENCIES="mmtf-python mock six biopython networkx cython matplotlib scipy griddataformats hypothesis gsd codecov" - CONDA_DEPENDENCIES="${CONDA_MIN_DEPENDENCIES} seaborn>=0.7.0 clustalw=2.1 netcdf4 
scikit-learn joblib>=0.12 chemfiles tqdm>=4.43.0" - CONDA_CHANNELS='biobuilds conda-forge' - CONDA_CHANNEL_PRIORITY=True diff --git a/package/MDAnalysis/coordinates/GSD.py b/package/MDAnalysis/coordinates/GSD.py index e5968e32d34..32178e0cf2b 100644 --- a/package/MDAnalysis/coordinates/GSD.py +++ b/package/MDAnalysis/coordinates/GSD.py @@ -54,7 +54,7 @@ from . import base -class GSDReader(base.ReaderBase): +class GSDReader(base.ReaderBase, base._ExAsciiPickle): """Reader for the GSD format. """ @@ -76,23 +76,27 @@ def __init__(self, filename, **kwargs): super(GSDReader, self).__init__(filename, **kwargs) self.filename = filename self.open_trajectory() - self.n_atoms = self._file[0].particles.N + self.n_atoms = self._f[0].particles.N self.ts = self._Timestep(self.n_atoms, **self._ts_kwargs) self._read_next_timestep() - def open_trajectory(self) : + def open_trajectory(self): """opens the trajectory file using gsd.hoomd module""" self._frame = -1 - self._file = gsd.hoomd.open(self.filename,mode='rb') + self._f = gsd.hoomd.open(self.filename,mode='rb') + + def open_trajectory_for_pickle(self): + """opens the trajectory file while not reset frame""" + self._f = gsd.hoomd.open(self.filename, mode='rb') def close(self): """close reader""" - self._file.file.close() + self._f.file.close() @property def n_frames(self): """number of frames in trajectory""" - return len(self._file) + return len(self._f) def _reopen(self): """reopen trajectory""" @@ -101,7 +105,7 @@ def _reopen(self): def _read_frame(self, frame): try : - myframe = self._file[frame] + myframe = self._f[frame] except IndexError: raise_from(IOError, None) @@ -131,3 +135,11 @@ def _read_frame(self, frame): def _read_next_timestep(self) : """read next frame in trajectory""" return self._read_frame(self._frame + 1) + +# def __getstate__(self): +# """Implement the pickle protocol.""" +# return dict(name=self.filename) +# +# def __setstate__(self, state): +# """Implement the pickle protocol.""" +# 
self.__init__(state['name']) diff --git a/package/MDAnalysis/coordinates/base.py b/package/MDAnalysis/coordinates/base.py index 77fb0ee1e12..d5e384b364e 100644 --- a/package/MDAnalysis/coordinates/base.py +++ b/package/MDAnalysis/coordinates/base.py @@ -2106,6 +2106,27 @@ def __setstate__(self, state): del self._pickle_fn +class _ExAsciiPickle(object): + # For external file reader, e.g. GSD + def __getstate__(self): + # Shallow copy of state of self + # shallow ie don't recursively copy all objects, + # just copy the references that __dict__ holds + stuff = self.__dict__.copy() + # don't pass the file handle over + del stuff['_f'] + # instead pass enough metadata to reconstruct + stuff['_pickle_fn'] = self.filename + # TODO: what other state does Reader hold? + # TODO: reconstruct file handle position + return stuff + + def __setstate__(self, state): + self.__dict__.update(state) + self.open_trajectory_for_pickle() + del self._pickle_fn + + class ReaderBase(ProtoReader): """Base class for trajectory readers that extends :class:`ProtoReader` with a :meth:`__del__` method. 
From e9121df8f7ab57ed0de68d48a1e7fdd8e3afb466 Mon Sep 17 00:00:00 2001 From: yuxuan Date: Wed, 3 Jun 2020 11:16:50 +0200 Subject: [PATCH 20/21] del commented func --- package/MDAnalysis/coordinates/GSD.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/package/MDAnalysis/coordinates/GSD.py b/package/MDAnalysis/coordinates/GSD.py index 32178e0cf2b..50cbd9334d0 100644 --- a/package/MDAnalysis/coordinates/GSD.py +++ b/package/MDAnalysis/coordinates/GSD.py @@ -136,10 +136,3 @@ def _read_next_timestep(self) : """read next frame in trajectory""" return self._read_frame(self._frame + 1) -# def __getstate__(self): -# """Implement the pickle protocol.""" -# return dict(name=self.filename) -# -# def __setstate__(self, state): -# """Implement the pickle protocol.""" -# self.__init__(state['name']) From 864d733a434690ffe4a85c78a7b2a8d27aaec875 Mon Sep 17 00:00:00 2001 From: yuxuan Date: Mon, 8 Jun 2020 23:09:10 +0200 Subject: [PATCH 21/21] add xfail to py2 serialization --- testsuite/MDAnalysisTests/core/test_universe.py | 7 +++---- .../MDAnalysisTests/parallelism/test_multiprocessing.py | 6 +++++- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/testsuite/MDAnalysisTests/core/test_universe.py b/testsuite/MDAnalysisTests/core/test_universe.py index a312aba13fc..f2eea1a08e8 100644 --- a/testsuite/MDAnalysisTests/core/test_universe.py +++ b/testsuite/MDAnalysisTests/core/test_universe.py @@ -27,7 +27,7 @@ import os import subprocess - +import sys try: from cStringIO import StringIO except: @@ -272,13 +272,12 @@ def test_load_multiple_args(self): assert_equal(len(u.atoms), 3341, "Loading universe failed somehow") assert_equal(u.trajectory.n_frames, 2 * ref.trajectory.n_frames) + @pytest.mark.xfail(sys.version_info < (3, 0), reason="pickle function not \ + working in python 2") def test_pickle(self): u = mda.Universe(PSF, DCD) - s = cPickle.dumps(u, protocol = cPickle.HIGHEST_PROTOCOL) - new_u = cPickle.loads(s) - assert_equal(u.atoms.names, new_u.atoms.names)
@pytest.mark.parametrize('dtype', (int, np.float32, np.float64)) diff --git a/testsuite/MDAnalysisTests/parallelism/test_multiprocessing.py b/testsuite/MDAnalysisTests/parallelism/test_multiprocessing.py index 0c83f9b4345..6b7fb117843 100644 --- a/testsuite/MDAnalysisTests/parallelism/test_multiprocessing.py +++ b/testsuite/MDAnalysisTests/parallelism/test_multiprocessing.py @@ -22,7 +22,7 @@ # from __future__ import absolute_import - +import sys import multiprocessing import numpy as np import pytest @@ -88,6 +88,8 @@ def getnames(u, ix): return u.atoms[ix].name +@pytest.mark.xfail(sys.version_info < (3, 0), reason="pickle function not \ + working in python 2") def test_multiprocess_COG(u): ag = u.atoms[10:20] @@ -101,6 +103,8 @@ def test_multiprocess_COG(u): assert_equal(ref, res) +@pytest.mark.xfail(sys.version_info <= (3, 0), reason="pickle function not \ + working in python 2") def test_multiprocess_names(u): ref = [getnames(u, i) for i in range(10)]