Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updating Serialization Functionality from PR #2140 #2704

Closed
wants to merge 24 commits into from
Closed
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
a00840e
added experimental pickling support to Universe
richardjgowers Nov 15, 2018
133072f
fixed pickle test
richardjgowers Nov 17, 2018
aa595f5
wip of reader pickling
richardjgowers Nov 21, 2018
ba1a138
simplified serialisation support more
richardjgowers Feb 16, 2019
caa588d
make AuxReader not serialise (until tested)
richardjgowers Feb 16, 2019
e21361d
add tests for multiprocessing
richardjgowers Feb 16, 2019
99bdd08
added more formats to multiprocessing tests
richardjgowers Feb 16, 2019
f1a7d5a
pickling all readers now works...
richardjgowers Feb 20, 2019
931d9b5
Merge pull request #1 from MDAnalysis/develop
yuxuanzhuang May 20, 2020
7126473
start of gsoc 2020 project, serialize
yuxuanzhuang May 20, 2020
0c528c7
merge serialise(#PR2140)
yuxuanzhuang May 26, 2020
59f55e8
fix depreciate core.flag
yuxuanzhuang May 26, 2020
dce2dd4
fix netcdf trjfile
yuxuanzhuang May 27, 2020
c730e79
fix txyz trjfile
yuxuanzhuang May 27, 2020
598a671
rm README.md
yuxuanzhuang May 29, 2020
2c2883a
add absolute_import
yuxuanzhuang Jun 1, 2020
7f1f4a1
add txyz, lammpsdump formats to test
yuxuanzhuang Jun 2, 2020
33bf888
add lammpsdump support for pickle
yuxuanzhuang Jun 2, 2020
90d8f78
add pickle test for gsd
yuxuanzhuang Jun 2, 2020
31e08a4
need gsd>2.1.1 for pickle
yuxuanzhuang Jun 2, 2020
5bbc9b2
rely on GSDReader for pickling instead, gsd>2 dependency removed
yuxuanzhuang Jun 3, 2020
e9121df
del commented func
yuxuanzhuang Jun 3, 2020
91b28f4
merge with develop
yuxuanzhuang Jun 8, 2020
864d733
add xfail to py2 serialization
yuxuanzhuang Jun 8, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions package/MDAnalysis/auxiliary/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,10 @@ def __init__(self, represent_ts_as='closest', auxname=None, cutoff=-1,
self.auxstep._dt = self.time - self.initial_time
self.rewind()

def __getstate__(self):
# probably works fine, but someone needs to write tests to confirm
return NotImplementedError

def copy(self):
    """Copying is not supported for AuxReader instances."""
    raise NotImplementedError("Copy not implemented for AuxReader")

Expand Down
37 changes: 19 additions & 18 deletions package/MDAnalysis/coordinates/DLPoly.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@

from . import base
from . import core
from ..lib import util

_DLPOLY_UNITS = {'length': 'Angstrom', 'velocity': 'Angstrom/ps', 'time': 'ps'}

Expand Down Expand Up @@ -141,7 +142,7 @@ def _read_first_frame(self):
ts.frame = 0


class HistoryReader(base.ReaderBase):
class HistoryReader(base.ReaderBase, base._AsciiPickle):
"""Reads DLPoly format HISTORY files

.. versionadded:: 0.11.0
Expand All @@ -154,9 +155,9 @@ def __init__(self, filename, **kwargs):
super(HistoryReader, self).__init__(filename, **kwargs)

# "private" file handle
self._file = open(self.filename, 'r')
self.title = self._file.readline().strip()
self._levcfg, self._imcon, self.n_atoms = np.int64(self._file.readline().split()[:3])
self._f = util.anyopen(self.filename, 'r')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We'll probably want to document somewhere (in the AnalysisBase docstring at the very least?) that we expect files to be stored under self._f for the readers / writers.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If it's important, make the attribute name more expressive, e.g., _file or _stream or _open_stream. Readability is important!

self.title = self._f.readline().strip()
self._levcfg, self._imcon, self.n_atoms = np.int64(self._f.readline().split()[:3])
self._has_vels = True if self._levcfg > 0 else False
self._has_forces = True if self._levcfg == 2 else False

Expand All @@ -170,20 +171,20 @@ def _read_next_timestep(self, ts=None):
if ts is None:
ts = self.ts

line = self._file.readline() # timestep line
line = self._f.readline() # timestep line
if not line.startswith('timestep'):
raise IOError
if not self._imcon == 0:
ts._unitcell[0] = self._file.readline().split()
ts._unitcell[1] = self._file.readline().split()
ts._unitcell[2] = self._file.readline().split()
ts._unitcell[0] = self._f.readline().split()
ts._unitcell[1] = self._f.readline().split()
ts._unitcell[2] = self._f.readline().split()

# If ids are given, put them in here
# and later sort by them
ids = []

for i in range(self.n_atoms):
line = self._file.readline().strip() # atom info line
line = self._f.readline().strip() # atom info line
try:
idx = int(line.split()[1])
except IndexError:
Expand All @@ -192,11 +193,11 @@ def _read_next_timestep(self, ts=None):
ids.append(idx)

# Read in this order for now, then later reorder in place
ts._pos[i] = self._file.readline().split()
ts._pos[i] = self._f.readline().split()
if self._has_vels:
ts._velocities[i] = self._file.readline().split()
ts._velocities[i] = self._f.readline().split()
if self._has_forces:
ts._forces[i] = self._file.readline().split()
ts._forces[i] = self._f.readline().split()

if ids:
ids = np.array(ids)
Expand All @@ -214,7 +215,7 @@ def _read_next_timestep(self, ts=None):

def _read_frame(self, frame):
"""frame is 0 based, error checking is done in base.getitem"""
self._file.seek(self._offsets[frame])
self._f.seek(self._offsets[frame])
self.ts.frame = frame - 1 # gets +1'd in read_next_frame
return self._read_next_timestep()

Expand All @@ -234,7 +235,7 @@ def _read_n_frames(self):
"""
offsets = self._offsets = []

with open(self.filename, 'r') as f:
with util.anyopen(self.filename, 'r') as f:
n_frames = 0

f.readline()
Expand Down Expand Up @@ -262,10 +263,10 @@ def _read_n_frames(self):

def _reopen(self):
    """Rewind to the start of the trajectory by reopening the file.

    Closes the current handle, reopens ``self.filename`` (via
    ``util.anyopen`` so compressed files also work), skips the two
    header lines, and resets the frame counter to before frame 0.
    """
    # FIX: removed the stale old-side lines that reopened via the
    # removed `self._file` attribute with plain open(); only the
    # `self._f` / util.anyopen version is kept.
    self.close()
    self._f = util.anyopen(self.filename, 'r')
    self._f.readline()  # header is 2 lines
    self._f.readline()
    self.ts.frame = -1

def close(self):
    """Close the underlying HISTORY file handle."""
    # FIX: removed the duplicated stale `self._file.close()` line left
    # over from the attribute rename; only `self._f` exists.
    self._f.close()
22 changes: 11 additions & 11 deletions package/MDAnalysis/coordinates/GMS.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
import MDAnalysis.lib.util as util


class GMSReader(base.ReaderBase):
class GMSReader(base.ReaderBase, base._AsciiPickle):
"""Reads from an GAMESS output file

:Data:
Expand Down Expand Up @@ -82,7 +82,7 @@ def __init__(self, outfilename, **kwargs):
super(GMSReader, self).__init__(outfilename, **kwargs)

# the filename has been parsed to be either b(g)zipped or not
self.outfile = util.anyopen(self.filename)
self._f = util.anyopen(self.filename)

# note that, like for xtc and trr files, _n_atoms and _n_frames are used quasi-private variables
# to prevent the properties being recalculated
Expand Down Expand Up @@ -177,7 +177,7 @@ def _read_out_n_frames(self):
return len(offsets)

def _read_frame(self, frame):
self.outfile.seek(self._offsets[frame])
self._f.seek(self._offsets[frame])
self.ts.frame = frame - 1 # gets +1'd in _read_next
return self._read_next_timestep()

Expand All @@ -186,7 +186,7 @@ def _read_next_timestep(self, ts=None):
if ts is None:
ts = self.ts
# check that the outfile object exists; if not reopen the trajectory
if self.outfile is None:
if self._f is None:
self.open_trajectory()
x = []
y = []
Expand All @@ -195,7 +195,7 @@ def _read_next_timestep(self, ts=None):
flag = 0
counter = 0

for line in self.outfile:
for line in self._f:
if self.runtyp == 'optimize':
if (flag == 0) and (re.match(r'^.NSERCH=.*', line) is not None):
flag = 1
Expand Down Expand Up @@ -246,22 +246,22 @@ def _reopen(self):
self.open_trajectory()

def open_trajectory(self):
    """Open the GAMESS output file and reset the timestep counter.

    Returns
    -------
    The opened file handle (also stored on ``self._f``).

    Raises
    ------
    IOError
        With ``errno.EALREADY`` if the file is already open, or
        ``errno.ENOENT`` if ``self.filename`` does not exist.
    """
    # FIX: the rendered diff kept both the old `self.outfile` and new
    # `self._f` versions of the guard, the assignment and the return;
    # only the `_f` side is valid (the duplicated bare `if` was even a
    # syntax error). Keep the new-attribute version throughout.
    if self._f is not None:
        raise IOError(errno.EALREADY, 'GMS file already opened', self.filename)
    if not os.path.exists(self.filename):
        # must check; otherwise might segmentation fault
        raise IOError(errno.ENOENT, 'GMS file not found', self.filename)

    self._f = util.anyopen(self.filename)

    # reset ts
    ts = self.ts
    ts.frame = -1
    return self._f

def close(self):
    """Close out trajectory file if it was open."""
    # FIX: the merged diff contained both the stale `self.outfile`
    # lines and the new `self._f` lines (two consecutive `if`
    # statements, one without a body — a syntax error). Keep only the
    # renamed-attribute version: no-op when already closed, otherwise
    # close and mark closed with None.
    if self._f is None:
        return
    self._f.close()
    self._f = None
19 changes: 12 additions & 7 deletions package/MDAnalysis/coordinates/GSD.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@

from . import base

class GSDReader(base.ReaderBase):
class GSDReader(base.ReaderBase, base._ExAsciiPickle):
"""Reader for the GSD format.

"""
Expand All @@ -76,23 +76,27 @@ def __init__(self, filename, **kwargs):
super(GSDReader, self).__init__(filename, **kwargs)
self.filename = filename
self.open_trajectory()
self.n_atoms = self._file[0].particles.N
self.n_atoms = self._f[0].particles.N
self.ts = self._Timestep(self.n_atoms, **self._ts_kwargs)
self._read_next_timestep()

def open_trajectory(self) :
def open_trajectory(self):
    """opens the trajectory file using gsd.hoomd module"""
    # FIX: removed the stale duplicated `self._file = gsd.hoomd.open(...)`
    # line; the handle lives on `self._f` after the rename.
    self._frame = -1  # before frame 0, so the first read yields frame 0
    self._f = gsd.hoomd.open(self.filename, mode='rb')

def open_trajectory_for_pickle(self):
    """Reopen the trajectory file without resetting the current frame.

    Used when restoring a pickled reader: the frame counter carried in
    the pickle must be preserved, so only the file handle is rebuilt.
    """
    self._f = gsd.hoomd.open(self.filename, mode='rb')

def close(self):
    """close reader"""
    # FIX: removed the stale duplicated `self._file.file.close()` line.
    # gsd.hoomd wraps the OS-level handle in `.file`; close that.
    self._f.file.close()

@property
def n_frames(self):
    """number of frames in trajectory"""
    # FIX: the merged diff kept both the old `len(self._file)` return and
    # the new one; only the renamed `self._f` handle exists.
    return len(self._f)

def _reopen(self):
"""reopen trajectory"""
Expand All @@ -101,7 +105,7 @@ def _reopen(self):

def _read_frame(self, frame):
try :
myframe = self._file[frame]
myframe = self._f[frame]
except IndexError:
raise_from(IOError, None)

Expand Down Expand Up @@ -131,3 +135,4 @@ def _read_frame(self, frame):
def _read_next_timestep(self) :
"""read next frame in trajectory"""
return self._read_frame(self._frame + 1)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PEP8 don't need a blank line on the last line

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@IAlibay do we have a PEP8 checker running? If not, we should add one to the linter.

@lilyminium does the user guide talk about PEP8-checking?

I'm just asking I find myself writing a lot of "PEP8" comments on PRs.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure we do - I think there's already an ongoing discussion in #2450

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does the user guide talk about PEP8-checking?

Yes, the user guide highlights important points from PEP8, mentions tools like flake8 for linting, and autopep8/yapf for autoformatting. It doesn't mention stuff like spaces after commas and around comparison operators, or blank lines, though.

12 changes: 6 additions & 6 deletions package/MDAnalysis/coordinates/LAMMPS.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,7 +454,7 @@ def write(self, selection, frame=None):
self._write_velocities(atoms)


class DumpReader(base.ReaderBase):
class DumpReader(base.ReaderBase, base._BAsciiPickle):
"""Reads the default `LAMMPS dump format`_

Expects trajectories produced by the default 'atom' style dump.
Expand All @@ -478,7 +478,7 @@ def __init__(self, filename, **kwargs):

def _reopen(self):
    """Reopen the dump file (binary mode) and rewind to before frame 0."""
    # FIX: removed the stale old-side line that assigned the text-mode
    # handle to the removed `self._file` attribute; the reader now uses
    # `self._f` opened in 'rb' mode (offsets are byte positions).
    self.close()
    self._f = util.anyopen(self.filename, 'rb')
    self.ts = self._Timestep(self.n_atoms, **self._ts_kwargs)
    self.ts.frame = -1

Expand Down Expand Up @@ -510,17 +510,17 @@ def n_frames(self):
return len(self._offsets)

def close(self):
    """Close the dump file handle if one was ever opened."""
    # FIX: the merged diff kept both the `_file` and `_f` hasattr
    # guards; only the renamed attribute is used now. hasattr guard
    # because close() may run before _reopen() ever created the handle.
    if hasattr(self, '_f'):
        self._f.close()

def _read_frame(self, frame):
self._file.seek(self._offsets[frame])
self._f.seek(self._offsets[frame])
self.ts.frame = frame - 1 # gets +1'd in next

return self._read_next_timestep()

def _read_next_timestep(self):
f = self._file
f = self._f
ts = self.ts
ts.frame += 1
if ts.frame >= len(self):
Expand Down
12 changes: 6 additions & 6 deletions package/MDAnalysis/coordinates/PDB.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@
# Pairs of residue name / atom name in use to deduce PDB formatted atom names
Pair = collections.namedtuple('Atom', 'resname name')

class PDBReader(base.ReaderBase):
class PDBReader(base.ReaderBase, base._BAsciiPickle):
"""PDBReader that reads a `PDB-formatted`_ file, no frills.

The following *PDB records* are parsed (see `PDB coordinate section`_ for
Expand Down Expand Up @@ -280,7 +280,7 @@ def __init__(self, filename, **kwargs):
if isinstance(filename, util.NamedStream) and isinstance(filename.stream, StringIO):
filename.stream = BytesIO(filename.stream.getvalue().encode())

pdbfile = self._pdbfile = util.anyopen(filename, 'rb')
pdbfile = self._f = util.anyopen(filename, 'rb')

line = "magical"
while line:
Expand Down Expand Up @@ -348,7 +348,7 @@ def _reopen(self):
# Pretend the current TS is -1 (in 0 based) so "next" is the
# 0th frame
self.close()
self._pdbfile = util.anyopen(self.filename, 'rb')
self._f = util.anyopen(self.filename, 'rb')
self.ts.frame = -1

def _read_next_timestep(self, ts=None):
Expand All @@ -374,8 +374,8 @@ def _read_frame(self, frame):
occupancy = np.ones(self.n_atoms)

# Seek to start and read until start of next frame
self._pdbfile.seek(start)
chunk = self._pdbfile.read(stop - start).decode()
self._f.seek(start)
chunk = self._f.read(stop - start).decode()

tmp_buf = []
for line in chunk.splitlines():
Expand Down Expand Up @@ -432,7 +432,7 @@ def _read_frame(self, frame):
return self.ts

def close(self):
    """Close the underlying PDB file handle."""
    # FIX: removed the stale duplicated `self._pdbfile.close()` line;
    # the handle was renamed to `self._f`.
    self._f.close()


class PDBWriter(base.WriterBase):
Expand Down
Loading