Skip to content

Commit

Permalink
replaced netCDF4 with scipy.io.netcdf (issue #506)
Browse files Browse the repository at this point in the history
- removed all imports of netCDF4
- API is different (less feature rich in the pure python
  implementation) so it is not straightforward to support
  both netCDF4 and fall back on scipy.io.netcdf (or perhaps
  even pupyere, which is a single-file package from which the
  scipy code originated)
- uses float32 for
  - positions
  - velocities
  - forces
  (port of resolution of issue #518)
- change NCDFReader, NCDFWriter and test_netcdf
- see discussion of issue #506 for performance benchmarks
  • Loading branch information
tylerjereddy authored and orbeckst committed Jun 30, 2017
1 parent be8db1c commit b706315
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 92 deletions.
3 changes: 3 additions & 0 deletions package/CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,7 @@ Changes
before trying to deduce the format from file extension. (Issue #712)

Fixes

* Syntax error corrected in psa.py (Issue #738)
* XDR file seeking and telling working again for large files (Issue #677).
* ContactAnalysis1 run method now starts at frame index 0 by default (Issue #624)
Expand Down Expand Up @@ -437,6 +438,8 @@ Changes
* removed superfluous analysis.density.density_from_trajectory();
use density_from_Universe(TOPOL, TRAJ) instead.
* MOL2Writer.write now only writes a single frame (Issue #521)
* use scipy.io.netcdf pure python implementation for reading of Amber
netcdf3 trajctories instead of netCDF4 + netcdf lib (see also Issue #488)

Fixes

Expand Down
132 changes: 51 additions & 81 deletions package/MDAnalysis/coordinates/TRJ.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,18 +145,6 @@

logger = logging.getLogger("MDAnalysis.coordinates.AMBER")

try:
import netCDF4 as netcdf
except ImportError:
# Just to notify the user; the module will still load. However, NCDFReader
# and NCDFWriter will raise a proper ImportError if they are called without
# the netCDF4 library present. See Issue 122 for a discussion.
logger.debug(
"Failed to import netCDF4; AMBER NETCDFReader/Writer will not work. "
"Install netCDF4 from https://github.com/Unidata/netcdf4-python.")
logger.debug(
"See also https://github.com/MDAnalysis/mdanalysis/wiki/netcdf")


class Timestep(base.Timestep):
"""AMBER trajectory Timestep.
Expand Down Expand Up @@ -433,19 +421,15 @@ class NCDFReader(base.ReaderBase):

def __init__(self, filename, n_atoms=None, **kwargs):
try:
import netCDF4 as netcdf
from scipy.io import netcdf
except ImportError:
errmsg = ("netCDF4 package missing.\n"
"netcdf4-python with the netCDF and HDF5 libraries must"
" be installed for the AMBER ncdf Reader.\n"
"See installation instructions at "
"https://github.com/MDAnalysis/mdanalysis/wiki/netcdf")
logger.fatal(errmsg)
raise ImportError(errmsg)
logger.fatal("scipy.io.netcdf must be installed for the AMBER ncdf Reader.")
raise ImportError("scipy.io.netcdf package missing but is required "
"for the Amber Reader.")

super(NCDFReader, self).__init__(filename, **kwargs)

self.trjfile = netcdf.Dataset(self.filename)
self.trjfile = netcdf.netcdf_file(self.filename)

if not ('AMBER' in self.trjfile.Conventions.split(',') or
'AMBER' in self.trjfile.Conventions.split()):
Expand All @@ -462,10 +446,15 @@ def __init__(self, filename, n_atoms=None, **kwargs):
warnings.warn(wmsg)
logger.warn(wmsg)

self.n_atoms = len(self.trjfile.dimensions['atom'])
self.n_frames = len(self.trjfile.dimensions['frame'])
# also records time steps in data.variables['time'] and unit
# but my example only has 0
self.n_atoms = self.trjfile.dimensions['atom']
self.n_frames = self.trjfile.dimensions['frame']
# example trajectory when read with scipy.io.netcdf has
# dimensions['frame'] == None (indicating a record dimension that can
# grow) whereas if read with netCDF4 I get len(dimensions['frame']) ==
# 10: in any case, we need to get the number of frames from somewhere
# such as the time variable:
if self.n_frames is None:
self.n_frames = self.trjfile.variables['time'].shape[0]

try:
self.remarks = self.trjfile.title
Expand All @@ -481,22 +470,20 @@ def __init__(self, filename, n_atoms=None, **kwargs):
# hacked into MDAnalysis.units)
if self.trjfile.variables['time'].units != "picosecond":
raise NotImplementedError(
"NETCDFReader currently assumes that the trajectory was "
"written with a time unit of picoseconds and "
"not {0}.".format(self.trjfile.variables['time'].units))
"NETCDFReader currently assumes that the trajectory was written "
"with a time unit of picoseconds and not {0}.".format(
self.trjfile.variables['time'].units))
if self.trjfile.variables['coordinates'].units != "angstrom":
raise NotImplementedError(
"NETCDFReader currently assumes that the trajectory was "
"written with a length unit of Angstroem and "
"not {0}.".format(self.trjfile.variables['coordinates'].units))
"NETCDFReader currently assumes that the trajectory was written "
"with a length unit of Angstroem and not {0}.".format(
self.trjfile.variables['coordinates'].units))
if hasattr(self.trjfile.variables['coordinates'], 'scale_factor'):
raise NotImplementedError("scale_factors are not implemented")
if n_atoms is not None:
if n_atoms != self.n_atoms:
raise ValueError(
"Supplied n_atoms ({0}) != natom from ncdf ({1}). Note: "
"n_atoms can be None and then the ncdf value is used!"
"".format(n_atoms, self.n_atoms))
if n_atoms is not None and n_atoms != self.n_atoms:
raise ValueError("Supplied n_atoms ({0}) != natom from ncdf ({1}). "
"Note: n_atoms can be None and then the ncdf value "
"is used!".format(n_atoms, self.n_atoms))

self.has_velocities = 'velocities' in self.trjfile.variables
self.has_forces = 'forces' in self.trjfile.variables
Expand All @@ -518,9 +505,12 @@ def _read_frame(self, frame):

if self.trjfile is None:
raise IOError("Trajectory is closed")
if np.dtype(type(frame)) != np.dtype(int):
# convention... for netcdf could also be a slice
raise TypeError("frame must be a positive integer or zero")
if frame >= self.n_frames or frame < 0:
raise IndexError("frame index must be 0 <= frame < {0}"
"".format(self.n_frames))
raise IndexError("frame index must be 0 <= frame < {0}".format(
self.n_frames))
# note: self.trjfile.variables['coordinates'].shape == (frames, n_atoms, 3)
ts._pos[:] = self.trjfile.variables['coordinates'][frame]
ts.time = self.trjfile.variables['time'][frame]
Expand Down Expand Up @@ -564,7 +554,12 @@ def _get_dt(self):
return t1 - t0

def close(self):
"""Close trajectory; any further access will raise an :exc:`IOError`"""
"""Close trajectory; any further access will raise an :exc:`IOError`.
.. Note:: The underlying :mod:`scipy.io.netcdf` module open netcdf
files with `mmap()`. Hence *any* reference to an array
*must* be removed before the file can be closed.
"""
if self.trjfile is not None:
self.trjfile.close()
self.trjfile = None
Expand Down Expand Up @@ -710,28 +705,18 @@ def _init_netcdf(self, periodic=True):
https://storage.googleapis.com/google-code-attachments/mdanalysis/issue-109/comment-2/netcdf4storage.py
"""
try:
import netCDF4 as netcdf
from scipy.io import netcdf
except ImportError:
logger.fatal("netcdf4-python with the netCDF and HDF5 libraries "
"must be installed for the AMBER ncdf Writer."
"See installation instructions at "
"https://github.com/MDAnalysis/mdanalysis/wiki/netcdf")
raise ImportError(
"netCDF4 package missing.\n"
"netcdf4-python with the netCDF and HDF5 libraries must be "
"installed for the AMBER ncdf Writer.\n"
"See installation instructions at "
"https://github.com/MDAnalysis/mdanalysis/wiki/netcdf")
logger.fatal("scipy.io.netcdf must be installed for the AMBER ncdf Reader.")
raise ImportError("scipy.io.netcdf package missing but is required "
"for the Amber Reader.")

if not self._first_frame:
raise IOError(
errno.EIO,
"Attempt to write to closed file {0}".format(self.filename))

ncfile = netcdf.Dataset(self.filename,
clobber=True,
mode='w',
format='NETCDF3_64BIT')
ncfile = netcdf.netcdf_file(self.filename, mode='w', version=2)

# Set global attributes.
setattr(ncfile, 'program', 'MDAnalysis.coordinates.TRJ.NCDFWriter')
Expand All @@ -751,39 +736,28 @@ def _init_netcdf(self, periodic=True):
ncfile.createDimension('label', 5) # needed for cell_angular

# Create variables.
coords = ncfile.createVariable('coordinates',
'f4', ('frame', 'atom', 'spatial'),
zlib=self.zlib,
complevel=self.cmplevel)
coords = ncfile.createVariable('coordinates', 'f4',
('frame', 'atom', 'spatial'))
setattr(coords, 'units', 'angstrom')

spatial = ncfile.createVariable('spatial', 'c', ('spatial', ))
spatial[:] = np.asarray(list('xyz'))

time = ncfile.createVariable('time',
'f4', ('frame', ),
zlib=self.zlib,
complevel=self.cmplevel)
time = ncfile.createVariable('time', 'f4', ('frame',))
setattr(time, 'units', 'picosecond')

self.periodic = periodic
if self.periodic:
cell_lengths = ncfile.createVariable('cell_lengths',
'f8',
('frame', 'cell_spatial'),
zlib=self.zlib,
complevel=self.cmplevel)
cell_lengths = ncfile.createVariable('cell_lengths', 'f8',
('frame', 'cell_spatial'))
setattr(cell_lengths, 'units', 'angstrom')

cell_spatial = ncfile.createVariable('cell_spatial', 'c',
('cell_spatial', ))
cell_spatial[:] = np.asarray(list('abc'))

cell_angles = ncfile.createVariable('cell_angles',
'f8',
('frame', 'cell_angular'),
zlib=self.zlib,
complevel=self.cmplevel)
cell_angles = ncfile.createVariable('cell_angles', 'f8',
('frame', 'cell_angular'))
setattr(cell_angles, 'units', 'degrees')

cell_angular = ncfile.createVariable('cell_angular', 'c',
Expand All @@ -793,16 +767,12 @@ def _init_netcdf(self, periodic=True):

# These properties are optional, and are specified on Writer creation
if self.has_velocities:
velocs = ncfile.createVariable('velocities',
'f8', ('frame', 'atom', 'spatial'),
zlib=self.zlib,
complevel=self.cmplevel)
velocs = ncfile.createVariable('velocities', 'f4',
('frame', 'atom', 'spatial'))
setattr(velocs, 'units', 'angstrom/picosecond')
if self.has_forces:
forces = ncfile.createVariable('forces',
'f8', ('frame', 'atom', 'spatial'),
zlib=self.zlib,
complevel=self.cmplevel)
forces = ncfile.createVariable('forces', 'f4',
('frame', 'atom', 'spatial'))
setattr(forces, 'units', 'kilocalorie/mole/angstrom')

ncfile.sync()
Expand Down
4 changes: 2 additions & 2 deletions package/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
(Note that the group really is called `mdnalysis-discussion' because
Google groups forbids any name that contains the string `anal'.)
"""

from __future__ import print_function
from setuptools import setup, Extension, find_packages
from distutils.ccompiler import new_compiler
Expand Down Expand Up @@ -511,8 +512,7 @@ def dynamic_author_list():
# you might prefer to use the version available through your
# packaging system
extras_require={
'AMBER': ['netCDF4>=1.0'], # for AMBER netcdf, also needs HDF5
# and netcdf-4
'AMBER': ['scipy'], # for AMBER netcdf, (does NOT need HDF5)
'analysis': [
'seaborn', # for annotated heat map and nearest neighbor
# plotting in PSA
Expand Down
17 changes: 8 additions & 9 deletions testsuite/MDAnalysisTests/coordinates/test_netcdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,26 +234,25 @@ def _check_new_traj(self):
dim_new = nc_copy.dimensions[k]
except KeyError:
raise AssertionError("NCDFWriter did not write "
"dimension '{}'".format(k))
"dimension '{0}'".format(k))
else:
assert_equal(len(dim), len(dim_new),
err_msg="Dimension '{0}' size mismatch".format(k))


for k, v in nc_orig.variables.items():
try:
v_new = nc_copy.variables[k]
except KeyError:
raise AssertionError("NCDFWriter did not write "
"variable '{}'".format(k))
"variable '{0}'".format(k))
else:
try:
assert_array_almost_equal(v[:], v_new[:], self.prec,
err_msg="Variable '{}' not "
err_msg="Variable '{0}' not "
"written correctly".format(k))
except TypeError:
assert_array_equal(v[:], v_new[:],
err_msg="Variable {} not written "
err_msg="Variable {0} not written "
"correctly".format(k))

@attr('slow')
Expand All @@ -271,14 +270,14 @@ def test_TRR2NCDF(self):
assert_array_almost_equal(written_ts._pos, orig_ts._pos, self.prec,
err_msg="coordinate mismatch between "
"original and written trajectory at "
"frame %d (orig) vs %d (written)" % (
orig_ts.frame, written_ts.frame))
"frame {0} (orig) vs {1} (written)".format(
orig_ts.frame, written_ts.frame))
assert_array_almost_equal(written_ts._velocities,
orig_ts._velocities, self.prec,
err_msg="velocity mismatch between "
"original and written trajectory at "
"frame %d (orig) vs %d (written)" % (
orig_ts.frame, written_ts.frame))
"frame {0} (orig) vs {1} (written)".format(
orig_ts.frame, written_ts.frame))
assert_almost_equal(orig_ts.time, written_ts.time, self.prec,
err_msg="Time for step {0} are not the "
"same.".format(orig_ts.frame))
Expand Down

0 comments on commit b706315

Please sign in to comment.