Skip to content

Commit

Permalink
use netcdf for reading Amber netcdf but netCDF4 for writing
Browse files Browse the repository at this point in the history
- netcdf reads MUCH faster than netCDF4 but writes MUCH, MUCH
  slower than netCDF4: always read with netcdf but write with
  netCDF4 if available, otherwise use slow netcdf and warn
- implements @swails 's solution from ParmEd/ParmEd#722 -- thank you!!
- minimal testing: write the same trajectory with netcdf and with
  netCDF4
- NOTE: netCDF4 is not installed by default, use the re-introduced
  [AMBER] install target

     pip install MDAnalysis[AMBER]

  to request its installation but Amber users should also be aware
  of potential issues with the bundled netcdf library of Amber; see
  #506 (comment)
  for details
- closes #506
  • Loading branch information
orbeckst committed Jun 30, 2017
1 parent 7f89973 commit b4f77a4
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 7 deletions.
10 changes: 5 additions & 5 deletions package/CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -24,19 +24,19 @@ Deprecations
Fixes
* correctly read little-endian TRZ files on big-endian architectures (issue
#1424)

Changes
* remove deprecated TimeSeriesCollection
* remove deprecated analysis.align.rms_fit_trj
* remove deprecated analysis.contacts.ContactAnalysis
* remove deprecated analysis.contacts.ContactAnalysis1
* remove deprecated analysis.hbonds.hbond_analysis 1-indexing
* remove deprecated analysis.rms `target` keyword from functions
* use scipy.io.netcdf pure python implementation for reading of Amber
netcdf3 trajectories instead of netCDF4 + netcdf lib (see also
Issue #506)
* use fast scipy.io.netcdf pure python implementation for reading of Amber
netcdf3 trajectories instead of netCDF4 but use netCDF4 for fast
writing (if available) or fall back to netcdf (see also Issue #506)
* bundled scipy.io.netcdf (v0.19.1) as lib.netcdf as a fallback for
scipy.io.netcdf so that Amber NC trajectories can always be read
scipy.io.netcdf so that Amber NC trajectories can always be processed


mm/dd/17 richardjgowers, rathann, jbarnoud, orbeckst, utkbansal
Expand Down
18 changes: 17 additions & 1 deletion package/MDAnalysis/coordinates/TRJ.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,11 @@
logger.debug("Using the bundled lib.netcdf (from scipy 0.16.1) instead.")
from ..lib import netcdf

# netCDF4 is an optional dependency. When it cannot be imported the module
# name is bound to None so later code can test ``if netCDF4:`` to choose
# the writer backend.
try:
    import netCDF4
except ImportError:
    netCDF4 = None
    # Logger.warn is a deprecated alias; Logger.warning is the supported
    # spelling. The message text itself is unchanged.
    logger.warning(
        "netCDF4 is not available. Writing AMBER ncdf files will be slow")

class Timestep(base.Timestep):
"""AMBER trajectory Timestep.
Expand Down Expand Up @@ -629,6 +634,9 @@ class NCDFWriter(base.WriterBase):
Added ability to write velocities and forces
.. versionchanged:: 0.11.0
kwarg `delta` renamed to `dt`, for uniformity with other Readers
.. versionchanged:: 0.17.0
Use fast netCDF4 for writing but fall back to slow scipy.io.netcdf
if netCDF4 is not available.
"""

format = 'NCDF'
Expand Down Expand Up @@ -722,7 +730,15 @@ def _init_netcdf(self, periodic=True):
errno.EIO,
"Attempt to write to closed file {0}".format(self.filename))

ncfile = netcdf.netcdf_file(self.filename, mode='w', version=2)
if netCDF4:
ncfile = netCDF4.Dataset(self.filename, 'w', format='NETCDF3_64BIT')
else:
ncfile = netcdf.netcdf_file(self.filename, mode='w', version=2,
mmap=False)
warn_msg = "Could not find netCDF4 module. Falling back to MUCH slower "\
"scipy.io.netcdf implementation for writing."
logger.warn(warn_msg)
warnings.warn(warn_msg)

# Set global attributes.
setattr(ncfile, 'program', 'MDAnalysis.coordinates.TRJ.NCDFWriter')
Expand Down
1 change: 1 addition & 0 deletions package/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -512,6 +512,7 @@ def dynamic_author_list():
# you might prefer to use the version available through your
# packaging system
extras_require={
'AMBER': 'netCDF4',
'analysis': [
'seaborn', # for annotated heat map and nearest neighbor
# plotting in PSA
Expand Down
11 changes: 10 additions & 1 deletion testsuite/MDAnalysisTests/coordinates/test_netcdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
# fall back (should ALWAYS work)
from MDAnalysis.lib import netcdf

import pytest
from nose.plugins.attrib import attr
from numpy.testing import (assert_, assert_equal, assert_array_almost_equal,
assert_array_equal,
Expand Down Expand Up @@ -185,7 +186,7 @@ def tearDown(self):
del self.Writer
del self.tmpdir

def test_write_trajectory(self):
def _test_write_trajectory(self):
t = self.universe.trajectory
with self.Writer(self.outfile, t.n_atoms, dt=t.dt) as W:
self._copy_traj(W)
Expand All @@ -208,6 +209,14 @@ def test_write_trajectory(self):
err_msg='ncdf time output not float32 '
'but {}'.format(time[:].dtype))

# Run the shared trajectory-writing check only when the netCDF4 module can be
# imported; pytest.importorskip skips this test otherwise.
def test_write_trajectory_netCDF4(self):
pytest.importorskip("netCDF4")
return self._test_write_trajectory()

# Run the shared trajectory-writing check under the block_import('netCDF4')
# decorator.
# NOTE(review): block_import is defined outside this view; presumably it makes
# importing netCDF4 fail so the fallback netcdf writer is exercised -- confirm.
@block_import('netCDF4')
def test_write_trajectory_netcdf(self):
return self._test_write_trajectory()

def test_OtherWriter(self):
t = self.universe.trajectory
with t.OtherWriter(self.outfile) as W:
Expand Down

0 comments on commit b4f77a4

Please sign in to comment.