Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add scale_factor write support for NetCDF writer #3294

Merged
merged 18 commits into from
Aug 20, 2021
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions package/CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,9 @@ Enhancements
checking if it can be used in parallel analysis. (Issue #2996, PR #2950)

Changes
* NCDFWriter now allows for the writing of `scale_factor` attributes for
trajectory variables. Default writing is no `scale_factor` except for
velocities where it is 20.455 (Issue #2327)
* `analysis.polymer.PersistenceLength` class now stores `lb`,
`lp` and `fit` using the `analysis.base.Results` class
(Issues #3289, #3291)
Expand Down
210 changes: 158 additions & 52 deletions package/MDAnalysis/coordinates/TRJ.py
Original file line number Diff line number Diff line change
Expand Up @@ -414,7 +414,6 @@ class NCDFReader(base.ReaderBase):
Current limitations:

* only trajectories with time in ps and lengths in Angstroem are processed
* scale_factors are supported on read but are currently not kept/used when
writing
IAlibay marked this conversation as resolved.
Show resolved Hide resolved

The NCDF reader uses :mod:`scipy.io.netcdf` and therefore :mod:`scipy` must
Expand Down Expand Up @@ -456,6 +455,8 @@ class NCDFReader(base.ReaderBase):
:class:`NCDFPicklable`.
Reading of `dt` now defaults to 1.0 ps if `dt` cannot be extracted from
the first two frames of the trajectory.
:meth:`Writer` now also sets `convert_units`, `velocities`, `forces` and
`scale_factor` information for the :class:`NCDFWriter`.

"""

Expand Down Expand Up @@ -581,17 +582,19 @@ def __init__(self, filename, n_atoms=None, mmap=None, **kwargs):

# Check for scale_factor attributes for all data variables and
# store this to multiply through later (Issue #2323)
self.scale_factors = {'time': 1.0,
'cell_lengths': 1.0,
'cell_angles': 1.0,
'coordinates': 1.0,
'velocities': 1.0,
'forces': 1.0}
self.scale_factors = {'time': None,
'cell_lengths': None,
'cell_angles': None,
'coordinates': None,
'velocities': None,
'forces': None}

for variable in self.trjfile.variables:
if hasattr(self.trjfile.variables[variable], 'scale_factor'):
if variable in self.scale_factors:
scale_factor = self.trjfile.variables[variable].scale_factor
if not isinstance(scale_factor, (float, np.floating)):
raise TypeError(f"{scale_factor} is not a float")
self.scale_factors[variable] = scale_factor
else:
errmsg = ("scale_factors for variable {0} are "
Expand Down Expand Up @@ -641,6 +644,13 @@ def parse_n_atoms(filename, **kwargs):
n_atoms = f.dimensions['atom']
return n_atoms

@staticmethod
def _scale(variable, scale_factor):
if scale_factor is None or scale_factor == 1:
return variable
else:
return scale_factor * variable

def _read_frame(self, frame):
ts = self.ts

Expand All @@ -653,21 +663,25 @@ def _read_frame(self, frame):
raise IndexError("frame index must be 0 <= frame < {0}".format(
self.n_frames))
# note: self.trjfile.variables['coordinates'].shape == (frames, n_atoms, 3)
ts._pos[:] = (self.trjfile.variables['coordinates'][frame] *
self.scale_factors['coordinates'])
ts.time = (self.trjfile.variables['time'][frame] *
self.scale_factors['time'])
ts._pos[:] = self._scale(self.trjfile.variables['coordinates'][frame],
self.scale_factors['coordinates'])
ts.time = self._scale(self.trjfile.variables['time'][frame],
self.scale_factors['time'])
if self.has_velocities:
ts._velocities[:] = (self.trjfile.variables['velocities'][frame] *
self.scale_factors['velocities'])
ts._velocities[:] = self._scale(
self.trjfile.variables['velocities'][frame],
self.scale_factors['velocities'])
if self.has_forces:
ts._forces[:] = (self.trjfile.variables['forces'][frame] *
self.scale_factors['forces'])
ts._forces[:] = self._scale(
self.trjfile.variables['forces'][frame],
self.scale_factors['forces'])
if self.periodic:
ts._unitcell[:3] = (self.trjfile.variables['cell_lengths'][frame] *
self.scale_factors['cell_lengths'])
ts._unitcell[3:] = (self.trjfile.variables['cell_angles'][frame] *
self.scale_factors['cell_angles'])
ts._unitcell[:3] = self._scale(
self.trjfile.variables['cell_lengths'][frame],
self.scale_factors['cell_lengths'])
ts._unitcell[3:] = self._scale(
self.trjfile.variables['cell_angles'][frame],
self.scale_factors['cell_angles'])
if self.convert_units:
self.convert_pos_from_native(ts._pos) # in-place !
self.convert_time_from_native(
Expand Down Expand Up @@ -731,18 +745,38 @@ def Writer(self, filename, **kwargs):
filename of the output NCDF trajectory
n_atoms : int (optional)
number of atoms
dt : float (optional)
length of one timestep in picoseconds
remarks : str (optional)
string that is stored in the title field
convert_units : bool (optional)
``True``: units are converted to the AMBER base format
velocities : bool (optional)
Write velocities into the trajectory
forces : bool (optional)
Write forces into the trajectory
scale_time : float (optional)
Scale factor for time units
scale_cell_lengths : float (optional)
Scale factor for cell lengths
scale_cell_angles : float (optional)
Scale factor for cell angles
scale_coordinates : float (optional)
Scale factor for coordinates
scale_velocities : float (optional)
Scale factor for velocities
scale_forces : float (optional)
Scale factor for forces

Returns
-------
:class:`NCDFWriter`
"""
n_atoms = kwargs.pop('n_atoms', self.n_atoms)
kwargs.setdefault('remarks', self.remarks)
kwargs.setdefault('dt', self.dt)
kwargs.setdefault('convert_units', self.convert_units)
kwargs.setdefault('velocities', self.has_velocities)
kwargs.setdefault('forces', self.has_forces)
for key in self.scale_factors:
kwargs.setdefault(f'scale_{key}', self.scale_factors[key])
return NCDFWriter(filename, n_atoms, **kwargs)
IAlibay marked this conversation as resolved.
Show resolved Hide resolved


Expand All @@ -755,7 +789,14 @@ class NCDFWriter(base.WriterBase):
Velocities are written out if they are detected in the input
:class:`Timestep`. The trajectories are always written with ångström
for the lengths and picoseconds for the time (and hence Å/ps for
velocities).
velocities and kilocalorie/mole/Å for forces).

If passed, `scale_factor` variables for time / velocities / cell lengths /
cell angles / coordinates / velocities / forces will be written to the
IAlibay marked this conversation as resolved.
Show resolved Hide resolved
NetCDF file. In this case, the trajectory data will be written to the
NetCDF file divided by the scale_factor value. By default, `scale_factor`
IAlibay marked this conversation as resolved.
Show resolved Hide resolved
variables are not written, except for velocities where it is set to 20.455
IAlibay marked this conversation as resolved.
Show resolved Hide resolved
replicating the default behaviour of AMBER.

Unit cell information is written if available.

Expand All @@ -768,18 +809,24 @@ class NCDFWriter(base.WriterBase):
name of output file
n_atoms : int
number of atoms in trajectory file
start : int (optional)
orbeckst marked this conversation as resolved.
Show resolved Hide resolved
starting timestep
step : int (optional)
skip between subsequent timesteps
dt : float (optional)
timestep
convert_units : bool (optional)
``True``: units are converted to the AMBER base format; [``True``]
velocities : bool (optional)
Write velocities into the trajectory [``False``]
forces : bool (optional)
Write forces into the trajectory [``False``]
scale_time : float (optional)
Scale factor for time units [`None`]
scale_cell_lengths : float (optional)
Scale factor for cell lengths [`None`]
scale_cell_angles : float (optional)
Scale factor for cell angles [`None`]
scale_coordinates : float (optional)
Scale factor for coordinates [`None`]
scale_velocities : float (optional)
Scale factor for velocities [20.455]
scale_forces : float (optional)
Scale factor for forces [`None`]
IAlibay marked this conversation as resolved.
Show resolved Hide resolved


Note
Expand Down Expand Up @@ -840,9 +887,11 @@ class NCDFWriter(base.WriterBase):
Changes the `cell_angles` unit to the AMBER NetCDF convention standard
of `degree` instead of the `degrees` written in previous version of
MDAnalysis (Issue #2327).

.. TODO:
* Implement `scale_factor` handling (Issue #2327).
.. versionchanged:: 2.0.0
`dt`, `start`, and `step` keywords were unused and are no longer set.
Writing of `scale_factor` values has now been implemented. By default
only velocities write a scale_factor of 20.455 (echoing the behaviour
seen from AMBER)

"""

Expand All @@ -854,32 +903,40 @@ class NCDFWriter(base.WriterBase):
'velocity': 'Angstrom/ps',
'force': 'kcal/(mol*Angstrom)'}

def __init__(self,
filename,
n_atoms,
start=0,
step=1,
dt=1.0,
remarks=None,
convert_units=True,
**kwargs):
def __init__(self, filename, n_atoms, remarks=None, convert_units=True,
velocities=False, forces=False, scale_time=None,
scale_cell_lengths=None, scale_cell_angles=None,
scale_coordinates=None, scale_velocities=None,
scale_forces=None, **kwargs):
self.filename = filename
if n_atoms == 0:
raise ValueError("NCDFWriter: no atoms in output trajectory")
self.n_atoms = n_atoms
# convert length and time to base units on the fly?
self.convert_units = convert_units

self.start = start # do we use those?
self.step = step # do we use those?
self.dt = dt
self.remarks = remarks or "AMBER NetCDF format (MDAnalysis.coordinates.trj.NCDFWriter)"

self._first_frame = True # signals to open trajectory
self.trjfile = None # open on first write with _init_netcdf()
self.periodic = None # detect on first write
self.has_velocities = kwargs.get('velocities', False)
self.has_forces = kwargs.get('forces', False)
self.has_velocities = velocities
self.has_forces = forces

self.scale_factors = {
'time': scale_time,
'cell_lengths': scale_cell_lengths,
'cell_angles': scale_cell_angles,
'coordinates': scale_coordinates,
'velocities': scale_velocities,
'forces': scale_forces}
# NCDF standard enforces float scale_factors
for value in self.scale_factors.values():
if (value is not None) and (
not isinstance(value, (float, np.floating))):
errmsg = f"scale_factor {value} is not a float"
raise TypeError(errmsg)

self.curr_frame = 0

def _init_netcdf(self, periodic=True):
Expand Down Expand Up @@ -937,18 +994,25 @@ def _init_netcdf(self, periodic=True):
coords = ncfile.createVariable('coordinates', 'f4',
('frame', 'atom', 'spatial'))
setattr(coords, 'units', 'angstrom')
if self.scale_factors['coordinates']:
setattr(coords, 'scale_factor', self.scale_factors['coordinates'])
IAlibay marked this conversation as resolved.
Show resolved Hide resolved

spatial = ncfile.createVariable('spatial', 'c', ('spatial', ))
spatial[:] = np.asarray(list('xyz'))

time = ncfile.createVariable('time', 'f4', ('frame',))
setattr(time, 'units', 'picosecond')
if self.scale_factors['time']:
setattr(time, 'scale_factor', self.scale_factors['time'])

self.periodic = periodic
if self.periodic:
cell_lengths = ncfile.createVariable('cell_lengths', 'f8',
('frame', 'cell_spatial'))
setattr(cell_lengths, 'units', 'angstrom')
if self.scale_factors['cell_lengths']:
setattr(cell_lengths, 'scale_factor',
self.scale_factors['cell_lengths'])

cell_spatial = ncfile.createVariable('cell_spatial', 'c',
('cell_spatial', ))
Expand All @@ -957,6 +1021,9 @@ def _init_netcdf(self, periodic=True):
cell_angles = ncfile.createVariable('cell_angles', 'f8',
('frame', 'cell_angular'))
setattr(cell_angles, 'units', 'degree')
if self.scale_factors['cell_angles']:
setattr(cell_angles, 'scale_factor',
self.scale_factors['cell_angles'])

cell_angular = ncfile.createVariable('cell_angular', 'c',
('cell_angular', 'label'))
Expand All @@ -968,10 +1035,16 @@ def _init_netcdf(self, periodic=True):
velocs = ncfile.createVariable('velocities', 'f4',
('frame', 'atom', 'spatial'))
setattr(velocs, 'units', 'angstrom/picosecond')
if self.scale_factors['velocities']:
setattr(velocs, 'scale_factor',
self.scale_factors['velocities'])
if self.has_forces:
forces = ncfile.createVariable('forces', 'f4',
('frame', 'atom', 'spatial'))
setattr(forces, 'units', 'kilocalorie/mole/angstrom')
if self.scale_factors['forces']:
setattr(forces, 'scale_factor',
self.scale_factors['forces'])

ncfile.sync()
self._first_frame = False
Expand Down Expand Up @@ -1029,6 +1102,13 @@ def _write_next_frame(self, ag):

return self._write_next_timestep(ts)

@staticmethod
def _scale(variable, scale_factor):
if scale_factor is None or scale_factor == 1:
return variable
else:
return variable / scale_factor

IAlibay marked this conversation as resolved.
Show resolved Hide resolved
def _write_next_timestep(self, ts):
"""Write coordinates and unitcell information to NCDF file.

Expand All @@ -1042,6 +1122,10 @@ def _write_next_timestep(self, ts):
https://github.com/MDAnalysis/mdanalysis/issues/109
.. _`netcdf4storage.py`:
https://storage.googleapis.com/google-code-attachments/mdanalysis/issue-109/comment-2/netcdf4storage.py


.. versionchanged:: 2.0.0
Can now write scale_factors, and scale variables accordingly.
"""
pos = ts._pos
time = ts.time
Expand All @@ -1059,27 +1143,49 @@ def _write_next_timestep(self, ts):
unitcell = self.convert_dimensions_to_unitcell(ts)

# write step
self.trjfile.variables['coordinates'][self.curr_frame, :, :] = pos
self.trjfile.variables['time'][self.curr_frame] = time
# coordinates
self.trjfile.variables['coordinates'][
IAlibay marked this conversation as resolved.
Show resolved Hide resolved
self.curr_frame, :, :] = self._scale(
pos, self.scale_factors['coordinates'])

# time
self.trjfile.variables['time'][self.curr_frame] = self._scale(
time, self.scale_factors['time'])

# unitcell
if self.periodic:
# cell lengths
self.trjfile.variables['cell_lengths'][
self.curr_frame, :] = unitcell[:3]
self.curr_frame, :] = self._scale(
unitcell[:3],
self.scale_factors['cell_lengths'])

self.trjfile.variables['cell_angles'][
self.curr_frame, :] = unitcell[3:]
self.curr_frame, :] = self._scale(
unitcell[3:],
self.scale_factors['cell_angles'])

# velocities
if self.has_velocities:
velocities = ts._velocities
if self.convert_units:
velocities = self.convert_velocities_to_native(
velocities, inplace=False)
self.trjfile.variables['velocities'][self.curr_frame, :, :] = velocities

self.trjfile.variables['velocities'][
self.curr_frame, :, :] = self._scale(
velocities, self.scale_factors['velocities'])

# forces
if self.has_forces:
forces = ts._forces
if self.convert_units:
forces = self.convert_forces_to_native(
forces, inplace=False)
self.trjfile.variables['forces'][self.curr_frame, :, :] = forces

self.trjfile.variables['forces'][
self.curr_frame, :, :] = self._scale(
forces, self.scale_factors['forces'])

self.trjfile.sync()
self.curr_frame += 1
Expand Down
Loading