Merge pull request #18 from bjmorgan/test
Fixing correctness bug
arm61 authored Nov 23, 2022
2 parents 03a6edc + 9e81ccf commit 8f70cd1
Showing 14 changed files with 278 additions and 256 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/ci.yml
@@ -7,8 +7,7 @@ jobs:
     runs-on: ubuntu-20.04
     strategy:
       matrix:
-        python-version: ['3.7',
-                         '3.8',
+        python-version: ['3.8',
                          '3.9',
                          '3.10']
     steps:
4 changes: 2 additions & 2 deletions .github/workflows/release.yml
@@ -10,10 +10,10 @@ jobs:
     runs-on: ubuntu-18.04
     steps:
       - uses: actions/checkout@master
-      - name: setup python 3.7
+      - name: setup python 3.9
        uses: actions/setup-python@v2
        with:
-          python-version: 3.7
+          python-version: 3.9
      - name: install pypa/build
        run: >-
          python -m
17 changes: 14 additions & 3 deletions docs/source/arrhenius_t.ipynb
@@ -61,7 +61,7 @@
    "source": [
     "To read these simulations we will use [MDAnalysis](https://userguide.mdanalysis.org/stable/index.html) (however, it is also possible to use data from a [VASP simulation](./vasp_d.html)).\n",
     "The parser, bootstrap, and diffusion parameters are all defined for all simulations; here we only consider the diffusive regime to begin after 5 ps.\n",
-    "Additionally, we include in the `p_params` a `sub_sample_atoms` key, this defines the sampling frequency of atoms to be used in the analysis.\n",
+    "Additionally, we include in the `p_params` a `sub_sample_atoms` key, which defines the sampling frequency of atoms to be used in the analysis, and a `sub_sample_traj` key, which defines the sampling frequency for the trajectory.\n",
     "This facility can be particularly useful for large simulations where `kinisi` might encounter issues related to out-of-memory problems. "
    ]
   },
@@ -77,6 +77,7 @@
     "    'step_skip': 100,\n",
     "    'min_dt': 0.001,\n",
     "    'sub_sample_atoms': 4,\n",
+    "    'sub_sample_traj': 2,\n",
     "    'progress': False}\n",
     "b_params = {'progress': False}\n",
     "d_params = {'dt_skip': 10, \n",
@@ -113,6 +114,16 @@
     "The list of diffusion coefficient objects (which are `uravu.distribution.Distribution` type objects) and array of temperatures can then be passed to the `kinisi.arrhenius.StandardArrhenius` class. "
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a7ea3985-ffcf-45c1-a6ca-b870c31740d6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "len(D), len(temperatures)"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -215,7 +226,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "9d87d5b9-24ef-4635-85eb-5d41678b53a9",
+   "id": "3fea99f1-ffcc-4a08-8f48-1e155a921bb6",
    "metadata": {},
    "outputs": [],
    "source": []
@@ -237,7 +248,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.13"
+   "version": "3.9.7"
   }
  },
 "nbformat": 4,
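Taken together, the notebook now sub-samples both atoms and trajectory frames before fitting, and adds a cell checking that one diffusion coefficient was collected per temperature. A minimal sketch of how these pieces fit together; the file names, temperature values, and the exact `DiffusionAnalyzer.from_universe` and `StandardArrhenius` signatures are assumptions for illustration, not the notebook's verbatim content:

# Hedged sketch: file names, temperature values, and exact call signatures
# are assumptions; consult the kinisi documentation for the current API.
import MDAnalysis as mda
import numpy as np
from kinisi.analyze import DiffusionAnalyzer
from kinisi.arrhenius import StandardArrhenius

p_params = {'specie': 'Li',          # assumed mobile species
            'time_step': 0.005,      # assumed simulation timestep, in ps
            'step_skip': 100,
            'min_dt': 0.001,
            'sub_sample_atoms': 4,   # analyse every 4th atom of the specie
            'sub_sample_traj': 2,    # analyse every 2nd stored frame
            'progress': False}
b_params = {'progress': False}
d_params = {'dt_skip': 10, 'progress': False}

temperatures = np.array([500, 600, 700, 800])  # K, placeholder values
D = []
for T in temperatures:
    u = mda.Universe(f'run_{T}K.data', f'run_{T}K.dcd')  # placeholder files
    analyzer = DiffusionAnalyzer.from_universe(u, parser_params=p_params,
                                               bootstrap_params=b_params)
    analyzer.diffusion(**d_params)
    D.append(analyzer.D)  # a uravu.distribution.Distribution

len(D), len(temperatures)  # the sanity check added in this commit
arrhenius = StandardArrhenius(temperatures, D)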
13 changes: 2 additions & 11 deletions docs/source/vasp_d.ipynb
@@ -55,16 +55,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "b_params = {'dimension': 'xy'}"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from kinisi.parser import PymatgenParser"
+    "b_params = {'dimension': 'xyz'}"
    ]
   },
   {
@@ -355,7 +346,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.13"
+   "version": "3.9.7"
   }
  },
 "nbformat": 4,
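The substantive change above is `'dimension': 'xy'` becoming `'xyz'`, restoring the full three-dimensional analysis (an unused `PymatgenParser` import cell is also dropped). A short sketch of what the `dimension` key does conceptually; the mapping below is an assumed stand-in for the `DIMENSIONALITY` lookup in `kinisi/diffusion.py`, not its exact contents:

# Assumed illustration of dimension-restricted mean-squared displacement.
import numpy as np

DIMENSIONALITY = {'x': [0], 'y': [1], 'z': [2],
                  'xy': [0, 1], 'xz': [0, 2], 'yz': [1, 2],
                  'xyz': [0, 1, 2]}

disp = np.random.randn(128, 50, 3)         # (atoms, observations, xyz)
axes = DIMENSIONALITY['xyz']
disp_slice = disp[:, :, axes]              # keep only the chosen axes
d_squared = np.sum(disp_slice**2, axis=2)  # squared displacement per observation
msd = d_squared.mean()
# The dimensionality also enters the Einstein normalisation, D ~ msd / (2 * len(axes) * t),
# so restricting to 'xy' changes both the displacement sum and the denominator.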
2 changes: 1 addition & 1 deletion kinisi/__init__.py
@@ -1,4 +1,4 @@
 MAJOR = 0
-MINOR = 4
+MINOR = 5
 MICRO = 0
 __version__ = f'{MAJOR:d}.{MINOR:d}.{MICRO:d}'
2 changes: 1 addition & 1 deletion kinisi/analyzer.py
@@ -240,7 +240,7 @@ def _stack_trajectories(u: Union[MDAnalysisParser, PymatgenParser]) -> List[np.ndarray]:
                 disp[u[0].disp_3d[i].shape[0] * j:u[0].disp_3d[i].shape[0] * (j + 1)] = u[j].disp_3d[i]
             joint_disp_3d.append(disp)
         return joint_disp_3d

     @property
     def distribution(self) -> np.ndarray:
         """
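For orientation: `_stack_trajectories` merges the `disp_3d` arrays of several independent simulations into one joint dataset, timestep by timestep, with the slice assignment above filling equal-sized blocks along the atom axis. A simplified sketch of the same stacking, assuming every parser contributes identically shaped arrays per timestep:

# Simplified sketch of the block-filling performed by _stack_trajectories;
# assumes each parser holds arrays of the same shape at every timestep.
import numpy as np
from typing import List

def stack_trajectories(parsers) -> List[np.ndarray]:
    joint_disp_3d = []
    for i in range(len(parsers[0].disp_3d)):
        # Equivalent to preallocating and filling blocks along the atom axis.
        joint_disp_3d.append(np.concatenate([p.disp_3d[i] for p in parsers], axis=0))
    return joint_disp_3d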
96 changes: 50 additions & 46 deletions kinisi/diffusion.py
@@ -8,9 +8,10 @@
 # author: Andrew R. McCluskey (arm61)

 import warnings
-from typing import List, Union
+from typing import List, Tuple, Union
 import numpy as np
-from scipy.stats import multivariate_normal, normaltest, linregress
+from scipy.stats import normaltest, linregress
+from scipy.linalg import pinvh
 from scipy.optimize import minimize, curve_fit
 import scipy.constants as const
 import tqdm
@@ -49,11 +50,11 @@ def __init__(self, delta_t: np.ndarray, disp_3d: List[np.ndarray], sub_sample_dt
         self._max_obs = self._displacements[0].shape[1]
         self._distributions = []
         self._dt = np.array([])
-        self._iterator = self.iterator(progress, range(len(self._displacements)))
         self._n = np.array([])
         self._s = np.array([])
         self._v = np.array([])
-        self._n_i = np.array([], dtype=int)
+        self._n_o = np.array([], dtype=int)
         self._ngp = np.array([])
         self._euclidian_displacements = []
         self._diffusion_coefficient = None
@@ -265,7 +266,7 @@ def bootstrap_GLS(self,
                       n_samples: int = 1000,
                       n_walkers: int = 32,
                       n_burn: int = 500,
-                      thin: int = 1,
+                      thin: int = 10,
                       progress: bool = True,
                       random_state: np.random.mtrand.RandomState = None):
         """
@@ -283,17 +284,11 @@
         :param n_walkers: Number of MCMC walkers to use. Optional, default is :py:attr:`32`.
         :param n_burn: Number of burn in samples (these allow the sampling to settle). Optional, default
             is :py:attr:`500`.
-        :param rtol: The relative threshold term for the covariance matrix inversion. If you obtain a very unusual
-            value for the diffusion coefficient, it is recommended to increase this value (ideally iteratively).
-            Optional, default is :code:`N * eps`, where :code:`eps` is the machine precision value of the covariance
-            matrix content.
         :param thin: Use only every :py:attr:`thin` samples for the MCMC sampler. Optional, default is :py:attr:`10`.
         :param progress: Show tqdm progress for sampling. Optional, default is :py:attr:`True`.
         :param random_state: A :py:attr:`RandomState` object to be used to ensure reproducibility. Optional,
             default is :py:attr:`None`.
         """
-        if random_state is not None:
-            np.random.seed(random_state.get_state()[1][1])
         max_ngp = np.argwhere(self._dt > dt_skip)[0][0]
         if use_ngp:
             max_ngp = np.argmax(self._ngp)
@@ -307,15 +302,17 @@ def model_variance(dt: np.ndarray, a: float) -> np.ndarray:
             :param a: Quadratic coefficient
             :return: Model variances
             """
-            return a / self._n_i[max_ngp:] * dt**2
-
-        popt, _ = curve_fit(model_variance, self.dt[max_ngp:], self._v[max_ngp:])
-        model_v = model_variance(self.dt[max_ngp:], *popt)
-        self._covariance_matrix = self.populate_covariance_matrix(model_v, self._n_i[max_ngp:])
+            return a / self._n_o[max_ngp:] * dt**2
+
+        self._popt, _ = curve_fit(model_variance, self.dt[max_ngp:], self._v[max_ngp:])
+        self._model_v = model_variance(self.dt[max_ngp:], *self._popt)
+        self._covariance_matrix = _populate_covariance_matrix(self._model_v, self._n_o[max_ngp:])
         self._npd_covariance_matrix = self._covariance_matrix
         self._covariance_matrix = find_nearest_positive_definite(self._covariance_matrix)

-        mv = multivariate_normal(self._n[max_ngp:], self._covariance_matrix, allow_singular=True)
+        _, logdet = np.linalg.slogdet(self._covariance_matrix)
+        logdet += np.log(2 * np.pi) * self._n[max_ngp:].size
+        inv = pinvh(self._covariance_matrix)
def log_likelihood(theta: np.ndarray) -> float:
"""
Expand All @@ -326,7 +323,9 @@ def log_likelihood(theta: np.ndarray) -> float:
if theta[0] < 0:
return -np.inf
model = _straight_line(self._dt[max_ngp:], *theta)
return mv.logpdf(model)
diff = (model - self._n[max_ngp:])
logl = -0.5 * (logdet + np.matmul(diff.T, np.matmul(inv, diff)))
return logl

ols = linregress(self._dt[max_ngp:], self._n[max_ngp:])
slope = ols.slope
@@ -353,31 +352,11 @@ def nll(*args) -> float:
         # sampler._random = random_state
         sampler.run_mcmc(pos, n_samples + n_burn, progress=progress, progress_kwargs={'desc': "Likelihood Sampling"})
         self.flatchain = sampler.get_chain(flat=True, thin=thin, discard=n_burn)

         self.gradient = Distribution(self.flatchain[:, 0])
         self._intercept = None
         if fit_intercept:
             self._intercept = Distribution(self.flatchain[:, 1])

-    @staticmethod
-    def populate_covariance_matrix(variances: np.ndarray, n_samples: np.ndarray) -> np.ndarray:
-        """
-        Populate the covariance matrix for the generalised least squares methodology.
-        :param variances: The variances for each timestep
-        :param n_samples: Number of independent trajectories for each timestep
-        :return: An estimated covariance matrix for the system
-        """
-        covariance_matrix = np.zeros((variances.size, variances.size))
-        for i in range(0, variances.size):
-            for j in range(i, variances.size):
-                ratio = n_samples[i] / n_samples[j]
-                value = ratio * variances[i]
-                covariance_matrix[i, j] = value
-                covariance_matrix[j, i] = np.copy(covariance_matrix[i, j])
-        return covariance_matrix

     def diffusion(self, **kwargs):
         """
         Use the bootstrap-GLS method to determine the diffusivity for the system. Keyword arguments will be
@@ -401,8 +380,8 @@ def jump_diffusion(self, **kwargs):
         will be passed to the :py:func:`bootstrap_GLS` method.
         """
         self.bootstrap_GLS(**kwargs)
-        self._jump_diffusion_coefficient = Distribution(self.gradient.samples /
-                                                        (2e4 * self.dims * self._displacements[0].shape[0]))
+        self._jump_diffusion_coefficient = Distribution(
+            self.gradient.samples / (2e4 * self.dims * self._displacements[0].shape[0]))

     @property
     def D_J(self) -> Union[Distribution, None]:
@@ -470,15 +449,17 @@ def __init__(self,
                  random_state: np.random.mtrand.RandomState = None,
                  progress: bool = True):
         super().__init__(delta_t, disp_3d, sub_sample_dt, progress)
+        self._iterator = self.iterator(progress, range(len(self._displacements)))
         slice = DIMENSIONALITY[dimension.lower()]
         self.dims = len(dimension.lower())
+        timesteps = (self._delta_t / np.diff(self._delta_t)[0]).astype(int)
         for i in self._iterator:
             disp_slice = self._displacements[i][:, :, slice].reshape(self._displacements[i].shape[0],
                                                                      self._displacements[i].shape[1], self.dims)
             d_squared = np.sum(disp_slice**2, axis=2)
             if d_squared.size <= 1:
                 continue
-            self._n_i = np.append(self._n_i, d_squared.size)
+            self._n_o = np.append(self._n_o, d_squared.size)
             self._euclidian_displacements.append(Distribution(np.sqrt(d_squared.flatten())))
             distro = self.sample_until_normal(d_squared, d_squared.size, n_resamples, max_resamples, alpha,
                                               random_state)
@@ -527,21 +508,23 @@ def __init__(self,
                  random_state: np.random.mtrand.RandomState = None,
                  progress: bool = True):
         super().__init__(delta_t, disp_3d, sub_sample_dt, progress)
+        self._iterator = self.iterator(progress, range(int(len(self._displacements) / 2)))
         slice = DIMENSIONALITY[dimension.lower()]
         self.dims = len(dimension.lower())
+        timesteps = (self._delta_t / np.diff(self._delta_t)[0]).astype(int)
         for i in self._iterator:
             disp_slice = self._displacements[i][:, :, slice].reshape(self._displacements[i].shape[0],
                                                                      self._displacements[i].shape[1], self.dims)
             d_squared = np.sum(disp_slice**2, axis=2)
             coll_motion = np.sum(np.sum(disp_slice, axis=0)**2, axis=-1)
             if coll_motion.size <= 1:
                 continue
-            self._n_i = np.append(self._n_i, coll_motion.size)
+            self._n_o = np.append(self._n_o, coll_motion.size)
             self._euclidian_displacements.append(Distribution(np.sqrt(d_squared.flatten())))
             distro = self.sample_until_normal(coll_motion, coll_motion.size, n_resamples, max_resamples, alpha,
                                               random_state)
             self._distributions.append(distro)
-            self._n = np.append(self._n, coll_motion.mean())
+            self._n = np.append(self._n, distro.n)
             self._s = np.append(self._s, np.std(distro.samples, ddof=1))
             self._v = np.append(self._v, np.var(distro.samples, ddof=1))
             self._ngp = np.append(self._ngp, self.ngp_calculation(d_squared.flatten()))
Expand Down Expand Up @@ -587,25 +570,27 @@ def __init__(self,
random_state: np.random.mtrand.RandomState = None,
progress: bool = True):
super().__init__(delta_t, disp_3d, sub_sample_dt, progress)
self._iterator = self.iterator(progress, range(int(len(self._displacements) / 2)))
try:
_ = len(ionic_charge)
except TypeError:
ionic_charge = np.ones(self._displacements[0].shape[0]) * ionic_charge
slice = DIMENSIONALITY[dimension.lower()]
self.dims = len(dimension.lower())
timesteps = (self._delta_t / np.diff(self._delta_t)[0]).astype(int)
for i in self._iterator:
disp_slice = self._displacements[i][:, :, slice].reshape(self._displacements[i].shape[0],
self._displacements[i].shape[1], self.dims)
d_squared = np.sum(disp_slice**2, axis=2)
sq_chg_motion = np.sum(np.sum((ionic_charge * self._displacements[i].T).T, axis=0)**2, axis=-1)
if sq_chg_motion.size <= 1:
continue
self._n_i = np.append(self._n_i, sq_chg_motion.size)
self._n_o = np.append(self._n_o, sq_chg_motion.size)
self._euclidian_displacements.append(Distribution(np.sqrt(d_squared.flatten())))
distro = self.sample_until_normal(sq_chg_motion, sq_chg_motion.size, n_resamples, max_resamples, alpha,
random_state)
self._distributions.append(distro)
self._n = np.append(self._n, sq_chg_motion.mean())
self._n = np.append(self._n, distro.n)
self._s = np.append(self._s, np.std(distro.samples, ddof=1))
self._v = np.append(self._v, np.var(distro.samples, ddof=1))
self._ngp = np.append(self._ngp, self.ngp_calculation(d_squared.flatten()))
@@ -626,10 +611,29 @@ def _bootstrap(array: np.ndarray, n_samples: int, n_resamples: int, random_state
     :return: Resampled values from the array
     """
     return [
-        np.mean(resample(array.flatten(), n_samples=n_samples, random_state=random_state)) for j in range(n_resamples)
+        np.mean(resample(array.flatten(), n_samples=n_samples, random_state=random_state).flatten()) for j in range(n_resamples)
     ]


+def _populate_covariance_matrix(variances: np.ndarray, n_samples: np.ndarray) -> np.ndarray:
+    """
+    Populate the covariance matrix for the generalised least squares methodology.
+    :param variances: The variances for each timestep
+    :param n_samples: Number of independent trajectories for each timestep
+    :return: An estimated covariance matrix for the system
+    """
+    covariance_matrix = np.zeros((variances.size, variances.size))
+    for i in range(0, variances.size):
+        for j in range(i, variances.size):
+            ratio = n_samples[i] / n_samples[j]
+            value = ratio * variances[i]
+            covariance_matrix[i, j] = value
+            covariance_matrix[j, i] = np.copy(covariance_matrix[i, j])
+    return covariance_matrix
+
+
 def _straight_line(abscissa: np.ndarray, gradient: float, intercept: float = 0.0) -> np.ndarray:
     """
     A one dimensional straight line function.
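Two changes above deserve unpacking: `populate_covariance_matrix` becomes the module-level helper `_populate_covariance_matrix`, and the `scipy.stats.multivariate_normal.logpdf` call is replaced by an explicit Gaussian log-likelihood assembled from `scipy.linalg.pinvh` (a pseudo-inverse that tolerates the near-singular covariance matrices this model produces) and `np.linalg.slogdet`. A self-contained check, on synthetic well-conditioned data, that the explicit form agrees with scipy's reference implementation:

# Synthetic check of the explicit Gaussian log-likelihood used in bootstrap_GLS;
# the covariance here is made deliberately well-conditioned so both routes agree.
import numpy as np
from scipy.linalg import pinvh
from scipy.stats import multivariate_normal

rng = np.random.default_rng(1)
n = 5
A = rng.normal(size=(n, n))
cov = A @ A.T + n * np.eye(n)          # symmetric positive definite by construction
mean = rng.normal(size=n)
x = mean + rng.normal(scale=0.1, size=n)

_, logdet = np.linalg.slogdet(cov)     # log-determinant term
logdet += np.log(2 * np.pi) * n        # the N * log(2 * pi) constant accumulated above
diff = x - mean
logl = -0.5 * (logdet + diff @ pinvh(cov) @ diff)

assert np.isclose(logl, multivariate_normal(mean, cov).logpdf(x))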
16 changes: 9 additions & 7 deletions kinisi/parser.py
@@ -60,9 +60,9 @@ def __init__(self,
         if n_steps is None:
             nsteps = drift_corrected.shape[1]

-        timesteps = self.get_timesteps(nsteps)
+        self.timesteps = self.get_timesteps(nsteps)

-        self.delta_t, self.disp_3d = self.get_disps(timesteps, drift_corrected, progress)
+        self.delta_t, self.disp_3d = self.get_disps(self.timesteps, drift_corrected, progress)

     @property
     def volume(self) -> float:
@@ -124,7 +124,7 @@ def get_timesteps(self, nsteps: int) -> np.ndarray:
             min_dt = 1
         if min_dt >= nsteps:
             raise ValueError('min_dt is greater than or equal to the maximum simulation length.')
-        timesteps = np.arange(min_dt, nsteps, 1)
+        timesteps = np.arange(min_dt, nsteps + 1, 1, dtype=int)
         return timesteps
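The `get_timesteps` change is a one-line off-by-one fix: the longest window, spanning the whole simulation, was previously excluded from the `dt` grid. A tiny illustration (the interpretation of the endpoint is my reading of the diff):

# Illustration of the np.arange endpoint fix in get_timesteps.
import numpy as np

nsteps, min_dt = 5, 1
old = np.arange(min_dt, nsteps, 1)                 # [1 2 3 4]   - longest dt missing
new = np.arange(min_dt, nsteps + 1, 1, dtype=int)  # [1 2 3 4 5] - full-length window included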
@@ -148,17 +148,19 @@ def get_disps(self,
         iterator = timesteps
         disp_mem = 0
         for i, timestep in enumerate(iterator):
-            disp_mem += np.product(drift_corrected[self.indices, i + 1::i + 1].shape) * 8
+            disp_mem += np.product(drift_corrected[self.indices, timestep::timestep].shape) * 8
         disp_mem *= 1e-9
         if disp_mem > self.memory_limit:
             raise MemoryError(f"The memory limit of this job is {self.memory_limit:.1e} GB but the "
                               f"displacement values will use {disp_mem:.1e} GB. Please either increase "
                               "the memory_limit parameter or decrease the sampling rate (see "
                               "https://kinisi.readthedocs.io/en/latest/memory_limit.html).")
         for i, timestep in enumerate(iterator):
-            disp = np.subtract(drift_corrected[self.indices, timestep::timestep],
-                               drift_corrected[self.indices, :-timestep:timestep])
-            disp_3d.append(disp)
+            disp = np.concatenate([drift_corrected[self.indices, np.newaxis, timestep - 1],
+                                   np.subtract(drift_corrected[self.indices, timestep:],
+                                               drift_corrected[self.indices, :-timestep])],
+                                  axis=1)
+            disp_3d.append(disp[:, ::timestep])
         return delta_t, disp_3d
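The final hunk is the correctness bug of the commit title. The old code built non-overlapping displacements as `x[t::t] - x[:-t:t]`, which silently drops the first window (the displacement accumulated up to frame `t - 1`), and the memory estimate indexed with `i + 1` instead of `timestep`. The new code prepends that first window before differencing and then takes every `timestep`-th observation. A 1D toy comparison; the convention that `x[k]` holds the drift-corrected displacement after `k + 1` stored frames is my assumption from the diff:

# Toy 1D illustration of the get_disps indexing fix (assumed conventions).
import numpy as np

x = np.arange(1.0, 13.0)   # 12 frames; the particle moves +1 per frame
timestep = 3

# Old behaviour: non-overlapping windows, but the first window is lost.
old = x[timestep::timestep] - x[:-timestep:timestep]
print(old)                 # [3. 3. 3.]    -> 3 observations

# New behaviour: prepend the first window, difference, then subsample.
disp = np.concatenate([x[timestep - 1:timestep],   # the previously missing window
                       x[timestep:] - x[:-timestep]])
new = disp[::timestep]
print(new)                 # [3. 3. 3. 3.] -> the first window is recovered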