Skip to content

Commit

Permalink
Merge pull request #49 from bjmorgan/block
Browse files Browse the repository at this point in the history
Add support for variance estimation using the block averaging approach.
  • Loading branch information
arm61 authored Nov 23, 2023
2 parents c0752d5 + 9321625 commit ae30338
Show file tree
Hide file tree
Showing 2 changed files with 84 additions and 13 deletions.
94 changes: 82 additions & 12 deletions kinisi/diffusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ def __init__(self,
self._slice = DIMENSIONALITY[dimension.lower()]
self._start_dt = None
self.dims = len(dimension.lower())
self._model = None

def to_dict(self) -> dict:
"""
Expand Down Expand Up @@ -110,7 +111,8 @@ def to_dict(self) -> dict:
'sigma': None,
'intercept': None,
'gradient': None,
'start_dt': self._start_dt
'start_dt': self._start_dt,
'model' : self._model
}
if len(self._distributions) != 0:
my_dict['distributions'] = [d.to_dict() for d in self._distributions]
Expand Down Expand Up @@ -168,6 +170,7 @@ def from_dict(cls, my_dict: dict) -> 'Bootstrap':
boot.flatchain = my_dict['flatchain']
boot._covariance_matrix = my_dict['covariance_matrix']
boot._start_dt = my_dict['start_dt']
boot._model = my_dict['model']
return boot

@property
Expand Down Expand Up @@ -298,6 +301,7 @@ def ngp_calculation(d_squared: np.ndarray) -> float:

def bootstrap_GLS(self,
start_dt: float,
model: bool = True,
fit_intercept: bool = True,
n_samples: int = 1000,
n_walkers: int = 32,
Expand All @@ -311,6 +315,8 @@ def bootstrap_GLS(self,
:param start_dt: The starting time for the analysis to find the diffusion coefficient.
This should be the start of the diffusive regime in the simulation.
:param model: Use the model for the covariance matrix, if False this may lead to numerical instability.
Optional, default is :py:attr:`True`.
:param fit_intercept: Should the intercept of the diffusion relationship be fit. Optional, default
is :py:attr:`True`.
:param n_samples: Number of samples of the Gaussian process to perform. Optional, default is :py:attr:`1000`.
Expand All @@ -326,6 +332,7 @@ def bootstrap_GLS(self,
np.random.seed(random_state.get_state()[1][1])

self._start_dt = start_dt
self._model = model

diff_regime = np.argwhere(self._dt >= self._start_dt)[0][0]

Expand Down Expand Up @@ -398,8 +405,11 @@ def _model_variance(dt: np.ndarray, a: float) -> np.ndarray:
"""
return a / self._n_o[diff_regime:] * dt**2

self._popt, _ = curve_fit(_model_variance, self.dt[diff_regime:], self._v[diff_regime:])
self._model_v = _model_variance(self.dt[diff_regime:], *self._popt)
if self._model:
self._popt, _ = curve_fit(_model_variance, self.dt[diff_regime:], self._v[diff_regime:])
self._model_v = _model_variance(self.dt[diff_regime:], *self._popt)
else:
self._model_v = self._v[diff_regime:]
self._covariance_matrix = _populate_covariance_matrix(self._model_v, self._n_o[diff_regime:])
self._npd_covariance_matrix = self._covariance_matrix
return cov_nearest(self._covariance_matrix)
Expand Down Expand Up @@ -515,6 +525,8 @@ class MSDBootstrap(Bootstrap):
:param sub_sample_dt: The frequency in observations to be sampled. Default is :py:attr:`1` (every observation)
:param bootstrap: Should bootstrap resampling be used to estimate the observed MSD distribution.
Optional, default is :py:attr:`False`.
:param block: Should the blocking method be used to estimate the variance, if :py:attr:`False` an
approximation is used to estimate the variance. Optional, default is :py:attr:`False`.
:param n_resamples: The initial number of resamples to be performed. Default is :py:attr:`1000`
:param max_resamples: The max number of resamples to be performed by the distribution is assumed to be normal.
This is present to allow user control over the time taken for the resampling to occur. Default
Expand All @@ -533,6 +545,7 @@ def __init__(self,
n_o: np.ndarray,
sub_sample_dt: int = 1,
bootstrap: bool = False,
block: bool = False,
n_resamples: int = 1000,
max_resamples: int = 10000,
dimension: str = 'xyz',
Expand All @@ -541,6 +554,10 @@ def __init__(self,
progress: bool = True):
super().__init__(delta_t, disp_3d, n_o, sub_sample_dt, dimension)
self._iterator = self.iterator(progress, range(len(self._displacements)))
if block:
import pyblock
print('You are using the blocking method to estimate variances, please cite '
'doi:10.1063/1.457480 and the pyblock pacakge.')
for i in self._iterator:
disp_slice = self._displacements[i][:, :, self._slice].reshape(self._displacements[i].shape[0],
self._displacements[i].shape[1], self.dims)
Expand All @@ -554,9 +571,22 @@ def __init__(self,
self._n_bootstrap = np.append(self._n_bootstrap, np.mean(distro.samples))
self._v_bootstrap = np.append(self._v_bootstrap, np.var(distro.samples, ddof=1))
self._s_bootstrap = np.append(self._s_bootstrap, np.std(distro.samples, ddof=1))
self._n = np.append(self._n, d_squared.mean())
self._v = np.append(self._v, np.var(d_squared, ddof=1) / n_o[i])
self._s = np.append(self._s, np.sqrt(self._v[i]))
if block:
reblock = pyblock.blocking.reblock(d_squared.flatten())
opt_block = pyblock.blocking.find_optimal_block(d_squared.flatten().size, reblock)
try:
mean = reblock[opt_block[0]].mean
var = reblock[opt_block[0]].std_err ** 2
except TypeError:
mean = reblock[-1].mean
var = reblock[-1].std_err ** 2
self._n = np.append(self._n, mean)
self._v = np.append(self._v, var)
self._s = np.append(self._s, np.sqrt(self._v[i]))
else:
self._n = np.append(self._n, d_squared.mean())
self._v = np.append(self._v, np.var(d_squared, ddof=1) / n_o[i])
self._s = np.append(self._s, np.sqrt(self._v[i]))
self._ngp = np.append(self._ngp, self.ngp_calculation(d_squared))
self._dt = np.append(self._dt, self._delta_t[i])
self._n_o = self._n_o[:self._n.size]
Expand All @@ -577,6 +607,8 @@ class MSTDBootstrap(Bootstrap):
is :py:attr:`1` (every observation).
:param bootstrap: Should bootstrap resampling be used to estimate the observed MSD distribution.
Optional, default is :py:attr:`False`.
:param block: Should the blocking method be used to estimate the variance, if :py:attr:`False` an
approximation is used to estimate the variance. Optional, default is :py:attr:`False`.
:param n_resamples: The initial number of resamples to be performed. Optional, default
is :py:attr:`1000`
:param max_resamples: The max number of resamples to be performed by the distribution is assumed to be
Expand All @@ -597,6 +629,7 @@ def __init__(self,
n_o: np.ndarray,
sub_sample_dt: int = 1,
bootstrap: bool = False,
block: bool = False,
n_resamples: int = 1000,
max_resamples: int = 10000,
dimension: str = 'xyz',
Expand All @@ -605,6 +638,10 @@ def __init__(self,
progress: bool = True):
super().__init__(delta_t, disp_3d, n_o, sub_sample_dt, dimension)
self._iterator = self.iterator(progress, range(int(len(self._displacements) / 2)))
if block:
import pyblock
print('You are using the blocking method to estimate variances, please cite '
'doi:10.1063/1.457480 and the pyblock pacakge.')
for i in self._iterator:
disp_slice = self._displacements[i][:, :, self._slice].reshape(self._displacements[i].shape[0],
self._displacements[i].shape[1], self.dims)
Expand All @@ -620,9 +657,22 @@ def __init__(self,
self._n_bootstrap = np.append(self._n_bootstrap, np.mean(distro.samples))
self._v_bootstrap = np.append(self._v_bootstrap, np.var(distro.samples, ddof=1))
self._s_bootstrap = np.append(self._s_bootstrap, np.std(distro.samples, ddof=1))
self._n = np.append(self._n, coll_motion.mean())
self._v = np.append(self._v, np.var(coll_motion, ddof=1) / (n_o[i] / d_squared.shape[0]))
self._s = np.append(self._s, np.sqrt(self._v[i]))
if block:
reblock = pyblock.blocking.reblock(coll_motion.flatten())
opt_block = pyblock.blocking.find_optimal_block(coll_motion.flatten().size, reblock)
try:
mean = reblock[opt_block[0]].mean
var = reblock[opt_block[0]].std_err ** 2
except TypeError:
mean = reblock[-1].mean
var = reblock[-1].std_err ** 2
self._n = np.append(self._n, mean)
self._v = np.append(self._v, var)
self._s = np.append(self._s, np.sqrt(self._v[i]))
else:
self._n = np.append(self._n, coll_motion.mean())
self._v = np.append(self._v, np.var(coll_motion, ddof=1) / (n_o[i] / d_squared.shape[0]))
self._s = np.append(self._s, np.sqrt(self._v[i]))
self._ngp = np.append(self._ngp, self.ngp_calculation(d_squared.flatten()))
self._dt = np.append(self._dt, self._delta_t[i])
self._n_o = self._n_o[:self._n.size]
Expand All @@ -643,6 +693,8 @@ class MSCDBootstrap(Bootstrap):
:param n_o: Number of statistically independent observations of the MSD at each timestep.
:param bootstrap: Should bootstrap resampling be used to estimate the observed MSD distribution.
Optional, default is :py:attr:`False`.
:param block: Should the blocking method be used to estimate the variance, if :py:attr:`False` an
approximation is used to estimate the variance. Optional, default is :py:attr:`False`.
:param sub_sample_dt: The frequency in observations to be sampled. Optional, default is :py:attr:`1`
(every observation).
:param n_resamples: The initial number of resamples to be performed. Optional, default is :py:attr:`1000`.
Expand All @@ -665,6 +717,7 @@ def __init__(self,
n_o: np.ndarray,
sub_sample_dt: int = 1,
bootstrap: bool = False,
block: bool = False,
n_resamples: int = 1000,
max_resamples: int = 10000,
dimension: str = 'xyz',
Expand All @@ -677,6 +730,10 @@ def __init__(self,
_ = len(ionic_charge)
except TypeError:
ionic_charge = np.ones(self._displacements[0].shape[0]) * ionic_charge
if block:
import pyblock
print('You are using the blocking method to estimate variances, please cite '
'doi:10.1063/1.457480 and the pyblock pacakge.')
for i in self._iterator:
disp_slice = self._displacements[i][:, :, self._slice].reshape(self._displacements[i].shape[0],
self._displacements[i].shape[1], self.dims)
Expand All @@ -692,9 +749,22 @@ def __init__(self,
self._n_bootstrap = np.append(self._n_bootstrap, np.mean(distro.samples))
self._v_bootstrap = np.append(self._v_bootstrap, np.var(distro.samples, ddof=1))
self._s_bootstrap = np.append(self._s_bootstrap, np.std(distro.samples, ddof=1))
self._n = np.append(self._n, sq_chg_motion.mean())
self._v = np.append(self._v, np.var(sq_chg_motion, ddof=1) / (n_o[i] / d_squared.shape[0]))
self._s = np.append(self._s, np.sqrt(self._v[i]))
if block:
reblock = pyblock.blocking.reblock(sq_chg_motion.flatten())
opt_block = pyblock.blocking.find_optimal_block(sq_chg_motion.flatten().size, reblock)
try:
mean = reblock[opt_block[0]].mean
var = reblock[opt_block[0]].std_err ** 2
except TypeError:
mean = reblock[-1].mean
var = reblock[-1].std_err ** 2
self._n = np.append(self._n, mean)
self._v = np.append(self._v, var)
self._s = np.append(self._s, np.sqrt(self._v[i]))
else:
self._n = np.append(self._n, sq_chg_motion.mean())
self._v = np.append(self._v, np.var(sq_chg_motion, ddof=1) / (n_o[i] / d_squared.shape[0]))
self._s = np.append(self._s, np.sqrt(self._v[i]))
self._ngp = np.append(self._ngp, self.ngp_calculation(d_squared.flatten()))
self._dt = np.append(self._dt, self._delta_t[i])
self._n_o = self._n_o[:self._n.size]
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ dev = [
'pytest',
'pytest-cov',
'ase',
'h5py'
'h5py',
'pyblock'
]
docs = [
'pymatgen',
Expand Down

0 comments on commit ae30338

Please sign in to comment.