diff --git a/kinisi/diffusion.py b/kinisi/diffusion.py index 708c0bb6..9e47e1ea 100644 --- a/kinisi/diffusion.py +++ b/kinisi/diffusion.py @@ -83,6 +83,7 @@ def __init__(self, self._slice = DIMENSIONALITY[dimension.lower()] self._start_dt = None self.dims = len(dimension.lower()) + self._model = None def to_dict(self) -> dict: """ @@ -110,7 +111,8 @@ def to_dict(self) -> dict: 'sigma': None, 'intercept': None, 'gradient': None, - 'start_dt': self._start_dt + 'start_dt': self._start_dt, + 'model' : self._model } if len(self._distributions) != 0: my_dict['distributions'] = [d.to_dict() for d in self._distributions] @@ -168,6 +170,7 @@ def from_dict(cls, my_dict: dict) -> 'Bootstrap': boot.flatchain = my_dict['flatchain'] boot._covariance_matrix = my_dict['covariance_matrix'] boot._start_dt = my_dict['start_dt'] + boot._model = my_dict['model'] return boot @property @@ -298,6 +301,7 @@ def ngp_calculation(d_squared: np.ndarray) -> float: def bootstrap_GLS(self, start_dt: float, + model: bool = True, fit_intercept: bool = True, n_samples: int = 1000, n_walkers: int = 32, @@ -311,6 +315,8 @@ def bootstrap_GLS(self, :param start_dt: The starting time for the analysis to find the diffusion coefficient. This should be the start of the diffusive regime in the simulation. + :param model: Use the model for the covariance matrix, if False this may lead to numerical instability. + Optional, default is :py:attr:`True`. :param fit_intercept: Should the intercept of the diffusion relationship be fit. Optional, default is :py:attr:`True`. :param n_samples: Number of samples of the Gaussian process to perform. Optional, default is :py:attr:`1000`. @@ -326,6 +332,7 @@ def bootstrap_GLS(self, np.random.seed(random_state.get_state()[1][1]) self._start_dt = start_dt + self._model = model diff_regime = np.argwhere(self._dt >= self._start_dt)[0][0] @@ -398,8 +405,11 @@ def _model_variance(dt: np.ndarray, a: float) -> np.ndarray: """ return a / self._n_o[diff_regime:] * dt**2 - self._popt, _ = curve_fit(_model_variance, self.dt[diff_regime:], self._v[diff_regime:]) - self._model_v = _model_variance(self.dt[diff_regime:], *self._popt) + if self._model: + self._popt, _ = curve_fit(_model_variance, self.dt[diff_regime:], self._v[diff_regime:]) + self._model_v = _model_variance(self.dt[diff_regime:], *self._popt) + else: + self._model_v = self._v[diff_regime:] self._covariance_matrix = _populate_covariance_matrix(self._model_v, self._n_o[diff_regime:]) self._npd_covariance_matrix = self._covariance_matrix return cov_nearest(self._covariance_matrix) @@ -515,6 +525,8 @@ class MSDBootstrap(Bootstrap): :param sub_sample_dt: The frequency in observations to be sampled. Default is :py:attr:`1` (every observation) :param bootstrap: Should bootstrap resampling be used to estimate the observed MSD distribution. Optional, default is :py:attr:`False`. + :param block: Should the blocking method be used to estimate the variance, if :py:attr:`False` an + approximation is used to estimate the variance. Optional, default is :py:attr:`False`. :param n_resamples: The initial number of resamples to be performed. Default is :py:attr:`1000` :param max_resamples: The max number of resamples to be performed by the distribution is assumed to be normal. This is present to allow user control over the time taken for the resampling to occur. Default @@ -533,6 +545,7 @@ def __init__(self, n_o: np.ndarray, sub_sample_dt: int = 1, bootstrap: bool = False, + block: bool = False, n_resamples: int = 1000, max_resamples: int = 10000, dimension: str = 'xyz', @@ -541,6 +554,10 @@ def __init__(self, progress: bool = True): super().__init__(delta_t, disp_3d, n_o, sub_sample_dt, dimension) self._iterator = self.iterator(progress, range(len(self._displacements))) + if block: + import pyblock + print('You are using the blocking method to estimate variances, please cite ' + 'doi:10.1063/1.457480 and the pyblock pacakge.') for i in self._iterator: disp_slice = self._displacements[i][:, :, self._slice].reshape(self._displacements[i].shape[0], self._displacements[i].shape[1], self.dims) @@ -554,9 +571,22 @@ def __init__(self, self._n_bootstrap = np.append(self._n_bootstrap, np.mean(distro.samples)) self._v_bootstrap = np.append(self._v_bootstrap, np.var(distro.samples, ddof=1)) self._s_bootstrap = np.append(self._s_bootstrap, np.std(distro.samples, ddof=1)) - self._n = np.append(self._n, d_squared.mean()) - self._v = np.append(self._v, np.var(d_squared, ddof=1) / n_o[i]) - self._s = np.append(self._s, np.sqrt(self._v[i])) + if block: + reblock = pyblock.blocking.reblock(d_squared.flatten()) + opt_block = pyblock.blocking.find_optimal_block(d_squared.flatten().size, reblock) + try: + mean = reblock[opt_block[0]].mean + var = reblock[opt_block[0]].std_err ** 2 + except TypeError: + mean = reblock[-1].mean + var = reblock[-1].std_err ** 2 + self._n = np.append(self._n, mean) + self._v = np.append(self._v, var) + self._s = np.append(self._s, np.sqrt(self._v[i])) + else: + self._n = np.append(self._n, d_squared.mean()) + self._v = np.append(self._v, np.var(d_squared, ddof=1) / n_o[i]) + self._s = np.append(self._s, np.sqrt(self._v[i])) self._ngp = np.append(self._ngp, self.ngp_calculation(d_squared)) self._dt = np.append(self._dt, self._delta_t[i]) self._n_o = self._n_o[:self._n.size] @@ -577,6 +607,8 @@ class MSTDBootstrap(Bootstrap): is :py:attr:`1` (every observation). :param bootstrap: Should bootstrap resampling be used to estimate the observed MSD distribution. Optional, default is :py:attr:`False`. + :param block: Should the blocking method be used to estimate the variance, if :py:attr:`False` an + approximation is used to estimate the variance. Optional, default is :py:attr:`False`. :param n_resamples: The initial number of resamples to be performed. Optional, default is :py:attr:`1000` :param max_resamples: The max number of resamples to be performed by the distribution is assumed to be @@ -597,6 +629,7 @@ def __init__(self, n_o: np.ndarray, sub_sample_dt: int = 1, bootstrap: bool = False, + block: bool = False, n_resamples: int = 1000, max_resamples: int = 10000, dimension: str = 'xyz', @@ -605,6 +638,10 @@ def __init__(self, progress: bool = True): super().__init__(delta_t, disp_3d, n_o, sub_sample_dt, dimension) self._iterator = self.iterator(progress, range(int(len(self._displacements) / 2))) + if block: + import pyblock + print('You are using the blocking method to estimate variances, please cite ' + 'doi:10.1063/1.457480 and the pyblock pacakge.') for i in self._iterator: disp_slice = self._displacements[i][:, :, self._slice].reshape(self._displacements[i].shape[0], self._displacements[i].shape[1], self.dims) @@ -620,9 +657,22 @@ def __init__(self, self._n_bootstrap = np.append(self._n_bootstrap, np.mean(distro.samples)) self._v_bootstrap = np.append(self._v_bootstrap, np.var(distro.samples, ddof=1)) self._s_bootstrap = np.append(self._s_bootstrap, np.std(distro.samples, ddof=1)) - self._n = np.append(self._n, coll_motion.mean()) - self._v = np.append(self._v, np.var(coll_motion, ddof=1) / (n_o[i] / d_squared.shape[0])) - self._s = np.append(self._s, np.sqrt(self._v[i])) + if block: + reblock = pyblock.blocking.reblock(coll_motion.flatten()) + opt_block = pyblock.blocking.find_optimal_block(coll_motion.flatten().size, reblock) + try: + mean = reblock[opt_block[0]].mean + var = reblock[opt_block[0]].std_err ** 2 + except TypeError: + mean = reblock[-1].mean + var = reblock[-1].std_err ** 2 + self._n = np.append(self._n, mean) + self._v = np.append(self._v, var) + self._s = np.append(self._s, np.sqrt(self._v[i])) + else: + self._n = np.append(self._n, coll_motion.mean()) + self._v = np.append(self._v, np.var(coll_motion, ddof=1) / (n_o[i] / d_squared.shape[0])) + self._s = np.append(self._s, np.sqrt(self._v[i])) self._ngp = np.append(self._ngp, self.ngp_calculation(d_squared.flatten())) self._dt = np.append(self._dt, self._delta_t[i]) self._n_o = self._n_o[:self._n.size] @@ -643,6 +693,8 @@ class MSCDBootstrap(Bootstrap): :param n_o: Number of statistically independent observations of the MSD at each timestep. :param bootstrap: Should bootstrap resampling be used to estimate the observed MSD distribution. Optional, default is :py:attr:`False`. + :param block: Should the blocking method be used to estimate the variance, if :py:attr:`False` an + approximation is used to estimate the variance. Optional, default is :py:attr:`False`. :param sub_sample_dt: The frequency in observations to be sampled. Optional, default is :py:attr:`1` (every observation). :param n_resamples: The initial number of resamples to be performed. Optional, default is :py:attr:`1000`. @@ -665,6 +717,7 @@ def __init__(self, n_o: np.ndarray, sub_sample_dt: int = 1, bootstrap: bool = False, + block: bool = False, n_resamples: int = 1000, max_resamples: int = 10000, dimension: str = 'xyz', @@ -677,6 +730,10 @@ def __init__(self, _ = len(ionic_charge) except TypeError: ionic_charge = np.ones(self._displacements[0].shape[0]) * ionic_charge + if block: + import pyblock + print('You are using the blocking method to estimate variances, please cite ' + 'doi:10.1063/1.457480 and the pyblock pacakge.') for i in self._iterator: disp_slice = self._displacements[i][:, :, self._slice].reshape(self._displacements[i].shape[0], self._displacements[i].shape[1], self.dims) @@ -692,9 +749,22 @@ def __init__(self, self._n_bootstrap = np.append(self._n_bootstrap, np.mean(distro.samples)) self._v_bootstrap = np.append(self._v_bootstrap, np.var(distro.samples, ddof=1)) self._s_bootstrap = np.append(self._s_bootstrap, np.std(distro.samples, ddof=1)) - self._n = np.append(self._n, sq_chg_motion.mean()) - self._v = np.append(self._v, np.var(sq_chg_motion, ddof=1) / (n_o[i] / d_squared.shape[0])) - self._s = np.append(self._s, np.sqrt(self._v[i])) + if block: + reblock = pyblock.blocking.reblock(sq_chg_motion.flatten()) + opt_block = pyblock.blocking.find_optimal_block(sq_chg_motion.flatten().size, reblock) + try: + mean = reblock[opt_block[0]].mean + var = reblock[opt_block[0]].std_err ** 2 + except TypeError: + mean = reblock[-1].mean + var = reblock[-1].std_err ** 2 + self._n = np.append(self._n, mean) + self._v = np.append(self._v, var) + self._s = np.append(self._s, np.sqrt(self._v[i])) + else: + self._n = np.append(self._n, sq_chg_motion.mean()) + self._v = np.append(self._v, np.var(sq_chg_motion, ddof=1) / (n_o[i] / d_squared.shape[0])) + self._s = np.append(self._s, np.sqrt(self._v[i])) self._ngp = np.append(self._ngp, self.ngp_calculation(d_squared.flatten())) self._dt = np.append(self._dt, self._delta_t[i]) self._n_o = self._n_o[:self._n.size] diff --git a/pyproject.toml b/pyproject.toml index 1d9408a8..1549f8fa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,7 +50,8 @@ dev = [ 'pytest', 'pytest-cov', 'ase', - 'h5py' + 'h5py', + 'pyblock' ] docs = [ 'pymatgen',