From bae12a3f4c9328b2fde33139cd1597c4b4d9d876 Mon Sep 17 00:00:00 2001 From: "Paul, Fabian (fapa)" Date: Thu, 7 Sep 2017 17:32:12 +0200 Subject: [PATCH 01/43] implement VAMP --- pyemma/coordinates/api.py | 8 ++ pyemma/coordinates/tests/test_vamp.py | 87 +++++++++++++ pyemma/coordinates/transform/vamp.py | 181 ++++++++++++++++++++++++++ 3 files changed, 276 insertions(+) create mode 100644 pyemma/coordinates/tests/test_vamp.py create mode 100644 pyemma/coordinates/transform/vamp.py diff --git a/pyemma/coordinates/api.py b/pyemma/coordinates/api.py index e8127c59b..f8d659562 100644 --- a/pyemma/coordinates/api.py +++ b/pyemma/coordinates/api.py @@ -51,6 +51,7 @@ 'save_trajs', 'pca', # transform 'tica', + 'vamp', 'covariance_lagged', 'cluster_regspace', # cluster 'cluster_kmeans', @@ -1255,6 +1256,13 @@ def tica(data=None, lag=10, dim=-1, var_cutoff=0.95, kinetic_map=True, commute_m return res +def vamp(data=None, lag=10, dim=None, scaling=None, right=True, + stride=1, skip=0, ncov_max=float('inf')): + from pyemma.coordinates.transform.vamp import VAMP + res = VAMP(lag, dim=dim, scaling=scaling, right=right, skip=skip, ncov_max=ncov_max) + return _param_stage(data, res, stride=stride) + + def covariance_lagged(data=None, c00=True, c0t=True, ctt=False, remove_constant_mean=None, remove_data_mean=False, reversible=False, bessel=True, lag=0, weights="empirical", stride=1, skip=0, chunksize=None): """ diff --git a/pyemma/coordinates/tests/test_vamp.py b/pyemma/coordinates/tests/test_vamp.py new file mode 100644 index 000000000..34c190400 --- /dev/null +++ b/pyemma/coordinates/tests/test_vamp.py @@ -0,0 +1,87 @@ +# This file is part of PyEMMA. 
+# +# Copyright (c) 2017 Computational Molecular Biology Group, Freie Universitaet Berlin (GER) +# +# PyEMMA is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see . + + +""" +@author: paul +""" + +from __future__ import absolute_import +import unittest +import numpy as np +from pyemma.coordinates import vamp as pyemma_api_vamp + +#from pyemma._ext.variational.solvers.direct import sort_by_norm +#from pyemma._ext.variational.solvers.direct import eig_corr +#from pyemma._ext.variational.util import ZeroRankError +from logging import getLogger + +logger = getLogger('pyemma.'+'TestTICA') + +def random_invertible(n, eps=0.01): + 'generate real random invertible matrix' + m = np.random.randn(n, n) + u, s, v = np.linalg.svd(m) + s = np.maximum(s, eps) + return u.dot(np.diag(s)).dot(v) + + +class TestVAMPSelfConsitency(unittest.TestCase): + @classmethod + def setUpClass(cls): + N_trajs = 3 + N_frames = 1000 + dim = 30 + A = random_invertible(dim) + trajs = [] + mean = np.random.randn(dim) + for i in range(N_trajs): + # set up data + white = np.random.randn(N_frames, dim) + brown = np.cumsum(white, axis=0) + correlated = np.dot(brown, A) + trajs.append(correlated + mean) + cls.trajs = trajs + + def test(self): + tau = 10 + vamp = pyemma_api_vamp(tau) + vamp.estimate(self.trajs) + vamp.right = True + phi = [ sf[:, tau:] for sf in vamp.get_output() ] + phi_concat = np.concatenate(phi) + mean_right = phi_concat.sum(axis=1) / 
phi_concat.shape[1] + cov_right = phi_concat.T.dot(phi_concat) / phi_concat.shape[1] + np.testing.assert_almost_equal(mean_right, 0.0) + np.testing.assert_almost_equal(cov_right, np.eye(vamp.dimension())) + + vamp.right = False + psi = [ sf[:, 0:-tau] for sf in vamp.get_output() ] + psi_concat = np.concatenate(psi) + mean_left = psi_concat.sum(axis=1) / psi_concat.shape[1] + cov_left = psi_concat.T.dot(psi_concat) / psi_concat.shape[1] + np.testing.assert_almost_equal(mean_left, 0.0) + np.testing.assert_almost_equal(cov_left, np.eye(vamp.dimension())) + + # compute correlation between left and right + C01_psi_phi = np.zeros((vamp.dimension(), vamp.dimension())) + N_frames = 0 + for l, r in zip(psi, phi): + C01_psi_phi += l.T.dot(r) + N_frames += r.shape[1] + np.testing.assert_almost_equal(np.diag(C01_psi_phi), vamp.singular_values) + diff --git a/pyemma/coordinates/transform/vamp.py b/pyemma/coordinates/transform/vamp.py new file mode 100644 index 000000000..eaaab73fa --- /dev/null +++ b/pyemma/coordinates/transform/vamp.py @@ -0,0 +1,181 @@ +# This file is part of PyEMMA. +# +# Copyright (c) 2017 Computational Molecular Biology Group, Freie Universitaet Berlin (GER) +# +# PyEMMA is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see . 
+''' +@author: paul, marscher, wu +''' + +from __future__ import absolute_import + +import numpy as np +from pyemma.coordinates.transform.tica import TICA +from pyemma._base.model import Model +from pyemma.util.annotators import fix_docs +from pyemma._ext.variational.solvers.direct import spd_inv_sqrt +from pyemma.coordinates.estimation.covariance import LaggedCovariance +from pyemma.coordinates.data._base.transformer import StreamingEstimationTransformer + + +__all__ = ['VAMP'] + +class VAMPModel(Model): + def set_model_params(self, mean_0, mean_t, c00, ctt, c0t): + self.mean_0 = mean_0 + self.mean_t = mean_t + self.c00 = c00 + self.ctt = ctt + self.c0t = c0t + + +# TODO: remove time scales property + +@fix_docs +class VAMP(TICA): + r"""Variational approach for Markov processes (VAMP)""" + + def __init__(self, lag, dim=None, scaling=None, right=True, epsilon=1e-6, + stride=1, skip=0, ncov_max=float('inf')): + r""" Variational approach for Markov processes (VAMP) [1]_. + + Parameters + ---------- + lag : int + lag time + dim : float or int + Number of dimensions to keep: + * if dim is not set all available ranks are kept: + n_components == min(n_samples, n_features) + * if dim is an integer >= 1, this number specifies the number + of dimensions to keep. By default this will use the kinetic + variance. + * if dim is a float with ``0 < dim < 1``, select the number + of dimensions such that the amount of kinetic variance + that needs to be explained is greater than the percentage + specified by dim. + scaling : None or string + Scaling to be applied to the VAMP modes upon transformation + * None: no scaling will be applied, variance along the mode is 1 + * 'kinetic map' or 'km': modes are scaled by singular value + right : boolean + Whether to compute the right singular functions. + epsilon : float + singular value cutoff. Singular values of C0 with norms <= epsilon + will be cut off. The remaining number of singular values define + the size of the output. 
+ stride: int, optional, default = 1 + Use only every stride-th time step. By default, every time step is used. + skip : int, default=0 + skip the first initial n frames per trajectory. + + + References + ---------- + .. [1] Wu, H. and Noe, F. 2017. Variational approach for learning Markov processes from time series data. + arXiv:1707.04659v1 + .. [2] Noe, F. and Clementi, C. 2015. Kinetic distance and kinetic maps from molecular dynamics simulation. + J. Chem. Theory. Comput. doi:10.1021/acs.jctc.5b00553 + """ + StreamingEstimationTransformer.__init__(self) + + self._covar = LaggedCovariance(c00=True, c0t=True, ctt=True, remove_data_mean=True, reversible=False, + lag=lag, bessel=False, stride=stride, skip=skip, weights=None, ncov_max=ncov_max) + + # empty dummy model instance + self._model = VAMPModel() # left/right? + self.set_params(lag=lag, dim=dim, scaling=scaling, right=right, + epsilon=epsilon, stride=stride, skip=skip, ncov_max=ncov_max) + + def _diagonalize(self): + # diagonalize with low rank approximation + self._logger.debug("diagonalize covariance matrices") + + mean0 = self._covar.mean + mean1 = self._covar.mean_tau + L0 = spd_inv_sqrt(self._covar.C00_) + L1 = spd_inv_sqrt(self._covar.Ctt_) + A = L0.T.dot(self._covar.C0t_).dot(L1) + + U, s, Vh = np.linalg.svd(A, compute_uv=True) + + # compute cumulative variance + cumvar = np.cumsum(s**2) + cumvar /= cumvar[-1] + + if self.dim is None: + m = np.count_nonzero(s > self.epsilon) + if isinstance(self.dim, float): + m = np.count_nonzero(cumvar >= self.dim) + else: + m = min(np.min(np.count_nonzero(s > self.epsilon)), self.dim) + singular_vectors_left = L0.dot(U[:, :m]) + singular_vectors_right = L1.dot(Vh[:m, :].T) + singular_values = s[:m] + + # remove residual contributions of the constant function + singular_vectors_left -= singular_vectors_left*mean0.dot(singular_vectors_left)[np.newaxis, :] + singular_vectors_right -= singular_vectors_right*mean1.dot(singular_vectors_right)[np.newaxis, :] + + # 
normalize vectors + scale_left = np.diag(singular_vectors_left.T.dot(np.diag(mean0)).dot(singular_vectors_left))**-0.5 + scale_right = np.diag(singular_vectors_right.T.dot(np.diag(mean1)).dot(singular_vectors_right))**-0.5 + singular_vectors_left *= scale_left[np.newaxis, :] + singular_vectors_right *= scale_right[np.newaxis, :] + + # scale vectors + if self.scaling is None: + pass + elif self.scaling in ['km', 'kinetic map']: + singular_vectors_left *= singular_values[np.newaxis, :]**2 ## TODO: check left/right + singular_vectors_right *= singular_values[np.newaxis, :] ** 2 ## TODO: check left/right + else: + raise ValueError('unexpected value (%s) of "scaling"'%self.scaling) + + self._logger.debug("finished diagonalisation.") + + self._model.update_model_params(cumvar=cumvar, + singular_values=singular_values, + singular_vectors_right=singular_vectors_right, + singular_vectors_left=singular_vectors_left) + + self._estimated = True + + + def _transform_array(self, X): # TODO: are these still called ics? + r"""Projects the data onto the dominant independent components. + + Parameters + ---------- + X : ndarray(n, m) + the input data + + Returns + ------- + Y : ndarray(n,) + the projected data + """ + # TODO: in principle get_output should not return data for *all* frames! 
+ if self.right: + X_meanfree = X - self.mean + Y = np.dot(X_meanfree, self.right_singular_vectors[:, 0:self.dimension()]) + else: + X_meanfree = X - self.mean_tau + Y = np.dot(X_meanfree, self.left_singular_vectors[:, 0:self.dimension()]) + + return Y.astype(self.output_type()) + + + def output_type(self): + return StreamingEstimationTransformer.output_type(self) From e1adf9416201a9debe4036f1f1ceb83caf6f962a Mon Sep 17 00:00:00 2001 From: "Paul, Fabian (fapa)" Date: Thu, 7 Sep 2017 19:12:59 +0200 Subject: [PATCH 02/43] w.i.p --- pyemma/coordinates/tests/test_vamp.py | 37 +++--- pyemma/coordinates/transform/vamp.py | 171 ++++++++++++++++++++------ 2 files changed, 151 insertions(+), 57 deletions(-) diff --git a/pyemma/coordinates/tests/test_vamp.py b/pyemma/coordinates/tests/test_vamp.py index 34c190400..9d1cdc1fa 100644 --- a/pyemma/coordinates/tests/test_vamp.py +++ b/pyemma/coordinates/tests/test_vamp.py @@ -59,29 +59,26 @@ def setUpClass(cls): def test(self): tau = 10 - vamp = pyemma_api_vamp(tau) - vamp.estimate(self.trajs) + vamp = pyemma_api_vamp(self.trajs, lag=tau) vamp.right = True - phi = [ sf[:, tau:] for sf in vamp.get_output() ] - phi_concat = np.concatenate(phi) - mean_right = phi_concat.sum(axis=1) / phi_concat.shape[1] - cov_right = phi_concat.T.dot(phi_concat) / phi_concat.shape[1] - np.testing.assert_almost_equal(mean_right, 0.0) - np.testing.assert_almost_equal(cov_right, np.eye(vamp.dimension())) + atol = np.finfo(vamp.output_type()).eps*10.0 + phi = [ sf[tau:, :] for sf in vamp.get_output() ] + phi = np.concatenate(phi) + mean_right = phi.sum(axis=0) / phi.shape[0] + cov_right = phi.T.dot(phi) / phi.shape[0] + np.testing.assert_allclose(mean_right, 0.0, atol=atol) + np.testing.assert_allclose(cov_right, np.eye(vamp.dimension()), atol=atol) vamp.right = False - psi = [ sf[:, 0:-tau] for sf in vamp.get_output() ] - psi_concat = np.concatenate(psi) - mean_left = psi_concat.sum(axis=1) / psi_concat.shape[1] - cov_left = 
psi_concat.T.dot(psi_concat) / psi_concat.shape[1] - np.testing.assert_almost_equal(mean_left, 0.0) - np.testing.assert_almost_equal(cov_left, np.eye(vamp.dimension())) + psi = [ sf[0:-tau, :] for sf in vamp.get_output() ] + psi = np.concatenate(psi) + mean_left = psi.sum(axis=0) / psi.shape[0] + cov_left = psi.T.dot(psi) / psi.shape[0] + np.testing.assert_allclose(mean_left, 0.0, atol=atol) + np.testing.assert_allclose(cov_left, np.eye(vamp.dimension()), atol=atol) # compute correlation between left and right - C01_psi_phi = np.zeros((vamp.dimension(), vamp.dimension())) - N_frames = 0 - for l, r in zip(psi, phi): - C01_psi_phi += l.T.dot(r) - N_frames += r.shape[1] - np.testing.assert_almost_equal(np.diag(C01_psi_phi), vamp.singular_values) + assert phi.shape[0]==psi.shape[0] + C01_psi_phi = psi.T.dot(phi) / phi.shape[0] + np.testing.assert_allclose(np.diag(C01_psi_phi), vamp.singular_values, atol=atol) diff --git a/pyemma/coordinates/transform/vamp.py b/pyemma/coordinates/transform/vamp.py index eaaab73fa..9485b05db 100644 --- a/pyemma/coordinates/transform/vamp.py +++ b/pyemma/coordinates/transform/vamp.py @@ -21,31 +21,44 @@ from __future__ import absolute_import import numpy as np -from pyemma.coordinates.transform.tica import TICA +from decorator import decorator +import sys +# from pyemma.coordinates.transform.tica import TICA from pyemma._base.model import Model from pyemma.util.annotators import fix_docs from pyemma._ext.variational.solvers.direct import spd_inv_sqrt from pyemma.coordinates.estimation.covariance import LaggedCovariance from pyemma.coordinates.data._base.transformer import StreamingEstimationTransformer - +import warnings __all__ = ['VAMP'] + class VAMPModel(Model): - def set_model_params(self, mean_0, mean_t, c00, ctt, c0t): + def set_model_params(self, mean_0, mean_t, C00, Ctt, C0t): self.mean_0 = mean_0 self.mean_t = mean_t - self.c00 = c00 - self.ctt = ctt - self.c0t = c0t + self.C00 = C00 + self.Ctt = Ctt + self.C0t = C0t -# TODO: 
remove time scales property +@decorator +def _lazy_estimation(func, *args, **kw): + assert isinstance(args[0], VAMP) + tica_obj = args[0] + if not tica_obj._estimated: + tica_obj._diagonalize() + return func(*args, **kw) + @fix_docs -class VAMP(TICA): +class VAMP(StreamingEstimationTransformer): r"""Variational approach for Markov processes (VAMP)""" + def describe(self): + return "[VAMP, lag = %i; max. output dim. = %s]" % (self._lag, str(self.dim)) + def __init__(self, lag, dim=None, scaling=None, right=True, epsilon=1e-6, stride=1, skip=0, ncov_max=float('inf')): r""" Variational approach for Markov processes (VAMP) [1]_. @@ -94,9 +107,31 @@ def __init__(self, lag, dim=None, scaling=None, right=True, epsilon=1e-6, lag=lag, bessel=False, stride=stride, skip=skip, weights=None, ncov_max=ncov_max) # empty dummy model instance - self._model = VAMPModel() # left/right? + self._model = VAMPModel() # TODO: left/right? self.set_params(lag=lag, dim=dim, scaling=scaling, right=right, - epsilon=epsilon, stride=stride, skip=skip, ncov_max=ncov_max) + epsilon=epsilon, stride=stride, skip=skip, ncov_max=ncov_max) + + def _estimate(self, iterable, **kw): + indim = iterable.dimension() + + if isinstance(self.dim, int): + if not self.dim <= indim: + raise RuntimeError("requested more output dimensions (%i) than dimension" + " of input data (%i)" % (self.dim, indim)) + + if self._logger_is_active(self._loglevel_DEBUG): + self._logger.debug("Running VAMP with tau=%i; Estimating two covariance matrices" + " with dimension (%i, %i)" % (self._lag, indim, indim)) + + self._covar.estimate(iterable, **kw) + self._model.update_model_params(mean_0=self._covar.mean, + mean_t=self._covar.mean_tau, + C00=self._covar.C00_, + C0t=self._covar.C0t_, + Ctt=self._covar.Ctt_) + self._diagonalize() + + return self._model def _diagonalize(self): # diagonalize with low rank approximation @@ -111,50 +146,80 @@ def _diagonalize(self): U, s, Vh = np.linalg.svd(A, compute_uv=True) # compute cumulative 
variance - cumvar = np.cumsum(s**2) + cumvar = np.cumsum(s ** 2) cumvar /= cumvar[-1] - if self.dim is None: - m = np.count_nonzero(s > self.epsilon) - if isinstance(self.dim, float): - m = np.count_nonzero(cumvar >= self.dim) - else: - m = min(np.min(np.count_nonzero(s > self.epsilon)), self.dim) + self._model.update_model_params(cumvar=cumvar, singular_values=s, mean_0=mean0, mean_t=mean1) + + # if self.dim is None: + # m = np.count_nonzero(s > self.epsilon) + # if isinstance(self.dim, float): + # m = np.count_nonzero(cumvar >= self.dim) + # else: + # m = min(np.min(np.count_nonzero(s > self.epsilon)), self.dim) + m = self.dimension(_estimating=True) + print(self.dim, m, file=sys.stderr) + singular_vectors_left = L0.dot(U[:, :m]) singular_vectors_right = L1.dot(Vh[:m, :].T) - singular_values = s[:m] # remove residual contributions of the constant function - singular_vectors_left -= singular_vectors_left*mean0.dot(singular_vectors_left)[np.newaxis, :] - singular_vectors_right -= singular_vectors_right*mean1.dot(singular_vectors_right)[np.newaxis, :] + singular_vectors_left -= singular_vectors_left * mean0.dot(singular_vectors_left)[np.newaxis, :] + singular_vectors_right -= singular_vectors_right * mean1.dot(singular_vectors_right)[np.newaxis, :] # normalize vectors - scale_left = np.diag(singular_vectors_left.T.dot(np.diag(mean0)).dot(singular_vectors_left))**-0.5 - scale_right = np.diag(singular_vectors_right.T.dot(np.diag(mean1)).dot(singular_vectors_right))**-0.5 - singular_vectors_left *= scale_left[np.newaxis, :] - singular_vectors_right *= scale_right[np.newaxis, :] + # TODO: fix me! 
+ scale_left = np.diag(singular_vectors_left.T.dot(self._model.C00).dot(singular_vectors_left)) + #print('scale left', scale_left, scale_left**0.5, file=sys.stderr) + scale_right = np.diag(singular_vectors_right.T.dot(self._model.Ctt).dot(singular_vectors_right)) + #print('scale right', scale_right, scale_right**0.5, file=sys.stderr) + singular_vectors_left *= scale_left[np.newaxis, :]**-0.5 + singular_vectors_right *= scale_right[np.newaxis, :]**-0.5 # scale vectors if self.scaling is None: pass elif self.scaling in ['km', 'kinetic map']: - singular_vectors_left *= singular_values[np.newaxis, :]**2 ## TODO: check left/right - singular_vectors_right *= singular_values[np.newaxis, :] ** 2 ## TODO: check left/right + singular_vectors_left *= self.singular_values[np.newaxis, :] ** 2 ## TODO: check left/right + singular_vectors_right *= self.singular_values[np.newaxis, :] ** 2 ## TODO: check left/right else: - raise ValueError('unexpected value (%s) of "scaling"'%self.scaling) + raise ValueError('unexpected value (%s) of "scaling"' % self.scaling) self._logger.debug("finished diagonalisation.") - self._model.update_model_params(cumvar=cumvar, - singular_values=singular_values, - singular_vectors_right=singular_vectors_right, + self._model.update_model_params(singular_vectors_right=singular_vectors_right, singular_vectors_left=singular_vectors_left) self._estimated = True - def _transform_array(self, X): # TODO: are these still called ics? - r"""Projects the data onto the dominant independent components. + def dimension(self, _estimating=False): + """ output dimension """ + if self.dim is None or self.dim == 1.0: + if self._estimated or _estimating: + return np.count_nonzero(self.singular_values > self.epsilon) + else: + warnings.warn( + RuntimeWarning('Requested dimension, but the dimension depends on the singular values and the ' + 'transformer has not yet been estimated. 
Result is only an approximation.')) + return self.data_producer.dimension() + if isinstance(self.dim, float): + if self._estimated or _estimating: + return np.count_nonzero(self.cumvar >= self.dim) + else: + raise RuntimeError('Requested dimension, but the dimension depends on the cumulative variance and the ' + 'transformer has not yet been estimated. Call estimate() before.') + else: + if self._estimated or _estimating: + return min(np.min(np.count_nonzero(self.singular_values > self.epsilon)), self.dim) + else: + warnings.warn( + RuntimeWarning('Requested dimension, but the dimension depends on the singular values and the ' + 'transformer has not yet been estimated. Result is only an approximation.')) + return self.dim + + def _transform_array(self, X): + r"""Projects the data onto the dominant singular functions. Parameters ---------- @@ -168,14 +233,46 @@ def _transform_array(self, X): # TODO: are these still called ics? """ # TODO: in principle get_output should not return data for *all* frames! 
if self.right: - X_meanfree = X - self.mean - Y = np.dot(X_meanfree, self.right_singular_vectors[:, 0:self.dimension()]) + X_meanfree = X - self._model.mean_t + Y = np.dot(X_meanfree, self._model.singular_vectors_right[:, 0:self.dimension()]) else: - X_meanfree = X - self.mean_tau - Y = np.dot(X_meanfree, self.left_singular_vectors[:, 0:self.dimension()]) + X_meanfree = X - self._model.mean_0 + Y = np.dot(X_meanfree, self._model.singular_vectors_left[:, 0:self.dimension()]) return Y.astype(self.output_type()) - def output_type(self): return StreamingEstimationTransformer.output_type(self) + + @property + # @_lazy_estimation + def singular_values(self): + r"""Singular values of VAMP (usually denoted :math:`\sigma`) + + Returns + ------- + singular values: 1-D np.array + """ + return self._model.singular_values + + @property + # @_lazy_estimation + def singular_vectors_right(self): + r"""Right singular vectors of the VAMP problem, columnwise + + Returns + ------- + eigenvectors: 2-D ndarray + """ + return self._model.singular_vectors_right + + @property + # @_lazy_estimation + def cumvar(self): + r"""Cumulative sum of the squared and normalized VAMP singular values + + Returns + ------- + cumvar: 1D np.array + """ + return self._model.cumvar From 23a2e35627e3fb5524b62306aa848525e26f4934 Mon Sep 17 00:00:00 2001 From: "Paul, Fabian (fapa)" Date: Tue, 12 Sep 2017 18:10:16 +0200 Subject: [PATCH 03/43] w.i.p --- pyemma/coordinates/tests/test_vamp.py | 46 ++++++++++++++++++--------- pyemma/coordinates/transform/vamp.py | 11 ++----- 2 files changed, 33 insertions(+), 24 deletions(-) diff --git a/pyemma/coordinates/tests/test_vamp.py b/pyemma/coordinates/tests/test_vamp.py index 9d1cdc1fa..64205876f 100644 --- a/pyemma/coordinates/tests/test_vamp.py +++ b/pyemma/coordinates/tests/test_vamp.py @@ -30,37 +30,50 @@ #from pyemma._ext.variational.util import ZeroRankError from logging import getLogger -logger = getLogger('pyemma.'+'TestTICA') +logger = 
getLogger('pyemma.'+'TestVAMP') -def random_invertible(n, eps=0.01): - 'generate real random invertible matrix' + +def random_matrix(n, rank=None, eps=0.01): m = np.random.randn(n, n) u, s, v = np.linalg.svd(m) - s = np.maximum(s, eps) + if rank is None: + rank = n + if rank > n: + rank = n + s = np.concatenate((np.maximum(s, eps)[0:rank], np.zeros(n-rank))) return u.dot(np.diag(s)).dot(v) class TestVAMPSelfConsitency(unittest.TestCase): - @classmethod - def setUpClass(cls): - N_trajs = 3 - N_frames = 1000 + def test_full_rank(self): + self.do_test(20, 20) + + def test_low_rank(self): dim = 30 - A = random_invertible(dim) + rank = 15 + self.do_test(dim, rank) + + def do_test(self, dim, rank): + # setup + N_frames = [123, 456, 789] + N_trajs = len(N_frames) + A = random_matrix(dim, rank) trajs = [] mean = np.random.randn(dim) for i in range(N_trajs): # set up data - white = np.random.randn(N_frames, dim) + white = np.random.randn(N_frames[i], dim) brown = np.cumsum(white, axis=0) correlated = np.dot(brown, A) trajs.append(correlated + mean) - cls.trajs = trajs - def test(self): - tau = 10 - vamp = pyemma_api_vamp(self.trajs, lag=tau) + # test + tau = 50 + vamp = pyemma_api_vamp(trajs, lag=tau, scaling=None) vamp.right = True + + assert vamp.dimension() <= rank + atol = np.finfo(vamp.output_type()).eps*10.0 phi = [ sf[tau:, :] for sf in vamp.get_output() ] phi = np.concatenate(phi) @@ -80,5 +93,8 @@ def test(self): # compute correlation between left and right assert phi.shape[0]==psi.shape[0] C01_psi_phi = psi.T.dot(phi) / phi.shape[0] - np.testing.assert_allclose(np.diag(C01_psi_phi), vamp.singular_values, atol=atol) + n = max(C01_psi_phi.shape) + C01_psi_phi = C01_psi_phi[0:n,:][:, 0:n] + np.testing.assert_allclose(np.diag(C01_psi_phi), vamp.singular_values[0:vamp.dimension()], atol=atol) + diff --git a/pyemma/coordinates/transform/vamp.py b/pyemma/coordinates/transform/vamp.py index 9485b05db..505844e02 100644 --- a/pyemma/coordinates/transform/vamp.py +++ 
b/pyemma/coordinates/transform/vamp.py @@ -151,28 +151,21 @@ def _diagonalize(self): self._model.update_model_params(cumvar=cumvar, singular_values=s, mean_0=mean0, mean_t=mean1) - # if self.dim is None: + # if self.dim is None: # TODO: fix me! # m = np.count_nonzero(s > self.epsilon) # if isinstance(self.dim, float): # m = np.count_nonzero(cumvar >= self.dim) # else: # m = min(np.min(np.count_nonzero(s > self.epsilon)), self.dim) m = self.dimension(_estimating=True) - print(self.dim, m, file=sys.stderr) + #print(self.dim, m, file=sys.stderr) singular_vectors_left = L0.dot(U[:, :m]) singular_vectors_right = L1.dot(Vh[:m, :].T) - # remove residual contributions of the constant function - singular_vectors_left -= singular_vectors_left * mean0.dot(singular_vectors_left)[np.newaxis, :] - singular_vectors_right -= singular_vectors_right * mean1.dot(singular_vectors_right)[np.newaxis, :] - # normalize vectors - # TODO: fix me! scale_left = np.diag(singular_vectors_left.T.dot(self._model.C00).dot(singular_vectors_left)) - #print('scale left', scale_left, scale_left**0.5, file=sys.stderr) scale_right = np.diag(singular_vectors_right.T.dot(self._model.Ctt).dot(singular_vectors_right)) - #print('scale right', scale_right, scale_right**0.5, file=sys.stderr) singular_vectors_left *= scale_left[np.newaxis, :]**-0.5 singular_vectors_right *= scale_right[np.newaxis, :]**-0.5 From 83ed0639e7bfe21519f08c7269e0ea8823258b76 Mon Sep 17 00:00:00 2001 From: "Paul, Fabian (fapa)" Date: Tue, 12 Sep 2017 19:03:47 +0200 Subject: [PATCH 04/43] w.i.p --- pyemma/coordinates/tests/test_vamp.py | 37 +++++++++++++---- pyemma/coordinates/transform/vamp.py | 58 +++++++++++++++++++-------- 2 files changed, 72 insertions(+), 23 deletions(-) diff --git a/pyemma/coordinates/tests/test_vamp.py b/pyemma/coordinates/tests/test_vamp.py index 64205876f..1c931ac9d 100644 --- a/pyemma/coordinates/tests/test_vamp.py +++ b/pyemma/coordinates/tests/test_vamp.py @@ -46,14 +46,14 @@ def random_matrix(n, rank=None, 
eps=0.01): class TestVAMPSelfConsitency(unittest.TestCase): def test_full_rank(self): - self.do_test(20, 20) + self.do_test(20, 20, test_partial_fit=False) def test_low_rank(self): dim = 30 rank = 15 - self.do_test(dim, rank) + self.do_test(dim, rank, test_partial_fit=True) - def do_test(self, dim, rank): + def do_test(self, dim, rank, test_partial_fit=False): # setup N_frames = [123, 456, 789] N_trajs = len(N_frames) @@ -75,16 +75,16 @@ def do_test(self, dim, rank): assert vamp.dimension() <= rank atol = np.finfo(vamp.output_type()).eps*10.0 - phi = [ sf[tau:, :] for sf in vamp.get_output() ] - phi = np.concatenate(phi) + phi_trajs = [ sf[tau:, :] for sf in vamp.get_output() ] + phi = np.concatenate(phi_trajs) mean_right = phi.sum(axis=0) / phi.shape[0] cov_right = phi.T.dot(phi) / phi.shape[0] np.testing.assert_allclose(mean_right, 0.0, atol=atol) np.testing.assert_allclose(cov_right, np.eye(vamp.dimension()), atol=atol) vamp.right = False - psi = [ sf[0:-tau, :] for sf in vamp.get_output() ] - psi = np.concatenate(psi) + psi_trajs = [ sf[0:-tau, :] for sf in vamp.get_output() ] + psi = np.concatenate(psi_trajs) mean_left = psi.sum(axis=0) / psi.shape[0] cov_left = psi.T.dot(psi) / psi.shape[0] np.testing.assert_allclose(mean_left, 0.0, atol=atol) @@ -97,4 +97,27 @@ def do_test(self, dim, rank): C01_psi_phi = C01_psi_phi[0:n,:][:, 0:n] np.testing.assert_allclose(np.diag(C01_psi_phi), vamp.singular_values[0:vamp.dimension()], atol=atol) + if test_partial_fit: + vamp2 = pyemma_api_vamp(lag=tau, scaling=None) + for t in trajs: + vamp2.partial_fit(t) + + model_params = vamp._model.get_model_params() + model_params2 = vamp2._model.get_model_params() + + for n in model_params.keys(): + np.testing.assert_allclose(model_params[n], model_params2[n]) + + vamp2.singular_values # trigger diagonalization + + vamp2.right = True + for t, ref in zip(trajs, phi_trajs): + np.testing.assert_allclose(vamp2.transform(t[tau:]), ref) + + vamp2.right = False + for t, ref in zip(trajs, 
psi_trajs): + np.testing.assert_allclose(vamp2.transform(t[0:-tau]), ref) + + + diff --git a/pyemma/coordinates/transform/vamp.py b/pyemma/coordinates/transform/vamp.py index 505844e02..adb1b6d3c 100644 --- a/pyemma/coordinates/transform/vamp.py +++ b/pyemma/coordinates/transform/vamp.py @@ -35,7 +35,7 @@ class VAMPModel(Model): - def set_model_params(self, mean_0, mean_t, C00, Ctt, C0t): + def set_model_params(self, dummy, mean_0, mean_t, C00, Ctt, C0t): self.mean_0 = mean_0 self.mean_t = mean_t self.C00 = C00 @@ -107,7 +107,7 @@ def __init__(self, lag, dim=None, scaling=None, right=True, epsilon=1e-6, lag=lag, bessel=False, stride=stride, skip=skip, weights=None, ncov_max=ncov_max) # empty dummy model instance - self._model = VAMPModel() # TODO: left/right? + self._model = VAMPModel() self.set_params(lag=lag, dim=dim, scaling=scaling, right=right, epsilon=epsilon, stride=stride, skip=skip, ncov_max=ncov_max) @@ -133,6 +133,39 @@ def _estimate(self, iterable, **kw): return self._model + def partial_fit(self, X): + """ incrementally update the covariances and mean. + + Parameters + ---------- + X: array, list of arrays, PyEMMA reader + input data. + + Notes + ----- + The projection matrix is first being calculated upon its first access. 
+ """ + from pyemma.coordinates import source + iterable = source(X) + + if isinstance(self.dim, int): + indim = iterable.dimension() + if not self.dim <= indim: + raise RuntimeError("requested more output dimensions (%i) than dimension" + " of input data (%i)" % (self.dim, indim)) + + self._covar.partial_fit(iterable) + self._model.update_model_params(mean_0=self._covar.mean, # TODO: inefficient, fixme + mean_t=self._covar.mean_tau, + C00=self._covar.C00_, + C0t=self._covar.C0t_, + Ctt=self._covar.Ctt_) + + #self._used_data = self._covar._used_data + self._estimated = False + + return self + def _diagonalize(self): # diagonalize with low rank approximation self._logger.debug("diagonalize covariance matrices") @@ -151,14 +184,7 @@ def _diagonalize(self): self._model.update_model_params(cumvar=cumvar, singular_values=s, mean_0=mean0, mean_t=mean1) - # if self.dim is None: # TODO: fix me! - # m = np.count_nonzero(s > self.epsilon) - # if isinstance(self.dim, float): - # m = np.count_nonzero(cumvar >= self.dim) - # else: - # m = min(np.min(np.count_nonzero(s > self.epsilon)), self.dim) m = self.dimension(_estimating=True) - #print(self.dim, m, file=sys.stderr) singular_vectors_left = L0.dot(U[:, :m]) singular_vectors_right = L1.dot(Vh[:m, :].T) @@ -190,7 +216,7 @@ def dimension(self, _estimating=False): """ output dimension """ if self.dim is None or self.dim == 1.0: if self._estimated or _estimating: - return np.count_nonzero(self.singular_values > self.epsilon) + return np.count_nonzero(self._model.singular_values > self.epsilon) else: warnings.warn( RuntimeWarning('Requested dimension, but the dimension depends on the singular values and the ' @@ -198,13 +224,13 @@ def dimension(self, _estimating=False): return self.data_producer.dimension() if isinstance(self.dim, float): if self._estimated or _estimating: - return np.count_nonzero(self.cumvar >= self.dim) + return np.count_nonzero(self._model.cumvar >= self.dim) else: raise RuntimeError('Requested dimension, but 
the dimension depends on the cumulative variance and the ' 'transformer has not yet been estimated. Call estimate() before.') else: if self._estimated or _estimating: - return min(np.min(np.count_nonzero(self.singular_values > self.epsilon)), self.dim) + return min(np.min(np.count_nonzero(self._model.singular_values > self.epsilon)), self.dim) else: warnings.warn( RuntimeWarning('Requested dimension, but the dimension depends on the singular values and the ' @@ -224,7 +250,7 @@ def _transform_array(self, X): Y : ndarray(n,) the projected data """ - # TODO: in principle get_output should not return data for *all* frames! + # TODO: in principle get_output should not return data for *all* frames! Think about this. if self.right: X_meanfree = X - self._model.mean_t Y = np.dot(X_meanfree, self._model.singular_vectors_right[:, 0:self.dimension()]) @@ -238,7 +264,7 @@ def output_type(self): return StreamingEstimationTransformer.output_type(self) @property - # @_lazy_estimation + @_lazy_estimation def singular_values(self): r"""Singular values of VAMP (usually denoted :math:`\sigma`) @@ -249,7 +275,7 @@ def singular_values(self): return self._model.singular_values @property - # @_lazy_estimation + @_lazy_estimation def singular_vectors_right(self): r"""Right singular vectors of the VAMP problem, columnwise @@ -260,7 +286,7 @@ def singular_vectors_right(self): return self._model.singular_vectors_right @property - # @_lazy_estimation + @_lazy_estimation def cumvar(self): r"""Cumulative sum of the squared and normalized VAMP singular values From 6f98bf8ef8081c62445394df7142cd067b195a66 Mon Sep 17 00:00:00 2001 From: "Paul, Fabian (fapa)" Date: Tue, 12 Sep 2017 19:24:33 +0200 Subject: [PATCH 05/43] [vamp] update docs, names --- pyemma/_ext/variational/solvers/direct.py | 1 + pyemma/coordinates/transform/vamp.py | 36 ++++++++++++++++++----- 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/pyemma/_ext/variational/solvers/direct.py 
b/pyemma/_ext/variational/solvers/direct.py index db442aedf..51e8df1ef 100644 --- a/pyemma/_ext/variational/solvers/direct.py +++ b/pyemma/_ext/variational/solvers/direct.py @@ -272,3 +272,4 @@ def eig_corr(C0, Ct, epsilon=1e-10, method='QR', sign_maxelement=False): # return result return l, R + diff --git a/pyemma/coordinates/transform/vamp.py b/pyemma/coordinates/transform/vamp.py index adb1b6d3c..496502f41 100644 --- a/pyemma/coordinates/transform/vamp.py +++ b/pyemma/coordinates/transform/vamp.py @@ -84,6 +84,15 @@ def __init__(self, lag, dim=None, scaling=None, right=True, epsilon=1e-6, * 'kinetic map' or 'km': modes are scaled by singular value right : boolean Whether to compute the right singular functions. + If right==True, get_output() will return the right singular + functions. Otherwise, get_output() will return the left singular + functions. + Beware that only frames[tau:, :] of each trajectory returned + by get_output() contain valid values of the right singular + functions. Conversely, only frames[0:-tau, :] of each + trajectory returned by get_output() contain valid values of + the left singular functions. The remaining frames might + possibly be interpreted as some extrapolation. epsilon : float singular value cutoff. Singular values of C0 with norms <= epsilon will be cut off. 
The remaining number of singular values define @@ -170,11 +179,11 @@ def _diagonalize(self): # diagonalize with low rank approximation self._logger.debug("diagonalize covariance matrices") - mean0 = self._covar.mean - mean1 = self._covar.mean_tau + mean_0 = self._covar.mean + mean_t = self._covar.mean_tau L0 = spd_inv_sqrt(self._covar.C00_) - L1 = spd_inv_sqrt(self._covar.Ctt_) - A = L0.T.dot(self._covar.C0t_).dot(L1) + Lt = spd_inv_sqrt(self._covar.Ctt_) + A = L0.T.dot(self._covar.C0t_).dot(Lt) U, s, Vh = np.linalg.svd(A, compute_uv=True) @@ -182,12 +191,12 @@ def _diagonalize(self): cumvar = np.cumsum(s ** 2) cumvar /= cumvar[-1] - self._model.update_model_params(cumvar=cumvar, singular_values=s, mean_0=mean0, mean_t=mean1) + self._model.update_model_params(cumvar=cumvar, singular_values=s, mean_0=mean_0, mean_t=mean_t) m = self.dimension(_estimating=True) singular_vectors_left = L0.dot(U[:, :m]) - singular_vectors_right = L1.dot(Vh[:m, :].T) + singular_vectors_right = Lt.dot(Vh[:m, :].T) # normalize vectors scale_left = np.diag(singular_vectors_left.T.dot(self._model.C00).dot(singular_vectors_left)) @@ -199,8 +208,8 @@ def _diagonalize(self): if self.scaling is None: pass elif self.scaling in ['km', 'kinetic map']: - singular_vectors_left *= self.singular_values[np.newaxis, :] ** 2 ## TODO: check left/right - singular_vectors_right *= self.singular_values[np.newaxis, :] ** 2 ## TODO: check left/right + singular_vectors_left *= self.singular_values[np.newaxis, :] ## TODO: check left/right + singular_vectors_right *= self.singular_values[np.newaxis, :] ## TODO: check left/right else: raise ValueError('unexpected value (%s) of "scaling"' % self.scaling) @@ -285,6 +294,17 @@ def singular_vectors_right(self): """ return self._model.singular_vectors_right + @property + @_lazy_estimation + def singular_vectors_left(self): + r"""Left singular vectors of the VAMP problem, columnwise + + Returns + ------- + eigenvectors: 2-D ndarray + """ + return 
self._model.singular_vectors_left + @property @_lazy_estimation def cumvar(self): From 61b1f145ca764334a8002ddb518bc672a24b32aa Mon Sep 17 00:00:00 2001 From: "Paul, Fabian (fapa)" Date: Tue, 26 Sep 2017 18:41:00 +0200 Subject: [PATCH 06/43] [vamp] implement CK test --- pyemma/coordinates/tests/test_vamp.py | 95 ++++++++++++-- pyemma/coordinates/transform/vamp.py | 172 ++++++++++++++++++++++---- 2 files changed, 238 insertions(+), 29 deletions(-) diff --git a/pyemma/coordinates/tests/test_vamp.py b/pyemma/coordinates/tests/test_vamp.py index 1c931ac9d..0e34ee5f0 100644 --- a/pyemma/coordinates/tests/test_vamp.py +++ b/pyemma/coordinates/tests/test_vamp.py @@ -24,10 +24,7 @@ import unittest import numpy as np from pyemma.coordinates import vamp as pyemma_api_vamp - -#from pyemma._ext.variational.solvers.direct import sort_by_norm -#from pyemma._ext.variational.solvers.direct import eig_corr -#from pyemma._ext.variational.util import ZeroRankError +from pyemma.msm import estimate_markov_model from logging import getLogger logger = getLogger('pyemma.'+'TestVAMP') @@ -44,9 +41,9 @@ def random_matrix(n, rank=None, eps=0.01): return u.dot(np.diag(s)).dot(v) -class TestVAMPSelfConsitency(unittest.TestCase): +class TestVAMPSelfConsistency(unittest.TestCase): def test_full_rank(self): - self.do_test(20, 20, test_partial_fit=False) + self.do_test(20, 20, test_partial_fit=True) def test_low_rank(self): dim = 30 @@ -119,5 +116,87 @@ def do_test(self, dim, rank, test_partial_fit=False): np.testing.assert_allclose(vamp2.transform(t[0:-tau]), ref) - - +def generate(T, N_steps, s0=0): + dtraj = np.zeros(N_steps, dtype=int) + s = s0 + T_cdf = T.cumsum(axis=1) + for t in range(N_steps): + dtraj[t] = s + s = np.searchsorted(T_cdf[s, :], np.random.rand()) + return dtraj + + +class TestVAMPCKTest(unittest.TestCase): + @classmethod + def setUpClass(cls): + N_steps = 10000 + N_traj = 2 + T = np.linalg.matrix_power(np.array([[0.7, 0.3, 0.0], [0.1, 0.8, 0.1], [0.0, 0.2, 0.8]]), 1) + dtrajs 
= [generate(T, N_steps) for _ in range(N_traj)] + p0 = np.zeros(3) + trajs = [] + lag = 1 + for dtraj in dtrajs: + traj = np.zeros((N_steps, T.shape[0])) + traj[np.arange(len(dtraj)), dtraj] = 1.0 + trajs.append(traj) + p0 += traj[:-lag, :].sum(axis=0) + vamp = pyemma_api_vamp(trajs, lag=lag, scaling=None) + msm = estimate_markov_model(dtrajs, lag=lag, reversible=False) + cls.dtrajs = dtrajs + cls.lag = lag + cls.msm = msm + cls.vamp = vamp + cls.p0 = p0 / p0.sum() + + def test_K_is_T(self): + m0 = self.vamp.model.mean_0 + mt = self.vamp.model.mean_t + C0 = self.vamp.model.C00 + m0[:, np.newaxis]*m0[np.newaxis, :] + C1 = self.vamp.model.C0t + m0[:, np.newaxis]*mt[np.newaxis, :] + K = np.linalg.inv(C0).dot(C1) + np.testing.assert_allclose(K, self.msm.P, atol=1E-5) + + def test_CK_covariances_against_MSM(self): + obs = np.eye(3) # observe every state + sta = np.eye(3) # restrict p0 to every state + pred, est = self.vamp.cktest(observables=obs, statistics=sta, mlags=4) + + atol = np.finfo(self.vamp.output_type()).eps*1000.0 + + #import sys + for i in range(len(pred)): + msm = estimate_markov_model(dtrajs=self.dtrajs, lag=self.lag*(i+1), reversible=False) + msm_esti = (self.p0 * sta).T.dot(msm.P).dot(obs) + msm_pred = (self.p0 * sta).T.dot(np.linalg.matrix_power(self.msm.P, (i+1))).dot(obs) + np.testing.assert_allclose(np.diag(pred[i]), np.diag(msm_pred), atol=atol) + np.testing.assert_allclose(np.diag(est[i]), np.diag(msm_esti), atol=atol) + np.testing.assert_allclose(np.diag(est[i]), np.diag(pred[i]), atol=0.006) + #print('pred(s)', i, np.diag(pred[i]), file=sys.stderr) + #print('predMSM!', i, np.diag(msm_pred), file=sys.stderr) + #print('esti(s)', i, np.diag(est[i]), file=sys.stderr) + #print('estiMSM!', i, np.diag(msm_esti), file=sys.stderr) + + def test_CK_expectation_against_MSM(self): + obs = np.eye(3) # observe every state + pred, est = self.vamp.cktest(observables=obs, statistics=None, mlags=4) + atol = np.finfo(self.vamp.output_type()).eps*1000.0 + + for i in 
range(len(pred)): + msm = estimate_markov_model(dtrajs=self.dtrajs, lag=self.lag*(i+1), reversible=False) + msm_esti = self.p0.T.dot(msm.P).dot(obs) + msm_pred = self.p0.T.dot(np.linalg.matrix_power(self.msm.P, (i+1))).dot(obs) + np.testing.assert_allclose(pred[i], msm_pred, atol=atol) + np.testing.assert_allclose(est[i], msm_esti, atol=atol) + np.testing.assert_allclose(est[i], pred[i], atol=0.006) + + def test_CK_covariances_of_singular_functions(self): + #from pyemma import config + #config.show_progress_bars = False + pred,est = self.vamp.cktest(n_observables=2, mlags=4) # auto + error = np.max(np.abs(np.array(pred) - np.array(est))) / max(np.max(pred), np.max(est)) + assert error < 0.05 + + +if __name__ == "__main__": + unittest.main() diff --git a/pyemma/coordinates/transform/vamp.py b/pyemma/coordinates/transform/vamp.py index 496502f41..6c24513cc 100644 --- a/pyemma/coordinates/transform/vamp.py +++ b/pyemma/coordinates/transform/vamp.py @@ -15,17 +15,16 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . 
''' -@author: paul, marscher, wu +@author: paul, marscher, wu, noe ''' from __future__ import absolute_import import numpy as np from decorator import decorator -import sys -# from pyemma.coordinates.transform.tica import TICA from pyemma._base.model import Model from pyemma.util.annotators import fix_docs +from pyemma.util.types import ensure_ndarray_or_None, ensure_ndarray from pyemma._ext.variational.solvers.direct import spd_inv_sqrt from pyemma.coordinates.estimation.covariance import LaggedCovariance from pyemma.coordinates.data._base.transformer import StreamingEstimationTransformer @@ -35,6 +34,7 @@ class VAMPModel(Model): + # TODO: remove dummy when bugfix from Martin is committed def set_model_params(self, dummy, mean_0, mean_t, C00, Ctt, C0t): self.mean_0 = mean_0 self.mean_t = mean_t @@ -185,38 +185,39 @@ def _diagonalize(self): Lt = spd_inv_sqrt(self._covar.Ctt_) A = L0.T.dot(self._covar.C0t_).dot(Lt) - U, s, Vh = np.linalg.svd(A, compute_uv=True) + Uprime, s, Vprimeh = np.linalg.svd(A, compute_uv=True) # compute cumulative variance cumvar = np.cumsum(s ** 2) cumvar /= cumvar[-1] + self._L0 = L0 + self._Lt = Lt self._model.update_model_params(cumvar=cumvar, singular_values=s, mean_0=mean_0, mean_t=mean_t) m = self.dimension(_estimating=True) - singular_vectors_left = L0.dot(U[:, :m]) - singular_vectors_right = Lt.dot(Vh[:m, :].T) + U = L0.dot(Uprime[:, :m]) # U in the paper singular_vectors_left + V = Lt.dot(Vprimeh[:m, :].T) # V in the paper singular_vectors_right # normalize vectors - scale_left = np.diag(singular_vectors_left.T.dot(self._model.C00).dot(singular_vectors_left)) - scale_right = np.diag(singular_vectors_right.T.dot(self._model.Ctt).dot(singular_vectors_right)) - singular_vectors_left *= scale_left[np.newaxis, :]**-0.5 - singular_vectors_right *= scale_right[np.newaxis, :]**-0.5 + #scale_left = np.diag(singular_vectors_left.T.dot(self._model.C00).dot(singular_vectors_left)) + #scale_right = 
np.diag(singular_vectors_right.T.dot(self._model.Ctt).dot(singular_vectors_right)) + #singular_vectors_left *= scale_left[np.newaxis, :]**-0.5 + #singular_vectors_right *= scale_right[np.newaxis, :]**-0.5 # scale vectors if self.scaling is None: pass elif self.scaling in ['km', 'kinetic map']: - singular_vectors_left *= self.singular_values[np.newaxis, :] ## TODO: check left/right - singular_vectors_right *= self.singular_values[np.newaxis, :] ## TODO: check left/right + U *= self.singular_values[np.newaxis, :] ## TODO: check left/right, ask Hao + V *= self.singular_values[np.newaxis, :] ## TODO: check left/right, ask Hao else: raise ValueError('unexpected value (%s) of "scaling"' % self.scaling) self._logger.debug("finished diagonalisation.") - self._model.update_model_params(singular_vectors_right=singular_vectors_right, - singular_vectors_left=singular_vectors_left) + self._model.update_model_params(U=U, V=V) self._estimated = True @@ -259,13 +260,14 @@ def _transform_array(self, X): Y : ndarray(n,) the projected data """ - # TODO: in principle get_output should not return data for *all* frames! Think about this. + # TODO: in principle get_output should not return data for *all* frames! + # TODO: implement our own iterators? This would also include random access to be complete... 
if self.right: X_meanfree = X - self._model.mean_t - Y = np.dot(X_meanfree, self._model.singular_vectors_right[:, 0:self.dimension()]) + Y = np.dot(X_meanfree, self._model.V[:, 0:self.dimension()]) else: X_meanfree = X - self._model.mean_0 - Y = np.dot(X_meanfree, self._model.singular_vectors_left[:, 0:self.dimension()]) + Y = np.dot(X_meanfree, self._model.U[:, 0:self.dimension()]) return Y.astype(self.output_type()) @@ -286,24 +288,28 @@ def singular_values(self): @property @_lazy_estimation def singular_vectors_right(self): - r"""Right singular vectors of the VAMP problem, columnwise + r"""Right singular vectors V of the VAMP problem, columnwise Returns ------- eigenvectors: 2-D ndarray + Coefficients that express the right singular functions in the + basis of mean-free input features. """ - return self._model.singular_vectors_right + return self._model.V @property @_lazy_estimation def singular_vectors_left(self): - r"""Left singular vectors of the VAMP problem, columnwise + r"""Left singular vectors U of the VAMP problem, columnwise Returns ------- eigenvectors: 2-D ndarray + Coefficients that express the left singular functions in the + basis of mean-free input features. """ - return self._model.singular_vectors_left + return self._model.U @property @_lazy_estimation @@ -315,3 +321,127 @@ def cumvar(self): cumvar: 1D np.array """ return self._model.cumvar + + + def expectation(self, statistics, observables, lag_multiple=1, statistics_mean_free=False, observables_mean_free=False): + r"""Compute future expectation of observable or covariance using the approximated Koopman operator. + + TODO: this requires some discussion + + TODO: add equations + + Parameters + ---------- + statistics : np.ndarray((input_dimension, n_statistics)), optional + Coefficients that express one or multiple statistics in + the basis of the input features. + This parameter can be None. In that case, this method + returns the future expectation value of the observable(s). 
+ + observables : np.ndarray((input_dimension, n_observables)) + Coefficients that express one or multiple observables in + the basis of the input features. + + lag_multiple : int + If > 1, extrapolate to a multiple of the estimator's lag + time by assuming Markovianity of the approximated Koopman + operator. + + statistics_mean_free : bool, default=False + If true, coefficients in statistics refer to the input + features with feature means removed. + If false, coefficients in statistics refer to the + unmodified input features. + + observables_mean_free : bool, default=False + If true, coefficients in observables refer to the input + features with feature means removed. + If false, coefficients in observables refer to the + unmodified input features. + """ + import sys + + S = np.diag(np.concatenate(([1.0], self.singular_values[0:self.dimension()]))) + V = self.singular_vectors_right[:, 0:self.dimension()] + U = self.singular_vectors_left[:, 0:self.dimension()] + m_0 = self.model.mean_0 + m_t = self.model.mean_t + + dim = self.dimension() + + assert lag_multiple >= 1, 'lag_multiple = 0 not implemented' + + if lag_multiple == 1: + P = S + else: + p = np.zeros((dim + 1, dim + 1)) + p[0, 0] = 1.0 + p[1:, 0] = U.T.dot(m_t - m_0) + p[1:, 1:] = U.T.dot(self.model.Ctt).dot(V) + P = np.linalg.matrix_power(S.dot(p), lag_multiple - 1).dot(S) + + Q = np.zeros((observables.shape[1], dim + 1)) + if not observables_mean_free: + Q[:, 0] = observables.T.dot(m_t) + Q[:, 1:] = observables.T.dot(self.model.Ctt).dot(V) + + if statistics is not None: + # compute covariance + R = np.zeros((statistics.shape[1], dim + 1)) + if not statistics_mean_free: + R[:, 0] = statistics.T.dot(m_0) + R[:, 1:] = statistics.T.dot(self.model.C00).dot(U) + + if statistics is not None: + # compute lagged covariance + return Q.dot(P).dot(R.T) + else: + # compute future expectation + return Q.dot(P)[:, 0] + + + def cktest(self, n_observables=None, observables='psi', statistics='phi', mlags=10): + # TODO: 
make better API, discuss + #from pyemma._ext.sklearn.base import clone as clone_estimator + + if n_observables is not None: + if n_observables > self.dimension(): + warnings.warn('Selected singular functions as observables but dimension ' + 'is lower than requested number of observables.') + n_observables = self.dimension() + else: + n_observables = self.dimension() + + if isinstance(observables, str) and observables == 'psi': + observables = self.singular_vectors_right[:, 0:n_observables] + observables_mean_free = True + else: + ensure_ndarray(observables, ndim=2) + observables_mean_free = False + + if isinstance(statistics, str) and statistics == 'phi': + statistics = self.singular_vectors_left[:, 0:n_observables] + statistics_mean_free = True + else: + ensure_ndarray_or_None(statistics, ndim=2) + statistics_mean_free = False + + est_1 = self.expectation(statistics, observables, lag_multiple=1, statistics_mean_free=statistics_mean_free, + observables_mean_free=observables_mean_free) + estimates = [est_1] + predictions = [est_1] + for m in np.arange(2, mlags+1): + #copy = clone_estimator(self) # TODO: why doesn't this work? 
+ copy = VAMP(lag=self.lag*m, dim=self.dim, scaling=self.scaling, right=self.right, + epsilon=self.epsilon, stride=self.stride, skip=self.skip, + ncov_max=self.ncov_max) + #copy.lag = self.lag*m + estimates.append( + copy.estimate(self.data_producer).expectation(statistics, observables, lag_multiple=1, + statistics_mean_free=statistics_mean_free, + observables_mean_free=observables_mean_free)) + predictions.append( + self.expectation(statistics, observables, lag_multiple=m, statistics_mean_free=statistics_mean_free, + observables_mean_free=observables_mean_free)) + # TODO: create some fancy object to store results + return predictions, estimates From 359f406f6b641739840ef366826d7095158c7746 Mon Sep 17 00:00:00 2001 From: "Paul, Fabian (fapa)" Date: Wed, 27 Sep 2017 17:10:44 +0200 Subject: [PATCH 07/43] [vamp] CK-test now returns LaggedModelValidator --- pyemma/coordinates/tests/test_vamp.py | 44 +-- pyemma/coordinates/transform/vamp.py | 279 ++++++++++-------- .../msm/estimators/lagged_model_validators.py | 5 +- 3 files changed, 177 insertions(+), 151 deletions(-) diff --git a/pyemma/coordinates/tests/test_vamp.py b/pyemma/coordinates/tests/test_vamp.py index 0e34ee5f0..2c7565dda 100644 --- a/pyemma/coordinates/tests/test_vamp.py +++ b/pyemma/coordinates/tests/test_vamp.py @@ -157,29 +157,11 @@ def test_K_is_T(self): K = np.linalg.inv(C0).dot(C1) np.testing.assert_allclose(K, self.msm.P, atol=1E-5) - def test_CK_covariances_against_MSM(self): - obs = np.eye(3) # observe every state - sta = np.eye(3) # restrict p0 to every state - pred, est = self.vamp.cktest(observables=obs, statistics=sta, mlags=4) - - atol = np.finfo(self.vamp.output_type()).eps*1000.0 - - #import sys - for i in range(len(pred)): - msm = estimate_markov_model(dtrajs=self.dtrajs, lag=self.lag*(i+1), reversible=False) - msm_esti = (self.p0 * sta).T.dot(msm.P).dot(obs) - msm_pred = (self.p0 * sta).T.dot(np.linalg.matrix_power(self.msm.P, (i+1))).dot(obs) - 
np.testing.assert_allclose(np.diag(pred[i]), np.diag(msm_pred), atol=atol) - np.testing.assert_allclose(np.diag(est[i]), np.diag(msm_esti), atol=atol) - np.testing.assert_allclose(np.diag(est[i]), np.diag(pred[i]), atol=0.006) - #print('pred(s)', i, np.diag(pred[i]), file=sys.stderr) - #print('predMSM!', i, np.diag(msm_pred), file=sys.stderr) - #print('esti(s)', i, np.diag(est[i]), file=sys.stderr) - #print('estiMSM!', i, np.diag(msm_esti), file=sys.stderr) - def test_CK_expectation_against_MSM(self): obs = np.eye(3) # observe every state - pred, est = self.vamp.cktest(observables=obs, statistics=None, mlags=4) + cktest = self.vamp.cktest(observables=obs, statistics=None, mlags=4) + pred = cktest.predictions[1:] + est = cktest.estimates[1:] atol = np.finfo(self.vamp.output_type()).eps*1000.0 for i in range(len(pred)): @@ -193,10 +175,28 @@ def test_CK_expectation_against_MSM(self): def test_CK_covariances_of_singular_functions(self): #from pyemma import config #config.show_progress_bars = False - pred,est = self.vamp.cktest(n_observables=2, mlags=4) # auto + cktest = self.vamp.cktest(n_observables=2, mlags=4) # auto + pred = cktest.predictions[1:] + est = cktest.estimates[1:] error = np.max(np.abs(np.array(pred) - np.array(est))) / max(np.max(pred), np.max(est)) assert error < 0.05 + def test_CK_covariances_against_MSM(self): + obs = np.eye(3) # observe every state + sta = np.eye(3) # restrict p0 to every state + cktest = self.vamp.cktest(observables=obs, statistics=sta, mlags=4, show_progress=True) + atol = np.finfo(self.vamp.output_type()).eps * 1000.0 + pred = cktest.predictions[1:] + est = cktest.estimates[1:] + + for i in range(len(pred)): + msm = estimate_markov_model(dtrajs=self.dtrajs, lag=self.lag*(i+1), reversible=False) + msm_esti = (self.p0 * sta).T.dot(msm.P).dot(obs) + msm_pred = (self.p0 * sta).T.dot(np.linalg.matrix_power(self.msm.P, (i+1))).dot(obs) + np.testing.assert_allclose(np.diag(pred[i]), np.diag(msm_pred), atol=atol) + 
np.testing.assert_allclose(np.diag(est[i]), np.diag(msm_esti), atol=atol) + np.testing.assert_allclose(np.diag(est[i]), np.diag(pred[i]), atol=0.006) + if __name__ == "__main__": unittest.main() diff --git a/pyemma/coordinates/transform/vamp.py b/pyemma/coordinates/transform/vamp.py index 6c24513cc..8836b8a51 100644 --- a/pyemma/coordinates/transform/vamp.py +++ b/pyemma/coordinates/transform/vamp.py @@ -28,6 +28,8 @@ from pyemma._ext.variational.solvers.direct import spd_inv_sqrt from pyemma.coordinates.estimation.covariance import LaggedCovariance from pyemma.coordinates.data._base.transformer import StreamingEstimationTransformer +from pyemma.msm.estimators.lagged_model_validators import LaggedModelValidator + import warnings __all__ = ['VAMP'] @@ -42,6 +44,106 @@ def set_model_params(self, dummy, mean_0, mean_t, C00, Ctt, C0t): self.Ctt = Ctt self.C0t = C0t + def dimension(self, _estimated=True): # TODO: get rid of _estimated but test for existence of field instead + """ output dimension """ + if self.dim is None or self.dim == 1.0: + if _estimated: + return np.count_nonzero(self.singular_values > self.epsilon) + else: + raise RuntimeError('Requested dimension, but the dimension depends on the singular values and the ' + 'transformer has not yet been estimated. Call estimate() before.') + if isinstance(self.dim, float): + if _estimated: + return np.count_nonzero(self.cumvar >= self.dim) + else: + raise RuntimeError('Requested dimension, but the dimension depends on the cumulative variance and the ' + 'transformer has not yet been estimated. Call estimate() before.') + else: + if _estimated: + return min(np.min(np.count_nonzero(self.singular_values > self.epsilon)), self.dim) + else: + warnings.warn( + RuntimeWarning('Requested dimension, but the dimension depends on the singular values and the ' + 'transformer has not yet been estimated. 
Result is only an approximation.')) + return self.dim + + def expectation(self, statistics, observables, lag_multiple=1, statistics_mean_free=False, observables_mean_free=False): + r"""Compute future expectation of observable or covariance using the approximated Koopman operator. + + TODO: this requires some discussion + + TODO: add equations + + Parameters + ---------- + statistics : np.ndarray((input_dimension, n_statistics)), optional + Coefficients that express one or multiple statistics in + the basis of the input features. + This parameter can be None. In that case, this method + returns the future expectation value of the observable(s). + + observables : np.ndarray((input_dimension, n_observables)) + Coefficients that express one or multiple observables in + the basis of the input features. + + lag_multiple : int + If > 1, extrapolate to a multiple of the estimator's lag + time by assuming Markovianity of the approximated Koopman + operator. + + statistics_mean_free : bool, default=False + If true, coefficients in statistics refer to the input + features with feature means removed. + If false, coefficients in statistics refer to the + unmodified input features. + + observables_mean_free : bool, default=False + If true, coefficients in observables refer to the input + features with feature means removed. + If false, coefficients in observables refer to the + unmodified input features. 
+ """ + import sys + + dim = self.dimension() + + S = np.diag(np.concatenate(([1.0], self.singular_values[0:dim]))) + V = self.V[:, 0:dim] + U = self.U[:, 0:dim] + m_0 = self.mean_0 + m_t = self.mean_t + + + assert lag_multiple >= 1, 'lag_multiple = 0 not implemented' + + if lag_multiple == 1: + P = S + else: + p = np.zeros((dim + 1, dim + 1)) + p[0, 0] = 1.0 + p[1:, 0] = U.T.dot(m_t - m_0) + p[1:, 1:] = U.T.dot(self.Ctt).dot(V) + P = np.linalg.matrix_power(S.dot(p), lag_multiple - 1).dot(S) + + Q = np.zeros((observables.shape[1], dim + 1)) + if not observables_mean_free: + Q[:, 0] = observables.T.dot(m_t) + Q[:, 1:] = observables.T.dot(self.Ctt).dot(V) + + if statistics is not None: + # compute covariance + R = np.zeros((statistics.shape[1], dim + 1)) + if not statistics_mean_free: + R[:, 0] = statistics.T.dot(m_0) + R[:, 1:] = statistics.T.dot(self.C00).dot(U) + + if statistics is not None: + # compute lagged covariance + return Q.dot(P).dot(R.T) + else: + # compute future expectation + return Q.dot(P)[:, 0] + @decorator def _lazy_estimation(func, *args, **kw): @@ -112,15 +214,16 @@ def __init__(self, lag, dim=None, scaling=None, right=True, epsilon=1e-6, """ StreamingEstimationTransformer.__init__(self) - self._covar = LaggedCovariance(c00=True, c0t=True, ctt=True, remove_data_mean=True, reversible=False, - lag=lag, bessel=False, stride=stride, skip=skip, weights=None, ncov_max=ncov_max) - # empty dummy model instance self._model = VAMPModel() self.set_params(lag=lag, dim=dim, scaling=scaling, right=right, epsilon=epsilon, stride=stride, skip=skip, ncov_max=ncov_max) + self._covar = None + self._model.update_model_params(dim=dim, epsilon=epsilon) def _estimate(self, iterable, **kw): + self._covar = LaggedCovariance(c00=True, c0t=True, ctt=True, remove_data_mean=True, reversible=False, + lag=self.lag, bessel=False, stride=self.stride, skip=self.skip, weights=None, ncov_max=self.ncov_max) indim = iterable.dimension() if isinstance(self.dim, int): @@ -163,6 +266,10 
@@ def partial_fit(self, X): raise RuntimeError("requested more output dimensions (%i) than dimension" " of input data (%i)" % (self.dim, indim)) + if self._covar is None: + self._covar = LaggedCovariance(c00=True, c0t=True, ctt=True, remove_data_mean=True, reversible=False, + lag=self.lag, bessel=False, stride=self.stride, skip=self.skip, weights=None, + ncov_max=self.ncov_max) self._covar.partial_fit(iterable) self._model.update_model_params(mean_0=self._covar.mean, # TODO: inefficient, fixme mean_t=self._covar.mean_tau, @@ -195,7 +302,7 @@ def _diagonalize(self): self._Lt = Lt self._model.update_model_params(cumvar=cumvar, singular_values=s, mean_0=mean_0, mean_t=mean_t) - m = self.dimension(_estimating=True) + m = self._model.dimension(_estimated=True) U = L0.dot(Uprime[:, :m]) # U in the paper singular_vectors_left V = Lt.dot(Vprimeh[:m, :].T) # V in the paper singular_vectors_right @@ -222,30 +329,9 @@ def _diagonalize(self): self._estimated = True - def dimension(self, _estimating=False): - """ output dimension """ - if self.dim is None or self.dim == 1.0: - if self._estimated or _estimating: - return np.count_nonzero(self._model.singular_values > self.epsilon) - else: - warnings.warn( - RuntimeWarning('Requested dimension, but the dimension depends on the singular values and the ' - 'transformer has not yet been estimated. Result is only an approximation.')) - return self.data_producer.dimension() - if isinstance(self.dim, float): - if self._estimated or _estimating: - return np.count_nonzero(self._model.cumvar >= self.dim) - else: - raise RuntimeError('Requested dimension, but the dimension depends on the cumulative variance and the ' - 'transformer has not yet been estimated. 
Call estimate() before.') - else: - if self._estimated or _estimating: - return min(np.min(np.count_nonzero(self._model.singular_values > self.epsilon)), self.dim) - else: - warnings.warn( - RuntimeWarning('Requested dimension, but the dimension depends on the singular values and the ' - 'transformer has not yet been estimated. Result is only an approximation.')) - return self.dim + def dimension(self): + return self._model.dimension(_estimated=self._estimated) + def _transform_array(self, X): r"""Projects the data onto the dominant singular functions. @@ -322,88 +408,13 @@ def cumvar(self): """ return self._model.cumvar + def expectation(self, statistics, observables, lag_multiple=1, statistics_mean_free=False, + observables_mean_free=False): + return self._model.expectation(statistics, observables, lag_multiple=lag_multiple, + statistics_mean_free=statistics_mean_free, + observables_mean_free=observables_mean_free) - def expectation(self, statistics, observables, lag_multiple=1, statistics_mean_free=False, observables_mean_free=False): - r"""Compute future expectation of observable or covariance using the approximated Koopman operator. - - TODO: this requires some discussion - - TODO: add equations - - Parameters - ---------- - statistics : np.ndarray((input_dimension, n_statistics)), optional - Coefficients that express one or multiple statistics in - the basis of the input features. - This parameter can be None. In that case, this method - returns the future expectation value of the observable(s). - - observables : np.ndarray((input_dimension, n_observables)) - Coefficients that express one or multiple observables in - the basis of the input features. - - lag_multiple : int - If > 1, extrapolate to a multiple of the estimator's lag - time by assuming Markovianity of the approximated Koopman - operator. - - statistics_mean_free : bool, default=False - If true, coefficients in statistics refer to the input - features with feature means removed. 
- If false, coefficients in statistics refer to the - unmodified input features. - - observables_mean_free : bool, default=False - If true, coefficients in observables refer to the input - features with feature means removed. - If false, coefficients in observables refer to the - unmodified input features. - """ - import sys - - S = np.diag(np.concatenate(([1.0], self.singular_values[0:self.dimension()]))) - V = self.singular_vectors_right[:, 0:self.dimension()] - U = self.singular_vectors_left[:, 0:self.dimension()] - m_0 = self.model.mean_0 - m_t = self.model.mean_t - - dim = self.dimension() - - assert lag_multiple >= 1, 'lag_multiple = 0 not implemented' - - if lag_multiple == 1: - P = S - else: - p = np.zeros((dim + 1, dim + 1)) - p[0, 0] = 1.0 - p[1:, 0] = U.T.dot(m_t - m_0) - p[1:, 1:] = U.T.dot(self.model.Ctt).dot(V) - P = np.linalg.matrix_power(S.dot(p), lag_multiple - 1).dot(S) - - Q = np.zeros((observables.shape[1], dim + 1)) - if not observables_mean_free: - Q[:, 0] = observables.T.dot(m_t) - Q[:, 1:] = observables.T.dot(self.model.Ctt).dot(V) - - if statistics is not None: - # compute covariance - R = np.zeros((statistics.shape[1], dim + 1)) - if not statistics_mean_free: - R[:, 0] = statistics.T.dot(m_0) - R[:, 1:] = statistics.T.dot(self.model.C00).dot(U) - - if statistics is not None: - # compute lagged covariance - return Q.dot(P).dot(R.T) - else: - # compute future expectation - return Q.dot(P)[:, 0] - - - def cktest(self, n_observables=None, observables='psi', statistics='phi', mlags=10): - # TODO: make better API, discuss - #from pyemma._ext.sklearn.base import clone as clone_estimator - + def cktest(self, n_observables=None, observables='psi', statistics='phi', mlags=10, n_jobs=1, show_progress=True): if n_observables is not None: if n_observables > self.dimension(): warnings.warn('Selected singular functions as observables but dimension ' @@ -426,22 +437,34 @@ def cktest(self, n_observables=None, observables='psi', statistics='phi', mlags= 
ensure_ndarray_or_None(statistics, ndim=2) statistics_mean_free = False - est_1 = self.expectation(statistics, observables, lag_multiple=1, statistics_mean_free=statistics_mean_free, - observables_mean_free=observables_mean_free) - estimates = [est_1] - predictions = [est_1] - for m in np.arange(2, mlags+1): - #copy = clone_estimator(self) # TODO: why doesn't this work? - copy = VAMP(lag=self.lag*m, dim=self.dim, scaling=self.scaling, right=self.right, - epsilon=self.epsilon, stride=self.stride, skip=self.skip, - ncov_max=self.ncov_max) - #copy.lag = self.lag*m - estimates.append( - copy.estimate(self.data_producer).expectation(statistics, observables, lag_multiple=1, - statistics_mean_free=statistics_mean_free, - observables_mean_free=observables_mean_free)) - predictions.append( - self.expectation(statistics, observables, lag_multiple=m, statistics_mean_free=statistics_mean_free, - observables_mean_free=observables_mean_free)) - # TODO: create some fancy object to store results - return predictions, estimates + ck = VAMPChapmanKolmogorovValidator(self, self, observables, statistics, observables_mean_free, + statistics_mean_free, mlags=mlags, n_jobs=n_jobs, + show_progress=show_progress) + ck.estimate(self.data_producer) + return ck + + +class VAMPChapmanKolmogorovValidator(LaggedModelValidator): + def __init__(self, model, estimator, observables, statistics, observables_mean_free, statistics_mean_free, + mlags=10, n_jobs=1, show_progress=True): + LaggedModelValidator.__init__(self, model, estimator, mlags=mlags, + n_jobs=n_jobs, show_progress=show_progress) + self.statistics = statistics + self.observables = observables + self.observables_mean_free = observables_mean_free + self.statistics_mean_free = statistics_mean_free + if self.statistics is not None: + self.nsets = min(self.observables.shape[1], self.statistics.shape[1]) + + + def _compute_observables(self, model, estimator, mlag=1): + # for lag time 0 we return a matrix of nan, until the correct solution is 
implemented + if mlag == 0 or model is None: + if self.statistics is None: + return np.zeros(self.observables.shape[1]) + np.nan + else: + return np.zeros((self.observables.shape[1], self.statistics.shape[1])) + np.nan + else: + return model.expectation(self.statistics, self.observables, lag_multiple=mlag, + statistics_mean_free=self.statistics_mean_free, + observables_mean_free=self.observables_mean_free) diff --git a/pyemma/msm/estimators/lagged_model_validators.py b/pyemma/msm/estimators/lagged_model_validators.py index a39b3c141..27f193d60 100644 --- a/pyemma/msm/estimators/lagged_model_validators.py +++ b/pyemma/msm/estimators/lagged_model_validators.py @@ -81,7 +81,10 @@ def __init__(self, model, estimator, mlags=None, conf=0.95, err_est=False, self.test_estimator = estimator # set mlags - maxlength = np.max([len(dtraj) for dtraj in estimator.discrete_trajectories_full]) + try: + maxlength = np.max([len(dtraj) for dtraj in estimator.discrete_trajectories_full]) + except AttributeError: + maxlength = np.max(estimator.trajectory_lengths()) maxmlag = int(math.floor(maxlength / estimator.lag)) if mlags is None: mlags = maxmlag From da013442f5e5f3d02cd44056edaeef0974d0dde8 Mon Sep 17 00:00:00 2001 From: "Paul, Fabian (fapa)" Date: Wed, 27 Sep 2017 17:30:53 +0200 Subject: [PATCH 08/43] [vamp] more progressbar tweaking/debugging --- pyemma/_base/estimator.py | 12 ++---------- pyemma/coordinates/transform/vamp.py | 6 +++++- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/pyemma/_base/estimator.py b/pyemma/_base/estimator.py index 88564d92f..081c9da08 100644 --- a/pyemma/_base/estimator.py +++ b/pyemma/_base/estimator.py @@ -299,9 +299,8 @@ def estimate_param_scan(estimator, X, param_sets, evaluate=None, evaluate_args=N if evaluate is not None and evaluate_args is not None and len(evaluate) != len(evaluate_args): raise ValueError("length mismatch: evaluate ({}) and evaluate_args ({})".format(len(evaluate), len(evaluate_args))) - if progress_reporter is 
not None: - progress_reporter._progress_register(len(estimators), stage=0, - description="estimating %s" % str(estimator.__class__.__name__)) + if progress_reporter is not None and show_progress: + progress_reporter._progress_register(len(estimators), stage=0, description="estimating %s" % str(estimator.__class__.__name__)) if n_jobs > 1 and os.name == 'posix': if hasattr(estimators[0], 'logger'): @@ -318,7 +317,6 @@ def estimate_param_scan(estimator, X, param_sets, evaluate=None, evaluate_args=N pool = Parallel(processes=n_jobs) args = list(task_iter) if progress_reporter is not None: - progress_reporter._progress_register(len(estimators), stage=0, description="estimating %s" % str(estimator.__class__.__name__)) from pyemma._base.model import SampledModel for a in args: if isinstance(a[0], SampledModel): @@ -352,12 +350,6 @@ def error_callback(*args, **kw): estimators[0].logger.debug('estimating %s with n_jobs=1 because of the setting or ' 'you not have a POSIX system', estimator) res = [] - if progress_reporter is not None: - from pyemma._base.model import SampledModel - if isinstance(estimator, SampledModel): - for e in estimators: - e.show_progress = False - for estimator, param_set in zip(estimators, param_sets): res.append(_estimate_param_scan_worker(estimator, param_set, X, evaluate, evaluate_args, failfast, return_exceptions)) diff --git a/pyemma/coordinates/transform/vamp.py b/pyemma/coordinates/transform/vamp.py index 8836b8a51..16e71e453 100644 --- a/pyemma/coordinates/transform/vamp.py +++ b/pyemma/coordinates/transform/vamp.py @@ -414,7 +414,11 @@ def expectation(self, statistics, observables, lag_multiple=1, statistics_mean_f statistics_mean_free=statistics_mean_free, observables_mean_free=observables_mean_free) - def cktest(self, n_observables=None, observables='psi', statistics='phi', mlags=10, n_jobs=1, show_progress=True): + def cktest(self, n_observables=None, observables='psi', statistics='phi', mlags=10, n_jobs=1, show_progress=False): + # drop 
reference to LaggedCovariance to avoid probelms during cloning + # In future pyemma versions, this will be no longer a problem... + self._covar = None + if n_observables is not None: if n_observables > self.dimension(): warnings.warn('Selected singular functions as observables but dimension ' From 53b70978f4c32b0e8e7f11c2ae6e64683d5f2938 Mon Sep 17 00:00:00 2001 From: "Paul, Fabian (fapa)" Date: Thu, 28 Sep 2017 10:56:04 +0200 Subject: [PATCH 09/43] [vamp] w.i.p. to make future-proof for serializable estimators/models --- pyemma/coordinates/tests/test_vamp.py | 3 +- pyemma/coordinates/transform/vamp.py | 64 ++++++++++++++++----------- 2 files changed, 40 insertions(+), 27 deletions(-) diff --git a/pyemma/coordinates/tests/test_vamp.py b/pyemma/coordinates/tests/test_vamp.py index 2c7565dda..edc9779c3 100644 --- a/pyemma/coordinates/tests/test_vamp.py +++ b/pyemma/coordinates/tests/test_vamp.py @@ -103,7 +103,8 @@ def do_test(self, dim, rank, test_partial_fit=False): model_params2 = vamp2._model.get_model_params() for n in model_params.keys(): - np.testing.assert_allclose(model_params[n], model_params2[n]) + if model_params[n] is not None and model_params2[n] is not None: + np.testing.assert_allclose(model_params[n], model_params2[n]) vamp2.singular_values # trigger diagonalization diff --git a/pyemma/coordinates/transform/vamp.py b/pyemma/coordinates/transform/vamp.py index 16e71e453..8f930b430 100644 --- a/pyemma/coordinates/transform/vamp.py +++ b/pyemma/coordinates/transform/vamp.py @@ -37,29 +37,35 @@ class VAMPModel(Model): # TODO: remove dummy when bugfix from Martin is committed - def set_model_params(self, dummy, mean_0, mean_t, C00, Ctt, C0t): + def set_model_params(self, dummy, mean_0, mean_t, C00, Ctt, C0t, U, V, singular_values, cumvar, dim, epsilon): self.mean_0 = mean_0 self.mean_t = mean_t self.C00 = C00 self.Ctt = Ctt self.C0t = C0t + self.U = U + self.V = V + self.singular_values = singular_values + self.cumvar = cumvar + self.dim = dim + 
self.epsilon = epsilon - def dimension(self, _estimated=True): # TODO: get rid of _estimated but test for existence of field instead + def dimension(self): """ output dimension """ if self.dim is None or self.dim == 1.0: - if _estimated: + if hasattr(self, 'singular_values') and self.singular_values is not None: return np.count_nonzero(self.singular_values > self.epsilon) else: raise RuntimeError('Requested dimension, but the dimension depends on the singular values and the ' 'transformer has not yet been estimated. Call estimate() before.') if isinstance(self.dim, float): - if _estimated: + if hasattr(self, 'cumvar') and self.cumvar is not None: return np.count_nonzero(self.cumvar >= self.dim) else: raise RuntimeError('Requested dimension, but the dimension depends on the cumulative variance and the ' 'transformer has not yet been estimated. Call estimate() before.') else: - if _estimated: + if hasattr(self, 'singular_values') and self.singular_values is not None: return min(np.min(np.count_nonzero(self.singular_values > self.epsilon)), self.dim) else: warnings.warn( @@ -67,7 +73,8 @@ def dimension(self, _estimated=True): # TODO: get rid of _estimated but test for 'transformer has not yet been estimated. Result is only an approximation.')) return self.dim - def expectation(self, statistics, observables, lag_multiple=1, statistics_mean_free=False, observables_mean_free=False): + def expectation(self, statistics, observables, lag_multiple=1, statistics_mean_free=False, + observables_mean_free=False): r"""Compute future expectation of observable or covariance using the approximated Koopman operator. 
TODO: this requires some discussion @@ -113,7 +120,6 @@ def expectation(self, statistics, observables, lag_multiple=1, statistics_mean_f m_0 = self.mean_0 m_t = self.mean_t - assert lag_multiple >= 1, 'lag_multiple = 0 not implemented' if lag_multiple == 1: @@ -223,7 +229,8 @@ def __init__(self, lag, dim=None, scaling=None, right=True, epsilon=1e-6, def _estimate(self, iterable, **kw): self._covar = LaggedCovariance(c00=True, c0t=True, ctt=True, remove_data_mean=True, reversible=False, - lag=self.lag, bessel=False, stride=self.stride, skip=self.skip, weights=None, ncov_max=self.ncov_max) + lag=self.lag, bessel=False, stride=self.stride, skip=self.skip, weights=None, + ncov_max=self.ncov_max) indim = iterable.dimension() if isinstance(self.dim, int): @@ -271,13 +278,13 @@ def partial_fit(self, X): lag=self.lag, bessel=False, stride=self.stride, skip=self.skip, weights=None, ncov_max=self.ncov_max) self._covar.partial_fit(iterable) - self._model.update_model_params(mean_0=self._covar.mean, # TODO: inefficient, fixme + self._model.update_model_params(mean_0=self._covar.mean, # TODO: inefficient, fixme mean_t=self._covar.mean_tau, C00=self._covar.C00_, C0t=self._covar.C0t_, Ctt=self._covar.Ctt_) - #self._used_data = self._covar._used_data + # self._used_data = self._covar._used_data self._estimated = False return self @@ -302,23 +309,23 @@ def _diagonalize(self): self._Lt = Lt self._model.update_model_params(cumvar=cumvar, singular_values=s, mean_0=mean_0, mean_t=mean_t) - m = self._model.dimension(_estimated=True) + m = self._model.dimension() - U = L0.dot(Uprime[:, :m]) # U in the paper singular_vectors_left - V = Lt.dot(Vprimeh[:m, :].T) # V in the paper singular_vectors_right + U = L0.dot(Uprime[:, :m]) # U in the paper singular_vectors_left + V = Lt.dot(Vprimeh[:m, :].T) # V in the paper singular_vectors_right # normalize vectors - #scale_left = np.diag(singular_vectors_left.T.dot(self._model.C00).dot(singular_vectors_left)) - #scale_right = 
np.diag(singular_vectors_right.T.dot(self._model.Ctt).dot(singular_vectors_right)) - #singular_vectors_left *= scale_left[np.newaxis, :]**-0.5 - #singular_vectors_right *= scale_right[np.newaxis, :]**-0.5 + # scale_left = np.diag(singular_vectors_left.T.dot(self._model.C00).dot(singular_vectors_left)) + # scale_right = np.diag(singular_vectors_right.T.dot(self._model.Ctt).dot(singular_vectors_right)) + # singular_vectors_left *= scale_left[np.newaxis, :]**-0.5 + # singular_vectors_right *= scale_right[np.newaxis, :]**-0.5 # scale vectors if self.scaling is None: pass elif self.scaling in ['km', 'kinetic map']: - U *= self.singular_values[np.newaxis, :] ## TODO: check left/right, ask Hao - V *= self.singular_values[np.newaxis, :] ## TODO: check left/right, ask Hao + U *= self.singular_values[np.newaxis, :] ## TODO: check left/right, ask Hao + V *= self.singular_values[np.newaxis, :] ## TODO: check left/right, ask Hao else: raise ValueError('unexpected value (%s) of "scaling"' % self.scaling) @@ -328,10 +335,8 @@ def _diagonalize(self): self._estimated = True - def dimension(self): - return self._model.dimension(_estimated=self._estimated) - + return self._model.dimension() def _transform_array(self, X): r"""Projects the data onto the dominant singular functions. @@ -414,8 +419,9 @@ def expectation(self, statistics, observables, lag_multiple=1, statistics_mean_f statistics_mean_free=statistics_mean_free, observables_mean_free=observables_mean_free) - def cktest(self, n_observables=None, observables='psi', statistics='phi', mlags=10, n_jobs=1, show_progress=False): - # drop reference to LaggedCovariance to avoid probelms during cloning + def cktest(self, n_observables=None, observables='psi', statistics='phi', mlags=10, n_jobs=1, show_progress=False, + iterable=None): + # drop reference to LaggedCovariance to avoid problems during cloning # In future pyemma versions, this will be no longer a problem... 
self._covar = None @@ -444,7 +450,11 @@ def cktest(self, n_observables=None, observables='psi', statistics='phi', mlags= ck = VAMPChapmanKolmogorovValidator(self, self, observables, statistics, observables_mean_free, statistics_mean_free, mlags=mlags, n_jobs=n_jobs, show_progress=show_progress) - ck.estimate(self.data_producer) + + if iterable is None: + iterable = self.data_producer + + ck.estimate(iterable) return ck @@ -460,7 +470,6 @@ def __init__(self, model, estimator, observables, statistics, observables_mean_f if self.statistics is not None: self.nsets = min(self.observables.shape[1], self.statistics.shape[1]) - def _compute_observables(self, model, estimator, mlag=1): # for lag time 0 we return a matrix of nan, until the correct solution is implemented if mlag == 0 or model is None: @@ -472,3 +481,6 @@ def _compute_observables(self, model, estimator, mlag=1): return model.expectation(self.statistics, self.observables, lag_multiple=mlag, statistics_mean_free=self.statistics_mean_free, observables_mean_free=self.observables_mean_free) + + def _compute_observables_conf(self, model, estimator, mlag=1): + raise NotImplementedError('estimation of confidence intervals not yet implemented for VAMP') From d7275f22fdeedf5fe78fac5a43385d33ff24eeea Mon Sep 17 00:00:00 2001 From: "Paul, Fabian (fapa)" Date: Mon, 2 Oct 2017 12:24:23 +0200 Subject: [PATCH 10/43] [vamp] new unit test: test against MSM without removal of data mean --- pyemma/coordinates/tests/test_vamp.py | 45 +++++++++++++++++++++++++-- pyemma/coordinates/transform/vamp.py | 22 ++++++++----- 2 files changed, 56 insertions(+), 11 deletions(-) diff --git a/pyemma/coordinates/tests/test_vamp.py b/pyemma/coordinates/tests/test_vamp.py index edc9779c3..d20654ee8 100644 --- a/pyemma/coordinates/tests/test_vamp.py +++ b/pyemma/coordinates/tests/test_vamp.py @@ -92,7 +92,7 @@ def do_test(self, dim, rank, test_partial_fit=False): C01_psi_phi = psi.T.dot(phi) / phi.shape[0] n = max(C01_psi_phi.shape) C01_psi_phi = 
C01_psi_phi[0:n,:][:, 0:n] - np.testing.assert_allclose(np.diag(C01_psi_phi), vamp.singular_values[0:vamp.dimension()], atol=atol) + np.testing.assert_allclose(C01_psi_phi, np.diag(vamp.singular_values[0:vamp.dimension()]), atol=atol) if test_partial_fit: vamp2 = pyemma_api_vamp(lag=tau, scaling=None) @@ -127,21 +127,31 @@ def generate(T, N_steps, s0=0): return dtraj +def assert_allclose_ignore_phase(A, B, atol): + A = np.atleast_2d(A) + B = np.atleast_2d(B) + assert A.shape == B.shape + for i in range(B.shape[1]): + assert np.allclose(A[:, i], B[:, i], atol=atol) or np.allclose(A[:, i], -B[:, i], atol=atol) + + class TestVAMPCKTest(unittest.TestCase): @classmethod def setUpClass(cls): N_steps = 10000 N_traj = 2 - T = np.linalg.matrix_power(np.array([[0.7, 0.3, 0.0], [0.1, 0.8, 0.1], [0.0, 0.2, 0.8]]), 1) + lag = 1 + T = np.linalg.matrix_power(np.array([[0.7, 0.2, 0.1], [0.1, 0.8, 0.1], [0.1, 0.1, 0.8]]), lag) dtrajs = [generate(T, N_steps) for _ in range(N_traj)] p0 = np.zeros(3) + p1 = np.zeros(3) trajs = [] - lag = 1 for dtraj in dtrajs: traj = np.zeros((N_steps, T.shape[0])) traj[np.arange(len(dtraj)), dtraj] = 1.0 trajs.append(traj) p0 += traj[:-lag, :].sum(axis=0) + p1 += traj[lag:, :].sum(axis=0) vamp = pyemma_api_vamp(trajs, lag=lag, scaling=None) msm = estimate_markov_model(dtrajs, lag=lag, reversible=False) cls.dtrajs = dtrajs @@ -149,6 +159,7 @@ def setUpClass(cls): cls.msm = msm cls.vamp = vamp cls.p0 = p0 / p0.sum() + cls.p1 = p1 / p1.sum() def test_K_is_T(self): m0 = self.vamp.model.mean_0 @@ -158,6 +169,34 @@ def test_K_is_T(self): K = np.linalg.inv(C0).dot(C1) np.testing.assert_allclose(K, self.msm.P, atol=1E-5) + Tsym = np.diag(self.p0 ** 0.5).dot(self.msm.P).dot(np.diag(self.p1 ** -0.5)) + np.testing.assert_allclose(np.linalg.svd(Tsym)[1][1:], self.vamp.singular_values[0:2], atol=1E-7) + + def test_singular_functions_against_MSM(self): + Tsym = np.diag(self.p0 ** 0.5).dot(self.msm.P).dot(np.diag(self.p1 ** -0.5)) + Up, S, Vhp = np.linalg.svd(Tsym) 
+ Vp = Vhp.T + U = Up * (self.p0 ** -0.5)[:, np.newaxis] + V = Vp * (self.p1 ** -0.5)[:, np.newaxis] + assert_allclose_ignore_phase(U[:, 0], np.ones(3), atol=1E-5) + assert_allclose_ignore_phase(V[:, 0], np.ones(3), atol=1E-5) + U = U[:, 1:] + V = V[:, 1:] + self.vamp.right = True + phi = self.vamp.transform(np.eye(3)) + self.vamp.right = False + psi = self.vamp.transform(np.eye(3)) + assert_allclose_ignore_phase(U, psi, atol=1E-5) + assert_allclose_ignore_phase(V, phi, atol=1E-5) + references_sf = [U.T.dot(np.diag(self.p0)).dot(np.linalg.matrix_power(self.msm.P, k*self.lag)).dot(V) for k in + range(10-1)] + cktest = self.vamp.cktest(n_observables=2, mlags=10) + pred_sf = cktest.predictions + esti_sf = cktest.estimates + for e, p, r in zip(esti_sf[1:], pred_sf[1:], references_sf[1:]): + np.testing.assert_allclose(np.diag(p), np.diag(r), atol=1E-5) + np.testing.assert_allclose(np.abs(p), np.abs(r), atol=1E-4) + def test_CK_expectation_against_MSM(self): obs = np.eye(3) # observe every state cktest = self.vamp.cktest(observables=obs, statistics=None, mlags=4) diff --git a/pyemma/coordinates/transform/vamp.py b/pyemma/coordinates/transform/vamp.py index 8f930b430..9ebd8a906 100644 --- a/pyemma/coordinates/transform/vamp.py +++ b/pyemma/coordinates/transform/vamp.py @@ -110,7 +110,7 @@ def expectation(self, statistics, observables, lag_multiple=1, statistics_mean_f If false, coefficients in observables refer to the unmodified input features. 
""" - import sys + # TODO: implement the case lag_multiple=0 dim = self.dimension() @@ -314,12 +314,6 @@ def _diagonalize(self): U = L0.dot(Uprime[:, :m]) # U in the paper singular_vectors_left V = Lt.dot(Vprimeh[:m, :].T) # V in the paper singular_vectors_right - # normalize vectors - # scale_left = np.diag(singular_vectors_left.T.dot(self._model.C00).dot(singular_vectors_left)) - # scale_right = np.diag(singular_vectors_right.T.dot(self._model.Ctt).dot(singular_vectors_right)) - # singular_vectors_left *= scale_left[np.newaxis, :]**-0.5 - # singular_vectors_right *= scale_right[np.newaxis, :]**-0.5 - # scale vectors if self.scaling is None: pass @@ -413,13 +407,25 @@ def cumvar(self): """ return self._model.cumvar + @property + def show_progress(self): + if self._covar is None: + return False + else: + return self._covar.show_progress + + @show_progress.setter + def show_progress(self, value): + if self._covar is not None: + self._covar.show_progress = value + def expectation(self, statistics, observables, lag_multiple=1, statistics_mean_free=False, observables_mean_free=False): return self._model.expectation(statistics, observables, lag_multiple=lag_multiple, statistics_mean_free=statistics_mean_free, observables_mean_free=observables_mean_free) - def cktest(self, n_observables=None, observables='psi', statistics='phi', mlags=10, n_jobs=1, show_progress=False, + def cktest(self, n_observables=None, observables='psi', statistics='phi', mlags=10, n_jobs=1, show_progress=True, iterable=None): # drop reference to LaggedCovariance to avoid problems during cloning # In future pyemma versions, this will be no longer a problem... 
From c29affd6d7a65131b197e516eab4abea3b9dba49 Mon Sep 17 00:00:00 2001 From: "Paul, Fabian (fapa)" Date: Mon, 2 Oct 2017 16:25:01 +0200 Subject: [PATCH 11/43] [vamp] drafted implementation of scores, w.i.p --- pyemma/coordinates/tests/test_vamp.py | 6 ++++++ pyemma/coordinates/transform/vamp.py | 31 +++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/pyemma/coordinates/tests/test_vamp.py b/pyemma/coordinates/tests/test_vamp.py index d20654ee8..ae087d85c 100644 --- a/pyemma/coordinates/tests/test_vamp.py +++ b/pyemma/coordinates/tests/test_vamp.py @@ -237,6 +237,12 @@ def test_CK_covariances_against_MSM(self): np.testing.assert_allclose(np.diag(est[i]), np.diag(msm_esti), atol=atol) np.testing.assert_allclose(np.diag(est[i]), np.diag(pred[i]), atol=0.006) + def test_score(self): + #TODO: complete test! + self.vamp.score(other=self.vamp, score=1) + self.vamp.score(other=self.vamp, score=2) + self.vamp.score(other=self.vamp, score='E') + if __name__ == "__main__": unittest.main() diff --git a/pyemma/coordinates/transform/vamp.py b/pyemma/coordinates/transform/vamp.py index 9ebd8a906..1849e2c5e 100644 --- a/pyemma/coordinates/transform/vamp.py +++ b/pyemma/coordinates/transform/vamp.py @@ -29,6 +29,7 @@ from pyemma.coordinates.estimation.covariance import LaggedCovariance from pyemma.coordinates.data._base.transformer import StreamingEstimationTransformer from pyemma.msm.estimators.lagged_model_validators import LaggedModelValidator +from pyemma.util.linalg import mdot import warnings @@ -150,6 +151,30 @@ def expectation(self, statistics, observables, lag_multiple=1, statistics_mean_f # compute future expectation return Q.dot(P)[:, 0] + def score(self, other=None, score='E'): + # TODO: test me! + # TODO: implement for TICA too + # TODO: check compatibility of models, e.g. equal lag time, equal features? 
+ if other is None: + other = self + Uk = self.U[:, 0:self.dimension()] + Vk = self.V[:, 0:self.dimension()] + if score == 1 or score == 2: + A = spd_inv_sqrt(Uk.T.dot(other.C00).dot(Uk)) + B = Uk.T.dot(other.C0t).dot(Vk) + C = spd_inv_sqrt(Vk.T.dot(other.Ctt).dot(Vk)) + ABC = mdot(A, B, C) + if score == 1: + return np.linalg.norm(ABC, ord='nuc') + elif score == 2: + return np.linalg.norm(ABC, ord='fro')**2 + elif score == 'E' or score == 'e': + Sk = np.diag(self.singular_values[0:self.dimension()]) + return np.trace(2.0*mdot(Vk, Sk, Uk.T, other.C0t) - mdot(Vk, Sk, Uk.T, other.C00, Uk, Sk, Vk.T, other.Ctt)) + else: + raise ValueError('"score" should be one of 1, 2 or "E"') + # TODO: add the contribution (+1) of the constant singular functions to the result? + @decorator def _lazy_estimation(func, *args, **kw): @@ -463,6 +488,12 @@ def cktest(self, n_observables=None, observables='psi', statistics='phi', mlags= ck.estimate(iterable) return ck + def score(self, other=None, score='E'): + if other is None: + return self.model.score(None, score=score) + else: + return self.model.score(other.model, score=score) + class VAMPChapmanKolmogorovValidator(LaggedModelValidator): def __init__(self, model, estimator, observables, statistics, observables_mean_free, statistics_mean_free, From 7aefe990e74004e2625a0baae690ec9b43401ead Mon Sep 17 00:00:00 2001 From: "Paul, Fabian (fapa)" Date: Mon, 2 Oct 2017 18:20:08 +0200 Subject: [PATCH 12/43] [vamp] fix error in unit test --- pyemma/coordinates/tests/test_vamp.py | 21 ++++++++++----------- pyemma/coordinates/transform/vamp.py | 2 ++ 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/pyemma/coordinates/tests/test_vamp.py b/pyemma/coordinates/tests/test_vamp.py index ae087d85c..b23de9f5b 100644 --- a/pyemma/coordinates/tests/test_vamp.py +++ b/pyemma/coordinates/tests/test_vamp.py @@ -160,6 +160,7 @@ def setUpClass(cls): cls.vamp = vamp cls.p0 = p0 / p0.sum() cls.p1 = p1 / p1.sum() + cls.atol = 
np.finfo(vamp.output_type()).eps*1000.0 def test_K_is_T(self): m0 = self.vamp.model.mean_0 @@ -188,28 +189,27 @@ def test_singular_functions_against_MSM(self): psi = self.vamp.transform(np.eye(3)) assert_allclose_ignore_phase(U, psi, atol=1E-5) assert_allclose_ignore_phase(V, phi, atol=1E-5) - references_sf = [U.T.dot(np.diag(self.p0)).dot(np.linalg.matrix_power(self.msm.P, k*self.lag)).dot(V) for k in + references_sf = [U.T.dot(np.diag(self.p0)).dot(np.linalg.matrix_power(self.msm.P, k*self.lag)).dot(V).T for k in range(10-1)] cktest = self.vamp.cktest(n_observables=2, mlags=10) pred_sf = cktest.predictions esti_sf = cktest.estimates for e, p, r in zip(esti_sf[1:], pred_sf[1:], references_sf[1:]): - np.testing.assert_allclose(np.diag(p), np.diag(r), atol=1E-5) - np.testing.assert_allclose(np.abs(p), np.abs(r), atol=1E-4) + np.testing.assert_allclose(np.diag(p), np.diag(r), atol=1E-6) + np.testing.assert_allclose(np.abs(p), np.abs(r), atol=1E-6) def test_CK_expectation_against_MSM(self): obs = np.eye(3) # observe every state cktest = self.vamp.cktest(observables=obs, statistics=None, mlags=4) pred = cktest.predictions[1:] est = cktest.estimates[1:] - atol = np.finfo(self.vamp.output_type()).eps*1000.0 for i in range(len(pred)): msm = estimate_markov_model(dtrajs=self.dtrajs, lag=self.lag*(i+1), reversible=False) msm_esti = self.p0.T.dot(msm.P).dot(obs) msm_pred = self.p0.T.dot(np.linalg.matrix_power(self.msm.P, (i+1))).dot(obs) - np.testing.assert_allclose(pred[i], msm_pred, atol=atol) - np.testing.assert_allclose(est[i], msm_esti, atol=atol) + np.testing.assert_allclose(pred[i], msm_pred, atol=self.atol) + np.testing.assert_allclose(est[i], msm_esti, atol=self.atol) np.testing.assert_allclose(est[i], pred[i], atol=0.006) def test_CK_covariances_of_singular_functions(self): @@ -225,16 +225,15 @@ def test_CK_covariances_against_MSM(self): obs = np.eye(3) # observe every state sta = np.eye(3) # restrict p0 to every state cktest = self.vamp.cktest(observables=obs, 
statistics=sta, mlags=4, show_progress=True) - atol = np.finfo(self.vamp.output_type()).eps * 1000.0 pred = cktest.predictions[1:] est = cktest.estimates[1:] for i in range(len(pred)): msm = estimate_markov_model(dtrajs=self.dtrajs, lag=self.lag*(i+1), reversible=False) - msm_esti = (self.p0 * sta).T.dot(msm.P).dot(obs) - msm_pred = (self.p0 * sta).T.dot(np.linalg.matrix_power(self.msm.P, (i+1))).dot(obs) - np.testing.assert_allclose(np.diag(pred[i]), np.diag(msm_pred), atol=atol) - np.testing.assert_allclose(np.diag(est[i]), np.diag(msm_esti), atol=atol) + msm_esti = (self.p0 * sta).T.dot(msm.P).dot(obs).T + msm_pred = (self.p0 * sta).T.dot(np.linalg.matrix_power(self.msm.P, (i+1))).dot(obs).T + np.testing.assert_allclose(np.diag(pred[i]), np.diag(msm_pred), atol=self.atol) + np.testing.assert_allclose(np.diag(est[i]), np.diag(msm_esti), atol=self.atol) np.testing.assert_allclose(np.diag(est[i]), np.diag(pred[i]), atol=0.006) def test_score(self): diff --git a/pyemma/coordinates/transform/vamp.py b/pyemma/coordinates/transform/vamp.py index 1849e2c5e..83a6cf3b7 100644 --- a/pyemma/coordinates/transform/vamp.py +++ b/pyemma/coordinates/transform/vamp.py @@ -147,6 +147,8 @@ def expectation(self, statistics, observables, lag_multiple=1, statistics_mean_f if statistics is not None: # compute lagged covariance return Q.dot(P).dot(R.T) + # TODO: discuss whether we want to return this or the transpose + # TODO: from MSMs one might expect to first index to refer to the statistics, here it is the other way round else: # compute future expectation return Q.dot(P)[:, 0] From 63f4307d03d48a3c65c86a20297b812075719af1 Mon Sep 17 00:00:00 2001 From: "Paul, Fabian (fapa)" Date: Wed, 4 Oct 2017 19:27:01 +0200 Subject: [PATCH 13/43] [vamp] drafted unit test for scores --- pyemma/coordinates/tests/test_vamp.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/pyemma/coordinates/tests/test_vamp.py b/pyemma/coordinates/tests/test_vamp.py index 
b23de9f5b..1c6edd14f 100644 --- a/pyemma/coordinates/tests/test_vamp.py +++ b/pyemma/coordinates/tests/test_vamp.py @@ -41,7 +41,7 @@ def random_matrix(n, rank=None, eps=0.01): return u.dot(np.diag(s)).dot(v) -class TestVAMPSelfConsistency(unittest.TestCase): +class TestVAMPEstimatorSelfConsistency(unittest.TestCase): def test_full_rank(self): self.do_test(20, 20, test_partial_fit=True) @@ -135,7 +135,7 @@ def assert_allclose_ignore_phase(A, B, atol): assert np.allclose(A[:, i], B[:, i], atol=atol) or np.allclose(A[:, i], -B[:, i], atol=atol) -class TestVAMPCKTest(unittest.TestCase): +class TestVAMPModel(unittest.TestCase): @classmethod def setUpClass(cls): N_steps = 10000 @@ -236,12 +236,22 @@ def test_CK_covariances_against_MSM(self): np.testing.assert_allclose(np.diag(est[i]), np.diag(msm_esti), atol=self.atol) np.testing.assert_allclose(np.diag(est[i]), np.diag(pred[i]), atol=0.006) - def test_score(self): - #TODO: complete test! - self.vamp.score(other=self.vamp, score=1) - self.vamp.score(other=self.vamp, score=2) - self.vamp.score(other=self.vamp, score='E') + def test_self_score_with_MSM(self): + T = self.msm.P + Tadj = np.diag(1./self.p1).dot(T.T).dot(np.diag(self.p0)) + NFro = np.trace(T.dot(Tadj)) + s2 = self.vamp.score(score=2) + np.testing.assert_allclose(s2 + 1, NFro) + Tsym = np.diag(self.p0**0.5).dot(T).dot(np.diag(self.p1**-0.5)) + Nnuc = np.linalg.norm(Tsym, ord='nuc') + s1 = self.vamp.score(score=1) + np.testing.assert_allclose(s1 + 1, Nnuc) + + sE = self.vamp.score(score='E') + np.testing.assert_allclose(s1 + 1, Nnuc) # see paper appendix H.2 + + # TODO: test cross score if __name__ == "__main__": unittest.main() From 492d19135af10bf8391337445e1ded1f47b14e16 Mon Sep 17 00:00:00 2001 From: marscher Date: Mon, 9 Oct 2017 17:34:00 +0200 Subject: [PATCH 14/43] scoring changes: * moved diagonalization method to model * Added score test * made signatures of score method analog to MSM --- pyemma/coordinates/tests/test_vamp.py | 35 ++++-- 
pyemma/coordinates/transform/vamp.py | 151 ++++++++++++++++---------- 2 files changed, 118 insertions(+), 68 deletions(-) diff --git a/pyemma/coordinates/tests/test_vamp.py b/pyemma/coordinates/tests/test_vamp.py index 1c6edd14f..98b30e172 100644 --- a/pyemma/coordinates/tests/test_vamp.py +++ b/pyemma/coordinates/tests/test_vamp.py @@ -27,6 +27,8 @@ from pyemma.msm import estimate_markov_model from logging import getLogger +from pyemma.msm.estimators._dtraj_stats import cvsplit_dtrajs + logger = getLogger('pyemma.'+'TestVAMP') @@ -139,7 +141,7 @@ class TestVAMPModel(unittest.TestCase): @classmethod def setUpClass(cls): N_steps = 10000 - N_traj = 2 + N_traj = 20 lag = 1 T = np.linalg.matrix_power(np.array([[0.7, 0.2, 0.1], [0.1, 0.8, 0.1], [0.1, 0.1, 0.8]]), lag) dtrajs = [generate(T, N_steps) for _ in range(N_traj)] @@ -152,8 +154,9 @@ def setUpClass(cls): trajs.append(traj) p0 += traj[:-lag, :].sum(axis=0) p1 += traj[lag:, :].sum(axis=0) - vamp = pyemma_api_vamp(trajs, lag=lag, scaling=None) + vamp = pyemma_api_vamp(trajs, lag=lag, scaling=None, dim=1.0) msm = estimate_markov_model(dtrajs, lag=lag, reversible=False) + cls.trajs = trajs cls.dtrajs = dtrajs cls.lag = lag cls.msm = msm @@ -240,18 +243,32 @@ def test_self_score_with_MSM(self): T = self.msm.P Tadj = np.diag(1./self.p1).dot(T.T).dot(np.diag(self.p0)) NFro = np.trace(T.dot(Tadj)) - s2 = self.vamp.score(score=2) - np.testing.assert_allclose(s2 + 1, NFro) + s2 = self.vamp.score(score_method='VAMP2') + np.testing.assert_allclose(s2, NFro) Tsym = np.diag(self.p0**0.5).dot(T).dot(np.diag(self.p1**-0.5)) Nnuc = np.linalg.norm(Tsym, ord='nuc') - s1 = self.vamp.score(score=1) - np.testing.assert_allclose(s1 + 1, Nnuc) + s1 = self.vamp.score(score_method='VAMP1') + np.testing.assert_allclose(s1, Nnuc) + + # TODO: check why this is not equal + #sE = self.vamp.score(score_method='VAMPE') + #np.testing.assert_allclose(sE, Nnuc) # see paper appendix H.2 + + def test_score_vs_MSM(self): + dtrajs_test, dtrajs_train = 
cvsplit_dtrajs(self.dtrajs) + trajs_test, trajs_train = cvsplit_dtrajs(self.trajs) + + methods = ('VAMP1', 'VAMP2', 'VAMPE') + + for m in methods: + msm_train = estimate_markov_model(dtrajs=dtrajs_train, lag=self.lag, reversible=False) + score_msm = msm_train.score(dtrajs_test, score_method=m, score_k=None) - sE = self.vamp.score(score='E') - np.testing.assert_allclose(s1 + 1, Nnuc) # see paper appendix H.2 + vamp_train = pyemma_api_vamp(data=trajs_train, lag=self.lag, dim=1.0) + score_vamp = vamp_train.score(test_data=trajs_test, score_method=m) - # TODO: test cross score + self.assertAlmostEqual(score_msm, score_vamp, places=2, msg=m) if __name__ == "__main__": unittest.main() diff --git a/pyemma/coordinates/transform/vamp.py b/pyemma/coordinates/transform/vamp.py index 83a6cf3b7..bead6ea15 100644 --- a/pyemma/coordinates/transform/vamp.py +++ b/pyemma/coordinates/transform/vamp.py @@ -153,30 +153,93 @@ def expectation(self, statistics, observables, lag_multiple=1, statistics_mean_f # compute future expectation return Q.dot(P)[:, 0] - def score(self, other=None, score='E'): + def _diagionalize(self, scaling=None): + """ performs SVD on covariance matrices and save left, right singular vectors and values in the model. 
+ + Parameters + ---------- + scaling: str or None + + """ + + L0 = spd_inv_sqrt(self.C00) + Lt = spd_inv_sqrt(self.Ctt) + A = L0.T.dot(self.C0t).dot(Lt) + + Uprime, s, Vprimeh = np.linalg.svd(A, compute_uv=True) + + # compute cumulative variance + cumvar = np.cumsum(s ** 2) + cumvar /= cumvar[-1] + + self._L0 = L0 + self._Lt = Lt + + m = self.dimension() + + U = L0.dot(Uprime[:, :m]) # U in the paper singular_vectors_left + V = Lt.dot(Vprimeh[:m, :].T) # V in the paper singular_vectors_right + + # scale vectors + if scaling is None: + pass + elif scaling in ['km', 'kinetic map']: + U *= s[np.newaxis, 0:m] ## TODO: check left/right, ask Hao + V *= s[np.newaxis, 0:m] ## TODO: check left/right, ask Hao + else: + raise ValueError('unexpected value (%s) of "scaling"' % scaling) + + self.U = U + self.singular_values = s + self.V = V + + def score(self, test_model=None, score_method='VAMP2'): + """ + + Parameters + ---------- + test_model + score_method : str, optional, default='VAMP2' + Overwrite scoring method to be used if desired. If `None`, the estimators scoring + method will be used. + Available scores are based on the variational approach for Markov processes [1]_ [2]_ : + + * 'VAMP1' Sum of singular values of the symmetrized transition matrix [2]_ . + If the MSM is reversible, this is equal to the sum of transition + matrix eigenvalues, also called Rayleigh quotient [1]_ [3]_ . + * 'VAMP2' Sum of squared singular values of the symmetrized transition matrix [2]_ . + If the MSM is reversible, this is equal to the kinetic variance [4]_ . + * 'VAMPE' ... + + Returns + ------- + + """ # TODO: test me! # TODO: implement for TICA too # TODO: check compatibility of models, e.g. equal lag time, equal features? 
- if other is None: - other = self + if test_model is None: + test_model = self Uk = self.U[:, 0:self.dimension()] Vk = self.V[:, 0:self.dimension()] - if score == 1 or score == 2: - A = spd_inv_sqrt(Uk.T.dot(other.C00).dot(Uk)) - B = Uk.T.dot(other.C0t).dot(Vk) - C = spd_inv_sqrt(Vk.T.dot(other.Ctt).dot(Vk)) + res = None + if score_method == 'VAMP1' or score_method == 'VAMP2': + A = spd_inv_sqrt(Uk.T.dot(test_model.C00).dot(Uk)) + B = Uk.T.dot(test_model.C0t).dot(Vk) + C = spd_inv_sqrt(Vk.T.dot(test_model.Ctt).dot(Vk)) ABC = mdot(A, B, C) - if score == 1: - return np.linalg.norm(ABC, ord='nuc') - elif score == 2: - return np.linalg.norm(ABC, ord='fro')**2 - elif score == 'E' or score == 'e': + if score_method == 'VAMP1': + res = np.linalg.norm(ABC, ord='nuc') + elif score_method == 'VAMP2': + res = np.linalg.norm(ABC, ord='fro')**2 + elif score_method == 'VAMPE': Sk = np.diag(self.singular_values[0:self.dimension()]) - return np.trace(2.0*mdot(Vk, Sk, Uk.T, other.C0t) - mdot(Vk, Sk, Uk.T, other.C00, Uk, Sk, Vk.T, other.Ctt)) + res = np.trace(2.0 * mdot(Vk, Sk, Uk.T, test_model.C0t) - mdot(Vk, Sk, Uk.T, test_model.C00, Uk, Sk, Vk.T, test_model.Ctt)) else: - raise ValueError('"score" should be one of 1, 2 or "E"') - # TODO: add the contribution (+1) of the constant singular functions to the result? 
- + raise ValueError('"score" should be one of VAMP1, VAMP2 or VAMPE') + # add the contribution (+1) of the constant singular functions to the result + assert res + return res + 1 @decorator def _lazy_estimation(func, *args, **kw): @@ -260,10 +323,9 @@ def _estimate(self, iterable, **kw): ncov_max=self.ncov_max) indim = iterable.dimension() - if isinstance(self.dim, int): - if not self.dim <= indim: - raise RuntimeError("requested more output dimensions (%i) than dimension" - " of input data (%i)" % (self.dim, indim)) + if isinstance(self.dim, int) and not self.dim <= indim: + raise RuntimeError("requested more output dimensions (%i) than dimension" + " of input data (%i)" % (self.dim, indim)) if self._logger_is_active(self._loglevel_DEBUG): self._logger.debug("Running VAMP with tau=%i; Estimating two covariance matrices" @@ -319,41 +381,8 @@ def partial_fit(self, X): def _diagonalize(self): # diagonalize with low rank approximation self._logger.debug("diagonalize covariance matrices") - - mean_0 = self._covar.mean - mean_t = self._covar.mean_tau - L0 = spd_inv_sqrt(self._covar.C00_) - Lt = spd_inv_sqrt(self._covar.Ctt_) - A = L0.T.dot(self._covar.C0t_).dot(Lt) - - Uprime, s, Vprimeh = np.linalg.svd(A, compute_uv=True) - - # compute cumulative variance - cumvar = np.cumsum(s ** 2) - cumvar /= cumvar[-1] - - self._L0 = L0 - self._Lt = Lt - self._model.update_model_params(cumvar=cumvar, singular_values=s, mean_0=mean_0, mean_t=mean_t) - - m = self._model.dimension() - - U = L0.dot(Uprime[:, :m]) # U in the paper singular_vectors_left - V = Lt.dot(Vprimeh[:m, :].T) # V in the paper singular_vectors_right - - # scale vectors - if self.scaling is None: - pass - elif self.scaling in ['km', 'kinetic map']: - U *= self.singular_values[np.newaxis, :] ## TODO: check left/right, ask Hao - V *= self.singular_values[np.newaxis, :] ## TODO: check left/right, ask Hao - else: - raise ValueError('unexpected value (%s) of "scaling"' % self.scaling) - - self._logger.debug("finished 
diagonalisation.") - - self._model.update_model_params(U=U, V=V) - + self.model._diagonolize(self.scaling) + self._logger.debug("finished diagonalization.") self._estimated = True def dimension(self): @@ -490,11 +519,15 @@ def cktest(self, n_observables=None, observables='psi', statistics='phi', mlags= ck.estimate(iterable) return ck - def score(self, other=None, score='E'): - if other is None: - return self.model.score(None, score=score) + def score(self, test_data=None, score_method='VAMP2'): + from pyemma._ext.sklearn.base import clone as clone_estimator + est = clone_estimator(self) + + if test_data is None: + return self.model.score(None, score_method=score_method) else: - return self.model.score(other.model, score=score) + est.estimate(test_data) + return self.model.score(est.model, score_method=score_method) class VAMPChapmanKolmogorovValidator(LaggedModelValidator): From b6a4776f24ec635ffb787228302c3d82125bda44 Mon Sep 17 00:00:00 2001 From: marscher Date: Mon, 9 Oct 2017 17:34:27 +0200 Subject: [PATCH 15/43] [msm] fix parameter type in docstring --- pyemma/msm/estimators/maximum_likelihood_msm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyemma/msm/estimators/maximum_likelihood_msm.py b/pyemma/msm/estimators/maximum_likelihood_msm.py index e68b56dd3..3120b1141 100644 --- a/pyemma/msm/estimators/maximum_likelihood_msm.py +++ b/pyemma/msm/estimators/maximum_likelihood_msm.py @@ -228,7 +228,7 @@ def score(self, dtrajs, score_method=None, score_k=None): score_method : str Overwrite scoring method if desired. If `None`, the estimators scoring method will be used. See __init__ for documentation. - score_k : str + score_k : int or None Overwrite scoring rank if desired. If `None`, the estimators scoring rank will be used. See __init__ for documentation. 
score_method : str, optional, default='VAMP2' From 23b55736d2c275077988613770c9ae9ee6f2d541 Mon Sep 17 00:00:00 2001 From: marscher Date: Mon, 9 Oct 2017 18:25:57 +0200 Subject: [PATCH 16/43] fixed tests, moved lazy estimation to model --- pyemma/coordinates/tests/test_vamp.py | 13 ++-- pyemma/coordinates/transform/vamp.py | 86 +++++++++++++++++++-------- 2 files changed, 69 insertions(+), 30 deletions(-) diff --git a/pyemma/coordinates/tests/test_vamp.py b/pyemma/coordinates/tests/test_vamp.py index 98b30e172..d97cde6af 100644 --- a/pyemma/coordinates/tests/test_vamp.py +++ b/pyemma/coordinates/tests/test_vamp.py @@ -252,12 +252,15 @@ def test_self_score_with_MSM(self): np.testing.assert_allclose(s1, Nnuc) # TODO: check why this is not equal - #sE = self.vamp.score(score_method='VAMPE') - #np.testing.assert_allclose(sE, Nnuc) # see paper appendix H.2 + sE = self.vamp.score(score_method='VAMPE') + np.testing.assert_allclose(sE, NFro) # see paper appendix H.2 def test_score_vs_MSM(self): - dtrajs_test, dtrajs_train = cvsplit_dtrajs(self.dtrajs) - trajs_test, trajs_train = cvsplit_dtrajs(self.trajs) + from pyemma.util.contexts import numpy_random_seed + with numpy_random_seed(32): + trajs_test, trajs_train = cvsplit_dtrajs(self.trajs) + with numpy_random_seed(32): + dtrajs_test, dtrajs_train = cvsplit_dtrajs(self.dtrajs) methods = ('VAMP1', 'VAMP2', 'VAMPE') @@ -268,7 +271,7 @@ def test_score_vs_MSM(self): vamp_train = pyemma_api_vamp(data=trajs_train, lag=self.lag, dim=1.0) score_vamp = vamp_train.score(test_data=trajs_test, score_method=m) - self.assertAlmostEqual(score_msm, score_vamp, places=2, msg=m) + self.assertAlmostEqual(score_msm, score_vamp, places=3, msg=m) if __name__ == "__main__": unittest.main() diff --git a/pyemma/coordinates/transform/vamp.py b/pyemma/coordinates/transform/vamp.py index bead6ea15..455ff22a9 100644 --- a/pyemma/coordinates/transform/vamp.py +++ b/pyemma/coordinates/transform/vamp.py @@ -21,7 +21,7 @@ from __future__ import 
absolute_import import numpy as np -from decorator import decorator + from pyemma._base.model import Model from pyemma.util.annotators import fix_docs from pyemma.util.types import ensure_ndarray_or_None, ensure_ndarray @@ -44,18 +44,64 @@ def set_model_params(self, dummy, mean_0, mean_t, C00, Ctt, C0t, U, V, singular_ self.C00 = C00 self.Ctt = Ctt self.C0t = C0t - self.U = U - self.V = V - self.singular_values = singular_values + self._svd_performed = False + self._U = U + self._V = V + self._singular_values = singular_values self.cumvar = cumvar self.dim = dim self.epsilon = epsilon + @property + def U(self): + if not self._svd_performed: + self._diagonalize() + return self._U + + @property + def V(self): + if not self._svd_performed: + self._diagonalize() + return self._V + + @property + def singular_values(self): + if not self._svd_performed: + self._diagonalize() + return self._singular_values + + @property + def C00(self): + return self._C00 + + @C00.setter + def C00(self, val): + self._svd_performed = False + self._C00 = val + + @property + def C0t(self): + return self._C0t + + @C0t.setter + def C0t(self, val): + self._svd_performed = False + self._C0t = val + + @property + def Ctt(self): + return self._Ctt + + @Ctt.setter + def Ctt(self, val): + self._svd_performed = False + self._Ctt = val + def dimension(self): """ output dimension """ if self.dim is None or self.dim == 1.0: - if hasattr(self, 'singular_values') and self.singular_values is not None: - return np.count_nonzero(self.singular_values > self.epsilon) + if hasattr(self, '_singular_values') and self._singular_values is not None: + return np.count_nonzero(self._singular_values > self.epsilon) else: raise RuntimeError('Requested dimension, but the dimension depends on the singular values and the ' 'transformer has not yet been estimated. 
Call estimate() before.') @@ -66,8 +112,8 @@ def dimension(self): raise RuntimeError('Requested dimension, but the dimension depends on the cumulative variance and the ' 'transformer has not yet been estimated. Call estimate() before.') else: - if hasattr(self, 'singular_values') and self.singular_values is not None: - return min(np.min(np.count_nonzero(self.singular_values > self.epsilon)), self.dim) + if hasattr(self, '_singular_values') and self._singular_values is not None: + return min(np.min(np.count_nonzero(self._singular_values > self.epsilon)), self.dim) else: warnings.warn( RuntimeWarning('Requested dimension, but the dimension depends on the singular values and the ' @@ -153,7 +199,7 @@ def expectation(self, statistics, observables, lag_multiple=1, statistics_mean_f # compute future expectation return Q.dot(P)[:, 0] - def _diagionalize(self, scaling=None): + def _diagonalize(self, scaling=None): """ performs SVD on covariance matrices and save left, right singular vectors and values in the model. 
Parameters @@ -167,10 +213,12 @@ def _diagionalize(self, scaling=None): A = L0.T.dot(self.C0t).dot(Lt) Uprime, s, Vprimeh = np.linalg.svd(A, compute_uv=True) + self._singular_values = s # compute cumulative variance cumvar = np.cumsum(s ** 2) cumvar /= cumvar[-1] + self.cumvar = cumvar self._L0 = L0 self._Lt = Lt @@ -189,9 +237,9 @@ def _diagionalize(self, scaling=None): else: raise ValueError('unexpected value (%s) of "scaling"' % scaling) - self.U = U - self.singular_values = s - self.V = V + self._U = U + self._V = V + self._svd_performed = True def score(self, test_model=None, score_method='VAMP2'): """ @@ -241,14 +289,6 @@ def score(self, test_model=None, score_method='VAMP2'): assert res return res + 1 -@decorator -def _lazy_estimation(func, *args, **kw): - assert isinstance(args[0], VAMP) - tica_obj = args[0] - if not tica_obj._estimated: - tica_obj._diagonalize() - return func(*args, **kw) - @fix_docs class VAMP(StreamingEstimationTransformer): @@ -381,7 +421,7 @@ def partial_fit(self, X): def _diagonalize(self): # diagonalize with low rank approximation self._logger.debug("diagonalize covariance matrices") - self.model._diagonolize(self.scaling) + self.model._diagonalize(self.scaling) self._logger.debug("finished diagonalization.") self._estimated = True @@ -416,7 +456,6 @@ def output_type(self): return StreamingEstimationTransformer.output_type(self) @property - @_lazy_estimation def singular_values(self): r"""Singular values of VAMP (usually denoted :math:`\sigma`) @@ -427,7 +466,6 @@ def singular_values(self): return self._model.singular_values @property - @_lazy_estimation def singular_vectors_right(self): r"""Right singular vectors V of the VAMP problem, columnwise @@ -440,7 +478,6 @@ def singular_vectors_right(self): return self._model.V @property - @_lazy_estimation def singular_vectors_left(self): r"""Left singular vectors U of the VAMP problem, columnwise @@ -453,7 +490,6 @@ def singular_vectors_left(self): return self._model.U @property - 
@_lazy_estimation def cumvar(self): r"""Cumulative sum of the squared and normalized VAMP singular values From 1d74319d919d7581d30719d26a838d0000a65d61 Mon Sep 17 00:00:00 2001 From: "Paul, Fabian (fapa)" Date: Mon, 23 Oct 2017 16:04:07 +0200 Subject: [PATCH 17/43] [vamp] bugfixes --- pyemma/coordinates/transform/vamp.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pyemma/coordinates/transform/vamp.py b/pyemma/coordinates/transform/vamp.py index 455ff22a9..d9f7e7e48 100644 --- a/pyemma/coordinates/transform/vamp.py +++ b/pyemma/coordinates/transform/vamp.py @@ -99,7 +99,7 @@ def Ctt(self, val): def dimension(self): """ output dimension """ - if self.dim is None or self.dim == 1.0: + if self.dim is None or (isinstance(self.dim, float) and self.dim == 1.0): if hasattr(self, '_singular_values') and self._singular_values is not None: return np.count_nonzero(self._singular_values > self.epsilon) else: @@ -557,6 +557,10 @@ def cktest(self, n_observables=None, observables='psi', statistics='phi', mlags= def score(self, test_data=None, score_method='VAMP2'): from pyemma._ext.sklearn.base import clone as clone_estimator + # drop reference to LaggedCovariance to avoid problems during cloning + # In future pyemma versions, this will be no longer a problem... + self._covar = None + est = clone_estimator(self) if test_data is None: From 7def433946d0c1123181ce204b608c7afe1a0de2 Mon Sep 17 00:00:00 2001 From: marscher Date: Thu, 1 Feb 2018 17:22:52 +0100 Subject: [PATCH 18/43] updated to devel, made serializable, removed obsolete hacks. 
--- devtools/ci/travis/install_miniconda.sh | 2 +- devtools/conda-recipe/meta.yaml | 4 +- pyemma/__init__.py | 1 + pyemma/_base/estimator.py | 9 +- pyemma/_base/loggable.py | 1 - pyemma/_base/model.py | 2 +- pyemma/_ext/sklearn/base.py | 1 - pyemma/_ext/sklearn/parameter_search.py | 1 - pyemma/_ext/variational/__init__.py | 1 - .../_ext/variational/estimators/__init__.py | 1 - pyemma/_ext/variational/estimators/moments.py | 1 - .../estimators/tests/benchmark_moments.py | 4 +- .../estimators/tests/test_moments.py | 3 +- .../estimators/tests/test_running_moments.py | 1 - pyemma/_ext/variational/solvers/direct.py | 1 - .../variational/solvers/tests/test_direct.py | 1 - pyemma/coordinates/acf.py | 1 - pyemma/coordinates/clustering/assign.py | 1 - pyemma/coordinates/clustering/interface.py | 1 - pyemma/coordinates/clustering/kmeans.py | 1 + pyemma/coordinates/clustering/regspace.py | 1 - .../clustering/tests/test_assign.py | 2 +- .../clustering/tests/test_cluster.py | 3 +- .../clustering/tests/test_cluster_samples.py | 3 +- .../clustering/tests/test_kmeans.py | 1 - .../tests/test_mini_batch_kmeans.py | 1 - .../clustering/tests/test_regspace.py | 1 - .../clustering/tests/test_uniform_time.py | 3 +- pyemma/coordinates/clustering/uniform_time.py | 1 - pyemma/coordinates/data/_base/iterable.py | 1 - .../data/_base/random_accessible.py | 1 + .../data/_base/streaming_estimator.py | 1 - pyemma/coordinates/data/_base/transformer.py | 4 +- pyemma/coordinates/data/data_in_memory.py | 1 - pyemma/coordinates/data/feature_reader.py | 1 - .../data/featurization/featurizer.py | 1 - pyemma/coordinates/data/numpy_filereader.py | 1 - pyemma/coordinates/data/py_csv_reader.py | 1 - pyemma/coordinates/data/sources_merger.py | 2 +- .../coordinates/data/util/frames_from_file.py | 1 - pyemma/coordinates/data/util/reader_utils.py | 14 +- .../coordinates/data/util/traj_info_cache.py | 1 - pyemma/coordinates/estimation/covariance.py | 4 +- pyemma/coordinates/estimation/koopman.py | 1 - 
pyemma/coordinates/pipelines.py | 1 - pyemma/coordinates/tests/__init__.py | 1 - pyemma/coordinates/tests/test_acf.py | 5 +- .../coordinates/tests/test_covar_estimator.py | 1 - pyemma/coordinates/tests/test_csvreader.py | 1 - pyemma/coordinates/tests/test_datainmemory.py | 1 - pyemma/coordinates/tests/test_discretizer.py | 1 - .../coordinates/tests/test_featurereader.py | 1 - .../tests/test_featurereader_and_tica.py | 2 - .../test_featurereader_and_tica_projection.py | 2 - pyemma/coordinates/tests/test_featurizer.py | 1 + .../tests/test_frames_from_file.py | 1 - .../coordinates/tests/test_numpyfilereader.py | 2 - pyemma/coordinates/tests/test_pca.py | 1 - pyemma/coordinates/tests/test_pipeline.py | 2 - .../tests/test_random_access_stride.py | 3 +- pyemma/coordinates/tests/test_save_traj.py | 1 - pyemma/coordinates/tests/test_save_trajs.py | 3 +- pyemma/coordinates/tests/test_source.py | 3 +- pyemma/coordinates/tests/test_stride.py | 4 +- pyemma/coordinates/tests/test_tica.py | 1 - .../coordinates/tests/test_traj_info_cache.py | 3 +- pyemma/coordinates/transform/pca.py | 1 - pyemma/coordinates/transform/tica.py | 1 - pyemma/coordinates/transform/vamp.py | 24 +-- pyemma/coordinates/util/patches.py | 7 +- pyemma/coordinates/util/stat.py | 3 +- pyemma/datasets/api.py | 1 - pyemma/datasets/double_well_discrete.py | 5 +- pyemma/datasets/double_well_thermo.py | 1 - pyemma/msm/__init__.py | 1 - pyemma/msm/api.py | 1 - pyemma/msm/estimators/__init__.py | 3 +- pyemma/msm/estimators/_dtraj_stats.py | 3 + pyemma/msm/estimators/bayesian_hmsm.py | 1 - pyemma/msm/estimators/bayesian_msm.py | 1 - .../msm/estimators/lagged_model_validators.py | 2 +- .../msm/estimators/maximum_likelihood_hmsm.py | 2 +- pyemma/msm/models/__init__.py | 1 - pyemma/msm/models/hmsm.py | 3 +- pyemma/msm/models/hmsm_sampled.py | 1 - pyemma/msm/models/msm.py | 3 +- pyemma/msm/models/reactive_flux.py | 2 - pyemma/msm/tests/birth_death_chain.py | 3 +- pyemma/msm/tests/test_amm.py | 3 +- 
pyemma/msm/tests/test_bayesian_hmsm.py | 1 - pyemma/msm/tests/test_bayesian_msm.py | 1 - pyemma/msm/tests/test_cktest.py | 1 - pyemma/msm/tests/test_estimator.py | 2 +- pyemma/msm/tests/test_hmsm.py | 1 - pyemma/msm/tests/test_its.py | 1 - pyemma/msm/tests/test_its_oom.py | 3 +- pyemma/msm/tests/test_msm.py | 1 - pyemma/msm/tests/test_oom_msm.py | 3 +- pyemma/msm/tests/test_tpt.py | 3 +- pyemma/plots/__init__.py | 1 - pyemma/plots/markovtests.py | 1 - pyemma/plots/networks.py | 1 - pyemma/plots/plots2d.py | 1 - pyemma/plots/tests/test_its.py | 3 +- pyemma/plots/tests/test_markovtests.py | 1 - pyemma/plots/tests/test_networks.py | 1 - pyemma/plots/tests/test_plots2d.py | 1 - pyemma/plots/timescales.py | 1 - pyemma/thermo/tests/test_TRAM.py | 1 - pyemma/util/__init__.py | 1 - pyemma/util/annotators.py | 1 - pyemma/util/debug.py | 1 - pyemma/util/files.py | 3 +- pyemma/util/indices.py | 1 - pyemma/util/linalg.py | 1 - pyemma/util/log.py | 1 - pyemma/util/numeric.py | 1 - pyemma/util/reflection.py | 139 +++++++----------- pyemma/util/statistics.py | 1 - pyemma/util/tests/statistics_test.py | 3 +- .../util/tests/test_discrete_trajectories.py | 3 +- pyemma/util/tests/test_shortcut.py | 3 +- pyemma/util/types.py | 4 +- setup.py | 8 +- 124 files changed, 142 insertions(+), 261 deletions(-) diff --git a/devtools/ci/travis/install_miniconda.sh b/devtools/ci/travis/install_miniconda.sh index ace5acd15..76b5ac968 100755 --- a/devtools/ci/travis/install_miniconda.sh +++ b/devtools/ci/travis/install_miniconda.sh @@ -30,5 +30,5 @@ else # if it does not exist, we need to install miniconda fi # we want to have an up to date conda-build. 
-conda install conda-build=3.2 +conda install conda-build=3 conda info -a # for debugging diff --git a/devtools/conda-recipe/meta.yaml b/devtools/conda-recipe/meta.yaml index 6095c74dd..598f9ba54 100644 --- a/devtools/conda-recipe/meta.yaml +++ b/devtools/conda-recipe/meta.yaml @@ -42,10 +42,11 @@ requirements: - numpy >=1.11,<1.14 # [win and py36] - pathos - psutil >3.1 - - python >=3 + - python - pyyaml - scipy - setuptools + - six >=1.10 - thermotools >=0.2.6 - tqdm @@ -56,6 +57,7 @@ test: files: - matplotlibrc requires: + - h5py - pytest - pytest-cov # TODO: disabled on win64, until https://bugs.python.org/issue31701 is fixed. diff --git a/pyemma/__init__.py b/pyemma/__init__.py index ec4344440..332b6962b 100644 --- a/pyemma/__init__.py +++ b/pyemma/__init__.py @@ -21,6 +21,7 @@ PyEMMA - Emma's Markov Model Algorithms ======================================= """ +from __future__ import absolute_import # set version from versioneer. from ._version import get_versions diff --git a/pyemma/_base/estimator.py b/pyemma/_base/estimator.py index 081c9da08..77453efe3 100644 --- a/pyemma/_base/estimator.py +++ b/pyemma/_base/estimator.py @@ -300,7 +300,8 @@ def estimate_param_scan(estimator, X, param_sets, evaluate=None, evaluate_args=N raise ValueError("length mismatch: evaluate ({}) and evaluate_args ({})".format(len(evaluate), len(evaluate_args))) if progress_reporter is not None and show_progress: - progress_reporter._progress_register(len(estimators), stage=0, description="estimating %s" % str(estimator.__class__.__name__)) + progress_reporter._progress_register(len(estimators), stage=0, + description="estimating %s" % str(estimator.__class__.__name__)) if n_jobs > 1 and os.name == 'posix': if hasattr(estimators[0], 'logger'): @@ -350,6 +351,12 @@ def error_callback(*args, **kw): estimators[0].logger.debug('estimating %s with n_jobs=1 because of the setting or ' 'you not have a POSIX system', estimator) res = [] + if progress_reporter is not None: + from 
pyemma._base.model import SampledModel + if isinstance(estimator, SampledModel): + for e in estimators: + e.show_progress = False + for estimator, param_set in zip(estimators, param_sets): res.append(_estimate_param_scan_worker(estimator, param_set, X, evaluate, evaluate_args, failfast, return_exceptions)) diff --git a/pyemma/_base/loggable.py b/pyemma/_base/loggable.py index f4d3d9a07..542dda2c5 100644 --- a/pyemma/_base/loggable.py +++ b/pyemma/_base/loggable.py @@ -21,7 +21,6 @@ @author: marscher ''' -from __future__ import absolute_import import logging import weakref from itertools import count diff --git a/pyemma/_base/model.py b/pyemma/_base/model.py index 94bdeba71..2399b0dc4 100644 --- a/pyemma/_base/model.py +++ b/pyemma/_base/model.py @@ -16,6 +16,7 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . +from __future__ import absolute_import import numpy as _np import warnings @@ -102,7 +103,6 @@ def get_model_params(self, deep=True): # catch deprecated param values. # This is set in utils/__init__.py but it gets overwritten # when running under python3 somehow. - from pyemma.util.exceptions import PyEMMA_DeprecationWarning warnings.simplefilter("always", DeprecationWarning) warnings.simplefilter("always", PyEMMA_DeprecationWarning) try: diff --git a/pyemma/_ext/sklearn/base.py b/pyemma/_ext/sklearn/base.py index c2de6d350..b305ac3e7 100644 --- a/pyemma/_ext/sklearn/base.py +++ b/pyemma/_ext/sklearn/base.py @@ -12,7 +12,6 @@ Base classes for all estimators. 
""" -from __future__ import absolute_import # Author: Gael Varoquaux # License: BSD 3 clause diff --git a/pyemma/_ext/sklearn/parameter_search.py b/pyemma/_ext/sklearn/parameter_search.py index 0e452a1f0..0c5d4a86f 100644 --- a/pyemma/_ext/sklearn/parameter_search.py +++ b/pyemma/_ext/sklearn/parameter_search.py @@ -12,7 +12,6 @@ Parameter estimation tools """ -from __future__ import absolute_import # Author: Alexandre Gramfort , # Gael Varoquaux # Andreas Mueller diff --git a/pyemma/_ext/variational/__init__.py b/pyemma/_ext/variational/__init__.py index f0257fdc2..e3e473432 100644 --- a/pyemma/_ext/variational/__init__.py +++ b/pyemma/_ext/variational/__init__.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import __author__ = 'noe' # import subpackages such that they are available after the main package import diff --git a/pyemma/_ext/variational/estimators/__init__.py b/pyemma/_ext/variational/estimators/__init__.py index 796068eb7..2aee16ab1 100644 --- a/pyemma/_ext/variational/estimators/__init__.py +++ b/pyemma/_ext/variational/estimators/__init__.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import from .moments import moments_XX, moments_XXXY, moments_block from .moments import covar, covars diff --git a/pyemma/_ext/variational/estimators/moments.py b/pyemma/_ext/variational/estimators/moments.py index 1124c6350..422145b12 100644 --- a/pyemma/_ext/variational/estimators/moments.py +++ b/pyemma/_ext/variational/estimators/moments.py @@ -71,7 +71,6 @@ of the mean if needed. 
""" -from __future__ import absolute_import __author__ = 'noe' diff --git a/pyemma/_ext/variational/estimators/tests/benchmark_moments.py b/pyemma/_ext/variational/estimators/tests/benchmark_moments.py index 1c3c078a0..563cd5df2 100644 --- a/pyemma/_ext/variational/estimators/tests/benchmark_moments.py +++ b/pyemma/_ext/variational/estimators/tests/benchmark_moments.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import -from __future__ import print_function __author__ = 'noe' import time @@ -158,4 +156,4 @@ def main(): if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/pyemma/_ext/variational/estimators/tests/test_moments.py b/pyemma/_ext/variational/estimators/tests/test_moments.py index 4345ffc95..9359e6657 100644 --- a/pyemma/_ext/variational/estimators/tests/test_moments.py +++ b/pyemma/_ext/variational/estimators/tests/test_moments.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import import unittest import numpy as np from .. import moments @@ -332,4 +331,4 @@ def test_moments_XY_weighted_sym_sparseconst(self): if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/pyemma/_ext/variational/estimators/tests/test_running_moments.py b/pyemma/_ext/variational/estimators/tests/test_running_moments.py index aa0adebeb..4fa9923ab 100644 --- a/pyemma/_ext/variational/estimators/tests/test_running_moments.py +++ b/pyemma/_ext/variational/estimators/tests/test_running_moments.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import import unittest import numpy as np from .. 
import running_moments diff --git a/pyemma/_ext/variational/solvers/direct.py b/pyemma/_ext/variational/solvers/direct.py index 51e8df1ef..469194d53 100644 --- a/pyemma/_ext/variational/solvers/direct.py +++ b/pyemma/_ext/variational/solvers/direct.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import import numpy as _np from ..util import ZeroRankError as _ZeroRankError diff --git a/pyemma/_ext/variational/solvers/tests/test_direct.py b/pyemma/_ext/variational/solvers/tests/test_direct.py index ae062f914..d3a916e29 100644 --- a/pyemma/_ext/variational/solvers/tests/test_direct.py +++ b/pyemma/_ext/variational/solvers/tests/test_direct.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import import unittest import numpy as np from .. import direct diff --git a/pyemma/coordinates/acf.py b/pyemma/coordinates/acf.py index 86eb8d489..746bcde3d 100644 --- a/pyemma/coordinates/acf.py +++ b/pyemma/coordinates/acf.py @@ -18,7 +18,6 @@ -from __future__ import absolute_import, print_function import numpy as np import sys diff --git a/pyemma/coordinates/clustering/assign.py b/pyemma/coordinates/clustering/assign.py index da343f453..bb4f84a38 100644 --- a/pyemma/coordinates/clustering/assign.py +++ b/pyemma/coordinates/clustering/assign.py @@ -22,7 +22,6 @@ @author: marscher ''' -from __future__ import absolute_import import numpy as np diff --git a/pyemma/coordinates/clustering/interface.py b/pyemma/coordinates/clustering/interface.py index 25bc4d575..babc8bec8 100644 --- a/pyemma/coordinates/clustering/interface.py +++ b/pyemma/coordinates/clustering/interface.py @@ -22,7 +22,6 @@ @author: marscher ''' -from __future__ import absolute_import import os diff --git a/pyemma/coordinates/clustering/kmeans.py b/pyemma/coordinates/clustering/kmeans.py index a36c83f5f..68fa580a0 100644 --- a/pyemma/coordinates/clustering/kmeans.py +++ b/pyemma/coordinates/clustering/kmeans.py @@ -35,6 +35,7 @@ from pyemma.util.units import bytes_to_string from pyemma.util.contexts import 
random_seed +from six.moves import range import numpy as np diff --git a/pyemma/coordinates/clustering/regspace.py b/pyemma/coordinates/clustering/regspace.py index 8677c4930..80413061e 100644 --- a/pyemma/coordinates/clustering/regspace.py +++ b/pyemma/coordinates/clustering/regspace.py @@ -23,7 +23,6 @@ @author: marscher ''' -from __future__ import absolute_import import warnings diff --git a/pyemma/coordinates/clustering/tests/test_assign.py b/pyemma/coordinates/clustering/tests/test_assign.py index 94d8e04d9..dab49c7b1 100644 --- a/pyemma/coordinates/clustering/tests/test_assign.py +++ b/pyemma/coordinates/clustering/tests/test_assign.py @@ -24,7 +24,7 @@ from mock import patch from pyemma.util.files import TemporaryDirectory from logging import getLogger - +from six.moves import range import numpy as np import pyemma.coordinates as coor import pyemma.util.types as types diff --git a/pyemma/coordinates/clustering/tests/test_cluster.py b/pyemma/coordinates/clustering/tests/test_cluster.py index 725d07365..8bb701ecc 100644 --- a/pyemma/coordinates/clustering/tests/test_cluster.py +++ b/pyemma/coordinates/clustering/tests/test_cluster.py @@ -18,7 +18,6 @@ -from __future__ import absolute_import import unittest import os import tempfile @@ -196,4 +195,4 @@ def setUpClass(cls): if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/pyemma/coordinates/clustering/tests/test_cluster_samples.py b/pyemma/coordinates/clustering/tests/test_cluster_samples.py index a2a6ea660..6b6ce90f0 100644 --- a/pyemma/coordinates/clustering/tests/test_cluster_samples.py +++ b/pyemma/coordinates/clustering/tests/test_cluster_samples.py @@ -24,7 +24,6 @@ @author: gph82, clonker """ -from __future__ import absolute_import import unittest @@ -72,4 +71,4 @@ def test_sample_indexes_by_state(self): if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git 
a/pyemma/coordinates/clustering/tests/test_kmeans.py b/pyemma/coordinates/clustering/tests/test_kmeans.py index f21ae9107..56ff9e37d 100644 --- a/pyemma/coordinates/clustering/tests/test_kmeans.py +++ b/pyemma/coordinates/clustering/tests/test_kmeans.py @@ -20,7 +20,6 @@ @author: marscher ''' -from __future__ import absolute_import import os import random diff --git a/pyemma/coordinates/clustering/tests/test_mini_batch_kmeans.py b/pyemma/coordinates/clustering/tests/test_mini_batch_kmeans.py index 400d755ed..745c722da 100644 --- a/pyemma/coordinates/clustering/tests/test_mini_batch_kmeans.py +++ b/pyemma/coordinates/clustering/tests/test_mini_batch_kmeans.py @@ -16,7 +16,6 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . -from __future__ import absolute_import import unittest from unittest import TestCase import numpy as np diff --git a/pyemma/coordinates/clustering/tests/test_regspace.py b/pyemma/coordinates/clustering/tests/test_regspace.py index 6cff2ee12..c4fcee9e3 100644 --- a/pyemma/coordinates/clustering/tests/test_regspace.py +++ b/pyemma/coordinates/clustering/tests/test_regspace.py @@ -23,7 +23,6 @@ @author: marscher ''' -from __future__ import absolute_import import itertools import unittest diff --git a/pyemma/coordinates/clustering/tests/test_uniform_time.py b/pyemma/coordinates/clustering/tests/test_uniform_time.py index 442ff8ca6..2d51e4207 100644 --- a/pyemma/coordinates/clustering/tests/test_uniform_time.py +++ b/pyemma/coordinates/clustering/tests/test_uniform_time.py @@ -23,7 +23,6 @@ @author: marscher ''' -from __future__ import absolute_import import unittest import numpy as np @@ -69,4 +68,4 @@ def test_big_k(self): if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/pyemma/coordinates/clustering/uniform_time.py b/pyemma/coordinates/clustering/uniform_time.py index 329d2549b..70db3f683 100644 --- 
a/pyemma/coordinates/clustering/uniform_time.py +++ b/pyemma/coordinates/clustering/uniform_time.py @@ -17,7 +17,6 @@ # along with this program. If not, see . -from __future__ import absolute_import, division import math diff --git a/pyemma/coordinates/data/_base/iterable.py b/pyemma/coordinates/data/_base/iterable.py index d67eb3006..e3a07b6fa 100644 --- a/pyemma/coordinates/data/_base/iterable.py +++ b/pyemma/coordinates/data/_base/iterable.py @@ -15,7 +15,6 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . -from __future__ import print_function from abc import ABCMeta, abstractmethod import numpy as np import six diff --git a/pyemma/coordinates/data/_base/random_accessible.py b/pyemma/coordinates/data/_base/random_accessible.py index 61c35ff43..652e6ecea 100644 --- a/pyemma/coordinates/data/_base/random_accessible.py +++ b/pyemma/coordinates/data/_base/random_accessible.py @@ -2,6 +2,7 @@ import numpy as np import numbers + import six diff --git a/pyemma/coordinates/data/_base/streaming_estimator.py b/pyemma/coordinates/data/_base/streaming_estimator.py index 89258e636..7576d5eda 100644 --- a/pyemma/coordinates/data/_base/streaming_estimator.py +++ b/pyemma/coordinates/data/_base/streaming_estimator.py @@ -16,7 +16,6 @@ # along with this program. If not, see . -from __future__ import absolute_import from pyemma._base.estimator import Estimator from pyemma.coordinates.data import DataInMemory diff --git a/pyemma/coordinates/data/_base/transformer.py b/pyemma/coordinates/data/_base/transformer.py index 089da7a59..204ae616b 100644 --- a/pyemma/coordinates/data/_base/transformer.py +++ b/pyemma/coordinates/data/_base/transformer.py @@ -16,12 +16,10 @@ # along with this program. If not, see . 
-from __future__ import absolute_import from abc import ABCMeta, abstractmethod import numpy as np -import six from pyemma._ext.sklearn.base import TransformerMixin from pyemma.coordinates.data._base.datasource import DataSource, DataSourceIterator @@ -35,7 +33,7 @@ __author__ = 'noe, marscher' -class Transformer(six.with_metaclass(ABCMeta, TransformerMixin)): +class Transformer(TransformerMixin, metaclass=ABCMeta): """ A transformer takes data and transforms it """ @abstractmethod diff --git a/pyemma/coordinates/data/data_in_memory.py b/pyemma/coordinates/data/data_in_memory.py index de19e2191..6ffbc1855 100644 --- a/pyemma/coordinates/data/data_in_memory.py +++ b/pyemma/coordinates/data/data_in_memory.py @@ -16,7 +16,6 @@ # along with this program. If not, see . -from __future__ import absolute_import import functools import numbers diff --git a/pyemma/coordinates/data/feature_reader.py b/pyemma/coordinates/data/feature_reader.py index 69741c02f..c01c809a2 100644 --- a/pyemma/coordinates/data/feature_reader.py +++ b/pyemma/coordinates/data/feature_reader.py @@ -16,7 +16,6 @@ # along with this program. If not, see . -from __future__ import absolute_import import mdtraj import numpy as np diff --git a/pyemma/coordinates/data/featurization/featurizer.py b/pyemma/coordinates/data/featurization/featurizer.py index 496f3e564..eedb3cd6b 100644 --- a/pyemma/coordinates/data/featurization/featurizer.py +++ b/pyemma/coordinates/data/featurization/featurizer.py @@ -15,7 +15,6 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . 
-from __future__ import absolute_import import warnings diff --git a/pyemma/coordinates/data/numpy_filereader.py b/pyemma/coordinates/data/numpy_filereader.py index 3855f8e9b..006a7e287 100644 --- a/pyemma/coordinates/data/numpy_filereader.py +++ b/pyemma/coordinates/data/numpy_filereader.py @@ -20,7 +20,6 @@ @author: marscher ''' -from __future__ import absolute_import import functools diff --git a/pyemma/coordinates/data/py_csv_reader.py b/pyemma/coordinates/data/py_csv_reader.py index 8914a8b32..d17196862 100644 --- a/pyemma/coordinates/data/py_csv_reader.py +++ b/pyemma/coordinates/data/py_csv_reader.py @@ -20,7 +20,6 @@ @author: marscher """ -from __future__ import absolute_import import csv import os diff --git a/pyemma/coordinates/data/sources_merger.py b/pyemma/coordinates/data/sources_merger.py index 1e663352f..9472d1750 100644 --- a/pyemma/coordinates/data/sources_merger.py +++ b/pyemma/coordinates/data/sources_merger.py @@ -21,7 +21,7 @@ class SourcesMerger(DataSource, SerializableMixIn): chunk: int chunk size to use for underlying iterators. """ - def __init__(self, sources, chunk=5000): + def __init__(self, sources: [list, tuple], chunk=5000): super(SourcesMerger, self).__init__(chunksize=chunk) self.sources = sources self._is_reader = True diff --git a/pyemma/coordinates/data/util/frames_from_file.py b/pyemma/coordinates/data/util/frames_from_file.py index 7f0cc6a56..23694e44c 100644 --- a/pyemma/coordinates/data/util/frames_from_file.py +++ b/pyemma/coordinates/data/util/frames_from_file.py @@ -15,7 +15,6 @@ # # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . 
-from __future__ import absolute_import import itertools from logging import getLogger diff --git a/pyemma/coordinates/data/util/reader_utils.py b/pyemma/coordinates/data/util/reader_utils.py index db1024639..7e1e52aff 100644 --- a/pyemma/coordinates/data/util/reader_utils.py +++ b/pyemma/coordinates/data/util/reader_utils.py @@ -23,6 +23,7 @@ import numpy as np import os +from six import string_types def create_file_reader(input_files, topology, featurizer, chunksize=None, **kw): r""" @@ -43,8 +44,7 @@ def create_file_reader(input_files, topology, featurizer, chunksize=None, **kw): from pyemma.coordinates.data.py_csv_reader import PyCSVReader from pyemma.coordinates.data import FeatureReader from pyemma.coordinates.data.fragmented_trajectory_reader import FragmentedTrajectoryReader - import six - str = six.string_types + # fragmented trajectories if (isinstance(input_files, (list, tuple)) and len(input_files) > 0 and @@ -52,15 +52,15 @@ def create_file_reader(input_files, topology, featurizer, chunksize=None, **kw): return FragmentedTrajectoryReader(input_files, topology, chunksize, featurizer) # normal trajectories - if (isinstance(input_files, str) + if (isinstance(input_files, string_types) or (isinstance(input_files, (list, tuple)) - and (any(isinstance(item, str) for item in input_files) + and (any(isinstance(item, string_types) for item in input_files) or len(input_files) is 0))): reader = None # check: if single string create a one-element list - if isinstance(input_files, str): + if isinstance(input_files, string_types): input_list = [input_files] - elif len(input_files) > 0 and all(isinstance(item, str) for item in input_files): + elif len(input_files) > 0 and all(isinstance(item, string_types) for item in input_files): input_list = input_files else: if len(input_files) is 0: @@ -177,7 +177,7 @@ def preallocate_empty_trajectory(top, n_frames=1): def enforce_top(top): - if isinstance(top, str): + if isinstance(top, string_types): top = md.load(top).top 
elif isinstance(top, md.Trajectory): top = top.top diff --git a/pyemma/coordinates/data/util/traj_info_cache.py b/pyemma/coordinates/data/util/traj_info_cache.py index 2a0c55acd..d6915fd95 100644 --- a/pyemma/coordinates/data/util/traj_info_cache.py +++ b/pyemma/coordinates/data/util/traj_info_cache.py @@ -20,7 +20,6 @@ @author: marscher ''' -from __future__ import absolute_import import hashlib import os diff --git a/pyemma/coordinates/estimation/covariance.py b/pyemma/coordinates/estimation/covariance.py index 2405f3200..4eb12e739 100644 --- a/pyemma/coordinates/estimation/covariance.py +++ b/pyemma/coordinates/estimation/covariance.py @@ -77,8 +77,8 @@ class LaggedCovariance(StreamingEstimator): Use only every stride-th time step. By default, every time step is used. skip : int, optional, default=0 skip the first initial n frames per trajectory. - chunksize : deprecated, default=NoTImplemented - The chunk size can be se during estimation. + chunksize : deprecated, default=NotImplemented + The chunk size can be set during estimation. """ def __init__(self, c00=True, c0t=False, ctt=False, remove_constant_mean=None, remove_data_mean=False, reversible=False, diff --git a/pyemma/coordinates/estimation/koopman.py b/pyemma/coordinates/estimation/koopman.py index b97c2cc65..b685d1eac 100644 --- a/pyemma/coordinates/estimation/koopman.py +++ b/pyemma/coordinates/estimation/koopman.py @@ -15,7 +15,6 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . -from __future__ import absolute_import import numpy as np import scipy.linalg as scl diff --git a/pyemma/coordinates/pipelines.py b/pyemma/coordinates/pipelines.py index 64cdadca4..82b26d6b1 100644 --- a/pyemma/coordinates/pipelines.py +++ b/pyemma/coordinates/pipelines.py @@ -16,7 +16,6 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . 
-from __future__ import absolute_import from logging import getLogger diff --git a/pyemma/coordinates/tests/__init__.py b/pyemma/coordinates/tests/__init__.py index ccfa38071..b19ccf90e 100644 --- a/pyemma/coordinates/tests/__init__.py +++ b/pyemma/coordinates/tests/__init__.py @@ -16,4 +16,3 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . -from __future__ import absolute_import diff --git a/pyemma/coordinates/tests/test_acf.py b/pyemma/coordinates/tests/test_acf.py index 209bc3beb..2d139fdc5 100644 --- a/pyemma/coordinates/tests/test_acf.py +++ b/pyemma/coordinates/tests/test_acf.py @@ -18,7 +18,6 @@ -from __future__ import absolute_import import unittest import numpy as np @@ -43,6 +42,6 @@ def test(self): refacf /= refacf[0] # normalize np.testing.assert_allclose(refacf, testacf) - + if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/pyemma/coordinates/tests/test_covar_estimator.py b/pyemma/coordinates/tests/test_covar_estimator.py index 9cdd9a4ed..cf80a397a 100644 --- a/pyemma/coordinates/tests/test_covar_estimator.py +++ b/pyemma/coordinates/tests/test_covar_estimator.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import import unittest import numpy as np diff --git a/pyemma/coordinates/tests/test_csvreader.py b/pyemma/coordinates/tests/test_csvreader.py index 4c1a5996f..10db86cda 100644 --- a/pyemma/coordinates/tests/test_csvreader.py +++ b/pyemma/coordinates/tests/test_csvreader.py @@ -20,7 +20,6 @@ @author: marscher ''' -from __future__ import absolute_import import numpy as np import unittest diff --git a/pyemma/coordinates/tests/test_datainmemory.py b/pyemma/coordinates/tests/test_datainmemory.py index fd1dce01d..b24c828f0 100644 --- a/pyemma/coordinates/tests/test_datainmemory.py +++ b/pyemma/coordinates/tests/test_datainmemory.py @@ -16,7 +16,6 @@ # along with this program. If not, see . 
-from __future__ import absolute_import import pyemma diff --git a/pyemma/coordinates/tests/test_discretizer.py b/pyemma/coordinates/tests/test_discretizer.py index 13605841b..2e189494a 100644 --- a/pyemma/coordinates/tests/test_discretizer.py +++ b/pyemma/coordinates/tests/test_discretizer.py @@ -22,7 +22,6 @@ @author: marscher ''' -from __future__ import absolute_import import os import tempfile import unittest diff --git a/pyemma/coordinates/tests/test_featurereader.py b/pyemma/coordinates/tests/test_featurereader.py index 1a88015e9..8d70c68be 100644 --- a/pyemma/coordinates/tests/test_featurereader.py +++ b/pyemma/coordinates/tests/test_featurereader.py @@ -20,7 +20,6 @@ @author: marscher ''' -from __future__ import absolute_import import glob import tempfile diff --git a/pyemma/coordinates/tests/test_featurereader_and_tica.py b/pyemma/coordinates/tests/test_featurereader_and_tica.py index f79edcfc6..047ccfc10 100644 --- a/pyemma/coordinates/tests/test_featurereader_and_tica.py +++ b/pyemma/coordinates/tests/test_featurereader_and_tica.py @@ -22,9 +22,7 @@ @author: Fabian Paul ''' -from __future__ import print_function -from __future__ import absolute_import import unittest import os import tempfile diff --git a/pyemma/coordinates/tests/test_featurereader_and_tica_projection.py b/pyemma/coordinates/tests/test_featurereader_and_tica_projection.py index 4b62d2e6e..98bb7e29b 100644 --- a/pyemma/coordinates/tests/test_featurereader_and_tica_projection.py +++ b/pyemma/coordinates/tests/test_featurereader_and_tica_projection.py @@ -23,8 +23,6 @@ @author: Fabian Paul ''' -from __future__ import absolute_import -from __future__ import print_function import os import tempfile diff --git a/pyemma/coordinates/tests/test_featurizer.py b/pyemma/coordinates/tests/test_featurizer.py index 484affc4d..f6f894748 100644 --- a/pyemma/coordinates/tests/test_featurizer.py +++ b/pyemma/coordinates/tests/test_featurizer.py @@ -1,3 +1,4 @@ + # This file is part of PyEMMA. 
# # Copyright (c) 2015, 2014 Computational Molecular Biology Group, Freie Universitaet Berlin (GER) diff --git a/pyemma/coordinates/tests/test_frames_from_file.py b/pyemma/coordinates/tests/test_frames_from_file.py index dbca37879..3a5c97840 100644 --- a/pyemma/coordinates/tests/test_frames_from_file.py +++ b/pyemma/coordinates/tests/test_frames_from_file.py @@ -24,7 +24,6 @@ @author: gph82, clonker ''' -from __future__ import absolute_import import pkg_resources import unittest diff --git a/pyemma/coordinates/tests/test_numpyfilereader.py b/pyemma/coordinates/tests/test_numpyfilereader.py index 0076f629c..8862c525e 100644 --- a/pyemma/coordinates/tests/test_numpyfilereader.py +++ b/pyemma/coordinates/tests/test_numpyfilereader.py @@ -22,8 +22,6 @@ @author: marscher ''' -from __future__ import absolute_import -from __future__ import print_function import shutil import tempfile diff --git a/pyemma/coordinates/tests/test_pca.py b/pyemma/coordinates/tests/test_pca.py index 77f55f982..c4901e8d0 100644 --- a/pyemma/coordinates/tests/test_pca.py +++ b/pyemma/coordinates/tests/test_pca.py @@ -23,7 +23,6 @@ @author: marscher ''' -from __future__ import absolute_import import unittest import os import pkg_resources diff --git a/pyemma/coordinates/tests/test_pipeline.py b/pyemma/coordinates/tests/test_pipeline.py index 833c31431..5558a6588 100644 --- a/pyemma/coordinates/tests/test_pipeline.py +++ b/pyemma/coordinates/tests/test_pipeline.py @@ -18,9 +18,7 @@ -from __future__ import print_function -from __future__ import absolute_import import unittest import os diff --git a/pyemma/coordinates/tests/test_random_access_stride.py b/pyemma/coordinates/tests/test_random_access_stride.py index 9732ed828..26f8e8aa2 100644 --- a/pyemma/coordinates/tests/test_random_access_stride.py +++ b/pyemma/coordinates/tests/test_random_access_stride.py @@ -15,7 +15,6 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . 
-from __future__ import absolute_import import os import tempfile @@ -469,7 +468,7 @@ def test_RA_high_stride(self): for ext in savable_formats_mdtra_18: traj = create_traj(length=n, dir=self.tmpdir, format=ext)[0] - from mock import patch + from unittest.mock import patch # temporarily overwrite the memory cutoff with a smaller value, to trigger the switch to RA stride. with patch('pyemma.coordinates.util.patches.iterload.MEMORY_CUTOFF', n_bytes - 1): r = coor.source(traj, top=get_top()) diff --git a/pyemma/coordinates/tests/test_save_traj.py b/pyemma/coordinates/tests/test_save_traj.py index 461df3581..9b9595e9f 100644 --- a/pyemma/coordinates/tests/test_save_traj.py +++ b/pyemma/coordinates/tests/test_save_traj.py @@ -24,7 +24,6 @@ @author: gph82, clonker """ -from __future__ import absolute_import import unittest import os diff --git a/pyemma/coordinates/tests/test_save_trajs.py b/pyemma/coordinates/tests/test_save_trajs.py index f9d3c7ddd..c41d41079 100644 --- a/pyemma/coordinates/tests/test_save_trajs.py +++ b/pyemma/coordinates/tests/test_save_trajs.py @@ -24,7 +24,6 @@ @author: gph82, clonker """ -from __future__ import absolute_import import unittest import os @@ -165,4 +164,4 @@ def test_out_of_bound_indexes(self): save_trajs(self.reader, self.sets, outfiles=self.one_pass_files) if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/pyemma/coordinates/tests/test_source.py b/pyemma/coordinates/tests/test_source.py index a7e383b3c..b779721b7 100644 --- a/pyemma/coordinates/tests/test_source.py +++ b/pyemma/coordinates/tests/test_source.py @@ -18,7 +18,6 @@ -from __future__ import absolute_import import unittest import os import numpy as np @@ -177,4 +176,4 @@ def test_trajfiles(self): assert types.is_list_of_string(self.inp.filenames) if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/pyemma/coordinates/tests/test_stride.py 
b/pyemma/coordinates/tests/test_stride.py index 7cc064c22..8252e6471 100644 --- a/pyemma/coordinates/tests/test_stride.py +++ b/pyemma/coordinates/tests/test_stride.py @@ -18,9 +18,7 @@ -from __future__ import print_function -from __future__ import absolute_import import unittest import os import tempfile @@ -120,4 +118,4 @@ def tearDownClass(cls): super(TestStride, cls).tearDownClass() if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/pyemma/coordinates/tests/test_tica.py b/pyemma/coordinates/tests/test_tica.py index 3219f6f45..de81ca9a9 100644 --- a/pyemma/coordinates/tests/test_tica.py +++ b/pyemma/coordinates/tests/test_tica.py @@ -23,7 +23,6 @@ @author: marscher """ -from __future__ import absolute_import import unittest import os import pkg_resources diff --git a/pyemma/coordinates/tests/test_traj_info_cache.py b/pyemma/coordinates/tests/test_traj_info_cache.py index a7eaa0797..c578e7c3b 100644 --- a/pyemma/coordinates/tests/test_traj_info_cache.py +++ b/pyemma/coordinates/tests/test_traj_info_cache.py @@ -20,7 +20,6 @@ @author: marscher ''' -from __future__ import absolute_import, print_function from tempfile import NamedTemporaryFile @@ -28,7 +27,7 @@ import tempfile import unittest -import mock +from unittest import mock from pyemma.coordinates import api from pyemma.coordinates.data.feature_reader import FeatureReader diff --git a/pyemma/coordinates/transform/pca.py b/pyemma/coordinates/transform/pca.py index 15dd449c8..58967f9dc 100644 --- a/pyemma/coordinates/transform/pca.py +++ b/pyemma/coordinates/transform/pca.py @@ -17,7 +17,6 @@ # along with this program. If not, see . 
-from __future__ import absolute_import import math diff --git a/pyemma/coordinates/transform/tica.py b/pyemma/coordinates/transform/tica.py index e5dcfb403..0153e25db 100644 --- a/pyemma/coordinates/transform/tica.py +++ b/pyemma/coordinates/transform/tica.py @@ -20,7 +20,6 @@ @author: marscher ''' -from __future__ import absolute_import import numpy as np from decorator import decorator diff --git a/pyemma/coordinates/transform/vamp.py b/pyemma/coordinates/transform/vamp.py index d9f7e7e48..ac6dbcc30 100644 --- a/pyemma/coordinates/transform/vamp.py +++ b/pyemma/coordinates/transform/vamp.py @@ -23,6 +23,7 @@ import numpy as np from pyemma._base.model import Model +from pyemma._base.serialization.serialization import SerializableMixIn from pyemma.util.annotators import fix_docs from pyemma.util.types import ensure_ndarray_or_None, ensure_ndarray from pyemma._ext.variational.solvers.direct import spd_inv_sqrt @@ -36,9 +37,11 @@ __all__ = ['VAMP'] -class VAMPModel(Model): - # TODO: remove dummy when bugfix from Martin is committed - def set_model_params(self, dummy, mean_0, mean_t, C00, Ctt, C0t, U, V, singular_values, cumvar, dim, epsilon): +class VAMPModel(Model, SerializableMixIn): + __serialize_version = 0 + __serialize_fields = ('_U', '_V', '_svd_performed') + + def set_model_params(self, mean_0, mean_t, C00, Ctt, C0t, U, V, singular_values, cumvar, dim, epsilon): self.mean_0 = mean_0 self.mean_t = mean_t self.C00 = C00 @@ -291,9 +294,11 @@ def score(self, test_model=None, score_method='VAMP2'): @fix_docs -class VAMP(StreamingEstimationTransformer): +class VAMP(StreamingEstimationTransformer, SerializableMixIn): r"""Variational approach for Markov processes (VAMP)""" + __serialize_version = 0 + def describe(self): return "[VAMP, lag = %i; max. output dim. 
= %s]" % (self._lag, str(self.dim)) @@ -519,10 +524,6 @@ def expectation(self, statistics, observables, lag_multiple=1, statistics_mean_f def cktest(self, n_observables=None, observables='psi', statistics='phi', mlags=10, n_jobs=1, show_progress=True, iterable=None): - # drop reference to LaggedCovariance to avoid problems during cloning - # In future pyemma versions, this will be no longer a problem... - self._covar = None - if n_observables is not None: if n_observables > self.dimension(): warnings.warn('Selected singular functions as observables but dimension ' @@ -557,10 +558,6 @@ def cktest(self, n_observables=None, observables='psi', statistics='phi', mlags= def score(self, test_data=None, score_method='VAMP2'): from pyemma._ext.sklearn.base import clone as clone_estimator - # drop reference to LaggedCovariance to avoid problems during cloning - # In future pyemma versions, this will be no longer a problem... - self._covar = None - est = clone_estimator(self) if test_data is None: @@ -571,6 +568,9 @@ def score(self, test_data=None, score_method='VAMP2'): class VAMPChapmanKolmogorovValidator(LaggedModelValidator): + __serialize_version = 0 + __serialize_fields = ('nsets', 'statistics', 'observables', 'observables_mean_free', 'statistics_mean_free') + def __init__(self, model, estimator, observables, statistics, observables_mean_free, statistics_mean_free, mlags=10, n_jobs=1, show_progress=True): LaggedModelValidator.__init__(self, model, estimator, mlags=mlags, diff --git a/pyemma/coordinates/util/patches.py b/pyemma/coordinates/util/patches.py index 849e27bff..82978c2ed 100644 --- a/pyemma/coordinates/util/patches.py +++ b/pyemma/coordinates/util/patches.py @@ -22,7 +22,6 @@ @author: marscher ''' -from __future__ import absolute_import from collections import namedtuple @@ -274,7 +273,7 @@ def _random_access_generator(self, f): def _read_traj_data(atom_indices, f, n_frames, **kwargs): """ - + Parameters ---------- atom_indices @@ -285,7 +284,7 @@ def 
_read_traj_data(atom_indices, f, n_frames, **kwargs): Returns ------- data : TrajData(xyz, unitcell_length, unitcell_angles, box) - + Format read() return values: amber_netcdf_restart_f: xyz [Ang], time, cell_l, cell_a amber restart: xyz[Ang], time, cell_l, cell_a @@ -303,7 +302,7 @@ def _read_traj_data(atom_indices, f, n_frames, **kwargs): trr: xyz[nm], time, step, box (n, 3, 3), lambd? xtc: xyz[nm], time, step, box - + xyz: xyz lh5: xyz [nm] arc: xyz[Ang] diff --git a/pyemma/coordinates/util/stat.py b/pyemma/coordinates/util/stat.py index 9d8b4eed4..615d574e6 100644 --- a/pyemma/coordinates/util/stat.py +++ b/pyemma/coordinates/util/stat.py @@ -18,7 +18,6 @@ -from __future__ import absolute_import import numpy as np from pyemma.util.annotators import deprecated @@ -88,4 +87,4 @@ def histogram(transform, dimensions, nbins): for _, chunk in transform: part, _ = np.histogramdd(chunk[:, dimensions], bins=bins) res += part - return res, bins \ No newline at end of file + return res, bins diff --git a/pyemma/datasets/api.py b/pyemma/datasets/api.py index 663a9bb8e..c73ab564f 100644 --- a/pyemma/datasets/api.py +++ b/pyemma/datasets/api.py @@ -16,7 +16,6 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . -from __future__ import absolute_import from pyemma.datasets.double_well_thermo import DoubleWellSampler as _DWS __author__ = 'noe' diff --git a/pyemma/datasets/double_well_discrete.py b/pyemma/datasets/double_well_discrete.py index 984d3f460..2b1710300 100644 --- a/pyemma/datasets/double_well_discrete.py +++ b/pyemma/datasets/double_well_discrete.py @@ -16,7 +16,10 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . +<<<<<<< HEAD from __future__ import absolute_import +======= +>>>>>>> dea94f91... updated to devel, made serializable, removed obsolete hacks. 
__author__ = 'noe' @@ -86,4 +89,4 @@ def generate_traj(self, N, start=None, stop=None, dt=1): def generate_trajs(self, M, N, start=None, stop=None, dt=1): """ Generates M random trajectories of length N each with time step dt """ from msmtools.generation import generate_trajs - return generate_trajs(self._P, M, N, start=start, stop=stop, dt=dt) \ No newline at end of file + return generate_trajs(self._P, M, N, start=start, stop=stop, dt=dt) diff --git a/pyemma/datasets/double_well_thermo.py b/pyemma/datasets/double_well_thermo.py index aa0326625..840583913 100644 --- a/pyemma/datasets/double_well_thermo.py +++ b/pyemma/datasets/double_well_thermo.py @@ -15,7 +15,6 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . -from __future__ import absolute_import import numpy as _np from pyemma.coordinates import assign_to_centers as _assign_to_centers diff --git a/pyemma/msm/__init__.py b/pyemma/msm/__init__.py index 9f6739baf..bf8d5dc5e 100644 --- a/pyemma/msm/__init__.py +++ b/pyemma/msm/__init__.py @@ -78,7 +78,6 @@ """ -from __future__ import absolute_import as _ ###################################################### from msmtools.analysis.dense.pcca import PCCA diff --git a/pyemma/msm/api.py b/pyemma/msm/api.py index b4f7d1fd4..f4658e5ee 100644 --- a/pyemma/msm/api.py +++ b/pyemma/msm/api.py @@ -20,7 +20,6 @@ """ -from __future__ import absolute_import from .estimators import MaximumLikelihoodHMSM as _ML_HMSM from .estimators import BayesianMSM as _Bayes_MSM from .estimators import BayesianHMSM as _Bayes_HMSM diff --git a/pyemma/msm/estimators/__init__.py b/pyemma/msm/estimators/__init__.py index b562a3a9e..3d0008970 100644 --- a/pyemma/msm/estimators/__init__.py +++ b/pyemma/msm/estimators/__init__.py @@ -16,12 +16,11 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . 
-from __future__ import absolute_import __author__ = 'noe' from .maximum_likelihood_msm import MaximumLikelihoodMSM from .maximum_likelihood_msm import OOMReweightedMSM -from .maximum_likelihood_msm import AugmentedMarkovModel +from .maximum_likelihood_msm import AugmentedMarkovModel from .bayesian_msm import BayesianMSM from .maximum_likelihood_hmsm import MaximumLikelihoodHMSM from .bayesian_hmsm import BayesianHMSM diff --git a/pyemma/msm/estimators/_dtraj_stats.py b/pyemma/msm/estimators/_dtraj_stats.py index da6d1e61d..5678bc305 100644 --- a/pyemma/msm/estimators/_dtraj_stats.py +++ b/pyemma/msm/estimators/_dtraj_stats.py @@ -16,7 +16,10 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . +<<<<<<< HEAD from __future__ import absolute_import +======= +>>>>>>> dea94f91... updated to devel, made serializable, removed obsolete hacks. import numpy as np diff --git a/pyemma/msm/estimators/bayesian_hmsm.py b/pyemma/msm/estimators/bayesian_hmsm.py index d644302e1..9dfb578d3 100644 --- a/pyemma/msm/estimators/bayesian_hmsm.py +++ b/pyemma/msm/estimators/bayesian_hmsm.py @@ -16,7 +16,6 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . -from __future__ import absolute_import, print_function import numpy as _np diff --git a/pyemma/msm/estimators/bayesian_msm.py b/pyemma/msm/estimators/bayesian_msm.py index 8e42e42c5..321206d81 100644 --- a/pyemma/msm/estimators/bayesian_msm.py +++ b/pyemma/msm/estimators/bayesian_msm.py @@ -16,7 +16,6 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . 
-from __future__ import absolute_import diff --git a/pyemma/msm/estimators/lagged_model_validators.py b/pyemma/msm/estimators/lagged_model_validators.py index 27f193d60..da45352cf 100644 --- a/pyemma/msm/estimators/lagged_model_validators.py +++ b/pyemma/msm/estimators/lagged_model_validators.py @@ -17,7 +17,7 @@ # along with this program. If not, see . from __future__ import absolute_import - +from six.moves import range import math import numpy as np diff --git a/pyemma/msm/estimators/maximum_likelihood_hmsm.py b/pyemma/msm/estimators/maximum_likelihood_hmsm.py index d659e1d3c..5e77ab21c 100644 --- a/pyemma/msm/estimators/maximum_likelihood_hmsm.py +++ b/pyemma/msm/estimators/maximum_likelihood_hmsm.py @@ -17,7 +17,7 @@ # along with this program. If not, see . from __future__ import absolute_import -# +from six.moves import range from pyemma.util.annotators import alias, aliased, fix_docs import numpy as _np diff --git a/pyemma/msm/models/__init__.py b/pyemma/msm/models/__init__.py index 0251324d6..7db732578 100644 --- a/pyemma/msm/models/__init__.py +++ b/pyemma/msm/models/__init__.py @@ -16,7 +16,6 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . 
-from __future__ import absolute_import __author__ = 'noe' from .msm import MSM diff --git a/pyemma/msm/models/hmsm.py b/pyemma/msm/models/hmsm.py index 40b27d43d..7df98f1fc 100644 --- a/pyemma/msm/models/hmsm.py +++ b/pyemma/msm/models/hmsm.py @@ -25,7 +25,6 @@ """ -from __future__ import absolute_import import numpy as _np @@ -479,4 +478,4 @@ def simulate(self, N, start=None, stop=None, dt=1): # for each time step, sample microstate for t, h in enumerate(htraj): otraj[t] = output_distributions[h].rvs() # current cluster - return htraj, otraj \ No newline at end of file + return htraj, otraj diff --git a/pyemma/msm/models/hmsm_sampled.py b/pyemma/msm/models/hmsm_sampled.py index 80188e07d..5f2e1298b 100644 --- a/pyemma/msm/models/hmsm_sampled.py +++ b/pyemma/msm/models/hmsm_sampled.py @@ -25,7 +25,6 @@ """ -from __future__ import absolute_import from pyemma._base.model import SampledModel as _SampledModel from pyemma.msm.models.hmsm import HMSM as _HMSM diff --git a/pyemma/msm/models/msm.py b/pyemma/msm/models/msm.py index 3e53d33cc..54d61bdc2 100644 --- a/pyemma/msm/models/msm.py +++ b/pyemma/msm/models/msm.py @@ -25,7 +25,8 @@ """ -from __future__ import absolute_import + +from pyemma._base.serialization.serialization import SerializableMixIn from pyemma._base.serialization.serialization import SerializableMixIn diff --git a/pyemma/msm/models/reactive_flux.py b/pyemma/msm/models/reactive_flux.py index 08a5b22f1..08902c301 100644 --- a/pyemma/msm/models/reactive_flux.py +++ b/pyemma/msm/models/reactive_flux.py @@ -21,8 +21,6 @@ __moduleauthor__ = "Benjamin Trendelkamp-Schroer, Frank Noe" """ -from __future__ import absolute_import -from __future__ import division import numpy as np from msmtools import flux as tptapi diff --git a/pyemma/msm/tests/birth_death_chain.py b/pyemma/msm/tests/birth_death_chain.py index bd3c6e3f4..3c9655c4b 100644 --- a/pyemma/msm/tests/birth_death_chain.py +++ b/pyemma/msm/tests/birth_death_chain.py @@ -23,7 +23,6 @@ """ -from 
__future__ import absolute_import import numpy as np @@ -270,4 +269,4 @@ def rate(self, a, b): pi = self.stationary_distribution() qminus = self.committor_backward(a, b) kAB = F / (pi * qminus).sum() - return kAB \ No newline at end of file + return kAB diff --git a/pyemma/msm/tests/test_amm.py b/pyemma/msm/tests/test_amm.py index e22a6364e..16f4993e3 100644 --- a/pyemma/msm/tests/test_amm.py +++ b/pyemma/msm/tests/test_amm.py @@ -334,8 +334,7 @@ def test_simulate_MSM(self): # ---------------------------------- def test_two_state_kinetics(self): - #pass - self._two_state_kinetics(self.amm, eps=0.01) + self._two_state_kinetics(self.amm) @unittest.skipIf(six.PY2, 'only py3') def test_serialize(self): diff --git a/pyemma/msm/tests/test_bayesian_hmsm.py b/pyemma/msm/tests/test_bayesian_hmsm.py index 73b9170ec..66d35763a 100644 --- a/pyemma/msm/tests/test_bayesian_hmsm.py +++ b/pyemma/msm/tests/test_bayesian_hmsm.py @@ -16,7 +16,6 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . -from __future__ import absolute_import import unittest import numpy as np from pyemma.msm import bayesian_hidden_markov_model diff --git a/pyemma/msm/tests/test_bayesian_msm.py b/pyemma/msm/tests/test_bayesian_msm.py index 7d78501a7..c3324753a 100644 --- a/pyemma/msm/tests/test_bayesian_msm.py +++ b/pyemma/msm/tests/test_bayesian_msm.py @@ -16,7 +16,6 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . 
-from __future__ import absolute_import import unittest import numpy as np from pyemma.msm import bayesian_markov_model diff --git a/pyemma/msm/tests/test_cktest.py b/pyemma/msm/tests/test_cktest.py index 74ecc7ec1..be1d7df83 100644 --- a/pyemma/msm/tests/test_cktest.py +++ b/pyemma/msm/tests/test_cktest.py @@ -23,7 +23,6 @@ """ -from __future__ import absolute_import import unittest import numpy as np diff --git a/pyemma/msm/tests/test_estimator.py b/pyemma/msm/tests/test_estimator.py index 3979e8e50..8fea1e95f 100644 --- a/pyemma/msm/tests/test_estimator.py +++ b/pyemma/msm/tests/test_estimator.py @@ -16,7 +16,7 @@ # along with this program. If not, see . import unittest -import mock +from unittest import mock from pyemma import msm from functools import wraps diff --git a/pyemma/msm/tests/test_hmsm.py b/pyemma/msm/tests/test_hmsm.py index fc665a092..f03516785 100644 --- a/pyemma/msm/tests/test_hmsm.py +++ b/pyemma/msm/tests/test_hmsm.py @@ -21,7 +21,6 @@ """ -from __future__ import absolute_import import unittest import numpy as np diff --git a/pyemma/msm/tests/test_its.py b/pyemma/msm/tests/test_its.py index 0507003b9..651389497 100644 --- a/pyemma/msm/tests/test_its.py +++ b/pyemma/msm/tests/test_its.py @@ -24,7 +24,6 @@ """ -from __future__ import absolute_import import unittest import numpy as np from pyemma import msm diff --git a/pyemma/msm/tests/test_its_oom.py b/pyemma/msm/tests/test_its_oom.py index 997caa8cc..516f9278a 100644 --- a/pyemma/msm/tests/test_its_oom.py +++ b/pyemma/msm/tests/test_its_oom.py @@ -20,7 +20,6 @@ """ -from __future__ import absolute_import import unittest import numpy as np @@ -163,4 +162,4 @@ def test_ignore_errors(self): if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/pyemma/msm/tests/test_msm.py b/pyemma/msm/tests/test_msm.py index fb85cd68a..d137d0645 100644 --- a/pyemma/msm/tests/test_msm.py +++ b/pyemma/msm/tests/test_msm.py @@ -24,7 +24,6 @@ """ -from __future__ 
import absolute_import import unittest import numpy as np diff --git a/pyemma/msm/tests/test_oom_msm.py b/pyemma/msm/tests/test_oom_msm.py index 69e2a872a..8cc0da5f0 100644 --- a/pyemma/msm/tests/test_oom_msm.py +++ b/pyemma/msm/tests/test_oom_msm.py @@ -21,7 +21,6 @@ """ -from __future__ import absolute_import import unittest import numpy as np @@ -1651,4 +1650,4 @@ def test_simulate_MSM(self): assert (start == traj[0]) if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/pyemma/msm/tests/test_tpt.py b/pyemma/msm/tests/test_tpt.py index 4311627ab..647457c0a 100644 --- a/pyemma/msm/tests/test_tpt.py +++ b/pyemma/msm/tests/test_tpt.py @@ -24,7 +24,6 @@ """ -from __future__ import absolute_import import unittest import numpy as np from pyemma.util.numeric import assert_allclose @@ -248,4 +247,4 @@ def test_time_units(self): if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/pyemma/plots/__init__.py b/pyemma/plots/__init__.py index 962666ca7..f98e3d6d6 100644 --- a/pyemma/plots/__init__.py +++ b/pyemma/plots/__init__.py @@ -62,7 +62,6 @@ NetworkPlot """ -from __future__ import absolute_import from .timescales import plot_implied_timescales from .plots2d import contour, scatter_contour, plot_free_energy from .networks import plot_markov_model, plot_flux, plot_network, NetworkPlot diff --git a/pyemma/plots/markovtests.py b/pyemma/plots/markovtests.py index e527a3888..1d16b6deb 100644 --- a/pyemma/plots/markovtests.py +++ b/pyemma/plots/markovtests.py @@ -16,7 +16,6 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . -from __future__ import absolute_import __author__ = 'noe' import math diff --git a/pyemma/plots/networks.py b/pyemma/plots/networks.py index fa33e9f4c..80d6a18cb 100644 --- a/pyemma/plots/networks.py +++ b/pyemma/plots/networks.py @@ -16,7 +16,6 @@ # along with this program. If not, see . 
-from __future__ import absolute_import import numpy as _np import warnings from pyemma.util import types as _types diff --git a/pyemma/plots/plots2d.py b/pyemma/plots/plots2d.py index 4dc4e6d74..ded54a33e 100644 --- a/pyemma/plots/plots2d.py +++ b/pyemma/plots/plots2d.py @@ -16,7 +16,6 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . -from __future__ import absolute_import import numpy as _np diff --git a/pyemma/plots/tests/test_its.py b/pyemma/plots/tests/test_its.py index 2cc3e0411..07786ecca 100644 --- a/pyemma/plots/tests/test_its.py +++ b/pyemma/plots/tests/test_its.py @@ -22,7 +22,6 @@ @author: gph82 ''' -from __future__ import absolute_import import unittest import numpy as np @@ -61,4 +60,4 @@ def test_process(self): if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/pyemma/plots/tests/test_markovtests.py b/pyemma/plots/tests/test_markovtests.py index 1483b908c..19d1c7dd4 100644 --- a/pyemma/plots/tests/test_markovtests.py +++ b/pyemma/plots/tests/test_markovtests.py @@ -22,7 +22,6 @@ @author: marscher ''' -from __future__ import absolute_import import unittest import numpy as np import pyemma diff --git a/pyemma/plots/tests/test_networks.py b/pyemma/plots/tests/test_networks.py index 3c80ab908..a3ce51392 100644 --- a/pyemma/plots/tests/test_networks.py +++ b/pyemma/plots/tests/test_networks.py @@ -22,7 +22,6 @@ @author: marscher ''' -from __future__ import absolute_import import unittest import numpy as np diff --git a/pyemma/plots/tests/test_plots2d.py b/pyemma/plots/tests/test_plots2d.py index 3e8905a96..9d51d38a8 100644 --- a/pyemma/plots/tests/test_plots2d.py +++ b/pyemma/plots/tests/test_plots2d.py @@ -16,7 +16,6 @@ # along with this program. If not, see . 
-from __future__ import absolute_import import unittest import numpy as np diff --git a/pyemma/plots/timescales.py b/pyemma/plots/timescales.py index 11ec1ff08..bc5571313 100644 --- a/pyemma/plots/timescales.py +++ b/pyemma/plots/timescales.py @@ -16,7 +16,6 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . -from __future__ import absolute_import import numpy as _np diff --git a/pyemma/thermo/tests/test_TRAM.py b/pyemma/thermo/tests/test_TRAM.py index 9cbf80ba4..fd3d5c2a6 100644 --- a/pyemma/thermo/tests/test_TRAM.py +++ b/pyemma/thermo/tests/test_TRAM.py @@ -15,7 +15,6 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . -from __future__ import absolute_import import unittest diff --git a/pyemma/util/__init__.py b/pyemma/util/__init__.py index 97f3473a8..9bfb66daf 100644 --- a/pyemma/util/__init__.py +++ b/pyemma/util/__init__.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import from ._config import Config as _Config # default config instance diff --git a/pyemma/util/annotators.py b/pyemma/util/annotators.py index 5843d4fc9..8356a567c 100644 --- a/pyemma/util/annotators.py +++ b/pyemma/util/annotators.py @@ -15,7 +15,6 @@ # # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . 
-from __future__ import absolute_import import warnings from decorator import decorator, decorate diff --git a/pyemma/util/debug.py b/pyemma/util/debug.py index f6bece8b1..e4361e381 100644 --- a/pyemma/util/debug.py +++ b/pyemma/util/debug.py @@ -26,7 +26,6 @@ @author: marscher ''' -from __future__ import absolute_import, print_function import signal from logging import getLogger diff --git a/pyemma/util/files.py b/pyemma/util/files.py index 3dd5b894d..20574a372 100644 --- a/pyemma/util/files.py +++ b/pyemma/util/files.py @@ -22,7 +22,6 @@ @author: marscher ''' -from __future__ import absolute_import, print_function import os import errno @@ -70,4 +69,4 @@ def __enter__(self): return self.tmpdir def __exit__(self, *args): - shutil.rmtree(self.tmpdir, ignore_errors=True) \ No newline at end of file + shutil.rmtree(self.tmpdir, ignore_errors=True) diff --git a/pyemma/util/indices.py b/pyemma/util/indices.py index 81607f4b9..d2d7d5cfc 100644 --- a/pyemma/util/indices.py +++ b/pyemma/util/indices.py @@ -16,7 +16,6 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . -from __future__ import absolute_import import numpy as np diff --git a/pyemma/util/linalg.py b/pyemma/util/linalg.py index 51be657aa..56243a4b1 100644 --- a/pyemma/util/linalg.py +++ b/pyemma/util/linalg.py @@ -17,7 +17,6 @@ # along with this program. If not, see . 
-from __future__ import absolute_import import numpy as np import scipy.linalg import scipy.sparse diff --git a/pyemma/util/log.py b/pyemma/util/log.py index 219000ce9..c164daadc 100644 --- a/pyemma/util/log.py +++ b/pyemma/util/log.py @@ -20,7 +20,6 @@ @author: marscher ''' -from __future__ import absolute_import import logging from logging.config import dictConfig diff --git a/pyemma/util/numeric.py b/pyemma/util/numeric.py index d2cdd830b..897b48f64 100644 --- a/pyemma/util/numeric.py +++ b/pyemma/util/numeric.py @@ -21,7 +21,6 @@ @author: marscher ''' -from __future__ import absolute_import from numpy.testing import assert_allclose as assert_allclose_np __all__ = ['assert_allclose', diff --git a/pyemma/util/reflection.py b/pyemma/util/reflection.py index 9bfb42145..dd50365cd 100644 --- a/pyemma/util/reflection.py +++ b/pyemma/util/reflection.py @@ -16,11 +16,8 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . -from __future__ import division, print_function, absolute_import import inspect -import six -#from six import string_types from collections import namedtuple __author__ = 'noe, marscher' @@ -39,89 +36,57 @@ # This way, the caller code does not need to know whether it uses a legacy # .getargspec or bright and shiny .signature. -try: - # is it python 3.3 or higher? - inspect.signature - - # Apparently, yes. Wrap inspect.signature - - ArgSpec = namedtuple('ArgSpec', ['args', 'varargs', 'keywords', 'defaults']) - - def getargspec_no_self(func): - """inspect.getargspec replacement using inspect.signature. - - inspect.getargspec is deprecated in python 3. This is a replacement - based on the (new in python 3.3) `inspect.signature`. - - Parameters - ---------- - func : callable - A callable to inspect - - Returns - ------- - argspec : ArgSpec(args, varargs, varkw, defaults) - This is similar to the result of inspect.getargspec(func) under - python 2.x. 
- NOTE: if the first argument of `func` is self, it is *not*, I repeat - *not* included in argspec.args. - This is done for consistency between inspect.getargspec() under - python 2.x, and inspect.signature() under python 3.x. - """ - sig = inspect.signature(func) - args = [ - p.name for p in sig.parameters.values() - if p.kind == inspect.Parameter.POSITIONAL_OR_KEYWORD - ] - varargs = [ - p.name for p in sig.parameters.values() - if p.kind == inspect.Parameter.VAR_POSITIONAL - ] - varargs = varargs[0] if varargs else None - varkw = [ - p.name for p in sig.parameters.values() - if p.kind == inspect.Parameter.VAR_KEYWORD - ] - varkw = varkw[0] if varkw else None - defaults = [ - p.default for p in sig.parameters.values() - if (p.kind == inspect.Parameter.POSITIONAL_OR_KEYWORD and - p.default is not p.empty) - ] or None - - if args[0] == 'self': - args.pop(0) - - return ArgSpec(args, varargs, varkw, defaults) - -except AttributeError: - # python 2.x - def getargspec_no_self(func): - """inspect.getargspec replacement for compatibility with python 3.x. - - inspect.getargspec is deprecated in python 3. This wraps it, and - *removes* `self` from the argument list of `func`, if present. - This is done for forward compatibility with python 3. - - Parameters - ---------- - func : callable - A callable to inspect - - Returns - ------- - argspec : ArgSpec(args, varargs, varkw, defaults) - This is similar to the result of inspect.getargspec(func) under - python 2.x. - NOTE: if the first argument of `func` is self, it is *not*, I repeat - *not* included in argspec.args. - This is done for consistency between inspect.getargspec() under - python 2.x, and inspect.signature() under python 3.x. - """ - argspec = inspect.getargspec(func) - if argspec.args[0] == 'self': - argspec.args.pop(0) - return argspec + +# Apparently, yes. 
Wrap inspect.signature + +ArgSpec = namedtuple('ArgSpec', ['args', 'varargs', 'keywords', 'defaults']) + +def getargspec_no_self(func): + """inspect.getargspec replacement using inspect.signature. + + inspect.getargspec is deprecated in python 3. This is a replacement + based on the (new in python 3.3) `inspect.signature`. + + Parameters + ---------- + func : callable + A callable to inspect + + Returns + ------- + argspec : ArgSpec(args, varargs, varkw, defaults) + This is similar to the result of inspect.getargspec(func) under + python 2.x. + NOTE: if the first argument of `func` is self, it is *not*, I repeat + *not* included in argspec.args. + This is done for consistency between inspect.getargspec() under + python 2.x, and inspect.signature() under python 3.x. + """ + sig = inspect.signature(func) + args = [ + p.name for p in sig.parameters.values() + if p.kind == inspect.Parameter.POSITIONAL_OR_KEYWORD + ] + varargs = [ + p.name for p in sig.parameters.values() + if p.kind == inspect.Parameter.VAR_POSITIONAL + ] + varargs = varargs[0] if varargs else None + varkw = [ + p.name for p in sig.parameters.values() + if p.kind == inspect.Parameter.VAR_KEYWORD + ] + varkw = varkw[0] if varkw else None + defaults = [ + p.default for p in sig.parameters.values() + if (p.kind == inspect.Parameter.POSITIONAL_OR_KEYWORD and + p.default is not p.empty) + ] or None + + if args[0] == 'self': + args.pop(0) + + return ArgSpec(args, varargs, varkw, defaults) def call_member(obj, f, *args, **kwargs): @@ -138,7 +103,7 @@ def call_member(obj, f, *args, **kwargs): in that case """ # get function name - if not isinstance(f, six.string_types): + if not isinstance(f, str): fname = f.__func__.__name__ else: fname = f diff --git a/pyemma/util/statistics.py b/pyemma/util/statistics.py index 0447a3784..646cb333f 100644 --- a/pyemma/util/statistics.py +++ b/pyemma/util/statistics.py @@ -23,7 +23,6 @@ @author: noe ''' -from __future__ import absolute_import import numpy as np import math 
diff --git a/pyemma/util/tests/statistics_test.py b/pyemma/util/tests/statistics_test.py index aa9dd0c8f..43b3b2274 100644 --- a/pyemma/util/tests/statistics_test.py +++ b/pyemma/util/tests/statistics_test.py @@ -23,7 +23,6 @@ @author: noe ''' -from __future__ import absolute_import import unittest from pyemma.util import statistics import numpy as np @@ -54,4 +53,4 @@ def test_confidence_interval(self): self.assertConfidence(np.random.normal(size=10000), 0.95, 0.01) if __name__ == "__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/pyemma/util/tests/test_discrete_trajectories.py b/pyemma/util/tests/test_discrete_trajectories.py index d6df11411..b2984fd04 100644 --- a/pyemma/util/tests/test_discrete_trajectories.py +++ b/pyemma/util/tests/test_discrete_trajectories.py @@ -23,7 +23,6 @@ """ -from __future__ import absolute_import import os import unittest @@ -200,4 +199,4 @@ def test_sample_by_state_replace_subset(self): assert(dtraj[sidx[i][t,1]] == subset[i]) if __name__=="__main__": - unittest.main() \ No newline at end of file + unittest.main() diff --git a/pyemma/util/tests/test_shortcut.py b/pyemma/util/tests/test_shortcut.py index 780456333..aa68f9cdc 100644 --- a/pyemma/util/tests/test_shortcut.py +++ b/pyemma/util/tests/test_shortcut.py @@ -18,7 +18,6 @@ -from __future__ import absolute_import from pyemma.util.annotators import shortcut, aliased, alias import unittest @@ -53,4 +52,4 @@ def test_alias_class_inst(self): inst = Foo() assert hasattr(inst, "bar2") assert hasattr(inst, "bar3") - self.assertEqual(inst.bar.__doc__, inst.bar2.__doc__) \ No newline at end of file + self.assertEqual(inst.bar.__doc__, inst.bar2.__doc__) diff --git a/pyemma/util/types.py b/pyemma/util/types.py index ca3957134..ad0d827de 100644 --- a/pyemma/util/types.py +++ b/pyemma/util/types.py @@ -137,7 +137,7 @@ def is_float_array(l): return False def is_string(s): - return isinstance(s, str) + return isinstance(s, string_types) def 
is_iterable(I): return isinstance(I, collections.Iterable) @@ -147,7 +147,7 @@ def is_list(S): return isinstance(S, (list, tuple)) def is_list_of_string(S): - return isinstance(S, (list, tuple)) and (all(isinstance(s, str) for s in S)) + return isinstance(S, (list, tuple)) and (all(isinstance(s, string_types) for s in S)) def ensure_dtraj(dtraj): r"""Makes sure that dtraj is a discrete trajectory (array of int) diff --git a/setup.py b/setup.py index b2f41c2ce..b201bad95 100755 --- a/setup.py +++ b/setup.py @@ -59,6 +59,7 @@ Operating System :: MacOS :: MacOS X Operating System :: POSIX Operating System :: Microsoft :: Windows +Programming Language :: Python :: 2.7 Programming Language :: Python :: 3 Topic :: Scientific/Engineering :: Bio-Informatics Topic :: Scientific/Engineering :: Chemistry @@ -66,7 +67,12 @@ Topic :: Scientific/Engineering :: Physics """ - +from setup_util import lazy_cythonize +try: + from setuptools import setup, Extension, find_packages +except ImportError as ie: + print("PyEMMA requires setuptools. 
Please install it with conda or pip.") + sys.exit(1) ############################################################################### # Extensions From 28f3622fe958e4e8d8ca6c8aceccd83e4815a010 Mon Sep 17 00:00:00 2001 From: marscher Date: Thu, 1 Feb 2018 17:50:26 +0100 Subject: [PATCH 19/43] documentation [ci skip] --- pyemma/coordinates/api.py | 79 ++++++++++++++++++++++++++++++++++----- 1 file changed, 69 insertions(+), 10 deletions(-) diff --git a/pyemma/coordinates/api.py b/pyemma/coordinates/api.py index f8d659562..78df6af59 100644 --- a/pyemma/coordinates/api.py +++ b/pyemma/coordinates/api.py @@ -1256,17 +1256,72 @@ def tica(data=None, lag=10, dim=-1, var_cutoff=0.95, kinetic_map=True, commute_m return res -def vamp(data=None, lag=10, dim=None, scaling=None, right=True, - stride=1, skip=0, ncov_max=float('inf')): +def vamp(data=None, lag=10, dim=None, scaling=None, right=True, ncov_max=float('inf'), + stride=1, skip=0, chunksize=None): + r""" Variational approach for Markov processes (VAMP) [1]_. + + Parameters + ---------- + lag : int + lag time + dim : float or int + Number of dimensions to keep: + * if dim is not set all available ranks are kept: + n_components == min(n_samples, n_features) + * if dim is an integer >= 1, this number specifies the number + of dimensions to keep. By default this will use the kinetic + variance. + * if dim is a float with ``0 < dim < 1``, select the number + of dimensions such that the amount of kinetic variance + that needs to be explained is greater than the percentage + specified by dim. + scaling : None or string + Scaling to be applied to the VAMP modes upon transformation + * None: no scaling will be applied, variance along the mode is 1 + * 'kinetic map' or 'km': modes are scaled by singular value + right : boolean + Whether to compute the right singular functions. + If right==True, get_output() will return the right singular + functions. Otherwise, get_output() will return the left singular + functions. 
+ Beware that only frames[tau:, :] of each trajectory returned + by get_output() contain valid values of the right singular + functions. Conversely, only frames[0:-tau, :] of each + trajectory returned by get_output() contain valid values of + the left singular functions. The remaining frames might + possibly be interpreted as some extrapolation. + epsilon : float + singular value cutoff. Singular values of C0 with norms <= epsilon + will be cut off. The remaining number of singular values define + the size of the output. + stride: int, optional, default = 1 + Use only every stride-th time step. By default, every time step is used. + skip : int, default=0 + skip the first initial n frames per trajectory. + ncov_max : int, default=infinity + limit the memory usage of the algorithm from [3]_ to an amount that corresponds + to ncov_max additional copies of each correlation matrix + + References + ---------- + .. [1] Wu, H. and Noe, F. 2017. Variational approach for learning Markov processes from time series data. + arXiv:1707.04659v1 + .. [2] Noe, F. and Clementi, C. 2015. Kinetic distance and kinetic maps from molecular dynamics simulation. + J. Chem. Theory. Comput. doi:10.1021/acs.jctc.5b00553 + .. [3] Chan, T. F., Golub G. H., LeVeque R. J. 1979. Updating formulae and pairwiese algorithms for + computing sample variances. Technical Report STAN-CS-79-773, Department of Computer Science, Stanford University. + """ from pyemma.coordinates.transform.vamp import VAMP res = VAMP(lag, dim=dim, scaling=scaling, right=right, skip=skip, ncov_max=ncov_max) - return _param_stage(data, res, stride=stride) + if data is not None: + res.estimate(data, stride=stride, chunksize=chunksize) + return res def covariance_lagged(data=None, c00=True, c0t=True, ctt=False, remove_constant_mean=None, remove_data_mean=False, - reversible=False, bessel=True, lag=0, weights="empirical", stride=1, skip=0, chunksize=None): - """ - Compute lagged covariances between time series. 
If data is available as an array of size (TxN), where T is the + reversible=False, bessel=True, lag=0, weights="empirical", stride=1, skip=0, chunksize=None, + ncov_max=float('inf')): + r"""Compute lagged covariances between time series. If data is available as an array of size (TxN), where T is the number of time steps and N the number of dimensions, this function can compute lagged covariances like .. math:: @@ -1314,6 +1369,9 @@ def covariance_lagged(data=None, c00=True, c0t=True, ctt=False, remove_constant_ to optimize thread usage and gain processing speed. If None is passed, use the default value of the underlying reader/data source. Choose zero to disable chunking at all. + ncov_max : int, default=infinity + limit the memory usage of the algorithm from [2]_ to an amount that corresponds + to ncov_max additional copies of each correlation matrix Returns ------- @@ -1322,8 +1380,9 @@ def covariance_lagged(data=None, c00=True, c0t=True, ctt=False, remove_constant_ .. [1] Wu, H., Nueske, F., Paul, F., Klus, S., Koltai, P., and Noe, F. 2016. Bias reduced variational approximation of molecular kinetics from short off-equilibrium simulations. J. Chem. Phys. (submitted) - - """ + .. [2] Chan, T. F., Golub G. H., LeVeque R. J. 1979. Updating formulae and pairwiese algorithms for + computing sample variances. Technical Report STAN-CS-79-773, Department of Computer Science, Stanford University. 
+ """ from pyemma.coordinates.estimation.covariance import LaggedCovariance from pyemma.coordinates.estimation.koopman import _KoopmanEstimator @@ -1332,7 +1391,7 @@ def covariance_lagged(data=None, c00=True, c0t=True, ctt=False, remove_constant_ if weights== "koopman": if data is None: raise ValueError("Data must be supplied for reweighting='koopman'") - koop = _KoopmanEstimator(lag=lag, stride=stride, skip=skip) + koop = _KoopmanEstimator(lag=lag, stride=stride, skip=skip, ncov_max=ncov_max) koop.estimate(data, chunksize=chunksize) weights = koop.weights elif weights == "empirical": @@ -1350,7 +1409,7 @@ def covariance_lagged(data=None, c00=True, c0t=True, ctt=False, remove_constant_ # chunksize is an estimation parameter for now. lc = LaggedCovariance(c00=c00, c0t=c0t, ctt=ctt, remove_constant_mean=remove_constant_mean, remove_data_mean=remove_data_mean, reversible=reversible, bessel=bessel, lag=lag, - weights=weights, stride=stride, skip=skip) + weights=weights, stride=stride, skip=skip, ncov_max=ncov_max) if data is not None: lc.estimate(data, chunksize=chunksize) return lc From 518d7233d3a2a3956ab8a860801d00eed174ca04 Mon Sep 17 00:00:00 2001 From: marscher Date: Thu, 1 Feb 2018 18:43:55 +0100 Subject: [PATCH 20/43] merged devel --- pyemma/_base/model.py | 1 + pyemma/coordinates/api.py | 6 +++--- pyemma/coordinates/estimation/covariance.py | 1 - 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pyemma/_base/model.py b/pyemma/_base/model.py index 2399b0dc4..3c2deb093 100644 --- a/pyemma/_base/model.py +++ b/pyemma/_base/model.py @@ -103,6 +103,7 @@ def get_model_params(self, deep=True): # catch deprecated param values. # This is set in utils/__init__.py but it gets overwritten # when running under python3 somehow. 
+ from pyemma.util.exceptions import PyEMMA_DeprecationWarning warnings.simplefilter("always", DeprecationWarning) warnings.simplefilter("always", PyEMMA_DeprecationWarning) try: diff --git a/pyemma/coordinates/api.py b/pyemma/coordinates/api.py index 78df6af59..1eb88a81c 100644 --- a/pyemma/coordinates/api.py +++ b/pyemma/coordinates/api.py @@ -376,9 +376,9 @@ def source(inp, features=None, top=None, chunksize=None, **kw): # CASE 1: input is a string or list of strings # check: if single string create a one-element list - if isinstance(inp, str) or ( + if isinstance(inp, _string_types) or ( isinstance(inp, (list, tuple)) - and (any(isinstance(item, (list, tuple, str)) for item in inp) or len(inp) is 0)): + and (any(isinstance(item, (list, tuple, _string_types)) for item in inp) or len(inp) is 0)): reader = create_file_reader(inp, top, features, chunksize=cs, **kw) elif isinstance(inp, _np.ndarray) or (isinstance(inp, (list, tuple)) @@ -717,7 +717,7 @@ def save_traj(traj_inp, indexes, outfile, top=None, stride = 1, chunksize=None, # Do we have what we need? if not isinstance(traj_inp, (list, tuple)): raise TypeError("traj_inp has to be of type list, not %s" % type(traj_inp)) - if not isinstance(top, (str, Topology, Trajectory)): + if not isinstance(top, (_string_types, Topology, Trajectory)): raise TypeError("traj_inp cannot be a list of files without an input " "top of type str (eg filename.pdb), mdtraj.Trajectory or mdtraj.Topology. " "Got type %s instead" % type(top)) diff --git a/pyemma/coordinates/estimation/covariance.py b/pyemma/coordinates/estimation/covariance.py index 4eb12e739..a6df2e9d7 100644 --- a/pyemma/coordinates/estimation/covariance.py +++ b/pyemma/coordinates/estimation/covariance.py @@ -15,7 +15,6 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . 
-from __future__ import absolute_import import numpy as np import numbers From c2ca70501f90ba46b9b41d3e3f2d00db90b1d0f2 Mon Sep 17 00:00:00 2001 From: marscher Date: Fri, 2 Feb 2018 14:16:48 +0100 Subject: [PATCH 21/43] [coor/api] string types usage, chunksize --- pyemma/coordinates/api.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pyemma/coordinates/api.py b/pyemma/coordinates/api.py index 1eb88a81c..c7fdafbdd 100644 --- a/pyemma/coordinates/api.py +++ b/pyemma/coordinates/api.py @@ -1382,12 +1382,11 @@ def covariance_lagged(data=None, c00=True, c0t=True, ctt=False, remove_constant_ approximation of molecular kinetics from short off-equilibrium simulations. J. Chem. Phys. (submitted) .. [2] Chan, T. F., Golub G. H., LeVeque R. J. 1979. Updating formulae and pairwiese algorithms for computing sample variances. Technical Report STAN-CS-79-773, Department of Computer Science, Stanford University. - """ - + """ from pyemma.coordinates.estimation.covariance import LaggedCovariance from pyemma.coordinates.estimation.koopman import _KoopmanEstimator import types - if isinstance(weights, str): + if isinstance(weights, _string_types): if weights== "koopman": if data is None: raise ValueError("Data must be supplied for reweighting='koopman'") From 6a717c48b36d39aaae131b58801449ab79e46370 Mon Sep 17 00:00:00 2001 From: "Martin K. 
Scherer" Date: Sun, 4 Feb 2018 14:22:48 +0100 Subject: [PATCH 22/43] fix merge mistakes --- devtools/conda-recipe/meta.yaml | 3 +- .../tests/test_random_access_stride.py | 2 +- pyemma/datasets/double_well_discrete.py | 3 - pyemma/msm/estimators/_dtraj_stats.py | 4 - pyemma/util/reflection.py | 139 +++++++++++------- pyemma/util/types.py | 2 + 6 files changed, 91 insertions(+), 62 deletions(-) diff --git a/devtools/conda-recipe/meta.yaml b/devtools/conda-recipe/meta.yaml index 598f9ba54..e8f26ef92 100644 --- a/devtools/conda-recipe/meta.yaml +++ b/devtools/conda-recipe/meta.yaml @@ -24,7 +24,7 @@ requirements: - numpy 1.9.* # [not (win and (py35 or py36))] - numpy 1.9.* # [win and py35] - numpy 1.11.* # [win and py36] - - python >=3 + - python - scipy - setuptools - gcc # [ not win ] @@ -57,7 +57,6 @@ test: files: - matplotlibrc requires: - - h5py - pytest - pytest-cov # TODO: disabled on win64, until https://bugs.python.org/issue31701 is fixed. diff --git a/pyemma/coordinates/tests/test_random_access_stride.py b/pyemma/coordinates/tests/test_random_access_stride.py index 26f8e8aa2..f818aefec 100644 --- a/pyemma/coordinates/tests/test_random_access_stride.py +++ b/pyemma/coordinates/tests/test_random_access_stride.py @@ -468,7 +468,7 @@ def test_RA_high_stride(self): for ext in savable_formats_mdtra_18: traj = create_traj(length=n, dir=self.tmpdir, format=ext)[0] - from unittest.mock import patch + from mock import patch # temporarily overwrite the memory cutoff with a smaller value, to trigger the switch to RA stride. 
with patch('pyemma.coordinates.util.patches.iterload.MEMORY_CUTOFF', n_bytes - 1): r = coor.source(traj, top=get_top()) diff --git a/pyemma/datasets/double_well_discrete.py b/pyemma/datasets/double_well_discrete.py index 2b1710300..2121595dc 100644 --- a/pyemma/datasets/double_well_discrete.py +++ b/pyemma/datasets/double_well_discrete.py @@ -16,10 +16,7 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . -<<<<<<< HEAD from __future__ import absolute_import -======= ->>>>>>> dea94f91... updated to devel, made serializable, removed obsolete hacks. __author__ = 'noe' diff --git a/pyemma/msm/estimators/_dtraj_stats.py b/pyemma/msm/estimators/_dtraj_stats.py index 5678bc305..096a35651 100644 --- a/pyemma/msm/estimators/_dtraj_stats.py +++ b/pyemma/msm/estimators/_dtraj_stats.py @@ -16,11 +16,7 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . -<<<<<<< HEAD from __future__ import absolute_import -======= ->>>>>>> dea94f91... updated to devel, made serializable, removed obsolete hacks. - import numpy as np diff --git a/pyemma/util/reflection.py b/pyemma/util/reflection.py index dd50365cd..9bfb42145 100644 --- a/pyemma/util/reflection.py +++ b/pyemma/util/reflection.py @@ -16,8 +16,11 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . +from __future__ import division, print_function, absolute_import import inspect +import six +#from six import string_types from collections import namedtuple __author__ = 'noe, marscher' @@ -36,57 +39,89 @@ # This way, the caller code does not need to know whether it uses a legacy # .getargspec or bright and shiny .signature. - -# Apparently, yes. Wrap inspect.signature - -ArgSpec = namedtuple('ArgSpec', ['args', 'varargs', 'keywords', 'defaults']) - -def getargspec_no_self(func): - """inspect.getargspec replacement using inspect.signature. 
- - inspect.getargspec is deprecated in python 3. This is a replacement - based on the (new in python 3.3) `inspect.signature`. - - Parameters - ---------- - func : callable - A callable to inspect - - Returns - ------- - argspec : ArgSpec(args, varargs, varkw, defaults) - This is similar to the result of inspect.getargspec(func) under - python 2.x. - NOTE: if the first argument of `func` is self, it is *not*, I repeat - *not* included in argspec.args. - This is done for consistency between inspect.getargspec() under - python 2.x, and inspect.signature() under python 3.x. - """ - sig = inspect.signature(func) - args = [ - p.name for p in sig.parameters.values() - if p.kind == inspect.Parameter.POSITIONAL_OR_KEYWORD - ] - varargs = [ - p.name for p in sig.parameters.values() - if p.kind == inspect.Parameter.VAR_POSITIONAL - ] - varargs = varargs[0] if varargs else None - varkw = [ - p.name for p in sig.parameters.values() - if p.kind == inspect.Parameter.VAR_KEYWORD - ] - varkw = varkw[0] if varkw else None - defaults = [ - p.default for p in sig.parameters.values() - if (p.kind == inspect.Parameter.POSITIONAL_OR_KEYWORD and - p.default is not p.empty) - ] or None - - if args[0] == 'self': - args.pop(0) - - return ArgSpec(args, varargs, varkw, defaults) +try: + # is it python 3.3 or higher? + inspect.signature + + # Apparently, yes. Wrap inspect.signature + + ArgSpec = namedtuple('ArgSpec', ['args', 'varargs', 'keywords', 'defaults']) + + def getargspec_no_self(func): + """inspect.getargspec replacement using inspect.signature. + + inspect.getargspec is deprecated in python 3. This is a replacement + based on the (new in python 3.3) `inspect.signature`. + + Parameters + ---------- + func : callable + A callable to inspect + + Returns + ------- + argspec : ArgSpec(args, varargs, varkw, defaults) + This is similar to the result of inspect.getargspec(func) under + python 2.x. 
+ NOTE: if the first argument of `func` is self, it is *not*, I repeat + *not* included in argspec.args. + This is done for consistency between inspect.getargspec() under + python 2.x, and inspect.signature() under python 3.x. + """ + sig = inspect.signature(func) + args = [ + p.name for p in sig.parameters.values() + if p.kind == inspect.Parameter.POSITIONAL_OR_KEYWORD + ] + varargs = [ + p.name for p in sig.parameters.values() + if p.kind == inspect.Parameter.VAR_POSITIONAL + ] + varargs = varargs[0] if varargs else None + varkw = [ + p.name for p in sig.parameters.values() + if p.kind == inspect.Parameter.VAR_KEYWORD + ] + varkw = varkw[0] if varkw else None + defaults = [ + p.default for p in sig.parameters.values() + if (p.kind == inspect.Parameter.POSITIONAL_OR_KEYWORD and + p.default is not p.empty) + ] or None + + if args[0] == 'self': + args.pop(0) + + return ArgSpec(args, varargs, varkw, defaults) + +except AttributeError: + # python 2.x + def getargspec_no_self(func): + """inspect.getargspec replacement for compatibility with python 3.x. + + inspect.getargspec is deprecated in python 3. This wraps it, and + *removes* `self` from the argument list of `func`, if present. + This is done for forward compatibility with python 3. + + Parameters + ---------- + func : callable + A callable to inspect + + Returns + ------- + argspec : ArgSpec(args, varargs, varkw, defaults) + This is similar to the result of inspect.getargspec(func) under + python 2.x. + NOTE: if the first argument of `func` is self, it is *not*, I repeat + *not* included in argspec.args. + This is done for consistency between inspect.getargspec() under + python 2.x, and inspect.signature() under python 3.x. 
+ """ + argspec = inspect.getargspec(func) + if argspec.args[0] == 'self': + argspec.args.pop(0) + return argspec def call_member(obj, f, *args, **kwargs): @@ -103,7 +138,7 @@ def call_member(obj, f, *args, **kwargs): in that case """ # get function name - if not isinstance(f, str): + if not isinstance(f, six.string_types): fname = f.__func__.__name__ else: fname = f diff --git a/pyemma/util/types.py b/pyemma/util/types.py index ad0d827de..e5192570e 100644 --- a/pyemma/util/types.py +++ b/pyemma/util/types.py @@ -27,6 +27,8 @@ import numbers import collections +from six import string_types + # ====================================================================================================================== # BASIC TYPE CHECKS # ====================================================================================================================== From 9d0892cb1e227ef534fd54e830c5167b5e6f63e6 Mon Sep 17 00:00:00 2001 From: "Martin K. Scherer" Date: Sun, 4 Feb 2018 23:45:26 +0100 Subject: [PATCH 23/43] revert (eps defined) --- pyemma/msm/tests/test_amm.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyemma/msm/tests/test_amm.py b/pyemma/msm/tests/test_amm.py index 16f4993e3..e22a6364e 100644 --- a/pyemma/msm/tests/test_amm.py +++ b/pyemma/msm/tests/test_amm.py @@ -334,7 +334,8 @@ def test_simulate_MSM(self): # ---------------------------------- def test_two_state_kinetics(self): - self._two_state_kinetics(self.amm) + #pass + self._two_state_kinetics(self.amm, eps=0.01) @unittest.skipIf(six.PY2, 'only py3') def test_serialize(self): From ba3d6738b4a7b917fecd025c0ea042359feb425e Mon Sep 17 00:00:00 2001 From: marscher Date: Mon, 5 Feb 2018 15:57:05 +0100 Subject: [PATCH 24/43] [doc] amend changelog --- doc/source/CHANGELOG.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/source/CHANGELOG.rst b/doc/source/CHANGELOG.rst index 35a82c182..22d0e1d7c 100644 --- a/doc/source/CHANGELOG.rst +++ b/doc/source/CHANGELOG.rst @@ 
-5,7 +5,7 @@ Changelog ---------------- As of this version the usage of Python 2.7 is officially deprecated. Please upgrade -your Python installation to at least version 3.5. +your Python installation to at least version 3.5 to catch future updates. **New features**: @@ -13,11 +13,12 @@ your Python installation to at least version 3.5. data into estimation of Markov models from molecular simulations. The method is described in [1]. #1111 - msm: Added mincount_connectivity argument to MSM estimators. This option enables to omit counts below a given threshold. #1106 -- coodinates: selection based features allow alignment to a reference structure. #1184 +- coordinates: selection based features allow alignment to a reference structure. #1184 - coordinates: two new center of mass features: ResidueCOMFeature() and GroupCOMFeature() - coordinates: new configuration variable 'default_chunksize' can be set to limit the size of a fragmented extracted per iteration from a data source. This is invariant to the dimension of data sets. #1190 - datasets: added Prinz potential (quadwell). #1226 +- coordinates: added VAMP estimator. 
#1237 - References: From 182ba9b1cf56d6ced666ee869952c64a421f1773 Mon Sep 17 00:00:00 2001 From: marscher Date: Mon, 5 Feb 2018 15:57:55 +0100 Subject: [PATCH 25/43] revert absolute import removal --- pyemma/__init__.py | 1 - pyemma/_base/loggable.py | 1 + pyemma/_base/model.py | 1 - pyemma/_ext/sklearn/base.py | 1 + pyemma/_ext/sklearn/parameter_search.py | 1 + pyemma/_ext/variational/__init__.py | 1 + pyemma/_ext/variational/estimators/__init__.py | 1 + pyemma/_ext/variational/estimators/moments.py | 1 + .../estimators/tests/benchmark_moments.py | 4 +++- .../variational/estimators/tests/test_moments.py | 3 ++- .../estimators/tests/test_running_moments.py | 1 + pyemma/_ext/variational/solvers/direct.py | 2 +- .../_ext/variational/solvers/tests/test_direct.py | 1 + pyemma/coordinates/clustering/assign.py | 1 + pyemma/coordinates/clustering/interface.py | 1 + pyemma/coordinates/clustering/kmeans.py | 1 - pyemma/coordinates/clustering/regspace.py | 1 + pyemma/coordinates/clustering/tests/test_assign.py | 2 +- .../coordinates/clustering/tests/test_cluster.py | 3 ++- .../clustering/tests/test_cluster_samples.py | 3 ++- pyemma/coordinates/clustering/tests/test_kmeans.py | 1 + .../clustering/tests/test_mini_batch_kmeans.py | 1 + .../coordinates/clustering/tests/test_regspace.py | 1 + .../clustering/tests/test_uniform_time.py | 3 ++- pyemma/coordinates/clustering/uniform_time.py | 1 + pyemma/coordinates/data/_base/iterable.py | 1 + pyemma/coordinates/data/_base/random_accessible.py | 1 - .../coordinates/data/_base/streaming_estimator.py | 1 + pyemma/coordinates/data/_base/transformer.py | 4 +++- pyemma/coordinates/data/data_in_memory.py | 1 + pyemma/coordinates/data/feature_reader.py | 1 + .../coordinates/data/featurization/featurizer.py | 1 + pyemma/coordinates/data/numpy_filereader.py | 1 + pyemma/coordinates/data/py_csv_reader.py | 1 + pyemma/coordinates/data/sources_merger.py | 2 +- pyemma/coordinates/data/util/frames_from_file.py | 1 + 
pyemma/coordinates/data/util/reader_utils.py | 14 +++++++------- pyemma/coordinates/data/util/traj_info_cache.py | 1 + pyemma/coordinates/estimation/koopman.py | 1 + pyemma/coordinates/pipelines.py | 1 + pyemma/coordinates/tests/__init__.py | 1 + pyemma/coordinates/tests/test_acf.py | 5 +++-- pyemma/coordinates/tests/test_covar_estimator.py | 1 + pyemma/coordinates/tests/test_csvreader.py | 1 + pyemma/coordinates/tests/test_datainmemory.py | 1 + pyemma/coordinates/tests/test_discretizer.py | 1 + pyemma/coordinates/tests/test_featurereader.py | 1 + .../tests/test_featurereader_and_tica.py | 2 ++ .../test_featurereader_and_tica_projection.py | 2 ++ pyemma/coordinates/tests/test_featurizer.py | 1 - pyemma/coordinates/tests/test_frames_from_file.py | 1 + pyemma/coordinates/tests/test_numpyfilereader.py | 2 ++ pyemma/coordinates/tests/test_pca.py | 1 + pyemma/coordinates/tests/test_pipeline.py | 2 ++ .../coordinates/tests/test_random_access_stride.py | 1 + pyemma/coordinates/tests/test_save_traj.py | 1 + pyemma/coordinates/tests/test_save_trajs.py | 3 ++- pyemma/coordinates/tests/test_source.py | 3 ++- pyemma/coordinates/tests/test_stride.py | 4 +++- pyemma/coordinates/tests/test_tica.py | 1 + pyemma/coordinates/tests/test_traj_info_cache.py | 3 ++- pyemma/coordinates/transform/pca.py | 1 + pyemma/coordinates/transform/tica.py | 1 + pyemma/coordinates/util/patches.py | 7 ++++--- pyemma/coordinates/util/stat.py | 3 ++- pyemma/datasets/api.py | 1 + pyemma/datasets/double_well_discrete.py | 2 +- pyemma/datasets/double_well_thermo.py | 1 + pyemma/msm/__init__.py | 1 + pyemma/msm/api.py | 1 + pyemma/msm/estimators/__init__.py | 1 + pyemma/msm/estimators/_dtraj_stats.py | 1 + pyemma/msm/estimators/bayesian_hmsm.py | 1 + pyemma/msm/estimators/bayesian_msm.py | 1 + pyemma/msm/models/__init__.py | 1 + pyemma/msm/models/hmsm.py | 3 ++- pyemma/msm/models/hmsm_sampled.py | 1 + pyemma/msm/models/msm.py | 3 +-- pyemma/msm/models/reactive_flux.py | 2 ++ 
pyemma/msm/tests/birth_death_chain.py | 3 ++- pyemma/msm/tests/test_bayesian_hmsm.py | 1 + pyemma/msm/tests/test_bayesian_msm.py | 1 + pyemma/msm/tests/test_cktest.py | 1 + pyemma/msm/tests/test_estimator.py | 2 +- pyemma/msm/tests/test_hmsm.py | 1 + pyemma/msm/tests/test_its.py | 1 + pyemma/msm/tests/test_its_oom.py | 3 ++- pyemma/msm/tests/test_msm.py | 1 + pyemma/msm/tests/test_oom_msm.py | 3 ++- pyemma/msm/tests/test_tpt.py | 3 ++- pyemma/plots/__init__.py | 1 + pyemma/plots/markovtests.py | 1 + pyemma/plots/networks.py | 1 + pyemma/plots/plots2d.py | 1 + pyemma/plots/tests/test_its.py | 3 ++- pyemma/plots/tests/test_markovtests.py | 1 + pyemma/plots/tests/test_networks.py | 1 + pyemma/plots/tests/test_plots2d.py | 1 + pyemma/plots/timescales.py | 1 + pyemma/thermo/tests/test_TRAM.py | 1 + pyemma/util/__init__.py | 1 + pyemma/util/annotators.py | 1 + pyemma/util/debug.py | 1 + pyemma/util/files.py | 3 ++- pyemma/util/indices.py | 1 + pyemma/util/linalg.py | 1 + pyemma/util/log.py | 1 + pyemma/util/numeric.py | 1 + pyemma/util/statistics.py | 1 + pyemma/util/tests/statistics_test.py | 3 ++- pyemma/util/tests/test_discrete_trajectories.py | 3 ++- pyemma/util/tests/test_shortcut.py | 3 ++- 112 files changed, 147 insertions(+), 45 deletions(-) diff --git a/pyemma/__init__.py b/pyemma/__init__.py index 332b6962b..ec4344440 100644 --- a/pyemma/__init__.py +++ b/pyemma/__init__.py @@ -21,7 +21,6 @@ PyEMMA - Emma's Markov Model Algorithms ======================================= """ -from __future__ import absolute_import # set version from versioneer. 
from ._version import get_versions diff --git a/pyemma/_base/loggable.py b/pyemma/_base/loggable.py index 542dda2c5..f4d3d9a07 100644 --- a/pyemma/_base/loggable.py +++ b/pyemma/_base/loggable.py @@ -21,6 +21,7 @@ @author: marscher ''' +from __future__ import absolute_import import logging import weakref from itertools import count diff --git a/pyemma/_base/model.py b/pyemma/_base/model.py index 3c2deb093..94bdeba71 100644 --- a/pyemma/_base/model.py +++ b/pyemma/_base/model.py @@ -16,7 +16,6 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . -from __future__ import absolute_import import numpy as _np import warnings diff --git a/pyemma/_ext/sklearn/base.py b/pyemma/_ext/sklearn/base.py index b305ac3e7..c2de6d350 100644 --- a/pyemma/_ext/sklearn/base.py +++ b/pyemma/_ext/sklearn/base.py @@ -12,6 +12,7 @@ Base classes for all estimators. """ +from __future__ import absolute_import # Author: Gael Varoquaux # License: BSD 3 clause diff --git a/pyemma/_ext/sklearn/parameter_search.py b/pyemma/_ext/sklearn/parameter_search.py index 0c5d4a86f..0e452a1f0 100644 --- a/pyemma/_ext/sklearn/parameter_search.py +++ b/pyemma/_ext/sklearn/parameter_search.py @@ -12,6 +12,7 @@ Parameter estimation tools """ +from __future__ import absolute_import # Author: Alexandre Gramfort , # Gael Varoquaux # Andreas Mueller diff --git a/pyemma/_ext/variational/__init__.py b/pyemma/_ext/variational/__init__.py index e3e473432..f0257fdc2 100644 --- a/pyemma/_ext/variational/__init__.py +++ b/pyemma/_ext/variational/__init__.py @@ -1,3 +1,4 @@ +from __future__ import absolute_import __author__ = 'noe' # import subpackages such that they are available after the main package import diff --git a/pyemma/_ext/variational/estimators/__init__.py b/pyemma/_ext/variational/estimators/__init__.py index 2aee16ab1..796068eb7 100644 --- a/pyemma/_ext/variational/estimators/__init__.py +++ b/pyemma/_ext/variational/estimators/__init__.py @@ 
-1,3 +1,4 @@ +from __future__ import absolute_import from .moments import moments_XX, moments_XXXY, moments_block from .moments import covar, covars diff --git a/pyemma/_ext/variational/estimators/moments.py b/pyemma/_ext/variational/estimators/moments.py index 422145b12..1124c6350 100644 --- a/pyemma/_ext/variational/estimators/moments.py +++ b/pyemma/_ext/variational/estimators/moments.py @@ -71,6 +71,7 @@ of the mean if needed. """ +from __future__ import absolute_import __author__ = 'noe' diff --git a/pyemma/_ext/variational/estimators/tests/benchmark_moments.py b/pyemma/_ext/variational/estimators/tests/benchmark_moments.py index 563cd5df2..1c3c078a0 100644 --- a/pyemma/_ext/variational/estimators/tests/benchmark_moments.py +++ b/pyemma/_ext/variational/estimators/tests/benchmark_moments.py @@ -1,3 +1,5 @@ +from __future__ import absolute_import +from __future__ import print_function __author__ = 'noe' import time @@ -156,4 +158,4 @@ def main(): if __name__ == "__main__": - main() + main() \ No newline at end of file diff --git a/pyemma/_ext/variational/estimators/tests/test_moments.py b/pyemma/_ext/variational/estimators/tests/test_moments.py index 9359e6657..4345ffc95 100644 --- a/pyemma/_ext/variational/estimators/tests/test_moments.py +++ b/pyemma/_ext/variational/estimators/tests/test_moments.py @@ -1,3 +1,4 @@ +from __future__ import absolute_import import unittest import numpy as np from .. 
import moments @@ -331,4 +332,4 @@ def test_moments_XY_weighted_sym_sparseconst(self): if __name__ == "__main__": - unittest.main() + unittest.main() \ No newline at end of file diff --git a/pyemma/_ext/variational/estimators/tests/test_running_moments.py b/pyemma/_ext/variational/estimators/tests/test_running_moments.py index 4fa9923ab..aa0adebeb 100644 --- a/pyemma/_ext/variational/estimators/tests/test_running_moments.py +++ b/pyemma/_ext/variational/estimators/tests/test_running_moments.py @@ -1,3 +1,4 @@ +from __future__ import absolute_import import unittest import numpy as np from .. import running_moments diff --git a/pyemma/_ext/variational/solvers/direct.py b/pyemma/_ext/variational/solvers/direct.py index 469194d53..db442aedf 100644 --- a/pyemma/_ext/variational/solvers/direct.py +++ b/pyemma/_ext/variational/solvers/direct.py @@ -1,3 +1,4 @@ +from __future__ import absolute_import import numpy as _np from ..util import ZeroRankError as _ZeroRankError @@ -271,4 +272,3 @@ def eig_corr(C0, Ct, epsilon=1e-10, method='QR', sign_maxelement=False): # return result return l, R - diff --git a/pyemma/_ext/variational/solvers/tests/test_direct.py b/pyemma/_ext/variational/solvers/tests/test_direct.py index d3a916e29..ae062f914 100644 --- a/pyemma/_ext/variational/solvers/tests/test_direct.py +++ b/pyemma/_ext/variational/solvers/tests/test_direct.py @@ -1,3 +1,4 @@ +from __future__ import absolute_import import unittest import numpy as np from .. 
import direct diff --git a/pyemma/coordinates/clustering/assign.py b/pyemma/coordinates/clustering/assign.py index bb4f84a38..da343f453 100644 --- a/pyemma/coordinates/clustering/assign.py +++ b/pyemma/coordinates/clustering/assign.py @@ -22,6 +22,7 @@ @author: marscher ''' +from __future__ import absolute_import import numpy as np diff --git a/pyemma/coordinates/clustering/interface.py b/pyemma/coordinates/clustering/interface.py index babc8bec8..25bc4d575 100644 --- a/pyemma/coordinates/clustering/interface.py +++ b/pyemma/coordinates/clustering/interface.py @@ -22,6 +22,7 @@ @author: marscher ''' +from __future__ import absolute_import import os diff --git a/pyemma/coordinates/clustering/kmeans.py b/pyemma/coordinates/clustering/kmeans.py index 68fa580a0..a36c83f5f 100644 --- a/pyemma/coordinates/clustering/kmeans.py +++ b/pyemma/coordinates/clustering/kmeans.py @@ -35,7 +35,6 @@ from pyemma.util.units import bytes_to_string from pyemma.util.contexts import random_seed -from six.moves import range import numpy as np diff --git a/pyemma/coordinates/clustering/regspace.py b/pyemma/coordinates/clustering/regspace.py index 80413061e..8677c4930 100644 --- a/pyemma/coordinates/clustering/regspace.py +++ b/pyemma/coordinates/clustering/regspace.py @@ -23,6 +23,7 @@ @author: marscher ''' +from __future__ import absolute_import import warnings diff --git a/pyemma/coordinates/clustering/tests/test_assign.py b/pyemma/coordinates/clustering/tests/test_assign.py index dab49c7b1..94d8e04d9 100644 --- a/pyemma/coordinates/clustering/tests/test_assign.py +++ b/pyemma/coordinates/clustering/tests/test_assign.py @@ -24,7 +24,7 @@ from mock import patch from pyemma.util.files import TemporaryDirectory from logging import getLogger -from six.moves import range + import numpy as np import pyemma.coordinates as coor import pyemma.util.types as types diff --git a/pyemma/coordinates/clustering/tests/test_cluster.py b/pyemma/coordinates/clustering/tests/test_cluster.py index 
8bb701ecc..725d07365 100644 --- a/pyemma/coordinates/clustering/tests/test_cluster.py +++ b/pyemma/coordinates/clustering/tests/test_cluster.py @@ -18,6 +18,7 @@ +from __future__ import absolute_import import unittest import os import tempfile @@ -195,4 +196,4 @@ def setUpClass(cls): if __name__ == "__main__": - unittest.main() + unittest.main() \ No newline at end of file diff --git a/pyemma/coordinates/clustering/tests/test_cluster_samples.py b/pyemma/coordinates/clustering/tests/test_cluster_samples.py index 6b6ce90f0..a2a6ea660 100644 --- a/pyemma/coordinates/clustering/tests/test_cluster_samples.py +++ b/pyemma/coordinates/clustering/tests/test_cluster_samples.py @@ -24,6 +24,7 @@ @author: gph82, clonker """ +from __future__ import absolute_import import unittest @@ -71,4 +72,4 @@ def test_sample_indexes_by_state(self): if __name__ == "__main__": - unittest.main() + unittest.main() \ No newline at end of file diff --git a/pyemma/coordinates/clustering/tests/test_kmeans.py b/pyemma/coordinates/clustering/tests/test_kmeans.py index 56ff9e37d..f21ae9107 100644 --- a/pyemma/coordinates/clustering/tests/test_kmeans.py +++ b/pyemma/coordinates/clustering/tests/test_kmeans.py @@ -20,6 +20,7 @@ @author: marscher ''' +from __future__ import absolute_import import os import random diff --git a/pyemma/coordinates/clustering/tests/test_mini_batch_kmeans.py b/pyemma/coordinates/clustering/tests/test_mini_batch_kmeans.py index 745c722da..400d755ed 100644 --- a/pyemma/coordinates/clustering/tests/test_mini_batch_kmeans.py +++ b/pyemma/coordinates/clustering/tests/test_mini_batch_kmeans.py @@ -16,6 +16,7 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . 
+from __future__ import absolute_import import unittest from unittest import TestCase import numpy as np diff --git a/pyemma/coordinates/clustering/tests/test_regspace.py b/pyemma/coordinates/clustering/tests/test_regspace.py index c4fcee9e3..6cff2ee12 100644 --- a/pyemma/coordinates/clustering/tests/test_regspace.py +++ b/pyemma/coordinates/clustering/tests/test_regspace.py @@ -23,6 +23,7 @@ @author: marscher ''' +from __future__ import absolute_import import itertools import unittest diff --git a/pyemma/coordinates/clustering/tests/test_uniform_time.py b/pyemma/coordinates/clustering/tests/test_uniform_time.py index 2d51e4207..442ff8ca6 100644 --- a/pyemma/coordinates/clustering/tests/test_uniform_time.py +++ b/pyemma/coordinates/clustering/tests/test_uniform_time.py @@ -23,6 +23,7 @@ @author: marscher ''' +from __future__ import absolute_import import unittest import numpy as np @@ -68,4 +69,4 @@ def test_big_k(self): if __name__ == "__main__": - unittest.main() + unittest.main() \ No newline at end of file diff --git a/pyemma/coordinates/clustering/uniform_time.py b/pyemma/coordinates/clustering/uniform_time.py index 70db3f683..329d2549b 100644 --- a/pyemma/coordinates/clustering/uniform_time.py +++ b/pyemma/coordinates/clustering/uniform_time.py @@ -17,6 +17,7 @@ # along with this program. If not, see . +from __future__ import absolute_import, division import math diff --git a/pyemma/coordinates/data/_base/iterable.py b/pyemma/coordinates/data/_base/iterable.py index e3a07b6fa..d67eb3006 100644 --- a/pyemma/coordinates/data/_base/iterable.py +++ b/pyemma/coordinates/data/_base/iterable.py @@ -15,6 +15,7 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . 
+from __future__ import print_function from abc import ABCMeta, abstractmethod import numpy as np import six diff --git a/pyemma/coordinates/data/_base/random_accessible.py b/pyemma/coordinates/data/_base/random_accessible.py index 652e6ecea..61c35ff43 100644 --- a/pyemma/coordinates/data/_base/random_accessible.py +++ b/pyemma/coordinates/data/_base/random_accessible.py @@ -2,7 +2,6 @@ import numpy as np import numbers - import six diff --git a/pyemma/coordinates/data/_base/streaming_estimator.py b/pyemma/coordinates/data/_base/streaming_estimator.py index 7576d5eda..89258e636 100644 --- a/pyemma/coordinates/data/_base/streaming_estimator.py +++ b/pyemma/coordinates/data/_base/streaming_estimator.py @@ -16,6 +16,7 @@ # along with this program. If not, see . +from __future__ import absolute_import from pyemma._base.estimator import Estimator from pyemma.coordinates.data import DataInMemory diff --git a/pyemma/coordinates/data/_base/transformer.py b/pyemma/coordinates/data/_base/transformer.py index 204ae616b..089da7a59 100644 --- a/pyemma/coordinates/data/_base/transformer.py +++ b/pyemma/coordinates/data/_base/transformer.py @@ -16,10 +16,12 @@ # along with this program. If not, see . +from __future__ import absolute_import from abc import ABCMeta, abstractmethod import numpy as np +import six from pyemma._ext.sklearn.base import TransformerMixin from pyemma.coordinates.data._base.datasource import DataSource, DataSourceIterator @@ -33,7 +35,7 @@ __author__ = 'noe, marscher' -class Transformer(TransformerMixin, metaclass=ABCMeta): +class Transformer(six.with_metaclass(ABCMeta, TransformerMixin)): """ A transformer takes data and transforms it """ @abstractmethod diff --git a/pyemma/coordinates/data/data_in_memory.py b/pyemma/coordinates/data/data_in_memory.py index 6ffbc1855..de19e2191 100644 --- a/pyemma/coordinates/data/data_in_memory.py +++ b/pyemma/coordinates/data/data_in_memory.py @@ -16,6 +16,7 @@ # along with this program. If not, see . 
+from __future__ import absolute_import import functools import numbers diff --git a/pyemma/coordinates/data/feature_reader.py b/pyemma/coordinates/data/feature_reader.py index c01c809a2..69741c02f 100644 --- a/pyemma/coordinates/data/feature_reader.py +++ b/pyemma/coordinates/data/feature_reader.py @@ -16,6 +16,7 @@ # along with this program. If not, see . +from __future__ import absolute_import import mdtraj import numpy as np diff --git a/pyemma/coordinates/data/featurization/featurizer.py b/pyemma/coordinates/data/featurization/featurizer.py index eedb3cd6b..496f3e564 100644 --- a/pyemma/coordinates/data/featurization/featurizer.py +++ b/pyemma/coordinates/data/featurization/featurizer.py @@ -15,6 +15,7 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . +from __future__ import absolute_import import warnings diff --git a/pyemma/coordinates/data/numpy_filereader.py b/pyemma/coordinates/data/numpy_filereader.py index 006a7e287..3855f8e9b 100644 --- a/pyemma/coordinates/data/numpy_filereader.py +++ b/pyemma/coordinates/data/numpy_filereader.py @@ -20,6 +20,7 @@ @author: marscher ''' +from __future__ import absolute_import import functools diff --git a/pyemma/coordinates/data/py_csv_reader.py b/pyemma/coordinates/data/py_csv_reader.py index d17196862..8914a8b32 100644 --- a/pyemma/coordinates/data/py_csv_reader.py +++ b/pyemma/coordinates/data/py_csv_reader.py @@ -20,6 +20,7 @@ @author: marscher """ +from __future__ import absolute_import import csv import os diff --git a/pyemma/coordinates/data/sources_merger.py b/pyemma/coordinates/data/sources_merger.py index 9472d1750..1e663352f 100644 --- a/pyemma/coordinates/data/sources_merger.py +++ b/pyemma/coordinates/data/sources_merger.py @@ -21,7 +21,7 @@ class SourcesMerger(DataSource, SerializableMixIn): chunk: int chunk size to use for underlying iterators. 
""" - def __init__(self, sources: [list, tuple], chunk=5000): + def __init__(self, sources, chunk=5000): super(SourcesMerger, self).__init__(chunksize=chunk) self.sources = sources self._is_reader = True diff --git a/pyemma/coordinates/data/util/frames_from_file.py b/pyemma/coordinates/data/util/frames_from_file.py index 23694e44c..7f0cc6a56 100644 --- a/pyemma/coordinates/data/util/frames_from_file.py +++ b/pyemma/coordinates/data/util/frames_from_file.py @@ -15,6 +15,7 @@ # # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . +from __future__ import absolute_import import itertools from logging import getLogger diff --git a/pyemma/coordinates/data/util/reader_utils.py b/pyemma/coordinates/data/util/reader_utils.py index 7e1e52aff..db1024639 100644 --- a/pyemma/coordinates/data/util/reader_utils.py +++ b/pyemma/coordinates/data/util/reader_utils.py @@ -23,7 +23,6 @@ import numpy as np import os -from six import string_types def create_file_reader(input_files, topology, featurizer, chunksize=None, **kw): r""" @@ -44,7 +43,8 @@ def create_file_reader(input_files, topology, featurizer, chunksize=None, **kw): from pyemma.coordinates.data.py_csv_reader import PyCSVReader from pyemma.coordinates.data import FeatureReader from pyemma.coordinates.data.fragmented_trajectory_reader import FragmentedTrajectoryReader - + import six + str = six.string_types # fragmented trajectories if (isinstance(input_files, (list, tuple)) and len(input_files) > 0 and @@ -52,15 +52,15 @@ def create_file_reader(input_files, topology, featurizer, chunksize=None, **kw): return FragmentedTrajectoryReader(input_files, topology, chunksize, featurizer) # normal trajectories - if (isinstance(input_files, string_types) + if (isinstance(input_files, str) or (isinstance(input_files, (list, tuple)) - and (any(isinstance(item, string_types) for item in input_files) + and (any(isinstance(item, str) for item in input_files) or len(input_files) 
is 0))): reader = None # check: if single string create a one-element list - if isinstance(input_files, string_types): + if isinstance(input_files, str): input_list = [input_files] - elif len(input_files) > 0 and all(isinstance(item, string_types) for item in input_files): + elif len(input_files) > 0 and all(isinstance(item, str) for item in input_files): input_list = input_files else: if len(input_files) is 0: @@ -177,7 +177,7 @@ def preallocate_empty_trajectory(top, n_frames=1): def enforce_top(top): - if isinstance(top, string_types): + if isinstance(top, str): top = md.load(top).top elif isinstance(top, md.Trajectory): top = top.top diff --git a/pyemma/coordinates/data/util/traj_info_cache.py b/pyemma/coordinates/data/util/traj_info_cache.py index d6915fd95..2a0c55acd 100644 --- a/pyemma/coordinates/data/util/traj_info_cache.py +++ b/pyemma/coordinates/data/util/traj_info_cache.py @@ -20,6 +20,7 @@ @author: marscher ''' +from __future__ import absolute_import import hashlib import os diff --git a/pyemma/coordinates/estimation/koopman.py b/pyemma/coordinates/estimation/koopman.py index b685d1eac..b97c2cc65 100644 --- a/pyemma/coordinates/estimation/koopman.py +++ b/pyemma/coordinates/estimation/koopman.py @@ -15,6 +15,7 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . +from __future__ import absolute_import import numpy as np import scipy.linalg as scl diff --git a/pyemma/coordinates/pipelines.py b/pyemma/coordinates/pipelines.py index 82b26d6b1..64cdadca4 100644 --- a/pyemma/coordinates/pipelines.py +++ b/pyemma/coordinates/pipelines.py @@ -16,6 +16,7 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . 
+from __future__ import absolute_import from logging import getLogger diff --git a/pyemma/coordinates/tests/__init__.py b/pyemma/coordinates/tests/__init__.py index b19ccf90e..ccfa38071 100644 --- a/pyemma/coordinates/tests/__init__.py +++ b/pyemma/coordinates/tests/__init__.py @@ -16,3 +16,4 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . +from __future__ import absolute_import diff --git a/pyemma/coordinates/tests/test_acf.py b/pyemma/coordinates/tests/test_acf.py index 2d139fdc5..209bc3beb 100644 --- a/pyemma/coordinates/tests/test_acf.py +++ b/pyemma/coordinates/tests/test_acf.py @@ -18,6 +18,7 @@ +from __future__ import absolute_import import unittest import numpy as np @@ -42,6 +43,6 @@ def test(self): refacf /= refacf[0] # normalize np.testing.assert_allclose(refacf, testacf) - + if __name__ == "__main__": - unittest.main() + unittest.main() \ No newline at end of file diff --git a/pyemma/coordinates/tests/test_covar_estimator.py b/pyemma/coordinates/tests/test_covar_estimator.py index cf80a397a..9cdd9a4ed 100644 --- a/pyemma/coordinates/tests/test_covar_estimator.py +++ b/pyemma/coordinates/tests/test_covar_estimator.py @@ -1,3 +1,4 @@ +from __future__ import absolute_import import unittest import numpy as np diff --git a/pyemma/coordinates/tests/test_csvreader.py b/pyemma/coordinates/tests/test_csvreader.py index 10db86cda..4c1a5996f 100644 --- a/pyemma/coordinates/tests/test_csvreader.py +++ b/pyemma/coordinates/tests/test_csvreader.py @@ -20,6 +20,7 @@ @author: marscher ''' +from __future__ import absolute_import import numpy as np import unittest diff --git a/pyemma/coordinates/tests/test_datainmemory.py b/pyemma/coordinates/tests/test_datainmemory.py index b24c828f0..fd1dce01d 100644 --- a/pyemma/coordinates/tests/test_datainmemory.py +++ b/pyemma/coordinates/tests/test_datainmemory.py @@ -16,6 +16,7 @@ # along with this program. If not, see . 
+from __future__ import absolute_import import pyemma diff --git a/pyemma/coordinates/tests/test_discretizer.py b/pyemma/coordinates/tests/test_discretizer.py index 2e189494a..13605841b 100644 --- a/pyemma/coordinates/tests/test_discretizer.py +++ b/pyemma/coordinates/tests/test_discretizer.py @@ -22,6 +22,7 @@ @author: marscher ''' +from __future__ import absolute_import import os import tempfile import unittest diff --git a/pyemma/coordinates/tests/test_featurereader.py b/pyemma/coordinates/tests/test_featurereader.py index 8d70c68be..1a88015e9 100644 --- a/pyemma/coordinates/tests/test_featurereader.py +++ b/pyemma/coordinates/tests/test_featurereader.py @@ -20,6 +20,7 @@ @author: marscher ''' +from __future__ import absolute_import import glob import tempfile diff --git a/pyemma/coordinates/tests/test_featurereader_and_tica.py b/pyemma/coordinates/tests/test_featurereader_and_tica.py index 047ccfc10..f79edcfc6 100644 --- a/pyemma/coordinates/tests/test_featurereader_and_tica.py +++ b/pyemma/coordinates/tests/test_featurereader_and_tica.py @@ -22,7 +22,9 @@ @author: Fabian Paul ''' +from __future__ import print_function +from __future__ import absolute_import import unittest import os import tempfile diff --git a/pyemma/coordinates/tests/test_featurereader_and_tica_projection.py b/pyemma/coordinates/tests/test_featurereader_and_tica_projection.py index 98bb7e29b..4b62d2e6e 100644 --- a/pyemma/coordinates/tests/test_featurereader_and_tica_projection.py +++ b/pyemma/coordinates/tests/test_featurereader_and_tica_projection.py @@ -23,6 +23,8 @@ @author: Fabian Paul ''' +from __future__ import absolute_import +from __future__ import print_function import os import tempfile diff --git a/pyemma/coordinates/tests/test_featurizer.py b/pyemma/coordinates/tests/test_featurizer.py index f6f894748..484affc4d 100644 --- a/pyemma/coordinates/tests/test_featurizer.py +++ b/pyemma/coordinates/tests/test_featurizer.py @@ -1,4 +1,3 @@ - # This file is part of PyEMMA. 
# # Copyright (c) 2015, 2014 Computational Molecular Biology Group, Freie Universitaet Berlin (GER) diff --git a/pyemma/coordinates/tests/test_frames_from_file.py b/pyemma/coordinates/tests/test_frames_from_file.py index 3a5c97840..dbca37879 100644 --- a/pyemma/coordinates/tests/test_frames_from_file.py +++ b/pyemma/coordinates/tests/test_frames_from_file.py @@ -24,6 +24,7 @@ @author: gph82, clonker ''' +from __future__ import absolute_import import pkg_resources import unittest diff --git a/pyemma/coordinates/tests/test_numpyfilereader.py b/pyemma/coordinates/tests/test_numpyfilereader.py index 8862c525e..0076f629c 100644 --- a/pyemma/coordinates/tests/test_numpyfilereader.py +++ b/pyemma/coordinates/tests/test_numpyfilereader.py @@ -22,6 +22,8 @@ @author: marscher ''' +from __future__ import absolute_import +from __future__ import print_function import shutil import tempfile diff --git a/pyemma/coordinates/tests/test_pca.py b/pyemma/coordinates/tests/test_pca.py index c4901e8d0..77f55f982 100644 --- a/pyemma/coordinates/tests/test_pca.py +++ b/pyemma/coordinates/tests/test_pca.py @@ -23,6 +23,7 @@ @author: marscher ''' +from __future__ import absolute_import import unittest import os import pkg_resources diff --git a/pyemma/coordinates/tests/test_pipeline.py b/pyemma/coordinates/tests/test_pipeline.py index 5558a6588..833c31431 100644 --- a/pyemma/coordinates/tests/test_pipeline.py +++ b/pyemma/coordinates/tests/test_pipeline.py @@ -18,7 +18,9 @@ +from __future__ import print_function +from __future__ import absolute_import import unittest import os diff --git a/pyemma/coordinates/tests/test_random_access_stride.py b/pyemma/coordinates/tests/test_random_access_stride.py index f818aefec..9732ed828 100644 --- a/pyemma/coordinates/tests/test_random_access_stride.py +++ b/pyemma/coordinates/tests/test_random_access_stride.py @@ -15,6 +15,7 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . 
+from __future__ import absolute_import import os import tempfile diff --git a/pyemma/coordinates/tests/test_save_traj.py b/pyemma/coordinates/tests/test_save_traj.py index 9b9595e9f..461df3581 100644 --- a/pyemma/coordinates/tests/test_save_traj.py +++ b/pyemma/coordinates/tests/test_save_traj.py @@ -24,6 +24,7 @@ @author: gph82, clonker """ +from __future__ import absolute_import import unittest import os diff --git a/pyemma/coordinates/tests/test_save_trajs.py b/pyemma/coordinates/tests/test_save_trajs.py index c41d41079..f9d3c7ddd 100644 --- a/pyemma/coordinates/tests/test_save_trajs.py +++ b/pyemma/coordinates/tests/test_save_trajs.py @@ -24,6 +24,7 @@ @author: gph82, clonker """ +from __future__ import absolute_import import unittest import os @@ -164,4 +165,4 @@ def test_out_of_bound_indexes(self): save_trajs(self.reader, self.sets, outfiles=self.one_pass_files) if __name__ == "__main__": - unittest.main() + unittest.main() \ No newline at end of file diff --git a/pyemma/coordinates/tests/test_source.py b/pyemma/coordinates/tests/test_source.py index b779721b7..a7e383b3c 100644 --- a/pyemma/coordinates/tests/test_source.py +++ b/pyemma/coordinates/tests/test_source.py @@ -18,6 +18,7 @@ +from __future__ import absolute_import import unittest import os import numpy as np @@ -176,4 +177,4 @@ def test_trajfiles(self): assert types.is_list_of_string(self.inp.filenames) if __name__ == "__main__": - unittest.main() + unittest.main() \ No newline at end of file diff --git a/pyemma/coordinates/tests/test_stride.py b/pyemma/coordinates/tests/test_stride.py index 8252e6471..7cc064c22 100644 --- a/pyemma/coordinates/tests/test_stride.py +++ b/pyemma/coordinates/tests/test_stride.py @@ -18,7 +18,9 @@ +from __future__ import print_function +from __future__ import absolute_import import unittest import os import tempfile @@ -118,4 +120,4 @@ def tearDownClass(cls): super(TestStride, cls).tearDownClass() if __name__ == "__main__": - unittest.main() + unittest.main() \ No 
newline at end of file diff --git a/pyemma/coordinates/tests/test_tica.py b/pyemma/coordinates/tests/test_tica.py index de81ca9a9..3219f6f45 100644 --- a/pyemma/coordinates/tests/test_tica.py +++ b/pyemma/coordinates/tests/test_tica.py @@ -23,6 +23,7 @@ @author: marscher """ +from __future__ import absolute_import import unittest import os import pkg_resources diff --git a/pyemma/coordinates/tests/test_traj_info_cache.py b/pyemma/coordinates/tests/test_traj_info_cache.py index c578e7c3b..a7eaa0797 100644 --- a/pyemma/coordinates/tests/test_traj_info_cache.py +++ b/pyemma/coordinates/tests/test_traj_info_cache.py @@ -20,6 +20,7 @@ @author: marscher ''' +from __future__ import absolute_import, print_function from tempfile import NamedTemporaryFile @@ -27,7 +28,7 @@ import tempfile import unittest -from unittest import mock +import mock from pyemma.coordinates import api from pyemma.coordinates.data.feature_reader import FeatureReader diff --git a/pyemma/coordinates/transform/pca.py b/pyemma/coordinates/transform/pca.py index 58967f9dc..15dd449c8 100644 --- a/pyemma/coordinates/transform/pca.py +++ b/pyemma/coordinates/transform/pca.py @@ -17,6 +17,7 @@ # along with this program. If not, see . 
+from __future__ import absolute_import import math diff --git a/pyemma/coordinates/transform/tica.py b/pyemma/coordinates/transform/tica.py index 0153e25db..e5dcfb403 100644 --- a/pyemma/coordinates/transform/tica.py +++ b/pyemma/coordinates/transform/tica.py @@ -20,6 +20,7 @@ @author: marscher ''' +from __future__ import absolute_import import numpy as np from decorator import decorator diff --git a/pyemma/coordinates/util/patches.py b/pyemma/coordinates/util/patches.py index 82978c2ed..849e27bff 100644 --- a/pyemma/coordinates/util/patches.py +++ b/pyemma/coordinates/util/patches.py @@ -22,6 +22,7 @@ @author: marscher ''' +from __future__ import absolute_import from collections import namedtuple @@ -273,7 +274,7 @@ def _random_access_generator(self, f): def _read_traj_data(atom_indices, f, n_frames, **kwargs): """ - + Parameters ---------- atom_indices @@ -284,7 +285,7 @@ def _read_traj_data(atom_indices, f, n_frames, **kwargs): Returns ------- data : TrajData(xyz, unitcell_length, unitcell_angles, box) - + Format read() return values: amber_netcdf_restart_f: xyz [Ang], time, cell_l, cell_a amber restart: xyz[Ang], time, cell_l, cell_a @@ -302,7 +303,7 @@ def _read_traj_data(atom_indices, f, n_frames, **kwargs): trr: xyz[nm], time, step, box (n, 3, 3), lambd? 
xtc: xyz[nm], time, step, box - + xyz: xyz lh5: xyz [nm] arc: xyz[Ang] diff --git a/pyemma/coordinates/util/stat.py b/pyemma/coordinates/util/stat.py index 615d574e6..9d8b4eed4 100644 --- a/pyemma/coordinates/util/stat.py +++ b/pyemma/coordinates/util/stat.py @@ -18,6 +18,7 @@ +from __future__ import absolute_import import numpy as np from pyemma.util.annotators import deprecated @@ -87,4 +88,4 @@ def histogram(transform, dimensions, nbins): for _, chunk in transform: part, _ = np.histogramdd(chunk[:, dimensions], bins=bins) res += part - return res, bins + return res, bins \ No newline at end of file diff --git a/pyemma/datasets/api.py b/pyemma/datasets/api.py index c73ab564f..663a9bb8e 100644 --- a/pyemma/datasets/api.py +++ b/pyemma/datasets/api.py @@ -16,6 +16,7 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . +from __future__ import absolute_import from pyemma.datasets.double_well_thermo import DoubleWellSampler as _DWS __author__ = 'noe' diff --git a/pyemma/datasets/double_well_discrete.py b/pyemma/datasets/double_well_discrete.py index 2121595dc..984d3f460 100644 --- a/pyemma/datasets/double_well_discrete.py +++ b/pyemma/datasets/double_well_discrete.py @@ -86,4 +86,4 @@ def generate_traj(self, N, start=None, stop=None, dt=1): def generate_trajs(self, M, N, start=None, stop=None, dt=1): """ Generates M random trajectories of length N each with time step dt """ from msmtools.generation import generate_trajs - return generate_trajs(self._P, M, N, start=start, stop=stop, dt=dt) + return generate_trajs(self._P, M, N, start=start, stop=stop, dt=dt) \ No newline at end of file diff --git a/pyemma/datasets/double_well_thermo.py b/pyemma/datasets/double_well_thermo.py index 840583913..aa0326625 100644 --- a/pyemma/datasets/double_well_thermo.py +++ b/pyemma/datasets/double_well_thermo.py @@ -15,6 +15,7 @@ # You should have received a copy of the GNU Lesser General Public License # along with this 
program. If not, see . +from __future__ import absolute_import import numpy as _np from pyemma.coordinates import assign_to_centers as _assign_to_centers diff --git a/pyemma/msm/__init__.py b/pyemma/msm/__init__.py index bf8d5dc5e..9f6739baf 100644 --- a/pyemma/msm/__init__.py +++ b/pyemma/msm/__init__.py @@ -78,6 +78,7 @@ """ +from __future__ import absolute_import as _ ###################################################### from msmtools.analysis.dense.pcca import PCCA diff --git a/pyemma/msm/api.py b/pyemma/msm/api.py index f4658e5ee..b4f7d1fd4 100644 --- a/pyemma/msm/api.py +++ b/pyemma/msm/api.py @@ -20,6 +20,7 @@ """ +from __future__ import absolute_import from .estimators import MaximumLikelihoodHMSM as _ML_HMSM from .estimators import BayesianMSM as _Bayes_MSM from .estimators import BayesianHMSM as _Bayes_HMSM diff --git a/pyemma/msm/estimators/__init__.py b/pyemma/msm/estimators/__init__.py index 3d0008970..56b52eb53 100644 --- a/pyemma/msm/estimators/__init__.py +++ b/pyemma/msm/estimators/__init__.py @@ -15,6 +15,7 @@ # # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . +from __future__ import absolute_import __author__ = 'noe' diff --git a/pyemma/msm/estimators/_dtraj_stats.py b/pyemma/msm/estimators/_dtraj_stats.py index 096a35651..da6d1e61d 100644 --- a/pyemma/msm/estimators/_dtraj_stats.py +++ b/pyemma/msm/estimators/_dtraj_stats.py @@ -18,6 +18,7 @@ from __future__ import absolute_import + import numpy as np from msmtools import estimation as msmest diff --git a/pyemma/msm/estimators/bayesian_hmsm.py b/pyemma/msm/estimators/bayesian_hmsm.py index 9dfb578d3..d644302e1 100644 --- a/pyemma/msm/estimators/bayesian_hmsm.py +++ b/pyemma/msm/estimators/bayesian_hmsm.py @@ -16,6 +16,7 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . 
+from __future__ import absolute_import, print_function import numpy as _np diff --git a/pyemma/msm/estimators/bayesian_msm.py b/pyemma/msm/estimators/bayesian_msm.py index 321206d81..8e42e42c5 100644 --- a/pyemma/msm/estimators/bayesian_msm.py +++ b/pyemma/msm/estimators/bayesian_msm.py @@ -16,6 +16,7 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . +from __future__ import absolute_import diff --git a/pyemma/msm/models/__init__.py b/pyemma/msm/models/__init__.py index 7db732578..0251324d6 100644 --- a/pyemma/msm/models/__init__.py +++ b/pyemma/msm/models/__init__.py @@ -16,6 +16,7 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . +from __future__ import absolute_import __author__ = 'noe' from .msm import MSM diff --git a/pyemma/msm/models/hmsm.py b/pyemma/msm/models/hmsm.py index 7df98f1fc..40b27d43d 100644 --- a/pyemma/msm/models/hmsm.py +++ b/pyemma/msm/models/hmsm.py @@ -25,6 +25,7 @@ """ +from __future__ import absolute_import import numpy as _np @@ -478,4 +479,4 @@ def simulate(self, N, start=None, stop=None, dt=1): # for each time step, sample microstate for t, h in enumerate(htraj): otraj[t] = output_distributions[h].rvs() # current cluster - return htraj, otraj + return htraj, otraj \ No newline at end of file diff --git a/pyemma/msm/models/hmsm_sampled.py b/pyemma/msm/models/hmsm_sampled.py index 5f2e1298b..80188e07d 100644 --- a/pyemma/msm/models/hmsm_sampled.py +++ b/pyemma/msm/models/hmsm_sampled.py @@ -25,6 +25,7 @@ """ +from __future__ import absolute_import from pyemma._base.model import SampledModel as _SampledModel from pyemma.msm.models.hmsm import HMSM as _HMSM diff --git a/pyemma/msm/models/msm.py b/pyemma/msm/models/msm.py index 54d61bdc2..3e53d33cc 100644 --- a/pyemma/msm/models/msm.py +++ b/pyemma/msm/models/msm.py @@ -25,8 +25,7 @@ """ - -from pyemma._base.serialization.serialization import 
SerializableMixIn +from __future__ import absolute_import from pyemma._base.serialization.serialization import SerializableMixIn diff --git a/pyemma/msm/models/reactive_flux.py b/pyemma/msm/models/reactive_flux.py index 08902c301..08a5b22f1 100644 --- a/pyemma/msm/models/reactive_flux.py +++ b/pyemma/msm/models/reactive_flux.py @@ -21,6 +21,8 @@ __moduleauthor__ = "Benjamin Trendelkamp-Schroer, Frank Noe" """ +from __future__ import absolute_import +from __future__ import division import numpy as np from msmtools import flux as tptapi diff --git a/pyemma/msm/tests/birth_death_chain.py b/pyemma/msm/tests/birth_death_chain.py index 3c9655c4b..bd3c6e3f4 100644 --- a/pyemma/msm/tests/birth_death_chain.py +++ b/pyemma/msm/tests/birth_death_chain.py @@ -23,6 +23,7 @@ """ +from __future__ import absolute_import import numpy as np @@ -269,4 +270,4 @@ def rate(self, a, b): pi = self.stationary_distribution() qminus = self.committor_backward(a, b) kAB = F / (pi * qminus).sum() - return kAB + return kAB \ No newline at end of file diff --git a/pyemma/msm/tests/test_bayesian_hmsm.py b/pyemma/msm/tests/test_bayesian_hmsm.py index 66d35763a..73b9170ec 100644 --- a/pyemma/msm/tests/test_bayesian_hmsm.py +++ b/pyemma/msm/tests/test_bayesian_hmsm.py @@ -16,6 +16,7 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . +from __future__ import absolute_import import unittest import numpy as np from pyemma.msm import bayesian_hidden_markov_model diff --git a/pyemma/msm/tests/test_bayesian_msm.py b/pyemma/msm/tests/test_bayesian_msm.py index c3324753a..7d78501a7 100644 --- a/pyemma/msm/tests/test_bayesian_msm.py +++ b/pyemma/msm/tests/test_bayesian_msm.py @@ -16,6 +16,7 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . 
+from __future__ import absolute_import import unittest import numpy as np from pyemma.msm import bayesian_markov_model diff --git a/pyemma/msm/tests/test_cktest.py b/pyemma/msm/tests/test_cktest.py index be1d7df83..74ecc7ec1 100644 --- a/pyemma/msm/tests/test_cktest.py +++ b/pyemma/msm/tests/test_cktest.py @@ -23,6 +23,7 @@ """ +from __future__ import absolute_import import unittest import numpy as np diff --git a/pyemma/msm/tests/test_estimator.py b/pyemma/msm/tests/test_estimator.py index 8fea1e95f..3979e8e50 100644 --- a/pyemma/msm/tests/test_estimator.py +++ b/pyemma/msm/tests/test_estimator.py @@ -16,7 +16,7 @@ # along with this program. If not, see . import unittest -from unittest import mock +import mock from pyemma import msm from functools import wraps diff --git a/pyemma/msm/tests/test_hmsm.py b/pyemma/msm/tests/test_hmsm.py index f03516785..fc665a092 100644 --- a/pyemma/msm/tests/test_hmsm.py +++ b/pyemma/msm/tests/test_hmsm.py @@ -21,6 +21,7 @@ """ +from __future__ import absolute_import import unittest import numpy as np diff --git a/pyemma/msm/tests/test_its.py b/pyemma/msm/tests/test_its.py index 651389497..0507003b9 100644 --- a/pyemma/msm/tests/test_its.py +++ b/pyemma/msm/tests/test_its.py @@ -24,6 +24,7 @@ """ +from __future__ import absolute_import import unittest import numpy as np from pyemma import msm diff --git a/pyemma/msm/tests/test_its_oom.py b/pyemma/msm/tests/test_its_oom.py index 516f9278a..997caa8cc 100644 --- a/pyemma/msm/tests/test_its_oom.py +++ b/pyemma/msm/tests/test_its_oom.py @@ -20,6 +20,7 @@ """ +from __future__ import absolute_import import unittest import numpy as np @@ -162,4 +163,4 @@ def test_ignore_errors(self): if __name__ == "__main__": - unittest.main() + unittest.main() \ No newline at end of file diff --git a/pyemma/msm/tests/test_msm.py b/pyemma/msm/tests/test_msm.py index d137d0645..fb85cd68a 100644 --- a/pyemma/msm/tests/test_msm.py +++ b/pyemma/msm/tests/test_msm.py @@ -24,6 +24,7 @@ """ +from __future__ 
import absolute_import import unittest import numpy as np diff --git a/pyemma/msm/tests/test_oom_msm.py b/pyemma/msm/tests/test_oom_msm.py index 8cc0da5f0..69e2a872a 100644 --- a/pyemma/msm/tests/test_oom_msm.py +++ b/pyemma/msm/tests/test_oom_msm.py @@ -21,6 +21,7 @@ """ +from __future__ import absolute_import import unittest import numpy as np @@ -1650,4 +1651,4 @@ def test_simulate_MSM(self): assert (start == traj[0]) if __name__ == "__main__": - unittest.main() + unittest.main() \ No newline at end of file diff --git a/pyemma/msm/tests/test_tpt.py b/pyemma/msm/tests/test_tpt.py index 647457c0a..4311627ab 100644 --- a/pyemma/msm/tests/test_tpt.py +++ b/pyemma/msm/tests/test_tpt.py @@ -24,6 +24,7 @@ """ +from __future__ import absolute_import import unittest import numpy as np from pyemma.util.numeric import assert_allclose @@ -247,4 +248,4 @@ def test_time_units(self): if __name__ == "__main__": - unittest.main() + unittest.main() \ No newline at end of file diff --git a/pyemma/plots/__init__.py b/pyemma/plots/__init__.py index f98e3d6d6..962666ca7 100644 --- a/pyemma/plots/__init__.py +++ b/pyemma/plots/__init__.py @@ -62,6 +62,7 @@ NetworkPlot """ +from __future__ import absolute_import from .timescales import plot_implied_timescales from .plots2d import contour, scatter_contour, plot_free_energy from .networks import plot_markov_model, plot_flux, plot_network, NetworkPlot diff --git a/pyemma/plots/markovtests.py b/pyemma/plots/markovtests.py index 1d16b6deb..e527a3888 100644 --- a/pyemma/plots/markovtests.py +++ b/pyemma/plots/markovtests.py @@ -16,6 +16,7 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . +from __future__ import absolute_import __author__ = 'noe' import math diff --git a/pyemma/plots/networks.py b/pyemma/plots/networks.py index 80d6a18cb..fa33e9f4c 100644 --- a/pyemma/plots/networks.py +++ b/pyemma/plots/networks.py @@ -16,6 +16,7 @@ # along with this program. If not, see . 
+from __future__ import absolute_import import numpy as _np import warnings from pyemma.util import types as _types diff --git a/pyemma/plots/plots2d.py b/pyemma/plots/plots2d.py index ded54a33e..4dc4e6d74 100644 --- a/pyemma/plots/plots2d.py +++ b/pyemma/plots/plots2d.py @@ -16,6 +16,7 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . +from __future__ import absolute_import import numpy as _np diff --git a/pyemma/plots/tests/test_its.py b/pyemma/plots/tests/test_its.py index 07786ecca..2cc3e0411 100644 --- a/pyemma/plots/tests/test_its.py +++ b/pyemma/plots/tests/test_its.py @@ -22,6 +22,7 @@ @author: gph82 ''' +from __future__ import absolute_import import unittest import numpy as np @@ -60,4 +61,4 @@ def test_process(self): if __name__ == "__main__": - unittest.main() + unittest.main() \ No newline at end of file diff --git a/pyemma/plots/tests/test_markovtests.py b/pyemma/plots/tests/test_markovtests.py index 19d1c7dd4..1483b908c 100644 --- a/pyemma/plots/tests/test_markovtests.py +++ b/pyemma/plots/tests/test_markovtests.py @@ -22,6 +22,7 @@ @author: marscher ''' +from __future__ import absolute_import import unittest import numpy as np import pyemma diff --git a/pyemma/plots/tests/test_networks.py b/pyemma/plots/tests/test_networks.py index a3ce51392..3c80ab908 100644 --- a/pyemma/plots/tests/test_networks.py +++ b/pyemma/plots/tests/test_networks.py @@ -22,6 +22,7 @@ @author: marscher ''' +from __future__ import absolute_import import unittest import numpy as np diff --git a/pyemma/plots/tests/test_plots2d.py b/pyemma/plots/tests/test_plots2d.py index 9d51d38a8..3e8905a96 100644 --- a/pyemma/plots/tests/test_plots2d.py +++ b/pyemma/plots/tests/test_plots2d.py @@ -16,6 +16,7 @@ # along with this program. If not, see . 
+from __future__ import absolute_import import unittest import numpy as np diff --git a/pyemma/plots/timescales.py b/pyemma/plots/timescales.py index bc5571313..11ec1ff08 100644 --- a/pyemma/plots/timescales.py +++ b/pyemma/plots/timescales.py @@ -16,6 +16,7 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . +from __future__ import absolute_import import numpy as _np diff --git a/pyemma/thermo/tests/test_TRAM.py b/pyemma/thermo/tests/test_TRAM.py index fd3d5c2a6..9cbf80ba4 100644 --- a/pyemma/thermo/tests/test_TRAM.py +++ b/pyemma/thermo/tests/test_TRAM.py @@ -15,6 +15,7 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . +from __future__ import absolute_import import unittest diff --git a/pyemma/util/__init__.py b/pyemma/util/__init__.py index 9bfb66daf..97f3473a8 100644 --- a/pyemma/util/__init__.py +++ b/pyemma/util/__init__.py @@ -1,3 +1,4 @@ +from __future__ import absolute_import from ._config import Config as _Config # default config instance diff --git a/pyemma/util/annotators.py b/pyemma/util/annotators.py index 8356a567c..5843d4fc9 100644 --- a/pyemma/util/annotators.py +++ b/pyemma/util/annotators.py @@ -15,6 +15,7 @@ # # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . 
+from __future__ import absolute_import import warnings from decorator import decorator, decorate diff --git a/pyemma/util/debug.py b/pyemma/util/debug.py index e4361e381..f6bece8b1 100644 --- a/pyemma/util/debug.py +++ b/pyemma/util/debug.py @@ -26,6 +26,7 @@ @author: marscher ''' +from __future__ import absolute_import, print_function import signal from logging import getLogger diff --git a/pyemma/util/files.py b/pyemma/util/files.py index 20574a372..3dd5b894d 100644 --- a/pyemma/util/files.py +++ b/pyemma/util/files.py @@ -22,6 +22,7 @@ @author: marscher ''' +from __future__ import absolute_import, print_function import os import errno @@ -69,4 +70,4 @@ def __enter__(self): return self.tmpdir def __exit__(self, *args): - shutil.rmtree(self.tmpdir, ignore_errors=True) + shutil.rmtree(self.tmpdir, ignore_errors=True) \ No newline at end of file diff --git a/pyemma/util/indices.py b/pyemma/util/indices.py index d2d7d5cfc..81607f4b9 100644 --- a/pyemma/util/indices.py +++ b/pyemma/util/indices.py @@ -16,6 +16,7 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . +from __future__ import absolute_import import numpy as np diff --git a/pyemma/util/linalg.py b/pyemma/util/linalg.py index 56243a4b1..51be657aa 100644 --- a/pyemma/util/linalg.py +++ b/pyemma/util/linalg.py @@ -17,6 +17,7 @@ # along with this program. If not, see . 
+from __future__ import absolute_import import numpy as np import scipy.linalg import scipy.sparse diff --git a/pyemma/util/log.py b/pyemma/util/log.py index c164daadc..219000ce9 100644 --- a/pyemma/util/log.py +++ b/pyemma/util/log.py @@ -20,6 +20,7 @@ @author: marscher ''' +from __future__ import absolute_import import logging from logging.config import dictConfig diff --git a/pyemma/util/numeric.py b/pyemma/util/numeric.py index 897b48f64..d2cdd830b 100644 --- a/pyemma/util/numeric.py +++ b/pyemma/util/numeric.py @@ -21,6 +21,7 @@ @author: marscher ''' +from __future__ import absolute_import from numpy.testing import assert_allclose as assert_allclose_np __all__ = ['assert_allclose', diff --git a/pyemma/util/statistics.py b/pyemma/util/statistics.py index 646cb333f..0447a3784 100644 --- a/pyemma/util/statistics.py +++ b/pyemma/util/statistics.py @@ -23,6 +23,7 @@ @author: noe ''' +from __future__ import absolute_import import numpy as np import math diff --git a/pyemma/util/tests/statistics_test.py b/pyemma/util/tests/statistics_test.py index 43b3b2274..aa9dd0c8f 100644 --- a/pyemma/util/tests/statistics_test.py +++ b/pyemma/util/tests/statistics_test.py @@ -23,6 +23,7 @@ @author: noe ''' +from __future__ import absolute_import import unittest from pyemma.util import statistics import numpy as np @@ -53,4 +54,4 @@ def test_confidence_interval(self): self.assertConfidence(np.random.normal(size=10000), 0.95, 0.01) if __name__ == "__main__": - unittest.main() + unittest.main() \ No newline at end of file diff --git a/pyemma/util/tests/test_discrete_trajectories.py b/pyemma/util/tests/test_discrete_trajectories.py index b2984fd04..d6df11411 100644 --- a/pyemma/util/tests/test_discrete_trajectories.py +++ b/pyemma/util/tests/test_discrete_trajectories.py @@ -23,6 +23,7 @@ """ +from __future__ import absolute_import import os import unittest @@ -199,4 +200,4 @@ def test_sample_by_state_replace_subset(self): assert(dtraj[sidx[i][t,1]] == subset[i]) if 
__name__=="__main__": - unittest.main() + unittest.main() \ No newline at end of file diff --git a/pyemma/util/tests/test_shortcut.py b/pyemma/util/tests/test_shortcut.py index aa68f9cdc..780456333 100644 --- a/pyemma/util/tests/test_shortcut.py +++ b/pyemma/util/tests/test_shortcut.py @@ -18,6 +18,7 @@ +from __future__ import absolute_import from pyemma.util.annotators import shortcut, aliased, alias import unittest @@ -52,4 +53,4 @@ def test_alias_class_inst(self): inst = Foo() assert hasattr(inst, "bar2") assert hasattr(inst, "bar3") - self.assertEqual(inst.bar.__doc__, inst.bar2.__doc__) + self.assertEqual(inst.bar.__doc__, inst.bar2.__doc__) \ No newline at end of file From 48d2c3132e090fcd9cce30b1d980b36e7a9c3ca1 Mon Sep 17 00:00:00 2001 From: marscher Date: Mon, 5 Feb 2018 16:04:28 +0100 Subject: [PATCH 26/43] minor --- pyemma/coordinates/data/sources_merger.py | 4 ++-- pyemma/coordinates/data/util/reader_utils.py | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/pyemma/coordinates/data/sources_merger.py b/pyemma/coordinates/data/sources_merger.py index 1e663352f..0b1e7351d 100644 --- a/pyemma/coordinates/data/sources_merger.py +++ b/pyemma/coordinates/data/sources_merger.py @@ -18,10 +18,10 @@ class SourcesMerger(DataSource, SerializableMixIn): sources : list, tuple list of DataSources (Readers, StreamingTransformers etc.) to combine for streaming access. - chunk: int + chunk: int or None chunk size to use for underlying iterators. 
""" - def __init__(self, sources, chunk=5000): + def __init__(self, sources, chunk=None): super(SourcesMerger, self).__init__(chunksize=chunk) self.sources = sources self._is_reader = True diff --git a/pyemma/coordinates/data/util/reader_utils.py b/pyemma/coordinates/data/util/reader_utils.py index db1024639..907d1a6f9 100644 --- a/pyemma/coordinates/data/util/reader_utils.py +++ b/pyemma/coordinates/data/util/reader_utils.py @@ -23,6 +23,8 @@ import numpy as np import os +from six import string_types + def create_file_reader(input_files, topology, featurizer, chunksize=None, **kw): r""" @@ -43,8 +45,6 @@ def create_file_reader(input_files, topology, featurizer, chunksize=None, **kw): from pyemma.coordinates.data.py_csv_reader import PyCSVReader from pyemma.coordinates.data import FeatureReader from pyemma.coordinates.data.fragmented_trajectory_reader import FragmentedTrajectoryReader - import six - str = six.string_types # fragmented trajectories if (isinstance(input_files, (list, tuple)) and len(input_files) > 0 and @@ -52,15 +52,15 @@ def create_file_reader(input_files, topology, featurizer, chunksize=None, **kw): return FragmentedTrajectoryReader(input_files, topology, chunksize, featurizer) # normal trajectories - if (isinstance(input_files, str) + if (isinstance(input_files, string_types) or (isinstance(input_files, (list, tuple)) - and (any(isinstance(item, str) for item in input_files) + and (any(isinstance(item, string_types) for item in input_files) or len(input_files) is 0))): reader = None # check: if single string create a one-element list - if isinstance(input_files, str): + if isinstance(input_files, string_types): input_list = [input_files] - elif len(input_files) > 0 and all(isinstance(item, str) for item in input_files): + elif len(input_files) > 0 and all(isinstance(item, string_types) for item in input_files): input_list = input_files else: if len(input_files) is 0: @@ -177,7 +177,7 @@ def preallocate_empty_trajectory(top, n_frames=1): def 
enforce_top(top): - if isinstance(top, str): + if isinstance(top, string_types): top = md.load(top).top elif isinstance(top, md.Trajectory): top = top.top From 3aee7e76c2ccc73d5d362787f8dbed1332eaee84 Mon Sep 17 00:00:00 2001 From: marscher Date: Mon, 5 Feb 2018 16:19:30 +0100 Subject: [PATCH 27/43] [lagged covar] properly deprecate chunksize arg. --- pyemma/coordinates/estimation/covariance.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/pyemma/coordinates/estimation/covariance.py b/pyemma/coordinates/estimation/covariance.py index a6df2e9d7..5d84726a9 100644 --- a/pyemma/coordinates/estimation/covariance.py +++ b/pyemma/coordinates/estimation/covariance.py @@ -15,6 +15,7 @@ # You should have received a copy of the GNU Lesser General Public License # along with this program. If not, see . +from __future__ import absolute_import import numpy as np import numbers @@ -77,13 +78,18 @@ class LaggedCovariance(StreamingEstimator): skip : int, optional, default=0 skip the first initial n frames per trajectory. chunksize : deprecated, default=NotImplemented - The chunk size can be set during estimation. + The chunk size should now be set during estimation. """ def __init__(self, c00=True, c0t=False, ctt=False, remove_constant_mean=None, remove_data_mean=False, reversible=False, bessel=True, sparse_mode='auto', modify_data=False, lag=0, weights=None, stride=1, skip=0, chunksize=NotImplemented, ncov_max=float('inf')): super(LaggedCovariance, self).__init__() + if chunksize is not NotImplemented: + import warnings + from pyemma.util.exceptions import PyEMMA_DeprecationWarning + warnings.warn('passed deprecated argument chunksize to LaggedCovariance. 
Will be ignored!', + category=PyEMMA_DeprecationWarning) if (c0t or ctt) and lag == 0: raise ValueError("lag must be positive if c0t=True or ctt=True") From 2d9dd9677436408323dcf3eb3b607c79c454e8e4 Mon Sep 17 00:00:00 2001 From: marscher Date: Mon, 5 Feb 2018 18:27:25 +0100 Subject: [PATCH 28/43] fix py27 --- devtools/conda-recipe/run_test.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/devtools/conda-recipe/run_test.py b/devtools/conda-recipe/run_test.py index 71bef7209..dcece6801 100644 --- a/devtools/conda-recipe/run_test.py +++ b/devtools/conda-recipe/run_test.py @@ -9,7 +9,9 @@ # where to write junit xml junit_xml = os.path.join(os.getenv('CIRCLE_TEST_REPORTS', os.path.expanduser('~')), 'reports', 'junit.xml') -os.makedirs(os.path.dirname(junit_xml), exist_ok=True) +target_dir = os.path.dirname(junit_xml) +if not os.path.exists(target_dir): + os.makedirs(target_dir) print('junit destination:', junit_xml) njobs_args = '-p no:xdist' if os.getenv('TRAVIS') else '-n2' From b682e7f3ab169c77449ee4074ce90cee316f0c54 Mon Sep 17 00:00:00 2001 From: marscher Date: Mon, 5 Feb 2018 19:09:34 +0100 Subject: [PATCH 29/43] [test-vamp] compare partial_fit output with a numerical tol of 1e-15 --- pyemma/coordinates/tests/test_vamp.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pyemma/coordinates/tests/test_vamp.py b/pyemma/coordinates/tests/test_vamp.py index d97cde6af..9babfb097 100644 --- a/pyemma/coordinates/tests/test_vamp.py +++ b/pyemma/coordinates/tests/test_vamp.py @@ -104,19 +104,21 @@ def do_test(self, dim, rank, test_partial_fit=False): model_params = vamp._model.get_model_params() model_params2 = vamp2._model.get_model_params() + atol = 1e-15 + for n in model_params.keys(): if model_params[n] is not None and model_params2[n] is not None: - np.testing.assert_allclose(model_params[n], model_params2[n]) + np.testing.assert_allclose(model_params[n], model_params2[n], atol=atol) vamp2.singular_values # trigger 
diagonalization vamp2.right = True for t, ref in zip(trajs, phi_trajs): - np.testing.assert_allclose(vamp2.transform(t[tau:]), ref) + np.testing.assert_allclose(vamp2.transform(t[tau:]), ref, atol=atol) vamp2.right = False for t, ref in zip(trajs, psi_trajs): - np.testing.assert_allclose(vamp2.transform(t[0:-tau]), ref) + np.testing.assert_allclose(vamp2.transform(t[0:-tau]), ref, atol=atol) def generate(T, N_steps, s0=0): From 18d2d3b21363de142e6329293e92694c4e5dc2f7 Mon Sep 17 00:00:00 2001 From: marscher Date: Mon, 5 Feb 2018 19:31:09 +0100 Subject: [PATCH 30/43] [test-vamp] compare only 2 significant digits vs. MSM score for VAMPE --- pyemma/coordinates/tests/test_vamp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyemma/coordinates/tests/test_vamp.py b/pyemma/coordinates/tests/test_vamp.py index 9babfb097..331a2430f 100644 --- a/pyemma/coordinates/tests/test_vamp.py +++ b/pyemma/coordinates/tests/test_vamp.py @@ -273,7 +273,7 @@ def test_score_vs_MSM(self): vamp_train = pyemma_api_vamp(data=trajs_train, lag=self.lag, dim=1.0) score_vamp = vamp_train.score(test_data=trajs_test, score_method=m) - self.assertAlmostEqual(score_msm, score_vamp, places=3, msg=m) + self.assertAlmostEqual(score_msm, score_vamp, places=2 if m == 'VAMPE' else 3, msg=m) if __name__ == "__main__": unittest.main() From de69bf17f0cd4aa76b9bbfd9779fc597904fa545 Mon Sep 17 00:00:00 2001 From: marscher Date: Mon, 5 Feb 2018 20:20:42 +0100 Subject: [PATCH 31/43] lower tol, fix codacy hint. 
--- pyemma/coordinates/tests/test_vamp.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pyemma/coordinates/tests/test_vamp.py b/pyemma/coordinates/tests/test_vamp.py index 331a2430f..ecb17a99c 100644 --- a/pyemma/coordinates/tests/test_vamp.py +++ b/pyemma/coordinates/tests/test_vamp.py @@ -104,7 +104,7 @@ def do_test(self, dim, rank, test_partial_fit=False): model_params = vamp._model.get_model_params() model_params2 = vamp2._model.get_model_params() - atol = 1e-15 + atol = 1e-14 for n in model_params.keys(): if model_params[n] is not None and model_params2[n] is not None: @@ -209,13 +209,13 @@ def test_CK_expectation_against_MSM(self): pred = cktest.predictions[1:] est = cktest.estimates[1:] - for i in range(len(pred)): + for i, (est_, pred_) in enumerate(zip(est, pred)): msm = estimate_markov_model(dtrajs=self.dtrajs, lag=self.lag*(i+1), reversible=False) msm_esti = self.p0.T.dot(msm.P).dot(obs) msm_pred = self.p0.T.dot(np.linalg.matrix_power(self.msm.P, (i+1))).dot(obs) - np.testing.assert_allclose(pred[i], msm_pred, atol=self.atol) - np.testing.assert_allclose(est[i], msm_esti, atol=self.atol) - np.testing.assert_allclose(est[i], pred[i], atol=0.006) + np.testing.assert_allclose(pred_, msm_pred, atol=self.atol) + np.testing.assert_allclose(est_, msm_esti, atol=self.atol) + np.testing.assert_allclose(est_, pred_, atol=0.006) def test_CK_covariances_of_singular_functions(self): #from pyemma import config @@ -233,13 +233,13 @@ def test_CK_covariances_against_MSM(self): pred = cktest.predictions[1:] est = cktest.estimates[1:] - for i in range(len(pred)): + for i, (est_, pred_) in enumerate(zip(est, pred)): msm = estimate_markov_model(dtrajs=self.dtrajs, lag=self.lag*(i+1), reversible=False) msm_esti = (self.p0 * sta).T.dot(msm.P).dot(obs).T msm_pred = (self.p0 * sta).T.dot(np.linalg.matrix_power(self.msm.P, (i+1))).dot(obs).T - np.testing.assert_allclose(np.diag(pred[i]), np.diag(msm_pred), atol=self.atol) - 
np.testing.assert_allclose(np.diag(est[i]), np.diag(msm_esti), atol=self.atol) - np.testing.assert_allclose(np.diag(est[i]), np.diag(pred[i]), atol=0.006) + np.testing.assert_allclose(np.diag(pred_), np.diag(msm_pred), atol=self.atol) + np.testing.assert_allclose(np.diag(est_), np.diag(msm_esti), atol=self.atol) + np.testing.assert_allclose(np.diag(est_), np.diag(pred_), atol=0.006) def test_self_score_with_MSM(self): T = self.msm.P From 6ad85e6a2142e8b7c831c2c637f4b0b5bb99a75a Mon Sep 17 00:00:00 2001 From: marscher Date: Mon, 5 Feb 2018 20:21:10 +0100 Subject: [PATCH 32/43] fix some codacy issues. --- pyemma/_base/estimator.py | 11 ++++++----- pyemma/util/_config.py | 4 ++-- pyemma/util/annotators.py | 1 - pyemma/util/types.py | 8 ++++---- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/pyemma/_base/estimator.py b/pyemma/_base/estimator.py index 77453efe3..64a3d1b68 100644 --- a/pyemma/_base/estimator.py +++ b/pyemma/_base/estimator.py @@ -299,7 +299,8 @@ def estimate_param_scan(estimator, X, param_sets, evaluate=None, evaluate_args=N if evaluate is not None and evaluate_args is not None and len(evaluate) != len(evaluate_args): raise ValueError("length mismatch: evaluate ({}) and evaluate_args ({})".format(len(evaluate), len(evaluate_args))) - if progress_reporter is not None and show_progress: + show_progress = progress_reporter is not None and show_progress + if show_progress: progress_reporter._progress_register(len(estimators), stage=0, description="estimating %s" % str(estimator.__class__.__name__)) @@ -317,7 +318,7 @@ def estimate_param_scan(estimator, X, param_sets, evaluate=None, evaluate_args=N from pathos.multiprocessing import Pool as Parallel pool = Parallel(processes=n_jobs) args = list(task_iter) - if progress_reporter is not None: + if show_progress: from pyemma._base.model import SampledModel for a in args: if isinstance(a[0], SampledModel): @@ -351,7 +352,7 @@ def error_callback(*args, **kw): estimators[0].logger.debug('estimating 
%s with n_jobs=1 because of the setting or ' 'you not have a POSIX system', estimator) res = [] - if progress_reporter is not None: + if show_progress: from pyemma._base.model import SampledModel if isinstance(estimator, SampledModel): for e in estimators: @@ -360,10 +361,10 @@ def error_callback(*args, **kw): for estimator, param_set in zip(estimators, param_sets): res.append(_estimate_param_scan_worker(estimator, param_set, X, evaluate, evaluate_args, failfast, return_exceptions)) - if progress_reporter is not None and show_progress: + if show_progress: progress_reporter._progress_update(1, stage=0) - if progress_reporter is not None and show_progress: + if show_progress: progress_reporter._progress_force_finish(0) # done diff --git a/pyemma/util/_config.py b/pyemma/util/_config.py index 8bfe25849..6ee2184b5 100644 --- a/pyemma/util/_config.py +++ b/pyemma/util/_config.py @@ -172,10 +172,10 @@ def cfg_dir(self, pyemma_cfg_dir): if not os.path.exists(pyemma_cfg_dir): try: mkdir_p(pyemma_cfg_dir) - except EnvironmentError: - raise ConfigDirectoryException("could not create configuration directory '%s'" % pyemma_cfg_dir) except NotADirectoryError: # on Python 3 raise ConfigDirectoryException("pyemma cfg dir (%s) is not a directory" % pyemma_cfg_dir) + except EnvironmentError: + raise ConfigDirectoryException("could not create configuration directory '%s'" % pyemma_cfg_dir) if not os.path.isdir(pyemma_cfg_dir): raise ConfigDirectoryException("%s is no valid directory" % pyemma_cfg_dir) diff --git a/pyemma/util/annotators.py b/pyemma/util/annotators.py index 5843d4fc9..ecbe05f37 100644 --- a/pyemma/util/annotators.py +++ b/pyemma/util/annotators.py @@ -28,7 +28,6 @@ 'deprecated', 'shortcut', 'fix_docs', - 'estimation_required', ] diff --git a/pyemma/util/types.py b/pyemma/util/types.py index e5192570e..65a0834f3 100644 --- a/pyemma/util/types.py +++ b/pyemma/util/types.py @@ -173,8 +173,8 @@ def ensure_dtraj_list(dtrajs): if is_list_of_int(dtrajs): return 
[np.array(dtrajs, dtype=int)] else: - for i in range(len(dtrajs)): - dtrajs[i] = ensure_dtraj(dtrajs[i]) + for i, dtraj in enumerate(dtrajs): + dtrajs[i] = ensure_dtraj(dtraj) return dtrajs else: return [ensure_dtraj(dtrajs)] @@ -478,8 +478,8 @@ def ensure_traj_list(trajs): return [np.array(trajs)[:,None]] else: res = [] - for i in range(len(trajs)): - res.append(ensure_traj(trajs[i])) + for traj in trajs: + res.append(ensure_traj(traj)) return res else: # looks like this is one trajectory From 1c1570fdcac915232660758be02e91f636d2f16c Mon Sep 17 00:00:00 2001 From: "Martin K. Scherer" Date: Mon, 5 Feb 2018 23:42:48 +0100 Subject: [PATCH 33/43] fix py2 issue (missing exception) --- pyemma/util/_config.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pyemma/util/_config.py b/pyemma/util/_config.py index 6ee2184b5..86dac0225 100644 --- a/pyemma/util/_config.py +++ b/pyemma/util/_config.py @@ -17,6 +17,7 @@ from __future__ import absolute_import, print_function +import six from six.moves.configparser import ConfigParser import os import shutil @@ -32,6 +33,10 @@ class ReadConfigException(Exception): pass +if six.PY2: + class NotADirectoryError(Exception): + pass + __all__ = ('Config', ) From 4a2568e58fabb8245f0f16c583ca7b3141442f4c Mon Sep 17 00:00:00 2001 From: "Martin K. 
Scherer" Date: Mon, 5 Feb 2018 23:48:30 +0100 Subject: [PATCH 34/43] windoze sissy --- pyemma/coordinates/tests/test_vamp.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pyemma/coordinates/tests/test_vamp.py b/pyemma/coordinates/tests/test_vamp.py index ecb17a99c..e749bfe9b 100644 --- a/pyemma/coordinates/tests/test_vamp.py +++ b/pyemma/coordinates/tests/test_vamp.py @@ -104,21 +104,23 @@ def do_test(self, dim, rank, test_partial_fit=False): model_params = vamp._model.get_model_params() model_params2 = vamp2._model.get_model_params() - atol = 1e-14 + import sys + atol = 1e-14 if sys.platform == 'win32' else 1e-15 + rtol = 1e-6 if sys.platform == 'win32' else 1e-7 for n in model_params.keys(): if model_params[n] is not None and model_params2[n] is not None: - np.testing.assert_allclose(model_params[n], model_params2[n], atol=atol) + np.testing.assert_allclose(model_params[n], model_params2[n], rtol=rtol, atol=atol) vamp2.singular_values # trigger diagonalization vamp2.right = True for t, ref in zip(trajs, phi_trajs): - np.testing.assert_allclose(vamp2.transform(t[tau:]), ref, atol=atol) + np.testing.assert_allclose(vamp2.transform(t[tau:]), ref, rtol=rtol, atol=atol) vamp2.right = False for t, ref in zip(trajs, psi_trajs): - np.testing.assert_allclose(vamp2.transform(t[0:-tau]), ref, atol=atol) + np.testing.assert_allclose(vamp2.transform(t[0:-tau]), ref, rtol=rtol, atol=atol) def generate(T, N_steps, s0=0): From c68c1bdda041557f22cee1e68f6bb5d402e66b84 Mon Sep 17 00:00:00 2001 From: marscher Date: Tue, 6 Feb 2018 12:05:50 +0100 Subject: [PATCH 35/43] use assert_allclose_ingore_phase for partial_fit test --- pyemma/coordinates/tests/test_vamp.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/pyemma/coordinates/tests/test_vamp.py b/pyemma/coordinates/tests/test_vamp.py index e749bfe9b..ef6db2f58 100644 --- a/pyemma/coordinates/tests/test_vamp.py +++ b/pyemma/coordinates/tests/test_vamp.py @@ -104,13 
+104,19 @@ def do_test(self, dim, rank, test_partial_fit=False): model_params = vamp._model.get_model_params() model_params2 = vamp2._model.get_model_params() - import sys - atol = 1e-14 if sys.platform == 'win32' else 1e-15 - rtol = 1e-6 if sys.platform == 'win32' else 1e-7 + #import sys + #atol = 1e-14 if sys.platform == 'win32' else 1e-15 + #rtol = 1e-6 if sys.platform == 'win32' else 1e-7 + atol = 1e-15 + rtol = 1e-6 for n in model_params.keys(): if model_params[n] is not None and model_params2[n] is not None: - np.testing.assert_allclose(model_params[n], model_params2[n], rtol=rtol, atol=atol) + if n not in ('U', 'V'): + np.testing.assert_allclose(model_params[n], model_params2[n], rtol=rtol, atol=atol, + err_msg='failed for model param %s' % n) + else: + assert_allclose_ignore_phase(model_params[n], model_params2[n], atol=atol) vamp2.singular_values # trigger diagonalization From 73bfd6348929cf21699f084ee7a43dab7586947f Mon Sep 17 00:00:00 2001 From: marscher Date: Tue, 6 Feb 2018 13:47:23 +0100 Subject: [PATCH 36/43] [test-vamp] compare projected trajs phase agnostic. 
--- pyemma/coordinates/tests/test_vamp.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/pyemma/coordinates/tests/test_vamp.py b/pyemma/coordinates/tests/test_vamp.py index ef6db2f58..c5a680ab7 100644 --- a/pyemma/coordinates/tests/test_vamp.py +++ b/pyemma/coordinates/tests/test_vamp.py @@ -104,9 +104,6 @@ def do_test(self, dim, rank, test_partial_fit=False): model_params = vamp._model.get_model_params() model_params2 = vamp2._model.get_model_params() - #import sys - #atol = 1e-14 if sys.platform == 'win32' else 1e-15 - #rtol = 1e-6 if sys.platform == 'win32' else 1e-7 atol = 1e-15 rtol = 1e-6 @@ -122,11 +119,11 @@ def do_test(self, dim, rank, test_partial_fit=False): vamp2.right = True for t, ref in zip(trajs, phi_trajs): - np.testing.assert_allclose(vamp2.transform(t[tau:]), ref, rtol=rtol, atol=atol) + assert_allclose_ignore_phase(vamp2.transform(t[tau:]), ref, rtol=rtol, atol=atol) vamp2.right = False for t, ref in zip(trajs, psi_trajs): - np.testing.assert_allclose(vamp2.transform(t[0:-tau]), ref, rtol=rtol, atol=atol) + assert_allclose_ignore_phase(vamp2.transform(t[0:-tau]), ref, rtol=rtol, atol=atol) def generate(T, N_steps, s0=0): @@ -139,12 +136,13 @@ def generate(T, N_steps, s0=0): return dtraj -def assert_allclose_ignore_phase(A, B, atol): +def assert_allclose_ignore_phase(A, B, atol, rtol=1e-5): A = np.atleast_2d(A) B = np.atleast_2d(B) assert A.shape == B.shape for i in range(B.shape[1]): - assert np.allclose(A[:, i], B[:, i], atol=atol) or np.allclose(A[:, i], -B[:, i], atol=atol) + assert (np.allclose(A[:, i], B[:, i], atol=atol, rtol=rtol) + or np.allclose(A[:, i], -B[:, i], atol=atol, rtol=rtol)) class TestVAMPModel(unittest.TestCase): From 886c19d7b268a04728e8e0c5d83168194360d359 Mon Sep 17 00:00:00 2001 From: marscher Date: Tue, 6 Feb 2018 13:48:20 +0100 Subject: [PATCH 37/43] [vamp] swapped observables, statistics arg to be consistent. 
removed obsolete todos added doc strings --- pyemma/coordinates/api.py | 1 + pyemma/coordinates/transform/vamp.py | 98 +++++++++++++++++++++------- 2 files changed, 77 insertions(+), 22 deletions(-) diff --git a/pyemma/coordinates/api.py b/pyemma/coordinates/api.py index c7fdafbdd..56c774db2 100644 --- a/pyemma/coordinates/api.py +++ b/pyemma/coordinates/api.py @@ -1262,6 +1262,7 @@ def vamp(data=None, lag=10, dim=None, scaling=None, right=True, ncov_max=float(' Parameters ---------- + data : lag : int lag time dim : float or int diff --git a/pyemma/coordinates/transform/vamp.py b/pyemma/coordinates/transform/vamp.py index ac6dbcc30..7a8db68b1 100644 --- a/pyemma/coordinates/transform/vamp.py +++ b/pyemma/coordinates/transform/vamp.py @@ -123,42 +123,38 @@ def dimension(self): 'transformer has not yet been estimated. Result is only an approximation.')) return self.dim - def expectation(self, statistics, observables, lag_multiple=1, statistics_mean_free=False, - observables_mean_free=False): + def expectation(self, observables, statistics, lag_multiple=1, observables_mean_free=False, statistics_mean_free=False): r"""Compute future expectation of observable or covariance using the approximated Koopman operator. - TODO: this requires some discussion - TODO: add equations Parameters ---------- + observables : np.ndarray((input_dimension, n_observables)) + Coefficients that express one or multiple observables in + the basis of the input features. + statistics : np.ndarray((input_dimension, n_statistics)), optional Coefficients that express one or multiple statistics in the basis of the input features. This parameter can be None. In that case, this method returns the future expectation value of the observable(s). - observables : np.ndarray((input_dimension, n_observables)) - Coefficients that express one or multiple observables in - the basis of the input features. 
- lag_multiple : int If > 1, extrapolate to a multiple of the estimator's lag time by assuming Markovianity of the approximated Koopman operator. - statistics_mean_free : bool, default=False - If true, coefficients in statistics refer to the input - features with feature means removed. - If false, coefficients in statistics refer to the - unmodified input features. - observables_mean_free : bool, default=False If true, coefficients in observables refer to the input features with feature means removed. If false, coefficients in observables refer to the unmodified input features. + statistics_mean_free : bool, default=False + If true, coefficients in statistics refer to the input + features with feature means removed. + If false, coefficients in statistics refer to the + unmodified input features. """ # TODO: implement the case lag_multiple=0 @@ -266,9 +262,7 @@ def score(self, test_model=None, score_method='VAMP2'): ------- """ - # TODO: test me! # TODO: implement for TICA too - # TODO: check compatibility of models, e.g. equal lag time, equal features? if test_model is None: test_model = self Uk = self.U[:, 0:self.dimension()] @@ -344,7 +338,9 @@ def __init__(self, lag, dim=None, scaling=None, right=True, epsilon=1e-6, Use only every stride-th time step. By default, every time step is used. skip : int, default=0 skip the first initial n frames per trajectory. - + ncov_max : int, default=infinity + limit the memory usage of the algorithm from [3]_ to an amount that corresponds + to ncov_max additional copies of each correlation matrix References ---------- @@ -352,6 +348,8 @@ def __init__(self, lag, dim=None, scaling=None, right=True, epsilon=1e-6, arXiv:1707.04659v1 .. [2] Noe, F. and Clementi, C. 2015. Kinetic distance and kinetic maps from molecular dynamics simulation. J. Chem. Theory. Comput. doi:10.1021/acs.jctc.5b00553 + .. [3] Chan, T. F., Golub G. H., LeVeque R. J. 1979. Updating formulae and a pairwise algorithm for + computing sample variances.
Technical Report STAN-CS-79-773, Department of Computer Science, Stanford University. """ StreamingEstimationTransformer.__init__(self) @@ -457,9 +455,6 @@ def _transform_array(self, X): return Y.astype(self.output_type()) - def output_type(self): - return StreamingEstimationTransformer.output_type(self) - @property def singular_values(self): r"""Singular values of VAMP (usually denoted :math:`\sigma`) @@ -516,14 +511,30 @@ def show_progress(self, value): if self._covar is not None: self._covar.show_progress = value - def expectation(self, statistics, observables, lag_multiple=1, statistics_mean_free=False, - observables_mean_free=False): + def expectation(self, observables, statistics, lag_multiple=1, observables_mean_free=False, + statistics_mean_free=False): return self._model.expectation(statistics, observables, lag_multiple=lag_multiple, statistics_mean_free=statistics_mean_free, observables_mean_free=observables_mean_free) def cktest(self, n_observables=None, observables='psi', statistics='phi', mlags=10, n_jobs=1, show_progress=True, iterable=None): + """ + + Parameters + ---------- + n_observables + observables + statistics + mlags + n_jobs + show_progress + iterable + + Returns + ------- + + """ if n_observables is not None: if n_observables > self.dimension(): warnings.warn('Selected singular functions as observables but dimension ' @@ -571,6 +582,49 @@ class VAMPChapmanKolmogorovValidator(LaggedModelValidator): __serialize_version = 0 __serialize_fields = ('nsets', 'statistics', 'observables', 'observables_mean_free', 'statistics_mean_free') + """ + + Parameters + ---------- + model : Model + Model to be tested + + estimator : Estimator + Parametrized Estimator that has produced the model + + statistics : np.ndarray((input_dimension, n_statistics)), optional + Coefficients that express one or multiple statistics in + the basis of the input features. + This parameter can be None. 
In that case, this method + returns the future expectation value of the observable(s). + + observables : np.ndarray((input_dimension, n_observables)) + Coefficients that express one or multiple observables in + the basis of the input features. + + mlags : int or int-array, default=10 + multiples of lag times for testing the Model, e.g. range(10). + A single int will trigger a range, i.e. mlags=10 maps to + mlags=range(10). The setting None will choose mlags automatically + according to the longest available trajectory + Note that you need to be able to do a model prediction for each + of these lag time multiples, e.g. the value 0 only make sense + if _predict_observables(0) will work. + + conf : float, default = 0.95 + confidence interval for errors + + err_est : bool, default=False + if the Estimator is capable of error calculation, will compute + errors for each tau estimate. This option can be computationally + expensive. + + n_jobs : int, default=1 + how many jobs to use during calculation + + show_progress : bool, default=True + Show progressbars for calculation? 
+ """ def __init__(self, model, estimator, observables, statistics, observables_mean_free, statistics_mean_free, mlags=10, n_jobs=1, show_progress=True): LaggedModelValidator.__init__(self, model, estimator, mlags=mlags, From 5d6f4d35e876e34f4c9e0fffc4f38b706ca9f230 Mon Sep 17 00:00:00 2001 From: marscher Date: Tue, 6 Feb 2018 14:27:07 +0100 Subject: [PATCH 38/43] fix argument order --- pyemma/coordinates/tests/test_vamp.py | 4 +--- pyemma/coordinates/transform/vamp.py | 4 ++-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/pyemma/coordinates/tests/test_vamp.py b/pyemma/coordinates/tests/test_vamp.py index c5a680ab7..2d04a8eb1 100644 --- a/pyemma/coordinates/tests/test_vamp.py +++ b/pyemma/coordinates/tests/test_vamp.py @@ -224,9 +224,7 @@ def test_CK_expectation_against_MSM(self): np.testing.assert_allclose(est_, pred_, atol=0.006) def test_CK_covariances_of_singular_functions(self): - #from pyemma import config - #config.show_progress_bars = False - cktest = self.vamp.cktest(n_observables=2, mlags=4) # auto + cktest = self.vamp.cktest(n_observables=2, mlags=4) # auto pred = cktest.predictions[1:] est = cktest.estimates[1:] error = np.max(np.abs(np.array(pred) - np.array(est))) / max(np.max(pred), np.max(est)) diff --git a/pyemma/coordinates/transform/vamp.py b/pyemma/coordinates/transform/vamp.py index 7a8db68b1..3bacbe892 100644 --- a/pyemma/coordinates/transform/vamp.py +++ b/pyemma/coordinates/transform/vamp.py @@ -513,7 +513,7 @@ def show_progress(self, value): def expectation(self, observables, statistics, lag_multiple=1, observables_mean_free=False, statistics_mean_free=False): - return self._model.expectation(statistics, observables, lag_multiple=lag_multiple, + return self._model.expectation(observables, statistics, lag_multiple=lag_multiple, statistics_mean_free=statistics_mean_free, observables_mean_free=observables_mean_free) @@ -644,7 +644,7 @@ def _compute_observables(self, model, estimator, mlag=1): else: return 
np.zeros((self.observables.shape[1], self.statistics.shape[1])) + np.nan else: - return model.expectation(self.statistics, self.observables, lag_multiple=mlag, + return model.expectation(statistics=self.statistics, observables=self.observables, lag_multiple=mlag, statistics_mean_free=self.statistics_mean_free, observables_mean_free=self.observables_mean_free) From 95c232187100b0e3efb80ac1926851c297fd4d62 Mon Sep 17 00:00:00 2001 From: "Paul, Fabian (fapa)" Date: Tue, 6 Feb 2018 15:53:38 +0100 Subject: [PATCH 39/43] [vamp] add documentation --- pyemma/coordinates/transform/vamp.py | 322 +++++++++++++++++++++++---- 1 file changed, 276 insertions(+), 46 deletions(-) diff --git a/pyemma/coordinates/transform/vamp.py b/pyemma/coordinates/transform/vamp.py index 7a8db68b1..af8d4a3c4 100644 --- a/pyemma/coordinates/transform/vamp.py +++ b/pyemma/coordinates/transform/vamp.py @@ -57,18 +57,21 @@ def set_model_params(self, mean_0, mean_t, C00, Ctt, C0t, U, V, singular_values, @property def U(self): + "Transformation matrix that represents the linear map from feature space to the space of left singular functions." if not self._svd_performed: self._diagonalize() return self._U @property def V(self): + "Transformation matrix that represents the linear map from feature space to the space of right singular functions." if not self._svd_performed: self._diagonalize() return self._V @property def singular_values(self): + "The singular values of the half-weighted Koopman matrix" if not self._svd_performed: self._diagonalize() return self._singular_values @@ -126,8 +129,6 @@ def dimension(self): def expectation(self, observables, statistics, lag_multiple=1, observables_mean_free=False, statistics_mean_free=False): r"""Compute future expectation of observable or covariance using the approximated Koopman operator.
- TODO: add equations - Parameters ---------- observables : np.ndarray((input_dimension, n_observables)) @@ -146,15 +147,56 @@ def expectation(self, observables, statistics, lag_multiple=1, observables_mean_ operator. observables_mean_free : bool, default=False - If true, coefficients in observables refer to the input + If true, coefficients in `observables` refer to the input features with feature means removed. - If false, coefficients in observables refer to the + If false, coefficients in `observables` refer to the unmodified input features. + statistics_mean_free : bool, default=False - If true, coefficients in statistics refer to the input + If true, coefficients in `statistics` refer to the input features with feature means removed. - If false, coefficients in statistics refer to the + If false, coefficients in `statistics` refer to the unmodified input features. + + Notes + ----- + A "future expectation" of an observable g is the average of g computed + over a time window that has the same total length as the input data + from which the Koopman operator was estimated but is shifted + by lag_multiple*tau time steps into the future (where tau is the lag + time). + + It is computed with the equation: + + .. math:: + + \mathbb{E}[g]_{\rho_{n}}=\mathbf{q}^{T}\mathbf{P}^{n-1}\mathbf{e}_{1} + + where + + .. math:: + + P_{ij}=\sigma_{i}\langle\psi_{i},\phi_{j}\rangle_{\rho_{1}} + + and + + .. math:: + + q_{i}=\langle g,\phi_{i}\rangle_{\rho_{1}} + + and :math:`\mathbf{e}_{1}` is the first canonical unit vector. + + + A model prediction of time-lagged covariances between the + observable f and the statistic g at a lag-time of lag_multiple*tau + is computed with the equation: + + .. math:: + + \mathrm{cov}[g,\,f;n\tau]=\mathbf{q}^{T}\mathbf{P}^{n-1}\boldsymbol{\Sigma}\mathbf{r} + + where :math:`r_{i}=\langle\psi_{i},f\rangle_{\rho_{0}}` and + :math:`\boldsymbol{\Sigma}=\mathrm{diag}(\boldsymbol{\sigma})` .
""" # TODO: implement the case lag_multiple=0 @@ -199,12 +241,17 @@ def expectation(self, observables, statistics, lag_multiple=1, observables_mean_ return Q.dot(P)[:, 0] def _diagonalize(self, scaling=None): - """ performs SVD on covariance matrices and save left, right singular vectors and values in the model. + """Performs SVD on covariance matrices and saves left, right singular vectors and values in the model. Parameters ---------- - scaling: str or None - + scaling : None or string, default=None + Scaling to be applied to the VAMP modes upon transformation + * None: no scaling will be applied, variance of the singular + functions is 1 + * 'kinetic map' or 'km': singular functions are scaled by + singular value. Note that only the left singular functions + induce a kinetic map. """ L0 = spd_inv_sqrt(self.C00) @@ -231,8 +278,7 @@ def _diagonalize(self, scaling=None): if scaling is None: pass elif scaling in ['km', 'kinetic map']: - U *= s[np.newaxis, 0:m] ## TODO: check left/right, ask Hao - V *= s[np.newaxis, 0:m] ## TODO: check left/right, ask Hao + U *= s[np.newaxis, 0:m] else: raise ValueError('unexpected value (%s) of "scaling"' % scaling) @@ -241,26 +287,42 @@ def _diagonalize(self, scaling=None): self._svd_performed = True def score(self, test_model=None, score_method='VAMP2'): - """ + """Compute the VAMP score for this model or the cross-validation score between self and a second model. Parameters ---------- - test_model + test_model : VAMPModel, optional, default=None + If `test_model` is not None, this method computes the cross-validation score + between self and `test_model`. It is assumed that self was estimated from + the "training" data and `test_model` was estimated from the "test" data. The + score is computed for one realization of self and `test_model`. Estimation + of the average cross-validation score and partitioning of data into test and + training part is not performed by this method.
+ If `test_model` is None, this method computes the VAMP score for the model + contained in self. + score_method : str, optional, default='VAMP2' - Overwrite scoring method to be used if desired. If `None`, the estimators scoring - method will be used. - Available scores are based on the variational approach for Markov processes [1]_ [2]_ : + Available scores are based on the variational approach for Markov processes [1]_: - * 'VAMP1' Sum of singular values of the symmetrized transition matrix [2]_ . - If the MSM is reversible, this is equal to the sum of transition - matrix eigenvalues, also called Rayleigh quotient [1]_ [3]_ . - * 'VAMP2' Sum of squared singular values of the symmetrized transition matrix [2]_ . - If the MSM is reversible, this is equal to the kinetic variance [4]_ . - * 'VAMPE' ... + * 'VAMP1' Sum of singular values of the half-weighted Koopman matrix [1]_ . + If the model is reversible, this is equal to the sum of + Koopman matrix eigenvalues, also called Rayleigh quotient [1]_. + * 'VAMP2' Sum of squared singular values of the half-weighted Koopman matrix [1]_ . + If the model is reversible, this is equal to the kinetic variance [2]_ . + * 'VAMPE' Approximation error of the estimated Koopman operator with respect to + the true Koopman operator up to an additive constant [1]_ . Returns ------- + If `test_model` is not None, returns the cross-validation VAMP score between + self and `test_model`. Otherwise return the selected VAMP-score of self. + References + ---------- + .. [1] Wu, H. and Noe, F. 2017. Variational approach for learning Markov processes from time series data. + arXiv:1707.04659v1 + .. [2] Noe, F. and Clementi, C. 2015. Kinetic distance and kinetic maps from molecular dynamics simulation. + J. Chem. Theory. Comput. 
doi:10.1021/acs.jctc.5b00553 """ # TODO: implement for TICA too if test_model is None: @@ -316,9 +378,11 @@ def __init__(self, lag, dim=None, scaling=None, right=True, epsilon=1e-6, that needs to be explained is greater than the percentage specified by dim. scaling : None or string - Scaling to be applied to the VAMP modes upon transformation - * None: no scaling will be applied, variance along the mode is 1 - * 'kinetic map' or 'km': modes are scaled by singular value + Scaling to be applied to the VAMP order parameters upon transformation + * None: no scaling will be applied, variance of the order parameters is 1 + * 'kinetic map' or 'km': order parameters are scaled by singular value + Only the left singular functions induce a kinetic map. + Therefore scaling='km' is only effective if `right` is False. right : boolean Whether to compute the right singular functions. If right==True, get_output() will return the right singular @@ -342,6 +406,9 @@ def __init__(self, lag, dim=None, scaling=None, right=True, epsilon=1e-6, limit the memory usage of the algorithm from [3]_ to an amount that corresponds to ncov_max additional copies of each correlation matrix + Notes + ----- + References ---------- .. [1] Wu, H. and Noe, F. 2017. Variational approach for learning Markov processes from time series data. @@ -443,6 +510,9 @@ def _transform_array(self, X): ------- Y : ndarray(n,) the projected data + If `self.right` is True, projection will be on the right singular + functions. Otherwise, projection will be on the left singular + functions. """ # TODO: in principle get_output should not return data for *all* frames! # TODO: implement our own iterators? This would also include random access to be complete... 
@@ -457,7 +527,7 @@ def _transform_array(self, X): @property def singular_values(self): - r"""Singular values of VAMP (usually denoted :math:`\sigma`) + r"""Singular values of the half-weighted Koopman matrix (usually denoted :math:`\sigma`) Returns ------- @@ -467,31 +537,49 @@ def singular_values(self): @property def singular_vectors_right(self): - r"""Right singular vectors V of the VAMP problem, columnwise + r"""Transformation matrix that represents the linear map from feature space to the space of right singular functions. + + Notes + ----- + Right "singular vectors" V of the VAMP problem (equation 13 in [1]_), columnwise Returns ------- - eigenvectors: 2-D ndarray + vectors: 2-D ndarray Coefficients that express the right singular functions in the basis of mean-free input features. + + References + ---------- + .. [1] Wu, H. and Noe, F. 2017. Variational approach for learning Markov processes from time series data. + arXiv:1707.04659v1 """ return self._model.V @property def singular_vectors_left(self): - r"""Left singular vectors U of the VAMP problem, columnwise + r"""Transformation matrix that represents the linear map from feature space to the space of left singular functions. + + Notes + ----- + Left "singular vectors" U of the VAMP problem (equation 13 in [1]_), columnwise Returns ------- - eigenvectors: 2-D ndarray + vectors: 2-D ndarray Coefficients that express the left singular functions in the basis of mean-free input features. + + References + ---------- + .. [1] Wu, H. and Noe, F. 2017. Variational approach for learning Markov processes from time series data. 
+ arXiv:1707.04659v1 """ return self._model.U @property def cumvar(self): - r"""Cumulative sum of the squared and normalized VAMP singular values + r"""Cumulative sum of the squared and normalized singular values Returns ------- @@ -513,23 +601,117 @@ def show_progress(self, value): def expectation(self, observables, statistics, lag_multiple=1, observables_mean_free=False, statistics_mean_free=False): + r"""Compute future expectation of observable or covariance using the approximated Koopman operator. + + Parameters + ---------- + observables : np.ndarray((input_dimension, n_observables)) + Coefficients that express one or multiple observables in + the basis of the input features. + + statistics : np.ndarray((input_dimension, n_statistics)), optional + Coefficients that express one or multiple statistics in + the basis of the input features. + This parameter can be None. In that case, this method + returns the future expectation value of the observable(s). + + lag_multiple : int + If > 1, extrapolate to a multiple of the estimator's lag + time by assuming Markovianity of the approximated Koopman + operator. + + observables_mean_free : bool, default=False + If true, coefficients in `observables` refer to the input + features with feature means removed. + If false, coefficients in `observables` refer to the + unmodified input features. + + statistics_mean_free : bool, default=False + If true, coefficients in `statistics` refer to the input + features with feature means removed. + If false, coefficients in `statistics` refer to the + unmodified input features. + + Notes + ----- + A "future expectation" of an observable g is the average of g computed + over a time window that has the same total length as the input data + from which the Koopman operator was estimated but is shifted + by lag_multiple*tau time steps into the future (where tau is the lag + time). + + It is computed with the equation: + + .. 
math:: + + \mathbb{E}[g]_{\rho_{n}}=\mathbf{q}^{T}\mathbf{P}^{n-1}\mathbf{e}_{1} + + where + + .. math:: + + P_{ij}=\sigma_{i}\langle\psi_{i},\phi_{j}\rangle_{\rho_{1}} + + and + + .. math:: + + q_{i}=\langle g,\phi_{i}\rangle_{\rho_{1}} + + and :math:`\mathbf{e}_{1}` is the first canonical unit vector. + + + A model prediction of time-lagged covariances between the + observable g and the statistic f at a lag-time of lag_multiple*tau + is computed with the equation: + + .. math:: + + \mathrm{cov}[g,\,f;n\tau]=\mathbf{q}^{T}\mathbf{P}^{n-1}\boldsymbol{\Sigma}\mathbf{r} + + where :math:`r_{i}=\langle\psi_{i},f\rangle_{\rho_{0}}` and + :math:`\boldsymbol{\Sigma}=\mathrm{diag}(\boldsymbol{\sigma})` . + """ return self._model.expectation(statistics, observables, lag_multiple=lag_multiple, statistics_mean_free=statistics_mean_free, observables_mean_free=observables_mean_free) def cktest(self, n_observables=None, observables='psi', statistics='phi', mlags=10, n_jobs=1, show_progress=True, iterable=None): - """ + """Do the Chapman-Kolmogorov test by computing predictions for higher lag times and by performing estimations at higher lag times. Parameters ---------- - n_observables - observables - statistics - mlags - n_jobs - show_progress - iterable + n_observables : int, optional, default=None + Limit the number of default observables (and of default statistics) + to this number. + Only used if `observables` are None or `statistics` are None. + + observables : np.ndarray((input_dimension, n_observables)) or 'psi' + Coefficients that express one or multiple observables in + the basis of the input features. + This parameter can be 'psi'. In that case, the dominant + right singular functions of the Koopman operator estimated + at the smallest lag time are used as observables. + + statistics : np.ndarray((input_dimension, n_statistics)) or 'phi' + Coefficients that express one or multiple statistics in + the basis of the input features. + This parameter can be 'phi'. 
In that case, the dominant + left singular functions of the Koopman operator estimated + at the smallest lag time are used as statistics. + + mlags : int, default=10 + + n_jobs : int, default=1 + + show_progress : bool, default=True + + iterable : any data format that `pyemma.coordinates.vamp()` accepts as input, optional + If `iterable` is None, the same data source with which VAMP + was initialized will be used for all estimation. + Otherwise, all estimates (not predictions) from data will be computed + from the data contained in `iterable`. Returns ------- @@ -568,6 +750,46 @@ def cktest(self, n_observables=None, observables='psi', statistics='phi', mlags= return ck def score(self, test_data=None, score_method='VAMP2'): + """Compute the VAMP score for this model or the cross-validation score between self and a second model estimated from different data. + + Parameters + ---------- + test_data : any data format that `pyemma.coordinates.vamp()` accepts as input + If `test_data` is not None, this method computes the cross-validation score + between self and a VAMP model estimated from `test_data`. It is assumed that + self was estimated from the "training" data and `test_data` is the test data. + The score is computed for one realization of self and `test_data`. Estimation + of the average cross-validation score and partitioning of data into test and + training part is not performed by this method. + If `test_data` is None, this method computes the VAMP score for the model + contained in self. + The model that is estimated from `test_data` will inherit all hyperparameters + from self. + + score_method : str, optional, default='VAMP2' + Available scores are based on the variational approach for Markov processes [1]_: + + * 'VAMP1' Sum of singular values of the half-weighted Koopman matrix [1]_ . + If the model is reversible, this is equal to the sum of + Koopman matrix eigenvalues, also called Rayleigh quotient [1]_. 
+ * 'VAMP2' Sum of squared singular values of the half-weighted Koopman matrix [1]_ . + If the model is reversible, this is equal to the kinetic variance [2]_ . + * 'VAMPE' Approximation error of the estimated Koopman operator with respect to + the true Koopman operator up to an additive constant [1]_ . + + Returns + ------- + If `test_data` is not None, returns the cross-validation VAMP score between + self and the model estimated from `test_data`. Otherwise return the selected + VAMP-score of self. + + References + ---------- + .. [1] Wu, H. and Noe, F. 2017. Variational approach for learning Markov processes from time series data. + arXiv:1707.04659v1 + .. [2] Noe, F. and Clementi, C. 2015. Kinetic distance and kinetic maps from molecular dynamics simulation. + J. Chem. Theory. Comput. doi:10.1021/acs.jctc.5b00553 + """ from pyemma._ext.sklearn.base import clone as clone_estimator est = clone_estimator(self) @@ -583,7 +805,6 @@ class VAMPChapmanKolmogorovValidator(LaggedModelValidator): __serialize_fields = ('nsets', 'statistics', 'observables', 'observables_mean_free', 'statistics_mean_free') """ - Parameters ---------- model : Model @@ -592,15 +813,27 @@ class VAMPChapmanKolmogorovValidator(LaggedModelValidator): estimator : Estimator Parametrized Estimator that has produced the model + observables : np.ndarray((input_dimension, n_observables)) + Coefficients that express one or multiple observables in + the basis of the input features. + statistics : np.ndarray((input_dimension, n_statistics)), optional Coefficients that express one or multiple statistics in the basis of the input features. This parameter can be None. In that case, this method returns the future expectation value of the observable(s). - observables : np.ndarray((input_dimension, n_observables)) - Coefficients that express one or multiple observables in - the basis of the input features. 
+ observables_mean_free : bool, default=False + If true, coefficients in `observables` refer to the input + features with feature means removed. + If false, coefficients in `observables` refer to the + unmodified input features. + + statistics_mean_free : bool, default=False + If true, coefficients in `statistics` refer to the input + features with feature means removed. + If false, coefficients in `statistics` refer to the + unmodified input features. mlags : int or int-array, default=10 multiples of lag times for testing the Model, e.g. range(10). @@ -611,9 +844,6 @@ class VAMPChapmanKolmogorovValidator(LaggedModelValidator): of these lag time multiples, e.g. the value 0 only make sense if _predict_observables(0) will work. - conf : float, default = 0.95 - confidence interval for errors - err_est : bool, default=False if the Estimator is capable of error calculation, will compute errors for each tau estimate. This option can be computationally From c0a6395d9f52720efec4ade42c7e0e2913a103f5 Mon Sep 17 00:00:00 2001 From: "Paul, Fabian (fapa)" Date: Tue, 6 Feb 2018 17:47:21 +0100 Subject: [PATCH 40/43] [vamp] docstrings --- pyemma/coordinates/__init__.py | 2 + pyemma/coordinates/api.py | 186 ++++++++++---- pyemma/coordinates/transform/__init__.py | 2 + pyemma/coordinates/transform/vamp.py | 298 ++++++++++++++++------- 4 files changed, 353 insertions(+), 135 deletions(-) diff --git a/pyemma/coordinates/__init__.py b/pyemma/coordinates/__init__.py index c7b5dbfba..0b3adf034 100644 --- a/pyemma/coordinates/__init__.py +++ b/pyemma/coordinates/__init__.py @@ -51,6 +51,7 @@ pca tica + vamp **Clustering Algorithms** @@ -84,6 +85,7 @@ transform.PCA transform.TICA + transform.VAMP **Covariance estimation** diff --git a/pyemma/coordinates/api.py b/pyemma/coordinates/api.py index 56c774db2..1c07580d8 100644 --- a/pyemma/coordinates/api.py +++ b/pyemma/coordinates/api.py @@ -1260,57 +1260,143 @@ def vamp(data=None, lag=10, dim=None, scaling=None, right=True, ncov_max=float(' 
stride=1, skip=0, chunksize=None): r""" Variational approach for Markov processes (VAMP) [1]_. - Parameters - ---------- - data : - lag : int - lag time - dim : float or int - Number of dimensions to keep: - * if dim is not set all available ranks are kept: - n_components == min(n_samples, n_features) - * if dim is an integer >= 1, this number specifies the number - of dimensions to keep. By default this will use the kinetic - variance. - * if dim is a float with ``0 < dim < 1``, select the number - of dimensions such that the amount of kinetic variance - that needs to be explained is greater than the percentage - specified by dim. - scaling : None or string - Scaling to be applied to the VAMP modes upon transformation - * None: no scaling will be applied, variance along the mode is 1 - * 'kinetic map' or 'km': modes are scaled by singular value - right : boolean - Whether to compute the right singular functions. - If right==True, get_output() will return the right singular - functions. Otherwise, get_output() will return the left singular - functions. - Beware that only frames[tau:, :] of each trajectory returned - by get_output() contain valid values of the right singular - functions. Conversely, only frames[0:-tau, :] of each - trajectory returned by get_output() contain valid values of - the left singular functions. The remaining frames might - possibly be interpreted as some extrapolation. - epsilon : float - singular value cutoff. Singular values of C0 with norms <= epsilon - will be cut off. The remaining number of singular values define - the size of the output. - stride: int, optional, default = 1 - Use only every stride-th time step. By default, every time step is used. - skip : int, default=0 - skip the first initial n frames per trajectory. 
- ncov_max : int, default=infinity - limit the memory usage of the algorithm from [3]_ to an amount that corresponds - to ncov_max additional copies of each correlation matrix + Parameters + ---------- + lag : int + lag time + dim : float or int + Number of dimensions to keep: + + * if dim is not set all available ranks are kept: + `n_components == min(n_samples, n_features)` + * if dim is an integer >= 1, this number specifies the number + of dimensions to keep. By default this will use the kinetic + variance. + * if dim is a float with ``0 < dim < 1``, select the number + of dimensions such that the amount of kinetic variance + that needs to be explained is greater than the percentage + specified by dim. + scaling : None or string + Scaling to be applied to the VAMP order parameters upon transformation + + * None: no scaling will be applied, variance of the order parameters is 1 + * 'kinetic map' or 'km': order parameters are scaled by singular value + Only the left singular functions induce a kinetic map. + Therefore scaling='km' is only effective if `right` is False. + right : boolean + Whether to compute the right singular functions. + If `right==True`, `get_output()` will return the right singular + functions. Otherwise, `get_output()` will return the left singular + functions. + Beware that only `frames[tau:, :]` of each trajectory returned + by `get_output()` contain valid values of the right singular + functions. Conversely, only `frames[0:-tau, :]` of each + trajectory returned by `get_output()` contain valid values of + the left singular functions. The remaining frames might + possibly be interpreted as some extrapolation. + epsilon : float + singular value cutoff. Singular values of :math:`C0` with + norms <= epsilon will be cut off. The remaining number of + singular values define the size of the output. + stride: int, optional, default = 1 + Use only every stride-th time step. By default, every time step is used. 
+ skip : int, default=0 + skip the first initial n frames per trajectory. + ncov_max : int, default=infinity + limit the memory usage of the algorithm from [3]_ to an amount that corresponds + to ncov_max additional copies of each correlation matrix + + Notes + ----- + VAMP is a method for dimensionality reduction of Markov processes. + + The Koopman operator :math:`\mathcal{K}` is an integral operator + that describes conditional future expectation values. Let + :math:`p(\mathbf{x},\,\mathbf{y})` be the conditional probability + density of visiting an infinitesimal phase space volume around + point :math:`\mathbf{y}` at time :math:`t+\tau` given that the phase + space point :math:`\mathbf{x}` was visited at the earlier time + :math:`t`. Then the action of the Koopman operator on a function + :math:`f` can be written as follows: + + .. math:: + + \mathcal{K}f=\int p(\mathbf{x},\,\mathbf{y})f(\mathbf{y})\,\mathrm{d}\mathbf{y}=\mathbb{E}\left[f(\mathbf{x}_{t+\tau})\mid\mathbf{x}_{t}=\mathbf{x}\right] + + The Koopman operator is defined without any reference to an + equilibrium distribution. Therefore it is well-defined in + situations where the dynamics is irreversible and/or non-stationary + such that no equilibrium distribution exists. + + If we approximate :math:`f` by a linear superposition of ansatz + functions :math:`\boldsymbol{\chi}` of the conformational + degrees of freedom (features), the operator :math:`\mathcal{K}` + can be approximated by a (finite-dimensional) matrix :math:`\mathbf{K}`. + + The approximation is computed as follows: From the time-dependent + input features :math:`\boldsymbol{\chi}(t)`, we compute the mean + :math:`\boldsymbol{\mu}_{0}` (:math:`\boldsymbol{\mu}_{1}`) from + all data excluding the last (first) :math:`\tau` steps of every + trajectory as follows: + + .. 
math:: + + \boldsymbol{\mu}_{0} :=\frac{1}{T-\tau}\sum_{t=0}^{T-\tau}\boldsymbol{\chi}(t) + \boldsymbol{\mu}_{1} :=\frac{1}{T-\tau}\sum_{t=\tau}^{T}\boldsymbol{\chi}(t) + + Next, we compute the instantaneous covariance matrices + :math:`\mathbf{C}_{00}` and :math:`\mathbf{C}_{11}` and the + time-lagged covariance matrix :math:`\mathbf{C}_{01}` as follows: + + .. math:: + + \mathbf{C}_{00} :=\frac{1}{T-\tau}\sum_{t=0}^{T-\tau}\left[\boldsymbol{\chi}(t)-\boldsymbol{\mu}_{0}\right]\left[\boldsymbol{\chi}(t)-\boldsymbol{\mu}_{0}\right]^{\top} + + \mathbf{C}_{11} :=\frac{1}{T-\tau}\sum_{t=\tau}^{T}\left[\boldsymbol{\chi}(t)-\boldsymbol{\mu}_{1}\right]\left[\boldsymbol{\chi}(t)-\boldsymbol{\mu}_{1}\right]^{\top} + + \mathbf{C}_{01} :=\frac{1}{T-\tau}\sum_{t=0}^{T-\tau}\left[\boldsymbol{\chi}(t)-\boldsymbol{\mu}_{0}\right]\left[\boldsymbol{\chi}(t+\tau)-\boldsymbol{\mu}_{1}\right]^{\top} + + The Koopman matrix is then computed as follows: + + .. math:: + + \mathbf{K}=\mathbf{C}_{00}^{-1}\mathbf{C}_{01} + + It can be shown [1]_ that the leading singular functions of the + half-weighted Koopman matrix + + .. math:: - References - ---------- - .. [1] Wu, H. and Noe, F. 2017. Variational approach for learning Markov processes from time series data. - arXiv:1707.04659v1 - .. [2] Noe, F. and Clementi, C. 2015. Kinetic distance and kinetic maps from molecular dynamics simulation. - J. Chem. Theory. Comput. doi:10.1021/acs.jctc.5b00553 - .. [3] Chan, T. F., Golub G. H., LeVeque R. J. 1979. Updating formulae and pairwiese algorithms for - computing sample variances. Technical Report STAN-CS-79-773, Department of Computer Science, Stanford University. + \bar{\mathbf{K}}:=\mathbf{C}_{00}^{-\frac{1}{2}}\mathbf{C}_{01}\mathbf{C}_{11}^{-\frac{1}{2}} + + encode the best reduced dynamical model for the time series. + + The singular functions can be computed by first performing the + singular value decomposition + + .. 
math:: + + \bar{\mathbf{K}}=\mathbf{U}^{\prime}\mathbf{S}\mathbf{V}^{\prime\top} + + and then mapping the input conformation to the left singular + functions :math:`\boldsymbol{\psi}` and right singular + functions :math:`\boldsymbol{\phi}` as follows: + + .. math:: + + \boldsymbol{\psi}(t):=\mathbf{U}^{\prime\top}\mathbf{C}_{00}^{-\frac{1}{2}}\left[\boldsymbol{\chi}(t)-\boldsymbol{\mu}_{0}\right] + + \boldsymbol{\phi}(t):=\mathbf{V}^{\prime\top}\mathbf{C}_{11}^{-\frac{1}{2}}\left[\boldsymbol{\chi}(t)-\boldsymbol{\mu}_{1}\right] + + + References + ---------- + .. [1] Wu, H. and Noe, F. 2017. Variational approach for learning Markov processes from time series data. + arXiv:1707.04659v1 + .. [2] Noe, F. and Clementi, C. 2015. Kinetic distance and kinetic maps from molecular dynamics simulation. + J. Chem. Theory. Comput. doi:10.1021/acs.jctc.5b00553 + .. [3] Chan, T. F., Golub G. H., LeVeque R. J. 1979. Updating formulae and pairwise algorithms for + computing sample variances. Technical Report STAN-CS-79-773, Department of Computer Science, Stanford University. 
""" from pyemma.coordinates.transform.vamp import VAMP res = VAMP(lag, dim=dim, scaling=scaling, right=right, skip=skip, ncov_max=ncov_max) diff --git a/pyemma/coordinates/transform/__init__.py b/pyemma/coordinates/transform/__init__.py index de8366d13..4949149b4 100644 --- a/pyemma/coordinates/transform/__init__.py +++ b/pyemma/coordinates/transform/__init__.py @@ -28,7 +28,9 @@ PCA - principal components TICA - time independent components + VAMP - Variational approach for Markov processes """ from .pca import * from .tica import * +from .vamp import * diff --git a/pyemma/coordinates/transform/vamp.py b/pyemma/coordinates/transform/vamp.py index af6eb37ce..150b3f88c 100644 --- a/pyemma/coordinates/transform/vamp.py +++ b/pyemma/coordinates/transform/vamp.py @@ -34,7 +34,7 @@ import warnings -__all__ = ['VAMP'] +__all__ = ['VAMP', 'VAMPModel', 'VAMPChapmanKolmogorovValidator'] class VAMPModel(Model, SerializableMixIn): @@ -362,62 +362,144 @@ def __init__(self, lag, dim=None, scaling=None, right=True, epsilon=1e-6, stride=1, skip=0, ncov_max=float('inf')): r""" Variational approach for Markov processes (VAMP) [1]_. - Parameters - ---------- - lag : int - lag time - dim : float or int - Number of dimensions to keep: - * if dim is not set all available ranks are kept: - n_components == min(n_samples, n_features) - * if dim is an integer >= 1, this number specifies the number - of dimensions to keep. By default this will use the kinetic - variance. - * if dim is a float with ``0 < dim < 1``, select the number - of dimensions such that the amount of kinetic variance - that needs to be explained is greater than the percentage - specified by dim. - scaling : None or string - Scaling to be applied to the VAMP order parameters upon transformation - * None: no scaling will be applied, variance of the order parameters is 1 - * 'kinetic map' or 'km': order parameters are scaled by singular value - Only the left singular functions induce a kinetic map. 
- Therefore scaling='km' is only effective if `right` is False. - right : boolean - Whether to compute the right singular functions. - If right==True, get_output() will return the right singular - functions. Otherwise, get_output() will return the left singular - functions. - Beware that only frames[tau:, :] of each trajectory returned - by get_output() contain valid values of the right singular - functions. Conversely, only frames[0:-tau, :] of each - trajectory returned by get_output() contain valid values of - the left singular functions. The remaining frames might - possibly be interpreted as some extrapolation. - epsilon : float - singular value cutoff. Singular values of C0 with norms <= epsilon - will be cut off. The remaining number of singular values define - the size of the output. - stride: int, optional, default = 1 - Use only every stride-th time step. By default, every time step is used. - skip : int, default=0 - skip the first initial n frames per trajectory. - ncov_max : int, default=infinity - limit the memory usage of the algorithm from [3]_ to an amount that corresponds - to ncov_max additional copies of each correlation matrix + Parameters + ---------- + lag : int + lag time + dim : float or int + Number of dimensions to keep: + + * if dim is not set all available ranks are kept: + `n_components == min(n_samples, n_features)` + * if dim is an integer >= 1, this number specifies the number + of dimensions to keep. By default this will use the kinetic + variance. + * if dim is a float with ``0 < dim < 1``, select the number + of dimensions such that the amount of kinetic variance + that needs to be explained is greater than the percentage + specified by dim. 
+ scaling : None or string + Scaling to be applied to the VAMP order parameters upon transformation + + * None: no scaling will be applied, variance of the order parameters is 1 + * 'kinetic map' or 'km': order parameters are scaled by singular value + Only the left singular functions induce a kinetic map. + Therefore scaling='km' is only effective if `right` is False. + right : boolean + Whether to compute the right singular functions. + If `right==True`, `get_output()` will return the right singular + functions. Otherwise, `get_output()` will return the left singular + functions. + Beware that only `frames[tau:, :]` of each trajectory returned + by `get_output()` contain valid values of the right singular + functions. Conversely, only `frames[0:-tau, :]` of each + trajectory returned by `get_output()` contain valid values of + the left singular functions. The remaining frames might + possibly be interpreted as some extrapolation. + epsilon : float + singular value cutoff. Singular values of :math:`C0` with + norms <= epsilon will be cut off. The remaining number of + singular values define the size of the output. + stride: int, optional, default = 1 + Use only every stride-th time step. By default, every time step is used. + skip : int, default=0 + skip the first initial n frames per trajectory. + ncov_max : int, default=infinity + limit the memory usage of the algorithm from [3]_ to an amount that corresponds + to ncov_max additional copies of each correlation matrix + + Notes + ----- + VAMP is a method for dimensionality reduction of Markov processes. + + The Koopman operator :math:`\mathcal{K}` is an integral operator + that describes conditional future expectation values. Let + :math:`p(\mathbf{x},\,\mathbf{y})` be the conditional probability + density of visiting an infinitesimal phase space volume around + point :math:`\mathbf{y}` at time :math:`t+\tau` given that the phase + space point :math:`\mathbf{x}` was visited at the earlier time + :math:`t`. 
Then the action of the Koopman operator on a function + :math:`f` can be written as follows: + + .. math:: + + \mathcal{K}f=\int p(\mathbf{x},\,\mathbf{y})f(\mathbf{y})\,\mathrm{d}\mathbf{y}=\mathbb{E}\left[f(\mathbf{x}_{t+\tau})\mid\mathbf{x}_{t}=\mathbf{x}\right] + + The Koopman operator is defined without any reference to an + equilibrium distribution. Therefore it is well-defined in + situations where the dynamics is irreversible and/or non-stationary + such that no equilibrium distribution exists. + + If we approximate :math:`f` by a linear superposition of ansatz + functions :math:`\boldsymbol{\chi}` of the conformational + degrees of freedom (features), the operator :math:`\mathcal{K}` + can be approximated by a (finite-dimensional) matrix :math:`\mathbf{K}`. + + The approximation is computed as follows: From the time-dependent + input features :math:`\boldsymbol{\chi}(t)`, we compute the mean + :math:`\boldsymbol{\mu}_{0}` (:math:`\boldsymbol{\mu}_{1}`) from + all data excluding the last (first) :math:`\tau` steps of every + trajectory as follows: + + .. math:: + + \boldsymbol{\mu}_{0} :=\frac{1}{T-\tau}\sum_{t=0}^{T-\tau}\boldsymbol{\chi}(t) + \boldsymbol{\mu}_{1} :=\frac{1}{T-\tau}\sum_{t=\tau}^{T}\boldsymbol{\chi}(t) + + Next, we compute the instantaneous covariance matrices + :math:`\mathbf{C}_{00}` and :math:`\mathbf{C}_{11}` and the + time-lagged covariance matrix :math:`\mathbf{C}_{01}` as follows: + + .. 
math:: + + \mathbf{C}_{00} :=\frac{1}{T-\tau}\sum_{t=0}^{T-\tau}\left[\boldsymbol{\chi}(t)-\boldsymbol{\mu}_{0}\right]\left[\boldsymbol{\chi}(t)-\boldsymbol{\mu}_{0}\right]^{\top} + + \mathbf{C}_{11} :=\frac{1}{T-\tau}\sum_{t=\tau}^{T}\left[\boldsymbol{\chi}(t)-\boldsymbol{\mu}_{1}\right]\left[\boldsymbol{\chi}(t)-\boldsymbol{\mu}_{1}\right]^{\top} + + \mathbf{C}_{01} :=\frac{1}{T-\tau}\sum_{t=0}^{T-\tau}\left[\boldsymbol{\chi}(t)-\boldsymbol{\mu}_{0}\right]\left[\boldsymbol{\chi}(t+\tau)-\boldsymbol{\mu}_{1}\right]^{\top} + + The Koopman matrix is then computed as follows: + + .. math:: + + \mathbf{K}=\mathbf{C}_{00}^{-1}\mathbf{C}_{01} + + It can be shown [1]_ that the leading singular functions of the + half-weighted Koopman matrix + + .. math:: + + \bar{\mathbf{K}}:=\mathbf{C}_{00}^{-\frac{1}{2}}\mathbf{C}_{01}\mathbf{C}_{11}^{-\frac{1}{2}} + + encode the best reduced dynamical model for the time series. + + The singular functions can be computed by first performing the + singular value decomposition - Notes - ----- + .. math:: - References - ---------- - .. [1] Wu, H. and Noe, F. 2017. Variational approach for learning Markov processes from time series data. - arXiv:1707.04659v1 - .. [2] Noe, F. and Clementi, C. 2015. Kinetic distance and kinetic maps from molecular dynamics simulation. - J. Chem. Theory. Comput. doi:10.1021/acs.jctc.5b00553 - .. [3] Chan, T. F., Golub G. H., LeVeque R. J. 1979. Updating formulae and pairwiese algorithms for - computing sample variances. Technical Report STAN-CS-79-773, Department of Computer Science, Stanford University. - """ + \bar{\mathbf{K}}=\mathbf{U}^{\prime}\mathbf{S}\mathbf{V}^{\prime\top} + + and then mapping the input conformation to the left singular + functions :math:`\boldsymbol{\psi}` and right singular + functions :math:`\boldsymbol{\phi}` as follows: + + ..
math:: + + \boldsymbol{\phi}(t):=\mathbf{U}^{\prime\top}\mathbf{C}_{00}^{-\frac{1}{2}}\left[\boldsymbol{\chi}(t)-\boldsymbol{\mu}_{0}\right] + + \boldsymbol{\psi}(t):=\mathbf{V}^{\prime\top}\mathbf{C}_{11}^{-\frac{1}{2}}\left[\boldsymbol{\chi}(t)-\boldsymbol{\mu}_{1}\right] + + + References + ---------- + .. [1] Wu, H. and Noe, F. 2017. Variational approach for learning Markov processes from time series data. + arXiv:1707.04659v1 + .. [2] Noe, F. and Clementi, C. 2015. Kinetic distance and kinetic maps from molecular dynamics simulation. + J. Chem. Theory. Comput. doi:10.1021/acs.jctc.5b00553 + .. [3] Chan, T. F., Golub G. H., LeVeque R. J. 1979. Updating formulae and pairwiese algorithms for + computing sample variances. Technical Report STAN-CS-79-773, Department of Computer Science, Stanford University. + """ StreamingEstimationTransformer.__init__(self) # empty dummy model instance @@ -676,9 +758,40 @@ def expectation(self, observables, statistics, lag_multiple=1, observables_mean_ statistics_mean_free=statistics_mean_free, observables_mean_free=observables_mean_free) - def cktest(self, n_observables=None, observables='psi', statistics='phi', mlags=10, n_jobs=1, show_progress=True, + def cktest(self, n_observables=None, observables='phi', statistics='psi', mlags=10, n_jobs=1, show_progress=True, iterable=None): - """Do the Chapman-Kolmogorov test by computing predictions for higher lag times and by performing estimations at higher lag times. + r"""Do the Chapman-Kolmogorov test by computing predictions for higher lag times and by performing estimations at higher lag times. + + Notes + ----- + + This method computes two sets of time-lagged covariance matrices + + * estimates at higher lag times : + + .. 
math:: + + \left\langle \mathbf{K}(n\tau)g_{i},f_{j}\right\rangle_{\rho_{0}} + + where :math:`\rho_{0}` is the empirical distribution implicitly defined + by all data points from time steps 0 to T-tau in all trajectories, + :math:`\mathbf{K}(n\tau)` is a rank-reduced Koopman matrix estimated + at the lag-time n*tau and g and f are some functions of the data. + Rank-reduction of the Koopman matrix is controlled by the `dim` + parameter of :func:`vamp `. + + * predictions at higher lag times : + + .. math:: + + \left\langle \mathbf{K}^{n}(\tau)g_{i},f_{j}\right\rangle_{\rho_{0}} + + where :math:`\mathbf{K}^{n}` is the n'th power of the rank-reduced + Koopman matrix contained in self. + + + The Chapman-Kolmogorov test is to compare the predictions to the + estimates. Parameters ---------- @@ -687,25 +800,33 @@ def cktest(self, n_observables=None, observables='psi', statistics='phi', mlags= to this number. Only used if `observables` are None or `statistics` are None. - observables : np.ndarray((input_dimension, n_observables)) or 'psi' - Coefficients that express one or multiple observables in - the basis of the input features. - This parameter can be 'psi'. In that case, this the dominant + observables : np.ndarray((input_dimension, n_observables)) or 'phi' + Coefficients that express one or multiple observables :math:`g` + in the basis of the input features. + This parameter can be 'phi'. In that case, the dominant right singular functions of the Koopman operator estimated - at the smallest lag time are used as observables. + at the smallest lag time are used as default observables. - statistics : np.ndarray((input_dimension, n_statistics)) or 'phi' - Coefficients that express one or multiple statistics in - the basis of the input features. - This parameter can be 'phi'.
In that case, this the dominant + statistics : np.ndarray((input_dimension, n_statistics)) or 'psi' + Coefficients that express one or multiple statistics :math:`f` + in the basis of the input features. + This parameter can be 'psi'. In that case, the dominant left singular functions of the Koopman operator estimated - at the smallest lag time are used as statistics. + at the smallest lag time are used as default statistics. - mlags : int, default=10 + mlags : int or int-array, default=10 + multiples of lag times for testing the Model, e.g. range(10). + A single int will trigger a range, i.e. mlags=10 maps to + mlags=range(10). + Note that you need to be able to do a model prediction for each + of these lag time multiples, e.g. the value 0 only make sense + if model.expectation(lag_multiple=0) will work. n_jobs : int, default=1 + how many jobs to use during calculation show_progress : bool, default=True + Show progressbars for calculation? iterable : any data format that `pyemma.coordinates.vamp()` accepts as input, optional It `iterable` is None, the same data source with which VAMP @@ -715,7 +836,9 @@ def cktest(self, n_observables=None, observables='psi', statistics='phi', mlags= Returns ------- - + vckv : :class:`VAMPChapmanKolmogorovValidator ` + Contains the estimated and the predicted covarince matrices. + The object can be plotted with :func:`plot_cktest ` with the option `y01=False`. 
""" if n_observables is not None: if n_observables > self.dimension(): @@ -725,14 +848,14 @@ def cktest(self, n_observables=None, observables='psi', statistics='phi', mlags= else: n_observables = self.dimension() - if isinstance(observables, str) and observables == 'psi': + if isinstance(observables, str) and observables == 'phi': observables = self.singular_vectors_right[:, 0:n_observables] observables_mean_free = True else: ensure_ndarray(observables, ndim=2) observables_mean_free = False - if isinstance(statistics, str) and statistics == 'phi': + if isinstance(statistics, str) and statistics == 'psi': statistics = self.singular_vectors_left[:, 0:n_observables] statistics_mean_free = True else: @@ -805,23 +928,29 @@ class VAMPChapmanKolmogorovValidator(LaggedModelValidator): __serialize_fields = ('nsets', 'statistics', 'observables', 'observables_mean_free', 'statistics_mean_free') """ + Note + ---- + It is recommended that you create this object by calling the + `cktest` method of a VAMP object created with + :func:`vamp `. + Parameters ---------- model : Model - Model to be tested + Model with the smallest lag time. Is used to make predictions + for larger lag times. estimator : Estimator - Parametrized Estimator that has produced the model + Parametrized Estimator that has produced the model. + Is used as a prototype for estimating models at higher lag times. observables : np.ndarray((input_dimension, n_observables)) Coefficients that express one or multiple observables in the basis of the input features. - statistics : np.ndarray((input_dimension, n_statistics)), optional + statistics : np.ndarray((input_dimension, n_statistics)) Coefficients that express one or multiple statistics in the basis of the input features. - This parameter can be None. In that case, this method - returns the future expectation value of the observable(s). 
observables_mean_free : bool, default=False If true, coefficients in `observables` refer to the input @@ -838,22 +967,21 @@ class VAMPChapmanKolmogorovValidator(LaggedModelValidator): mlags : int or int-array, default=10 multiples of lag times for testing the Model, e.g. range(10). A single int will trigger a range, i.e. mlags=10 maps to - mlags=range(10). The setting None will choose mlags automatically - according to the longest available trajectory + mlags=range(10). Note that you need to be able to do a model prediction for each of these lag time multiples, e.g. the value 0 only make sense - if _predict_observables(0) will work. - - err_est : bool, default=False - if the Estimator is capable of error calculation, will compute - errors for each tau estimate. This option can be computationally - expensive. + if model.expectation(lag_multiple=0) will work. - n_jobs : int, default=1 + n_jobs : int, default=1 how many jobs to use during calculation show_progress : bool, default=True Show progressbars for calculation? + + Notes + ----- + The object can be plotted with :func:`plot_cktest ` + with the option `y01=False`. 
""" def __init__(self, model, estimator, observables, statistics, observables_mean_free, statistics_mean_free, mlags=10, n_jobs=1, show_progress=True): From 92d59d77dcc5a20f81539b62a74f6c19c14f7ab6 Mon Sep 17 00:00:00 2001 From: "Paul, Fabian (fapa)" Date: Tue, 6 Feb 2018 18:06:32 +0100 Subject: [PATCH 41/43] [vamp] corrected docstrings --- pyemma/coordinates/transform/__init__.py | 2 + pyemma/coordinates/transform/vamp.py | 147 ++++++++++++----------- 2 files changed, 79 insertions(+), 70 deletions(-) diff --git a/pyemma/coordinates/transform/__init__.py b/pyemma/coordinates/transform/__init__.py index 4949149b4..b7f976ceb 100644 --- a/pyemma/coordinates/transform/__init__.py +++ b/pyemma/coordinates/transform/__init__.py @@ -29,6 +29,8 @@ PCA - principal components TICA - time independent components VAMP - Variational approach for Markov processes + VAMPModel - Kinetic model form the Variational approach for Markov processes + VAMPChapmanKolmogorovValidator - Chapman Kolmogorov test for the Variational approach for Markov processes """ from .pca import * diff --git a/pyemma/coordinates/transform/vamp.py b/pyemma/coordinates/transform/vamp.py index 150b3f88c..aed7fede6 100644 --- a/pyemma/coordinates/transform/vamp.py +++ b/pyemma/coordinates/transform/vamp.py @@ -57,14 +57,14 @@ def set_model_params(self, mean_0, mean_t, C00, Ctt, C0t, U, V, singular_values, @property def U(self): - "Tranformation matrix that represents the linear map from feature space to the space of left singular functions." + "Tranformation matrix that represents the linear map from mean-free feature space to the space of left singular functions." if not self._svd_performed: self._diagonalize() return self._U @property def V(self): - "Tranformation matrix that represents the linear map from feature space to the space of right singular functions." + "Tranformation matrix that represents the linear map from mean-free feature space to the space of right singular functions." 
if not self._svd_performed: self._diagonalize() return self._V @@ -292,12 +292,14 @@ def score(self, test_model=None, score_method='VAMP2'): Parameters ---------- test_model : VAMPModel, optional, default=None + If `test_model` is not None, this method computes the cross-validation score between self and `test_model`. It is assumed that self was estimated from the "training" data and `test_model` was estimated from the "test" data. The score is computed for one realization of self and `test_model`. Estimation - of the average cross-validation core and partitioning of data into test and + of the average cross-validation score and partitioning of data into test and training part is not performed by this method. + If `test_model` is None, this method computes the VAMP score for the model contained in self. @@ -314,8 +316,9 @@ def score(self, test_model=None, score_method='VAMP2'): Returns ------- - If `test_model` is not None, returns the cross-validation VAMP score between - self and `test_model`. Otherwise return the selected VAMP-score of self. + score : float + If `test_model` is not None, returns the cross-validation VAMP score between + self and `test_model`. Otherwise return the selected VAMP-score of self. References ---------- @@ -628,8 +631,8 @@ def singular_vectors_right(self): Returns ------- vectors: 2-D ndarray - Coefficients that express the right singular functions in the - basis of mean-free input features. + Coefficients that express the right singular functions in the + basis of mean-free input features. References ---------- @@ -649,8 +652,8 @@ def singular_vectors_left(self): Returns ------- vectors: 2-D ndarray - Coefficients that express the left singular functions in the - basis of mean-free input features. + Coefficients that express the left singular functions in the + basis of mean-free input features. 
References ---------- @@ -836,7 +839,7 @@ def cktest(self, n_observables=None, observables='phi', statistics='psi', mlags= Returns ------- - vckv : :class:`VAMPChapmanKolmogorovValidator ` + vckv : :class:`VAMPChapmanKolmogorovValidator ` Contains the estimated and the predicted covarince matrices. The object can be plotted with :func:`plot_cktest ` with the option `y01=False`. """ @@ -878,14 +881,17 @@ def score(self, test_data=None, score_method='VAMP2'): Parameters ---------- test_data : any data format that `pyemma.coordinates.vamp()` accepts as input + If `test_data` is not None, this method computes the cross-validation score between self and a VAMP model estimated from `test_data`. It is assumed that self was estimated from the "training" data and `test_data` is the test data. The score is computed for one realization of self and `test_data`. Estimation - of the average cross-validation core and partitioning of data into test and + of the average cross-validation score and partitioning of data into test and training part is not performed by this method. + If `test_data` is None, this method computes the VAMP score for the model contained in self. + The model that is estimated from `test_data` will inherit all hyperparameters from self. @@ -902,9 +908,10 @@ def score(self, test_data=None, score_method='VAMP2'): Returns ------- - If `test_data` is not None, returns the cross-validation VAMP score between - self and the model estimated from `test_data`. Otherwise return the selected - VAMP-score of self. + score : float + If `test_data` is not None, returns the cross-validation VAMP score between + self and the model estimated from `test_data`. Otherwise return the selected + VAMP-score of self. 
References ---------- @@ -927,64 +934,64 @@ class VAMPChapmanKolmogorovValidator(LaggedModelValidator): __serialize_version = 0 __serialize_fields = ('nsets', 'statistics', 'observables', 'observables_mean_free', 'statistics_mean_free') - """ - Note - ---- - It is recommended that you create this object by calling the - `cktest` method of a VAMP object created with - :func:`vamp `. - - Parameters - ---------- - model : Model - Model with the smallest lag time. Is used to make predictions - for larger lag times. - - estimator : Estimator - Parametrized Estimator that has produced the model. - Is used as a prototype for estimating models at higher lag times. - - observables : np.ndarray((input_dimension, n_observables)) - Coefficients that express one or multiple observables in - the basis of the input features. - - statistics : np.ndarray((input_dimension, n_statistics)) - Coefficients that express one or multiple statistics in - the basis of the input features. - - observables_mean_free : bool, default=False - If true, coefficients in `observables` refer to the input - features with feature means removed. - If false, coefficients in `observables` refer to the - unmodified input features. - - statistics_mean_free : bool, default=False - If true, coefficients in `statistics` refer to the input - features with feature means removed. - If false, coefficients in `statistics` refer to the - unmodified input features. - - mlags : int or int-array, default=10 - multiples of lag times for testing the Model, e.g. range(10). - A single int will trigger a range, i.e. mlags=10 maps to - mlags=range(10). - Note that you need to be able to do a model prediction for each - of these lag time multiples, e.g. the value 0 only make sense - if model.expectation(lag_multiple=0) will work. - - n_jobs : int, default=1 - how many jobs to use during calculation - - show_progress : bool, default=True - Show progressbars for calculation? 
- - Notes - ----- - The object can be plotted with :func:`plot_cktest ` - with the option `y01=False`. - """ def __init__(self, model, estimator, observables, statistics, observables_mean_free, statistics_mean_free, mlags=10, n_jobs=1, show_progress=True): + r""" + Note + ---- + It is recommended that you create this object by calling the + `cktest` method of a VAMP object created with + :func:`vamp `. + + Parameters + ---------- + model : Model + Model with the smallest lag time. Is used to make predictions + for larger lag times. + + estimator : Estimator + Parametrized Estimator that has produced the model. + Is used as a prototype for estimating models at higher lag times. + + observables : np.ndarray((input_dimension, n_observables)) + Coefficients that express one or multiple observables in + the basis of the input features. + + statistics : np.ndarray((input_dimension, n_statistics)) + Coefficients that express one or multiple statistics in + the basis of the input features. + + observables_mean_free : bool, default=False + If true, coefficients in `observables` refer to the input + features with feature means removed. + If false, coefficients in `observables` refer to the + unmodified input features. + + statistics_mean_free : bool, default=False + If true, coefficients in `statistics` refer to the input + features with feature means removed. + If false, coefficients in `statistics` refer to the + unmodified input features. + + mlags : int or int-array, default=10 + multiples of lag times for testing the Model, e.g. range(10). + A single int will trigger a range, i.e. mlags=10 maps to + mlags=range(10). + Note that you need to be able to do a model prediction for each + of these lag time multiples, e.g. the value 0 only make sense + if model.expectation(lag_multiple=0) will work. + + n_jobs : int, default=1 + how many jobs to use during calculation + + show_progress : bool, default=True + Show progressbars for calculation? 
+ + Notes + ----- + The object can be plotted with :func:`plot_cktest ` + with the option `y01=False`. + """ LaggedModelValidator.__init__(self, model, estimator, mlags=mlags, n_jobs=n_jobs, show_progress=show_progress) self.statistics = statistics From dd11151546fa2e279dc48e8ac646b572f70cf538 Mon Sep 17 00:00:00 2001 From: "Paul, Fabian (fapa)" Date: Tue, 6 Feb 2018 18:13:54 +0100 Subject: [PATCH 42/43] [vamp] fix equations in docstring --- pyemma/coordinates/api.py | 5 +++-- pyemma/coordinates/transform/vamp.py | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/pyemma/coordinates/api.py b/pyemma/coordinates/api.py index 1c07580d8..75ad04717 100644 --- a/pyemma/coordinates/api.py +++ b/pyemma/coordinates/api.py @@ -1342,6 +1342,7 @@ def vamp(data=None, lag=10, dim=None, scaling=None, right=True, ncov_max=float(' .. math:: \boldsymbol{\mu}_{0} :=\frac{1}{T-\tau}\sum_{t=0}^{T-\tau}\boldsymbol{\chi}(t) + \boldsymbol{\mu}_{1} :=\frac{1}{T-\tau}\sum_{t=\tau}^{T}\boldsymbol{\chi}(t) Next, we compute the instantaneous covariance matrices @@ -1384,9 +1385,9 @@ def vamp(data=None, lag=10, dim=None, scaling=None, right=True, ncov_max=float(' .. 
math:: - \boldsymbol{\phi}(t):=\mathbf{U}^{\prime\top}\mathbf{C}_{00}^{-\frac{1}{2}}\left[\boldsymbol{\chi}(t)-\boldsymbol{\mu}_{0}\right] + \boldsymbol{\psi}(t):=\mathbf{U}^{\prime\top}\mathbf{C}_{00}^{-\frac{1}{2}}\left[\boldsymbol{\chi}(t)-\boldsymbol{\mu}_{0}\right] - \boldsymbol{\psi}(t):=\mathbf{V}^{\prime\top}\mathbf{C}_{11}^{-\frac{1}{2}}\left[\boldsymbol{\chi}(t)-\boldsymbol{\mu}_{1}\right] + \boldsymbol{\phi}(t):=\mathbf{V}^{\prime\top}\mathbf{C}_{11}^{-\frac{1}{2}}\left[\boldsymbol{\chi}(t)-\boldsymbol{\mu}_{1}\right] References diff --git a/pyemma/coordinates/transform/vamp.py b/pyemma/coordinates/transform/vamp.py index aed7fede6..749a78ae7 100644 --- a/pyemma/coordinates/transform/vamp.py +++ b/pyemma/coordinates/transform/vamp.py @@ -447,6 +447,7 @@ def __init__(self, lag, dim=None, scaling=None, right=True, epsilon=1e-6, .. math:: \boldsymbol{\mu}_{0} :=\frac{1}{T-\tau}\sum_{t=0}^{T-\tau}\boldsymbol{\chi}(t) + \boldsymbol{\mu}_{1} :=\frac{1}{T-\tau}\sum_{t=\tau}^{T}\boldsymbol{\chi}(t) Next, we compute the instantaneous covariance matrices @@ -489,9 +490,9 @@ def __init__(self, lag, dim=None, scaling=None, right=True, epsilon=1e-6, .. 
math:: - \boldsymbol{\phi}(t):=\mathbf{U}^{\prime\top}\mathbf{C}_{00}^{-\frac{1}{2}}\left[\boldsymbol{\chi}(t)-\boldsymbol{\mu}_{0}\right] + \boldsymbol{\psi}(t):=\mathbf{U}^{\prime\top}\mathbf{C}_{00}^{-\frac{1}{2}}\left[\boldsymbol{\chi}(t)-\boldsymbol{\mu}_{0}\right] - \boldsymbol{\psi}(t):=\mathbf{V}^{\prime\top}\mathbf{C}_{11}^{-\frac{1}{2}}\left[\boldsymbol{\chi}(t)-\boldsymbol{\mu}_{1}\right] + \boldsymbol{\phi}(t):=\mathbf{V}^{\prime\top}\mathbf{C}_{11}^{-\frac{1}{2}}\left[\boldsymbol{\chi}(t)-\boldsymbol{\mu}_{1}\right] References From 2fb9d9a43af5b9a4f8af9cdc32f9f5566b55801d Mon Sep 17 00:00:00 2001 From: "Paul, Fabian (fapa)" Date: Wed, 7 Feb 2018 15:21:50 +0100 Subject: [PATCH 43/43] [vamp] vamp.dimension depends on rank of C00 and C11 --- pyemma/_ext/variational/solvers/direct.py | 12 ++++++++---- pyemma/coordinates/transform/vamp.py | 20 ++++++++++---------- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/pyemma/_ext/variational/solvers/direct.py b/pyemma/_ext/variational/solvers/direct.py index db442aedf..d3c2be57b 100644 --- a/pyemma/_ext/variational/solvers/direct.py +++ b/pyemma/_ext/variational/solvers/direct.py @@ -125,7 +125,7 @@ def spd_inv(W, epsilon=1e-10, method='QR', canonical_signs=False): return Winv -def spd_inv_sqrt(W, epsilon=1e-10, method='QR', canonical_signs=False): +def spd_inv_sqrt(W, epsilon=1e-10, method='QR', canonical_signs=False, return_rank=False): """ Computes :math:`W^{-1/2}` of symmetric positive-definite matrix :math:`W`. @@ -153,14 +153,18 @@ def spd_inv_sqrt(W, epsilon=1e-10, method='QR', canonical_signs=False): Matrix :math:`L` from the decomposition :math:`W^{-1} = L L^T`. 
""" - if (_np.shape(W)[0] == 1): - Winv = 1./_np.sqrt(W[0,0]) + if _np.shape(W)[0] == 1: + Winv = 1./_np.sqrt(W[0, 0]) + sm = _np.ones(1) else: sm, Vm = spd_eig(W, epsilon=epsilon, method=method, canonical_signs=canonical_signs) Winv = _np.dot(Vm, _np.diag(1.0 / _np.sqrt(sm))).dot(Vm.T) # return split - return Winv + if return_rank: + return Winv, sm.shape[0] + else: + return Winv def spd_inv_split(W, epsilon=1e-10, method='QR', canonical_signs=False): diff --git a/pyemma/coordinates/transform/vamp.py b/pyemma/coordinates/transform/vamp.py index 749a78ae7..981e1080b 100644 --- a/pyemma/coordinates/transform/vamp.py +++ b/pyemma/coordinates/transform/vamp.py @@ -106,11 +106,11 @@ def Ctt(self, val): def dimension(self): """ output dimension """ if self.dim is None or (isinstance(self.dim, float) and self.dim == 1.0): - if hasattr(self, '_singular_values') and self._singular_values is not None: - return np.count_nonzero(self._singular_values > self.epsilon) + if hasattr(self, '_rank0'): + return min(self._rank0, self._rankt) else: - raise RuntimeError('Requested dimension, but the dimension depends on the singular values and the ' - 'transformer has not yet been estimated. Call estimate() before.') + raise RuntimeError('Requested dimension, but the dimension depends on the singular values of C00 and C11' + ' and the transformer has not yet been estimated. Call estimate() before.') if isinstance(self.dim, float): if hasattr(self, 'cumvar') and self.cumvar is not None: return np.count_nonzero(self.cumvar >= self.dim) @@ -118,12 +118,12 @@ def dimension(self): raise RuntimeError('Requested dimension, but the dimension depends on the cumulative variance and the ' 'transformer has not yet been estimated. 
Call estimate() before.') else: - if hasattr(self, '_singular_values') and self._singular_values is not None: - return min(np.min(np.count_nonzero(self._singular_values > self.epsilon)), self.dim) + if hasattr(self, '_rank0'): + return np.min([self._rank0, self._rankt, self.dim]) else: warnings.warn( - RuntimeWarning('Requested dimension, but the dimension depends on the singular values and the ' - 'transformer has not yet been estimated. Result is only an approximation.')) + RuntimeWarning('Requested dimension, but the dimension depends on the singular values of C00 and C11' + ' and the transformer has not yet been estimated. Result is only an approximation.')) return self.dim def expectation(self, observables, statistics, lag_multiple=1, observables_mean_free=False, statistics_mean_free=False): @@ -254,8 +254,8 @@ def _diagonalize(self, scaling=None): induce a kinetic map. """ - L0 = spd_inv_sqrt(self.C00) - Lt = spd_inv_sqrt(self.Ctt) + L0, self._rank0 = spd_inv_sqrt(self.C00, epsilon=self.epsilon, return_rank=True) + Lt, self._rankt = spd_inv_sqrt(self.Ctt, epsilon=self.epsilon, return_rank=True) A = L0.T.dot(self.C0t).dot(Lt) Uprime, s, Vprimeh = np.linalg.svd(A, compute_uv=True)