Skip to content
This repository has been archived by the owner on Sep 11, 2023. It is now read-only.

Remove bhmm as dependency #1550

Merged
merged 13 commits into from
Mar 17, 2022
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,5 @@ Or start a discussion on our mailing list: pyemma-users@lists.fu-berlin.de
External Libraries
------------------
* mdtraj (LGPLv3): https://mdtraj.org
* bhmm (LGPLv3): http://github.com/bhmm/bhmm
* msmtools (LGPLv3): http://github.com/markovmodel/msmtools
* thermotools (LGPLv3): http://github.com/markovmodel/thermotools
1 change: 0 additions & 1 deletion devtools/conda-recipe/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ requirements:
- deeptime >=0.3.0

run:
- bhmm >=0.6.3
- decorator >=4.0.0
- h5py
- intel-openmp # [osx]
Expand Down
1 change: 1 addition & 0 deletions pyemma/_base/serialization/pickle_extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ class HDF5PersistentUnpickler(Unpickler):
'numpy',
'scipy',
'bhmm',
'deeptime'
)

def __init__(self, group, file):
Expand Down
2 changes: 1 addition & 1 deletion pyemma/coordinates/clustering/tests/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def make_blobs(n_samples=100, n_features=2, centers=3, cluster_std=1.0,
centers = generator.uniform(center_box[0], center_box[1],
size=(centers, n_features))
else:
from bhmm._external.sklearn.utils import check_array
from sklearn.utils import check_array
centers = check_array(centers)
n_features = centers.shape[1]

Expand Down
50 changes: 33 additions & 17 deletions pyemma/msm/estimators/bayesian_hmsm.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@


import numpy as _np

from deeptime.markov._util import lag_observations
clonker marked this conversation as resolved.
Show resolved Hide resolved

from pyemma._base.progress import ProgressReporterMixin
from pyemma.msm.estimators.maximum_likelihood_hmsm import MaximumLikelihoodHMSM as _MaximumLikelihoodHMSM
Expand All @@ -27,7 +27,6 @@
from pyemma.util.annotators import fix_docs
from pyemma.util.types import ensure_dtraj_list
from pyemma.util.units import TimeUnit
from bhmm import lag_observations as _lag_observations
from msmtools.estimation import number_of_states as _number_of_states

__author__ = 'noe'
Expand Down Expand Up @@ -220,7 +219,7 @@ def _estimate(self, dtrajs):

# if stride is different to init_hmsm, check if microstates in lagged-strided trajs are compatible
if self.stride != self.init_hmsm.stride:
dtrajs_lagged_strided = _lag_observations(dtrajs, self.lag, stride=self.stride)
dtrajs_lagged_strided = lag_observations(dtrajs, self.lag, stride=self.stride)
_nstates_obs = _number_of_states(dtrajs_lagged_strided, only_used=True)
_nstates_obs_full = _number_of_states(dtrajs)

Expand Down Expand Up @@ -279,29 +278,46 @@ def _estimate(self, dtrajs):
if self.show_progress:
self._progress_register(self.nsamples, description='Sampling HMSMs', stage=0)

def call_back():
self._progress_update(1, stage=0)
from deeptime.util.callbacks import ProgressCallback
outer_self = self

class BHMMCallback(ProgressCallback):

def __call__(self, inc=1, *args, **kw):
super().__call__(inc, *args, **kw)
outer_self._progress_update(1, stage=0)

progress = BHMMCallback
else:
call_back = None
progress = None

from bhmm import discrete_hmm, bayesian_hmm
from deeptime.markov.hmm import BayesianHMM

if self.init_hmsm is not None:
hmm_mle = self.init_hmsm.hmm
estimator = BayesianHMM(hmm_mle, n_samples=self.nsamples, stride=self.stride,
initial_distribution_prior=self.p0_prior,
transition_matrix_prior=self.transition_matrix_prior,
store_hidden=self.store_hidden, reversible=self.reversible,
stationary=self.stationary)
else:
hmm_mle = discrete_hmm(self.initial_distribution, self.transition_matrix, B_init)

sampled_hmm = bayesian_hmm(self.discrete_trajectories_lagged, hmm_mle, nsample=self.nsamples,
reversible=self.reversible, stationary=self.stationary,
p0_prior=self.p0_prior, transition_matrix_prior=self.transition_matrix_prior,
store_hidden=self.store_hidden, call_back=call_back)

estimator = BayesianHMM.default(dtrajs, n_hidden_states=self.nstates, lagtime=self.lag,
n_samples=self.nsamples, stride=self.stride,
initial_distribution_prior=self.p0_prior,
transition_matrix_prior=self.transition_matrix_prior,
store_hidden=self.store_hidden, reversible=self.reversible,
stationary=self.stationary,
prior_submodel=True, separate=self.separate)

estimator.fit(dtrajs, n_burn_in=0, n_thin=1, progress=progress)
model = estimator.fetch_model()
if self.show_progress:
self._progress_force_finish(stage=0)

# Samples
sample_inp = [(m.transition_matrix, m.stationary_distribution, m.output_probabilities)
for m in sampled_hmm.sampled_hmms]
sample_inp = [(m.transition_model.transition_matrix, m.transition_model.stationary_distribution,
m.output_probabilities)
for m in model.samples]

samples = []
for P, pi, pobs in sample_inp: # restrict to observable set if necessary
Expand All @@ -310,7 +326,7 @@ def call_back():
samples.append(_HMSM(P, pobs, pi=pi, dt_model=self.dt_model))

# store results
self.sampled_trajs = [sampled_hmm.sampled_hmms[i].hidden_state_trajectories for i in range(self.nsamples)]
self.sampled_trajs = [model.samples[i].hidden_state_trajectories for i in range(self.nsamples)]
self.update_model_params(samples=samples)

# deal with connectivity
Expand Down
108 changes: 56 additions & 52 deletions pyemma/msm/estimators/maximum_likelihood_hmsm.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from deeptime.markov import TransitionCountModel
from deeptime.markov.msm import MarkovStateModel

from pyemma.util.annotators import alias, aliased, fix_docs

Expand Down Expand Up @@ -159,7 +161,6 @@ def dt_traj(self, value):

#TODO: store_data is mentioned but not implemented or used!
def _estimate(self, dtrajs):
import bhmm
# ensure right format
dtrajs = _types.ensure_dtraj_list(dtrajs)

Expand Down Expand Up @@ -195,7 +196,8 @@ def _estimate(self, dtrajs):
self.stride = int(min(self.lag, 2*corrtime))

# LAG AND STRIDE DATA
dtrajs_lagged_strided = bhmm.lag_observations(dtrajs, self.lag, stride=self.stride)
from deeptime.markov import compute_dtrajs_effective
dtrajs_lagged_strided = compute_dtrajs_effective(dtrajs, self.lag, n_states=-1, stride=self.stride)
clonker marked this conversation as resolved.
Show resolved Hide resolved

# OBSERVATION SET
if self.observe_nonempty:
Expand All @@ -204,54 +206,56 @@ def _estimate(self, dtrajs):
observe_subset = None

# INIT HMM
from bhmm import init_discrete_hmm
from deeptime.markov.hmm import init
from pyemma.msm.estimators import MaximumLikelihoodMSM
from pyemma.msm.estimators import OOMReweightedMSM
if self.msm_init=='largest-strong':
hmm_init = init_discrete_hmm(dtrajs_lagged_strided, self.nstates, lag=1,
reversible=self.reversible, stationary=True, regularize=True,
method='lcs-spectral', separate=self.separate)
elif self.msm_init=='all':
hmm_init = init_discrete_hmm(dtrajs_lagged_strided, self.nstates, lag=1,
reversible=self.reversible, stationary=True, regularize=True,
method='spectral', separate=self.separate)
elif isinstance(self.msm_init, (MaximumLikelihoodMSM, OOMReweightedMSM)): # initial MSM given.
from bhmm.init.discrete import init_discrete_hmm_spectral
p0, P0, pobs0 = init_discrete_hmm_spectral(self.msm_init.count_matrix_full, self.nstates,
reversible=self.reversible, stationary=True,
active_set=self.msm_init.active_set,
P=self.msm_init.transition_matrix, separate=self.separate)
hmm_init = bhmm.discrete_hmm(p0, P0, pobs0)
observe_subset = self.msm_init.active_set # override observe_subset.
else:
raise ValueError('Unknown MSM initialization option: ' + str(self.msm_init))

# ---------------------------------------------------------------------------------------
# Estimate discrete HMM
# ---------------------------------------------------------------------------------------

# run EM
from bhmm.estimators.maximum_likelihood import MaximumLikelihoodEstimator as _MaximumLikelihoodEstimator
hmm_est = _MaximumLikelihoodEstimator(dtrajs_lagged_strided, self.nstates, initial_model=hmm_init,
output='discrete', reversible=self.reversible, stationary=self.stationary,
accuracy=self.accuracy, maxit=self.maxit)
# run
hmm_est.fit()
# package in discrete HMM
self.hmm = bhmm.DiscreteHMM(hmm_est.hmm)
from threadpoolctl import threadpool_limits
with threadpool_limits(limits=1):
if self.msm_init == 'largest-strong':
hmm_init = init.discrete.metastable_from_data(dtrajs, n_hidden_states=self.nstates, lagtime=self.lag,
stride=self.stride, mode='largest-regularized',
reversible=self.reversible, stationary=True,
clonker marked this conversation as resolved.
Show resolved Hide resolved
separate_symbols=self.separate)
elif self.msm_init == 'all':
hmm_init = init.discrete.metastable_from_data(dtrajs, n_hidden_states=self.nstates, lagtime=self.lag,
stride=self.stride, reversible=self.reversible,
stationary=True, separate_symbols=self.separate,
mode='all-regularized')
elif isinstance(self.msm_init, (MaximumLikelihoodMSM, OOMReweightedMSM)): # initial MSM given.
msm = MarkovStateModel(transition_matrix=self.msm_init.P,
count_model=TransitionCountModel(self.msm_init.count_matrix_active))
hmm_init = init.discrete.metastable_from_msm(msm, n_hidden_states=self.nstates,
reversible=self.reversible,
stationary=True, separate_symbols=self.separate)
observe_subset = self.msm_init.active_set # override observe_subset.
else:
raise ValueError('Unknown MSM initialization option: ' + str(self.msm_init))

# ---------------------------------------------------------------------------------------
# Estimate discrete HMM
# ---------------------------------------------------------------------------------------

# run EM
from deeptime.markov.hmm import MaximumLikelihoodHMM
hmm_est = MaximumLikelihoodHMM(hmm_init, lagtime=self.lag, stride=self.stride, reversible=self.reversible,
stationary=self.stationary, accuracy=self.accuracy, maxit=self.maxit)
# run
hmm_est.fit(dtrajs)
# package in discrete HMM
self.hmm = hmm_est.fetch_model()

# get model parameters
self.initial_distribution = self.hmm.initial_distribution
transition_matrix = self.hmm.transition_matrix
transition_matrix = self.hmm.transition_model.transition_matrix
observation_probabilities = self.hmm.output_probabilities

# get estimation parameters
self.likelihoods = hmm_est.likelihoods # Likelihood history
self.likelihoods = self.hmm.likelihoods # Likelihood history
self.likelihood = self.likelihoods[-1]
self.hidden_state_probabilities = hmm_est.hidden_state_probabilities # gamma variables
self.hidden_state_trajectories = hmm_est.hmm.hidden_state_trajectories # Viterbi path
self.count_matrix = hmm_est.count_matrix # hidden count matrix
self.initial_count = hmm_est.initial_count # hidden init count
self.hidden_state_probabilities = self.hmm.state_probabilities # gamma variables
self.hidden_state_trajectories = self.hmm.hidden_state_trajectories # Viterbi path
self.count_matrix = self.hmm.count_model.count_matrix # hidden count matrix
self.initial_count = self.hmm.initial_count # hidden init count
self._active_set = _np.arange(self.nstates)

# TODO: it can happen that we loose states due to striding. Should we lift the output probabilities afterwards?
Expand Down Expand Up @@ -409,16 +413,15 @@ def submodel(self, states=None, obs=None, mincount_connectivity='1/n', inplace=F
mincount_connectivity = 1.0/float(self.nstates)

# handle new connectivity
from bhmm.estimators import _tmatrix_disconnected
S = _tmatrix_disconnected.connected_sets(self.count_matrix,
mincount_connectivity=mincount_connectivity,
strong=True)
cm = TransitionCountModel(self.count_matrix)
S = cm.connected_sets(connectivity_threshold=mincount_connectivity, directed=True)

if inplace:
submodel_estimator = self
else:
from copy import deepcopy
submodel_estimator = deepcopy(self)

from deeptime.markov._transition_matrix import stationary_distribution
if len(S) > 1:
# keep only non-negligible transitions
C = _np.zeros(self.count_matrix.shape)
Expand All @@ -427,20 +430,21 @@ def submodel(self, states=None, obs=None, mincount_connectivity='1/n', inplace=F
for s in S: # keep all (also small) transition counts within strongly connected subsets
C[_np.ix_(s, s)] = self.count_matrix[_np.ix_(s, s)]
# re-estimate transition matrix with disc.
P = _tmatrix_disconnected.estimate_P(C, reversible=self.reversible, mincount_connectivity=0)
pi = _tmatrix_disconnected.stationary_distribution(P, C)
from deeptime.markov.msm import MaximumLikelihoodMSM
msmest = MaximumLikelihoodMSM(allow_disconnected=True, reversible=self.reversible, connectivity_threshold=0)
msm = msmest.fit_fetch(C)
P = msm.transition_matrix
pi = stationary_distribution(P, C, mincount_connectivity=0)
else:
C = self.count_matrix
P = self.transition_matrix
pi = self.stationary_distribution

# determine substates
if isinstance(states, str):
from bhmm.estimators import _tmatrix_disconnected
strong = 'strong' in states
largest = 'largest' in states
S = _tmatrix_disconnected.connected_sets(self.count_matrix, mincount_connectivity=mincount_connectivity,
strong=strong)
S = cm.connected_sets(connectivity_threshold=mincount_connectivity, directed=strong)
if largest:
score = [len(s) for s in S]
else:
Expand All @@ -451,7 +455,7 @@ def submodel(self, states=None, obs=None, mincount_connectivity='1/n', inplace=F
C = C[_np.ix_(states, states)].copy()
P = P[_np.ix_(states, states)].copy()
P /= P.sum(axis=1)[:, None]
pi = _tmatrix_disconnected.stationary_distribution(P, C)
pi = stationary_distribution(P, C)
submodel_estimator.initial_count = self.initial_count[states]
submodel_estimator.initial_distribution = self.initial_distribution[states] / self.initial_distribution[states].sum()

Expand Down
13 changes: 3 additions & 10 deletions pyemma/msm/models/hmsm.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,25 +36,18 @@

@fix_docs
class HMSM(_MSM):
r""" Hidden Markov model on discrete states.

Parameters
----------
hmm : :class:`DiscreteHMM <bhmm.DiscreteHMM>`
Hidden Markov Model

"""
r""" Hidden Markov model on discrete states. """
__serialize_version = 0

def __init__(self, P, pobs, pi=None, dt_model='1 step'):
"""

Parameters
----------
Pcoarse : ndarray (m,m)
P : ndarray (m,m)
coarse-grained or hidden transition matrix

Pobs : ndarray (m,n)
pobs : ndarray (m,n)
observation probability matrix from hidden to observable discrete states

dt_model : str, optional, default='1 step'
Expand Down
5 changes: 1 addition & 4 deletions pyemma/msm/tests/test_bayesian_hmsm.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,6 @@ def setUpClass(cls):
import pyemma.datasets
obs = pyemma.datasets.load_2well_discrete().dtraj_T100K_dt10

# don't print
import bhmm
bhmm.config.verbose = False
# hidden states
cls.nstates = 2
# samples
Expand Down Expand Up @@ -332,7 +329,7 @@ class TestBHMMSpecialCases(unittest.TestCase):
def test_separate_states(self):
dtrajs = [np.array([0, 1, 1, 1, 1, 1, 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1]),
np.array([2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2]),]
hmm_bayes = bayesian_hidden_markov_model(dtrajs, 3, lag=1, separate=[0], nsamples=100)
hmm_bayes = bayesian_hidden_markov_model(dtrajs, 3, lag=1, separate=[0], nsamples=100, store_hidden=True)
# we expect zeros in all samples at the following indexes:
pobs_zeros = [[0, 1, 2, 2, 2], [0, 0, 1, 2, 3]]
for s in hmm_bayes.samples:
Expand Down
2 changes: 1 addition & 1 deletion pyemma/msm/tests/test_hmsm.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def test_mfpt(self):
assert tba > 0
# HERE:
err = np.minimum(np.abs(tab - 680.708752214), np.abs(tba - 699.560589099))
assert (err < 1e-6)
assert (err < 1e-3)

# =============================================================================
# Test HMSM observable spectral properties
Expand Down
1 change: 0 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,6 @@ def build_extensions(self):
tests_require=['pytest'],
# runtime dependencies
install_requires=[
'bhmm>=0.6,<0.7',
'decorator>=4.0.0',
'h5py>=2.7.1',
'matplotlib',
Expand Down