diff --git a/gensim/models/ldamodel.py b/gensim/models/ldamodel.py index 30ca93f1b2..a695fec7ff 100755 --- a/gensim/models/ldamodel.py +++ b/gensim/models/ldamodel.py @@ -425,7 +425,7 @@ def inference(self, chunk, collect_sstats=False): # Lee&Seung trick which speeds things up by an order of magnitude, compared # to Blei's original LDA-C code, cool!). for d, doc in enumerate(chunk): - if doc and not isinstance(doc[0][0], six.integer_types): + if len(doc) > 0 and not isinstance(doc[0][0], six.integer_types): # make sure the term IDs are ints, otherwise np will get upset ids = [int(id) for id, _ in doc] else: @@ -730,7 +730,7 @@ def bound(self, corpus, gamma=None, subsample_ratio=1.0): Elogthetad = dirichlet_expectation(gammad) # E[log p(doc | theta, beta)] - score += np.sum(cnt * logsumexp(Elogthetad + Elogbeta[:, id]) for id, cnt in doc) + score += np.sum(cnt * logsumexp(Elogthetad + Elogbeta[:, int(id)]) for id, cnt in doc) # E[log p(theta | alpha) - log q(theta | gamma)]; assumes alpha is a vector score += np.sum((self.alpha - gammad) * Elogthetad)