Distributed LDA: checking the length of docs instead of the boolean value, plus int index conversion (piskvorky#1191)
pranaydeeps · Mar 21, 2017 · 3ad90d8 · 3ad90d8
1 parent b285dab
commit 3ad90d8
Showing 1 changed file with 2 additions and 2 deletions.
diff --git a/gensim/models/ldamodel.py b/gensim/models/ldamodel.py
@@ -425,7 +425,7 @@ def inference(self, chunk, collect_sstats=False):
         # Lee&Seung trick which speeds things up by an order of magnitude, compared
         # to Blei's original LDA-C code, cool!).
         for d, doc in enumerate(chunk):
-            if doc and not isinstance(doc[0][0], six.integer_types):
+            if len(doc) > 0 and not isinstance(doc[0][0], six.integer_types):
                 # make sure the term IDs are ints, otherwise np will get upset
                 ids = [int(id) for id, _ in doc]
             else:
@@ -730,7 +730,7 @@ def bound(self, corpus, gamma=None, subsample_ratio=1.0):
             Elogthetad = dirichlet_expectation(gammad)
 
             # E[log p(doc | theta, beta)]
-            score += np.sum(cnt * logsumexp(Elogthetad + Elogbeta[:, id]) for id, cnt in doc)
+            score += np.sum(cnt * logsumexp(Elogthetad + Elogbeta[:, int(id)]) for id, cnt in doc)
 
             # E[log p(theta | alpha) - log q(theta | gamma)]; assumes alpha is a vector
             score += np.sum((self.alpha - gammad) * Elogthetad)