Skip to content

Commit

Permalink
Distributed LDA: checking the length of docs instead of the boolean value, plus int index conversion (piskvorky#1191)
Browse files Browse the repository at this point in the history
  • Loading branch information
saparina authored and Pranaydeep Singh committed Mar 21, 2017
1 parent b285dab commit 3ad90d8
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions gensim/models/ldamodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,7 @@ def inference(self, chunk, collect_sstats=False):
# Lee&Seung trick which speeds things up by an order of magnitude, compared
# to Blei's original LDA-C code, cool!).
for d, doc in enumerate(chunk):
- if doc and not isinstance(doc[0][0], six.integer_types):
+ if len(doc) > 0 and not isinstance(doc[0][0], six.integer_types):
# make sure the term IDs are ints, otherwise np will get upset
ids = [int(id) for id, _ in doc]
else:
Expand Down Expand Up @@ -730,7 +730,7 @@ def bound(self, corpus, gamma=None, subsample_ratio=1.0):
Elogthetad = dirichlet_expectation(gammad)

# E[log p(doc | theta, beta)]
- score += np.sum(cnt * logsumexp(Elogthetad + Elogbeta[:, id]) for id, cnt in doc)
+ score += np.sum(cnt * logsumexp(Elogthetad + Elogbeta[:, int(id)]) for id, cnt in doc)

# E[log p(theta | alpha) - log q(theta | gamma)]; assumes alpha is a vector
score += np.sum((self.alpha - gammad) * Elogthetad)
Expand Down

0 comments on commit 3ad90d8

Please sign in to comment.