From 8f8cb4995c324e299f22f1dc7197d4490ae12e1d Mon Sep 17 00:00:00 2001 From: Kevin Murphy Date: Thu, 25 Mar 2021 14:27:55 -0700 Subject: [PATCH] lsimodel: Only log top words that actually exist in the dictionary In some pathological cases, we might try to log the top N words, even though we haven't seen N words yet. In these cases, we simply skip the indices that are missing from `id2word`. Closes #3090. --- gensim/models/lsimodel.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/gensim/models/lsimodel.py b/gensim/models/lsimodel.py index 97cc921f34..f9fd730f08 100644 --- a/gensim/models/lsimodel.py +++ b/gensim/models/lsimodel.py @@ -670,7 +670,10 @@ def show_topic(self, topicno, topn=10): c = np.asarray(self.projection.u.T[topicno, :]).flatten() norm = np.sqrt(np.sum(np.dot(c, c))) most = matutils.argsort(np.abs(c), topn, reverse=True) - return [(self.id2word[val], 1.0 * c[val] / norm) for val in most] + + # Note: We only output (word, score) pairs for `val`s that + # are within `self.id2word`. See #3090 for details. + return [(self.id2word[val], 1.0 * c[val] / norm) for val in most if val in self.id2word] def show_topics(self, num_topics=-1, num_words=10, log=False, formatted=True): """Get the most significant topics.