From 8f8cb4995c324e299f22f1dc7197d4490ae12e1d Mon Sep 17 00:00:00 2001
From: Kevin Murphy <kevin.murphy@everlaw.com>
Date: Thu, 25 Mar 2021 14:27:55 -0700
Subject: [PATCH] lsimodel: Only log top words that actually exist in <id2word>

In some pathological cases, we might try to log the top N words, even
though we haven't seen N words yet.  In these cases, we simply skip any
word ids that are not present in <id2word> and return the remaining ones.

Closes #3090.
---
 gensim/models/lsimodel.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/gensim/models/lsimodel.py b/gensim/models/lsimodel.py
index 97cc921f34..f9fd730f08 100644
--- a/gensim/models/lsimodel.py
+++ b/gensim/models/lsimodel.py
@@ -670,7 +670,10 @@ def show_topic(self, topicno, topn=10):
         c = np.asarray(self.projection.u.T[topicno, :]).flatten()
         norm = np.sqrt(np.sum(np.dot(c, c)))
         most = matutils.argsort(np.abs(c), topn, reverse=True)
-        return [(self.id2word[val], 1.0 * c[val] / norm) for val in most]
+
+        # Note: We only output (word, score) pairs for `val`s that
+        #       are within `self.id2word`.  See #3090 for details.
+        return [(self.id2word[val], 1.0 * c[val] / norm) for val in most if val in self.id2word]
 
     def show_topics(self, num_topics=-1, num_words=10, log=False, formatted=True):
         """Get the most significant topics.