class BaseTopicModel(object):
    """
    Mixin that supplies the string rendering of a single topic.

    The class itself holds no state. It relies on the inheriting model to provide
    `show_topic(topicno, topn)`, returning an iterable of `(word, weight)` pairs;
    `print_topic()` only formats that result.

    Deriving from `object` keeps this a new-style class on Python 2 as well.
    """

    def print_topic(self, topicno, topn=10):
        """
        Return a single topic as a formatted string. See `show_topic()` for parameters.

        Each `(word, weight)` pair produced by `show_topic(topicno, topn)` is rendered
        as `weight*"word"` (weight with three decimals), and the pairs are joined
        with `' + '`. An empty topic yields an empty string.

        >>> lsimodel.print_topic(10, topn=5)
        '-0.340*"category" + 0.298*"$M$" + 0.183*"algebra" + -0.174*"functor" + -0.168*"operator"'

        """
        # A generator is enough here: `join` consumes the items once, so no
        # intermediate list has to be built.
        return ' + '.join(
            '%.3f*"%s"' % (weight, word)
            for word, weight in self.show_topic(topicno, topn)
        )
Algorithm from diff --git a/gensim/models/lsimodel.py b/gensim/models/lsimodel.py index 12a3c17d18..7041530af8 100644 --- a/gensim/models/lsimodel.py +++ b/gensim/models/lsimodel.py @@ -59,6 +59,8 @@ from scipy.sparse import sparsetools from gensim import interfaces, matutils, utils +from gensim.models import basemodel + from six import iterkeys from six.moves import xrange @@ -221,7 +223,7 @@ def merge(self, other, decay=1.0): #endclass Projection -class LsiModel(interfaces.TransformationABC): +class LsiModel(interfaces.TransformationABC,basemodel.BaseTopicModel): """ Objects of this class allow building and maintaining a model for Latent Semantic Indexing (also known as Latent Semantic Analysis). @@ -490,16 +492,6 @@ def show_topic(self, topicno, topn=10): most = matutils.argsort(numpy.abs(c), topn, reverse=True) return [(self.id2word[val], 1.0 * c[val] / norm) for val in most] - def print_topic(self, topicno, topn=10): - """ - Return a single topic as a formatted string. See `show_topic()` for parameters. - - >>> lsimodel.print_topic(10, topn=5) - '-0.340 * "category" + 0.298 * "$M$" + 0.183 * "algebra" + -0.174 * "functor" + -0.168 * "operator"' - - """ - return ' + '.join(['%.3f*"%s"' % (v, k) for k, v in self.show_topic(topicno, topn)]) - def show_topics(self, num_topics=-1, num_words=10, log=False, formatted=True): """ Return `num_topics` most significant topics (return all by default).