Fix documentation for gensim.models.wrappers
#1859
@@ -6,16 +6,17 @@
 # Based on Copyright (C) 2014 Radim Rehurek <radimrehurek@seznam.cz>

-"""
-Python wrapper for Dynamic Topic Models (DTM) and the Document Influence Model (DIM) [1].
+"""Python wrapper for Dynamic Topic Models (DTM) and the Document Influence Model (DIM) [1].

 This module allows for DTM and DIM model estimation from a training corpus.

-Example:
+Examples
+--------
 >>> model = gensim.models.wrappers.DtmModel('dtm-win64.exe', my_corpus, my_timeslices,
 ...     num_topics=20, id2word=dictionary)

+References
+----------
+.. [1] https://github.com/magsilva/dtm/tree/master/bin
+
 """

Reviewer comment: Please add imports to the examples (it is impossible to make them fully executable, but they can be very close to an executable variant, i.e. with all needed imports and all variables defined), here and everywhere.
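Following the reviewer's request above, a near-executable sketch of what the module example could look like. The toy documents are invented for illustration; the hand-built `id2word`/`my_corpus` stand in for what `gensim.corpora.Dictionary` would produce, and the final `DtmModel` call is left commented out because it needs gensim plus the compiled DTM binary:

```python
# Imports the full example would need (shown as comments, since gensim and
# the DTM binary are assumed installed):
#   from gensim.corpora import Dictionary
#   from gensim.models.wrappers import DtmModel

# Toy documents (illustrative data, not from the PR).
texts = [["bank", "river"], ["bank", "money"], ["money", "loan"], ["river", "water"]]

# id2word: token id -> token, as Dictionary(texts) would build it.
vocab = sorted({w for doc in texts for w in doc})
id2word = dict(enumerate(vocab))
word2id = {w: i for i, w in id2word.items()}

# my_corpus: stream of sparse bag-of-words vectors, one (token_id, count) list per document.
my_corpus = [[(word2id[w], doc.count(w)) for w in sorted(set(doc))] for doc in texts]

# my_timeslices: number of documents in each time slice; must sum to the corpus size.
my_timeslices = [2, 2]
assert sum(my_timeslices) == len(my_corpus)

# The actual training call (requires the DTM executable on disk):
# model = DtmModel('dtm-win64.exe', my_corpus, my_timeslices, num_topics=2, id2word=id2word)
```

With all variables defined up front like this, the docstring example becomes copy-pasteable except for the binary path.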
@@ -36,40 +37,43 @@

 class DtmModel(utils.SaveLoad):
-    """
-    Class for DTM training using DTM binary. Communication between DTM and Python
-    takes place by passing around data files on disk and executing the DTM binary as a subprocess.
+    """Class for DTM training using the DTM binary.
+
+    Communication between DTM and Python takes place by passing around data files
+    on disk and executing the DTM binary as a subprocess.
+
     """

     def __init__(self, dtm_path, corpus=None, time_slices=None, mode='fit', model='dtm', num_topics=100,
                  id2word=None, prefix=None, lda_sequence_min_iter=6, lda_sequence_max_iter=20, lda_max_em_iter=10,
                  alpha=0.01, top_chain_var=0.005, rng_seed=0, initialize_lda=True):
         """
-        `dtm_path` is path to the dtm executable, e.g. `C:/dtm/dtm-win64.exe`.
-
-        `corpus` is a gensim corpus, aka a stream of sparse document vectors.
-
-        `id2word` is a mapping between tokens ids and token.
-
-        `mode` controls the mode of the mode: 'fit' is for training, 'time' for
-        analyzing documents through time according to a DTM, basically a held out set.
-
-        `model` controls the choice of model. 'fixed' is for DIM and 'dtm' for DTM.
-
-        `lda_sequence_min_iter` min iteration of LDA.
-
-        `lda_sequence_max_iter` max iteration of LDA.
-
-        `lda_max_em_iter` max em optiimzatiion iterations in LDA.
-
-        `alpha` is a hyperparameter that affects sparsity of the document-topics for the LDA models in each timeslice.
-
-        `top_chain_var` is a hyperparameter that affects.
-
-        `rng_seed` is the random seed.
-
-        `initialize_lda` initialize DTM with LDA.
+        Parameters
+        ----------
+        dtm_path : str
+            Path to the dtm executable, e.g. `C:/dtm/dtm-win64.exe`.
+        corpus : iterable of iterable of (int, int)
+            Gensim corpus, stream of sparse document vectors.
+        id2word : dict
+            Mapping between token ids and words from the corpus.
+        mode : str
+            Controls the mode of the model: 'fit' is for training, 'time' for analyzing
+            documents through time according to a DTM, basically a held-out set.
+        model : str
+            Controls the choice of model: 'fixed' is for DIM and 'dtm' for DTM.
+        lda_sequence_min_iter : int
+            Min iterations of LDA.
+        lda_sequence_max_iter : int
+            Max iterations of LDA.
+        lda_max_em_iter : int
+            Max EM optimization iterations in LDA.
+        alpha : float
+            Hyperparameter that affects sparsity of the document-topics for the LDA models in each time slice.
+        top_chain_var : float
+            Hyperparameter that affects how much topics change between time slices.
+        rng_seed : int, optional
+            Random seed.
+        initialize_lda : bool, optional
+            If True, initialize DTM with LDA.

         """
         if not os.path.isfile(dtm_path):

Reviewer comment: All descriptions must begin with a capital letter.

Reviewer comment: typical corpus type is
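The class docstring above describes the wrapper pattern: serialize the training data to files on disk, run the external binary as a subprocess, then read its output back. A minimal stdlib-only sketch of that round trip, using the Python interpreter itself as a stand-in for the DTM binary (file names and the "binary" are illustrative):

```python
import os
import subprocess
import sys
import tempfile

# 1. Write "training data" to disk, as DtmModel.convert_input does.
workdir = tempfile.mkdtemp()
corpus_path = os.path.join(workdir, "train-mult.dat")
with open(corpus_path, "w") as f:
    f.write("2 0:1 3:1\n")  # one document in LDA-C style

# 2. Execute an external process on that file. Here Python plays the role of
#    the DTM executable: it reads the corpus file and writes a result file.
out_path = os.path.join(workdir, "result.dat")
script = (
    "import sys;"
    "data = open(sys.argv[1]).read();"
    "open(sys.argv[2], 'w').write('docs=%d' % len(data.splitlines()))"
)
subprocess.check_call([sys.executable, "-c", script, corpus_path, out_path])

# 3. Read the results back, as the wrapper does after training.
result = open(out_path).read()
print(result)  # docs=1
```

The real wrapper follows the same three steps, with the `dtm_path` executable and its `train-*` / output files in place of the stand-ins.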
@@ -170,8 +174,7 @@ def ftimeslices(self):
         return self.prefix + 'train-seq.dat'

     def convert_input(self, corpus, time_slices):
-        """
-        Serialize documents in LDA-C format to a temporary text file,.
+        """Serialize documents in LDA-C format to a temporary text file.

         """
         logger.info("serializing temporary corpus to %s", self.fcorpustxt())
@@ -184,8 +187,18 @@ def convert_input(self, corpus, time_slices):
             fout.write(utils.to_utf8(str(sl) + "\n"))

     def train(self, corpus, time_slices, mode, model):
-        """
-        Train DTM model using specified corpus and time slices.
+        """Train DTM model using specified corpus and time slices.
+
+        Parameters
+        ----------
+        corpus : iterable of iterable of (int, int)
+            Gensim corpus, stream of sparse document vectors.
+        time_slices : list of int
+            List of time stamps.
+        mode : str
+            Controls the mode of the model: 'fit' is for training, 'time' for analyzing
+            documents through time according to a DTM, basically a held-out set.
+        model : str
+            Controls the choice of model: 'fixed' is for DIM and 'dtm' for DTM.

         """
         self.convert_input(corpus, time_slices)
@@ -253,11 +266,25 @@ def print_topics(self, num_topics=10, times=5, num_words=10):
         return self.show_topics(num_topics, times, num_words, log=True)

     def show_topics(self, num_topics=10, times=5, num_words=10, log=False, formatted=True):
-        """
-        Print the `num_words` most probable words for `num_topics` number of topics at 'times' time slices.
-        Set `topics=-1` to print all topics.
-
-        Set `formatted=True` to return the topics as a list of strings, or `False` as lists of (weight, word) pairs.
+        """Print the `num_words` most probable words for `num_topics` number of topics at `times` time slices.
+
+        Parameters
+        ----------
+        num_topics : int
+            Number of topics to print. Set `-1` to print all topics.
+        times : int
+            Number of time slices.
+        num_words : int
+            Number of words.
+        log : bool
+            If True, enable logging.
+        formatted : bool
+            Set `True` to return the topics as a list of strings, or `False` as lists of (weight, word) pairs.
+
+        Returns
+        -------
+        shown : list of str
+            Topics as a list of strings.

         """
         if num_topics < 0 or num_topics >= self.num_topics:
@@ -285,9 +312,22 @@ def show_topics(self, num_topics=10, times=5, num_words=10, log=False, formatted
         return shown

     def show_topic(self, topicid, time, topn=50, num_words=None):
-        """
-        Return `num_words` most probable words for the given `topicid`, as a list of
-        `(word_probability, word)` 2-tuples.
+        """Return `topn` most probable words for the given `topicid`, as a list of `(word_probability, word)` 2-tuples.
+
+        Parameters
+        ----------
+        topicid : int
+            Id of topic.
+        time : int
+            Time slice.
+        topn : int
+            Number of top words to return.
+        num_words : int, optional
+            Number of words; deprecated, use `topn` instead.
+
+        Returns
+        -------
+        beststr : list of (float, str)
+            Most probable words, as a list of `(word_probability, word)` 2-tuples.

         """
         if num_words is not None:  # deprecated num_words is used
@@ -306,19 +346,56 @@ def show_topic(self, topicid, time, topn=50, num_words=None):
         return beststr

     def print_topic(self, topicid, time, topn=10, num_words=None):
-        """Return the given topic, formatted as a string."""
+        """Return the given topic, formatted as a string.
+
+        Parameters
+        ----------
+        topicid : int
+            Id of topic.
+        time : int
+            Time slice.
+        topn : int
+            Number of top words to return.
+        num_words : int, optional
+            Number of words; deprecated, use `topn` instead.
+
+        Returns
+        -------
+        str
+            The given topic, formatted as a string.
+
+        """
         if num_words is not None:  # deprecated num_words is used
             warnings.warn("The parameter `num_words` is deprecated, will be removed in 4.0.0, use `topn` instead.")
             topn = num_words

         return ' + '.join(['%.3f*%s' % v for v in self.show_topic(topicid, time, topn)])
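The `' + '.join(...)` expression above is what turns `(probability, word)` pairs into the printed topic string. A tiny standalone check of that formatting, on invented pairs:

```python
# Toy (probability, word) pairs, as show_topic would return them.
pairs = [(0.5, "bank"), (0.3, "river"), (0.2, "money")]

# Same '%.3f*%s' formatting joined with ' + ', as in print_topic.
topic_str = " + ".join("%.3f*%s" % p for p in pairs)
print(topic_str)  # 0.500*bank + 0.300*river + 0.200*money
```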
     def dtm_vis(self, corpus, time):
-        """
-        returns term_frequency, vocab, doc_lengths, topic-term distributions and doc_topic distributions,
+        """Returns term_frequency, vocab, doc_lengths, topic-term distributions and doc_topic distributions,
         specified by pyLDAvis format.

         all of these are needed to visualise topics for DTM for a particular time-slice via pyLDAvis.
         input parameter is the year to do the visualisation.

+        Parameters
+        ----------
+        corpus : iterable of iterable of (int, int)
+            Gensim corpus, stream of sparse document vectors.
+        time : int
+            Time slice to visualise.
+
+        Returns
+        -------
+        doc_topic : numpy.ndarray
+            Document-topic proportions.
+        topic_term : numpy.ndarray
+            Topic-term distributions, scaled for the pyLDAvis format.
+        doc_lengths : list of int
+            Length of each document in the corpus.
+        term_frequency : list of int
+            Frequency of each word from the vocab.
+        vocab : list of str
+            List of words from the corpus (mapping between token ids and words).

         """
         topic_term = np.exp(self.lambda_[:, :, time]) / np.exp(self.lambda_[:, :, time]).sum()
         topic_term *= self.num_topics
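The body of `dtm_vis` exponentiates the topic-word weights for one time slice, normalizes them by the overall sum, and scales by `num_topics`. A pure-Python sketch of that normalization on toy `lambda_` values (no numpy, so the arithmetic is explicit):

```python
import math

num_topics = 2
# Toy per-term log weights for one time slice: one row per topic.
lambda_slice = [[0.0, 1.0, 2.0],
                [2.0, 1.0, 0.0]]

# exp(lambda) / exp(lambda).sum() * num_topics, element-wise.
exp_w = [[math.exp(w) for w in row] for row in lambda_slice]
total = sum(sum(row) for row in exp_w)
topic_term = [[num_topics * w / total for w in row] for row in exp_w]

# After scaling, the whole matrix sums to num_topics (each "row" of the
# normalized distribution contributes its share of the joint mass).
print(round(sum(sum(row) for row in topic_term), 6))  # 2.0
```

Note this normalizes over the whole matrix, not per topic, exactly as the `.sum()` in the diff does.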
     def dtm_coherence(self, time, num_words=20):
-        """
-        returns all topics of a particular time-slice without probabilitiy values for it to be used
+        """Returns all topics of a particular time slice without probability values, to be used
         for either "u_mass" or "c_v" coherence.

+        Parameters
+        ----------
+        time : int
+            Time slice to extract topics for.
+        num_words : int
+            Number of words.
+
+        Returns
+        -------
+        coherence_topics : list of list of str
+            All topics of a particular time slice without probability values.
+
         TODO:
         Because of the print format, right now this can only return the 1st time slice.
         Should we fix the coherence printing or make changes to the print statements to mirror DTM python?

         """
         coherence_topics = []
         for topic_no in range(0, self.num_topics):
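`dtm_coherence` strips the probability values from each topic so the bare word lists can feed coherence scoring ("u_mass" or "c_v"). The extraction step, sketched on toy `show_topic`-style output:

```python
# Toy per-topic (probability, word) pairs, as show_topic would return them.
shown = [
    [(0.5, "bank"), (0.3, "river"), (0.2, "money")],   # topic 0
    [(0.6, "loan"), (0.4, "money")],                   # topic 1
]

# Keep only the words, dropping the probability values.
coherence_topics = [[word for _, word in topic] for topic in shown]
print(coherence_topics)  # [['bank', 'river', 'money'], ['loan', 'money']]
```

These word lists are what a coherence model consumes; the probabilities play no role in either coherence measure.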
Reviewer comment: It would be really great if you added a short instruction on how to install the 'backend' for each wrapper.

Author reply: Sure! I forgot about it.