
Fix Sphinx warnings. Fix #1192 (#1442)
* Partially fixes #1192

Fixes warnings in the .py files

* Removing additional whitespace

* Removing additional whitespace from utils.py

* Removing trailing/leading whitespace from

* Making changes according to Google Code Style

@menshikh-iv Fixing warnings in the .py files according to the Google Code Style. Most of the warnings were due to indentation errors.

* Removing trailing spaces after Travis build

* Removing duplicate citation, toctree, and non-local image URI warnings

build succeeded, 21 warnings.
Getting there. :-)

* Adding .inc files to flake8 ignore list

* Fixing more indentation errors

Now I'm down to:
`build succeeded, 5 warnings.`

However, I'm in a bit of a fix. Changing `doc2vec.rst` and `word2vec.rst` to `.inc` files removed the duplicate warnings, but it also invalidates the references to these documents in my main toctree, producing the following warnings (the configuration that eventually landed is sketched after this list):

`apiref.rst:8: WARNING: toctree contains reference to nonexisting document u'models/doc2vec'`
`apiref.rst:8: WARNING: toctree contains reference to nonexisting document u'models/word2vec'`

* Removing the last few warnings
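
For reference, the resolution visible in the diff below marks standalone `.rst` pages with `:orphan:` (so Sphinx stops warning that they are not referenced from any toctree) and silences the remaining warning categories in `docs/src/conf.py`. A minimal sketch of that configuration, with the category list taken verbatim from this commit:

```python
# docs/src/conf.py -- sketch of the warning suppression added in this commit.
# `suppress_warnings` takes Sphinx warning categories; these three cover the
# non-local image URI, duplicate citation, and duplicate footnote warnings.
suppress_warnings = ['image.nonlocal_uri', 'ref.citation', 'ref.footnote']
```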
prerna135 authored and menshikh-iv committed Jul 5, 2017
1 parent 5300c3b commit ede6eb2
Showing 32 changed files with 187 additions and 183 deletions.
2 changes: 1 addition & 1 deletion continuous_integration/travis/flake8_diff.sh
@@ -134,6 +134,6 @@ check_files() {
if [[ "$MODIFIED_FILES" == "no_match" ]]; then
echo "No file has been modified"
else
check_files "$(echo "$MODIFIED_FILES" )" "--ignore=E501,E731,E12,W503 --exclude=*.sh,*.md,*.yml,*.rst,*.ipynb,*.txt,*.csv,*.vec,Dockerfile*,*.c,*.pyx"
check_files "$(echo "$MODIFIED_FILES" )" "--ignore=E501,E731,E12,W503 --exclude=*.sh,*.md,*.yml,*.rst,*.ipynb,*.txt,*.csv,*.vec,Dockerfile*,*.c,*.pyx,*.inc"
fi
echo -e "No problem detected by flake8\n"
2 changes: 2 additions & 0 deletions docs/src/about.rst
@@ -1,3 +1,5 @@
+:orphan:
+
.. _about:

============
2 changes: 2 additions & 0 deletions docs/src/changes_080.rst
@@ -1,3 +1,5 @@
+:orphan:
+
.. _changes_080:

Change Set for 0.8.0
6 changes: 4 additions & 2 deletions docs/src/conf.py
@@ -16,7 +16,7 @@
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
-#sys.path.append(os.path.abspath('.'))
+sys.path.append(os.path.abspath('.'))

# -- General configuration -----------------------------------------------------

@@ -139,7 +139,7 @@
# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
-html_favicon = 'favicon.ico'
+html_favicon = '_static/favicon.ico'

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
@@ -215,3 +215,5 @@

# If false, no module index is generated.
#latex_use_modindex = True

+suppress_warnings = ['image.nonlocal_uri', 'ref.citation', 'ref.footnote']
2 changes: 2 additions & 0 deletions docs/src/corpora/corpora.rst
@@ -1,3 +1,5 @@
+:orphan:
+
:mod:`corpora` -- Package for corpora I/O
==========================================

2 changes: 2 additions & 0 deletions docs/src/models/models.rst
@@ -1,3 +1,5 @@
+:orphan:
+
:mod:`models` -- Package for transformation models
======================================================

2 changes: 2 additions & 0 deletions docs/src/models/wrappers/wrappers.rst
@@ -1,3 +1,5 @@
+:orphan:
+
:mod:`models.wrappers` -- Package for transformation models via external programs
=================================================================================

2 changes: 2 additions & 0 deletions docs/src/similarities/simserver.rst
@@ -1,3 +1,5 @@
+:orphan:
+
:mod:`simserver` -- Document similarity server
==============================================

2 changes: 2 additions & 0 deletions docs/src/simserver.rst
@@ -1,3 +1,5 @@
+:orphan:
+
.. _simserver:

Document Similarity Server
@@ -1,7 +1,7 @@
-:mod:`sklearn_integration.sklearn_wrapper_gensim_ldamodel.SklearnWrapperLdaModel` -- Scikit learn wrapper for Latent Dirichlet Allocation
+:mod:`sklearn_integration.sklearn_wrapper_gensim_ldamodel` -- Scikit learn wrapper for Latent Dirichlet Allocation
=========================================================================================================================================

-.. automodule:: gensim.sklearn_integration.sklearn_wrapper_gensim_ldamodel.SklearnWrapperLdaModel
+.. automodule:: gensim.sklearn_integration.sklearn_wrapper_gensim_ldamodel
:synopsis: Scikit learn wrapper for LDA model
:members:
:inherited-members:
2 changes: 1 addition & 1 deletion docs/src/summarization/textcleaner.rst
@@ -7,4 +7,4 @@
:inherited-members:
:undoc-members:
:show-inheritance:
-b
+
4 changes: 2 additions & 2 deletions gensim/models/atmodel.py
@@ -533,10 +533,10 @@ def update(self, corpus=None, author2doc=None, doc2author=None, chunksize=None,
corpus (gensim corpus): The corpus with which the author-topic model should be updated.
author2doc (dictionary): author to document mapping corresponding to indexes in input
-corpus.
+corpus.
doc2author (dictionary): document to author mapping corresponding to indexes in input
-corpus.
+corpus.
chunks_as_numpy (bool): Whether each chunk passed to `.inference` should be a np
array or not. np can in some settings turn the term IDs
63 changes: 35 additions & 28 deletions gensim/models/coherencemodel.py
@@ -9,13 +9,13 @@
the four stage topic coherence pipeline from the paper [1]_.
The four stage pipeline is basically:
-Segmentation -> Probability Estimation -> Confirmation Measure -> Aggregation.
+Segmentation -> Probability Estimation -> Confirmation Measure -> Aggregation.
Implementation of this pipeline allows for the user to in essence "make" a
coherence measure of his/her choice by choosing a method in each of the pipelines.
.. [1] Michael Roeder, Andreas Both and Alexander Hinneburg. Exploring the space of topic
-coherence measures. http://svn.aksw.org/papers/2015/WSDM_Topic_Evaluation/public.pdf.
+coherence measures. http://svn.aksw.org/papers/2015/WSDM_Topic_Evaluation/public.pdf.
"""

import logging
@@ -114,40 +114,47 @@ def __init__(self, model=None, topics=None, texts=None, corpus=None, dictionary=
window_size=None, coherence='c_v', topn=10, processes=-1):
"""
Args:
-----
-model : Pre-trained topic model. Should be provided if topics is not provided.
+model : Pre-trained topic model. Should be provided if topics is not provided.
Currently supports LdaModel, LdaMallet wrapper and LdaVowpalWabbit wrapper. Use 'topics'
parameter to plug in an as yet unsupported model.
-topics : List of tokenized topics. If this is preferred over model, dictionary should be provided. eg::
-topics = [['human', 'machine', 'computer', 'interface'],
+topics : List of tokenized topics. If this is preferred over model, dictionary should be provided.
+eg::
+topics = [['human', 'machine', 'computer', 'interface'],
['graph', 'trees', 'binary', 'widths']]
-texts : Tokenized texts. Needed for coherence models that use sliding window based probability estimator, eg::
-texts = [['system', 'human', 'system', 'eps'],
+texts : Tokenized texts. Needed for coherence models that use sliding window based probability estimator,
+eg::
+texts = [['system', 'human', 'system', 'eps'],
['user', 'response', 'time'],
['trees'],
['graph', 'trees'],
['graph', 'minors', 'trees'],
['graph', 'minors', 'survey']]
-corpus : Gensim document corpus.
-dictionary : Gensim dictionary mapping of id word to create corpus. If model.id2word is present,
-this is not needed. If both are provided, dictionary will be used.
-window_size : Is the size of the window to be used for coherence measures using boolean sliding window as their
-probability estimator. For 'u_mass' this doesn't matter.
-If left 'None' the default window sizes are used which are:
-'c_v' : 110
-'c_uci' : 10
-'c_npmi' : 10
-coherence : Coherence measure to be used. Supported values are:
-'u_mass'
-'c_v'
-'c_uci' also popularly known as c_pmi
-'c_npmi'
-For 'u_mass' corpus should be provided. If texts is provided, it will be converted
-to corpus using the dictionary. For 'c_v', 'c_uci' and 'c_npmi' texts should be provided.
-Corpus is not needed.
-topn : Integer corresponding to the number of top words to be extracted from each topic.
-processes : number of processes to use for probability estimation phase; any value less than 1 will be
-interpreted to mean num_cpus - 1; default is -1.
+corpus : Gensim document corpus.
+dictionary : Gensim dictionary mapping of id word to create corpus. If model.id2word is present,
+this is not needed. If both are provided, dictionary will be used.
+window_size : Is the size of the window to be used for coherence measures using boolean sliding window as their
+probability estimator. For 'u_mass' this doesn't matter.
+If left 'None' the default window sizes are used which are:
+'c_v' : 110
+'c_uci' : 10
+'c_npmi' : 10
+coherence : Coherence measure to be used. Supported values are:
+'u_mass'
+'c_v'
+'c_uci' also popularly known as c_pmi
+'c_npmi'
+For 'u_mass' corpus should be provided. If texts is provided, it will be converted
+to corpus using the dictionary. For 'c_v', 'c_uci' and 'c_npmi' texts should be provided.
+Corpus is not needed.
+topn : Integer corresponding to the number of top words to be extracted from each topic.
+processes : number of processes to use for probability estimation phase; any value less than 1 will be
+interpreted to mean num_cpus - 1; default is -1.
"""
if model is None and topics is None:
raise ValueError("One of model or topics has to be provided.")
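
Since the docstring above is the reference for these parameters, a short self-contained usage sketch may help; the toy corpus below is illustrative, not part of this commit:

```python
from gensim.corpora import Dictionary
from gensim.models import CoherenceModel, LdaModel

# Toy tokenized texts, shaped like the `texts` argument described above.
texts = [['human', 'interface', 'computer'],
         ['survey', 'user', 'computer', 'system', 'response', 'time'],
         ['graph', 'trees'],
         ['graph', 'minors', 'trees'],
         ['graph', 'minors', 'survey']]
dictionary = Dictionary(texts)
corpus = [dictionary.doc2bow(text) for text in texts]
lda = LdaModel(corpus=corpus, id2word=dictionary, num_topics=2)

# 'c_v' needs the tokenized texts (sliding-window estimator);
# 'u_mass' would need only the corpus, per the docstring above.
cm = CoherenceModel(model=lda, texts=texts, dictionary=dictionary, coherence='c_v')
print(cm.get_coherence())
```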
4 changes: 2 additions & 2 deletions gensim/models/keyedvectors.py
@@ -730,7 +730,7 @@ def evaluate_word_pairs(self, pairs, delimiter='\t', restrict_vocab=300000, case
dummy4unknown=False):
"""
Compute correlation of the model with human similarity judgments. `pairs` is a filename of a dataset where
-lines are 3-tuples, each consisting of a word pair and a similarity value, separated by `delimiter'.
+lines are 3-tuples, each consisting of a word pair and a similarity value, separated by `delimiter`.
An example dataset is included in Gensim (test/test_data/wordsim353.tsv). More datasets can be found at
http://technion.ac.il/~ira.leviant/MultilingualVSMdata.html or https://www.cl.cam.ac.uk/~fh295/simlex.html.
@@ -748,7 +748,7 @@ def evaluate_word_pairs(self, pairs, delimiter='\t', restrict_vocab=300000, case
and words pairs in the dataset. If there are multiple case variants of a single word, the vector for the first
occurrence (also the most frequent if vocabulary is sorted) is taken.
-Use `dummy4unknown=True' to produce zero-valued similarities for pairs with out-of-vocabulary words.
+Use `dummy4unknown=True` to produce zero-valued similarities for pairs with out-of-vocabulary words.
Otherwise (default False), these pairs are skipped entirely.
"""
ok_vocab = [(w, self.vocab[w]) for w in self.index2word[:restrict_vocab]]
8 changes: 6 additions & 2 deletions gensim/models/ldamodel.py
@@ -981,16 +981,20 @@ def diff(self, other, distance="kullback_leibler", num_words=100, n_ann_terms=10
`n_ann_terms` is max quantity of words in intersection/symmetric difference between topics (used for annotation)
Returns a matrix Z with shape (m1.num_topics, m2.num_topics), where Z[i][j] - difference between topic_i and topic_j
and matrix annotation with shape (m1.num_topics, m2.num_topics, 2, None),
-where
+where:
annotation[i][j] = [[`int_1`, `int_2`, ...], [`diff_1`, `diff_2`, ...]] and
`int_k` is word from intersection of `topic_i` and `topic_j` and
`diff_l` is word from symmetric difference of `topic_i` and `topic_j`
-`normed` is a flag. If `true`, matrix Z will be normalized
+`normed` is a flag. If `true`, matrix Z will be normalized
Example:
>>> m1, m2 = LdaMulticore.load(path_1), LdaMulticore.load(path_2)
>>> mdiff, annotation = m1.diff(m2)
>>> print(mdiff) # get matrix with difference for each topic pair from `m1` and `m2`
>>> print(annotation) # get array with positive/negative words for each topic pair from `m1` and `m2`
"""

distances = {
22 changes: 12 additions & 10 deletions gensim/models/ldaseqmodel.py
@@ -156,14 +156,14 @@ def init_ldaseq_ss(self, topic_chain_variance, topic_obs_variance, alpha, init_s
def fit_lda_seq(self, corpus, lda_inference_max_iter, em_min_iter, em_max_iter, chunksize):
"""
fit an lda sequence model:
-for each time period:
-set up lda model with E[log p(w|z)] and \alpha
+for each time period
+set up lda model with E[log p(w|z)] and \alpha
-for each document
-perform posterior inference
-update sufficient statistics/likelihood
+for each document:
+perform posterior inference
+update sufficient statistics/likelihood
-maximize topics
+maximize topics
"""
LDASQE_EM_THRESHOLD = 1e-4
@@ -485,11 +485,13 @@ def compute_post_variance(self, word, chain_variance):
This function accepts the word to compute variance for, along with the associated sslm class object, and returns variance and fwd_variance
Computes Var[\beta_{t,w}] for t = 1:T
-Fwd_Variance(t) ≡ E((beta_{t,w} − mean_{t,w})^2 |beta_{t} for 1:t)
-= (obs_variance / fwd_variance[t - 1] + chain_variance + obs_variance ) * (fwd_variance[t - 1] + obs_variance)
+:math::
+fwd\_variance[t] \equiv E((beta_{t,w}-mean_{t,w})^2 |beta_{t}\ for\ 1:t) = (obs\_variance / fwd\_variance[t - 1] + chain\_variance + obs\_variance ) * (fwd\_variance[t - 1] + obs\_variance)
-Variance(t) ≡ E((beta_{t,w} − mean_cap{t,w})^2 |beta_cap{t} for 1:t)
-= fwd_variance[t - 1] + (fwd_variance[t - 1] / fwd_variance[t - 1] + obs_variance)^2 * (variance[t - 1] - (fwd_variance[t-1] + obs_variance))
+:math::
+variance[t] \equiv E((beta_{t,w}-mean\_cap_{t,w})^2 |beta\_cap_{t}\ for\ 1:t) = fwd\_variance[t - 1] + (fwd\_variance[t - 1] / fwd\_variance[t - 1] + obs\_variance)^2 * (variance[t - 1] - (fwd\_variance[t-1] + obs\_variance))
"""
INIT_VARIANCE_CONST = 1000
22 changes: 10 additions & 12 deletions gensim/models/word2vec.py
@@ -21,27 +21,25 @@
Initialize a model with e.g.::
->>> model = Word2Vec(sentences, size=100, window=5, min_count=5, workers=4)
+>>> model = Word2Vec(sentences, size=100, window=5, min_count=5, workers=4)
Persist a model to disk with::
->>> model.save(fname)
->>> model = Word2Vec.load(fname) # you can continue training with the loaded model!
+>>> model.save(fname)
+>>> model = Word2Vec.load(fname) # you can continue training with the loaded model!
-The word vectors are stored in a KeyedVectors instance in model.wv. This separates the read-only word vector lookup operations in KeyedVectors from the training code in Word2Vec.
+The word vectors are stored in a KeyedVectors instance in model.wv. This separates the read-only word vector lookup operations in KeyedVectors from the training code in Word2Vec::
>>> model.wv['computer'] # numpy vector of a word
array([-0.00449447, -0.00310097, 0.02421786, ...], dtype=float32)
The word vectors can also be instantiated from an existing file on disk in the word2vec C format as a KeyedVectors instance::
-NOTE: It is impossible to continue training the vectors loaded from the C format because hidden weights, vocabulary frequency and the binary tree is missing::
+NOTE: It is impossible to continue training the vectors loaded from the C format because hidden weights, vocabulary frequency and the binary tree is missing.
->>> from gensim.models.keyedvectors import KeyedVectors
->>> word_vectors = KeyedVectors.load_word2vec_format('/tmp/vectors.txt', binary=False) # C text format
->>> word_vectors = KeyedVectors.load_word2vec_format('/tmp/vectors.bin', binary=True) # C binary format
+>>> from gensim.models.keyedvectors import KeyedVectors
+>>> word_vectors = KeyedVectors.load_word2vec_format('/tmp/vectors.txt', binary=False) # C text format
+>>> word_vectors = KeyedVectors.load_word2vec_format('/tmp/vectors.bin', binary=True) # C binary format
You can perform various NLP word tasks with the model. Some of them
@@ -87,8 +85,8 @@
detect phrases longer than one word. Using phrases, you can learn a word2vec model
where "words" are actually multiword expressions, such as `new_york_times` or `financial_crisis`:
->>> bigram_transformer = gensim.models.Phrases(sentences)
->>> model = Word2Vec(bigram_transformer[sentences], size=100, ...)
+>>> bigram_transformer = gensim.models.Phrases(sentences)
+>>> model = Word2Vec(bigram_transformer[sentences], size=100, ...)
.. [1] Tomas Mikolov, Kai Chen, Greg Corrado, and Jeffrey Dean. Efficient Estimation of Word Representations in Vector Space. In Proceedings of Workshop at ICLR, 2013.
.. [2] Tomas Mikolov, Ilya Sutskever, Kai Chen, Greg Corrado, and Jeffrey Dean. Distributed Representations of Words and Phrases and their Compositionality.
5 changes: 3 additions & 2 deletions gensim/models/wrappers/dtmmodel.py
@@ -341,8 +341,9 @@ def dtm_coherence(self, time, num_words=20):
"""
returns all topics of a particular time-slice without probabilitiy values for it to be used
for either "u_mass" or "c_v" coherence.
-TODO: because of print format right now can only return for 1st time-slice.
-should we fix the coherence printing or make changes to the print statements to mirror DTM python?
+TODO:
+because of print format right now can only return for 1st time-slice.
+should we fix the coherence printing or make changes to the print statements to mirror DTM python?
"""
coherence_topics = []
for topic_no in range(0, self.num_topics):
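
The TODO above concerns printing only; the returned topic lists already plug into the coherence pipeline. A hedged sketch, assuming a trained `DtmModel` named `dtm` plus the tokenized `texts` and `dictionary` it was trained on (all three names are hypothetical here):

```python
from gensim.models import CoherenceModel

# `dtm`, `texts` and `dictionary` are assumed to exist already (hypothetical).
topics = dtm.dtm_coherence(time=0, num_words=20)  # word lists for time-slice 0
cm = CoherenceModel(topics=topics, texts=texts, dictionary=dictionary, coherence='c_v')
print(cm.get_coherence())
```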
14 changes: 6 additions & 8 deletions gensim/models/wrappers/ldamallet.py
@@ -21,8 +21,8 @@
Example:
->>> model = gensim.models.wrappers.LdaMallet('/Users/kofola/mallet-2.0.7/bin/mallet', corpus=my_corpus, num_topics=20, id2word=dictionary)
->>> print model[my_vector] # print LDA topics of a document
+>>> model = gensim.models.wrappers.LdaMallet('/Users/kofola/mallet-2.0.7/bin/mallet', corpus=my_corpus, num_topics=20, id2word=dictionary)
+>>> print model[my_vector] # print LDA topics of a document
.. [1] http://mallet.cs.umass.edu/
@@ -359,14 +359,12 @@ def malletmodel2ldamodel(mallet_model, gamma_threshold=0.001, iterations=50):
gensim model.
Args:
-----
-mallet_model : Trained mallet model
-gamma_threshold : To be used for inference in the new LdaModel.
-iterations : number of iterations to be used for inference in the new LdaModel.
+mallet_model : Trained mallet model
+gamma_threshold : To be used for inference in the new LdaModel.
+iterations : number of iterations to be used for inference in the new LdaModel.
Returns:
--------
-model_gensim : LdaModel instance; copied gensim LdaModel
+model_gensim : LdaModel instance; copied gensim LdaModel
"""
model_gensim = LdaModel(
id2word=mallet_model.id2word, num_topics=mallet_model.num_topics,
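
A short usage sketch for the converter documented above; the Mallet path is the illustrative one from the module's own example, and `my_corpus`, `dictionary` and `my_vector` are assumed to exist:

```python
from gensim.models.wrappers import LdaMallet
from gensim.models.wrappers.ldamallet import malletmodel2ldamodel

# Train via the Mallet wrapper, then copy into a pure-gensim LdaModel.
mallet = LdaMallet('/Users/kofola/mallet-2.0.7/bin/mallet',
                   corpus=my_corpus, num_topics=20, id2word=dictionary)
lda = malletmodel2ldamodel(mallet, gamma_threshold=0.001, iterations=50)
print(lda[my_vector])  # inference now works without the Mallet binary
```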
14 changes: 6 additions & 8 deletions gensim/models/wrappers/ldavowpalwabbit.py
@@ -498,9 +498,9 @@ def corpus_to_vw(corpus):
character.
E.g.:
-| 4:7 14:1 22:8 6:3
-| 14:22 22:4 0:1 1:3
-| 7:2 8:2
+| 4:7 14:1 22:8 6:3
+| 14:22 22:4 0:1 1:3
+| 7:2 8:2
"""
for entries in corpus:
line = ['|']
@@ -568,13 +568,11 @@ def vwmodel2ldamodel(vw_model, iterations=50):
vwmodel into the gensim model.
Args:
-----
-vw_model : Trained vowpal wabbit model.
-iterations : Number of iterations to be used for inference of the new LdaModel.
+vw_model : Trained vowpal wabbit model.
+iterations : Number of iterations to be used for inference of the new LdaModel.
Returns:
--------
-model_gensim : LdaModel instance; copied gensim LdaModel.
+model_gensim : LdaModel instance; copied gensim LdaModel.
"""
model_gensim = LdaModel(
num_topics=vw_model.num_topics, id2word=vw_model.id2word, chunksize=vw_model.chunksize,
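
The `corpus_to_vw` docstring above fully specifies the line format, so a small self-contained sketch of the conversion it describes (not necessarily gensim's exact implementation):

```python
def corpus_to_vw(corpus):
    # Each bag-of-words document becomes one Vowpal Wabbit input line:
    # a leading '|' followed by term_id:frequency pairs.
    for entries in corpus:
        line = ['|']
        for word_id, count in entries:
            line.append('%d:%d' % (word_id, count))
        yield ' '.join(line)

corpus = [[(4, 7), (14, 1), (22, 8), (6, 3)], [(7, 2), (8, 2)]]
for vw_line in corpus_to_vw(corpus):
    print(vw_line)  # -> "| 4:7 14:1 22:8 6:3" then "| 7:2 8:2"
```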